Added a new test file and the initial parsing code

This commit is contained in:
Benjamyn Love 2017-05-22 02:11:15 +10:00
parent 4865e04986
commit cb706839bc
2 changed files with 86 additions and 6 deletions

View File

@ -3,22 +3,31 @@ from HTMLParser import HTMLParser
import httplib import httplib
# Name of the tag whose text is currently being printed. The initial value ""
# matches no tag, so nothing is printed until a matching start tag is seen.
starttag = ""
# Tag name whose contents the parser prints.
conf_temp = "pre"
#URL https://www.whois.com/whois/benjamyn-testing.com
# Target element: <pre class="df-raw"> (the class checked in
# MyHTMLParser.handle_starttag).
class MyHTMLParser(HTMLParser):
    """Print the text inside the ``<pre class="df-raw">`` element of a page.

    Capture state is kept in the module-level ``starttag`` variable: it is set
    to the tag name when a matching start tag is seen and reset to ``""`` when
    that tag closes. The tag to look for is read from the module-level
    ``conf_temp`` variable.
    """

    def handle_starttag(self, tag, attrs):
        """Start capturing when ``tag`` is the configured tag with class ``df-raw``.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``.
        """
        global starttag
        if tag != conf_temp:
            return
        for name, value in attrs:
            # ``value`` is None for a bare attribute, and an HTML class
            # attribute is a space-separated list, so test membership rather
            # than equality.
            if name == "class" and value and "df-raw" in value.split():
                # Parenthesised single-argument print behaves the same under
                # the Python 2 print statement and the print function.
                print(tag)
                starttag = tag

    def handle_endtag(self, tag):
        """Stop capturing once the configured tag is closed."""
        global starttag
        # Without this reset every piece of text after the closing tag would
        # also be printed by handle_data.
        if tag == conf_temp:
            starttag = ""

    def handle_data(self, data):
        """Print ``data`` while inside the captured element."""
        if starttag == conf_temp:
            print(data)
# Fetch the WHOIS page for the test domain over HTTPS and run it through the
# parser, which prints the captured text as it goes.
conn = httplib.HTTPSConnection("www.whois.com")
try:
    conn.request("GET", "/whois/benjamyn-testing.com")
    r1 = conn.getresponse()
    parser = MyHTMLParser()
    parser.feed(r1.read())
    # Force processing of any text the parser is still buffering.
    parser.close()
finally:
    # Release the socket even if the request or the parse raises.
    conn.close()

71
test.html Normal file
View File

@ -0,0 +1,71 @@
<pre class="df-raw" id="registrarData">Domain Name: benjamyn-testing.com
Registry Domain ID: 2110713513_DOMAIN_COM-VRSN
Registrar WHOIS Server: whois.synergywholesale.com
Registrar URL: http://whois.synergywholesale.com
Updated Date: 2017-04-03 02:12:06
Creation Date: 2017-04-03 02:12:06
Registrar Registration Expiration Date: 2018-04-03 02:12:06
Registrar: Synergy Wholesale
Registrar IANA ID: 1609
Registrar Abuse Contact Email: <img src="/eimg/4/fd/4fd1c9804b75461c125b4496b8c49e67fd85b4cd.png" class="whois_email" alt="email">@synergywholesale.com
Registrar Abuse Contact Phone: +61 3 8399 9483
Reseller: VentraIP Australia
Reseller: http://www.ventraip.com.au
Reseller: <img src="/eimg/f/7a/f7ab4861b92936834e151afc429aee012b829c4e.png" class="whois_email" alt="email">@ventraip.com.au
Domain Status: ok http://www.icann.org/epp#ok
Registry Registrant ID: Not Available From Registry
Registrant Name: Domain Privacy
Registrant Organization:
Registrant Street: PO BOX 119
Registrant Street:
Registrant Street:
Registrant City: Beaconsfield
Registrant State/Province: VIC
Registrant Postal Code: 3807
Registrant Country: AU
Registrant Phone: +61.385145121
Registrant Fax:
Registrant Email: <img src="/eimg/0/f9/0f964f9f848ba0b2fb9f927eb3ae82b5157fe437.png" class="whois_email" alt="email">@obscure.me
Registry Admin ID: Not Available From Registry
Admin Name: Domain Privacy
Admin Organization:
Admin Street: PO BOX 119
Admin Street:
Admin Street:
Admin City: Beaconsfield
Admin State/Province: VIC
Admin Postal Code: 3807
Admin Country: AU
Admin Phone: +61.385145121
Admin Fax:
Admin Email: <img src="/eimg/0/f9/0f964f9f848ba0b2fb9f927eb3ae82b5157fe437.png" class="whois_email" alt="email">@obscure.me
Registry Tech ID: Not Available From Registry
Tech Name: Domain Privacy
Tech Organization:
Tech Street: PO BOX 119
Tech Street:
Tech Street:
Tech City: Beaconsfield
Tech State/Province: VIC
Tech Postal Code: 3807
Tech Country: AU
Tech Phone: +61.385145121
Tech Fax:
Tech Email: <img src="/eimg/0/f9/0f964f9f848ba0b2fb9f927eb3ae82b5157fe437.png" class="whois_email" alt="email">@obscure.me
Name Server: bns1.ventraip.net.au
Name Server: bns2.ventraip.net.au
Name Server: bns3.ventraip.net.au
DNSSEC: unsigned
URL of the ICANN WHOIS Data Problem Reporting System: http://wdprs.internic.net/
For more information on Whois status codes, please visit https://icann.org/epp
&gt;&gt;&gt; Last update of WHOIS database: 2017-05-19 05:43:52 &lt;&lt;&lt;
Domain Service Provider:
VentraIP Australia, <img src="/eimg/f/7a/f7ab4861b92936834e151afc429aee012b829c4e.png" class="whois_email" alt="email">@ventraip.com.au
03 9013 8464
03 8080 6481 (fax)
http://www.ventraip.com.au
Please contact the provider listed above for assistance with
your domain name, such as updating your name servers or WHOIS contact information.
</pre>