Add parsing part. Fix bugs. WiP.

This commit is contained in:
Larry Kim 2013-02-03 02:15:54 +09:00
parent c3af1983c7
commit 8a6e99255a
6 changed files with 110 additions and 13 deletions

2
.gitignore vendored
View File

@ -33,3 +33,5 @@ nosetests.xml
.mr.developer.cfg .mr.developer.cfg
.project .project
.pydevproject .pydevproject
test.py

74
src/parser.py Normal file
View File

@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
import error
import re
class Parser(object):
    """Extract structured fields from the raw text of a WHOIS response.

    Field patterns come from Python-syntax config files under ``tlds/``
    (a path relative to the current working directory).  Each file is
    expected to define a ``parse`` dict that maps a whois server name (or
    ``"default"``) to a ``{field name: regex}`` table.

    A table may carry a ``"LoadConf": "<tld>/<whois server>"`` entry
    (e.g. ``"org/whois.publicinternetregistry.net"``).  That entry is a
    directive, not a field: the referenced table is loaded first and the
    table's own entries are layered on top of it.
    """

    # Key of the inheritance directive inside a pattern table.
    LOAD_CONF_KEY = "LoadConf"

    def __init__(self, domain, text, whoisServer=None):
        """Select the pattern table to use for ``domain``.

        :param domain: domain name that was queried; the part after the
            last ``.`` is used as the TLD config file name.
        :param text: raw WHOIS response text to parse.
        :param whoisServer: name of the server that produced ``text``;
            ``None`` (or any falsy value) selects the ``"default"`` table.
        :raises IOError: if ``tlds/default`` cannot be read.  Problems with
            the TLD-specific file are not raised; the default table is
            used instead.
        """
        self.domain = domain
        self.text = text
        self.whoisServer = whoisServer or "default"
        self.tld = self.domain.split(".")[-1]
        # A default file without a "parse" dict is treated as empty.
        self.parseDefaultConf = self._loadParseConf("default") or {}
        self.parseConf = {}
        try:
            selected = self._selectConf(self._loadParseConf(self.tld))
            self.parseConf = self._resolveLoadConf(selected)
        except Exception:
            # Missing or broken TLD file, or no usable table in it: fall
            # back to the default patterns (empty if there are none).
            self.parseConf = self.parseDefaultConf.get("default") or {}

    @staticmethod
    def _loadParseConf(name):
        """Execute ``tlds/<name>`` and return its ``parse`` value (or None)."""
        path = "tlds/%s" % (name)
        namespace = {}
        with open(path) as handle:
            source = handle.read()
        # NOTE(security): config files are executed as Python code, so the
        # tlds/ directory must only ever contain trusted files.
        # exec(compile(...)) replaces the Python-2-only execfile() and
        # behaves the same on Python 2 and 3.
        exec(compile(source, path, "exec"), {}, namespace)
        return namespace.get("parse")

    def _selectConf(self, tldConf):
        """Pick the pattern table for ``self.whoisServer``.

        Lookup order: the TLD file's table for the server, the TLD file's
        ``"default"`` table, then the default file's ``"default"`` table.
        When the TLD file has no ``parse`` data at all, the default file
        is consulted for the server first.
        """
        defaults = self.parseDefaultConf
        if not tldConf:
            if self.whoisServer in defaults:
                return defaults.get(self.whoisServer)
            return defaults.get("default")
        if self.whoisServer in tldConf:
            return tldConf.get(self.whoisServer)
        if "default" in tldConf:
            return tldConf.get("default")
        return defaults.get("default")

    def _resolveLoadConf(self, conf):
        """Return a copy of ``conf`` with its ``LoadConf`` parent merged in."""
        merged = {}
        reference = conf.get(self.LOAD_CONF_KEY)
        if reference:
            try:
                # "<tld>/<whois server>"
                refTld, refServer = reference.split("/", 1)
                merged.update(self._loadParseConf(refTld).get(refServer))
            except Exception:
                # Inheritance is best effort: a bad reference must not
                # discard the table's own patterns.
                pass
        merged.update(conf)
        # The directive is not a regex; keeping it would make run() search
        # the response for the reference string and report a bogus field.
        merged.pop(self.LOAD_CONF_KEY, None)
        return merged

    def run(self):
        """Apply every configured pattern to ``self.text``.

        :returns: dict mapping each field name whose pattern matched to
            the list of its matches, each stripped of surrounding
            whitespace.  Fields without a match are omitted.
        """
        result = {}
        for key, pattern in (self.parseConf or {}).items():
            if key == self.LOAD_CONF_KEY:
                continue
            matches = re.findall(pattern, self.text, re.MULTILINE)
            if matches:
                result[key] = [match.strip() for match in matches]
        # Work-in-progress debug output, kept so existing behaviour is
        # unchanged; callers should use the return value.
        print(result)
        return result

13
src/tlds/default Normal file
View File

@ -0,0 +1,13 @@
# Fallback field patterns, used by Parser when a TLD has no pattern table
# of its own.  Outer keys are whois server names ("default" matches any
# server); inner keys are field names mapped to a regex with one capture
# group holding the value.  Patterns are raw strings so that "\s" reaches
# the regex engine instead of being read as a string escape.  Parser.run
# strips surrounding whitespace from every captured value.
parse = {
    "default": {
        "DomainName": r"Domain Name:\s?(.+)",
        "Registrar": r"Registrar:\s?(.+)",
        # Previously "Whois Server: \s?(.+)"; the stray literal space made
        # this the only pattern that required a space after the colon.
        "WhoisServer": r"Whois Server:\s?(.+)",
        "ReferralURL": r"Referral URL:\s?(.+)",
        "UpdatedDate": r"Updated Date:\s?(.+)",
        "CreationDate": r"Creation Date:\s?(.+)",
        "ExpirationDate": r"Expiration Date:\s?(.+)",
        "NameServer": r"Name Server:\s?(.+)",
        "Status": r"Status:\s?(.+)",
    },
}

View File

@ -1,11 +1,12 @@
server = { server = {
"host": "org.whois-servers.net", "host": "org.whois-servers.net",
"port": 43,
} }
parse = { parse = {
"regex": { "default": {
"default": { # "LoadConf": "default/default",
"NotFound": "NOT FOUND",
} "DomainName": "Domain Name:(.+)",
} }
} }

View File

@ -5,6 +5,8 @@ import os
import socket import socket
import re import re
import error
class Whois(object): class Whois(object):
def __init__(self, domain): def __init__(self, domain):
self.domain = domain self.domain = domain
@ -17,12 +19,15 @@ class Whois(object):
self.settings = {} self.settings = {}
if self.tld in self.tldList: if self.tld in self.tldList:
self.settings = {} _settings = {}
execfile(os.path.join(self.tldPath, self.tld), {}, self.settings) execfile(os.path.join(self.tldPath, self.tld), {}, _settings)
if "server" in _settings:
self.settings.update(_settings["server"])
def chooseServer(self):
    """Return the host name of the whois server to query first.

    ``__init__`` merges the *contents* of the TLD file's ``server`` dict
    into ``self.settings``, so the configured host lives directly under
    ``self.settings["host"]``.  The previous test for a ``"server"`` key
    could never succeed with that layout, which meant the configured host
    was silently ignored.

    :returns: the configured host, or ``"<tld>.whois-servers.net"`` when
        no host is configured for the TLD.
    """
    host = self.settings.get("host")
    if host:
        return host
    return self.tld + ".whois-servers.net"
@ -51,16 +56,18 @@ class Whois(object):
return result return result
def run(self, redirect=True):
    """Query the whois server for the domain, optionally following referrals.

    The first server comes from ``self.chooseServer()``.  When ``redirect``
    is true and ``self.settings`` has a ``"redirect"`` regex, the first
    match of that regex in each response is taken as the next server to
    query.  Following stops when a response has no match, or when it names
    a server that was already queried; the latter would otherwise loop
    forever on a self-referral or a referral cycle.

    :param redirect: follow referrals found in the responses.
    :returns: ``(whoisServer, result)`` - the last server queried and the
        value ``self.query`` returned for it (presumably the response
        text; ``query`` is defined outside this block).
    """
    whoisServer = self.chooseServer()
    result = self.query(whoisServer)

    pattern = self.settings.get("redirect") if redirect else None
    if not pattern:
        return whoisServer, result

    visited = set([whoisServer])
    while True:
        # Same flags on every pass; the original dropped re.MULTILINE
        # after the first response.
        redirection = re.findall(pattern, result, re.MULTILINE)
        if not redirection:
            break
        # "." also matches "\r", so a CRLF-terminated line would leave a
        # trailing "\r" in the captured host name.
        target = redirection[0].strip()
        if not target or target in visited:
            break
        visited.add(target)
        whoisServer = target
        result = self.query(whoisServer)
    return whoisServer, result