In [1]:
import re
import pandas as pd
In [2]:
from dns.resolver import Resolver, NoAnswer, NXDOMAIN
# One shared resolver is used for every DNS lookup in this notebook; pin it to
# the fixed nameservers 8.8.8.8 and 8.8.4.4.
resolver = Resolver()
resolver.nameservers = ['8.8.8.8', '8.8.4.4']
In [3]:
import whois
In [4]:
# Load the input table. Later cells read a `domain` attribute from each row, so
# the CSV is expected to provide a `domain` column.
df = pd.read_csv('data/domains.csv')
# Preview the first rows.
df.head()
Out[4]:
In [5]:
# iterrows() yields (index, Series) pairs; keep the Series of the first row as a
# sample input for trying each helper on a single domain.
row = next(df.iterrows())[1]
row
Out[5]:
In [6]:
def registrar(row):
    """Return the registrar that WHOIS reports for ``row.domain``.

    Parameters
    ----------
    row : object with a ``domain`` attribute (e.g. one DataFrame row).

    Returns
    -------
    The value stored under the ``'registrar'`` key of the WHOIS result, or
    ``None`` when the lookup or the key access fails.
    """
    try:
        record = whois.whois(row.domain)
        return record['registrar']
    except Exception:
        # Best-effort lookup: a failure for one domain must not abort the whole
        # DataFrame.apply. Catch Exception rather than using a bare ``except``
        # so KeyboardInterrupt/SystemExit still stop a long-running cell.
        return None
# Spot-check: run registrar() on the sample `row` and display the result.
registrar(row)
Out[6]:
In [7]:
# Fill the `registrar` column by calling registrar() once per row.
df['registrar'] = df.apply(registrar, axis=1)
df.tail()
Out[7]:
In [8]:
def nameservers(row):
    """Return the NS records of ``row.domain`` as one ``' | '``-joined string.

    Parameters
    ----------
    row : object with a ``domain`` attribute (e.g. one DataFrame row).

    Returns
    -------
    str or None
        The text form of every NS record joined with ``' | '``, or ``None``
        when the query raises ``NoAnswer`` or ``NXDOMAIN``.
    """
    try:
        answer = resolver.query(row.domain, 'ns')
    except (NoAnswer, NXDOMAIN):
        # NXDOMAIN is handled like the other lookup helpers in this notebook do,
        # so a single non-existent domain does not abort the whole
        # DataFrame.apply.
        return None
    return ' | '.join(ns.to_text() for ns in answer.rrset.items)
# Spot-check: run nameservers() on the sample `row` and display the result.
nameservers(row)
Out[8]:
In [9]:
# Fill the `ns` column by calling nameservers() once per row.
df['ns'] = df.apply(nameservers, axis=1)
df.tail()
Out[9]:
In [10]:
def dns_provider(row):
    """Classify the DNS provider from the nameserver string in ``row.ns``.

    Parameters
    ----------
    row : object with an ``ns`` attribute holding the joined nameserver names
        (or a non-string such as ``None`` when no NS records were found).

    Returns
    -------
    str
        ``'Digital Ocean'``, ``'Hover'``, ``'Linode'`` or
        ``'NearlyFreeSpeech.NET'`` when the matching domain suffix occurs in
        ``row.ns``; ``'Unknown'`` otherwise, including when ``row.ns`` is not a
        string.
    """
    ns_value = row.ns
    if not isinstance(ns_value, str):
        # Missing NS data (None / NaN) cannot be matched against any suffix.
        return 'Unknown'

    # DNS names are case-insensitive, so compare in lower case.
    ns_value = ns_value.lower()

    # Checked in order; the first suffix contained in the nameserver string wins.
    providers = (
        ('digitalocean.com.', 'Digital Ocean'),
        ('hover.com.', 'Hover'),
        ('linode.com.', 'Linode'),
        ('nearlyfreespeech.net.', 'NearlyFreeSpeech.NET'),
    )
    for suffix, provider in providers:
        # The suffix must be contained in the nameserver string. The original
        # test was inverted and had its operands reversed, which labelled
        # almost every row 'Digital Ocean'.
        if suffix in ns_value:
            return provider
    return 'Unknown'
In [11]:
# Fill the `dns_provider` column by calling dns_provider() once per row.
df['dns_provider'] = df.apply(dns_provider, axis=1)
df.tail()
Out[11]:
In [12]:
def mx_record(row):
    """Return the MX records of ``row.domain`` as one ``' | '``-joined string.

    Parameters
    ----------
    row : object with a ``domain`` attribute (e.g. one DataFrame row).

    Returns
    -------
    str or None
        The text form of every MX record joined with ``' | '``, or ``None``
        when the query raises ``NoAnswer`` or ``NXDOMAIN``.
    """
    try:
        answer = resolver.query(row.domain, 'mx')
    except (NoAnswer, NXDOMAIN):
        # NXDOMAIN is handled like the other lookup helpers in this notebook do,
        # so a single non-existent domain does not abort the whole
        # DataFrame.apply.
        return None
    return ' | '.join(mx.to_text() for mx in answer.rrset.items)
# Spot-check: run mx_record() on the sample `row` and display the result.
mx_record(row)
Out[12]:
In [13]:
# Fill the `mx` column by calling mx_record() once per row.
df['mx'] = df.apply(mx_record, axis=1)
df.tail()
Out[13]:
In [14]:
def xmpp_records(row):
    """Return the XMPP client and server SRV records of ``row.domain``.

    The ``_xmpp-client._tcp`` name is queried first, then ``_xmpp-server._tcp``.
    If either query raises ``NoAnswer`` or ``NXDOMAIN`` the result is ``None``;
    otherwise every record's text form (client records first) is joined with
    ``' | '``.
    """
    answers = []
    try:
        for name_template in ('_xmpp-client._tcp.{}', '_xmpp-server._tcp.{}'):
            srv_name = name_template.format(row.domain)
            answers.append(resolver.query(srv_name, 'srv'))
    except (NoAnswer, NXDOMAIN):
        return None

    per_service = (
        ' | '.join(record.to_text() for record in answer.rrset.items)
        for answer in answers
    )
    return ' | '.join(per_service)
# Spot-check: run xmpp_records() on the sample `row` and display the result.
xmpp_records(row)
Out[14]:
In [15]:
# Fill the `xmpp` column by calling xmpp_records() once per row.
df['xmpp'] = df.apply(xmpp_records, axis=1)
df.tail()
Out[15]:
In [16]:
def dkim_record(row):
    """Return the first TXT record at ``default._domainkey.<row.domain>``.

    Gives ``None`` when the query raises ``NoAnswer`` or ``NXDOMAIN``;
    otherwise the text form of the first item in the answer's rrset.
    """
    qname = 'default._domainkey.{}'.format(row.domain)
    try:
        response = resolver.query(qname, 'txt')
    except (NoAnswer, NXDOMAIN):
        return None
    first_rdata = response.rrset.items[0]
    return first_rdata.to_text()
# Spot-check: run dkim_record() on the sample `row` and display the result.
dkim_record(row)
Out[16]:
In [17]:
# Fill the `dkim` column by calling dkim_record() once per row.
df['dkim'] = df.apply(dkim_record, axis=1)
df.tail()
Out[17]:
In [18]:
def dmarc_record(row):
    """Return the first TXT record at ``_dmarc.<row.domain>``.

    Parameters
    ----------
    row : object with a ``domain`` attribute (e.g. one DataFrame row).

    Returns
    -------
    str or None
        The text form of the first item in the answer's rrset, or ``None``
        when the query raises ``NoAnswer`` or ``NXDOMAIN``.
    """
    try:
        answer = resolver.query('_dmarc.{}'.format(row.domain), 'txt')
    except (NoAnswer, NXDOMAIN):
        # Only DNS "no data" outcomes are treated as "no record". The builtin
        # NameError is deliberately not caught: it signals an undefined Python
        # name (for example `resolver` missing on a fresh kernel) and hiding it
        # would silently fill the whole column with None.
        return None
    return answer.rrset.items[0].to_text()
# Spot-check: run dmarc_record() on the sample `row` and display the result.
dmarc_record(row)
Out[18]:
In [19]:
# Fill the `dmarc` column by calling dmarc_record() once per row.
df['dmarc'] = df.apply(dmarc_record, axis=1)
df.tail()
Out[19]:
In [20]:
def spf_record(row):
    """Return the SPF policy published in the TXT records of ``row.domain``.

    Parameters
    ----------
    row : object with a ``domain`` attribute (e.g. one DataFrame row).

    Returns
    -------
    str or None
        The text form of the first TXT record whose content starts with
        ``v=spf1`` (compared case-insensitively, ignoring the surrounding
        quotes of the text form), or ``None`` when there is no such record or
        the query raises ``NoAnswer`` or ``NXDOMAIN``.
    """
    try:
        answer = resolver.query(row.domain, 'txt')
    except (NoAnswer, NXDOMAIN):
        # NXDOMAIN is handled like the other lookup helpers in this notebook do,
        # so a single non-existent domain does not abort the whole
        # DataFrame.apply.
        return None
    for record in answer.rrset.items:
        text = record.to_text()
        # Require the SPF version tag at the start of the record. A bare
        # substring test for 'spf' also matches unrelated TXT records that
        # merely contain those letters.
        if text.strip('"').lower().startswith('v=spf1'):
            return text
    return None
# Spot-check: run spf_record() on the sample `row` and display the result.
spf_record(row)
Out[20]:
In [21]:
# Fill the `spf` column by calling spf_record() once per row.
df['spf'] = df.apply(spf_record, axis=1)
df.tail()
Out[21]:
In [22]:
def autoconfig_record(row):
    """Return the first CNAME record at ``autoconfig.<row.domain>``.

    Gives ``None`` when the query raises ``NoAnswer`` or ``NXDOMAIN``;
    otherwise the text form of the first item in the answer's rrset.
    """
    qname = 'autoconfig.{}'.format(row.domain)
    try:
        response = resolver.query(qname, 'cname')
    except (NoAnswer, NXDOMAIN):
        return None
    first_rdata = response.rrset.items[0]
    return first_rdata.to_text()
# Spot-check: run autoconfig_record() on the sample `row` and display the result.
autoconfig_record(row)
Out[22]:
In [23]:
# Fill the `autoconfig` column by calling autoconfig_record() once per row.
df['autoconfig'] = df.apply(autoconfig_record, axis=1)
df.tail()
Out[23]:
In [24]:
def autodiscover_record(row):
    """Return the first CNAME record at ``autodiscover.<row.domain>``.

    Gives ``None`` when the query raises ``NoAnswer`` or ``NXDOMAIN``;
    otherwise the text form of the first item in the answer's rrset.
    """
    qname = 'autodiscover.{}'.format(row.domain)
    try:
        response = resolver.query(qname, 'cname')
    except (NoAnswer, NXDOMAIN):
        return None
    first_rdata = response.rrset.items[0]
    return first_rdata.to_text()
# Spot-check: run autodiscover_record() on the sample `row` and display the result.
autodiscover_record(row)
Out[24]:
In [25]:
# Fill the `autodiscover` column by calling autodiscover_record() once per row.
df['autodiscover'] = df.apply(autodiscover_record, axis=1)
df.tail()
Out[25]:
In [26]:
# Write the enriched table to 'domains.csv'.
# NOTE(review): this path differs from the input ('data/domains.csv'), and the
# DataFrame index is written as an extra column because index=False is not
# passed -- confirm both are intended.
df.to_csv('domains.csv')
In [ ]: