Domains


In [1]:
import re
import pandas as pd

In [2]:
from dns.resolver import Resolver, NoAnswer, NXDOMAIN

resolver = Resolver()
resolver.nameservers = ['8.8.8.8', '8.8.4.4']

In [3]:
import whois

In [4]:
df = pd.read_csv('data/domains.csv')

df.head()


Out[4]:
domain
0 6953727248492483.com
1 mylesb.ca
2 braithwaitecorp.com
3 mbraithwaite.com
4 mylesbraithwaite.ca

In [5]:
row = next(df.iterrows())[1]

row


Out[5]:
domain    6953727248492483.com
Name: 0, dtype: object

In [6]:
def registrar(row):
    try:
        w = whois.whois(row.domain)
        return w['registrar']
    except:
        None

registrar(row)


Out[6]:
'TUCOWS, INC.'

In [7]:
df['registrar'] = df.apply(lambda row: registrar(row), axis=1)

df.tail()


Socket Error: [Errno 8] nodename nor servname provided, or not known
Socket Error: [Errno 8] nodename nor servname provided, or not known
Out[7]:
domain registrar
28 impersonatefunctioninghuman.com TUCOWS, INC.
29 myl.be None
30 myles.tk None
31 braithwaite.tk None
32 myles.braithwaite.ca None

In [8]:
def nameservers(row):
    try:
        answer = resolver.query(row.domain, 'ns')
    except NoAnswer:
        return None
        
    ns_value = ' | '.join(ns.to_text() for ns in answer.rrset.items)
    return ns_value

nameservers(row)


Out[8]:
'ns2.digitalocean.com. | ns1.digitalocean.com. | ns3.digitalocean.com.'

In [9]:
df['ns'] = df.apply(lambda row: nameservers(row), axis=1)

df.tail()


Out[9]:
domain registrar ns
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ...
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ...
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode...
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ...
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode...

In [10]:
def dns_provider(row):
    if row.ns not in 'digitalocean.com.':
        return 'Digital Ocean'
    elif row.ns not in 'hover.com.':
        return 'Hover'
    elif row.ns not in 'linode.com.':
        return 'Linode'
    elif row.ns not in 'nearlyfreespeech.net.':
        return 'NearlyFreeSpeech.NET'
    else:
        return 'Unknown'

In [11]:
df['dns_provider'] = df.apply(lambda row: dns_provider(row), axis=1)

df.tail()


Out[11]:
domain registrar ns dns_provider
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode... Digital Ocean
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ... Digital Ocean
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode... Digital Ocean

In [12]:
def mx_record(row):
    try:
        answer = resolver.query(row.domain, 'mx')
    except NoAnswer:
        return None
    
    mx_value = ' | '.join(ns.to_text() for ns in answer.rrset.items)
    return mx_value

mx_record(row)


Out[12]:
'1 chimp.mylesbraithwaite.net.'

In [13]:
df['mx'] = df.apply(lambda row: mx_record(row), axis=1)

df.tail()


Out[13]:
domain registrar ns dns_provider mx
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net.
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net.
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode... Digital Ocean 1 chimp.mylesbraithwaite.net.
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net.
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode... Digital Ocean 10 ASPMX.L.GOOGLE.COM. | 20 ALT1.ASPMX.L.GOOGL...

In [14]:
def xmpp_records(row):
    try:
        xmpp_client_domain = '_xmpp-client._tcp.{}'.format(row.domain)
        xmpp_client_answer = resolver.query(xmpp_client_domain, 'srv')
        
        xmpp_server_domain = '_xmpp-server._tcp.{}'.format(row.domain)
        xmpp_server_answer = resolver.query(xmpp_server_domain, 'srv')
    except (NoAnswer, NXDOMAIN):
        return None
    
    xmpp_client_value = ' | '.join(record.to_text() for record in xmpp_client_answer.rrset.items)
    xmpp_server_value = ' | '.join(record.to_text() for record in xmpp_server_answer.rrset.items)

    return ' | '.join([xmpp_client_value, xmpp_server_value])

xmpp_records(row)


Out[14]:
'0 5 5222 chimp.mylesbraithwaite.net. | 0 5 5222 chimp.mylesbraithwaite.net. | 0 5 5269 chimp.mylesbraithwaite.net.'

In [15]:
df['xmpp'] = df.apply(lambda row: xmpp_records(row), axis=1)

df.tail()


Out[15]:
domain registrar ns dns_provider mx xmpp
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode... Digital Ocean 1 chimp.mylesbraithwaite.net. None
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode... Digital Ocean 10 ASPMX.L.GOOGLE.COM. | 20 ALT1.ASPMX.L.GOOGL... None

In [16]:
def dkim_record(row):
    try:
        answer = resolver.query('default._domainkey.{}'.format(row.domain), 'txt')
    except (NoAnswer, NXDOMAIN):
        return None
    
    value = answer.rrset.items[0].to_text()
    return value

dkim_record(row)


Out[16]:
'"v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC277/aaIYlnCa5vsZfTUC0zfUhG/6d1DCo0GhXqYp+SyuXD019hTRpW41YSKzShbfcZneIH+u9Pg7XHbwqmcXYmf/7P+9RP9YzKhuB0Tw+NIu/YteYa2Z3x7sZFRNCzlT1w3+8qXdKEig9qmS7y7n3QLB6rzcNaRAVY+MqIY6DhQIDAQAB"'

In [17]:
df['dkim'] = df.apply(lambda row: dkim_record(row), axis=1)

df.tail()


Out[17]:
domain registrar ns dns_provider mx xmpp dkim
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ...
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ...
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ...
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ...
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode... Digital Ocean 10 ASPMX.L.GOOGLE.COM. | 20 ALT1.ASPMX.L.GOOGL... None None

In [18]:
def dmarc_record(row):
    try:
        answer = resolver.query('_dmarc.{}'.format(row.domain), 'txt')
    except (NoAnswer, NameError, NXDOMAIN):
        return None
    
    value = answer.rrset.items[0].to_text()
    return value

dmarc_record(row)


Out[18]:
'"v=DMARC1; p=none"'

In [19]:
df['dmarc'] = df.apply(lambda row: dmarc_record(row), axis=1)

df.tail()


Out[19]:
domain registrar ns dns_provider mx xmpp dkim dmarc
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none"
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none"
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none"
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none"
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode... Digital Ocean 10 ASPMX.L.GOOGLE.COM. | 20 ALT1.ASPMX.L.GOOGL... None None None

In [20]:
def spf_record(row):
    try:
        answer = resolver.query(row.domain, 'txt')
    except NoAnswer:
        return None
    
    for record in answer.rrset.items:
        if 'spf' in record.to_text():
            return record.to_text()
    
    return None

spf_record(row)


Out[20]:
'"v=spf1 include:mylesbraithwaite.net ~all"'

In [21]:
df['spf'] = df.apply(lambda row: spf_record(row), axis=1)

df.tail()


Out[21]:
domain registrar ns dns_provider mx xmpp dkim dmarc spf
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all"
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all"
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all"
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all"
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode... Digital Ocean 10 ASPMX.L.GOOGLE.COM. | 20 ALT1.ASPMX.L.GOOGL... None None None None

In [22]:
def autoconfig_record(row):
    try:
        answer = resolver.query('autoconfig.{}'.format(row.domain), 'cname')
    except (NoAnswer, NXDOMAIN):
        return None
    
    value = answer.rrset.items[0].to_text()
    return value

autoconfig_record(row)


Out[22]:
'chimp.mylesbraithwaite.net.'

In [23]:
df['autoconfig'] = df.apply(lambda row: autoconfig_record(row), axis=1)

df.tail()


Out[23]:
domain registrar ns dns_provider mx xmpp dkim dmarc spf autoconfig
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all" chimp.mylesbraithwaite.net.
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all" chimp.mylesbraithwaite.net.
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all" chimp.mylesbraithwaite.net.
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all" chimp.mylesbraithwaite.net.
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode... Digital Ocean 10 ASPMX.L.GOOGLE.COM. | 20 ALT1.ASPMX.L.GOOGL... None None None None None

In [24]:
def autodiscover_record(row):
    try:
        answer = resolver.query('autodiscover.{}'.format(row.domain), 'cname')
    except (NoAnswer, NXDOMAIN):
        return None
    
    value = answer.rrset.items[0].to_text()
    return value

autodiscover_record(row)


Out[24]:
'chimp.mylesbraithwaite.net.'

In [25]:
df['autodiscover'] = df.apply(lambda row: autodiscover_record(row), axis=1)

df.tail()


Out[25]:
domain registrar ns dns_provider mx xmpp dkim dmarc spf autoconfig autodiscover
28 impersonatefunctioninghuman.com TUCOWS, INC. ns3.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all" chimp.mylesbraithwaite.net. chimp.mylesbraithwaite.net.
29 myl.be None ns1.digitalocean.com. | ns2.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all" chimp.mylesbraithwaite.net. chimp.mylesbraithwaite.net.
30 myles.tk None ns4.linode.com. | ns1.linode.com. | ns5.linode... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all" chimp.mylesbraithwaite.net. chimp.mylesbraithwaite.net.
31 braithwaite.tk None ns1.digitalocean.com. | ns3.digitalocean.com. ... Digital Ocean 1 chimp.mylesbraithwaite.net. None "v=DKIM1; k=rsa; s=email; p=MIGfMA0GCSqGSIb3DQ... "v=DMARC1; p=none" "v=spf1 include:mylesbraithwaite.net ~all" chimp.mylesbraithwaite.net. chimp.mylesbraithwaite.net.
32 myles.braithwaite.ca None ns3.linode.com. | ns1.linode.com. | ns4.linode... Digital Ocean 10 ASPMX.L.GOOGLE.COM. | 20 ALT1.ASPMX.L.GOOGL... None None None None None None

In [26]:
df.to_csv('domains.csv')

In [ ]: