In [88]:
'''
Analysis of how the ownership of a token holder changes
'''
Out[88]:
In [89]:
from bs4 import BeautifulSoup
import urllib3
#disable the annoying security warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import re
import pandas
import numpy
from time import sleep
import time
from multiprocessing.dummy import Pool as ThreadPool
import itertools
#avoid counting duplicate transactions
from sets import Set
In [90]:
#number of worker threads started by ICO_TOKEN
numthread=3
#root of every etherscan url requested in this notebook
baseUrl='https://etherscan.io/'
#shared urllib3 pool manager, created with num_pools=3
pool=urllib3.PoolManager(num_pools=3)
def html_convert_top100(tokenid, classname):
    '''
    Fetch the token holder chart page and return its table as a DataFrame.

    tokenid   -- appended to the 'token/tokenholderchart/' url
    classname -- passed as `attrs` when looking up the <table> element

    Returns a pandas DataFrame whose columns are the table headings, with
    'Quantity (Token)' converted to float64 and 'Percentage' converted to
    a float64 fraction (percent value divided by 100).
    '''
    response = pool.request('GET', baseUrl + 'token/tokenholderchart/' + tokenid)
    soup = BeautifulSoup(response.data)
    table = soup.find("table", attrs=classname)

    # Header row gives the column names
    headings = [str(th.get_text()) for th in table.find("tr").find_all("th")]

    # Every following row is one record; each cell loses any parenthesised
    # text and its surrounding whitespace
    datasets = []
    for tr in table.find_all("tr")[1:]:
        raw_cells = [td.get_text() for td in tr.find_all("td")]
        cleaned = []
        for cell in raw_cells:
            cleaned.append(str(re.sub(r'\([^)]*\)', '', cell).strip()))
        datasets.append(cleaned)

    # Build the frame and turn the two numeric columns into floats
    df = pandas.DataFrame(datasets, columns=headings)
    quantity = df['Quantity (Token)'].str.replace(',', '')
    df['Quantity (Token)'] = quantity.astype(numpy.float64)
    percentage = df['Percentage'].str.strip('%')
    df['Percentage'] = percentage.astype(numpy.float64) / 100
    return df
def owners_tr(ownerid, tokenname, classname):
# Collect the token transfers of one holder address from etherscan.
#   ownerid   -- holder address; appended to the 'token/<NAME>?a=' and 'tokentxns?a=' urls
#   tokenname -- token name; upper-cased for the summary url and compared with the
#                upper-cased 'Token' column of the transfer table
#   classname -- passed as `attrs` when looking up the transfer <table>
# Returns a list of single-key dicts {TxHash: {'Value': signed amount, 'Block': block}},
# or an empty list when no balance could be read from the summary page.
# NOTE(review): the indentation of this block is missing in this copy, so the exact
# nesting of the inner loops/branches below cannot be confirmed from here -- verify
# against the original notebook before changing any logic.
global baseUrl
global pool
#the token balance
balance=0
#first of all, get a snapshot of the earliest block and the corresponding balance
earlUrl='token/'+tokenname.upper()+'?a='+ownerid
pre=pool.request('GET',baseUrl+earlUrl)
preHtml=pre.data
preSoup=BeautifulSoup(preHtml)
summ_div = preSoup.find('div' , {'id': 'ContentPlaceHolder1_divSummary'})
summ_table = summ_div.find('table')
#find the current token balance
for row in summ_table.find_all("tr")[0:]:
dataset = (td.get_text() for td in row.find_all("td"))
ls=list(dataset)
for n,i in enumerate(ls):
#encode every cell text to UTF-8 and trim surrounding whitespace
ls[n]=i.encode('utf-8').strip()
#the balance row is the two-cell row whose first cell is the 'Token Balance:' label
if len(ls)==2 and ls[0]=='Token Balance:':
#NOTE(review): the unit suffix removed here is hard-coded to "EOS", not taken from tokenname
ls[1]=ls[1].replace("EOS","")
ls[1]=ls[1].replace(",","")
#NOTE(review): the result of strip() is discarded; float() below accepts surrounding whitespace
ls[1].strip()
balance = float(ls[1])
break
else:
continue
#if no balance is found
#NOTE(review): a balance that really is 0 takes this same early-return path
if balance==0:
print "No balance found for owner "+ownerid+" , abandoning job"
voiddic=[]
return voiddic
#now find the earliest transaction
#the iframe element is only looked up and printed; it is not used further in this function
iframe = preSoup.find('iframe',{'id':'tokentxnsiframe'})
print iframe
transUrl='tokentxns?a='
#pending page links; entries are taken with pop(), i.e. from the end of the list
nextlinks=[]
nextlinks.append(transUrl+ownerid)
'''
trans_dic=[
tx:{
Value:'',
Block:''
}
]
'''
trans_dic=[]
#avoid counting duplicate transactions
trans_set=Set()
#the earliest block (only block after this will be considered)
earliest_block=0
#i counts the pages processed so far
i=0
while len(nextlinks)>0:
starttime=time.time()
print "processing page "+str(i)+" of owner: "+ownerid
link=nextlinks.pop()
r=pool.request('GET',baseUrl+link)
html=r.data
soup = BeautifulSoup(html)
'''
Get the next link
'''
a_tag=soup.find_all('a', id="ContentPlaceHolder1_HyperLinkNext",href=True)
#NOTE(review): a_tag[0] assumes the 'next' anchor is always present; an empty result raises IndexError
#an href of '#' is treated as "no further page"
if a_tag[0]['href']!='#':
next_link=a_tag[0]['href']
nextlinks.append(next_link)
table = soup.find("table", attrs=classname)
headings = [th.get_text() for th in table.find("tr").find_all("th")]
for idx in range(len(headings)):
headings[idx]=str(headings[idx])
#the column with an empty heading is named 'direction'; it is compared with 'OUT' further down
if headings[idx]=='':
headings[idx]='direction'
datasets = []
for row in table.find_all("tr")[1:]:
dataset = (td.get_text() for td in row.find_all("td"))
ls=list(dataset)
datasets.append(ls)
'''
Clean the data
'''
#remove parenthesised text and surrounding whitespace from every cell
for idx in range(len(datasets)):
for idxx in range(len(datasets[idx])):
tmp=re.sub(r'\([^)]*\)', '', datasets[idx][idxx])
tmp=tmp.strip()
tmp=str(tmp)
datasets[idx][idxx]=tmp
'''
Create pandas dataframe and convert it to float
'''
df=pandas.DataFrame(datasets, columns=headings)
df['Token']=df['Token'].str.upper()
#NOTE(review): the column is upper-cased but tokenname is compared as passed in -- confirm callers pass upper case
df=df.loc[df['Token'] == tokenname]
df['Value']=df['Value'].str.replace(',','')
df['Value']=df['Value'].astype(numpy.float64)
#j is a per-page counter; together with i==0 it gates the earliest_block assignment below
j=0
for index, row in df.iterrows():
#skip rows without a hash and hashes already recorded in trans_set
if row['TxHash']!='' and row['TxHash'] not in trans_set:
trans_set.add(row['TxHash'])
tmp_dic={}
if row['Value']!=0:
#'OUT' rows are stored as negative amounts, all other rows as positive amounts
if row['direction'] == 'OUT':
val=-row['Value']
else:
val=+row['Value']
tx=row['TxHash']
tmp_dic[tx]={}
tmp_dic[tx]['Value']=val
'''
Now, get the block number
'''
#one additional request per transaction: the 'tx/<hash>' page is scanned for a '/block/' link
txurl='tx/'
block=''
req=pool.request('GET',baseUrl+txurl+tx)
html_tx=req.data
tx_soup=BeautifulSoup(html_tx)
tx_a_tag=tx_soup.find_all('a',href=True)
m=0
#NOTE(review): nesting of the statements in this loop is not recoverable from this copy -- confirm
for tag in tx_a_tag:
if '/block/' in tag['href']:
block=str(tag.getText())
if block=='':
if m==len(tx_a_tag)-1:
block=0
else:
j=-1
continue
if i==0 and j==0:
earliest_block=long(block)
tmp_dic[tx]['Block']=block
m=m+1
#a transfer is kept only when a block was recorded and it is <= earliest_block
#NOTE(review): the comparison is <=, while the comment at earliest_block says "after" -- confirm intent
if 'Block' in tmp_dic[tx] and long(tmp_dic[tx]['Block']) <= earliest_block:
trans_dic.append(tmp_dic)
else:
j=-1
j=j+1
i=i+1
elapsed=time.time()-starttime
print str(elapsed)+" second for each request (Owner: "+ownerid+" )"
return trans_dic
def _backout_rows(owner, tokenname, balance, trans):
    '''
    Build the balance-history rows for one owner.

    owner     -- holder address written into every row
    tokenname -- token name written into every row
    balance   -- the owner's current balance
    trans     -- list of single-key dicts {TxHash: {'Value': ..., 'Block': ...}}

    Returns a list of [block, owner, transaction id, token, balance] rows:
    an 'ENDING BALANCE' row, one row per transfer, and a final
    'BEGINNING BALANCE' row. Returns an empty list when `trans` is empty.
    '''
    if not trans:
        return []
    rows = [['N/A', owner, 'ENDING BALANCE', tokenname, balance]]
    for record in trans:
        for txid in record:
            # The row carries the balance as it stood after this transfer;
            # subtracting the signed value then steps back to the balance
            # before it.
            rows.append([record[txid]['Block'], owner, txid, tokenname, balance])
            balance = balance - record[txid]['Value']
    rows.append(['N/A', owner, 'BEGINNING BALANCE', tokenname, balance])
    return rows


def tr_wrapper(args):
    '''
    Scrape the transfers of each owner and write a balance history CSV.

    args -- a (tokenname, owners) pair, where owners maps a holder address
            to its current token balance. Passed as one tuple so the
            function can be used with Pool.map.

    For every non-empty address, './csv/<address>top100.csv' is written.
    Returns None.
    '''
    import os

    tokenname = args[0]
    owners = args[1]
    headtable = ['Block Height', 'Owner', 'TransactionID', 'TOKEN', 'BALANCE']
    out_dir = './csv/'

    for owner in owners:
        # Skip blank addresses. The guard must test the loop variable; testing
        # the whole `owners` mapping against '' is always true.
        if owner == '':
            continue

        trans = owners_tr(owner, tokenname, 'table table-hover ')
        rows = _backout_rows(owner, tokenname, owners[owner], trans)

        # Create the output directory on demand so a missing folder does not
        # throw away the scraped data. Several threads may get here together,
        # hence the re-check after a failed makedirs.
        if not os.path.isdir(out_dir):
            try:
                os.makedirs(out_dir)
            except OSError:
                if not os.path.isdir(out_dir):
                    raise

        dataframe = pandas.DataFrame(rows, columns=headtable)
        dataframe.to_csv(out_dir + owner + 'top100.csv')
def ICO_TOKEN(tokenid, tokenname):
    '''
    Scrape the holder table of a token and process every holder in a
    thread pool.

    tokenid   -- forwarded to html_convert_top100 to fetch the holder table
    tokenname -- token name forwarded to tr_wrapper for every holder

    Starts `numthread` worker threads and maps tr_wrapper over one
    (tokenname, {address: quantity}) pair per holder. Returns None.
    '''
    df = html_convert_top100(tokenid, "table table-hover ")

    # Address -> quantity; going through a dict keeps one entry per address.
    owners = {}
    for _, row in df.iterrows():
        owners[row['Address']] = row['Quantity (Token)']

    # One single-entry dict per holder so each worker call handles one owner.
    # The scraped list is used as is, not replaced by a fixed test address.
    owner_list = [{address: owners[address]} for address in owners]
    jobs = [(tokenname, owner) for owner in owner_list]

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("starting " + str(numthread) + " threads...")
    tpool = ThreadPool(numthread)
    try:
        tpool.map(tr_wrapper, jobs)
    finally:
        # Always release the worker threads, even when a job raised.
        tpool.close()
        tpool.join()
#run the whole pipeline for the token id below, using 'EOS' as the token name
ICO_TOKEN('0x86Fa049857E0209aa7D9e616F7eb3b3B78ECfdb0','EOS')