In [ ]:
'''
Get the transaction history of the ERC20 tokens. 
Author: Jinhua Wang
License: MIT
Powered by Etherscan.io APIs
'''

In [ ]:
from bs4 import BeautifulSoup
import urllib3
import urllib
#disable the annoying security warnings
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import sys
import json
import collections
import pandas as pd
#the rate control module
import ratel
import time

In [ ]:
#the base url for etherscan api
baseUrl='https://api.etherscan.io'
#the connection pool (increase the timeout value and retry amount)
pool=urllib3.PoolManager(num_pools=5,timeout=20,retries=10)
#number of threads
numthread=5

#get the API key: the first line of the key file, with surrounding whitespace removed
key=''
keyarr=[]
keyfile='../Credentials/etherscankey.txt'
with open(keyfile) as f:
    keyarr = f.readlines()
#strip before testing for emptiness: readlines() keeps the trailing newline, so a
#blank first line is "\n" rather than "" and would otherwise be accepted as a key
if len(keyarr)>0:
    key=keyarr[0].strip()
if key=='':
    print("API Key Not Found in Directory! Aborting ...")
    #non-zero status so the abort is distinguishable from a successful run
    sys.exit(1)

#throttle the api calls (5 times per second)
@ratel.rate_limited(5,1)
def call_api(url):
    '''
    Send a GET request for url through the module-level connection pool.

    Returns the response object when the HTTP status is 200 and raises
    Exception (carrying the status code) for any other status.
    '''
    response=pool.request('GET',url)
    if response.status == 200:
        return response
    raise Exception('Cannot call API: {}'.format(response.status))

#util function to convert dict items to strings
#resolve the names that differ between Python 2 and Python 3 once, up front
try:
    _TEXT_TYPES=basestring  #Python 2: matches both str and unicode
except NameError:
    _TEXT_TYPES=str  #Python 3: str is the only text type
try:
    from collections.abc import Mapping as _Mapping, Iterable as _Iterable
except ImportError:  #Python 2 keeps these ABCs directly in collections
    from collections import Mapping as _Mapping, Iterable as _Iterable

def convert(data):
    '''
    Recursively turn every text value inside data into a native str.

    Strings are passed through str(); mappings are rebuilt as a plain dict with
    converted keys and values; any other iterable is rebuilt with its own type
    from the converted elements; everything else is returned unchanged.
    Works on both Python 2 and Python 3.
    '''
    if isinstance(data, _TEXT_TYPES):
        try:
            return str(data)
        except UnicodeEncodeError:
            #Python 2 only: non-ASCII text cannot be narrowed to str, keep it as it is
            return data
    if isinstance(data, _Mapping):
        return dict((convert(k), convert(v)) for k, v in data.items())
    if isinstance(data, _Iterable):
        return type(data)([convert(item) for item in data])
    return data

def convert_hex_add_wrapper(arr):
    '''
    Shorten the entries at index 1 and index 2 of arr with hex_add_converter.

    The list is modified in place and the same list object is returned.
    '''
    for position in (1, 2):
        arr[position]=hex_add_converter(arr[position])
    return arr
    
def hex_add_converter(add):
    '''
    Shorten a hex topic value to an address-sized hex string.

    Keeps the first two characters (the 0x prefix), skips the following
    24 characters and keeps everything from position 26 onwards.
    '''
    prefix=add[:2]
    remainder=add[26:]
    return prefix+remainder
    
def getHistory(fromBlock, toBlock, contractAdd, topic, decimals=18):
    '''
    Download the event logs of a contract and store them as CSV files.

    The logs between fromBlock and toBlock (inclusive) that match topic are
    requested from the Etherscan getLogs endpoint. Each response is written to
    csv/<contractAdd>_<fromBlock>_<toBlock>.csv with the columns
    data, blockNumber, from, to and transactionHash. As long as the newest
    block in a response is below toBlock another request is made, until a
    response comes back empty or toBlock has been reached.

    fromBlock   -- first block number to query
    toBlock     -- last block number to query
    contractAdd -- address of the token contract (string)
    topic       -- topic0 hash of the event to filter on (string)
    decimals    -- number of decimals of the token; the raw value in the data
                   field is divided by 10**decimals (default 18)

    Returns None. Raises Exception when the HTTP call fails (see call_api) or
    when the "result" field of the response is not a list.
    '''
    import os  #local import: only needed to make sure the output directory exists

    outdir='csv'
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    #divide by a float: on Python 2 int/int floors, which turned every
    #fractional token amount into a whole number (and amounts below 1 into 0)
    scale=float(10**decimals)

    #iterate instead of recursing so long histories cannot hit the recursion limit
    while True:
        print("processing from "+str(fromBlock)+" to "+str(toBlock))
        starttime=time.time()
        url=(baseUrl+"/api?module=logs&action=getLogs&fromBlock="+str(fromBlock)
             +"&toBlock="+str(toBlock)+"&address="+contractAdd
             +"&topic0="+topic+"&apikey="+key)
        r=call_api(url)
        o=convert(json.loads(r.data))
        result=o["result"]
        #an error payload would otherwise be handed to pandas and fail in an
        #obscure way; NOTE(review): presumably errors arrive as a string here - confirm
        if not isinstance(result, list):
            raise Exception('Unexpected API response: {}'.format(result))
        #there is no data here, therefore we return
        if len(result)==0:
            print("Finished. Returning ...")
            return
        df=pd.DataFrame.from_dict(result)
        #convert block number from hex to int
        df['blockNumber']=df['blockNumber'].apply(lambda x: int(x, 16))
        #convert the raw hex transfer value to token units
        df['data']=df['data'].apply(lambda x: int(x, 16)/scale)
        #topics[1] and topics[2] hold the padded sender and receiver addresses
        df['from']=df['topics'].apply(lambda t: hex_add_converter(t[1]))
        df['to']=df['topics'].apply(lambda t: hex_add_converter(t[2]))
        df=df[['data','blockNumber','from','to','transactionHash']]
        #DataFrame.sort no longer exists in current pandas; mergesort is stable,
        #so logs of the same block keep the order the API returned them in
        df=df.sort_values('blockNumber', ascending=True, kind='mergesort')
        firstblock=int(df['blockNumber'].iloc[0])
        lastblock=int(df['blockNumber'].iloc[-1])
        #now check if the block request is complete
        incomplete=lastblock<toBlock
        if incomplete and firstblock<lastblock:
            #the response may have been cut off in the middle of lastblock
            #(NOTE(review): Etherscan documents a cap on records per query -
            #confirm the current limit). Leave that block out of this file and
            #fetch it again in full, instead of skipping its remaining logs.
            df=df[df['blockNumber']<lastblock]
            nextblock=lastblock
        else:
            #either finished, or the whole response is a single block and asking
            #for it again would return the same rows, so move on to the next block
            nextblock=lastblock+1
        #write the dataframe to file
        filename=outdir+'/'+contractAdd+"_"+str(fromBlock)+"_"+str(toBlock)+".csv"
        print("writing to "+filename)
        df.to_csv(str(filename))
        elapsed=time.time()-starttime
        print(str(elapsed)+" second for each request of blocks "+"from "+str(fromBlock)+" to "+str(toBlock))
        if not incomplete:
            print("Finished. Returning ...")
            return
        #if it is not complete, make new request
        print("new block found. proceeding from "+str(nextblock))
        fromBlock=nextblock
    
#run the download for one contract over blocks 0 to 4108412
contract_address='0x888666CA69E0f178DED6D75b5726Cee99A87D698'
event_topic='0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef'
getHistory(0,4108412,contract_address,event_topic)