Chain Project - Draft


Import libraries


In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import pylab as plt
from time import sleep
from IPython import display
from urllib2 import Request, urlopen
import json
from pandas.io.json import json_normalize
import networkx as nx
# Default figure size for every plot in this notebook: 12 wide by 9 high.
# The list stored in rcParams is updated in place through slice assignment.
fig_size = plt.rcParams["figure.figsize"]
fig_size[:] = [12, 9]

Download transaction data from the Etherchain API and convert it to a DataFrame


In [2]:
# Fetch one batch of transactions from the Etherchain API and flatten the
# JSON payload into a DataFrame.
# `offset` and `count` are inserted verbatim into the URL path as
# .../txs/<offset>/<count>.
# NOTE(review): presumably the starting position and the number of
# transactions to return -- confirm against the API documentation.
offset = '1'
count = '10000'
# Browser-like headers sent with the request.
# NOTE(review): presumably needed because the server rejects the default
# urllib2 user agent -- confirm.
hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8',
       'Connection': 'keep-alive'}
request = Request('https://etherchain.org/api/txs/{}/{}'.format(offset, count), headers=hdr)
response = urlopen(request)
try:
    # Parse the body directly from the file-like response object.
    data = json.load(response)
finally:
    # Always release the connection, even if reading or parsing fails.
    response.close()
# The transaction records live under the top-level 'data' key.
df = json_normalize(data['data'])
df.dtypes


Out[2]:
accountNonce     object
amount           object
blockHash        object
block_id          int64
gasLimit          int64
gasUsed           int64
hash             object
isContractTx     object
newContract       int64
parentHash       object
price             int64
recipient        object
sender           object
time             object
txIndex         float64
type             object
dtype: object

Convert the DataFrame edge list to a directed graph


In [5]:
# Build a directed graph from the transaction table: each row contributes an
# edge sender -> recipient carrying the 'amount' and 'time' columns as edge
# attributes.
# NOTE(review): a DiGraph stores at most one edge per ordered node pair, so
# repeated transfers between the same two addresses collapse into a single
# edge -- confirm this is intended (nx.MultiDiGraph would keep them all).
G = nx.from_pandas_dataframe(df, source='sender', target='recipient',
                             edge_attr=['amount', 'time'],
                             create_using=nx.DiGraph())

In [ ]:
# Render the full transaction graph (small nodes, hairline edges, no labels)
# and write the figure to graph.pdf.
draw_options = {
    'pos': None,
    'arrows': True,
    'with_labels': False,
    'node_size': 10,
    'width': 0.01,
}
nx.draw_networkx(G, **draw_options)
plt.savefig("graph.pdf")

Graph Properties


In [ ]:
# Network Characteristics
print 'Number of nodes:', G.number_of_nodes() 
print 'Number of edges:', G.number_of_edges() 
print 'Number of connected components:', nx.number_connected_components(G.to_undirected())
# Connected components
GCC=list(nx.connected_component_subgraphs(G.to_undirected()))[0]
# Fraction of nodes and edges in GCC 
print "Fraction of nodes in GCC: ", GCC.number_of_nodes() / G.number_of_nodes()
print "Fraction of edges in GCC: ", GCC.number_of_edges() / G.number_of_edges()
# Degree
degree_sequence = G.degree().values()
degree_out_sequence = G.out_degree().values()
degree_in_sequence = G.in_degree().values()
# Average clustering coefficient
avg_clust_coef = nx.average_clustering(G.to_undirected())


print "Min degree ", np.min(degree_sequence)
print "Max degree ", np.max(degree_sequence)
print "Median degree ", np.median(degree_sequence)
print "Mean degree ", np.mean(degree_sequence)

print "Min degree IN", np.min(degree_in_sequence)
print "Max degree IN", np.max(degree_in_sequence)
print "Median degree IN", np.median(degree_in_sequence)
print "Mean degree IN", np.mean(degree_in_sequence)

print "Min degree OUT", np.min(degree_out_sequence)
print "Max degree OUT", np.max(degree_out_sequence)
print "Median degree OUT", np.median(degree_out_sequence)
print "Mean degree OUT", np.mean(degree_out_sequence)

print "Average clustering coefficient ", avg_clust_coef

In [ ]:
# Degree distribution on log-log axes.
# y[k] holds the number of nodes whose degree equals k.
y = nx.degree_histogram(G)
plt.figure(1)
axes = plt.gca()
axes.loglog(y, 'b-', marker='o')
axes.set_ylabel("Frequency")
axes.set_xlabel("Degree")
plt.draw()
plt.show()

Top 10 nodes by degree, in-degree, and out-degree


In [7]:
def _degree_frame(degree_map, column):
    """Build a two-column DataFrame ('Nodes', column) from a node -> degree mapping.

    Rows are sorted by ascending degree, with ties broken by node id, so the
    default integer index ranks nodes from lowest to highest degree.
    An empty mapping yields an empty frame with the two named columns.
    """
    # dict() accepts both a plain dict and an iterable of (node, degree) pairs.
    ranked = sorted(dict(degree_map).items(), key=lambda item: (item[1], item[0]))
    return pd.DataFrame(ranked, columns=['Nodes', column])


# One table per degree type
df_degrees = _degree_frame(G.degree(), 'Degree')
df_degrees_in = _degree_frame(G.in_degree(), 'Degree IN')
df_degrees_out = _degree_frame(G.out_degree(), 'Degree OUT')

# Join the three tables on the node id into a single frame with the columns
# Nodes, Degree, Degree IN, Degree OUT.
result = pd.merge(df_degrees_in, df_degrees_out, on='Nodes')
df_deg = pd.merge(df_degrees, result, on='Nodes')

Top 10 Degree


In [8]:
# Ten nodes with the highest total degree.
by_degree = df_deg.sort_values(by='Degree', ascending=False)
by_degree.head(10)


Out[8]:
Nodes Degree Degree IN Degree OUT
5168 0xd34da389374caad1a048fbdc4569aae33fd5a375 1077 0 1077
5167 0xbb9bc244d798123fde783fcc1c72d3bb8c189413 1003 1003 0
5166 0x2a65aca4d5fc5b5c859090a6c34d164135398226 717 0 717
5165 0xbfc39b6f805a9e40e77291aff27aee3c96915bdd 488 487 1
5164 0xea674fdde714fd979de3edf0f56aa9716b898ec8 476 0 476
5163 0xbf4ed7b27f1d666546e30d74d50d173d20bca754 356 179 177
5162 0xcd88e0e0c455345833ce31c5452c1c37f4b4c438 339 0 339
5161 0xaa1a6e3e6ef20068f7f8d8c835d2d22fd5116444 335 295 40
5160 0x91337a300e0361bddb2e377dd4e88ccb7796663d 189 182 7
5159 0xa42af2c70d316684e57aefcc6e393fecb1c7e84e 160 0 160

Top 10 Degree In


In [9]:
# Ten nodes with the highest in-degree.
by_degree_in = df_deg.sort_values(by='Degree IN', ascending=False)
by_degree_in.head(10)


Out[9]:
Nodes Degree Degree IN Degree OUT
5167 0xbb9bc244d798123fde783fcc1c72d3bb8c189413 1003 1003 0
5165 0xbfc39b6f805a9e40e77291aff27aee3c96915bdd 488 487 1
5161 0xaa1a6e3e6ef20068f7f8d8c835d2d22fd5116444 335 295 40
5160 0x91337a300e0361bddb2e377dd4e88ccb7796663d 189 182 7
5163 0xbf4ed7b27f1d666546e30d74d50d173d20bca754 356 179 177
5158 0xe94b04a0fed112f3664e45adb2b8915693dd5ff3 140 139 1
5157 0xfa52274dd61e1643d2205169732f29114bc240b3 109 108 1
5156 0x68fc16129d48a0940cf5c238a3e9192db05c601c 99 99 0
5155 0x8b2232c5006a7c9bcb7b52d07ad429bb71ae2c1c 91 89 2
5154 0x58c0cc4b126c53ea46db6451df8e87b7256df61b 89 89 0

Top 10 Degree Out


In [10]:
# Ten nodes with the highest out-degree.
by_degree_out = df_deg.sort_values(by='Degree OUT', ascending=False)
by_degree_out.head(10)


Out[10]:
Nodes Degree Degree IN Degree OUT
5168 0xd34da389374caad1a048fbdc4569aae33fd5a375 1077 0 1077
5166 0x2a65aca4d5fc5b5c859090a6c34d164135398226 717 0 717
5164 0xea674fdde714fd979de3edf0f56aa9716b898ec8 476 0 476
5162 0xcd88e0e0c455345833ce31c5452c1c37f4b4c438 339 0 339
5163 0xbf4ed7b27f1d666546e30d74d50d173d20bca754 356 179 177
5159 0xa42af2c70d316684e57aefcc6e393fecb1c7e84e 160 0 160
5153 0x32be343b94f860124dc4fee278fdcbd38c102d88 65 2 63
5152 0x4bb96091ee9d802ed039c4d1a5f6216f90f81b01 57 0 57
5151 0xa8f769b88d6d74fb2bd3912f6793f75625228baf 53 0 53
5150 0x6c7f03ddfdd8a37ca267c88630a4fee958591de0 53 0 53

In [ ]: