In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import pylab as plt
from time import sleep
from IPython import display
from urllib2 import Request, urlopen
import json
from pandas.io.json import json_normalize
import networkx as nx
# Default figure size for every plot in this notebook: width 12, height 9
# (matplotlib expresses figure sizes in inches). The list returned by the
# rcParams lookup is modified in place.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 9
In [2]:
# Download a batch of transactions from the etherchain.org API and flatten the
# JSON payload into the DataFrame `df`.

# Both values are interpolated into the URL path as /api/txs/<offset>/<count>.
# NOTE(review): presumably the starting offset and the number of transactions
# to return -- confirm against the API documentation.
offset = '1'
count = '10000'

# Browser-like request headers.
# NOTE(review): presumably needed because the server rejects the default
# urllib2 User-Agent -- confirm.
hdr = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}

request = Request('https://etherchain.org/api/txs/' + offset + '/' + count, headers=hdr)

# Always release the connection, even if reading the body fails.
response = urlopen(request)
try:
    # `elevations` holds the raw response body returned by response.read().
    elevations = response.read()
finally:
    response.close()

# The transaction records live under the top-level 'data' key of the payload.
data = json.loads(elevations)
df = json_normalize(data['data'])

# Show the column types of the resulting frame as the cell output.
df.dtypes
Out[2]:
In [5]:
# Build the directed transaction graph: each row of `df` contributes an edge
# from its 'sender' to its 'recipient', with the 'amount' and 'time' columns
# attached as edge attributes.
# NOTE(review): a DiGraph keeps a single edge per ordered node pair, so repeated
# transfers between the same two addresses are not kept as separate edges --
# confirm this is intended.
G = nx.from_pandas_dataframe(
    df,
    source='sender',
    target='recipient',
    edge_attr=['amount', 'time'],
    create_using=nx.DiGraph(),
)
In [ ]:
# Draw the whole graph with small, unlabeled nodes and very thin edges, then
# write the current figure to graph.pdf.
draw_options = dict(
    pos=None,           # no precomputed layout is supplied
    arrows=True,
    with_labels=False,
    node_size=10,
    width=0.01,
)
nx.draw_networkx(G, **draw_options)
plt.savefig("graph.pdf")
In [ ]:
# Network characteristics
#
# Prints summary statistics of the directed graph G: node/edge counts,
# connectivity of its undirected version, the share of the graph covered by the
# giant connected component (GCC), degree statistics and the average clustering
# coefficient.
#
# Every print goes through a single pre-formatted string so the output is the
# same whether `print` is a statement or a function.

# Undirected view of G, built once and reused below.
G_undirected = G.to_undirected()

print('Number of nodes: %s' % G.number_of_nodes())
print('Number of edges: %s' % G.number_of_edges())
print('Number of connected components: %s' % nx.number_connected_components(G_undirected))

# Giant connected component: the component with the most nodes. The component
# generator is not guaranteed to yield the largest one first, so select it
# explicitly instead of taking element [0].
GCC = max(nx.connected_component_subgraphs(G_undirected), key=len)

# Fraction of nodes and edges in GCC.
# float() forces true division; plain `/` on two ints truncates under Python 2.
gcc_node_fraction = float(GCC.number_of_nodes()) / G.number_of_nodes()
# Count the *directed* edges among the GCC's nodes so numerator and denominator
# come from the same graph (the undirected GCC merges reciprocal edges).
gcc_directed_edges = G.subgraph(GCC.nodes()).number_of_edges()
gcc_edge_fraction = float(gcc_directed_edges) / G.number_of_edges()
print("Fraction of nodes in GCC:  %s" % gcc_node_fraction)
print("Fraction of edges in GCC:  %s" % gcc_edge_fraction)

# Degree sequences (total, outgoing, incoming).
# dict(...) accepts both a plain dict and an iterable of (node, degree) pairs,
# and list(...) gives numpy a real sequence to reduce over.
degree_sequence = list(dict(G.degree()).values())
degree_out_sequence = list(dict(G.out_degree()).values())
degree_in_sequence = list(dict(G.in_degree()).values())

# Average clustering coefficient (computed on the undirected graph).
avg_clust_coef = nx.average_clustering(G_undirected)

# Min / max / median / mean for each degree sequence, in the order
# total, IN, OUT.
for suffix, sequence in (('', degree_sequence),
                         ('IN', degree_in_sequence),
                         ('OUT', degree_out_sequence)):
    for stat_name, stat_func in (('Min', np.min), ('Max', np.max),
                                 ('Median', np.median), ('Mean', np.mean)):
        print('%s degree %s %s' % (stat_name, suffix, stat_func(sequence)))

print('Average clustering coefficient  %s' % avg_clust_coef)
In [ ]:
# Degree distribution, plotted on log-log axes.
# nx.degree_histogram returns a list whose entry d is the number of nodes with
# degree d; it is plotted against its own index.
y = nx.degree_histogram(G)

plt.figure(1)
plt.loglog(y, color='b', linestyle='-', marker='o')
plt.xlabel("Degree")
plt.ylabel("Frequency")
plt.draw()
plt.show()
In [7]:
# Per-node degree tables: total, incoming and outgoing degree, then all three
# joined into a single frame `df_deg` keyed on the node identifier.

def _degree_frame(degrees, column):
    """Return a DataFrame of (node, degree) rows sorted by degree, then node.

    Parameters
    ----------
    degrees : dict or iterable of (node, degree) pairs
        Result of G.degree(), G.in_degree() or G.out_degree().
    column : str
        Name given to the degree column; the node column is always 'Nodes'.

    Returns
    -------
    pandas.DataFrame
        Two columns, 'Nodes' and `column`. Naming the columns at construction
        time keeps this working for an empty graph, where assigning two column
        names to a zero-column frame would raise.
    """
    # Index-based key instead of a tuple-unpacking lambda, which is Python 2 only.
    rows = sorted(dict(degrees).items(), key=lambda item: (item[1], item[0]))
    return pd.DataFrame(rows, columns=['Nodes', column])


# Dataframe node degree
df_degrees = _degree_frame(G.degree(), 'Degree')
# Dataframe node degree IN
df_degrees_in = _degree_frame(G.in_degree(), 'Degree IN')
# Dataframe node degree OUT
df_degrees_out = _degree_frame(G.out_degree(), 'Degree OUT')

# Join the three tables on the node identifier.
result = pd.merge(df_degrees_in, df_degrees_out, on='Nodes')
df_deg = pd.merge(df_degrees, result, on='Nodes')
Top 10 nodes by total degree
In [8]:
# The ten rows of df_deg with the highest 'Degree', largest first.
df_deg.sort_values(by='Degree', ascending=False).iloc[:10]
Out[8]:
Top 10 nodes by in-degree
In [9]:
# The ten rows of df_deg with the highest 'Degree IN', largest first.
df_deg.sort_values(by='Degree IN', ascending=False).iloc[:10]
Out[9]:
Top 10 nodes by out-degree
In [10]:
# The ten rows of df_deg with the highest 'Degree OUT', largest first.
df_deg.sort_values(by='Degree OUT', ascending=False).iloc[:10]
Out[10]:
In [ ]: