In [204]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [205]:
def plot_special_dates(ax=None):
    """Draw translucent vertical marker lines for notable Bitcoin events.

    Every event is drawn with ``axvline`` at its ``'YYYY-MM'`` string
    position and carries a legend label, so a later ``legend()`` call on the
    same axes lists the events.

    Parameters
    ----------
    ax : object with an ``axvline`` method, optional
        Axes to draw on. When omitted the lines are drawn through
        ``plt.axvline`` (pyplot's current axes), which is what this function
        did before the parameter existed.

    Notes
    -----
    The x positions are strings. Presumably the target axes is expected to
    already hold a plot whose x values are the same 'YYYY-MM' strings --
    TODO confirm against the callers.
    """
    # http://blog.startupdigest.com/2017/03/09/history-major-bitcoin-crashes/
    # (x position, line colour, legend label)
    events = (
        ('2013-11', 'g', '2013-11 - 1K'),
        # mt. gox ...
        ('2014-02', 'r', '2014-02 - mt. gox (hacked)'),
        ('2014-04', 'r', '2014-04 - mt. gox (liquidation)'),
        # hack
        ('2016-08', 'k', '2016-08 - hack'),
        # chinese influence
        ('2017-01', 'b', '2017-01 - chinese mull restrictions'),
        # price highs ...
        ('2017-05', 'g', '2017-05 - 2K'),
        ('2017-09', 'g', '2017-09 - 5K'),
        ('2017-11', 'g', '2017-11 - 10K'),
    )
    # Only touch pyplot's implicit state when no explicit axes was supplied.
    draw_line = plt.axvline if ax is None else ax.axvline
    for month, colour, label in events:
        draw_line(x=month, alpha=.2, c=colour, label=label)
In [206]:
# Load the scores table: the first CSV row supplies the column labels and
# the 'author' column becomes the row index.
scores_df = pd.read_csv(
    './data/reddit/_all/scores.csv',
    header=0,
    index_col='author',
    low_memory=False,
)
In [207]:
# Column labels of scores_df. The cells below use them as x values and to
# build per-key file names. (The former `[0:]` slice selected every column
# anyway, so it is dropped.)
keys = scores_df.columns
In [208]:
# top degree centrality for entire graph.
# (author, score) pairs; only the author name is used below.
top_dc_authors = [
    ('rydan', 0.8739361315196043),
    ('ebaley', 0.7552526611866831),
    ('Explodicle', 0.6387029897432167),
    ('Natanael_L', 0.6086236997163017),
    ('Introshine', 0.5352349603549865)
]

# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
for author, _score in top_dc_authors:
    # Each cell in the author's row is parsed as a dict in string form;
    # missing cells are filled with a zero placeholder so every cell parses.
    # NOTE(review): eval() executes whatever text the CSV holds. That is only
    # acceptable for trusted files; ast.literal_eval is the usual safer choice
    # if the cells are plain literals -- confirm before switching.
    dc_scores = scores_df.loc[author].fillna("{'degc': 0}").map(lambda x: float(eval(x)['degc']))
    # Zeros (which include the placeholder) become NaN so they are not drawn.
    dc_scores = [float('nan') if x == 0 else x for x in dc_scores]
    # The last key is left out of the plot.
    plt.plot(keys[:-1], dc_scores[:-1], marker='.', linestyle='none', label=author, markersize=20, alpha=.7)
plot_special_dates()
plt.ylabel('degree centrality')
plt.title('degree centrality by month for top 5')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [209]:
# Five authors to plot eigenvector centrality for.
# (author, score) pairs; only the author name is used below.
top_eigs_authors = [
    ('skereMan', 0.0943111926317215),
    ('BluSyn', 0.046781279146671295),
    ('Myceilingfan', 0.041636910289525986),
    ('DeanMaverick', 0.039750922471284866),
    ('0xDDDD', 0.033229414373636246)
]

# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
for author, _score in top_eigs_authors:
    # Each cell in the author's row is parsed as a dict in string form;
    # missing cells are filled with a -1 placeholder so every cell parses.
    # NOTE(review): eval() executes whatever text the CSV holds. That is only
    # acceptable for trusted files; ast.literal_eval is the usual safer choice
    # if the cells are plain literals -- confirm before switching.
    eigs_scores = scores_df.loc[author].fillna("{'eigs': -1}").map(lambda x: float(eval(x)['eigs']))
    # The -1 placeholder becomes NaN so missing entries are not drawn.
    eigs_scores = [float('nan') if x == -1 else x for x in eigs_scores]
    # The last key is left out of the plot.
    plt.plot(keys[:-1], eigs_scores[:-1], marker='.', linestyle='none', label=author, markersize=20, alpha=.7)
plot_special_dates()
plt.ylabel('eigenvector centrality')
plt.title('eigenvector centrality by month for top 5')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [210]:
# Row count of each key's node CSV, in the same order as `keys`.
path = r'./data/reddit/_nodes/'
nodes_per_month = [
    len(pd.read_csv(path + key + '_nodes.csv'))
    for key in keys
]
In [211]:
# Node count per key (last key left out) with the event markers.
# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
plt.ylabel('number of nodes')
plt.plot(keys[:-1], nodes_per_month[:-1], marker='.', markersize=10)
plot_special_dates()
plt.title('nodes per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [212]:
# Natural log of the node count per key (last key left out) with the event
# markers.
# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
plt.ylabel('log of number of nodes')
plt.plot(keys[:-1], np.log(nodes_per_month[:-1]), marker='.', markersize=10)
plot_special_dates()
plt.title('log of nodes per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [213]:
# Row count of each key's edge-list CSV, in the same order as `keys`.
path = r'./data/reddit/_edges/'
edges_per_month = [
    len(pd.read_csv(path + key + '_edgelist.csv'))
    for key in keys
]
In [214]:
# Edge count per key (last key left out) with the event markers.
# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
# This plot shows the raw counts, so the y label must not say "log"
# (the log version is plotted separately).
plt.ylabel('number of edges')
plt.plot(keys[:-1], edges_per_month[:-1], marker='.', markersize=10)
plot_special_dates()
plt.title('edges per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [215]:
# Natural log of the edge count per key (last key left out) with the event
# markers.
# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
plt.ylabel('log of number of edges')
plt.plot(keys[:-1], np.log(edges_per_month[:-1]), marker='.', markersize=10)
plot_special_dates()
plt.title('log of edges per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [216]:
# Per-key max / min / mean / median of the non-zero degree-centrality values
# across all authors.
dc_max_, dc_min_, dc_mean_, dc_median_ = [], [], [], []
for key in keys:
    # Missing cells are filled with a zero placeholder so every cell parses.
    # NOTE(review): eval() executes whatever text the CSV holds. That is only
    # acceptable for trusted files; ast.literal_eval is the usual safer choice
    # if the cells are plain literals -- confirm before switching.
    parsed = scores_df[key].fillna("{'degc': 0}").map(lambda cell: float(eval(cell)['degc']))
    # Zeros (which include the placeholder) are excluded from the statistics.
    nonzero = np.array([value for value in parsed if value != 0])
    dc_max_.append(nonzero.max())
    dc_min_.append(nonzero.min())
    dc_mean_.append(nonzero.mean())
    dc_median_.append(np.median(nonzero))
In [217]:
# Degree-centrality summary statistics per key (last key left out) with the
# event markers.
# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
plt.ylabel('degree centrality')
# One line per statistic; all four share the same marker styling.
for values, label, color in (
    (dc_max_, 'max', 'r'),
    (dc_min_, 'min', 'b'),
    (dc_mean_, 'mean', 'k'),
    (dc_median_, 'median', 'c'),
):
    plt.plot(keys[:-1], values[:-1], label=label, color=color, marker='.', markersize=10, alpha=.5)
plot_special_dates()
plt.title('degree centrality per month')
plt.legend(loc='upper left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [218]:
# Natural log of the degree-centrality summary statistics per key (last key
# left out) with the event markers.
# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
plt.ylabel('log of degree centrality')
# One line per statistic; all four share the same marker styling.
for values, label, color in (
    (dc_max_, 'max', 'r'),
    (dc_min_, 'min', 'b'),
    (dc_mean_, 'mean', 'k'),
    (dc_median_, 'median', 'c'),
):
    plt.plot(keys[:-1], np.log(values[:-1]), label=label, color=color, marker='.', markersize=10, alpha=.5)
plot_special_dates()
plt.title('log of degree centrality per month')
plt.legend(loc='lower left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [219]:
# Per-key max / min / mean / median of the non-zero eigenvector-centrality
# values across all authors.
eigs_max_, eigs_min_, eigs_mean_, eigs_median_ = [], [], [], []
for key in keys:
    # Missing cells are filled with a zero placeholder so every cell parses.
    # NOTE(review): eval() executes whatever text the CSV holds. That is only
    # acceptable for trusted files; ast.literal_eval is the usual safer choice
    # if the cells are plain literals -- confirm before switching.
    parsed = scores_df[key].fillna("{'eigs': 0}").map(lambda cell: float(eval(cell)['eigs']))
    # Zeros (which include the placeholder) are excluded from the statistics.
    nonzero = np.array([value for value in parsed if value != 0])
    eigs_max_.append(nonzero.max())
    eigs_min_.append(nonzero.min())
    eigs_mean_.append(nonzero.mean())
    eigs_median_.append(np.median(nonzero))
In [220]:
# Eigenvector-centrality summary statistics per key (last key left out) with
# the event markers.
# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
plt.ylabel('eigenvector centrality')
# One line per statistic; all four share the same marker styling.
for values, label, color in (
    (eigs_max_, 'max', 'r'),
    (eigs_min_, 'min', 'b'),
    (eigs_mean_, 'mean', 'k'),
    (eigs_median_, 'median', 'c'),
):
    plt.plot(keys[:-1], values[:-1], label=label, color=color, marker='.', markersize=10, alpha=.5)
plot_special_dates()
plt.title('eigenvector centrality per month')
plt.legend(loc='upper right')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [221]:
# Natural log of the eigenvector-centrality summary statistics per key (last
# key left out) with the event markers.
# plt.figure() already starts a fresh figure. A preceding plt.clf() is not
# needed: with no figure open it would first create an extra, empty one.
plt.figure(figsize=(18, 8))
plt.ylabel('log of eigenvector centrality')
# One line per statistic; all four share the same marker styling.
for values, label, color in (
    (eigs_max_, 'max', 'r'),
    (eigs_min_, 'min', 'b'),
    (eigs_mean_, 'mean', 'k'),
    (eigs_median_, 'median', 'c'),
):
    plt.plot(keys[:-1], np.log(values[:-1]), label=label, color=color, marker='.', markersize=10, alpha=.5)
plot_special_dates()
plt.title('log of eigenvector centrality per month')
plt.legend(loc='lower left')
plt.xticks(keys[:-1], rotation='vertical')
plt.show()
In [ ]: