Plots 3 and 6 from the paper
In [1]:
# Notebook setup: plotting backend, imports, and global display options.
# Render matplotlib figures inline in the cell output.
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import warnings
# Suppress every warning for the rest of the session. This also hides
# deprecation notices emitted by the plotting libraries.
warnings.filterwarnings("ignore")
# (repeats the inline-backend magic already issued at the top of this cell)
%matplotlib inline
# Reload imported modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2
import csv, codecs
# NOTE(review): colors, ticker and cm are not referenced in the cells visible
# here -- confirm whether later cells still need them.
from matplotlib import colors, ticker, cm
import seaborn as sns
# NOTE(review): this runs after plt.style.use('ggplot') above and presumably
# replaces much of that style -- confirm which look is intended.
sns.set(color_codes=True)
In [2]:
# degree distribution plots
# Read the per-node statistics CSV (semicolon-delimited, double-quote quoted)
# and collect the degree series consumed by the plotting cells.
data_file = "dataset/node_stats.csv"
in_degrees = []        # in-degree of every parsed row
out_degrees = []       # out-degree of every parsed row
in_degrees_ref = []    # in-degree of rows whose `lb` column equals "True"
out_degrees_ref = []   # strictly positive out-degrees (see note in the loop)

# The csv module expects a text file opened with newline="" so that it alone
# decides where records end. A codecs stream splits lines on additional
# Unicode separators, which can cut a record in two; the short-row guard
# below would then drop the pieces without any error.
with open(data_file, "r", encoding="utf-8", newline="") as f:
    csv_reader = csv.reader(f, delimiter=';', quotechar='"')
    next(csv_reader, None)  # skip the headers
    for row in csv_reader:
        # Rows with fewer than the 10 expected columns are skipped.
        if len(row) < 10:
            continue
        internal_id, bid, title, author, lb, year, consultation, indegree, outdegree, degree = row
        # Convert once; int() raises ValueError on a non-numeric cell.
        n_in = int(indegree)
        n_out = int(outdegree)
        in_degrees.append(n_in)
        out_degrees.append(n_out)
        if lb == "True":
            in_degrees_ref.append(n_in)
            # NOTE(review): the exported notebook lost its indentation, so it
            # is not certain that this check sits inside the `lb` branch. The
            # `_ref` suffix and the "Reference monographs" plot label suggest
            # it does -- confirm against the original notebook.
            if n_out > 0:
                out_degrees_ref.append(n_out)
In [3]:
# Quick sanity figures, one per line: number of parsed rows, summed in-degree
# over all rows, and summed in-degree over the `lb == "True"` subset.
for total in (len(in_degrees), sum(in_degrees), sum(in_degrees_ref)):
    print(total)
In [4]:
# In-degree histogram with a linear count axis: all rows vs. the reference subset.
# sns.distplot is deprecated, and its warning is hidden by the global warnings
# filter in this notebook. With kde, rug and fit disabled it only draws a
# matplotlib histogram, so Axes.hist is called directly. alpha=0.4 matches
# distplot's default histogram transparency.
ax = plt.gca()
# NOTE(review): each series is binned separately with bins=150, so the two
# overlaid histograms presumably use different bin widths -- confirm that
# this is the intended comparison.
ax.hist(in_degrees, bins=150, alpha=0.4, label="All")
ax.hist(in_degrees_ref, bins=150, alpha=0.4, color="red", label="Reference monographs")
ax.legend(loc="best", fontsize=14)
ax.set_ylabel("Number of cited monographs", fontsize=14)
ax.set_xlabel("In-degree", fontsize=14)
# Only the 0-100 range is shown; larger in-degrees fall outside the view.
ax.set_xlim(0, 100)
plt.tight_layout()
In [5]:
# Same two in-degree series as a histogram with log-scaled counts, drawn on
# the current axes.
ax = plt.gca()
hist_opts = dict(bins=150, log=True)
ax.hist(in_degrees, label="All", **hist_opts)
ax.hist(in_degrees_ref, color="red", label="Reference monographs", **hist_opts)
ax.legend(fontsize=14, loc="best")
ax.set_ylabel("Number of cited monographs", fontsize=14)
ax.set_xlabel("In-degree", fontsize=14)
# Restrict the view to in-degrees between 0 and 80.
ax.set_xlim(0, 80)
plt.tight_layout()
In [6]:
# Out-degree histogram for the `out_degrees_ref` series.
# sns.distplot is deprecated, and its warning is hidden by the global warnings
# filter in this notebook. With kde, rug and fit disabled it only draws a
# matplotlib histogram, so Axes.hist is called directly. alpha=0.4 matches
# distplot's default histogram transparency.
ax = plt.gca()
ax.hist(out_degrees_ref, bins=600, alpha=0.4, label="Reference monographs")
ax.legend(loc="best", fontsize=14)
ax.set_ylabel("Number of citing monographs", fontsize=14)
ax.set_xlabel("Out-degree", fontsize=14)
# Only the 0-620 range is shown.
ax.set_xlim(0, 620)
plt.tight_layout()
In [ ]: