In [1]:
import yaml
import matplotlib.pyplot as plt
#disease = 'consumption'
# Path of the YAML results file read by the next cell.
# An earlier alternative, 'find_figures_20150616_1110_6.yml', used to be
# assigned here but was immediately overwritten and never read, so it is
# recorded only as this note.
filename = 'datain/find_figures_20150616_1110.yml'
In [2]:
# Load the results file into `results`.
# yaml.safe_load replaces the bare yaml.load(f): calling yaml.load without a
# Loader is deprecated since PyYAML 5.1 and raises TypeError from PyYAML 6.0,
# and the default loader could construct arbitrary Python objects.
# NOTE(review): if the file was dumped with Python-specific tags (for example
# !!python/tuple), safe_load will reject it; use
# yaml.load(f, Loader=yaml.FullLoader) in that case.
with open(filename, 'r') as f:
    results = yaml.safe_load(f)
In [3]:
# Show the top-level keys of the loaded results (later cells treat them as years).
results.keys()
Out[3]:
In [4]:
import pandas as pd
In [67]:
# Debug walk over every page entry stored at index 3 of each result.
# The print statements stay commented out; `pass` gives the inner loop a real
# body, because a loop whose body is only a comment does not compile.
for key, value in results.items():
    #print key, value[3]
    for page in value[3]:
        #print key, page[3]
        pass
In [6]:
# Flatten the results into two parallel lists (one entry per page) for the
# scatter plots, and group the same values per key in `tobin` so that each
# key can be binned separately later.
dates = []
percentages = []
tobin = {}
for key, value in results.items():
    for page in value[3]:
        pct = page[3]
        dates.append(key)
        percentages.append(pct)
        # setdefault creates the per-key list on first sight of the key.
        tobin.setdefault(key, []).append(pct)
In [7]:
# Static scatter of every page value against its key, saved to sizes.jpg.
# Axis labels mirror the ones used by the Bokeh version of this plot so the
# saved image can be read on its own.
plt.plot(dates, percentages, '.')
plt.xlabel('Year')
plt.ylabel('%')
plt.savefig("sizes.jpg")
In [8]:
from bokeh.plotting import figure, output_file, show
In [9]:
# Bokeh version of the scatter plot: one marker per page, written to
# figures1.html and rendered through show().
output_file("figures1.html", title="All points")
p = figure(
    title=" references",
    x_axis_label='Year',
    y_axis_label='%',
)
p.scatter(dates, percentages)
show(p)
In [47]:
# Bin each year's values into fixed 10-wide bins spanning 10 to 90.
# Each entry keeps the full plt.hist result; later cells read element 0
# (counts) and element 1 (bin edges).
# The per-year list has its own name on purpose: using `percentages` as the
# loop variable overwrote the flat list built for the scatter plots, so
# re-running those cells afterwards plotted the wrong data.
histograms = {}
for year, year_percentages in tobin.items():
    histograms[year] = plt.hist(year_percentages, [10,20,30,40,50,60,70,80,90])
In [48]:
# Inspect element 0 (the per-bin counts) of the histogram stored under key 1850.
histograms[1850][0]
Out[48]:
In [49]:
# Sanity check on the bin edges: how many edges there are, and how many remain
# after dropping the first one (the x-values used when plotting the counts).
# An explicit key is used rather than the `year` variable left over from an
# earlier loop, and both lengths are returned together so that the first one
# is no longer silently discarded.
len(histograms[1606][1]), len(histograms[1606][1][1:])
Out[49]:
In [50]:
# One line per year: counts plotted against the bin edges with the first edge
# dropped, so both sequences have the same length.
for year in histograms:
    # Each line is labelled; without labels the legend has nothing to show.
    plt.plot(histograms[year][1][1:], histograms[year][0], label=str(year))
# A bare legend() is only defined in a pylab namespace; the notebook imports
# matplotlib.pyplot as plt, so call it through plt.
plt.legend()
In [52]:
# Bokeh version of the per-year line plot, written to figures_overtime.html.
output_file("figures_overtime.html", title="Place holder")
p = figure(
    title=" references",
    x_axis_label='%',
    y_axis_label='Occurrences',
)
for year, binned in histograms.items():
    # binned[1] holds the bin edges and binned[0] the counts; the first edge
    # is dropped so x and y have equal length.
    p.line(binned[1][1:], binned[0])
show(p)
In [53]:
# Load the normalisation data into `publication`.
# yaml.safe_load replaces the bare yaml.load(f): calling yaml.load without a
# Loader is deprecated since PyYAML 5.1 and raises TypeError from PyYAML 6.0,
# and the default loader could construct arbitrary Python objects.
# NOTE(review): if the file was dumped with Python-specific tags (for example
# !!python/tuple), safe_load will reject it; use
# yaml.load(f, Loader=yaml.FullLoader) in that case.
normal_filename = 'normaliser_20150616_1844.yml'
with open('datain/' + normal_filename, 'r') as f:
    publication = yaml.safe_load(f)
Normalise by number of pages?
In [54]:
# Divide every bin count by publication[year][1] for each year present in
# both `publication` and `histograms`.
# NOTE(review): index 1 is presumably the page count for that year; confirm
# against the normaliser file.
histos_normed_page = {}
for year in publication:
    #histos_normed_word[year]
    if year not in histograms:
        continue
    histos_normed_page[year] = [
        count / publication[year][1] for count in histograms[year][0]
    ]
In [66]:
# Plot the normalised counts per year against the bin edges (first edge
# dropped so x and y have equal length), zoomed to the 30-80 range.
data = {}  # not read by any cell shown here; kept in case a later cell uses it
for year in histos_normed_page:
    # Each line is labelled; without labels plt.legend() has nothing to show.
    plt.plot(histograms[year][1][1:], histos_normed_page[year], '.-', label=str(year))
plt.legend()
plt.xlim(30, 80)
plt.ylim(0, 3e-4)
Out[66]: