notebook.community

Edit and run



In [1]:

    
import yaml
import matplotlib.pyplot as plt
#disease = 'consumption'
filename = 'find_figures_20150616_1110_6.yml'
filename = 'datain/find_figures_20150616_1110.yml'



In [2]:

    
with open(filename, 'r') as f:
    results = yaml.load(f)



In [3]:

    
results.keys()



In [4]:

    
import pandas as pd



In [67]:

    
for key, value in results.items():
    #print key, value[3]
    for page in value[3]:
        #print key, page[3]









    



  File "<ipython-input-67-d7e53e889adc>", line 4
    #print key, page[3]
                       ^
IndentationError: expected an indented block



In [6]:

    
dates=[]
percentages=[]
tobin = {}
for key, value in results.items():
    for page in value[3]:
        dates.append(key)
        percentages.append(page[3])
        if key in tobin:
            tobin[key].append(page[3])
        else:
            tobin[key] = [page[3]]



In [7]:

    
plt.plot(dates, percentages,'.')
plt.savefig("sizes.jpg")



In [8]:

    
from bokeh.plotting import figure, output_file, show



In [9]:

    
output_file("figures1.html", title="All points")
p = figure(title= " references", x_axis_label='Year', y_axis_label='%')
p.scatter(dates, percentages)
show(p)



In [47]:

    
histograms = {}
for year, percentages in tobin.items():
    histograms[year] = plt.hist(percentages, [10,20,30,40,50,60,70,80,90])



In [48]:

    
histograms[1850][0]









    Out[48]:





array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])



In [49]:

    
len(histograms[1606][1])
len(histograms[year][1][1:len(histograms[year][1])])









    Out[49]:





8



In [50]:

    
for year in histograms:
    plt.plot(histograms[year][1][1:len(histograms[year][1])], histograms[year][0]) 
legend()



In [52]:

    
output_file("figures_overtime.html", title="Place holder")
p = figure(title= " references", x_axis_label='%', y_axis_label='Occurrences')
for year in histograms:
    p.line(histograms[year][1][1:len(histograms[year][1])], histograms[year][0])
show(p)

Normalisation

Loading the overall publication rate in the period



In [53]:

    
normal_filename = 'normaliser_20150616_1844.yml'
with open('datain/' + normal_filename, 'r') as f:
    publication = yaml.load(f)

Normalise by number of pages?



In [54]:

    
histos_normed_page = {}
for year in publication:
    #histos_normed_word[year]
    if year in histograms:
        histos_normed_page[year] = []
        for count in histograms[year][0]:
            histos_normed_page[year].append(count/publication[year][1])



In [66]:

    
data = {}
for year in histos_normed_page:
    plt.plot(histograms[year][1][1:len(histograms[year][1])], histos_normed_page[year], '.-') 
plt.legend()
plt.xlim(30, 80)
plt.ylim(0, 3e-4)









    Out[66]:





(0, 0.0003)