In [ ]:
collaboration_data = pd.DataFrame(columns=['Year', 'Country', 'Internal'])
i = 0
for year, graph in zip(years, graphs):
    for country in graph.nodes():
        counts = Counter()
        total = sum([graph[country][neighbor]['weight'] 
                        for neighbor in graph.neighbors(country)])
        external = sum([graph[country][neighbor]['weight'] 
                        for neighbor in graph.neighbors(country)
                        if neighbor != country])
        

        internal = 1. - (external/total)
        if np.isnan(internal):
            continue
        collaboration_data.loc[i] = [year, country, internal]
        i += 1

In [ ]:
plt.scatter(collaboration_data.Year, collaboration_data.Internal, alpha=0.25)
plt.boxplot([collaboration_data[collaboration_data.Year == year].Internal for year in years], positions=years)
plt.ylabel('Internal collaborations')
plt.show()

In [ ]:
import pymc
rates = pymc.Container([pymc.Gamma('rate_%i' % year, 
                                   alpha=1., 
                                   beta=1., 
                                   value=5.) 
                        for year in years])

@pymc.deterministic
def rate(rates=rates):
    return np.array([rates[years.index(year)] 
                     for year in collaboration_data.Year])

internal = pymc.Gamma('internal', 
                      alpha=1., beta=rate,
                      value=collaboration_data.Internal, 
                      observed=True)

In [ ]:
model = pymc.Model({
    'rates': rates,
    'internal': internal
})

In [ ]:
M = pymc.MCMC(model)
M.sample(10000, burn=200, thin=20)

In [ ]:
plt.figure(figsize=(10, 4))
plt.scatter(collaboration_data.Year, collaboration_data.Internal, alpha=0.25)

# Plot the 95% credible interval as box/whiskers.
plt.boxplot([1./dist.trace()[:] for dist in M.rates], 
            positions=years, 
            whis=[2.5, 97.5], 
            showfliers=False)
plt.ylim(0, 1.0)

plt.legend(loc='best')
plt.show()