In [7]:
import pandas as pd

In [8]:
# Read stage 2: load both grant CSV files, one DataFrame per file
gr_15, gr_16 = map(pd.read_csv, ('grant_15.csv', 'grant_16.csv'))

In [9]:
# Keep only the first two columns of each table and label the 'Gesamt'
# (German: "total") column with the table's year.
# The rename is chained onto the slice instead of using inplace=True:
# renaming an .iloc slice in place can raise SettingWithCopyWarning on
# older pandas, and in-place mutation brings no benefit here.
# NOTE(review): the positional slice presumably selects 'BauerID' and
# 'Gesamt' -- confirm against the CSV column layout.
gr_15 = gr_15.iloc[:, 0:2].rename(columns={'Gesamt': '2015'})
gr_16 = gr_16.iloc[:, 0:2].rename(columns={'Gesamt': '2016'})
# Debug information: per-year column totals, printed in units of 10**9
t3_15, t3_16 = gr_15['2015'].sum(), gr_16['2016'].sum()
print(t3_15/(10**9))
print(t3_16/(10**9))


6.45210659279
5.73757592941

In [10]:
# Join the two yearly tables on BauerID. The outer join keeps IDs that
# occur in only one of the tables; their missing-year value becomes NaN.
# NOTE(review): if a BauerID repeats within one table, the join multiplies
# the matching rows -- confirm the key is unique per table.
progress = gr_15.merge(gr_16, on='BauerID', how='outer')

In [11]:
# Debug information (disabled): per-year totals of the merged frame, in units of 10**9
#t_15, t_16 = progress['2015'].sum(), progress['2016'].sum()
#print(t_15/(10**9))
#print(t_16/(10**9))

In [12]:
# Save insight: write the merged table as a UTF-8 CSV without the row index
progress.to_csv(
    'insights/progress.csv',
    index=False,
    encoding='utf-8',
)