In [1]:
import os
import pandas as pd
In [2]:
# Folder that holds the raw quarterly CSV files read further down.
# Set the SPF_RAW_DATA_PATH environment variable to point somewhere else;
# the original machine-specific location remains the fallback so existing
# runs behave exactly as before.
raw_data_path = os.environ.get(
    'SPF_RAW_DATA_PATH',
    'C:\\Users\\EEU227\\Documents\\IPython Notebooks\\DEEdata\\SPF_individual_forecasts',
)
In [3]:
# Make sure the output folder for cleaned data exists.
# makedirs(..., exist_ok=True) is safe to re-run, has no check-then-create
# race, and also creates missing parent directories.
os.makedirs(raw_data_path + '\\CleanData', exist_ok=True)
In [4]:
# Output directory prefix (with trailing separator) so file names can be
# appended to it directly.
out_data = ''.join((raw_data_path, '\\CleanData\\'))
In [5]:
# Show the output directory path as this cell's result.
out_data
Out[5]:
In [6]:
# Parse every quarterly survey file that exists on disk, keep the leading
# block of point forecasts from each, and export one Excel sheet per survey
# round. The cleaned frames are also collected in dfList for later cells.
dfList = []
sheet_names = []
years = [2014, 2015, 2016]
quarters = [1, 2, 3, 4]
for year in years:
    for q in quarters:
        f = str(year) + 'Q' + str(q)
        fname = f + '.csv'
        csv_path = raw_data_path + '\\' + fname
        if not os.path.isfile(csv_path):
            # Survey rounds without a file on disk are simply skipped.
            continue

        # Current pandas rejects a boolean `header`; the integer 1 (what
        # True evaluated to in older releases) takes the second line of the
        # file as the column names.
        # NOTE(review): presumably line 1 is a section title -- confirm
        # against the raw CSV layout.
        raw_df = pd.read_csv(csv_path, header=1)

        # find the row where the growth expectations start
        dum = raw_df[raw_df['TARGET_PERIOD'] == 'GROWTH EXPECTATIONS; YEAR-ON-YEAR CHANGE IN REAL GDP'].index[0]

        # Keep the first three columns of the rows before that marker. The
        # slice stops one row short of it, as in the original code
        # (presumably a blank separator row -- TODO confirm). .copy() makes
        # the column assignments below act on an independent frame instead
        # of a view of raw_df.
        df = raw_df.iloc[0:dum - 1, [0, 1, 2]].copy()
        df['source'] = str(year) + '-Q' + str(q)
        df = df.rename(columns={'TARGET_PERIOD': 'target', 'FCT_SOURCE': 'id', 'POINT': 'point'})
        df = df[['source', 'target', 'id', 'point']]
        df['id'] = df['id'].astype('int')
        df['point'] = df['point'].astype('float32')

        sheet_names.append(f)
        dfList.append(df)

# Write the workbook only after parsing has succeeded and only if there is
# something to write: a workbook without sheets cannot be saved. The context
# manager saves and closes the file even if a write fails
# (ExcelWriter.save() is not available in current pandas).
if dfList:
    with pd.ExcelWriter(out_data + 'PointForecasts.xlsx') as writer:
        for sheet_name, sheet_df in zip(sheet_names, dfList):
            sheet_df.to_excel(writer, sheet_name=sheet_name, index=False)
In [7]:
# Summary statistics to compute per group: for the 'point' column, each
# output label is mapped to the name of the pandas reduction producing it.
point_stats = {'mean': 'mean', 'variance': 'var'}
aggregations = {'point': point_stats}
In [8]:
# Stack the per-survey frames and summarise the forecasts for every
# (source, target) pair.
dfAll = pd.concat(dfList, axis=0)
grouped = dfAll.groupby(['source', 'target'])

# Current pandas raises SpecificationError for the nested
# {column: {label: func}} form passed straight to .agg(). Each column's
# {label: func} mapping is therefore applied as named aggregation, and the
# per-column results are joined back under a (column, label) header.
forecast_stats = pd.concat(
    {column: grouped[column].agg(**labels) for column, labels in aggregations.items()},
    axis=1,
)
forecast_stats
Out[8]:
In [ ]:
In [ ]:
In [ ]:
In [90]:
# Aggregation spec keyed by column: output label -> pandas reduction name.
aggregations = {'point': {'mean': 'mean', 'variance': 'var'}}
# NOTE(review): this cell only (re)defines the spec; no groupby is executed here.
In [ ]:
In [ ]:
In [116]:
from scipy.stats import norm
import numpy as np
In [119]:
import matplotlib.pyplot as plt
# Single-panel figure; both handles are kept so later cells can draw on the axes.
fig, ax = plt.subplots(nrows=1, ncols=1)
In [117]:
# 100 evenly spaced points between the 1% and 99% quantiles of scipy's
# default (standard) normal distribution.
lower, upper = norm.ppf(0.01), norm.ppf(0.99)
x = np.linspace(lower, upper, num=100)
In [120]:
# Draw the normal density over the grid as a thick, semi-transparent red line.
density = norm.pdf(x)
ax.plot(x, density, 'r-', linewidth=5, alpha=0.6, label='norm pdf')
Out[120]:
In [ ]: