Untitled



In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import os
import seaborn as sns

In [11]:
# Show which files sit in the clean_data folder one level above the working directory.
os.listdir('../clean_data/')


Out[11]:
['PointForecasts.xlsx',
 'Point_and_Ind_Uncertainty.xlsx',
 'SPF_aggregate_histogram.xlsx',
 'SPF_time_series.xlsx']

In [18]:
# Load the '2016Q1' sheet of the aggregate-histogram workbook. The first row
# supplies the column labels and the column at position 1 becomes the row index.
# `sheet_name` is the supported keyword; the older `sheetname` spelling is
# rejected by current pandas releases.
df1 = pd.read_excel(
    '../clean_data/SPF_aggregate_histogram.xlsx',
    sheet_name='2016Q1',
    header=0,
    index_col=1,
)
df1.head()


Out[18]:
source [-2.0,-1.1] [-1.0,-0.6] [-0.5,-0.1] [0.0,0.4] [0.5,0.9] [1.0,1.4] [1.5,1.9] [2.0,2.4] [2.5,2.9] [3.0,3.4] [3.5,3.9] [4.0,5.0]
target
2016 2016-Q1 0.413101 1.398154 6.563698 20.755563 36.195539 24.786644 7.592633 1.621901 0.447736 0.153583 0.051039 0.020409
2016Dec NaN 0.195111 0.806320 4.042026 13.342081 28.040684 29.923766 15.302813 5.559588 1.886260 0.725573 0.132911 0.042867
2017 NaN 0.156856 0.477735 1.518767 6.433687 15.147962 29.303042 32.341059 10.275199 3.194858 0.981854 0.120430 0.048552
2017Dec NaN 0.185830 0.564403 1.435289 6.079712 14.309872 26.287529 31.065046 12.809149 4.601514 2.036121 0.469910 0.155625
2018 NaN 0.257009 0.444428 1.476107 5.034666 12.351745 23.639347 33.018521 16.313100 5.195082 1.649630 0.443358 0.177007

In [19]:
# Load the '2015Q4' sheet of the aggregate-histogram workbook. The first row
# supplies the column labels and the column at position 1 becomes the row index.
# `sheet_name` is the supported keyword; the older `sheetname` spelling is
# rejected by current pandas releases.
df2 = pd.read_excel(
    '../clean_data/SPF_aggregate_histogram.xlsx',
    sheet_name='2015Q4',
    header=0,
    index_col=1,
)
df2.head()


Out[19]:
source [-2.0,-1.1] [-1.0,-0.6] [-0.5,-0.1] [0.0,0.4] [0.5,0.9] [1.0,1.4] [1.5,1.9] [2.0,2.4] [2.5,2.9] [3.0,3.4] [3.5,3.9] [4.0,5.0]
target
2015 2015-Q4 0.602000 2.276000 24.296903 63.002736 7.678361 1.576000 0.398000 0.098000 0.040000 0.022000 0.010000 0.000000
2016 NaN 0.219028 0.829607 3.714362 11.753718 29.132909 34.504901 14.095859 4.038049 1.155750 0.367736 0.124080 0.064002
2016Sep NaN 0.366086 0.954973 4.393809 11.584200 27.678981 30.215566 15.823224 5.869419 2.134730 0.655915 0.233833 0.089264
2017 NaN 0.284444 0.653333 1.560000 5.448889 11.971111 26.715556 33.200000 14.075556 4.217778 1.426667 0.326667 0.120000
2017Sep NaN 0.237143 0.697143 1.568571 5.814286 12.582857 26.068571 31.622857 13.760000 4.948571 1.934286 0.522857 0.242857

In [ ]:


In [2]:
import matplotlib.pyplot as plt

In [3]:
# Demo of four histogram styles on a 2x2 grid, drawn from three columns of
# standard-normal samples.
n_bins = 10
x = np.random.randn(1000, 3)

fig, axes = plt.subplots(nrows=2, ncols=2)
ax0, ax1, ax2, ax3 = axes.flat

# `density=True` normalises the histogram to unit area. It replaces the
# `normed` keyword, which current matplotlib releases no longer accept.
colors = ['red', 'tan', 'lime']
ax0.hist(x, n_bins, density=True, histtype='bar', color=colors, label=colors)
ax0.legend(prop={'size': 10})
ax0.set_title('bars with legend')

ax1.hist(x, n_bins, density=True, histtype='bar', stacked=True)
ax1.set_title('stacked bar')

ax2.hist(x, n_bins, histtype='step', stacked=True, fill=True)
ax2.set_title('stepfilled')

# Make a multiple-histogram of data-sets with different length.
x_multi = [np.random.randn(n) for n in [10000, 5000, 2000]]
ax3.hist(x_multi, n_bins, histtype='bar')
ax3.set_title('different sample sizes')

plt.tight_layout()
plt.show()



In [ ]:


In [ ]:
import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np
import matplotlib.pyplot as plt

In [16]:
# Draw 100 normally distributed samples with mean `mu` and standard deviation `sigma`.
mu = 0
sigma = 2.5
x = np.random.randn(100) * sigma + mu

In [17]:
# the histogram of the data
n, bins, patches = plt.hist(x, 10, normed=1, facecolor='g', alpha=0.75)

hist, bins = np.histogram(x,)
mid_points = (bins[1:] + bins[:-1])/2
norm_hist = hist/np.sum(hist)
plt.hist(x, 10, normed=1, facecolor='g', alpha=0.75)


Out[17]:
(array([ 0.03724133,  0.027931  ,  0.06517232,  0.16758597,  0.1768963 ,
         0.10241365,  0.15827564,  0.13034465,  0.04655166,  0.01862066]),
 array([-5.33061924, -4.25654367, -3.18246809, -2.10839251, -1.03431694,
         0.03975864,  1.11383421,  2.18790979,  3.26198536,  4.33606094,
         5.41013651]),
 <a list of 10 Patch objects>)

In [ ]:
# Least-squares fit of a normal density to the binned sample.
# `norm_hist` is probability mass per bin, while `stats.norm.pdf` returns a
# density, so the masses are divided by the bin width before fitting. Without
# this the two are only comparable when the bins happen to have width 1.
# Assumes equally spaced midpoints (equal-width bins) — confirm if the binning changes.
bin_width = mid_points[1] - mid_points[0]
density = norm_hist / bin_width


def normal_pdf(x, mu, sig):
    """Normal density with mean `mu` and standard deviation `sig`, evaluated at `x`."""
    return stats.norm.pdf(x, mu, sig)


# popt holds the fitted (mu, sig); pcov is their estimated covariance matrix.
popt, pcov = opt.curve_fit(normal_pdf, mid_points, density)
popt

In [18]:
# Bar chart of the per-bin probabilities, each bar centred on its bin midpoint.
plt.bar(mid_points, norm_hist, alpha=0.9, align='center')
plt.show()



In [19]:
# Inspect the per-bin probabilities.
norm_hist


Out[19]:
array([ 0.04,  0.03,  0.07,  0.18,  0.19,  0.11,  0.17,  0.14,  0.05,  0.02])

In [20]:
# Inspect the bin midpoints.
mid_points


Out[20]:
array([-4.79358145, -3.71950588, -2.6454303 , -1.57135473, -0.49727915,
        0.57679642,  1.650872  ,  2.72494758,  3.79902315,  4.87309873])

In [29]:
# Inspect the raw bin counts.
hist


Out[29]:
array([ 4,  3,  7, 18, 19, 11, 17, 14,  5,  2], dtype=int64)

In [28]:
# Bind the first set of probabilities to a numbered name.
# This is a second name for the same array, not a copy.
norm_hist1 = norm_hist

In [30]:
# Literal bin counts for a second histogram (ten bins).
hist2 = np.array([
    1, 2, 10, 28, 17,
    11, 10, 9, 7, 6,
])

In [31]:
# Convert the second set of counts into per-bin probabilities (they sum to 1).
norm_hist2 = hist2 / hist2.sum()

In [27]:
# The same probabilities as a seaborn bar chart, with each bar labelled by its
# midpoint rounded to the nearest integer. The trailing `;` hides the Axes repr.
sns.barplot(y=norm_hist, x=np.round(mid_points), color='grey');



In [24]:
# Inspect the integer-rounded midpoints that serve as bar labels.
mid_points.round()


Out[24]:
array([-5., -4., -3., -2.,  0.,  1.,  2.,  3.,  4.,  5.])

In [41]:
# Tabulate the first set of probabilities in column 'd1', indexed by bin
# midpoint, and tag every row with a group label.
df1 = pd.DataFrame({'d1': norm_hist1}, index=mid_points).assign(sex='male')
df1


Out[41]:
d1 sex
-4.793581 0.04 male
-3.719506 0.03 male
-2.645430 0.07 male
-1.571355 0.18 male
-0.497279 0.19 male
0.576796 0.11 male
1.650872 0.17 male
2.724948 0.14 male
3.799023 0.05 male
4.873099 0.02 male

In [57]:
# Stack probabilities and midpoints as two rows, then transpose so each bin is
# a row (column 0 = probability, column 1 = midpoint).
df1 = pd.DataFrame([norm_hist1, mid_points]).T
df1


Out[57]:
0 1
0 0.04 -4.793581
1 0.03 -3.719506
2 0.07 -2.645430
3 0.18 -1.571355
4 0.19 -0.497279
5 0.11 0.576796
6 0.17 1.650872
7 0.14 2.724948
8 0.05 3.799023
9 0.02 4.873099

In [58]:
# Name the two columns and tag every row with a group label.
df1 = df1.rename(columns={0: 'p', 1: 'mid'}).assign(sex='male')
df1


Out[58]:
p mid sex
0 0.04 -4.793581 male
1 0.03 -3.719506 male
2 0.07 -2.645430 male
3 0.18 -1.571355 male
4 0.19 -0.497279 male
5 0.11 0.576796 male
6 0.17 1.650872 male
7 0.14 2.724948 male
8 0.05 3.799023 male
9 0.02 4.873099 male

In [56]:
# Stack the second set of probabilities and the midpoints as two rows, then
# transpose so each bin is a row (column 0 = probability, column 1 = midpoint).
df2 = pd.DataFrame([norm_hist2, mid_points]).T
df2


Out[56]:
0 1
0 0.009901 -4.793581
1 0.019802 -3.719506
2 0.099010 -2.645430
3 0.277228 -1.571355
4 0.168317 -0.497279
5 0.108911 0.576796
6 0.099010 1.650872
7 0.089109 2.724948
8 0.069307 3.799023
9 0.059406 4.873099

In [64]:
# Name the two columns and tag every row with a group label.
df2 = df2.rename(columns={0: 'p', 1: 'mid'}).assign(sex='female')
df2


Out[64]:
p mid sex
0 0.009901 -4.793581 female
1 0.019802 -3.719506 female
2 0.099010 -2.645430 female
3 0.277228 -1.571355 female
4 0.168317 -0.497279 female
5 0.108911 0.576796 female
6 0.099010 1.650872 female
7 0.089109 2.724948 female
8 0.069307 3.799023 female
9 0.059406 4.873099 female

In [65]:
# Preview the first rows of the first labelled table.
df1.head()


Out[65]:
p mid sex
0 0.04 -4.793581 male
1 0.03 -3.719506 male
2 0.07 -2.645430 male
3 0.18 -1.571355 male
4 0.19 -0.497279 male

In [66]:
# Preview the first rows of the second labelled table.
df2.head()


Out[66]:
p mid sex
0 0.009901 -4.793581 female
1 0.019802 -3.719506 female
2 0.099010 -2.645430 female
3 0.277228 -1.571355 female
4 0.168317 -0.497279 female

In [69]:
# Stack the two labelled tables into one long frame, keeping only their shared
# columns and renumbering the rows from 0. Then check its dimensions.
df = pd.concat(objs=[df1, df2], ignore_index=True, join='inner')
df.shape


Out[69]:
(20, 3)

In [77]:
# Inspect the midpoint column as a plain NumPy array.
np.array(df.loc[:, 'mid'])


Out[77]:
array([-4.79358145, -3.71950588, -2.6454303 , -1.57135473, -0.49727915,
        0.57679642,  1.650872  ,  2.72494758,  3.79902315,  4.87309873,
       -4.79358145, -3.71950588, -2.6454303 , -1.57135473, -0.49727915,
        0.57679642,  1.650872  ,  2.72494758,  3.79902315,  4.87309873])

In [78]:
# Inspect the midpoint column rounded to the nearest integer.
np.array(df['mid']).round()


Out[78]:
array([-5., -4., -3., -2.,  0.,  1.,  2.,  3.,  4.,  5., -5., -4., -3.,
       -2.,  0.,  1.,  2.,  3.,  4.,  5.])

In [79]:
# Add an integer-rounded copy of the midpoints for use as a categorical axis label.
# NOTE(review): rounding can map a midpoint to -0.0 and skip an integer when the
# bins are wider than 1 — confirm the labels are acceptable.
df = df.assign(mid_new=np.round(df['mid'].values))

In [80]:
# Display the combined frame, now including the rounded-midpoint column.
df


Out[80]:
p mid sex mid_new
0 0.040000 -4.793581 male -5.0
1 0.030000 -3.719506 male -4.0
2 0.070000 -2.645430 male -3.0
3 0.180000 -1.571355 male -2.0
4 0.190000 -0.497279 male 0.0
5 0.110000 0.576796 male 1.0
6 0.170000 1.650872 male 2.0
7 0.140000 2.724948 male 3.0
8 0.050000 3.799023 male 4.0
9 0.020000 4.873099 male 5.0
10 0.009901 -4.793581 female -5.0
11 0.019802 -3.719506 female -4.0
12 0.099010 -2.645430 female -3.0
13 0.277228 -1.571355 female -2.0
14 0.168317 -0.497279 female 0.0
15 0.108911 0.576796 female 1.0
16 0.099010 1.650872 female 2.0
17 0.089109 2.724948 female 3.0
18 0.069307 3.799023 female 4.0
19 0.059406 4.873099 female 5.0

In [85]:
# Grouped bar chart: probability per rounded midpoint, one bar colour per
# value of 'sex'. The trailing `;` hides the Axes repr.
sns.barplot(data=df, x='mid_new', y='p', hue='sex');



In [86]:
# Display the combined frame that was plotted.
df


Out[86]:
p mid sex mid_new
0 0.040000 -4.793581 male -5.0
1 0.030000 -3.719506 male -4.0
2 0.070000 -2.645430 male -3.0
3 0.180000 -1.571355 male -2.0
4 0.190000 -0.497279 male 0.0
5 0.110000 0.576796 male 1.0
6 0.170000 1.650872 male 2.0
7 0.140000 2.724948 male 3.0
8 0.050000 3.799023 male 4.0
9 0.020000 4.873099 male 5.0
10 0.009901 -4.793581 female -5.0
11 0.019802 -3.719506 female -4.0
12 0.099010 -2.645430 female -3.0
13 0.277228 -1.571355 female -2.0
14 0.168317 -0.497279 female 0.0
15 0.108911 0.576796 female 1.0
16 0.099010 1.650872 female 2.0
17 0.089109 2.724948 female 3.0
18 0.069307 3.799023 female 4.0
19 0.059406 4.873099 female 5.0

In [ ]:


In [ ]: