notebook.community

Edit and run



In [1]:

    
%matplotlib inline
import pandas as pd
import numpy as np
import os
import seaborn as sns



In [11]:

    
os.listdir('../clean_data/')









    Out[11]:





['PointForecasts.xlsx',
 'Point_and_Ind_Uncertainty.xlsx',
 'SPF_aggregate_histogram.xlsx',
 'SPF_time_series.xlsx']



In [18]:

    
df1 = pd.read_excel('../clean_data/SPF_aggregate_histogram.xlsx',sheetname='2016Q1',header = 0,index_col=1)
df1.head()









    Out[18]:






  
    
      
      source
      [-2.0,-1.1]
      [-1.0,-0.6]
      [-0.5,-0.1]
      [0.0,0.4]
      [0.5,0.9]
      [1.0,1.4]
      [1.5,1.9]
      [2.0,2.4]
      [2.5,2.9]
      [3.0,3.4]
      [3.5,3.9]
      [4.0,5.0]
    
    
      target
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2016
      2016-Q1
      0.413101
      1.398154
      6.563698
      20.755563
      36.195539
      24.786644
      7.592633
      1.621901
      0.447736
      0.153583
      0.051039
      0.020409
    
    
      2016Dec
      NaN
      0.195111
      0.806320
      4.042026
      13.342081
      28.040684
      29.923766
      15.302813
      5.559588
      1.886260
      0.725573
      0.132911
      0.042867
    
    
      2017
      NaN
      0.156856
      0.477735
      1.518767
      6.433687
      15.147962
      29.303042
      32.341059
      10.275199
      3.194858
      0.981854
      0.120430
      0.048552
    
    
      2017Dec
      NaN
      0.185830
      0.564403
      1.435289
      6.079712
      14.309872
      26.287529
      31.065046
      12.809149
      4.601514
      2.036121
      0.469910
      0.155625
    
    
      2018
      NaN
      0.257009
      0.444428
      1.476107
      5.034666
      12.351745
      23.639347
      33.018521
      16.313100
      5.195082
      1.649630
      0.443358
      0.177007



In [19]:

    
df2 = pd.read_excel('../clean_data/SPF_aggregate_histogram.xlsx',sheetname='2015Q4',header = 0,index_col=1)
df2.head()









    Out[19]:






  
    
      
      source
      [-2.0,-1.1]
      [-1.0,-0.6]
      [-0.5,-0.1]
      [0.0,0.4]
      [0.5,0.9]
      [1.0,1.4]
      [1.5,1.9]
      [2.0,2.4]
      [2.5,2.9]
      [3.0,3.4]
      [3.5,3.9]
      [4.0,5.0]
    
    
      target
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      2015
      2015-Q4
      0.602000
      2.276000
      24.296903
      63.002736
      7.678361
      1.576000
      0.398000
      0.098000
      0.040000
      0.022000
      0.010000
      0.000000
    
    
      2016
      NaN
      0.219028
      0.829607
      3.714362
      11.753718
      29.132909
      34.504901
      14.095859
      4.038049
      1.155750
      0.367736
      0.124080
      0.064002
    
    
      2016Sep
      NaN
      0.366086
      0.954973
      4.393809
      11.584200
      27.678981
      30.215566
      15.823224
      5.869419
      2.134730
      0.655915
      0.233833
      0.089264
    
    
      2017
      NaN
      0.284444
      0.653333
      1.560000
      5.448889
      11.971111
      26.715556
      33.200000
      14.075556
      4.217778
      1.426667
      0.326667
      0.120000
    
    
      2017Sep
      NaN
      0.237143
      0.697143
      1.568571
      5.814286
      12.582857
      26.068571
      31.622857
      13.760000
      4.948571
      1.934286
      0.522857
      0.242857



In [ ]:



In [2]:

    
import matplotlib.pyplot as plt



In [3]:

    
n_bins = 10
x = np.random.randn(1000, 3)

fig, axes = plt.subplots(nrows=2, ncols=2)
ax0, ax1, ax2, ax3 = axes.flat

colors = ['red', 'tan', 'lime']
ax0.hist(x, n_bins, normed=1, histtype='bar', color=colors, label=colors)
ax0.legend(prop={'size': 10})
ax0.set_title('bars with legend')

ax1.hist(x, n_bins, normed=1, histtype='bar', stacked=True)
ax1.set_title('stacked bar')

ax2.hist(x, n_bins, histtype='step', stacked=True, fill=True)
ax2.set_title('stepfilled')

# Make a multiple-histogram of data-sets with different length.
x_multi = [np.random.randn(n) for n in [10000, 5000, 2000]]
ax3.hist(x_multi, n_bins, histtype='bar')
ax3.set_title('different sample sizes')

plt.tight_layout()
plt.show()



In [ ]:



In [ ]:

    
import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np
import matplotlib.pyplot as plt



In [16]:

    
mu, sigma = 0, 2.5
x = mu + sigma * np.random.randn(100)



In [17]:

    
# the histogram of the data
n, bins, patches = plt.hist(x, 10, normed=1, facecolor='g', alpha=0.75)

hist, bins = np.histogram(x,)
mid_points = (bins[1:] + bins[:-1])/2
norm_hist = hist/np.sum(hist)
plt.hist(x, 10, normed=1, facecolor='g', alpha=0.75)









    Out[17]:





(array([ 0.03724133,  0.027931  ,  0.06517232,  0.16758597,  0.1768963 ,
         0.10241365,  0.15827564,  0.13034465,  0.04655166,  0.01862066]),
 array([-5.33061924, -4.25654367, -3.18246809, -2.10839251, -1.03431694,
         0.03975864,  1.11383421,  2.18790979,  3.26198536,  4.33606094,
         5.41013651]),
 <a list of 10 Patch objects>)



In [ ]:

    
popt,pcov = opt.curve_fit(lambda x,mu,sig: stats.norm.pdf(x,mu,sig), mid_points,norm_hist)
popt



In [18]:

    
plt.bar(mid_points, norm_hist, align='center', alpha=.9)
plt.show()



In [19]:

    
norm_hist









    Out[19]:





array([ 0.04,  0.03,  0.07,  0.18,  0.19,  0.11,  0.17,  0.14,  0.05,  0.02])



In [20]:

    
mid_points









    Out[20]:





array([-4.79358145, -3.71950588, -2.6454303 , -1.57135473, -0.49727915,
        0.57679642,  1.650872  ,  2.72494758,  3.79902315,  4.87309873])



In [29]:

    
hist









    Out[29]:





array([ 4,  3,  7, 18, 19, 11, 17, 14,  5,  2], dtype=int64)



In [28]:

    
norm_hist1 = norm_hist



In [30]:

    
hist2 = np.array([1,  2,  10, 28, 17, 11, 10, 9,  7,  6])



In [31]:

    
norm_hist2 = hist2/np.sum(hist2)



In [27]:

    
sns.barplot(x=np.round(mid_points), y=norm_hist,color='grey');



In [24]:

    
np.round(mid_points)









    Out[24]:





array([-5., -4., -3., -2.,  0.,  1.,  2.,  3.,  4.,  5.])



In [41]:

    
df1 = pd.DataFrame(norm_hist1,index = mid_points)
df1.rename(columns={0:'d1'},inplace=True)
df1['sex'] = 'male'
df1



In [57]:

    
df1 = pd.DataFrame([norm_hist1,mid_points])
df1 = df1.T
df1



In [58]:

    
df1.rename(columns={0:'p',1:'mid'},inplace=True)
df1['sex'] = 'male'
df1



In [56]:

    
df2 = pd.DataFrame([norm_hist2,mid_points])
df2 = df2.T
df2



In [64]:

    
df2.rename(columns={0:'p',1:'mid'},inplace=True)
df2['sex'] = 'female'
df2



In [65]:

    
df1.head()



In [66]:

    
df2.head()



In [69]:

    
df = pd.concat([df1,df2],join='inner',ignore_index=True)
df.shape









    Out[69]:





(20, 3)



In [77]:

    
np.array(df['mid'])









    Out[77]:





array([-4.79358145, -3.71950588, -2.6454303 , -1.57135473, -0.49727915,
        0.57679642,  1.650872  ,  2.72494758,  3.79902315,  4.87309873,
       -4.79358145, -3.71950588, -2.6454303 , -1.57135473, -0.49727915,
        0.57679642,  1.650872  ,  2.72494758,  3.79902315,  4.87309873])



In [78]:

    
np.round(np.array(df['mid']))









    Out[78]:





array([-5., -4., -3., -2.,  0.,  1.,  2.,  3.,  4.,  5., -5., -4., -3.,
       -2.,  0.,  1.,  2.,  3.,  4.,  5.])



In [79]:

    
df['mid_new'] = np.round(np.array(df['mid']))



In [80]:

    
df



In [85]:

    
sns.barplot(x='mid_new', y='p',hue='sex',data=df);



In [86]:

    
df



In [ ]:



In [ ]:

	0	1
0	0.009901	-4.793581
1	0.019802	-3.719506
2	0.099010	-2.645430
3	0.277228	-1.571355
4	0.168317	-0.497279
5	0.108911	0.576796
6	0.099010	1.650872
7	0.089109	2.724948
8	0.069307	3.799023
9	0.059406	4.873099

	source	[-2.0,-1.1]	[-1.0,-0.6]	[-0.5,-0.1]	[0.0,0.4]	[0.5,0.9]	[1.0,1.4]	[1.5,1.9]	[2.0,2.4]	[2.5,2.9]	[3.0,3.4]	[3.5,3.9]	[4.0,5.0]
target
2016	2016-Q1	0.413101	1.398154	6.563698	20.755563	36.195539	24.786644	7.592633	1.621901	0.447736	0.153583	0.051039	0.020409
2016Dec	NaN	0.195111	0.806320	4.042026	13.342081	28.040684	29.923766	15.302813	5.559588	1.886260	0.725573	0.132911	0.042867
2017	NaN	0.156856	0.477735	1.518767	6.433687	15.147962	29.303042	32.341059	10.275199	3.194858	0.981854	0.120430	0.048552
2017Dec	NaN	0.185830	0.564403	1.435289	6.079712	14.309872	26.287529	31.065046	12.809149	4.601514	2.036121	0.469910	0.155625
2018	NaN	0.257009	0.444428	1.476107	5.034666	12.351745	23.639347	33.018521	16.313100	5.195082	1.649630	0.443358	0.177007

	source	[-2.0,-1.1]	[-1.0,-0.6]	[-0.5,-0.1]	[0.0,0.4]	[0.5,0.9]	[1.0,1.4]	[1.5,1.9]	[2.0,2.4]	[2.5,2.9]	[3.0,3.4]	[3.5,3.9]	[4.0,5.0]
target
2015	2015-Q4	0.602000	2.276000	24.296903	63.002736	7.678361	1.576000	0.398000	0.098000	0.040000	0.022000	0.010000	0.000000
2016	NaN	0.219028	0.829607	3.714362	11.753718	29.132909	34.504901	14.095859	4.038049	1.155750	0.367736	0.124080	0.064002
2016Sep	NaN	0.366086	0.954973	4.393809	11.584200	27.678981	30.215566	15.823224	5.869419	2.134730	0.655915	0.233833	0.089264
2017	NaN	0.284444	0.653333	1.560000	5.448889	11.971111	26.715556	33.200000	14.075556	4.217778	1.426667	0.326667	0.120000
2017Sep	NaN	0.237143	0.697143	1.568571	5.814286	12.582857	26.068571	31.622857	13.760000	4.948571	1.934286	0.522857	0.242857

	d1	sex
-4.793581	0.04	male
-3.719506	0.03	male
-2.645430	0.07	male
-1.571355	0.18	male
-0.497279	0.19	male
0.576796	0.11	male
1.650872	0.17	male
2.724948	0.14	male
3.799023	0.05	male
4.873099	0.02	male

	p	mid	sex
0	0.009901	-4.793581	female
1	0.019802	-3.719506	female
2	0.099010	-2.645430	female
3	0.277228	-1.571355	female
4	0.168317	-0.497279	female
5	0.108911	0.576796	female
6	0.099010	1.650872	female
7	0.089109	2.724948	female
8	0.069307	3.799023	female
9	0.059406	4.873099	female