In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import os
import seaborn as sns
In [11]:
# List the files available in the clean-data directory.
# os.listdir returns entries in arbitrary order, so sort them to get a stable, readable listing.
sorted(os.listdir('../clean_data/'))
Out[11]:
In [18]:
# Load the '2016Q1' sheet of the aggregate-histogram workbook.
# The first row supplies the column names and the second column (index_col=1) becomes the index.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was removed from pandas.
df1 = pd.read_excel(
    '../clean_data/SPF_aggregate_histogram.xlsx',
    sheet_name='2016Q1',
    header=0,
    index_col=1,
)
df1.head()
Out[18]:
In [19]:
# Load the '2015Q4' sheet of the aggregate-histogram workbook.
# The first row supplies the column names and the second column (index_col=1) becomes the index.
# `sheet_name` is the supported keyword; the old `sheetname` spelling was removed from pandas.
df2 = pd.read_excel(
    '../clean_data/SPF_aggregate_histogram.xlsx',
    sheet_name='2015Q4',
    header=0,
    index_col=1,
)
df2.head()
Out[19]:
In [ ]:
In [2]:
import matplotlib.pyplot as plt
In [3]:
# Demo: four histogram styles on a 2x2 grid, drawn from synthetic standard-normal data.
n_bins = 10

# Seeded generator so the figure is identical on every Restart & Run All.
demo_rng = np.random.RandomState(0)
x = demo_rng.randn(1000, 3)

fig, axes = plt.subplots(nrows=2, ncols=2)
ax0, ax1, ax2, ax3 = axes.flat

# `density=True` replaces the removed `normed=1` keyword (area-normalised histogram).
colors = ['red', 'tan', 'lime']
ax0.hist(x, n_bins, density=True, histtype='bar', color=colors, label=colors)
ax0.legend(prop={'size': 10})
ax0.set_title('bars with legend')

ax1.hist(x, n_bins, density=True, histtype='bar', stacked=True)
ax1.set_title('stacked bar')

ax2.hist(x, n_bins, histtype='step', stacked=True, fill=True)
ax2.set_title('stepfilled')

# Make a multiple-histogram of data-sets with different length.
x_multi = [demo_rng.randn(n) for n in [10000, 5000, 2000]]
ax3.hist(x_multi, n_bins, histtype='bar')
ax3.set_title('different sample sizes')

plt.tight_layout()
plt.show()
In [ ]:
In [ ]:
import os
import pandas as pd
import scipy.stats as stats
import scipy.optimize as opt
import numpy as np
import matplotlib.pyplot as plt
In [16]:
# Draw 100 samples from a normal distribution with mean `mu` and standard deviation `sigma`.
mu, sigma = 0, 2.5
# A dedicated seeded generator makes the sample (and everything derived from it below)
# reproducible across kernel restarts.
sample_rng = np.random.RandomState(0)
x = mu + sigma * sample_rng.randn(100)
In [17]:
# Plot the density-normalised histogram of the sample once.
# (`density=True` replaces the removed `normed=1`; the original drew the same histogram
# twice on the same axes, which only darkened the bars.)
n, bins, patches = plt.hist(x, 10, density=True, facecolor='g', alpha=0.75)

# Recompute the raw counts and bin edges; np.histogram defaults to 10 bins, matching the plot.
hist, bins = np.histogram(x)
# Centre of each bin: average of its left and right edge.
mid_points = (bins[1:] + bins[:-1]) / 2
# Per-bin probability mass (sums to 1). The explicit float cast keeps this a true division
# even where `/` on integer arrays would floor.
norm_hist = hist.astype(float) / hist.sum()
Out[17]:
In [ ]:
# Fit a normal distribution to the binned data.
# `norm_hist` holds probability *mass* per bin (it sums to 1), whereas `stats.norm.pdf`
# is a *density*; the model therefore multiplies the pdf by the bin width so both sides
# are in the same units. Fitting the raw pdf to the masses biases the estimated sigma.
bin_width = np.mean(np.diff(mid_points))

# Moment-based starting point; curve_fit's default of all-ones can be far from the data.
mu0 = np.sum(mid_points * norm_hist)
sig0 = np.sqrt(np.sum(norm_hist * (mid_points - mu0) ** 2))

popt, pcov = opt.curve_fit(
    lambda x, mu, sig: stats.norm.pdf(x, mu, sig) * bin_width,
    mid_points,
    norm_hist,
    p0=[mu0, sig0],
)
# popt is [fitted mean, fitted standard deviation].
popt
In [18]:
# Bar chart of the normalised histogram, each bar centred on its bin mid-point.
bar_style = dict(align='center', alpha=.9)
plt.bar(mid_points, norm_hist, **bar_style)
plt.show()
In [19]:
# Display the per-bin probability masses (bin counts divided by their total).
norm_hist
Out[19]:
In [20]:
# Display the bin mid-points (average of adjacent bin edges).
mid_points
Out[20]:
In [29]:
# Display the raw per-bin counts returned by np.histogram.
hist
Out[29]:
In [28]:
# Keep the first normalised histogram under its own name.
# Note: this binds a second name to the same array object; no copy is made.
norm_hist1 = norm_hist
In [30]:
# Literal bin counts for a second, ten-bin histogram.
hist2 = np.array([
    1, 2, 10, 28, 17,
    11, 10, 9, 7, 6,
])
In [31]:
# Normalise the second histogram by its total count.
norm_hist2 = hist2 / hist2.sum()
In [27]:
# Seaborn bar chart of the normalised histogram, labelled by mid-points rounded to integers.
bin_labels = np.round(mid_points)
sns.barplot(x=bin_labels, y=norm_hist, color='grey');
In [24]:
# Show the bin mid-points rounded to the nearest integer.
mid_points.round()
Out[24]:
In [41]:
# One-column frame of the first normalised histogram, indexed by bin mid-point,
# with the column named 'd1' and a constant 'sex' label of 'male'.
df1 = (
    pd.DataFrame(norm_hist1, index=mid_points)
    .rename(columns={0: 'd1'})
    .assign(sex='male')
)
df1
Out[41]:
In [57]:
# Build a two-column frame: column 0 holds the first normalised histogram and
# column 1 the bin mid-points (the two arrays are stacked as rows, then transposed).
df1 = pd.DataFrame([norm_hist1, mid_points]).T
df1
Out[57]:
In [58]:
# Name the positional columns ('p' and 'mid') and label every row as 'male'.
df1 = df1.rename(columns={0: 'p', 1: 'mid'}).assign(sex='male')
df1
Out[58]:
In [56]:
# Build a two-column frame: column 0 holds the second normalised histogram and
# column 1 the bin mid-points (the two arrays are stacked as rows, then transposed).
df2 = pd.DataFrame([norm_hist2, mid_points]).T
df2
Out[56]:
In [64]:
# Name the positional columns ('p' and 'mid') and label every row as 'female'.
df2 = df2.rename(columns={0: 'p', 1: 'mid'}).assign(sex='female')
df2
Out[64]:
In [65]:
# Preview the first rows of df1.
df1.head()
Out[65]:
In [66]:
# Preview the first rows of df2.
df2.head()
Out[66]:
In [69]:
# Stack the two labelled frames row-wise, keeping only the columns they share
# and renumbering the rows from zero; then report the resulting shape.
frames = [df1, df2]
df = pd.concat(frames, join='inner', ignore_index=True)
df.shape
Out[69]:
In [77]:
# Show the 'mid' column as a plain NumPy array.
df['mid'].values
Out[77]:
In [78]:
# Show the 'mid' column rounded to the nearest integer, as a NumPy array.
np.round(df['mid'].values)
Out[78]:
In [79]:
# Store the integer-rounded mid-points as a new 'mid_new' column.
df['mid_new'] = df['mid'].round()
In [80]:
# Display the full combined frame.
df
Out[80]:
In [85]:
# Grouped bar chart: probability 'p' per rounded mid-point, one bar colour per 'sex' label.
sns.barplot(data=df, x='mid_new', y='p', hue='sex');
In [86]:
# Display the full combined frame again.
df
Out[86]:
In [ ]:
In [ ]: