In [157]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statistics
%matplotlib inline
In [158]:
# Start with an empty frame; the cells that follow attach its columns.
df = pd.DataFrame()
In [159]:
# The eight ages that every summary statistic below is computed from.
df["age"] = [
    28, 42, 27, 24,
    35, 54, 35, 37,
]
In [160]:
# Mean computed by hand: add up every age, then divide by how many there are.
sum(df["age"]) / len(df["age"])
Out[160]:
In [161]:
# Mean of the ages, delegated to NumPy.
np.mean(df["age"])
Out[161]:
In [162]:
# Median of the ages via the standard library's statistics module.
statistics.median(df["age"])
Out[162]:
In [163]:
# Median of the ages, computed by NumPy.
np.median(df["age"])
Out[163]:
In [164]:
# Most frequently occurring age, via the standard library.
statistics.mode(df["age"])
Out[164]:
In [165]:
# Distinct ages, paired with the number of times each one appears.
values, counts = np.unique(df["age"], return_counts=True)
In [166]:
# Position of the largest count; on ties argmax reports the first occurrence.
ind = counts.argmax()
print(ind)
# The age stored at that position is the mode.
values[ind]
Out[166]:
In [167]:
# Variance of the ages. No ddof is passed, so this is the ddof=0
# (population, divide-by-n) form.
a = np.var(df["age"])
In [168]:
# Standard deviation with NumPy's default ddof=0 (divide by n).
np.std(df["age"])
Out[168]:
In [169]:
# Standard deviation with ddof=1, i.e. dividing by n - 1 (sample estimate).
np.std(df["age"], ddof=1)
Out[169]:
In [170]:
# Square root of the variance held in `a`.
v = pow(a, 0.5)
print(v)
In [171]:
# Standard error of the mean: sample standard deviation divided by sqrt(n).
# ddof=1 is needed here: treating the ages as a sample, the spread has to be
# estimated with the n - 1 divisor. np.std's default (ddof=0) would understate
# the standard error.
np.std(df["age"], ddof=1) / np.sqrt(len(df["age"]))
Out[171]:
In [172]:
# Two synthetic populations of 1000 values each, both centred on 60 but with
# very different spread (standard deviation 10 vs. 100).
# Seed the global NumPy RNG first so that Restart & Run All reproduces the
# same draws, and therefore the same figures and statistics, every time.
np.random.seed(42)
pop = pd.DataFrame(
    {
        "low_var": np.random.normal(loc=60, scale=10, size=1000),
        "high_var": np.random.normal(loc=60, scale=100, size=1000),
    }
)
In [173]:
# Stack the two population histograms vertically on a common x-axis so the
# difference in spread can be compared directly.
pop.hist(sharex=True, layout=(2, 1))
plt.show()
In [174]:
# Column-wise extremes of the population: maxima first, then minima.
print(pop.max(), pop.min(), sep="\n")
In [175]:
# Draw 1000 values from each population column (np.random.choice samples with
# replacement by default); low_var is drawn before high_var.
sample = pd.DataFrame(
    {
        column: np.random.choice(pop[column], 1000)
        for column in ("low_var", "high_var")
    }
)
In [176]:
# Plot the sample with the same stacked, shared-x layout used for the
# population histograms. With independent x-axes both panels look like similar
# bell curves and the difference in spread shows up only in the tick labels.
sample.hist(layout=(2, 1), sharex=True)
plt.show()
In [177]:
# Show the mean and the sample standard deviation together. A notebook cell
# displays only its final expression, so a bare `sample.mean()` followed by
# `sample.std(ddof=1)` silently discarded the means.
# The "std" aggregation uses pandas' default ddof=1, matching the original call.
sample.agg(["mean", "std"])
Out[177]:
In [ ]:
In [ ]:
In [ ]: