In [157]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statistics
%matplotlib inline

In [158]:
df=pd.DataFrame()

In [159]:
df['age']=[28, 42, 27, 24, 35, 54, 35, 37]

In [160]:
sum(df['age'])/len(df['age'])


Out[160]:
35.25

In [161]:
np.mean(df['age'])


Out[161]:
35.25

In [162]:
statistics.median(df['age'])


Out[162]:
35.0

In [163]:
np.median(df['age'])


Out[163]:
35.0

In [164]:
statistics.mode(df['age'])


Out[164]:
35

In [165]:
(values, counts) = np.unique(df['age'], return_counts=True)

In [166]:
ind = np.argmax(counts)
print(ind)
values[ind]


3
Out[166]:
35

In [167]:
a=np.var(df['age'])

In [168]:
np.std(df['age'])


Out[168]:
8.9965271077232902

In [169]:
np.std(df['age'],ddof=1)


Out[169]:
9.6176920308356717

In [170]:
v = a**0.5
print(v)


8.99652710772

In [171]:
np.std(df['age'])/np.sqrt(len(df['age']))


Out[171]:
3.1807526624998679

In [172]:
pop=pd.DataFrame()
pop['low_var']=np.random.normal(60,10,1000)
pop['high_var']=np.random.normal(60,100,1000)

In [173]:
pop.hist(layout=(2, 1), sharex=True)
plt.show()



In [174]:
print(pop.max())
print(pop.min())


low_var      86.598866
high_var    369.461379
dtype: float64
low_var      23.371762
high_var   -247.338647
dtype: float64

In [175]:
sample=pd.DataFrame()
sample['low_var'] = np.random.choice(pop['low_var'], 1000)
sample['high_var']=np.random.choice(pop['high_var'], 1000)

In [176]:
sample.hist()
plt.show()



In [177]:
sample.mean()
sample.std(ddof=1)


Out[177]:
low_var       9.698314
high_var    103.008697
dtype: float64

In [ ]:


In [ ]:


In [ ]: