In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [5]:
# Seed NumPy's global RNG so that a fresh "Restart & Run All" regenerates
# exactly the same populations. Later cells that draw from the same global
# RNG (e.g. np.random.choice) become reproducible as well.
SEED = 42
np.random.seed(SEED)

# Two simulated populations of 10,000 values each. Every value is the number
# of successes in 10 trials; the populations differ only in the per-trial
# success probability (0.2 vs 0.5).
pop1 = np.random.binomial(10, 0.2, 10000)
pop2 = np.random.binomial(10, 0.5, 10000)
In [6]:
# Overlay the two population distributions on one set of axes.
# The values are integer counts, so give every integer its own unit-wide bin
# centred on that integer, and use the SAME bin edges for both populations.
# With the default (10 equal-width bins computed separately for each array)
# the two histograms do not share edges, and when the data span 0..10 the
# closed last bin [9, 10] lumps the values 9 and 10 together.
lowest = min(pop1.min(), pop2.min())
highest = max(pop1.max(), pop2.max())
bin_edges = np.arange(lowest, highest + 2) - 0.5

fig, ax = plt.subplots()
ax.hist(pop1, bins=bin_edges, alpha=0.5, label='Population 1')
ax.hist(pop2, bins=bin_edges, alpha=0.5, label='Population 2')
ax.set_xlabel('Value')
ax.set_ylabel('Count')
ax.set_title('Population distributions')
ax.legend(loc='upper right')
plt.show()
In [7]:
# Draw a sample of 100 values, with replacement, from each population.
sample1 = np.random.choice(pop1, 100, replace=True)
sample2 = np.random.choice(pop2, 100, replace=True)

# Overlay the two sample distributions. The values are integer counts, so
# use unit-wide bins centred on each integer and share the same bin edges
# between both samples; the default per-array binning would give each
# histogram its own, non-aligned set of 10 equal-width bins.
lowest = min(sample1.min(), sample2.min())
highest = max(sample1.max(), sample2.max())
bin_edges = np.arange(lowest, highest + 2) - 0.5

fig, ax = plt.subplots()
ax.hist(sample1, bins=bin_edges, alpha=0.5, label='sample 1')
ax.hist(sample2, bins=bin_edges, alpha=0.5, label='sample 2')
ax.set_xlabel('Value')
ax.set_ylabel('Count')
ax.set_title('Sample distributions')
ax.legend(loc='upper right')
plt.show()
In [8]:
# Summary statistics for the two samples: both means first, then both
# standard deviations.
# NOTE: ndarray.std() defaults to ddof=0, i.e. these are population-style
# standard deviations (divide by n, not n - 1).
mean1 = sample1.mean()
mean2 = sample2.mean()
for value in (mean1, mean2, sample1.std(), sample2.std()):
    print(value)

# Difference between the sample means (sample 2 minus sample 1).
diff = mean2 - mean1
print(diff)
In [9]:
# Hand-computed t-statistic for the difference in sample means
# (unequal-variance / Welch form):
#     t = diff / sqrt(s1**2 / n1 + s2**2 / n2)
# `diff` is the difference in sample means (sample 2 minus sample 1).
size = np.array([len(sample1), len(sample2)])

# ddof=1 gives the unbiased sample standard deviation (divide by n - 1).
# The default ddof=0 is the population formula and slightly understates the
# standard error, so the resulting statistic would not match the one
# reported by scipy.stats.ttest_ind(..., equal_var=False).
sd = np.array([sample1.std(ddof=1), sample2.std(ddof=1)])

# Standard error of the difference: square root of the summed per-sample
# variances of the mean.
diff_se = np.sqrt(np.sum(sd ** 2 / size))

print(diff / diff_se)
print(size)
In [13]:
import scipy
In [10]:
from scipy.stats import ttest_ind

# Two-sample t-test of sample2 against sample1. equal_var=False selects
# Welch's test, which does not assume the two groups share a variance.
welch_result = ttest_ind(sample2, sample1, equal_var=False)
print(welch_result)
In [ ]: