In [43]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats
In [234]:
# Load the sample: first column of a header-less CSV, parsed as float64.
dat = pd.read_csv("tarants.csv", header=None, dtype=np.float64)[0]

# Descriptive statistics of the sample.
mean = np.mean(dat)
median = np.median(dat)
mode = stats.mode(dat)
skew = stats.skew(dat)
kurt = stats.kurtosis(dat)
stddev = np.std(dat)  # np.std default (ddof=0)

# Histogram grid: bins of width b_step starting at b_min and ending at b_max.
b_min = 14
b_max = 28.9
b_step = 0.01
qtd = len(dat)

# round() instead of int(): in floating point (b_max - b_min) / b_step lands
# just below the whole number of bins, and truncating it silently dropped the
# last bin. This assumes [b_min, b_max] spans a whole number of b_step bins.
count = int(round((b_max - b_min) / b_step))

# Bin edges, built with the same arithmetic for every consumer so that x,
# observados and esperados always have exactly `count` entries.
edges = b_min + b_step * np.arange(count + 1)
x = edges[:-1]  # left edge of each bin

# Observed count per bin. np.histogram uses half-open bins [lo, hi) except the
# last one, which also includes its right edge (a value equal to b_max).
observados = np.histogram(dat, bins=edges)[0].astype(np.float64)

# Expected count per bin under a normal distribution fitted with the sample
# mean and standard deviation: density at the bin centre * sample size * width.
centros = (edges[:-1] + edges[1:]) / 2
esperados = stats.norm.pdf(centros, loc=mean, scale=stddev) * qtd * b_step

# Pearson chi-square statistic. Bins whose expected count is exactly zero are
# skipped so the sum cannot become inf/nan through a division by zero.
# NOTE(review): with bins this narrow the expected count per bin is very
# small, so the statistic should be interpreted with care.
validos = esperados > 0
chi2 = float(np.sum((esperados[validos] - observados[validos]) ** 2 / esperados[validos]))

# Never accumulated in this cell; kept so any other cell reading it still works.
esp_sum = 0
In [235]:
# Observed histogram versus the expected normal curve.
# x holds the LEFT edge of every bin, so the bars are anchored with
# align='edge' (the default, 'center', would shift each bar half a bin to the
# left), and the expected counts, which were evaluated at the bin centres,
# are drawn at x + b_step / 2.
fig, ax = plt.subplots()
ax.plot(x + b_step / 2, esperados, 'r-*', label="Esperado (normal)")
ax.bar(x, observados, width=b_step, align='edge', label="Observado")
ax.set_xlim(14, 29)
ax.set_xlabel("Valor")
ax.set_ylabel("Frequência")
ax.set_title("Frequência dos dados")
ax.legend()
plt.show()
In [233]:
# Evaluates the normal density (parameters: sample mean and stddev) at `obs`.
# NOTE(review): `obs` is not assigned in any cell visible in this notebook, and
# this cell's execution count (233) precedes the cell that defines `mean` and
# `stddev` (234), so on a fresh kernel this raises NameError. TODO: confirm
# the intended argument or delete this leftover cell.
stats.norm.pdf(obs, loc=mean, scale=stddev)
Out[233]:
In [236]:
# Display the chi-square statistic accumulated in the cell that bins the data.
chi2
Out[236]:
In [ ]: