In [43]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats

In [234]:
# Fit a normal distribution to the sample stored in tarants.csv and measure the
# goodness of fit with a chi-square statistic computed over fixed-width bins.

# First column of the header-less CSV, read as float64.
dat = pd.read_csv("tarants.csv", header=None, dtype=np.float64)[0]

# Descriptive statistics of the sample.
mean = np.mean(dat)
median = np.median(dat)
mode = stats.mode(dat)
skew = stats.skew(dat)
kurt = stats.kurtosis(dat)
stddev = np.std(dat)  # np.std defaults to ddof=0 (population standard deviation)

# Binning configuration: bins of width b_step covering [b_min, b_max].
b_min = 14
b_max = 28.9
b_step = 0.01
qtd = len(dat)

# round() rather than a bare int(): in floating point (28.9 - 14) / 0.01 lands
# slightly below 1490 (about 1489.9999999999998), so truncating it would
# silently drop the last bin.
count = int(round((b_max - b_min) / b_step))

# count + 1 edges delimit count bins; x keeps the left edge of every bin so
# that x, observados and esperados all have exactly one entry per bin.
edges = b_min + b_step * np.arange(count + 1)
x = edges[:-1]
centers = (edges[:-1] + edges[1:]) / 2

# Observed count per bin. Bins are half-open [left, right), except the last
# one, which np.histogram closes on the right.
observados = np.histogram(dat, bins=edges)[0].astype(np.float64)

# Expected count per bin under N(mean, stddev): density at the bin midpoint
# times the bin width, scaled by the sample size.
esperados = stats.norm.pdf(centers, loc=mean, scale=stddev) * qtd * b_step

# Pearson chi-square statistic summed over all bins.
# NOTE(review): with bins this narrow the expected count per bin may be far
# below the ~5 usually recommended for a chi-square test -- consider wider bins.
chi2 = np.sum((esperados - observados) ** 2 / esperados)

In [235]:
# Overlay the expected (fitted normal) frequencies on the observed histogram.
fig, ax = plt.subplots()

# x holds the left edge of each bin, so anchor the bars on their edge instead
# of relying on matplotlib's default alignment.
ax.bar(x, observados, width=b_step, align='edge')

# The expected values were evaluated at bin midpoints, so draw them there.
ax.plot(x + b_step / 2, esperados, 'r-*')

# Tie the visible range to the binning configuration rather than literals.
ax.set_xlim(b_min, b_max)
ax.set_title("Frequência dos dados")
ax.set_xlabel("Valor")
ax.set_ylabel("Frequência")
plt.show()

In [233]:
# Evaluate the fitted normal density (loc=mean, scale=stddev) at `obs`.
# NOTE(review): `obs` is not assigned in any cell visible here, and this cell's
# execution count (233) precedes the cell that defines `mean`/`stddev` (234),
# so it presumably relied on leftover kernel state -- confirm, then either
# define `obs` explicitly or drop this cell.
stats.norm.pdf(obs, loc=mean, scale=stddev)


Out[233]:
0.14266549958793434

In [236]:
# Display the chi-square statistic computed in the binning cell above.
chi2


Out[236]:
9855.2014531731147

In [ ]: