In [1]:
%matplotlib inline
from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import thinkstats2
import thinkplot
In [18]:
# Load the survey CSV, then keep only the rows whose 'junior' value is positive.
data = pd.read_csv("zemi.csv")
data = data.loc[data['junior'].gt(0)]
data
Out[18]:
In [3]:
# Descriptive statistics of the 'junior' column of `data`.
data.loc[:, 'junior'].describe()
Out[3]:
In [4]:
# Histogram of the 'junior' column over every row of `data`.
junior = thinkstats2.Hist(data['junior'].values)
thinkplot.PrePlot(1)
thinkplot.Hist(junior, label='Juniors')
thinkplot.Config(
    xlabel='The num of juniors',
    ylabel='Freq',
    loc=2,               # presumably the matplotlib legend location code -- confirm
    axis=[0, 17, 0, 7],  # presumably [xmin, xmax, ymin, ymax] -- confirm
)
In [21]:
# Split the rows on the 'zemi' flag: 1 -> seminar rows, 0 -> lecture rows.
semi = data.loc[data['zemi'].eq(1)]
lec = data.loc[data['zemi'].eq(0)]
In [22]:
# Descriptive statistics of 'junior' restricted to the seminar rows.
semi.loc[:, 'junior'].describe()
Out[22]:
In [24]:
# Histogram of 'junior' for the seminar rows; the figure is also saved as zemi.png.
semi_hist = thinkstats2.Hist(semi['junior'].values)
thinkplot.PrePlot(2)
thinkplot.Hist(semi_hist, label='seminar')
thinkplot.Config(
    xlabel='The number of juniors',
    ylabel='Freq',
    loc=2,               # presumably the matplotlib legend location code -- confirm
    axis=[0, 17, 0, 5],  # presumably [xmin, xmax, ymin, ymax] -- confirm
)
plt.savefig("zemi.png")
In [8]:
# Cumulative distribution of 'junior' over the seminar rows.
# The variable keeps its existing name `semi_cmf` (it holds a thinkstats2.Cdf) so
# that any other cell referring to it keeps working.
semi_cmf = thinkstats2.Cdf(np.asarray(semi['junior']))
thinkplot.PrePlot(2)
thinkplot.Cdf(semi_cmf, label='seminar')
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    # The y axis of this plot spans 0..1 and shows a cumulative distribution,
    # so it is labelled 'CDF' rather than the histogram label 'Freq'.
    ylabel='CDF',
    loc=2,
    axis=[0, 17, 0, 1],
)
In [20]:
# Tally how often each non-negative value occurs across the slot columns s1..s16,
# then bucket the distinct values by whether they occur exactly once or twice.
slot_columns = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8',
                's9', 's10', 's11', 's12', 's13', 's14', 's15', 's16']

# NOTE(review): 345 was a bare literal in the 'None' line below; presumably it is
# the total head-count the slot entries are compared against -- confirm, and
# ideally derive it from data instead of hard-coding it.
TOTAL_COUNT = 345

# Flatten with ravel() instead of reshape(16*42): the old form raised ValueError
# whenever the filtered frame did not have exactly 42 rows.
slot_values = np.asarray(data[slot_columns], dtype=int).ravel()

# Negative entries are skipped (presumably placeholders for unused slots -- confirm).
junior = slot_values[slot_values >= 0].tolist()
double = []
single = []

hist = thinkstats2.Hist(junior)
for val, freq in hist.Items():
    if freq == 2:
        double.append(val)
    elif freq == 1:
        single.append(val)
    else:
        # Any other multiplicity is unexpected here; report the value and its count.
        print('ERROR')
        print(val)
        print(freq)

print('Capa' ,str(len(junior)))
print('Single', len(single))
print('Double', len(double))
print('None', TOTAL_COUNT-len(single)-len(double))
print('All', len(single)+len(double))
In [17]:
# Number of rows in the integer matrix built from the slot columns s1..s16.
np.asarray(
    data[['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8',
          's9', 's10', 's11', 's12', 's13', 's14', 's15', 's16']],
    dtype=int,
).shape[0]
Out[17]: