In [1]:
%matplotlib inline
from __future__ import division
import pandas as pd
import numpy as np
import thinkstats2
import thinkplot
In [2]:
# Read the table and keep only the rows whose 'junior' value is positive,
# then preview the first rows.
data = pd.read_csv("zemi.csv").query("junior > 0")
data.head()
Out[2]:
In [3]:
# Summary statistics of the 'junior' column over all retained rows.
data.loc[:, 'junior'].describe()
Out[3]:
In [4]:
# Histogram of the 'junior' column over all retained rows.
junior = thinkstats2.Hist(data['junior'].values)
thinkplot.PrePlot(2)
thinkplot.Hist(junior, label='Juniors')
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='Freq',
    loc=2,
    axis=[0, 17, 0, 7],
)
In [5]:
# Split the rows by the 'zemi' flag: 1 -> seminar rows, 0 -> lecture rows.
# The two selections are made independently, so rows with any other 'zemi'
# value end up in neither frame.
semi = data.loc[data['zemi'] == 1]
lec = data.loc[data['zemi'] == 0]
In [6]:
# Summary statistics of the 'junior' column, seminar rows only.
semi.loc[:, 'junior'].describe()
Out[6]:
In [7]:
# Histogram of the 'junior' column, seminar rows only.
semi_hist = thinkstats2.Hist(semi['junior'].values)
thinkplot.PrePlot(2)
thinkplot.Hist(semi_hist, label='seminar')
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='Freq',
    loc=2,
    axis=[0, 17, 0, 5],
)
In [8]:
# Cumulative distribution of the 'junior' column, seminar rows only.
# The variable keeps its original name (semi_cmf) so later cells that may
# refer to it keep working, although it holds a thinkstats2.Cdf.
semi_cmf = thinkstats2.Cdf(np.asarray(semi['junior']))
thinkplot.PrePlot(2)
thinkplot.Cdf(semi_cmf, label='seminar')
# The y axis spans 0..1 and shows cumulative probability, so it is labelled
# 'CDF' rather than 'Freq' (which described the histograms above).
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='CDF',
    loc=2,
    axis=[0, 17, 0, 1],
)
In [9]:
# Collect every non-negative entry of the slot columns s1..s16 of the seminar
# rows, then classify each distinct value by how many times it occurs:
# once -> `single`, twice -> `double`.
# NOTE(review): the entries look like junior/student IDs, with negative values
# marking empty slots -- confirm against zemi.csv.

# Total that the 'None' line below is computed against.
# NOTE(review): presumably the total number of juniors in the cohort -- confirm.
TOTAL_JUNIORS = 345

slot_columns = ['s%d' % k for k in range(1, 17)]  # 's1' .. 's16'

# ravel() flattens row by row, in the same order as the former
# reshape(16*35), but works for any number of seminar rows instead of
# raising ValueError unless there are exactly 35.
slot_values = np.asarray(semi[slot_columns], dtype=int).ravel().tolist()

# Keep only the non-negative entries.
junior = [i for i in slot_values if i >= 0]

hist = thinkstats2.Hist(junior)

double = []
single = []
for val, freq in hist.Items():
    if freq == 2:
        double.append(val)
    elif freq == 1:
        single.append(val)
    else:
        # A value occurring more than twice is reported but not recorded, so
        # it is implicitly counted under 'None' below.
        print('ERROR')

print('Capa' ,str(len(junior)))
print('Single', len(single))
print('Double', len(double))
print('None', TOTAL_JUNIORS-len(single)-len(double))
print('All', len(single)+len(double))