In [1]:
    
%matplotlib inline
from __future__ import division
import pandas as pd
import numpy as np
import thinkstats2
import thinkplot
    
In [2]:
    
# Load the table and keep only the rows whose 'junior' value is positive.
raw = pd.read_csv("zemi.csv")
has_junior = raw['junior'] > 0
data = raw[has_junior]
data.head()
    
    Out[2]:
In [3]:
    
# Summary statistics (count, mean, std, quartiles, ...) of the 'junior'
# column over every retained row.
data.loc[:, 'junior'].describe()
    
    Out[3]:
In [4]:
    
# Histogram of the 'junior' column over every retained row.
# NOTE(review): the title says "each seminar", but at this point `data`
# has not yet been split on the 'zemi' flag — confirm the wording.
junior = thinkstats2.Hist(data['junior'].values)
thinkplot.PrePlot(2)
thinkplot.Hist(junior, label='Juniors')
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='Freq',
    loc=2,
    axis=[0, 17, 0, 7]
)
    
    
In [5]:
    
# Split the rows on the 'zemi' flag: rows equal to 1 are kept as seminars,
# rows equal to 0 as lectures.
semi = data.loc[data['zemi'] == 1]
lec = data.loc[data['zemi'] == 0]
    
In [6]:
    
# Summary statistics of the 'junior' column, restricted to seminar rows.
semi.loc[:, 'junior'].describe()
    
    Out[6]:
In [7]:
    
# Histogram of the 'junior' column, restricted to seminar rows.
semi_hist = thinkstats2.Hist(semi['junior'].values)
thinkplot.PrePlot(2)
thinkplot.Hist(semi_hist, label='seminar')
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='Freq',
    loc=2,
    axis=[0, 17, 0, 5]
)
    
    
In [8]:
    
# Cumulative distribution of the 'junior' column over the seminar rows.
# The variable keeps its original name `semi_cmf` although it holds a
# thinkstats2.Cdf object.
semi_cmf = thinkstats2.Cdf(np.asarray(semi['junior']))
thinkplot.PrePlot(2)
thinkplot.Cdf(semi_cmf, label='seminar')
# The y axis is fixed to [0, 1] and shows cumulative probability, so it is
# labelled 'CDF' rather than 'Freq' (which was copied from the histogram).
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='CDF',
    loc=2,
    axis=[0, 17, 0, 1]
)
    
    
In [9]:
    
# Tally how often each non-negative entry occurs in columns s1..s16 of the
# seminar rows, then bucket the distinct values by occurrence count.
# NOTE(review): presumably each entry is a student ID and negative values
# mark unused slots — confirm against zemi.csv.
SEMINAR_SLOT_COLUMNS = ['s{}'.format(k) for k in range(1, 17)]

# NOTE(review): 345 is presumably the total number of juniors in the
# cohort; it is only used to derive the 'None' count below — confirm.
TOTAL_JUNIORS = 345

# ravel() flattens in the same row-major order as the former
# reshape(16*35) but works for any number of seminar rows instead of
# raising ValueError unless there are exactly 35.
slot_values = np.asarray(semi[SEMINAR_SLOT_COLUMNS], dtype=int).ravel().tolist()

# Keep only the non-negative entries.
junior = [value for value in slot_values if value >= 0]

hist = thinkstats2.Hist(junior)
double = []
single = []
for val, freq in hist.Items():
    if freq == 2:
        double.append(val)
    elif freq == 1:
        single.append(val)
    else:
        # A value occurring more than twice falls in neither bucket and
        # therefore skews the 'None' and 'All' figures; report which value
        # and how often so it can be traced in the data.
        print('ERROR', val, freq)
print('Capa' ,str(len(junior)))
print('Single', len(single))
print('Double', len(double))
print('None', TOTAL_JUNIORS-len(single)-len(double))
print('All', len(single)+len(double))