Analysis of the current state



In [1]:

    
%matplotlib inline
from __future__ import division
import pandas as pd
import numpy as np
import thinkstats2
import thinkplot



In [2]:

    
# Load the raw table, then keep only the rows whose 'junior' value is positive.
# The unfiltered frame stays available under its own name.
zemi_raw = pd.read_csv("zemi.csv")
has_juniors = zemi_raw['junior'] > 0
data = zemi_raw[has_juniors]
data.head()









    Out[2]:






  
    
      
      id
      name
      12
      13
      14
      15_1
      15_2
      15_3
      all
      junior
      ...
      s7
      s8
      s9
      s10
      s11
      s12
      s13
      s14
      s15
      s16
    
  
  
    
      0
      1
      aoki
      0
      0
      9
      6
      5
      0
      20
      11
      ...
      118
      1103
      1119
      1120
      2050
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      2
      ishihara
      0
      0
      9
      6
      8
      0
      23
      14
      ...
      31
      42
      101
      139
      171
      1042
      1058
      2044
      NaN
      NaN
    
    
      2
      3
      itou
      0
      0
      12
      13
      0
      0
      25
      13
      ...
      174
      109
      99
      96
      36
      35
      34
      NaN
      NaN
      NaN
    
    
      3
      4
      ueda
      0
      0
      10
      13
      0
      0
      23
      13
      ...
      173
      1075
      1093
      2014
      2029
      2035
      2040
      NaN
      NaN
      NaN
    
    
      4
      5
      oohashi
      0
      1
      4
      2
      9
      0
      16
      11
      ...
      1009
      1034
      70
      1109
      78
      NaN
      NaN
      NaN
      NaN
      NaN
    
  

5 rows × 29 columns



In [3]:

    
# Descriptive statistics (count, mean, std, quartiles) of the 'junior' column
junior_counts = data['junior']
junior_counts.describe()









    Out[3]:





count    42.000000
mean      9.166667
std       4.247907
min       1.000000
25%       5.250000
50%      10.000000
75%      13.000000
max      16.000000
Name: junior, dtype: float64



In [4]:

    
# Histogram of the 'junior' column over every row of `data`
junior_values = np.asarray(data['junior'])
junior = thinkstats2.Hist(junior_values)
thinkplot.PrePlot(2)
thinkplot.Hist(junior, label='Juniors')
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='Freq',
    loc=2,
    axis=[0, 17, 0, 7],
)



In [5]:

    
# Split the rows on the 'zemi' flag: rows flagged 1 go to `semi` (seminars),
# rows flagged 0 go to `lec` (lectures).
zemi_flag = data['zemi']
semi = data[zemi_flag == 1]
lec = data[zemi_flag == 0]



In [6]:

    
# Descriptive statistics of the 'junior' column, restricted to the seminar rows
semi_junior_counts = semi['junior']
semi_junior_counts.describe()









    Out[6]:





count    35.000000
mean      9.657143
std       3.925354
min       3.000000
25%       6.000000
50%      10.000000
75%      13.000000
max      16.000000
Name: junior, dtype: float64



In [7]:

    
# Histogram of the 'junior' column for the seminar rows only
semi_junior_values = np.asarray(semi['junior'])
semi_hist = thinkstats2.Hist(semi_junior_values)
thinkplot.PrePlot(2)
thinkplot.Hist(semi_hist, label='seminar')
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='Freq',
    loc=2,
    axis=[0, 17, 0, 5],
)



In [8]:

    
# Cumulative distribution of the 'junior' column for the seminar rows
semi_cmf = thinkstats2.Cdf(np.asarray(semi['junior']))
thinkplot.PrePlot(2)
thinkplot.Cdf(semi_cmf, label='seminar')
# The y axis of a CDF is a cumulative probability in [0, 1] (see the axis
# limits below), not a frequency, so it is labelled accordingly.
thinkplot.Config(
    title='The number of juniors in each seminar',
    xlabel='The num of juniors',
    ylabel='CDF',
    loc=2,
    axis=[0, 17, 0, 1],
)



In [9]:

    
# Count how often each value in the s1..s16 columns of the seminar rows occurs.
# The values are presumably student ids and each column one seminar slot --
# TODO confirm against the data description.
slot_columns = ['s%d' % k for k in range(1, 17)]

# Flatten as floats first: unused slots are NaN, and casting NaN directly to
# int is undefined (it only happened to give a negative number that the
# `>= 0` test then discarded). ravel() also removes the hard-coded 16*35
# element count, so the cell keeps working if the number of seminar rows changes.
slot_values = np.asarray(semi[slot_columns], dtype=float).ravel()
slot_values = slot_values[~np.isnan(slot_values)].astype(int)
# Keep the original non-negative filter for any real negative entries.
junior = slot_values[slot_values >= 0].tolist()

# Values seen once go to `single`, values seen twice go to `double`;
# any other frequency is reported as an error.
single = []
double = []
hist = thinkstats2.Hist(junior)
for val, freq in hist.Items():
    if freq == 2:
        double.append(val)
    elif freq == 1:
        single.append(val)
    else:
        print('ERROR')

# NOTE(review): 345 is presumably the total number of juniors enrolled;
# confirm the source of this figure.
TOTAL_JUNIORS = 345

print('Capa', str(len(junior)))
print('Single', len(single))
print('Double', len(double))
print('None', TOTAL_JUNIORS - len(single) - len(double))
print('All', len(single) + len(double))









    



('Capa', '338')
('Single', 236)
('Double', 51)
('None', 58)
('All', 287)

	id	name	13	14	15_1	15_2	all	junior	...	s7	s8	s9	s10	s11	s12	s13	s14	s15	s16
0	1	aoki	0	9	6	5	20	11	...	118	1103	1119	1120	2050	NaN	NaN	NaN	NaN	NaN
1	2	ishihara	0	9	6	8	23	14	...	31	42	101	139	171	1042	1058	2044	NaN	NaN
2	3	itou	0	12	13	0	25	13	...	174	109	99	96	36	35	34	NaN	NaN	NaN
3	4	ueda	0	10	13	0	23	13	...	173	1075	1093	2014	2029	2035	2040	NaN	NaN	NaN
4	5	oohashi	1	4	2	9	16	11	...	1009	1034	70	1109	78	NaN	NaN	NaN	NaN	NaN