In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from scipy.stats import norm
# Apply matplotlib's built-in 'fivethirtyeight' style sheet to every figure drawn below.
plt.style.use('fivethirtyeight')

In [3]:
# Load the grade table from a CSV in the working directory and display it.
# Each row is one gender/year group (named in the 'Gender' column); the
# remaining columns hold the number of students per grade, A through E.
# Note: the Gender labels are formatted inconsistently (some have a space
# before the year, and one row reads 'FEMLAE(2010)'), so exact string
# matching on them is fragile.
kcse = pd.read_csv('KCSE_2016.csv')
kcse


Out[3]:
Gender A A- B+ B B- C+ C C- D+ D D- E
0 FEMALE(2016) 58 2685 6581 10204 13649 17238 22960 30979 41632 57487 77718 18007
1 MALE(2016) 83 1960 4394 7012 10096 14969 21832 30047 39319 54648 72211 15322
2 ALL(2016) 141 4645 10975 17216 23745 32207 44792 61026 80951 112135 149929 33399
3 MALE(2015) 2024 7952 13517 19826 25312 29556 33437 37482 40181 40442 25531 3127
4 FEMALE(2015) 661 4117 8410 13634 19269 25214 31476 36633 38976 39113 23127 2223
5 ALL(2015) 2685 12069 21927 33460 44581 54770 64913 74115 79157 79555 48658 5350
6 MALE (2006) 148 638 1195 1627 2108 2569 2984 3299 3418 3291 2635 834
7 FEMALE (2006) 69 242 446 772 1234 1873 2554 3193 3519 3513 2909 897
8 ALL (2006) 217 880 1641 2399 3342 4442 5538 6492 6937 6804 5544 1731
9 MALE (2007) 110 563 1159 1761 2268 2871 3314 3609 3721 3493 2762 779
10 FEMALE (2007) 50 191 473 844 1339 2100 2865 3439 3817 3730 2924 790
11 ALL (2007) 160 754 1632 2605 3607 4971 6179 7048 7538 7223 5686 1569
12 MALE (2008) 102 698 1413 1957 2469 2972 3463 3824 3981 3942 3400 1379
13 FEMALE(2008) 66 274 596 920 1417 2129 2946 3618 4043 4150 3593 1376
14 ALL (2008) 168 972 2009 2877 3886 5101 6409 7442 8024 8092 6993 2755
15 MALE(2009) 138 628 1436 2124 2783 3311 3824 4247 4402 4336 3606 1257
16 FEMALE(2009) 58 218 553 974 1572 2388 3216 3996 4447 4514 3897 1326
17 ALL(2009) 196 846 1989 3098 4355 5699 7040 8243 8849 8850 7503 2583
18 MALE(2010) 182 892 1787 2458 3094 3639 4168 4536 4704 4561 3776 1306
19 FEMLAE(2010) 97 344 744 1219 1875 2722 3637 4389 4734 4769 4012 1304
20 ALL(2010) 279 1236 2531 3677 4969 6361 7805 8925 9438 9330 7788 2610

In [16]:
# Grade labels: every column header after the leading 'Gender' column.
kcse.columns[1:]


Out[16]:
Index(['A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'D+', 'D', 'D-', 'E'], dtype='object')

In [64]:
# Number of students per grade, one line per year, using each year's combined
# ("ALL...") row.
# The legend label is read from the year inside the row's own Gender value
# (e.g. 'ALL (2006)' -> '2006') instead of being typed by hand: the hand-typed
# labels 2014-2011 were attached to the rows the table marks as 2006-2009.
# NOTE(review): if the CSV's year labels are the ones that are wrong, correct
# them in the data so the table and the figure stay in agreement.
grade_columns = kcse.columns[1:]  # every column after 'Gender' is a grade
all_rows = kcse[kcse['Gender'].str.startswith('ALL', na=False)]
years = all_rows['Gender'].str.extract(r'\((\d{4})\)', expand=False)

plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
xes = list(range(len(grade_columns)))
my_xticks = grade_columns
plt.xticks(xes, my_xticks)
# Rows are drawn in table order, so the line order matches the original figure.
for year, counts in zip(years, all_rows[grade_columns].to_numpy()):
    plt.plot(xes, counts, lw=1.5, label=year)

plt.ylabel('No. of Students')
plt.xlabel('Grades')
# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# bbox_inches='tight' keeps the outside legend inside the saved image.
plt.savefig('no_students.png', bbox_inches='tight')
plt.show()



In [38]:
# Keep only the per-grade count columns. The axis is named via `columns=`
# because pandas 2.0 no longer accepts it as a second positional argument.
newkcse = kcse.drop(columns='Gender')

In [92]:
# Sum across the grade columns of each row.
newkcse.sum(axis=1)


Out[92]:
0     299198
1     271893
2     571161
3     278387
4     242853
5     521240
6      24746
7      21221
8      45967
9      26410
10     22562
11     48972
12     29600
13     25128
14     54728
15     32092
16     27159
17     59251
18     35103
19     29846
20     64949
dtype: int64

In [94]:
# Total students in row 5 (ALL(2015)) as a percentage of the total in row 8
# (ALL (2006)). The totals are computed from the table instead of being
# retyped from the printed output, so the figure follows the data.
grade_totals = kcse.drop(columns='Gender').sum(axis=1)
float(grade_totals.iloc[5] / grade_totals.iloc[8] * 100)


Out[94]:
1133.9439162877718

In [39]:
# Rescale every row so that its values add up to 100 (percent of the row total).
row_totals = newkcse.sum(axis=1)
newkcse = newkcse.div(row_totals / 100, axis=0)

In [93]:
# Display the table with the original Gender labels as the row index.
# The result is only shown; newkcse itself is not modified.
gender_labels = kcse['Gender']
newkcse.set_index(gender_labels)


Out[93]:
A A- B+ B B- C+ C C- D+ D D- E
Gender
FEMALE(2016) 0.019385 0.897399 2.199547 3.410451 4.561862 5.761402 7.673848 10.354013 13.914532 19.213698 25.975441 6.018423
MALE(2016) 0.030527 0.720872 1.616077 2.578956 3.713225 5.505475 8.029629 11.051038 14.461203 20.099083 26.558609 5.635305
ALL(2016) 0.024687 0.813256 1.921525 3.014211 4.157322 5.638865 7.842272 10.684553 14.173062 19.632818 26.249866 5.847563
MALE(2015) 0.727045 2.856455 4.855471 7.121741 9.092379 10.616875 12.010978 13.463991 14.433504 14.527259 9.171046 1.123256
FEMALE(2015) 0.272181 1.695264 3.463000 5.614096 7.934429 10.382412 12.960927 15.084434 16.049215 16.105628 9.523045 0.915369
ALL(2015) 0.515118 2.315440 4.206699 6.419308 8.552874 10.507636 12.453572 14.218978 15.186287 15.262643 9.335047 1.026399
MALE (2006) 0.598076 2.578194 4.829063 6.574800 8.518548 10.381476 12.058515 13.331448 13.812333 13.299119 10.648186 3.370242
FEMALE (2006) 0.325150 1.140380 2.101692 3.637906 5.814995 8.826163 12.035248 15.046416 16.582630 16.554357 13.708119 4.226945
ALL (2006) 0.472078 1.914417 3.569952 5.218961 7.270433 9.663454 12.047773 14.123175 15.091261 14.801923 12.060826 3.765745
MALE (2007) 0.416509 2.131768 4.388489 6.667929 8.587656 10.870882 12.548277 13.665278 14.089360 13.226051 10.458160 2.949640
FEMALE (2007) 0.221612 0.846556 2.096445 3.740803 5.934758 9.307685 12.698342 15.242443 16.917826 16.532222 12.959844 3.501463
ALL (2007) 0.326717 1.539655 3.332517 5.319366 7.365433 10.150698 12.617414 14.391897 15.392469 14.749244 11.610716 3.203872
MALE (2008) 0.344595 2.358108 4.773649 6.611486 8.341216 10.040541 11.699324 12.918919 13.449324 13.317568 11.486486 4.658784
FEMALE(2008) 0.262655 1.090417 2.371856 3.661254 5.639128 8.472620 11.723973 14.398281 16.089621 16.515441 14.298790 5.475963
ALL (2008) 0.306973 1.776056 3.670881 5.256907 7.100570 9.320640 11.710642 13.598158 14.661599 14.785850 12.777737 5.033986
MALE(2009) 0.430014 1.956874 4.474635 6.618472 8.671943 10.317213 11.915742 13.233828 13.716814 13.511155 11.236445 3.916864
FEMALE(2009) 0.213557 0.802681 2.036157 3.586288 5.788137 8.792665 11.841379 14.713355 16.373946 16.620641 14.348835 4.882359
ALL(2009) 0.330796 1.427824 3.356905 5.228604 7.350087 9.618403 11.881656 13.912001 14.934769 14.936457 12.663077 4.359420
MALE(2010) 0.518474 2.541093 5.090733 7.002251 8.814061 10.366635 11.873629 12.921972 13.400564 12.993191 10.756915 3.720480
FEMLAE(2010) 0.325002 1.152583 2.492796 4.084299 6.282249 9.120150 12.185888 14.705488 15.861422 15.978691 13.442337 4.369095
ALL(2010) 0.429568 1.903032 3.896904 5.661365 7.650618 9.793838 12.017121 13.741551 14.531402 14.365117 11.990947 4.018538

In [65]:
# Share of students per grade (values of newkcse), one line per year, using
# each year's combined ("ALL...") row.
# newkcse has no Gender column, so the rows and their legend labels are taken
# from kcse, whose rows are in the same order. Labels come from the year in the
# Gender value (e.g. 'ALL (2006)' -> '2006') instead of being typed by hand:
# the hand-typed labels 2014-2011 were attached to the rows the table marks as
# 2006-2009.
# NOTE(review): if the CSV's year labels are the ones that are wrong, correct
# them in the data so the table and the figure stay in agreement.
is_all_row = kcse['Gender'].str.startswith('ALL', na=False)
years = kcse.loc[is_all_row, 'Gender'].str.extract(r'\((\d{4})\)', expand=False)
all_rows = newkcse[is_all_row.to_numpy()]  # positional mask: same row order as kcse

plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
xes = list(range(len(newkcse.columns)))
my_xticks = newkcse.columns
plt.xticks(xes, my_xticks)
# Rows are drawn in table order, so the line order matches the original figure.
for year, shares in zip(years, all_rows.to_numpy()):
    plt.plot(xes, shares, lw=1.5, label=year)

plt.ylabel('% of Students')
plt.xlabel('Grades')
# Anchor the legend just outside the right edge of the axes.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
# bbox_inches='tight' keeps the outside legend inside the saved image.
plt.savefig('per_students.png', bbox_inches='tight')
plt.show()