In [1]:
import pandas as pd

In [2]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [3]:
# Notebook-wide plotting defaults: a 16:9 canvas scaled by `aspect_mult`
# and a preferred line width, drawn with the ggplot theme.
aspect_mult = 0.9
linewidth = 3
plt.style.use('ggplot')
figsize(16 * aspect_mult, 9 * aspect_mult)

In [4]:
# Load the raw listing; the preview in the next cell shows a `University` column.
df = pd.read_csv(filepath_or_buffer="data/universities_2015.csv")

In [5]:
# Peek at the first five rows to sanity-check the load.
df.head(n=5)


Out[5]:
University
0 CUT
1 NMMU
2 UCT
3 UCT
4 UCT

In [6]:
# Number of rows per distinct `University` value, as a Series indexed by university.
result = df.groupby(by='University').size()

In [7]:
# Show the counts ordered by university label. This returns a sorted copy for
# display only; `result` itself is not reassigned here.
result.sort_index(ascending=True)


Out[7]:
University
CUT      1
NMMU     1
UCT      8
UJ       1
UKZN     3
UL       2
UP       1
US       1
UV       1
WSU      1
Wits    14
dtype: int64

In [8]:
# Display the counts as stored. The output is identical to the sort_index()
# view in the previous cell, i.e. the grouped result is already in index order.
result


Out[8]:
University
CUT      1
NMMU     1
UCT      8
UJ       1
UKZN     3
UL       2
UP       1
US       1
UV       1
WSU      1
Wits    14
dtype: int64

In [9]:
# The index of the counts Series holds the university labels used for the chart's y ticks.
result.index


Out[9]:
Index([u'CUT', u'NMMU', u'UCT', u'UJ', u'UKZN', u'UL', u'UP', u'US', u'UV',
       u'WSU', u'Wits'],
      dtype='object', name=u'University')

In [10]:
# Horizontal bar chart of the number of students per university, written to a PNG.

# One bar slot per university; the same positions are reused for the tick labels.
positions = np.arange(len(result))

# Centre the bars on their positions explicitly. The default alignment of
# barh changed from 'edge' to 'center' in matplotlib 2.0, so the previous
# hard-coded +0.4 tick offset only lined labels up with bars on old versions.
plt.barh(positions, result.values, align='center')
plt.yticks(positions, result.index, rotation=0, fontsize=14)

# Leave one unit of headroom to the right of the longest bar. Series.max()
# is used instead of a bare max() so the result does not depend on which
# `max` the %pylab star-import leaves in the namespace.
plt.xlim(0, result.max() + 1)

# NOTE(review): the title says 2016/2017 while the input CSV and the output
# image are both named 2015 -- confirm which label is intended.
plt.title("DSIDE 2016/2017 University Representation", fontsize=18, color='k')
plt.ylabel("Universities", fontsize=16, color='k')
plt.xlabel("Number of Students", fontsize=16, color='k')

# Save through the same `plt` alias used above rather than the `pyplot`
# name injected by %pylab; bbox_inches='tight' trims surrounding whitespace.
plt.savefig('../images/2015-universities.png', bbox_inches='tight')



In [ ]: