In [1]:
import pandas as pd

In [2]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [4]:
# Plot defaults used by the figures in this notebook.
plt.style.use('ggplot')
aspect_mult = 0.9  # scale factor applied to a 16x9 base figure size
linewidth = 3  # not referenced by the cells visible here
# Set the default figure size directly in rcParams (what figsize() does).
plt.rcParams['figure.figsize'] = [aspect_mult*16, aspect_mult*9]

In [5]:
# Load the 2017 universities CSV; the path is relative to the working directory.
df = pd.read_csv("data/universities_2017.csv")

In [6]:
# Preview the first rows to check that the file loaded as expected.
df.head()


Out[6]:
University
0 SU
1 SPU
2 WSU
3 UP
4 UP

In [7]:
# Number of rows recorded for each distinct 'University' value.
result = (
    df
    .groupby('University')
    .size()
)

In [8]:
# Display the counts ordered by university name. The sorted Series is only
# shown here, not assigned, so `result` itself is left unchanged.
result.sort_index()


Out[8]:
University
AIMS        1
AIMS/UCT    1
MU          1
NWU         2
RU          1
SMU         5
SPU         2
SU          1
TUT         1
UCT         3
UJ          1
UKZN        2
UL          8
UNISA       2
UNIVEN      1
UNIZUL      1
UP          8
UWC         1
WITS        3
WSU         1
dtype: int64

In [9]:
# Display the per-university counts as stored in `result`.
result


Out[9]:
University
AIMS        1
AIMS/UCT    1
MU          1
NWU         2
RU          1
SMU         5
SPU         2
SU          1
TUT         1
UCT         3
UJ          1
UKZN        2
UL          8
UNISA       2
UNIVEN      1
UNIZUL      1
UP          8
UWC         1
WITS        3
WSU         1
dtype: int64

In [10]:
# University labels of the counts; these become the bar labels in the plot.
result.index


Out[10]:
Index([u'AIMS', u'AIMS/UCT', u'MU', u'NWU', u'RU', u'SMU', u'SPU', u'SU',
       u'TUT', u'UCT', u'UJ', u'UKZN', u'UL', u'UNISA', u'UNIVEN', u'UNIZUL',
       u'UP', u'UWC', u'WITS', u'WSU'],
      dtype='object', name=u'University')

In [11]:
# Horizontal bar chart of the number of students per university, saved as a PNG.
# Uses `result` (counts indexed by university) computed in an earlier cell.
positions = np.arange(result.shape[0])

# Centre every bar on its integer position and put the tick labels on those
# same positions. The previous "+0.4" tick offset only lined up with the bars
# when Matplotlib's default was align='edge'; with the newer default
# (align='center') it pushed each label 0.4 above its bar.
plt.barh(positions, result.values, align='center')
plt.yticks(positions, result.keys(), rotation=0, fontsize=14)

# Leave one unit of headroom to the right of the longest bar.
plt.xlim(0, result.max() + 1)

plt.title("DSIDE 2017/2018 University Representation", fontsize=18, color='k')
plt.ylabel("Universities", fontsize=16, color='k')
plt.xlabel("Number of Students", fontsize=16, color='k')

# Use the same `plt` alias as the rest of the cell instead of the extra
# `pyplot` name injected by %pylab.
plt.savefig('../images/2017-universities.png', bbox_inches='tight')



In [ ]: