In [1]:
import pandas as pd

In [2]:
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [3]:
# Plot defaults for this notebook: ggplot theme and 16:9 figures scaled by aspect_mult.
plt.style.use('ggplot')
aspect_mult = 0.9
linewidth = 3
# Default figure size (width, height) in inches for every figure created below.
plt.rcParams['figure.figsize'] = [aspect_mult * 16, aspect_mult * 9]

In [4]:
# Load the 2016 records; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="data/universities_2016.csv")

In [5]:
# Preview the first rows to confirm the file loaded as expected.
df.head()


Out[5]:
University
0 DSSA
1 NWU
2 NWU
3 SMU
4 SMU

In [6]:
# Number of rows recorded for each distinct 'University' label.
result = (
    df
    .groupby(by='University')
    .size()
)

In [7]:
# Display the counts ordered by index label; this returns a sorted copy and leaves `result` unchanged.
result.sort_index()


Out[7]:
University
DSSA     1
NWU      2
SMU      6
TUT      1
UCT      5
UFH      3
UJ       4
UKZN     5
UL       5
UNISA    3
UP       8
WSU      2
Wits     4
dtype: int64

In [8]:
# Display `result` as stored; the recorded output matches that of the sort_index() cell.
result


Out[8]:
University
DSSA     1
NWU      2
SMU      6
TUT      1
UCT      5
UFH      3
UJ       4
UKZN     5
UL       5
UNISA    3
UP       8
WSU      2
Wits     4
dtype: int64

In [9]:
# List the index labels of `result` (the university names used to label the bars in the chart cell).
result.keys()


Out[9]:
Index([u'DSSA', u'NWU', u'SMU', u'TUT', u'UCT', u'UFH', u'UJ', u'UKZN', u'UL',
       u'UNISA', u'UP', u'WSU', u'Wits'],
      dtype='object', name=u'University')

In [10]:
# Horizontal bar chart of the number of students per university, saved as a PNG.
fig, ax = plt.subplots()

# One bar per university, centred on the integer positions 0..n-1.
# align='center' is passed explicitly and the ticks are placed at the same
# positions, so the labels stay centred on the bars regardless of the installed
# matplotlib's default alignment (a fixed +0.4 tick offset is only correct when
# bars are edge-aligned).
bar_positions = np.arange(result.shape[0])
ax.barh(bar_positions, result.values, align='center')
ax.set_yticks(bar_positions)
ax.set_yticklabels(result.index, rotation=0, fontsize=14)

# Leave one unit of headroom to the right of the longest bar.
# result.max() is used so the outcome does not depend on which `max` is in
# the interactive namespace after %pylab's star imports.
ax.set_xlim(0, result.max() + 1)

ax.set_title("DSIDE 2016/2017 University Representation", fontsize=18, color='k')
ax.set_ylabel("Universities", fontsize=16, color='k')
ax.set_xlabel("Number of Students", fontsize=16, color='k')

# bbox_inches='tight' crops the saved image to the drawn content.
fig.savefig('../images/2016-universities.png', bbox_inches='tight')



In [ ]: