In [1]:
import pandas as pd
In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [3]:
# Load the passers list from the current working directory and preview
# the first five rows.
upcat_data = pd.read_csv("./passers.csv")
upcat_data.head()
Out[3]:
In [4]:
# Keep only the 'Course' column (as a one-column DataFrame) and preview it.
course_data = upcat_data.loc[:, ['Course']]
course_data.head(10)
Out[4]:
In [5]:
# 'Course' as a categorical Series, used below for frequency counts.
course_series = upcat_data.loc[:, 'Course'].astype('category')
In [6]:
# Ten most frequent courses (value_counts sorts by count, descending).
course_series.value_counts().head(10)
Out[6]:
In [7]:
# Bar chart of the courses ranked 3rd through 10th by count; the two most
# frequent entries are skipped by the positional slice.
course_series.value_counts().iloc[2:10].plot.bar()
Out[7]:
In [8]:
# 'Name' as a categorical Series; preview the first ten entries.
names = upcat_data.loc[:, 'Name'].astype('category')
names.head(10)
Out[8]:
In [9]:
# Add an empty 'Familyname' column at position 0.
# DataFrame.insert raises ValueError when the column already exists, so the
# call is guarded to keep this cell safe to re-run on the same kernel.
if 'Familyname' not in upcat_data.columns:
    upcat_data.insert(loc=0, column='Familyname', value='')
upcat_data.head()
Out[9]:
In [10]:
def process_names(text):
    """Split a 'Familyname,Name' string at its first comma.

    Parameters
    ----------
    text : str
        A name containing at least one comma.

    Returns
    -------
    pandas.Series
        Two elements: the part before the first comma and everything after
        it. Whitespace is left untouched, and any further commas stay in the
        second element.

    Raises
    ------
    ValueError
        If ``text`` contains no comma.
    """
    # Split on the first comma only, so names with extra commas (e.g. a
    # suffix after a second comma) no longer break the two-way unpacking.
    familyname, separator, name = text.partition(',')
    if not separator:
        raise ValueError(
            "expected 'Familyname,Name' but found no comma in {!r}".format(text)
        )
    return pd.Series([familyname, name])
In [11]:
# Split each 'Name' value with process_names and write the two parts into
# 'Familyname' and 'Name'.
# Note: this overwrites 'Name' with the part after the comma, so the cell
# is not idempotent.
upcat_data[['Familyname', 'Name']] = upcat_data['Name'].apply(process_names)
upcat_data.head()
Out[11]:
In [12]:
# 'Familyname' as a categorical Series; preview the first five entries.
familynames = upcat_data.loc[:, 'Familyname'].astype('category')
familynames.head()
Out[12]:
In [13]:
# Occurrences of each family name, most frequent first (defaults spelled out).
familynames.value_counts(sort=True, ascending=False)
Out[13]:
In [14]:
# Bar chart of the ten most frequent family names.
familynames.value_counts().head(10).plot.bar()
Out[14]:
In [15]:
# 'Campus' as a categorical Series; preview the first ten entries.
campuses = upcat_data.loc[:, 'Campus'].astype('category')
campuses.head(10)
Out[15]:
In [16]:
# Five most frequent campuses.
campuses.value_counts().head()
Out[16]:
In [17]:
# Bar chart of the five most frequent campuses.
campuses.value_counts().head().plot.bar()
Out[17]:
In [ ]: