In [32]:
import pandas as pd
In [33]:
# Toy four-row table: two text columns (`name`, `gender`) and one float
# column (`balance`), used to demonstrate the categorical dtype.
df = pd.DataFrame(
    {
        'name': ['Alice', 'Bob', 'Charlie', 'Danielle'],
        'balance': [100.0, 200.0, 300.0, 400.0],
        'gender': ['Female', 'Male', 'Male', 'Female'],
    },
    # Column order is pinned explicitly rather than left to dict ordering.
    columns=['name', 'balance', 'gender'],
)
In [34]:
# Inspect the per-column dtypes before `gender` is converted to a categorical.
df.dtypes
Out[34]:
In [35]:
# Replace the `gender` column with a categorical version of itself.
# The column is overwritten in place on `df`; the values shown do not change,
# only the column's dtype.
df['gender'] = df['gender'].astype('category')
In [36]:
# Display the whole frame after the cast (only four rows, so no .head() needed).
df
Out[36]:
In [37]:
# Re-check the dtypes now that `gender` has been cast with astype('category').
df.dtypes
Out[37]:
In [38]:
# Show the `gender` column on its own (attribute access to the same column
# as df['gender']).
df.gender
Out[38]:
In [39]:
# `.cat.categories` lists the distinct labels of the categorical column.
df.gender.cat.categories
Out[39]:
In [40]:
# `.cat.codes` gives, for every row, the integer code of its category,
# i.e. the position of that row's label within `.cat.categories`.
df.gender.cat.codes
Out[40]:
In [41]:
# Load the trip data from a CSV in the current working directory.
# NOTE: this rebinds `df`, so the small demo frame built earlier is no longer
# reachable under that name.
# NOTE(review): presumably the NYC taxi trip dataset (later cells use the
# `medallion` and `trip_distance` columns) -- confirm the source and schema.
df = pd.read_csv('trip_data_1.csv')
In [42]:
# Preview the first rows instead of rendering the entire frame.
df.head()
Out[42]:
In [43]:
%time
df.groupby(df.medallion).trip_distance.sum().sort(ascending=False, inplace=False).head()
Out[43]:
In [44]:
# Cast `medallion` to a categorical, overwriting the column on `df`, and time
# the cast itself with the `%time` line magic.
# NOTE(review): presumably done so the groupby that follows can be compared
# with the same groupby run before the cast -- confirm intent.
%time df['medallion'] = df['medallion'].astype('category')
In [45]:
%time
df.groupby(df.medallion).trip_distance.sum().sort(ascending=False, inplace=False).head()
Out[45]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: