In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn
%matplotlib inline
# Read data/nyc_data.csv, asking pandas to parse both timestamp columns.
data = pd.read_csv(
    'data/nyc_data.csv',
    parse_dates=['pickup_datetime', 'dropoff_datetime'],
)
# Read data/nyc_fare.csv; only the pickup timestamp is parsed here.
fare = pd.read_csv(
    'data/nyc_fare.csv',
    parse_dates=['pickup_datetime'],
)
In [2]:
# Group the rows of `data` by the ISO week number of their pickup time.
# `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0;
# `dt.isocalendar().week` is the documented replacement and yields the same
# week numbers.
# NOTE(review): the key is the week number only (no year), so rows from
# different years -- or late-December days that fall in ISO week 1 -- end up
# in the same group. Confirm this is acceptable for the dates in the data.
weekly = data.groupby(data.pickup_datetime.dt.isocalendar().week)
In [3]:
# Number of groups in `weekly`, i.e. how many distinct week keys were found.
len(weekly)
Out[3]:
In [4]:
# Row count of each weekly group (one entry per week key).
y = weekly.size()
# Preview the first three weekly counts.
y.head(3)
Out[4]:
In [5]:
# For every weekly group, take the first pickup_datetime value in row order
# (not necessarily the earliest timestamp of that week).
x = weekly['pickup_datetime'].first()
x.head(3)
Out[5]:
In [6]:
# Pair each weekly count with the representative pickup time of its week,
# then draw the series as a line plot and label it through the Axes object.
rides_per_week = pd.Series(y.values, index=x)
ax = rides_per_week.plot()
ax.set_ylim(0)  # Pin the lower y limit at 0; the upper limit is untouched.
ax.set_xlabel('Week')
ax.set_ylabel('Taxi rides')
In [7]:
# Keep only the fare rows with a strictly positive tip, restricted to the
# two columns needed, then average the tip per medallion.
tipped = fare.loc[fare.tip_amount > 0, ['medallion', 'tip_amount']]
tip = tipped.groupby('medallion').mean()
# Number of medallions that have at least one positive tip.
print(len(tip))
tip.head(3)
Out[7]:
In [8]:
# 100 evenly spaced edges between 0 and 6 give 99 equal-width bins;
# values outside that range are not counted by the histogram.
bin_edges = np.linspace(0., 6., 100)
tip.hist(bins=bin_edges)
plt.xlabel('Average tip')
plt.ylabel('Number of taxis')
In [9]:
# Left-join the per-medallion average tip onto `data`: the 'medallion'
# column of `data` is matched against the index of `tip`. Every row of `data`
# is kept; medallions absent from `tip` get NaN in the joined column.
data_merged = data.merge(
    tip,
    how='left',
    left_on='medallion',
    right_index=True,
)
data_merged.head(3)