In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Trip records: both timestamp columns are parsed as datetimes while loading.
data = pd.read_csv(
    'data/nyc_data.csv',
    parse_dates=['pickup_datetime', 'dropoff_datetime'],
)
# Fare records: only the pickup timestamp is parsed as a datetime.
fare = pd.read_csv(
    'data/nyc_fare.csv',
    parse_dates=['pickup_datetime'],
)
In [2]:
# Peek at the first three trips, keeping only the distance and duration columns.
data.head(3)[['trip_distance', 'trip_time_in_secs']]
Out[2]:
In [3]:
# Label-based lookup: .loc[0] selects the row whose index label is 0.
data.loc[0]
Out[3]:
In [4]:
# Passing a list of labels to .loc selects exactly those rows (labels 0 and 100000).
data.loc[[0, 100000]]
In [5]:
# Label slice from 1000 to 2000 in steps of 10, restricted to two columns.
# Unlike positional slicing, a .loc slice includes the end label.
data.loc[1000:2000:10,
['trip_distance', 'trip_time_in_secs']]
Out[5]:
In [6]:
# Boolean-mask selection: keep only the rows whose trip_distance is above 50.
data.loc[data.trip_distance>50]
In [7]:
from ipywidgets import interact
In [8]:
@interact
def show_nrows(distance_threshold=(0, 200)):
    """Count the trips whose ``trip_distance`` exceeds the chosen threshold.

    ``interact`` turns the ``(0, 200)`` default into an integer slider and
    calls this function again each time the slider value changes.

    Parameters
    ----------
    distance_threshold : number
        Exclusive lower bound applied to ``data.trip_distance``.

    Returns
    -------
    int
        Number of rows of ``data`` with ``trip_distance > distance_threshold``.
    """
    # Sum the boolean mask instead of materialising the filtered frame: the
    # count is the same, but no copy of every column is made per slider move.
    return int((data.trip_distance > distance_threshold).sum())
In [9]:
# Derive a minutes column from the seconds column (adds/overwrites it on `data`).
data['trip_time_in_mins'] = data['trip_time_in_secs'].div(60.0)
In [10]:
# Show the seconds column next to the derived minutes column for the first three trips.
data.head(3)[['trip_time_in_secs', 'trip_time_in_mins']]
Out[10]:
In [11]:
# First five distances, taken by position.
a = data['trip_distance'].iloc[:5]
a
Out[11]:
In [12]:
# Distances at positions 2 through 5; this range only partly overlaps `a`.
b = data['trip_distance'].iloc[2:6]
b
Out[12]:
In [13]:
# Series addition aligns on index labels; a label present in only one
# operand yields NaN in the result.
a.add(b)
Out[13]:
In [14]:
# First three values of the medallion column.
data['medallion'].head(3)
Out[14]:
In [15]:
# Vectorised string slicing: keep the first four characters of each medallion.
data['medallion'].str[:4].head(3)
Out[15]:
In [16]:
# Day of week (Monday=0 ... Sunday=6) for every 200000th pickup, by position.
data['pickup_datetime'].iloc[::200000].dt.dayofweek
Out[16]:
In [17]:
# Day-of-month of the pickup and of the dropoff for every trip.
day_p = data['pickup_datetime'].dt.day
day_d = data['dropoff_datetime'].dt.day
# True where the trip starts and ends on different day-of-month numbers.
# NOTE(review): only the day number is compared, so a trip that begins and
# ends on the same day number of different months would not be flagged.
selection = day_p.ne(day_d)
# Number of flagged trips, followed by a preview of the first three.
print(int(selection.sum()))
data[selection].head(3)
Out[17]: