In [171]:
handle = '/Users/datascientist/jared/MTA_delay_prediction/google_transit/stop_times.txt'
In [172]:
import pandas as pd
df = pd.read_csv(handle)
In [173]:
df.columns
Out[173]:
In [174]:
df.shape
Out[174]:
In [175]:
df = df.drop(['stop_headsign', u'pickup_type', u'drop_off_type', u'shape_dist_traveled'],1)
In [176]:
df.columns
Out[176]:
In [177]:
def get_day(id_string):
return id_string.split('_')[0][-3:]
In [178]:
df['trip_day'] = df['trip_id'].apply(get_day)
In [207]:
df_mon = df[df['trip_day']=='WKD']
df_tue = df[df['trip_day']=='WKD']
df_wed = df[df['trip_day']=='WKD']
df_thu = df[df['trip_day']=='WKD']
df_fri = df[df['trip_day']=='WKD']
df_sat = df[df['trip_day']=='SAT']
df_sun = df[df['trip_day']=='SUN']
In [180]:
max(df_tue['arrival_time'])
Out[180]:
In [181]:
df_sat.shape
Out[181]:
In [208]:
df_mon['trip_day'] = df_mon['trip_day'].apply(lambda x:'MON')
df_tue['trip_day'] = df_tue['trip_day'].apply(lambda x:'TUE')
df_wed['trip_day'] = df_wed['trip_day'].apply(lambda x:'WED')
df_thu['trip_day'] = df_thu['trip_day'].apply(lambda x:'THU')
df_fri['trip_day'] = df_fri['trip_day'].apply(lambda x:'FRI')
In [209]:
df_mon['arrival_time'] = df_mon['arrival_time'].apply(lambda x: pd.to_timedelta(x))
df_tue['arrival_time'] = df_tue['arrival_time'].apply(lambda x: pd.to_timedelta(x))
df_wed['arrival_time'] = df_wed['arrival_time'].apply(lambda x: pd.to_timedelta(x))
df_thu['arrival_time'] = df_thu['arrival_time'].apply(lambda x: pd.to_timedelta(x))
df_fri['arrival_time'] = df_fri['arrival_time'].apply(lambda x: pd.to_timedelta(x))
df_sat['arrival_time'] = df_sat['arrival_time'].apply(lambda x: pd.to_timedelta(x))
df_sun['arrival_time'] = df_sun['arrival_time'].apply(lambda x: pd.to_timedelta(x))
In [210]:
df_mon['departure_time'] = df_mon['departure_time'].apply(lambda x: pd.to_timedelta(x))
df_tue['departure_time'] = df_tue['departure_time'].apply(lambda x: pd.to_timedelta(x))
df_wed['departure_time'] = df_wed['departure_time'].apply(lambda x: pd.to_timedelta(x))
df_thu['departure_time'] = df_thu['departure_time'].apply(lambda x: pd.to_timedelta(x))
df_fri['departure_time'] = df_fri['departure_time'].apply(lambda x: pd.to_timedelta(x))
df_sat['departure_time'] = df_sat['departure_time'].apply(lambda x: pd.to_timedelta(x))
df_sun['departure_time'] = df_sun['departure_time'].apply(lambda x: pd.to_timedelta(x))
In [211]:
overtime_mon = df_mon[df_mon['arrival_time']>=pd.to_timedelta('24:00:00')]
overtime_tue = df_tue[df_tue['arrival_time']>=pd.to_timedelta('24:00:00')]
overtime_wed = df_wed[df_wed['arrival_time']>=pd.to_timedelta('24:00:00')]
overtime_thu = df_thu[df_thu['arrival_time']>=pd.to_timedelta('24:00:00')]
overtime_fri = df_fri[df_fri['arrival_time']>=pd.to_timedelta('24:00:00')]
overtime_sat = df_sat[df_sat['arrival_time']>=pd.to_timedelta('24:00:00')]
overtime_sun = df_sun[df_sun['arrival_time']>=pd.to_timedelta('24:00:00')]
In [212]:
df_mon = df_mon[df_mon['arrival_time']<pd.to_timedelta('24:00:00')]
df_tue = df_tue[df_tue['arrival_time']<pd.to_timedelta('24:00:00')]
df_wed = df_wed[df_wed['arrival_time']<pd.to_timedelta('24:00:00')]
df_thu = df_thu[df_thu['arrival_time']<pd.to_timedelta('24:00:00')]
df_fri = df_fri[df_fri['arrival_time']<pd.to_timedelta('24:00:00')]
df_sat = df_sat[df_sat['arrival_time']<pd.to_timedelta('24:00:00')]
df_sun = df_sun[df_sun['arrival_time']<pd.to_timedelta('24:00:00')]
In [213]:
overtime_mon['arrival_time'] = overtime_mon['arrival_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_tue['arrival_time'] = overtime_tue['arrival_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_wed['arrival_time'] = overtime_wed['arrival_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_thu['arrival_time'] = overtime_thu['arrival_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_fri['arrival_time'] = overtime_fri['arrival_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_sat['arrival_time'] = overtime_sat['arrival_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_sun['arrival_time'] = overtime_sun['arrival_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
In [166]:
max(overtime_mon['arrival_time'])
Out[166]:
In [214]:
overtime_mon['departure_time'] = overtime_mon['departure_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_tue['departure_time'] = overtime_tue['departure_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_wed['departure_time'] = overtime_wed['departure_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_thu['departure_time'] = overtime_thu['departure_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_fri['departure_time'] = overtime_fri['departure_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_sat['departure_time'] = overtime_sat['departure_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
overtime_sun['departure_time'] = overtime_sun['departure_time'].apply(lambda x: x-pd.to_timedelta('24:00:00'))
In [157]:
overtime_sun.head()
Out[157]:
In [203]:
df_tue.shape
Out[203]:
In [ ]:
# Drop over columns
In [215]:
min(overtime_mon['arrival_time'])
Out[215]:
In [216]:
max(df_tue['arrival_time'])
Out[216]:
In [217]:
df_mon.append(overtime_sun);
df_tue.append(overtime_mon);
df_wed.append(overtime_tue);
df_thu.append(overtime_wed);
df_fri.append(overtime_thu);
df_sat.append(overtime_fri);
df_sun.append(overtime_sat);
In [218]:
max(df_mon['arrival_time'])
Out[218]:
In [219]:
new_df = pd.DataFrame()
new_df = new_df.append([df_mon,df_tue,df_wed,df_thu,df_fri,df_sat,df_sun])
In [220]:
new_df.shape
Out[220]:
In [221]:
new_df['arrival_time'] = new_df['arrival_time'].apply(lambda x: pd.to_datetime(x))
new_df['departure_time'] = new_df['departure_time'].apply(lambda x: pd.to_datetime(x))
In [222]:
new_df.to_csv('/Users/datascientist/jared/MTA_delay_prediction/google_transit/corrected_stop_times.txt')