In [1]:
from datetime import datetime
from datetime import timedelta
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Allow up to 100 columns to be shown when a DataFrame is displayed.
pd.set_option('display.max_columns', 100)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
In [2]:
# Load the trip records CSV into `df`.
trips_csv = "DATA/babs_open_data_year_1/201402_babs_open_data/201402_trip_data.csv"
df = pd.read_csv(trips_csv)
In [3]:
# (rows, columns) of the trip table as loaded.
df.shape
Out[3]:
In [4]:
# Convert both trip timestamp columns to pandas datetimes.
for timestamp_column in ('Start Date', 'End Date'):
    df[timestamp_column] = pd.to_datetime(df[timestamp_column])
In [6]:
# Rename the full-timestamp columns so the plain 'Start Date' / 'End Date'
# names are free for the date-only columns derived further down.
timestamp_renames = {
    'Start Date': 'Start Date Time',
    'End Date': 'End Date Time',
}
df.rename(columns=timestamp_renames, inplace=True)
In [7]:
# Fix the end time: recompute it as start time + trip duration rather than
# using the 'End Date' value read from the CSV.
# NOTE(review): 'Duration' is documented as *seconds* in the Bay Area Bike
# Share trip export -- confirm against the dataset README. The previous code
# added it as minutes, which pushed every end time 60x too far from the start.
# pd.to_timedelta is vectorized, so no per-row .apply is needed.
df['End Date Time'] = df['Start Date Time'] + pd.to_timedelta(df['Duration'], unit='s')
In [8]:
# Preview the first rows to check the recomputed 'End Date Time' column.
df.head()
Out[8]:
In [9]:
# Split each full timestamp into a date-only and a time-of-day column.
# The assignment order (dates first, then times) fixes the order in which the
# new columns are appended to the frame.
start_stamps = df['Start Date Time']
end_stamps = df['End Date Time']

df['Start Date'] = start_stamps.apply(lambda stamp: stamp.date())
df['End Date'] = end_stamps.apply(lambda stamp: stamp.date())
df['Start Time'] = start_stamps.apply(lambda stamp: stamp.time())
df['End Time'] = end_stamps.apply(lambda stamp: stamp.time())
In [10]:
# Preview the first rows, now including the derived date and time columns.
df.head()
Out[10]:
In [11]:
# (rows, columns) of the trip table after adding the date/time columns.
df.shape
Out[11]:
In [12]:
# Load the station table CSV into `df_loc`.
stations_csv = "DATA/babs_open_data_year_1/201402_babs_open_data/201402_station_data.csv"
df_loc = pd.read_csv(stations_csv)
In [13]:
# Preview the first rows of the station table.
df_loc.head()
Out[13]:
In [14]:
# Attach station attributes to each trip by matching the trip's
# 'Start Station' against the station table's 'name' column.
# This is an inner join, so trips whose start station has no matching 'name'
# in df_loc are dropped from the result.
df_merged_1 = pd.merge(
    left=df,
    right=df_loc,
    how='inner',
    left_on='Start Station',
    right_on='name',
    sort=False,
)
In [15]:
# (rows, columns) after the inner join on start station; a row count lower
# than df.shape's means some trips had no matching station name.
df_merged_1.shape
Out[15]:
In [16]:
# Preview the first rows of the trips joined with start-station attributes.
df_merged_1.head()
Out[16]:
In [17]:
# Label the station columns brought in by the start-station merge so they are
# not confused with the end-station columns merged in later.
# NOTE(review): 'Latitute' looks like a typo for 'Latitude'; the spelling is
# kept as-is because this column name ends up in the merged CSV.
start_station_labels = {
    'lat': 'Start Latitute',
    'long': 'Start Longitude',
    'dockcount': 'Start Station Dockcount',
    'landmark': 'Start Station Landmark',
    'installation': 'Start Station Installation',
}
df_merged_1.rename(columns=start_station_labels, inplace=True)
In [18]:
# Preview the first rows with the start-station columns renamed.
df_merged_1.head()
Out[18]:
In [19]:
# Drop the station 'name' column: it duplicates 'Start Station' (the join key)
# and must be gone before df_loc is merged again on 'name'.
# `axis` is passed by keyword: the positional form drop('name', 1) is
# deprecated since pandas 1.3 and raises TypeError on pandas 2.x.
df_merged_1 = df_merged_1.drop('name', axis=1)
In [20]:
# Attach station attributes a second time, now matching the trip's
# 'End Station' against the station table's 'name' column.
# Inner join again: trips whose end station has no match are dropped.
# NOTE(review): any df_loc column that was not renamed after the first merge
# will collide here and get pandas' default '_x'/'_y' suffixes -- confirm
# whether such columns exist and are wanted in the output.
df_merged_1 = pd.merge(
    left=df_merged_1,
    right=df_loc,
    how='inner',
    left_on='End Station',
    right_on='name',
    sort=False,
)
In [21]:
# Preview the first rows after joining the end-station attributes.
df_merged_1.head()
Out[21]:
In [22]:
# Label the station columns brought in by the end-station merge.
# NOTE(review): 'Latitute' looks like a typo for 'Latitude'; the spelling is
# kept as-is because this column name ends up in the merged CSV.
end_station_labels = {
    'lat': 'End Latitute',
    'long': 'End Longitude',
    'dockcount': 'End Station Dockcount',
    'landmark': 'End Station Landmark',
    'installation': 'End Station Installation',
}
df_merged_1.rename(columns=end_station_labels, inplace=True)
In [23]:
# Drop the station 'name' column added by the end-station merge; it duplicates
# 'End Station' (the join key).
# `axis` is passed by keyword: the positional form drop('name', 1) is
# deprecated since pandas 1.3 and raises TypeError on pandas 2.x.
df_merged_1 = df_merged_1.drop('name', axis=1)
In [24]:
# Write the merged trip + station table to CSV, without the DataFrame index.
merged_csv = "DATA/babs_open_data_year_1/201402_babs_open_data/201402_merged_data.csv"
df_merged_1.to_csv(merged_csv, index=False)
In [25]:
# Preview the first rows of the merged table that was written to CSV.
df_merged_1.head()
Out[25]:
In [26]:
# Show start time, end time and duration side by side for the first rows, so
# the gap between the two times can be compared with 'Duration'.
df_merged_1[['Start Time', 'End Time', 'Duration']].head()
Out[26]: