In [ ]:
import pandas as pd
from IPython.display import display
# Turn off pandas' chained-assignment check (the SettingWithCopyWarning) for the whole session.
pd.set_option('mode.chained_assignment', None)
In [ ]:
# Load the three raw tables from the working directory.
# Trips: the start/stop timestamp columns are parsed into datetimes while reading.
# NOTE(review): `infer_datetime_format` is deprecated from pandas 2.0 onwards;
# it is kept here so the load behaves exactly as before on older versions.
trip = pd.read_csv(
    'trip.csv',
    infer_datetime_format=True,
    parse_dates=['starttime', 'stoptime'],
)
# Weather and stations: the first CSV column becomes the row index.
weather = pd.read_csv('weather.csv', index_col=0)
station = pd.read_csv('station.csv', index_col=0)
In [ ]:
# Add derived features to the raw tables (used later).

# Convert the weather index (first CSV column) into datetimes.
weather.index = pd.to_datetime(weather.index)

# Trip start timestamps as a DatetimeIndex, so the date/time accessors below are available.
ind = pd.DatetimeIndex(trip.starttime)

# Calendar day of each trip, stored as a timestamp at midnight.
# normalize() is vectorised: it avoids creating one Python `date` object per
# row and casting the resulting object array back with a unit-less
# 'datetime64' dtype string.
trip['date'] = ind.normalize()

# Hour of day (0-23) at which each trip started.
trip['hour'] = ind.hour
In [ ]:
# Quick overview of each table: row count, shape, and a preview of the first five rows.

# Stations
print('There are {} stations.'.format(len(station)))
print('Station database shape: {} , the 5 first lines are:'.format(station.shape))
display(station.head(5))

# Weather (one row is reported as one day)
print('\nWe have {} days of weather data.'.format(len(weather)))
print('Weather database shape: {} , the 5 first lines are:'.format(weather.shape))
display(weather.head(5))

# Trips
print('\nThere are {} trips in total.'.format(len(trip)))
print('Trip database shape: {} , the 5 first lines are:'.format(trip.shape))
display(trip.head(5))