In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
# import plotly.plotly as py
%matplotlib inline
from datetime import datetime, timedelta
In [2]:
# Load the merged master CSV into `df`, the frame used by the trip plots below.
# low_memory=False turns off pandas' chunked parsing so each column's dtype is
# inferred from the whole file rather than piecewise.
df = pd.read_csv("DATA/babs_master/merged_master.csv", low_memory=False)
In [3]:
# Load the station master CSV; the dock-count plot below reads its 'name' and
# 'dockcount' columns.
df_station = pd.read_csv("DATA/babs_master/station_master.csv")
In [4]:
# Display the station table.
df_station
Out[4]:
In [5]:
# List the column names available in the trip frame.
df.columns
Out[5]:
In [6]:
# Preview the first rows of the trip frame.
df.head()
Out[6]:
In [7]:
# Histogram of ride length in minutes.
#
# Keep only rows whose raw 'Duration' is below 1440 and take an explicit copy,
# so that adding the 'minutes' column below writes to an independent frame
# instead of a slice of the original (avoids pandas' SettingWithCopyWarning).
# NOTE(review): 'Duration' is divided by 60 to obtain minutes, which suggests
# it is stored in seconds; 1440 would then be a 24-minute cut-off rather than
# one day (1440 minutes) -- confirm the intended threshold.
df = df[df['Duration'] < 1440].copy()
df['minutes'] = df['Duration'] / 60

fig, ax = plt.subplots()
ax.hist(
    df['minutes'],
    bins=10,
    range=(df['minutes'].min(), df['minutes'].max()),
    color="orange",
)
ax.set_title("Duration of Bike Rental")
ax.set_xlabel("Bike Ride in Minutes")
ax.set_ylabel("Frequency")
# plt.show() renders the figure; the original argument-less plt.plot() drew nothing.
plt.show()
Out[7]:
In [8]:
# Number of rows per 'Subscription Type' value.
df['Subscription Type'].value_counts()
Out[8]:
In [9]:
# Bar chart of the number of rides per subscription type.
#
# The counts are computed once and the bar labels are taken from the counts'
# own index, so every bar is labelled with the category it actually represents
# (the previous hard-coded label list assumed a particular frequency order and
# used positional [0]/[1] lookups on a label-indexed Series).
subscription_counts = df['Subscription Type'].value_counts()
labels = list(subscription_counts.index)
y = list(subscription_counts.values)
N = len(y)
x = range(N)
width = 1/1.5

fig, ax = plt.subplots()
ax.bar(x, y, width, align='center', color="green")
# Pin one tick under each bar before naming the ticks; set_xticklabels alone
# attaches the names to whatever tick positions matplotlib happened to choose.
ax.set_xticks(list(x))
ax.set_xticklabels(labels, rotation='vertical')
plt.show()
In [10]:
# Bar chart of dock count per station, one bar per row of df_station.
#
# Labels and heights are both read row-by-row from the same frame so they
# always have the same length and order (the previous unique() call could
# yield fewer labels than bars if a station name repeats), and .values avoids
# relying on the frame having a 0..n-1 index.
labels = df_station['name'].values
y = list(df_station['dockcount'].values)
N = len(y)
x = range(N)
width = 1/1.5

fig, ax = plt.subplots()
ax.bar(x, y, width, align='center', color="blue")
# Pin one tick under each bar before naming the ticks.
ax.set_xticks(list(x))
ax.set_xticklabels(labels, rotation='vertical')
plt.show()
In [11]:
# Number of rows per 'Start Station' (value_counts sorts most frequent first).
df['Start Station'].value_counts()
Out[11]:
In [12]:
# Bar chart of the number of rides starting at each station, busiest first.
#
# value_counts() is evaluated once; its index supplies the labels and its
# values the bar heights, so label i always belongs to bar i. Previously the
# labels came from unique() (order of first appearance) while the heights came
# from value_counts() (descending frequency), which mislabelled the bars, and
# value_counts() was recomputed for every bar.
start_counts = df['Start Station'].value_counts()
labels = start_counts.index
y = list(start_counts.values)
N = len(y)
x = range(N)
width = 1/1.5

fig, ax = plt.subplots()
ax.bar(x, y, width, align='center', color="blue")
# Pin one tick under each bar before naming the ticks.
ax.set_xticks(list(x))
ax.set_xticklabels(labels, rotation='vertical')
plt.show()
In [13]:
# Number of rows per 'Start Station'.
# NOTE(review): this repeats an earlier cell verbatim, and the plot that
# follows is about 'End Station' -- 'End Station' may have been intended here;
# confirm.
df['Start Station'].value_counts()
Out[13]:
In [14]:
# Bar chart of the number of rides ending at each station, busiest first.
#
# value_counts() is evaluated once on 'End Station'; its index supplies the
# labels and its values the bar heights. Previously the number of bars was
# taken from the count of distinct *start* stations (a copy-paste slip that
# drops bars or raises when the two counts differ), and the labels came from
# unique(), whose order does not match value_counts().
end_counts = df['End Station'].value_counts()
labels = end_counts.index
y = list(end_counts.values)
N = len(y)
x = range(N)
width = 1/1.5

fig, ax = plt.subplots()
ax.bar(x, y, width, align='center', color="red")
# Pin one tick under each bar before naming the ticks.
ax.set_xticks(list(x))
ax.set_xticklabels(labels, rotation='vertical')
plt.show()
In [15]:
# Derive calendar features from each ride's start timestamp.
#
# The whole 'Start Date Time' column is parsed in one vectorised call (same
# "%m/%d/%Y %H:%M" layout as before; a value that does not match still raises
# ValueError) instead of calling strptime row by row in a Python loop.
start_time = pd.to_datetime(df['Start Date Time'], format="%m/%d/%Y %H:%M")

# pandas numbers weekdays Monday=0..Sunday=6; add 1 to keep the ISO numbering
# (Monday=1..Sunday=7) that date.isoweekday() produced.
iso_weekday = start_time.dt.dayofweek + 1

df['month'] = start_time.dt.month
df['day_of_month'] = start_time.dt.day
df['hour_of_day'] = start_time.dt.hour
df['day_of_week'] = iso_weekday
# 1 for Saturday/Sunday (ISO weekday 6 or 7), 0 for Monday..Friday.
df['weekend'] = (iso_weekday >= 6).astype(int)
In [15]:
# Rides per day of the week; 'day_of_week' holds ISO weekday numbers
# (1 = Monday ... 7 = Sunday).
labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
fig, ax = plt.subplots()
# One unit-wide bin centred on each weekday number. Fixed edges keep the bins
# correct even when some weekday is absent from the data.
ax.hist(df['day_of_week'], bins=np.arange(0.5, 8.5, 1.0))
# Pin a tick at every weekday number before naming the ticks; set_xticklabels
# alone attaches the names to matplotlib's automatically chosen tick positions.
ax.set_xticks(list(range(1, 8)))
ax.set_xticklabels(labels, rotation='vertical')
ax.set_title("day of the Week for Bike Rental")
ax.set_xlabel("Day of the Week")
ax.set_ylabel("Frequency")
# plt.show() renders the figure; the original argument-less plt.plot() drew nothing.
plt.show()
Out[15]:
In [16]:
# Rides per hour of the day (0..23).
fig, ax = plt.subplots()
# One unit-wide bin centred on each hour. The previous bins were 23/24 wide
# and anchored to the data's min/max, so bars drifted off the hour they
# represent and shifted if an hour was missing from the data.
ax.hist(df['hour_of_day'], bins=np.arange(-0.5, 24.5, 1.0))
ax.set_xticks(list(range(0, 24)))
ax.set_title("hour of day of Bike Rental")
ax.set_xlabel("Hour of the day of Bike Rental")
ax.set_ylabel("Frequency")
# plt.show() renders the figure; the original argument-less plt.plot() drew nothing.
plt.show()
Out[16]:
In [17]:
# Rides per calendar month; 'month' holds month numbers 1..12.
labels = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
fig, ax = plt.subplots()
# One unit-wide bin centred on each month number. Fixed edges keep the bins
# correct even when the data does not span January through December.
ax.hist(df['month'], bins=np.arange(0.5, 13.5, 1.0))
# Pin a tick at every month number before naming the ticks; set_xticklabels
# alone attaches the names to matplotlib's automatically chosen tick positions.
ax.set_xticks(list(range(1, 13)))
ax.set_xticklabels(labels, rotation='vertical')
ax.set_title("month of Bike Rental")
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
# plt.show() renders the figure; the original argument-less plt.plot() drew nothing.
plt.show()
Out[17]:
In [104]:
# Rides on weekdays versus weekends; 'weekend' is 0 for Monday-Friday and 1
# for Saturday/Sunday.
labels = ['Weekday', 'Weekend']
# Count each flag value explicitly (0 first, then 1). reindex keeps both
# categories present, with a zero count, even if one never occurs.
weekend_counts = df['weekend'].value_counts().reindex([0, 1], fill_value=0)

fig, ax = plt.subplots()
ax.bar([0, 1], weekend_counts.values, width=.4, align='center')
# Pin the two ticks under the bars before naming them.
ax.set_xticks([0, 1])
ax.set_xticklabels(labels, rotation='vertical')
ax.set_title("Is Weekend of Bike Rental")
ax.set_xlabel("Value")
ax.set_ylabel("Frequency")
plt.show()
Out[104]:
In [18]:
# Extremes of the start coordinates, as a tuple:
# (max latitude, min latitude, max longitude, min longitude).
# 'Start Latitute' (sic) is the spelling this notebook uses for the column;
# presumably it matches the CSV header -- do not "correct" it here.
df['Start Latitute'].max(), df['Start Latitute'].min(), df['Start Longitude'].max(), df['Start Longitude'].min()
Out[18]:
In [19]:
# Scatter of ride start coordinates: small, semi-transparent white dots on a
# black background.
fig = plt.figure(figsize=(20,10))
ax = fig.add_subplot(111)
# Axes.set_axis_bgcolor was removed from matplotlib; set_facecolor is its
# replacement.
ax.set_facecolor('black')
ax.scatter(x=df['Start Longitude'], y=df['Start Latitute'], color='white', s=.02, alpha=.6)
ax.set_xlabel("Start Longitude")
ax.set_ylabel("Start Latitude")
plt.show()
Out[19]:
In [21]:
# Load the weather master CSV; the cells below read its 'Date' column and
# aggregate the remaining measurements by month.
df_w = pd.read_csv("DATA/babs_master/weather_master.csv")
In [44]:
# List the column names available in the weather frame.
df_w.columns
Out[44]:
In [25]:
# Month number of each weather observation: the text before the first "/" in
# 'Date' (presumably month/day/year strings -- confirm against the CSV).
# Stored as int so that grouping by 'month' orders the groups 1, 2, ..., 12;
# as strings they sort '1', '10', '11', '12', '2', ..., which puts the monthly
# bar charts out of calendar order.
df_w['month'] = df_w['Date'].str.split("/").str[0].astype(int)
In [40]:
# Per-month mean of every numeric weather column. numeric_only=True skips
# non-numeric columns (such as the 'Date' text) explicitly; pandas 2.x raises
# a TypeError if asked to average them instead of dropping them silently.
df_grp = df_w.groupby('month').mean(numeric_only=True)
In [48]:
# List the columns that survived the monthly aggregation.
df_grp.columns
Out[48]:
In [63]:
# Global matplotlib styling for every figure drawn after this cell.
mpl.rc('savefig', dpi=200)
params = {
    'figure.dpi': 200,
    'figure.figsize': (12, 10),
    'axes.axisbelow': True,
    'lines.antialiased': True,
    'axes.titlesize': 'xx-large',
    'axes.labelsize': 'x-large',
    'xtick.labelsize': 'large',
    'ytick.labelsize': 'large',
}
# rcParams is dict-like, so update() applies all settings in one call. The
# previous loop used dict.iteritems(), which exists only on Python 2.
plt.rcParams.update(params)
In [64]:
# Bar chart of the mean temperature for each month present in df_grp.
labels = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# df_grp is indexed by month number (possibly stored as strings). Convert to
# int and sort so the bars run in calendar order, then name each bar from its
# own month number rather than assuming twelve rows in January..December order.
monthly = df_grp['Mean_Temperature_F'].copy()
monthly.index = monthly.index.astype(int)
monthly = monthly.sort_index()
tick_labels = [labels[m - 1] for m in monthly.index]

y = monthly.values
N = len(y)
x = range(N)
plt.bar(x, y, width=0.8, align='center', edgecolor='#cccccc')
plt.title("Mean Temperature")
plt.xlabel("\nMonth")
plt.ylabel("Temperature in F")
# Show y tick values as whole numbers with thousands separators.
plt.gca().get_yaxis().set_major_formatter(
    mpl.ticker.FuncFormatter(lambda y, p: format(int(y), ','))
)
plt.xlim(-1, N);
plt.xticks(list(x), tick_labels, rotation=90);
In [49]:
# Bar chart of the mean humidity for each month present in df_grp.
labels = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# df_grp is indexed by month number (possibly stored as strings). Convert to
# int and sort so the bars run in calendar order, then name each bar from its
# own month number rather than assuming twelve rows in January..December order.
# The trailing space in the column name is part of the name used by this notebook.
monthly = df_grp['Mean_Humidity '].copy()
monthly.index = monthly.index.astype(int)
monthly = monthly.sort_index()
tick_labels = [labels[m - 1] for m in monthly.index]

y = monthly.values
N = len(y)
x = range(N)
plt.bar(x, y, width=0.8, align='center', edgecolor='#cccccc')
plt.title("Mean Humidity")
plt.xlabel("\nMonth")
plt.ylabel("Humidity in %")
# Show y tick values as whole numbers with thousands separators.
plt.gca().get_yaxis().set_major_formatter(
    mpl.ticker.FuncFormatter(lambda y, p: format(int(y), ','))
)
plt.xlim(-1, N);
plt.xticks(list(x), tick_labels, rotation=90);
In [52]:
# Bar chart of the monthly mean of 'Max_Visibility_Miles ' for each month
# present in df_grp.
labels = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# df_grp is indexed by month number (possibly stored as strings). Convert to
# int and sort so the bars run in calendar order, then name each bar from its
# own month number rather than assuming twelve rows in January..December order.
# The trailing space in the column name is part of the name used by this notebook.
monthly = df_grp['Max_Visibility_Miles '].copy()
monthly.index = monthly.index.astype(int)
monthly = monthly.sort_index()
tick_labels = [labels[m - 1] for m in monthly.index]

y = monthly.values
N = len(y)
x = range(N)
plt.bar(x, y, width=0.8, align='center', edgecolor='#cccccc')
plt.title("Max Visibility")
plt.xlabel("\nMonth")
plt.ylabel("Max Visibility in miles")
# Show y tick values as whole numbers with thousands separators.
plt.gca().get_yaxis().set_major_formatter(
    mpl.ticker.FuncFormatter(lambda y, p: format(int(y), ','))
)
plt.xlim(-1, N);
plt.xticks(list(x), tick_labels, rotation=90);
In [53]:
# Bar chart of the mean wind speed for each month present in df_grp.
labels = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# df_grp is indexed by month number (possibly stored as strings). Convert to
# int and sort so the bars run in calendar order, then name each bar from its
# own month number rather than assuming twelve rows in January..December order.
# The trailing space in the column name is part of the name used by this notebook.
monthly = df_grp['Mean_Wind_Speed_MPH '].copy()
monthly.index = monthly.index.astype(int)
monthly = monthly.sort_index()
tick_labels = [labels[m - 1] for m in monthly.index]

y = monthly.values
N = len(y)
x = range(N)
plt.bar(x, y, width=0.8, align='center', edgecolor='#cccccc')
plt.title("Wind Speed")
plt.xlabel("\nMonth")
plt.ylabel("Wind Speed in miles/hour")
# Show y tick values as whole numbers with thousands separators.
plt.gca().get_yaxis().set_major_formatter(
    mpl.ticker.FuncFormatter(lambda y, p: format(int(y), ','))
)
plt.xlim(-1, N);
plt.xticks(list(x), tick_labels, rotation=90);
In [54]:
# Bar chart of the mean cloud cover for each month present in df_grp.
labels = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# df_grp is indexed by month number (possibly stored as strings). Convert to
# int and sort so the bars run in calendar order, then name each bar from its
# own month number rather than assuming twelve rows in January..December order.
# The trailing space in the column name is part of the name used by this notebook.
monthly = df_grp['Cloud_Cover '].copy()
monthly.index = monthly.index.astype(int)
monthly = monthly.sort_index()
tick_labels = [labels[m - 1] for m in monthly.index]

y = monthly.values
N = len(y)
x = range(N)
plt.bar(x, y, width=0.8, align='center', edgecolor='#cccccc')
plt.title("Cloud Cover")
plt.xlabel("\nMonth")
plt.ylabel("Cloud Cover")
# Show y tick values as whole numbers with thousands separators.
plt.gca().get_yaxis().set_major_formatter(
    mpl.ticker.FuncFormatter(lambda y, p: format(int(y), ','))
)
plt.xlim(-1, N);
plt.xticks(list(x), tick_labels, rotation=90);
In [ ]: