The recent Citi Bike data has garnered a lot of attention. Attached you will find a data set of Citi Bike riders. The challenge is threefold:
In [1]:
import matplotlib.pyplot as plt
from matplotlib import gridspec
import numpy as np
from datetime import datetime, timedelta
import citibike_1
In [2]:
# Create the challenge helper from the local citibike_1 module; the cells below call its
# load_file, gender, avg_ride_time, peak_hours and tourists methods.
cbc = citibike_1.CitiBikeChallenge()
In [3]:
f = cbc.load_file('citibike-files/2013-07.csv')
print 'Total entries read: ',f.shape[0]
In [4]:
gender = cbc.gender(f)
g = (np.sum(gender[0]), np.sum(gender[1]), np.sum(gender[2]))
print 'Total Male: ',g[0]
print 'Total Female: ',g[1]
print 'Unknown: ',g[2]
In [5]:
avg = cbc.avg_ride_time(f)
print 'Average Ride Time: ',avg[0]
In [6]:
# Number of rides that fall into each hour of the day (index = hour, 0..23).
# hourly_dist[i] is used as a per-ride mask for hour i (it is AND-ed with the gender
# masks in a later cell) -- assumes a 1-D 0/1 array per hour; confirm against citibike_1.
hourly_dist = cbc.peak_hours(avg[1])
pk = np.zeros(24)
for i in xrange(24):
    # Count the flagged rides directly. The earlier form summed the *indices* of the
    # flagged rides clipped to [0, 1], which maps index 0 to 0 and therefore dropped
    # one ride from any hour whose mask is set at position 0.
    pk[i] = np.count_nonzero(hourly_dist[i])
In [7]:
usertype = cbc.tourists(f)
utype = (np.sum(usertype[0]), np.sum(usertype[1]))
print 'Total New Yorkers: ',utype[0]
print 'Total Tourists: ',utype[1]
In [8]:
# Layout constants for the figure built in the cells below.

# Axes-fraction position of the first text label in the gender panel (x_g, y_g)
# and in the user-type panel (x_t, y_t); the second label is offset from these.
x_g, y_g = 0.22, 0.5
x_t, y_t = 0.22, 0.5

# Step added per block when positioning the four-hour average bars.
ap_width = 1
# Not referenced by any cell visible in this notebook.
ap_const = 1
# Hour indices 0..23, used as the base x positions of the average bars.
ap_ind = np.arange(24)
In [9]:
# Per-hour, per-ride membership tables: row = hour of day, column = ride.
# Entry is 1.0 when the ride belongs to that gender group AND falls in that hour.
mens, womens, unknown = (np.zeros([24, f.shape[0]]) for _unused in range(3))
for hour in xrange(24):
    in_hour = hourly_dist[hour]
    mens[hour] = np.logical_and(gender[0], in_hour)
    womens[hour] = np.logical_and(gender[1], in_hour)
    unknown[hour] = np.logical_and(gender[2], in_hour)
In [10]:
print 'mens: ',np.sum(mens[0])
print 'womens: ',np.sum(womens[0])
print 'unknown: ',np.sum(unknown[0])
In [11]:
# Collapse each (hour x ride) table to one ride count per hour by summing every row.
m = mens.sum(axis=1)
w = womens.sum(axis=1)
u = unknown.sum(axis=1)

# Combined hourly count across the three gender groups.
z = m + w + u
In [64]:
# Figure with a 2x2 grid: the hourly-usage panel spans the whole top row; the two
# bottom cells are filled by later cells.
fig = plt.figure()
fig.canvas.set_window_title("Citibike Challenge")
gs = gridspec.GridSpec(2, 2)

# For Hourly Usage
ax = fig.add_subplot(gs[0,:])
hours = np.arange(24)
usage_color = '#3F5D7D'

# Same colour at three opacities: pk from the baseline, then w stacked on m and
# u stacked on m+w.
rect1 = ax.bar(hours, pk, color=usage_color, edgecolor=None, alpha=0.3)
rect2 = ax.bar(hours, w, color=usage_color, edgecolor=None, alpha=0.7, bottom=m)
rect3 = ax.bar(hours, u, color=usage_color, edgecolor=None, alpha=1, bottom=m+w)

# Red trend line through the hourly totals, shifted 0.4 to the right of the bar x positions.
ax.plot(hours + 0.4, pk, color='#ff0000')

# Six wide translucent bars, one per four-hour block, whose height is the mean of pk
# over that block. Block k starts at x = ap_ind[3*k] + ap_width*k, i.e. 0, 4, ..., 20.
ap_colors = ('#727272', '#79c36a', '#f1595f', '#f9a65a', '#599ad3', '#9e66ab')
ap1, ap2, ap3, ap4, ap5, ap6 = (
    ax.bar(ap_ind[3*k] + ap_width*k, np.average(pk[4*k:4*k + 4]), width=4,
           color=ap_colors[k], edgecolor=None, alpha=0.5, align='edge')
    for k in range(6)
)
def autolabel(aps, axes=None):
    """Write each bar's height, truncated to an integer, just above the bar.

    aps  -- iterable of bar artists exposing get_height(), get_x() and get_width()
            (e.g. the container returned by Axes.bar).
    axes -- object whose text() method draws the labels. Defaults to the
            module-level `ax` (the hourly-usage axes), which is what this helper
            always used before the parameter existed.
    """
    if axes is None:
        # Backward-compatible default: look up the notebook-level axes at call time.
        axes = ax
    for ap in aps:
        h = ap.get_height()
        # Centre the label horizontally on the bar and place it 5% above the bar top.
        axes.text(ap.get_x() + ap.get_width()/2., 1.05*h, '%d' % int(h),
                  ha='center', va='bottom', weight='bold')
# Annotate every four-hour average bar with its height.
for block_bar in (ap1, ap2, ap3, ap4, ap5, ap6):
    autolabel(block_bar)

# NOTE(review): rect1 is drawn from pk (rides per hour across all riders), not from m,
# yet it is labelled 'Men' here -- confirm this is the intended stacking.
ax.legend((rect1, rect2, rect3), ('Men', 'Women', 'Unknown'))

# One tick per hour; the right x-limit is pinned to the number of hours.
hour_count = pk.shape[0]
ax.set_xlabel('Hour of the Day')
ax.set_xticks(np.arange(hour_count))
ax.set_xlim(right=hour_count)
ax.grid(color='grey', linestyle='--', linewidth=1, alpha=0.2)
ax.set_ylabel('Number of People')
ax.set_title('Peak Hour for July 2013')
Out[64]:
In [65]:
# For Gender
# Bottom-left panel: one bar each for the male and female ride totals.
bx = fig.add_subplot(gs[1,0])
rect2 = bx.bar(np.arange(2) + 0.3, (g[0], g[1]), 0.3,
               color='#3F5D7D', edgecolor=None, alpha=0.7)
bx.set_xlabel('Gender')
bx.set_ylabel('Number of people')
bx.set_title('Rides by Men vs Women')
bx.grid(color='grey', linestyle='--', linewidth=1, alpha=0.2)
bx.set_xlim([0, 2])
# Leave 2% headroom above the total number of rides.
bx.set_ylim(top=1.02 * f.shape[0])
# The x axis is hidden; the bars are identified by the text labels below instead.
bx.axes.get_xaxis().set_visible(False)
bx.tick_params(labelsize=8)

# Shared styling for the captions; positions are in axes-fraction coordinates.
gender_label_style = dict(horizontalalignment='center', verticalalignment='center',
                          color='#303030', weight='ultralight',
                          rotation='horizontal', transform=bx.transAxes)
bx.text(x_g, y_g, 'Male', **gender_label_style)
bx.text(x_g, y_g-0.2, np.sum(g[0]), **gender_label_style)
bx.text(x_g+0.5, y_g, 'Female', **gender_label_style)
bx.text(x_g+0.5, y_g-0.2, np.sum(g[1]), **gender_label_style)
Out[65]:
In [66]:
# Bottom-right panel: one bar each for the two user-type totals in utype.
cx = fig.add_subplot(gs[1,1])
rect3 = cx.bar(np.arange(2) + 0.3, utype, 0.3,
               color='#3F5D7D', edgecolor=None, alpha=0.7)
cx.set_xlabel('Types of Customers')
cx.set_ylabel('Number of Customers')
cx.set_title('Tourists vs Residents')
cx.grid(color='grey', linestyle='--', linewidth=1, alpha=0.2)
cx.set_xlim([0, 2])
# Leave 2% headroom above the total number of rides.
cx.set_ylim(top=1.02 * f.shape[0])
# The x axis is hidden; the bars are identified by the text labels below instead.
cx.axes.get_xaxis().set_visible(False)
cx.tick_params(labelsize=8)

# Shared styling for the captions; positions are in axes-fraction coordinates.
usertype_label_style = dict(horizontalalignment='center', verticalalignment='center',
                            color='#303030', weight='ultralight',
                            rotation='horizontal', transform=cx.transAxes)
cx.text(x_t, y_t, 'New \nYorkers', **usertype_label_style)
cx.text(x_t, y_t-0.2, utype[0], **usertype_label_style)
cx.text(x_t+0.5, y_t, 'Tourists', **usertype_label_style)
cx.text(x_t+0.5, y_t-0.2, utype[1], **usertype_label_style)
Out[66]:
In [67]:
# Display the assembled three-panel figure.
plt.show()
In [ ]:
# Read the weather CSV as a comma-separated array of strings (dtype=str).
# Later cells index column 13 (parsed as a timestamp) and column 1 (cast to float).
weather = np.genfromtxt('citibike-files/weather.csv', delimiter=',', dtype=str)
In [ ]:
# One row per day-of-month value (0..31) and one column per weather record.
# The column count is taken from the loaded data instead of the previous hard-coded
# 930: a later cell assigns one value per weather row into each row of w_monthly,
# which only works when the two lengths agree.
weather_rows = weather.shape[0]
w_monthly = np.zeros([32, weather_rows])
# Not referenced by any cell visible in this notebook; kept the same length as the data.
w_daily = np.zeros(weather_rows)
def _date_converter(d, _cond):
temp = datetime.strptime(str(np.char.strip(d, '<br')), '%Y:%m:%d-%H:%M:%S')
if (temp.day == _cond):
#print 'yes'
return 1
else:
return 0
v_date_converter = np.vectorize(_date_converter)
In [ ]:
# Monthly Temperature
for i in xrange(0,32):
w_monthly[i] = v_date_converter(weather[:,13], i)
In [ ]:
# Mean of weather column 1 over the rows from np.argmax(x[0]) up to (not including)
# np.argmax(x[1].nonzero()). The builtin float replaces the np.float alias, which is
# the same type but has been removed from newer NumPy releases.
# NOTE(review): `x` is not defined in any cell visible in this notebook, so this raises
# NameError on a fresh kernel; presumably it is the per-day mask table -- confirm.
np.average(weather[np.argmax(x[0]):np.argmax(x[1].nonzero()),1].astype(float))
In [ ]:
# Weather column 1 as floats for the rows before index np.argmax(x.nonzero()).
# The builtin float replaces the np.float alias, which is the same type but has been
# removed from newer NumPy releases.
# NOTE(review): `x` is not defined in any cell visible in this notebook, so this raises
# NameError on a fresh kernel -- confirm where it is meant to come from.
day1 = weather[0:np.argmax(x.nonzero()),1].astype(float)
In [ ]:
# Mean of the day1 slice (weather column 1 values cast to float).
np.average(day1)