CitiBike Challenge


The recent Citi Bike data has garnered a lot of attention. Attached you will find a data set of Citibike riders. The challenge is threefold:

  1. Calculate the average trip time.
  2. Visualize the male and the female ridership of the citi bikes over the period of a month.
  3. Visualize the ratio of their monthly usage over a period of 24 hours.

In [1]:
import matplotlib.pyplot as plt
from matplotlib import gridspec
import numpy as np
from datetime import datetime, timedelta
import citibike_1

In [2]:
# Instantiate the challenge helper from the local citibike_1 module; the
# loading and analysis calls in the cells below all go through this object.
cbc = citibike_1.CitiBikeChallenge()

In [3]:
# Load the 2013-07 trip file and report the size of its first dimension.
f = cbc.load_file('citibike-files/2013-07.csv')
# Single-argument print() parses on both Python 2 and Python 3; the two
# spaces reproduce the separator the old `print a, b` statement emitted.
print('Total entries read:  %s' % (f.shape[0],))


Loading data ... 
Total entries read:  843416

In [4]:
# cbc.gender(f) is indexed 0 / 1 / 2 and reported below as male / female /
# unknown; summing each entry gives the total for that category.
gender = cbc.gender(f)
g = tuple(np.sum(gender[k]) for k in range(3))
# Single-argument print() works on Python 2 and 3 alike; the doubled space
# matches what the original `print label, value` statement produced.
print('Total Male:  %s' % (g[0],))
print('Total Female:  %s' % (g[1],))
print('Unknown:  %s' % (g[2],))


Detecting gender Distribution...
Total Male:  511479
Total Female:  157006
Unknown:  174931

In [5]:
# avg_ride_time returns an indexable result: element 0 is the average printed
# here, element 1 is handed to cbc.peak_hours in a later cell.
avg = cbc.avg_ride_time(f)
# Single-argument print() keeps the cell valid on Python 2 and Python 3 while
# emitting the same text as the old `print label, value` statement.
print('Average Ride Time:  %s' % (avg[0],))


Calculating average ride time ...
Average Ride Time:  0:16:38.100707

In [6]:
# Per-hour ride totals. hourly_dist[i] is treated as a per-trip flag array for
# hour i (later cells AND it with the gender flags).
hourly_dist = cbc.peak_hours(avg[1])
pk = np.zeros(24)
for i in range(24):
    # Count the flagged trips directly. The previous nonzero()/clip()/sum()
    # form clipped row *indices* into 0..1, so a flagged trip sitting at row 0
    # contributed 0 and that hour's total came out one short.
    pk[i] = np.count_nonzero(hourly_dist[i])


Calculating usage per hours ...

In [7]:
# cbc.tourists(f) is indexed 0 / 1 and reported below as New Yorkers /
# tourists; summing each entry gives the total for that group.
usertype = cbc.tourists(f)
utype = (np.sum(usertype[0]), np.sum(usertype[1]))
# Single-argument print() parses on Python 2 and 3; the doubled space matches
# the output of the original `print label, value` statement.
print('Total New Yorkers:  %s' % (utype[0],))
print('Total Tourists:  %s' % (utype[1],))


Detecting tourists ...
Total New Yorkers:  668501
Total Tourists:  174915

In [8]:
# Text anchors (in axes-fraction coordinates) for the labels drawn on the
# gender panel (x_g, y_g) and on the customer-type panel (x_t, y_t).
x_g, y_g = 0.22, 0.5
x_t, y_t = 0.22, 0.5

# Unit constants and an hour-of-day index (0..23) for the hourly panel.
ap_width = ap_const = 1
ap_ind = np.arange(24)

In [9]:
# One row per hour of the day, one column per entry of f: a cell holds 1.0
# when both the gender flag and the hour flag are set for that entry, else 0.0.
n_trips = f.shape[0]
mens = np.zeros([24, n_trips])
womens = np.zeros([24, n_trips])
unknown = np.zeros([24, n_trips])

for hour in range(24):
    in_hour = hourly_dist[hour]
    for rows, category in ((mens, 0), (womens, 1), (unknown, 2)):
        rows[hour] = np.logical_and(gender[category], in_hour)

In [10]:
# Sanity check: totals for hour 0 in each gender category.
# Single-argument print() parses on Python 2 and 3; the doubled space matches
# the output of the original `print label, value` statements.
print('mens:  %s' % (np.sum(mens[0]),))
print('womens:  %s' % (np.sum(womens[0]),))
print('unknown:  %s' % (np.sum(unknown[0]),))


mens:  6197.0
womens:  1483.0
unknown:  2951.0

In [11]:
# Collapse each 24 x N flag matrix to one total per hour of the day.
m = mens.sum(axis=1)
w = womens.sum(axis=1)
u = unknown.sum(axis=1)

# Hourly total across the three gender categories.
z = m + w + u

In [64]:
fig = plt.figure()
fig.canvas.set_window_title("Citibike Challenge")
gs = gridspec.GridSpec(2, 2)

# For Hourly Usage: the top panel spans both columns of the 2x2 grid.
ax = fig.add_subplot(gs[0,:])

hours = np.arange(24)
bar_color = '#3F5D7D'

# Stacked bars: men at the base, women on top of men, unknown on top of both.
# The base series is `m` rather than the all-rider total `pk`, so rect1 really
# is the series that the legend labels 'Men'.
# align='edge' is spelled out because the +0.4 offset of the line below and
# the 4-hour buckets assume bars that start at the integer hour; newer
# matplotlib releases centre bars on x by default.
rect1 = ax.bar(hours, m,
    color=bar_color, edgecolor=None, alpha=0.3, align='edge')

rect2 = ax.bar(hours, w,
    color=bar_color, edgecolor=None, alpha=0.7, bottom=m, align='edge')

rect3 = ax.bar(hours, u,
    color=bar_color, edgecolor=None, alpha=1, bottom=m+w, align='edge')

# Total rides per hour, drawn through the middle of each default-width bar.
ax.plot(hours + 0.4, pk, color='#ff0000')

# Average of pk over six consecutive 4-hour buckets (0-3, 4-7, ..., 20-23),
# each drawn as one wide translucent bar starting at the bucket's first hour.
# The six containers keep their ap1..ap6 names for the labelling code below.
bucket_colors = ('#727272', '#79c36a', '#f1595f',
                 '#f9a65a', '#599ad3', '#9e66ab')
ap1, ap2, ap3, ap4, ap5, ap6 = [
    ax.bar(start, np.average(pk[start:start + 4]), width=4,
           color=shade, edgecolor=None, alpha=0.5, align='edge')
    for start, shade in zip(range(0, 24, 4), bucket_colors)
]


#ax.axis('tight')
#ax.xaxis.grid(False)

def autolabel(aps):
    """Write each bar's height, truncated to an integer, just above the bar.

    aps -- iterable of bar patches (objects exposing get_x, get_width and
           get_height). The labels are drawn on the notebook-level axes `ax`.
    """
    for bar in aps:
        height = bar.get_height()
        x_mid = bar.get_x() + bar.get_width() / 2.
        # Place the text 5% above the top of the bar, centred horizontally.
        ax.text(x_mid, 1.05 * height, '%d' % int(height),
                ha='center', va='bottom', weight='bold')
#autolabel(rect1)

# Annotate each of the six 4-hour bucket bars with its height.
for bucket in (ap1, ap2, ap3, ap4, ap5, ap6):
    autolabel(bucket)

ax.legend((rect1, rect2, rect3), ('Men', 'Women', 'Unknown'))

# Axis cosmetics: one tick per hour, x-axis ending at the number of hours,
# and a faint dashed grid behind the bars.
n_hours = pk.shape[0]
ax.set_xticks(np.arange(n_hours))
ax.set_xlim(right=n_hours)
ax.grid(color='grey', linestyle='--', linewidth=1, alpha=0.2)
ax.set_xlabel('Hour of the Day')
ax.set_ylabel('Number of People')
ax.set_title('Peak Hour for July 2013')


Out[64]:
<matplotlib.text.Text at 0x7f9babeff910>

In [65]:
# For Gender: bottom-left panel with one bar each for the male and female
# totals held in g[0] and g[1].
bx = fig.add_subplot(gs[1,0])
# align='edge' is explicit so the bars start at x = 0.3 and 1.3 on every
# matplotlib version; the text anchors below (x_g and x_g + 0.5, in axes
# fractions) are positioned for that layout.
# NOTE: this rebinds rect2, which the hourly panel also uses.
rect2 = bx.bar(np.arange(2)+0.3, (g[0],g[1]), 0.3, color='#3F5D7D',
               edgecolor=None, alpha=0.7, align='edge')
bx.set_xlabel('Gender')
bx.set_ylabel('Number of people')
bx.set_title('Rides by Men vs Women')
bx.grid(color='grey', linestyle='--', linewidth=1, alpha=0.2)
bx.set_xlim([0, 2])
# Leave 2% headroom above the total number of entries in f.
bx.set_ylim(top = 1.02 * f.shape[0])
# The x axis is hidden; the category names are drawn as text instead.
bx.axes.get_xaxis().set_visible(False)
bx.tick_params(labelsize=8)

# Shared styling for the four in-panel labels (positions are axes fractions).
label_style = dict(horizontalalignment='center', verticalalignment='center',
                   color='#303030', weight='ultralight',
                   rotation='horizontal', transform=bx.transAxes)

# g[0] / g[1] are already scalar totals, so they are passed through as-is.
bx.text(x_g, y_g, 'Male', **label_style)
bx.text(x_g, y_g-0.2, g[0], **label_style)

bx.text(x_g+0.5, y_g, 'Female', **label_style)
bx.text(x_g+0.5, y_g-0.2, g[1], **label_style)


Out[65]:
<matplotlib.text.Text at 0x7f9babc44510>

In [66]:
# Bottom-right panel: one bar each for the two totals held in utype
# (reported elsewhere in the notebook as New Yorkers and tourists).
cx = fig.add_subplot(gs[1,1])
# align='edge' is explicit so the bars start at x = 0.3 and 1.3 on every
# matplotlib version; the text anchors below (x_t and x_t + 0.5, in axes
# fractions) are positioned for that layout.
# NOTE: this rebinds rect3, which the hourly panel also uses.
rect3 = cx.bar(np.arange(2)+0.3, utype, 0.3, color='#3F5D7D',
               edgecolor=None, alpha=0.7, align='edge')
cx.set_xlabel('Types of Customers')
cx.set_ylabel('Number of Customers')
cx.set_title('Tourists vs Residents')
cx.grid(color='grey', linestyle='--', linewidth=1, alpha=0.2)
cx.set_xlim([0, 2])
# Leave 2% headroom above the total number of entries in f.
cx.set_ylim(top = 1.02 * f.shape[0])
# The x axis is hidden; the category names are drawn as text instead.
cx.axes.get_xaxis().set_visible(False)
cx.tick_params(labelsize=8)

# Shared styling for the four in-panel labels (positions are axes fractions).
label_style = dict(horizontalalignment='center', verticalalignment='center',
                   color='#303030', weight='ultralight',
                   rotation='horizontal', transform=cx.transAxes)

cx.text(x_t, y_t, 'New \nYorkers', **label_style)
cx.text(x_t, y_t-0.2, utype[0], **label_style)

cx.text(x_t+0.5, y_t, 'Tourists', **label_style)
cx.text(x_t+0.5, y_t-0.2, utype[1], **label_style)


Out[66]:
<matplotlib.text.Text at 0x7f9babd0ea10>

In [67]:
# Display the assembled figure.
plt.show()

In [ ]:
# Read the weather CSV with every field kept as a string; later cells pick
# columns out by position and convert them where needed.
weather = np.genfromtxt('citibike-files/weather.csv', delimiter=',', dtype=str)

In [ ]:
# One indicator row per day-of-month value 0..31 and one column per weather
# record. The column count is taken from the loaded table instead of a
# hard-coded 930, so filling a row from a full weather column cannot fail
# with a shape mismatch when the file has a different number of records.
n_records = weather.shape[0]
w_monthly = np.zeros([32, n_records])
w_daily = np.zeros(n_records)
def _date_converter(d, _cond):
    temp = datetime.strptime(str(np.char.strip(d, '<br')), '%Y:%m:%d-%H:%M:%S')
    if (temp.day == _cond):
        #print 'yes'
        return 1
    else: 
        return 0

v_date_converter = np.vectorize(_date_converter)

In [ ]:
# Monthly Temperature: fill row `day` with 1 where the timestamp in weather
# column 13 falls on that day of the month, and 0 elsewhere.
for day in range(32):
    w_monthly[day, :] = v_date_converter(weather[:, 13], day)

In [ ]:
# NOTE(review): `x` is not bound anywhere in this notebook, so this cell fails
# on a fresh kernel; it presumably refers to the per-day indicator rows
# (w_monthly) -- confirm and bind it explicitly before relying on this.
# The builtin float replaces np.float, which was only an alias for it and has
# been removed from current NumPy releases.
np.average(weather[np.argmax(x[0]):np.argmax(x[1].nonzero()),1].astype(float))

In [ ]:
# NOTE(review): `x` is not bound anywhere in this notebook, so this cell fails
# on a fresh kernel -- confirm what it should refer to and bind it explicitly.
# The builtin float replaces np.float, which was only an alias for it and has
# been removed from current NumPy releases.
day1 = weather[0:np.argmax(x.nonzero()),1].astype(float)

In [ ]:
# Mean of the day1 values (taken from column 1 of the weather table).
np.average(day1)