In [2]:
import pandas as pd
import glob
import matplotlib
In [2]:
# A bare %cd returns to the user's home directory, so the relative path on the
# next line resolves the same way regardless of where the kernel was started.
%cd
# Enter the folder the data-loading cells below glob their CSV files from.
# NOTE(review): this path is specific to one machine's directory layout.
%cd Documents/TaxiTripData/uber-tlc-foil-response-master/uber-trip-data/
# List the directory so the available files show up in the cell output.
%ls
In [3]:
# Preview the first five rows of the first file matching '*15.csv'.
# NOTE(review): glob does not guarantee any ordering, so [0] is only well
# defined when exactly one file in the working directory matches the pattern.
pd.read_csv(glob.glob('*15.csv')[0],nrows=5)
Out[3]:
In [21]:
# Stream the first '*15.csv' file in 10,000-row chunks, loading only the
# column at position 1, so the whole file never has to sit in memory.
iterate = pd.read_csv(
    glob.glob('*15.csv')[0],
    chunksize=10000,
    iterator=True,
    usecols=[1],
)

# One entry per chunk: the number of rows falling on each calendar date.
countlist = []
for chunk in iterate:
    countlist.append(
        pd.to_datetime(chunk['Pickup_date'])
        .dt.date
        .value_counts()
    )
In [27]:
# Collect every 2014 raw-data CSV in the working directory.
# glob returns matches in a filesystem-dependent order; sorting makes both the
# displayed list and the processing order reproducible from run to run.
rawdatafiles14 = sorted(glob.glob('uber-raw-data-*14.csv'))
rawdatafiles14
Out[27]:
In [29]:
# Slow step: each 2014 file is read in full (first column only) and every
# timestamp is parsed before the rows are tallied per calendar date.
# NOTE: the tallies are appended to the existing `countlist`, so running this
# cell more than once adds the 2014 counts again.
for i in rawdatafiles14:
    countlist.append(
        pd.to_datetime(pd.read_csv(i, usecols=[0])['Date/Time'])
        .dt.date
        .value_counts()
    )
In [74]:
# A bare %cd returns to the user's home directory before the relative path below.
%cd
# Move up to the repository root, one level above the trip-data folder.
# NOTE(review): this path is specific to one machine's directory layout.
%cd Documents/TaxiTripData/uber-tlc-foil-response-master/
# List the directory contents in the cell output.
%ls
In [57]:
# Stack all per-chunk and per-file tallies into one series, then add together
# the entries that share the same date so each date appears exactly once.
cond = (
    pd.concat(countlist)
    .groupby(level=0)
    .sum()
)
In [58]:
# Convert the index labels to pandas datetimes; later cells read
# .month, .year and .weekday from this index.
cond.index=pd.to_datetime(cond.index)
In [6]:
# Display the index to check the result of the datetime conversion.
cond.index
Out[6]:
In [18]:
# A bare %cd returns to the user's home directory before the relative path below.
%cd
# Switch to the write-up folder where the cached counts and figures are stored.
# NOTE(review): this path is specific to one machine's directory layout.
%cd Desktop/SRDataIntensive/DataStory/
# Cache the aggregated per-date counts so they can be reloaded without
# re-reading the raw CSV files.
cond.to_pickle('uber_date_counts.pkl')
In [3]:
# Reload the cached per-date counts; the file name is resolved against the
# current working directory.
# NOTE(review): pickle files should only be loaded from a trusted source.
cond=pd.read_pickle('uber_date_counts.pkl')
In [4]:
# Wrap the per-date counts in a DataFrame.
# NOTE(review): the next cell starts with this exact statement, so this cell
# is redundant.
months=pd.DataFrame(cond)
In [5]:
# Start again from the per-date counts so this cell can be re-run safely.
months = pd.DataFrame(cond)

# Tag each row with the calendar year and month taken from its date index.
months['year'] = months.index.year
months['month'] = months.index.month

# Collapse to one row per (year, month) holding the summed counts.
months = months.groupby(['year', 'month']).sum()
In [6]:
# Give the single remaining column a readable name; the zero-row padding in
# the next cell uses the same 'Pickups' label.
months.columns=['Pickups']
In [14]:
# Add explicit zero-pickup rows for October-December 2014 so those months
# show up as empty bars in the chart (presumably there is no source data for
# them - the plot label below calls this out as a 3 month gap).
mmons = range(10, 13)
d = {
    'year': [2014] * len(mmons),
    'month': list(mmons),
    'Pickups': [0] * len(mmons),
}

# DataFrame.append and the argument-less DataFrame.sort() are no longer
# available in current pandas; pd.concat followed by sort_index() produces the
# same frame, ordered by its (year, month) index.
# NOTE: re-running this cell adds the placeholder rows again; rebuild `months`
# from the cells above before re-running.
months = pd.concat(
    [months, pd.DataFrame(d).groupby(['year', 'month']).sum()]
).sort_index()
In [15]:
# Bar chart of total pickups per (year, month).
plot = months.plot(
    kind='bar',
    color='gray',
    legend=False,
    title='Uber is growing, but not that fast',
)

# Axis labels; the x label points out the zero-filled months.
plot.set_ylabel('Pickups')
plot.set_xlabel('(year, month) -Note the 3 month gap-')

# Write the figure to a PDF in the current working directory.
plot.get_figure().savefig('ubergrowth.pdf')
Uber is clearly growing pretty steadily, but it doesn't look like exponential growth.
In [26]:
# Label the weekday series.
# NOTE(review): `dow` is only created in a later cell, so this line raises a
# NameError when the notebook is run top to bottom on a fresh kernel.
dow.name='Pickups'
In [29]:
In [35]:
# Total pickups per day of the week, drawn as a bar chart and saved to PDF.

# Group the per-date counts by weekday number (0 = Monday ... 6 = Sunday).
dow = cond.groupby(cond.index.weekday).sum()

# Guarantee exactly seven rows in Monday..Sunday order. Without this, assigning
# the seven labels below fails with a length mismatch whenever some weekday
# never occurs in the data.
dow = dow.reindex(list(range(7)), fill_value=0)
dow.index = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Name the series here so this cell does not rely on state set by another cell.
dow.name = 'Pickups'

plot = dow.plot(kind='bar', title='Uber is for parties', color='gray', legend=False)
plot.set_xlabel('Days of the week')
plot.set_ylabel('Pickups')
plot.get_figure().savefig('uberweekday.pdf')
Surprisingly (to me), the most popular time to use Uber is not during rush hour; it is on weekend nights. It could be because of the inconsistencies of the subway on weekends, especially at night,