In [2]:
import pandas as pd
import numpy as np
In [3]:
# Read the trip records from the CSV in the working directory into `d`.
d = pd.read_csv(filepath_or_buffer='rec.csv')
In [4]:
# Preview the first five rows of the freshly loaded frame.
d.head(n=5)
Out[4]:
In [5]:
# Number of non-null 'hack_license' entries in the full data set.
d.loc[:, 'hack_license'].count()
Out[5]:
In [6]:
# 'day' = third '-'-separated field of the first whitespace-separated token of
# 'pickup_datetime', kept as a string (later cells compare it against '26'..'31').
# The vectorised .str accessor yields NaN for a missing or malformed timestamp
# instead of raising inside a per-element lambda.
# NOTE(review): presumably timestamps look like 'YYYY-MM-DD HH:MM:SS' - confirm.
d['day'] = d['pickup_datetime'].str.split().str[0].str.split('-').str[2]
In [7]:
# Preview the first five rows again, now including the derived 'day' column.
d.head(n=5)
Out[7]:
In [72]:
# 'hour' = first ':'-separated field of the second whitespace-separated token of
# 'pickup_datetime', kept as a string.
# The vectorised .str accessor yields NaN for a missing or malformed timestamp
# instead of raising inside a per-element lambda.
# NOTE(review): presumably timestamps look like 'YYYY-MM-DD HH:MM:SS' - confirm.
d['hour'] = d['pickup_datetime'].str.split().str[1].str.split(':').str[0]
In [73]:
# List the column labels of `d` (after 'day' and 'hour' have been added).
d.columns
Out[73]:
In [74]:
# Keep only trips picked up strictly inside the box
#   40.6725 < latitude < 40.6925  and  -73.9850 < longitude < -73.9650,
# i.e. 0.01 degrees either side of (40.6825, -73.9750), the point this notebook
# labels 'Barclays Center' in its last cell.
# .copy() makes `p` an independent frame, so adding columns to it later does not
# write into a slice of `d` (which raises SettingWithCopyWarning).
p = d[
    (d['pickup_latitude'] > 40.6725)
    & (d['pickup_latitude'] < 40.6925)
    & (d['pickup_longitude'] < -73.9650)
    & (d['pickup_longitude'] > -73.9850)
].copy()
In [75]:
# Number of non-null 'hack_license' entries among the filtered pickups.
p.loc[:, 'hack_license'].count()
Out[75]:
In [8]:
def sector(lat, lon):
    """Return the grid-cell number (1-12) containing a coordinate, or 0 if outside.

    The grid has two longitude columns and six latitude rows:

    * west column, -74.0286 <= lon < -73.9386: sectors 1 (north) .. 6 (south)
    * east column, -73.9386 <= lon < -73.8586: sectors 7 (north) .. 12 (south)

    Latitude rows are 0.05 degrees tall and span 40.5645 up to 40.8645.

    Every band is half-open (lower edge included, upper edge excluded), so a
    point lying exactly on an interior grid line belongs to exactly one sector
    instead of falling through to 0.

    Parameters
    ----------
    lat : float
        Latitude in degrees.
    lon : float
        Longitude in degrees.

    Returns
    -------
    int
        Sector number in 1..12, or 0 when the point is outside the grid
        (this includes NaN coordinates, for which every comparison is False).
    """
    lat_edges = (40.5645, 40.6145, 40.6645, 40.7145, 40.7645, 40.8145, 40.8645)  # south -> north
    lon_edges = (-74.0286, -73.9386, -73.8586)  # west -> east
    rows = len(lat_edges) - 1
    cols = len(lon_edges) - 1

    # Index of the latitude band counted from the south; None when out of range.
    south_idx = next(
        (i for i in range(rows) if lat_edges[i] <= lat < lat_edges[i + 1]),
        None,
    )
    # Index of the longitude column counted from the west; None when out of range.
    col_idx = next(
        (j for j in range(cols) if lon_edges[j] <= lon < lon_edges[j + 1]),
        None,
    )
    if south_idx is None or col_idx is None:
        return 0

    # Sectors are numbered north to south within a column, west column first.
    return col_idx * rows + (rows - south_idx)
In [9]:
# Label every trip in `p` with the grid sector of its drop-off point.
# Iterating the two columns directly avoids materialising a full row Series
# through p.iloc[i] twice per trip, and .assign() returns a new frame rather
# than writing a column into what may be a slice of `d`.
l = [
    sector(drop_lat, drop_lon)
    for drop_lat, drop_lon in zip(p['dropoff_latitude'], p['dropoff_longitude'])
]
p = p.assign(sector=l)
In [80]:
# Split the filtered trips into one frame per pickup day; 'day' holds strings,
# so each label is compared as text.
p26, p27, p28, p29, p30, p31 = (
    p[p['day'] == day_label]
    for day_label in ('26', '27', '28', '29', '30', '31')
)
In [81]:
# Per-day frames collected in day order (26 through 31) for serialisation.
data_p=[p26,p27,p28,p29,p30,p31]
In [82]:
import pickle

# Persist the list of per-day frames to 'data_days.p'. The context manager
# flushes and closes the file handle even if pickling raises.
with open('data_days.p', 'wb') as day_file:
    pickle.dump(data_p, day_file)
In [83]:
# Sanity check: print the sector number for the 'Barclays Center' coordinates.
print(sector(lat=40.6825, lon=-73.9750))
In [ ]: