In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the trip records (medallion, hack_license, pickup/dropoff timestamps and
# coordinates, fare components) from rec.csv in the working directory into `d`.
d= pd.read_csv('rec.csv')

In [4]:
# Preview the first five rows to check the columns of the loaded file.
d.head()


Out[4]:
medallion hack_license pickup_datetime dropoff_datetime passenger_count trip_time_in_secs trip_distance pickup_longitude dropoff_longitude pickup_latitude dropoff_latitude fare_amount surcharg mta_tax tip_amount tolls_amount total_amount new_total
0 2013003229 2013003226 2013-03-26 00:00:00 2013-03-26 00:07:00 1 420 2.63 -73.977592 -73.954391 40.759293 40.784023 9.0 0.5 0.5 2.85 0.0 12.85 12.85
1 2013007090 2013007086 2013-03-26 00:00:00 2013-03-26 00:02:00 1 120 0.36 -73.957893 -73.960381 40.811256 40.814125 4.0 0.5 0.5 0.90 0.0 5.90 5.90
2 2013002078 2013002075 2013-03-26 00:00:00 2013-03-26 00:16:00 1 960 5.38 -73.992180 -73.938820 40.734428 40.791225 18.0 0.5 0.5 5.55 0.0 24.55 24.55
3 2013010107 2013010107 2013-03-26 00:00:00 2013-03-26 00:14:00 1 840 2.70 -73.982513 -73.975044 40.762150 40.735542 13.0 0.5 0.5 0.00 0.0 14.00 14.00
4 2013009601 2013007484 2013-03-26 00:00:00 2013-03-26 00:20:00 1 1200 6.74 -73.995155 -73.943954 40.749981 40.711658 22.0 0.5 0.5 6.75 0.0 29.75 29.75

In [5]:
# Number of non-null hack_license values in the full data set.
# NOTE(review): presumably used as a row count; count() skips missing values,
# so this equals len(d) only if the column has no NaN.
d['hack_license'].count()


Out[5]:
2470654

In [6]:
# Day of month of the pickup, kept as a zero-padded string (e.g. '26') because
# later cells compare it against string literals.
# 'YYYY-MM-DD HH:MM:SS' -> date part -> third '-'-separated field.
# The vectorised .str accessor replaces a per-element Python lambda; unlike the
# lambda it yields NaN for a missing or malformed timestamp instead of raising.
d['day'] = d['pickup_datetime'].str.split().str[0].str.split('-').str[2]

In [7]:
# Preview again to confirm the new `day` column was appended.
d.head()


Out[7]:
medallion hack_license pickup_datetime dropoff_datetime passenger_count trip_time_in_secs trip_distance pickup_longitude dropoff_longitude pickup_latitude dropoff_latitude fare_amount surcharg mta_tax tip_amount tolls_amount total_amount new_total day
0 2013003229 2013003226 2013-03-26 00:00:00 2013-03-26 00:07:00 1 420 2.63 -73.977592 -73.954391 40.759293 40.784023 9.0 0.5 0.5 2.85 0.0 12.85 12.85 26
1 2013007090 2013007086 2013-03-26 00:00:00 2013-03-26 00:02:00 1 120 0.36 -73.957893 -73.960381 40.811256 40.814125 4.0 0.5 0.5 0.90 0.0 5.90 5.90 26
2 2013002078 2013002075 2013-03-26 00:00:00 2013-03-26 00:16:00 1 960 5.38 -73.992180 -73.938820 40.734428 40.791225 18.0 0.5 0.5 5.55 0.0 24.55 24.55 26
3 2013010107 2013010107 2013-03-26 00:00:00 2013-03-26 00:14:00 1 840 2.70 -73.982513 -73.975044 40.762150 40.735542 13.0 0.5 0.5 0.00 0.0 14.00 14.00 26
4 2013009601 2013007484 2013-03-26 00:00:00 2013-03-26 00:20:00 1 1200 6.74 -73.995155 -73.943954 40.749981 40.711658 22.0 0.5 0.5 6.75 0.0 29.75 29.75 26

In [72]:
# Hour of day of the pickup, kept as a zero-padded string (e.g. '00').
# 'YYYY-MM-DD HH:MM:SS' -> time part -> first ':'-separated field.
# The vectorised .str accessor replaces a per-element Python lambda; unlike the
# lambda it yields NaN for a missing or malformed timestamp instead of raising.
d['hour'] = d['pickup_datetime'].str.split().str[1].str.split(':').str[0]

In [73]:
# List the column labels; `day` and `hour` should now appear at the end.
d.columns


Out[73]:
Index(['medallion', 'hack_license', 'pickup_datetime', 'dropoff_datetime',
       'passenger_count', 'trip_time_in_secs', 'trip_distance',
       'pickup_longitude', 'dropoff_longitude', 'pickup_latitude',
       'dropoff_latitude', 'fare_amount', 'surcharg', 'mta_tax', 'tip_amount',
       'tolls_amount', 'total_amount', 'new_total', 'day', 'hour'],
      dtype='object')

In [74]:
# Trips picked up strictly inside the box 40.6725 < lat < 40.6925,
# -73.9850 < lon < -73.9650, i.e. +/-0.01 degrees around (40.6825, -73.9750),
# the point a later cell labels 'Barclays Center'.
# .copy() makes `p` an independent frame: a later cell adds a `sector` column
# to it, and writing into a boolean-mask slice of `d` raises pandas'
# SettingWithCopyWarning.
p = d[
    (d['pickup_latitude'] > 40.6725) & (d['pickup_latitude'] < 40.6925)
    & (d['pickup_longitude'] < -73.9650) & (d['pickup_longitude'] > -73.9850)
].copy()

In [75]:
# Number of non-null hack_license values among the trips selected into `p`.
p['hack_license'].count()


Out[75]:
9704

In [8]:
def sector(lat,lon):
    """Return the grid sector (1-12) containing a coordinate, or 0 if outside.

    The grid consists of two longitude columns, each cut into six latitude
    bands of 0.05 degrees:

    * west column, -74.0286 < lon < -73.9386: sectors 1 (north) to 6 (south)
    * east column, -73.9386 <= lon < -73.8586: sectors 7 (north) to 12 (south)

    The bands run from latitude 40.8645 (top of sectors 1 and 7) down to
    40.5645 (bottom of sectors 6 and 12).

    A point lying exactly on an interior grid line is assigned to the sector
    north (for a latitude line) or east (for the longitude line) of that line,
    so every point strictly inside the outer rectangle gets a sector. Points on
    or beyond the outer rectangle, and NaN coordinates, return 0.

    Parameters
    ----------
    lat : float
        Latitude in decimal degrees.
    lon : float
        Longitude in decimal degrees.

    Returns
    -------
    int
        Sector number in 1..12, or 0 when the point is not inside the grid.
    """
    # Latitude grid lines from north to south; first and last are the outer edges.
    lat_lines = (40.8645, 40.8145, 40.7645, 40.7145, 40.6645, 40.6145, 40.5645)
    # Longitude grid lines from west to east; first and last are the outer edges.
    lon_lines = (-74.0286, -73.9386, -73.8586)

    # The outer rectangle is exclusive on all four sides. Any comparison with
    # NaN is False, so NaN coordinates also fall through to 0 here.
    inside = (lat_lines[-1] < lat < lat_lines[0]) and (lon_lines[0] < lon < lon_lines[-1])
    if not inside:
        return 0

    # Band index = number of interior latitude lines the point lies strictly
    # south of (0 for the northernmost band).
    row = sum(1 for line in lat_lines[1:-1] if lat < line)
    # Column index = number of interior longitude lines the point is on or east of.
    col = sum(1 for line in lon_lines[1:-1] if lon >= line)

    bands_per_column = len(lat_lines) - 1
    return col * bands_per_column + row + 1

In [9]:
# Label every trip in `p` with the grid sector of its drop-off point.
# Walking the two coordinate columns in parallel avoids materialising a whole
# row Series twice per trip (which p.iloc[i][...] does), and assign() returns a
# new frame rather than writing a column into a slice of `d`, which would
# raise pandas' SettingWithCopyWarning.
# NOTE(review): this cell needs `p` and `sector` to exist; the recorded
# NameError comes from a run in which `p` had not been defined yet.
l = [
    sector(lat, lon)
    for lat, lon in zip(p['dropoff_latitude'], p['dropoff_longitude'])
]
p = p.assign(sector=l)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-a1de9993d373> in <module>()
      1 l=[]
----> 2 for i in range(len(p)):
      3     l.append(sector(p.iloc[i]['dropoff_latitude'],p.iloc[i]['dropoff_longitude']))
      4 p['sector']=l

NameError: name 'p' is not defined

In [80]:
# One frame per pickup day, 26 through 31. `day` holds zero-padded strings,
# so each day number is compared in its string form.
p26, p27, p28, p29, p30, p31 = (
    p[p['day'] == str(day_number)] for day_number in range(26, 32)
)

In [81]:
# Collect the per-day frames in day order (index 0 -> day 26 ... index 5 -> day 31).
data_p=[p26,p27,p28,p29,p30,p31]

In [82]:
import pickle
# Persist the list of per-day frames to data_days.p. The context manager
# closes the file handle as soon as the dump finishes, so the data is flushed
# to disk; the handle from a bare open(...) passed straight to dump() was
# never closed explicitly.
with open('data_days.p','wb') as out_file:
    pickle.dump(data_p, out_file)

In [83]:
# Sector of 'Barclays Center', taken as the point (40.6825, -73.9750), which is
# the centre of the pickup box used to select `p`.
print(sector(40.6825,-73.9750))


4

In [ ]: