In [1]:
import pickle 
import pandas as pd
import numpy as np
import requests
import json

In [2]:
# Load the pickled container from 'data_days.p' and keep the element at index 4.
# The context manager closes the file handle as soon as the load finishes
# (the bare open(...) call left it open until garbage collection).
# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only load 'data_days.p' when it comes from a trusted source.
with open('data_days.p','rb') as fh:
    df=pickle.load(fh)[4]

In [3]:
# Keep only rows whose 'hour' value equals the string '23'.
# .copy() makes the filtered frame independent of the original, so adding the
# 'minutes' column below is a well-defined write instead of an assignment on a
# slice (which triggers SettingWithCopyWarning and may or may not stick).
df=df[df['hour']=='23'].copy()
# Minute of pickup as an int: take the second whitespace-separated token of
# 'pickup_datetime' and then its second ':'-separated field.
# assumes the timestamp is a string shaped like '<date> HH:MM:SS' — TODO confirm
df['minutes']=df['pickup_datetime'].apply(lambda stamp:int(stamp.split()[1].split(':')[1]))

In [4]:
# Keep trips whose passenger_count is strictly between 0 and 3.
passengers=df['passenger_count']
df=df[passengers.gt(0) & passengers.lt(3)]

In [5]:
# Display the column labels of the filtered frame.
df.columns


Out[5]:
Index(['medallion', 'hack_license', 'pickup_datetime', 'dropoff_datetime',
       'passenger_count', 'trip_time_in_secs', 'trip_distance',
       'pickup_longitude', 'dropoff_longitude', 'pickup_latitude',
       'dropoff_latitude', 'fare_amount', 'surcharg', 'mta_tax', 'tip_amount',
       'tolls_amount', 'total_amount', 'new_total', 'day', 'hour', 'sector',
       'minutes'],
      dtype='object')

In [6]:
# f: rows whose pickup minute is greater than 29 and less than 40; print how
# many of them have a non-null 'day'.
pickup_minute=df['minutes']
f=df[pickup_minute.gt(29) & pickup_minute.lt(40)]
print(f['day'].count())
# x: the narrower window, pickup minute greater than 29 and less than 32.
x=df[pickup_minute.gt(29) & pickup_minute.lt(32)]


45

In [13]:
# Rebind x to the rows whose pickup minute is greater than 29 and less than 35.
pickup_minute=df['minutes']
x=df[pickup_minute.gt(29) & pickup_minute.lt(35)]

In [213]:
# NOTE(review): this cell carries execution count 213 while its neighbours are
# 13 and 14, i.e. it was run out of order. It rebinds x to f (pickup minute
# greater than 29 and less than 40), replacing the narrower selection made in
# the previous cell. Out[14] below totals 24 rows whereas f was reported with
# 45, so the saved outputs appear to predate this assignment; on a fresh
# Restart & Run All the downstream numbers would differ — confirm which window
# is intended.
x=f

In [14]:
# Number of selected trips per 'sector' value (value_counts lists the most
# frequent sector first).
x['sector'].value_counts()


Out[14]:
4     11
3     10
5      2
10     1
Name: sector, dtype: int64

In [15]:
# Bucket the selected trips by sector.
# l[n] is the list of row Series from x whose 'sector' equals n+1, for sectors
# 1 through 12, kept in x's row order; sectors without trips get an empty list.

l=[]
for sector_id in range(1,13):
    in_sector=x[x['sector']==sector_id]
    l.append([in_sector.iloc[pos] for pos in range(len(in_sector))])

In [16]:
def graph(sourcelat,sourcelong,destlat,destlong,base_url='http://localhost:8989/route',timeout=30):
    """Return the routed distance between two coordinates.

    Sends a GET request to ``base_url`` with two ``point=<lat>,<lon>`` query
    parameters (source first, destination second) and reads
    ``paths[0].distance`` from the JSON reply. That value is divided by 1000
    and then by 1.61, exactly as before.
    NOTE(review): presumably the service is a local GraphHopper instance that
    reports meters, which would make the result approximate miles — confirm.

    Parameters
    ----------
    sourcelat, sourcelong : latitude / longitude of the start point.
    destlat, destlong : latitude / longitude of the end point.
    base_url : routing endpoint; defaults to the previously hard-coded URL.
    timeout : seconds to wait for the service before requests raises, so a
        hung routing server cannot block the notebook forever.

    Returns
    -------
    float : ``distance / 1000 / 1.61`` for the first returned path.

    Raises
    ------
    requests.HTTPError : if the service answers with a 4xx/5xx status.
    KeyError, IndexError : if the reply contains no ``paths`` entry.
    """
    # Example of the resulting request:
    # http://localhost:8989/route?point=41.8789%2C-87.6359&point=41.8916%2C-87.6045
    # A list of tuples keeps both 'point' keys and their order; requests
    # percent-encodes the comma the same way the hand-built URL did.
    params=[
        ('point','{},{}'.format(sourcelat,sourcelong)),
        ('point','{},{}'.format(destlat,destlong)),
    ]
    r=requests.get(base_url,params=params,timeout=timeout)
    # Fail loudly on an error status instead of a confusing KeyError on 'paths'.
    r.raise_for_status()
    result=r.json()
    dist=((result['paths'][0]['distance']/1000)/1.61)
    return dist

In [17]:
# Pair trips inside each sector in list order (0 with 1, 2 with 3, ...).
# distance accumulates the recorded trip_distance of every trip.
# new_dist accumulates the first trip of each pair in full and, for the second
# trip, whichever is smaller: its own trip_distance or the routed distance
# between the two drop-off points.
distance=0
new_dist=0
for sector_trips in l:
    for start in range(0,len(sector_trips),2):
        pair=sector_trips[start:start+2]
        lead=pair[0]
        distance+=lead['trip_distance']
        new_dist+=lead['trip_distance']
        if len(pair)<2:
            # odd trip at the end of the sector: nothing to pair it with
            continue
        follower=pair[1]
        old=follower['trip_distance']
        distance+=old
        new=graph(lead['dropoff_latitude'],lead['dropoff_longitude'],follower['dropoff_latitude'],follower['dropoff_longitude'])
        new_dist+=new if new<old else old

In [18]:
# new_dist as a percentage of distance (the sum of the recorded trip distances).
print((new_dist/distance)*100)


91.701004333

In [19]:
# Greedy matching inside each sector: every still-unmatched trip is compared
# with all other unmatched trips via the routed distance between their
# drop-off points, and is paired with the closest one when that detour plus its
# own trip_distance is shorter than the two recorded trip distances combined.
distance=0
new_dist=0
for sector_trips in l:
    taken=[]
    for rider in range(len(sector_trips)):
        if rider in taken:
            continue
        own=sector_trips[rider]['trip_distance']
        # routed distance -> index of the candidate partner; equal distances
        # overwrite each other, so the later candidate wins a tie
        detours={}
        for other in range(len(sector_trips)):
            if other==rider or other in taken:
                continue
            val=graph(sector_trips[rider]['dropoff_latitude'],sector_trips[rider]['dropoff_longitude'],sector_trips[other]['dropoff_latitude'],sector_trips[other]['dropoff_longitude'])
            detours[val]=other
        if detours:
            shortest=min(detours)
            partner=detours[shortest]
            new=shortest+own
            old=own+sector_trips[partner]['trip_distance']
        else:
            # no candidate left: sentinel values force the solo branch below
            old=0
            new=9999
        if new<old:
            new_dist+=new
            distance+=old
            taken.extend([rider,partner])
        else:
            new_dist+=own
            distance+=own
            taken.append(rider)

print((new_dist/distance)*100)


78.4071099837

In [ ]: