In [18]:
import pandas as pd
import numpy as np
import requests
import psycopg2
import json
import simplejson
import urllib
import config
import ast
from operator import itemgetter
from sklearn.cluster import KMeans
from sqlalchemy import create_engine
In [469]:
!pip install --upgrade pip
!pip install sqlalchemy
!pip install psycopg2
!pip install simplejson
!pip install config
In [33]:
conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"
# conn_str = "dbname='travel_with_friends' user='Zoesh' host='localhost'"
In [29]:
import math
import random
def distL2((x1,y1), (x2,y2)):
"""Compute the L2-norm (Euclidean) distance between two points.
The distance is rounded to the closest integer, for compatibility
with the TSPLIB convention.
The two points are located on coordinates (x1,y1) and (x2,y2),
sent as parameters"""
xdiff = x2 - x1
ydiff = y2 - y1
return math.sqrt(xdiff*xdiff + ydiff*ydiff) + .5
def distL1((x1,y1), (x2,y2)):
"""Compute the L1-norm (Manhattan) distance between two points.
The distance is rounded to the closest integer, for compatibility
with the TSPLIB convention.
The two points are located on coordinates (x1,y1) and (x2,y2),
sent as parameters"""
return abs(x2-x1) + abs(y2-y1)+.5
def mk_matrix(coord, dist):
"""Compute a distance matrix for a set of points.
Uses function 'dist' to calculate distance between
any two points. Parameters:
-coord -- list of tuples with coordinates of all points, [(x1,y1),...,(xn,yn)]
-dist -- distance function
"""
n = len(coord)
D = {} # dictionary to hold n times n matrix
for i in range(n-1):
for j in range(i+1,n):
[x1,y1] = coord[i]
[x2,y2] = coord[j]
D[i,j] = dist((x1,y1), (x2,y2))
D[j,i] = D[i,j]
return n,D
def read_tsplib(filename):
"basic function for reading a TSP problem on the TSPLIB format"
"NOTE: only works for 2D euclidean or manhattan distances"
f = open(filename, 'r');
line = f.readline()
while line.find("EDGE_WEIGHT_TYPE") == -1:
line = f.readline()
if line.find("EUC_2D") != -1:
dist = distL2
elif line.find("MAN_2D") != -1:
dist = distL1
else:
print "cannot deal with non-euclidean or non-manhattan distances"
raise Exception
while line.find("NODE_COORD_SECTION") == -1:
line = f.readline()
xy_positions = []
while 1:
line = f.readline()
if line.find("EOF") != -1: break
(i,x,y) = line.split()
x = float(x)
y = float(y)
xy_positions.append((x,y))
n,D = mk_matrix(xy_positions, dist)
return n, xy_positions, D
def mk_closest(D, n):
"""Compute a sorted list of the distances for each of the nodes.
For each node, the entry is in the form [(d1,i1), (d2,i2), ...]
where each tuple is a pair (distance,node).
"""
C = []
for i in range(n):
dlist = [(D[i,j], j) for j in range(n) if j != i]
dlist.sort()
C.append(dlist)
return C
def length(tour, D):
"""Calculate the length of a tour according to distance matrix 'D'."""
z = D[tour[-1], tour[0]] # edge from last to first city of the tour
for i in range(1,len(tour)):
z += D[tour[i], tour[i-1]] # add length of edge from city i-1 to i
return z
def randtour(n):
"""Construct a random tour of size 'n'."""
sol = range(n) # set solution equal to [0,1,...,n-1]
random.shuffle(sol) # place it in a random order
return sol
def nearest(last, unvisited, D):
"""Return the index of the node which is closest to 'last'."""
near = unvisited[0]
min_dist = D[last, near]
for i in unvisited[1:]:
if D[last,i] < min_dist:
near = i
min_dist = D[last, near]
return near
def nearest_neighbor(n, i, D):
"""Return tour starting from city 'i', using the Nearest Neighbor.
Uses the Nearest Neighbor heuristic to construct a solution:
- start visiting city i
- while there are unvisited cities, follow to the closest one
- return to city i
"""
unvisited = range(n)
unvisited.remove(i)
last = i
tour = [i]
while unvisited != []:
next = nearest(last, unvisited, D)
tour.append(next)
unvisited.remove(next)
last = next
return tour
def exchange_cost(tour, i, j, D):
"""Calculate the cost of exchanging two arcs in a tour.
Determine the variation in the tour length if
arcs (i,i+1) and (j,j+1) are removed,
and replaced by (i,j) and (i+1,j+1)
(note the exception for the last arc).
Parameters:
-t -- a tour
-i -- position of the first arc
-j>i -- position of the second arc
"""
n = len(tour)
a,b = tour[i],tour[(i+1)%n]
c,d = tour[j],tour[(j+1)%n]
return (D[a,c] + D[b,d]) - (D[a,b] + D[c,d])
def exchange(tour, tinv, i, j):
"""Exchange arcs (i,i+1) and (j,j+1) with (i,j) and (i+1,j+1).
For the given tour 't', remove the arcs (i,i+1) and (j,j+1) and
insert (i,j) and (i+1,j+1).
This is done by inverting the sublist of cities between i and j.
"""
n = len(tour)
if i>j:
i,j = j,i
assert i>=0 and i<j-1 and j<n
path = tour[i+1:j+1]
path.reverse()
tour[i+1:j+1] = path
for k in range(i+1,j+1):
tinv[tour[k]] = k
def improve(tour, z, D, C):
"""Try to improve tour 't' by exchanging arcs; return improved tour length.
If possible, make a series of local improvements on the solution 'tour',
using a breadth first strategy, until reaching a local optimum.
"""
n = len(tour)
tinv = [0 for i in tour]
for k in range(n):
tinv[tour[k]] = k # position of each city in 't'
for i in range(n):
a,b = tour[i],tour[(i+1)%n]
dist_ab = D[a,b]
improved = False
for dist_ac,c in C[a]:
if dist_ac >= dist_ab:
break
j = tinv[c]
d = tour[(j+1)%n]
dist_cd = D[c,d]
dist_bd = D[b,d]
delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
if delta < 0: # exchange decreases length
exchange(tour, tinv, i, j);
z += delta
improved = True
break
if improved:
continue
for dist_bd,d in C[b]:
if dist_bd >= dist_ab:
break
j = tinv[d]-1
if j==-1:
j=n-1
c = tour[j]
dist_cd = D[c,d]
dist_ac = D[a,c]
delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
if delta < 0: # exchange decreases length
exchange(tour, tinv, i, j);
z += delta
break
return z
def localsearch(tour, z, D, C=None):
"""Obtain a local optimum starting from solution t; return solution length.
Parameters:
tour -- initial tour
z -- length of the initial tour
D -- distance matrix
"""
n = len(tour)
if C == None:
C = mk_closest(D, n) # create a sorted list of distances to each node
while 1:
newz = improve(tour, z, D, C)
if newz < z:
z = newz
else:
break
return z
def multistart_localsearch(k, n, D, report=None):
"""Do k iterations of local search, starting from random solutions.
Parameters:
-k -- number of iterations
-D -- distance matrix
-report -- if not None, call it to print verbose output
Returns best solution and its cost.
"""
C = mk_closest(D, n) # create a sorted list of distances to each node
bestt=None
bestz=None
for i in range(0,k):
tour = randtour(n)
z = length(tour, D)
z = localsearch(tour, z, D, C)
if z < bestz or bestz == None:
bestz = z
bestt = list(tour)
if report:
report(z, tour)
return bestt, bestz
In [471]:
# db_name = "travel_with_friends"
# TABLES ={}
# TABLES['full_trip_table'] = (
# "CREATE TABLE `full_trip_table` ("
# " `user_id` int(11) NOT NULL AUTO_INCREMENT,"
# " `full_trip_id` date NOT NULL,"
# " `trip_location_ids` varchar(14) NOT NULL,"
# " `default` varchar(16) NOT NULL,"
# " `county` enum('M','F') NOT NULL,"
# " `state` date NOT NULL,"
# " `details` ,"
# " `n_days`,"
# " PRIMARY KEY (`full_trip_id`)"
# ") ENGINE=InnoDB")
In [472]:
# def create_tables():
# """ create tables in the PostgreSQL database"""
# commands = (
# """
# CREATE TABLE full_trip_table (
# index INTEGER PRIMARY KEY,
# user_id VARCHAR(225) NOT NULL,
# full_trip_id VARCHAR(225) NOT NULL,
# trip_location_ids VARCHAR(225),
# default BOOLEAN NOT NULL,
# county VARCHAR(225) NOT NULL,
# state VARCHAR(225) NOT NULL,
# details VARCHAR(MAX),
# n_days VARCHAR(225) NOT NULL
# )
# """,
# """ CREATE TABLE day_trip_table (
# trip_locations_id
# full_day
# default
# county
# state
# details
# )
# """,
# """
# CREATE TABLE poi_detail_table (
# part_id INTEGER PRIMARY KEY,
# file_extension VARCHAR(5) NOT NULL,
# drawing_data BYTEA NOT NULL,
# FOREIGN KEY (part_id)
# REFERENCES parts (part_id)
# ON UPDATE CASCADE ON DELETE CASCADE
# )
# """,
# """
# CREATE TABLE google_travel_time_table (
# index INTEGER PRIMARY KEY,
# id_ VARCHAR NOT NULL,
# orig_name VARCHAR,
# orig_idx VARCHAR,
# dest_name VARCHAR,
# dest_idx VARCHAR,
# orig_coord0 INTEGER,
# orig_coord1 INTEGER,
# dest_coord0 INTEGER,
# dest_coord1 INTEGER,
# orig_coords VARCHAR,
# dest_coords VARCHAR,
# google_driving_url VARCHAR,
# google_walking_url VARCHAR,
# driving_result VARCHAR,
# walking_result VARCHAR,
# google_driving_time INTEGER,
# google_walking_time INTEGER
# )
# """)
# conn = None
# try:
# # read the connection parameters
# params = config()
# # connect to the PostgreSQL server
# conn = psycopg2.connect(**params)
# cur = conn.cursor()
# # create table one by one
# for command in commands:
# cur.execute(command)
# # close communication with the PostgreSQL database server
# cur.close()
# # commit the changes
# conn.commit()
# except (Exception, psycopg2.DatabaseError) as error:
# print(error)
# finally:
# if conn is not None:
# conn.close()
In [473]:
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])
# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])
# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
# 'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
# 'google_walking_url','driving_result','walking_result','google_driving_time',\
# 'google_walking_time'])
In [87]:
# read poi details csv file
poi_detail = pd.read_csv("./step9_poi.csv", index_col=0)
poi_detail['address'] = None
poi_detail['rating']=poi_detail['rating'].fillna(0)
#read US city state and county csv file
# df_counties = pd.read_csv('./us_cities_states_counties.csv',sep='|')
#find counties without duplicate
# df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()
In [34]:
def init_db_tables():
full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])
day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details','event_type','event_ids'])
google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
'google_walking_url','driving_result','walking_result','google_driving_time',\
'google_walking_time'])
day_trip_locations_table.loc[0] = ['CALIFORNIA-SAN-DIEGO-1-3-0', True, True, 'SAN DIEGO', 'California',
["{'address': '15500 San Pasqual Valley Rd, Escondido, CA 92027, USA', 'id': 2259, 'day': 0, 'name': u'San Diego Zoo Safari Park'}", "{'address': 'Safari Walk, Escondido, CA 92027, USA', 'id': 2260, 'day': 0, 'name': u'Meerkat'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3486, 'day': 0, 'name': u'Stone'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3487, 'day': 0, 'name': u'Stone Brewery'}", "{'address': 'Mount Woodson Trail, Poway, CA 92064, USA', 'id': 4951, 'day': 0, 'name': u'Lake Poway'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4953, 'day': 0, 'name': u'Potato Chip Rock'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4952, 'day': 0, 'name': u'Mt. Woodson'}"],
'big','[2259, 2260,3486,3487,4951,4953,4952]']
google_travel_time_table.loc[0] = ['439300002871', u'Moonlight Beach', 4393.0,
u'Carlsbad Flower Fields', 2871.0, -117.29692141333341,
33.047769600024424, -117.3177652511278, 33.124079753475236,
'33.0477696,-117.296921413', '33.1240797535,-117.317765251',
'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=driving&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=walking&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
"{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '14 mins', 'value': 822}, 'distance': {'text': '10.6 km', 'value': 10637}, 'status': 'OK'}]}], 'origin_addresses': ['233 C St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
"{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '2 hours 4 mins', 'value': 7457}, 'distance': {'text': '10.0 km', 'value': 10028}, 'status': 'OK'}]}], 'origin_addresses': ['498 B St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
13.0, 124.0]
full_trip_table.loc[0] = ['gordon_lee01', 'CALIFORNIA-SAN-DIEGO-1-3',
"['CALIFORNIA-SAN-DIEGO-1-3-0', 'CALIFORNIA-SAN-DIEGO-1-3-1', 'CALIFORNIA-SAN-DIEGO-1-3-2']",
True, 'SAN DIEGO', 'California',
'["{\'address\': \'15500 San Pasqual Valley Rd, Escondido, CA 92027, USA\', \'id\': 2259, \'day\': 0, \'name\': u\'San Diego Zoo Safari Park\'}", "{\'address\': \'Safari Walk, Escondido, CA 92027, USA\', \'id\': 2260, \'day\': 0, \'name\': u\'Meerkat\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3486, \'day\': 0, \'name\': u\'Stone\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3487, \'day\': 0, \'name\': u\'Stone Brewery\'}", "{\'address\': \'Mount Woodson Trail, Poway, CA 92064, USA\', \'id\': 4951, \'day\': 0, \'name\': u\'Lake Poway\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4953, \'day\': 0, \'name\': u\'Potato Chip Rock\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4952, \'day\': 0, \'name\': u\'Mt. Woodson\'}", "{\'address\': \'1 Legoland Dr, Carlsbad, CA 92008, USA\', \'id\': 2870, \'day\': 1, \'name\': u\'Legoland\'}", "{\'address\': \'5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA\', \'id\': 2871, \'day\': 1, \'name\': u\'Carlsbad Flower Fields\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2089, \'day\': 1, \'name\': u\'Oceanside Pier\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2090, \'day\': 1, \'name\': u\'Pier\'}", "{\'address\': \'1016-1024 Neptune Ave, Encinitas, CA 92024, USA\', \'id\': 2872, \'day\': 1, \'name\': u\'Encinitas\'}", "{\'address\': \'625 Pan American Rd E, San Diego, CA 92101, USA\', \'id\': 147, \'day\': 2, \'name\': u\'Balboa Park\'}", "{\'address\': \'1849-1863 Zoo Pl, San Diego, CA 92101, USA\', \'id\': 152, \'day\': 2, \'name\': u\'San Diego Zoo\'}", "{\'address\': \'701-817 Coast Blvd, La Jolla, CA 92037, USA\', \'id\': 148, \'day\': 2, \'name\': u\'La Jolla\'}", "{\'address\': \'10051-10057 Pebble Beach Dr, Santee, CA 92071, USA\', \'id\': 4630, \'day\': 2, \'name\': u\'Santee Lakes\'}", "{\'address\': \'Lake Murray Bike Path, La Mesa, CA 91942, USA\', \'id\': 4545, \'day\': 2, \'name\': u\'Lake Murray\'}", "{\'address\': \'4905 Mt Helix Dr, La Mesa, CA 91941, USA\', \'id\': 4544, \'day\': 2, \'name\': u\'Mt. Helix\'}", "{\'address\': \'1720 Melrose Ave, Chula Vista, CA 91911, USA\', \'id\': 1325, \'day\': 2, \'name\': u\'Thick-billed Kingbird\'}", "{\'address\': \'711 Basswood Ave, Imperial Beach, CA 91932, USA\', \'id\': 1326, \'day\': 2, \'name\': u\'Lesser Sand-Plover\'}"]',
3.0]
engine = create_engine('postgresql://zoesh@localhost:5432/travel_with_friends')
# full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
# full_trip_table.to_sql('full_trip_table', engine,if_exists='append')
full_trip_table.to_sql('full_trip_table',engine, if_exists = "replace")
day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "replace")
google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "replace")
poi_detail.to_sql('poi_detail_table',engine, if_exists = "replace")
df_counties = pd.read_csv('/Users/zoesh/Desktop/travel_with_friends/travel_with_friends/us_cities_states_counties.csv',sep='|')
df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()
df_counties_u.columns = ["city","state_abb","state","county"]
df_counties_u.to_sql('county_table',engine, if_exists = "replace")
init_db_tables()
In [42]:
def cold_start_places(df, county, state, city, number_days, first_day_full = True, last_day_full = True):
if len(county.values) != 0:
county = county.values[0]
temp_df = df[(df['county'] == county) & (df['state'] == state)]
else:
temp_df = df[(df['city'] == city) & (df['state'] == state)]
return county, temp_df
def find_county(state, city):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select county from county_table where city = '%s' and state = '%s';" %(city.title(), state.title()))
county = cur.fetchone()
conn.close()
if county:
return county[0]
else:
return None
def db_start_location(county, state, city):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
if county:
cur.execute("select index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating from poi_detail_table where county = '%s' and state = '%s'; "%(county.upper(), state))
else:
# print "else"
cur.execute("select index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating from poi_detail_table where city = '%s' and state = '%s'; "%(city, state))
a = cur.fetchall()
conn.close()
return np.array(a)
In [183]:
a1= db_start_location('San Francisco',"California","San Francisco")
poi_coords = a1[:,1:3]
n_days = 1
current_events =[]
big_ix, med_ix, small_ix =[],[],[]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
i = 0
for ix, label in enumerate(kmeans.labels_):
# print ix, label
if label == i:
time = a1[ix,3]
event_ix = a1[ix,0]
current_events.append(event_ix)
if time > 180 :
big_ix.append(ix)
elif time >= 120 :
med_ix.append(ix)
else:
small_ix.append(ix)
print big_ix, med_ix, small_ix
# big_ = a1[[big_ix],4:]
print med_ix, ('wat')
print a1[ix,0], 'wat', a1[med_ix]
print med_ix
big_ = a1[big_ix][:,[0,4,5]]
med_ = a1[med_ix][:,[0,4,5]]
small_ = a1[small_ix][:,[0,4,5]]
In [17]:
from operator import itemgetter
med_s= sorted(med_, key=itemgetter(1,2))
a = [[1,3,3],[2,2,5],[3,4,4],[4,2,2]]
# [sorted(a, key=itemgetter(1,2)) for i in range(100)]
b = sorted(a, key=itemgetter(1,2))
a.sort(key=lambda k: (k[1], -k[2]), reverse=False)
# print med_s
print a
In [484]:
def default_cold_start_places(df,df_counties_u, day_trip_locations,full_trip_table,df_poi_travel_info,number_days = [1,2,3,4,5]):
df_c = df_counties_u.groupby(['State full','County']).count().reset_index()
for state, county,_,_ in df_c.values[105:150]:
temp_df = df[(df['county'] == county) & (df['state'] == state)]
if temp_df.shape[0]!=0:
if sum(temp_df.adjusted_normal_time_spent) < 360:
number_days = [1]
elif sum(temp_df.adjusted_normal_time_spent) < 720:
number_days = [1,2]
big_events = temp_df[temp_df.adjusted_normal_time_spent > 180]
med_events = temp_df[(temp_df.adjusted_normal_time_spent>= 120)&(temp_df.adjusted_normal_time_spent<=180)]
small_events = temp_df[temp_df.adjusted_normal_time_spent < 120]
for i in number_days:
n_days = i
full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info = \
default_search_cluster_events(df, df_counties_u, county, state, big_events,med_events, \
small_events, temp_df, n_days,day_trip_locations, full_trip_table,\
df_poi_travel_info)
print county, state
print full_trip_table.shape, len(day_trip_locations), new_trip_df1.shape, df_poi_travel_info.shape
return None
In [426]:
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])
# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])
# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
# 'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
# 'google_walking_url','driving_result','walking_result','google_driving_time',\
# 'google_walking_time'])
# google_travel_time_table.loc[0] = ['000000000001', "home", '0000', 'space', '0001', 999 ,999, 999.1,999.1, "999,999","999.1,999.1","http://google.com","http://google.com", "", "", 0, 0 ]
In [427]:
# google_travel_time_table.index=google_travel_time_table.index.astype(int)
In [428]:
# engine = create_engine('postgresql://Gon@localhost:5432/travel_with_friends')
# # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
# # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')
# full_trip_table.to_sql('full_trip_table',engine, if_exists = "append")
# day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "append")
# google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "append")
# # df.to_sql('poi_detail_table',engine, if_exists = "append")
In [ ]:
In [ ]:
def trip_df_cloest_distance(trip_df, event_type):
points = trip_df[['coord0','coord1']].values.tolist()
n, D = mk_matrix(points, distL2) # create the distance matrix
if len(points) >= 3:
if event_type == 'big':
tour = nearest_neighbor(n, trip_df.shape[0]-1, D) # create a greedy tour, visiting city 'i' first
z = length(tour, D)
z = localsearch(tour, z, D)
elif event_type == 'med':
tour = nearest_neighbor(n, trip_df.shape[0]-2, D) # create a greedy tour, visiting city 'i' first
z = length(tour, D)
z = localsearch(tour, z, D)
else:
tour = nearest_neighbor(n, 0, D) # create a greedy tour, visiting city 'i' first
z = length(tour, D)
z = localsearch(tour, z, D)
return tour
else:
return range(len(points))
In [253]:
def get_event_ids_list(trip_locations_id):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select event_ids,event_type from day_trip_table where trip_locations_id = '%s' " %(trip_locations_id))
event_ids,event_type = cur.fetchone()
event_ids = ast.literal_eval(event_ids)
conn.close()
return event_ids,event_type
def db_event_cloest_distance(trip_locations_id=None,event_ids=None, event_type = 'add',new_event_id = None):
if new_event_id or not event_ids:
event_ids, event_type = get_event_ids_list(trip_locations_id)
if new_event_id:
event_ids.append(new_event_id)
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
points = np.zeros((len(event_ids), 3))
for i,v in enumerate(event_ids):
cur.execute("select index, coord0, coord1 from poi_detail_table where index = %i;"%(float(v)))
points[i] = cur.fetchone()
conn.close()
n,D = mk_matrix(points[:,1:], distL2)
if len(points) >= 3:
if event_type == 'add':
tour = nearest_neighbor(n, 0, D)
# create a greedy tour, visiting city 'i' first
z = length(tour, D)
z = localsearch(tour, z, D)
return np.array(event_ids)[tour], event_type
#need to figure out other cases
else:
tour = nearest_neighbor(n, 0, D)
# create a greedy tour, visiting city 'i' first
z = length(tour, D)
z = localsearch(tour, z, D)
return np.array(event_ids)[tour], event_type
else:
return np.array(event_ids), event_type
In [259]:
def check_full_trip_id(full_trip_id, debug):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select details from full_trip_table where full_trip_id = '%s'" %(full_trip_id))
a = cur.fetchone()
conn.close()
if bool(a):
if not debug:
return a[0]
else:
return True
else:
return False
def check_day_trip_id(day_trip_id, debug):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select details from day_trip_table where trip_locations_id = '%s'" %(day_trip_id))
a = cur.fetchone()
conn.close()
if bool(a):
if not debug:
return a[0]
else:
return True
else:
return False
def check_travel_time_id(new_id):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select google_driving_time from google_travel_time_table where id_ = '%s'" %(new_id))
a = cur.fetchone()
conn.close()
if bool(a):
return True
else:
return False
In [175]:
#need to check about the code!
def sorted_events(info,ix):
'''
find the event_id, ranking and rating columns
sorted base on ranking then rating
return sorted list
'''
event_ = info[ix][:,[0,4,5]]
return np.array(sorted(event_, key=lambda x: (x[1], -x[2])))
def sorted_outside_events(info, ix):
'''
find the event_id, ranking and rating columns
sorted base on ranking then rating
return sorted list
'''
event_ = info[:,[0,4,5]]
return np.array(sorted(event_, key=lambda x: (x[1], -x[2])))
In [1185]:
med_[0,1] ,small_[0,1] , med_[0,0]
# small_[0:8,0].concatenate(medium_[0:2,0])
c = small_[0:10,]
d = med_[0:2,]
print np.vstack((c,d))
# print list(np.array(sorted(np.vstack((c,d)), key=lambda x: (x[1],-x[2])))[:,0])
print list(np.array(sorted(np.vstack((small_[0:10,:],med_)), key=lambda x: (x[1],-x[2])))[:,0])
# np.vstack((small_[0:10,],med_))
In [1208]:
a_ids, a_type=create_event_id_list(big_,med_,small_)
# conn = psycopg2.connect(conn_str)
# cur = conn.cursor()
# for i,v in enumerate(a_ids):
# # print i, v, type(v)
# cur.execute("select index, coord0, coord1 from poi_detail_table where index = %i;"%(float(v)))
# aaa = cur.fetchone()
# # print aaa
# conn.close()
# new_a_ids, new_a_type=db_event_cloest_distance(event_ids = a_ids, event_type = a_type)
# print new_a_ids, new_a_type
In [257]:
def create_event_id_list(big_,medium_,small_):
event_type = ''
if big_.shape[0] >= 1:
if (medium_.shape[0] < 2) or (big_[0,1] <= medium_[0,1]):
if small_.shape[0] >= 6:
event_ids = list(np.concatenate((small_[0:6,0],big_[0,0]),axis=0))
else:
event_ids = list(np.concatenate((small_[:,0],big_[0,0]),axis=0))
event_type = 'big'
else:
if small_.shape[0] >= 8:
event_ids = list(np.concatenate((small_[0:8,0],medium_[0:2,0]),axis=0))
else:
event_ids = list(np.concatenate((small_[:,0],medium_[0:2,0]),axis=0))
event_type = 'med'
elif medium_.shape[0] >= 2:
if small_.shape[0] >= 8:
event_ids = list(np.concatenate((small_[0:8,0],medium_[0:2,0]),axis=0))
else:
event_ids = list(np.concatenate((small_[:,0],medium_[0:2,0]),axis=0))
event_type = 'med'
else:
if small_.shape[0] >= 10:
if not medium_.shape[0]:
event_ids = list(np.array(sorted(small_[0:10,:], key=lambda x: (x[1],-x[2])))[:,0])
else:
event_ids = list(np.array(sorted(np.vstack((small_[0:10,:],medium_)), key=lambda x: (x[1],-x[2])))[:,0])
else:
if not medium_.shape[0]:
event_ids = list(np.array(sorted(small_[0:10,:], key=lambda x: (x[1],-x[2])))[:,0])
else:
event_ids = list(np.array(sorted(np.vstack((small_,medium_)), key=lambda x: (x[1],-x[2])))[:,0])
event_type = 'small'
return event_ids, event_type
In [278]:
from geopy.distance import vincenty
newport_ri = (41.49008, -70.312796)
cleveland_oh = (41.499498, -81.695391)
print(vincenty(newport_ri, cleveland_oh).miles)
In [486]:
def create_trip_df(big_,medium_,small_):
event_type = ''
if big_.shape[0] >= 1:
if (medium_.shape[0] < 2) or (big_.iloc[0].poi_rank <= medium_.iloc[0].poi_rank):
if small_.shape[0] >= 6:
trip_df = small_.iloc[0:6].append(big_.iloc[0])
else:
trip_df = small_.append(big_.iloc[0])
event_type = 'big'
else:
if small_.shape[0] >= 8:
trip_df = small_.iloc[0:8].append(medium_.iloc[0:2])
else:
trip_df = small_.append(medium_.iloc[0:2])
event_type = 'med'
elif medium_.shape[0] >= 2:
if small_.shape[0] >= 8:
trip_df = small_.iloc[0:8].append(medium_.iloc[0:2])
else:
trip_df = small_.append(medium_.iloc[0:2])
event_type = 'med'
else:
if small_.shape[0] >= 10:
trip_df = small_.iloc[0:10].append(medium_).sort_values(['poi_rank', 'rating'], ascending=[True, False])
else:
trip_df = small_.append(medium_).sort_values(['poi_rank', 'rating'], ascending=[True, False])
event_type = 'small'
return trip_df, event_type
In [263]:
my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'
# my_key = 'AIzaSyAwx3xg6oJ0yiPV3MIunBa1kx6N7v5Tcw8'
def google_driving_walking_time(tour,trip_df,event_type):
poi_travel_time_df = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
'google_walking_url','driving_result','walking_result','google_driving_time',\
'google_walking_time'])
ids_, orig_names,orid_idxs,dest_names,dest_idxs,orig_coord0s,orig_coord1s,dest_coord0s,dest_coord1s = [],[],[],[],[],[],[],[],[]
orig_coordss,dest_coordss,driving_urls,walking_urls,driving_results,walking_results,driving_times,walking_times = [],[],[],[],[],[],[],[]
trip_id_list=[]
for i in range(len(tour)-1):
id_ = str(trip_df.loc[trip_df.index[tour[i]]].name) + '0000'+str(trip_df.loc[trip_df.index[tour[i+1]]].name)
result_check_travel_time_id = check_travel_time_id(id_)
if not result_check_travel_time_id:
orig_name = trip_df.loc[trip_df.index[tour[i]]]['name']
orig_idx = trip_df.loc[trip_df.index[tour[i]]].name
dest_name = trip_df.loc[trip_df.index[tour[i+1]]]['name']
dest_idx = trip_df.loc[trip_df.index[tour[i+1]]].name
orig_coord0 = trip_df.loc[trip_df.index[tour[i]]]['coord0']
orig_coord1 = trip_df.loc[trip_df.index[tour[i]]]['coord1']
dest_coord0 = trip_df.loc[trip_df.index[tour[i+1]]]['coord0']
dest_coord1 = trip_df.loc[trip_df.index[tour[i+1]]]['coord1']
orig_coords = str(orig_coord1)+','+str(orig_coord0)
dest_coords = str(dest_coord1)+','+str(dest_coord0)
google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
driving_result= simplejson.load(urllib.urlopen(google_driving_url))
walking_result= simplejson.load(urllib.urlopen(google_walking_url))
if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
driving_result= simplejson.load(urllib.urlopen(google_driving_url))
if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
walking_result= simplejson.load(urllib.urlopen(google_walking_url))
if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
new_df = trip_df.drop(trip_df.iloc[tour[i+1]].name)
new_tour = trip_df_cloest_distance(new_df,event_type)
return google_driving_walking_time(new_tour,new_df, event_type)
try:
google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
except:
print driving_result
try:
google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
except:
google_walking_time = 9999
poi_travel_time_df.loc[len(df_poi_travel_time)]=[id_,orig_name,orig_idx,dest_name,dest_idx,orig_coord0,orig_coord1,dest_coord0,\
dest_coord1,orig_coords,dest_coords,google_driving_url,google_walking_url,\
str(driving_result),str(walking_result),google_driving_time,google_walking_time]
driving_result = str(driving_result).replace("'", '"')
walking_result = str(walking_result).replace("'", '"')
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select max(index) from google_travel_time_table")
index = cur.fetchone()[0]+1
# print "startindex:", index , type(index)
# index += 1
# print "end index: " ,index
cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,\
dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,\
str(driving_result), str(walking_result), google_driving_time, google_walking_time))
conn.commit()
conn.close()
else:
trip_id_list.append(id_)
return tour, trip_df, poi_travel_time_df
In [261]:
def db_google_driving_walking_time(event_ids, event_type):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
google_ids = []
driving_time_list = []
walking_time_list = []
name_list = []
for i,v in enumerate(event_ids[:-1]):
id_ = str(v) + '0000'+str(event_ids[i+1])
result_check_travel_time_id = check_travel_time_id(id_)
if not result_check_travel_time_id:
cur.execute("select name, coord0, coord1 from poi_detail_table where index = %s"%(v))
orig_name, orig_coord0, orig_coord1 = cur.fetchone()
orig_idx = v
cur.execute("select name, coord0, coord1 from poi_detail_table where index = %s "%(event_ids[i+1]))
dest_name, dest_coord0, dest_coord1 = cur.fetchone()
dest_idx = event_ids[i+1]
orig_coords = str(orig_coord1)+','+str(orig_coord0)
dest_coords = str(dest_coord1)+','+str(dest_coord0)
google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
driving_result= simplejson.load(urllib.urlopen(google_driving_url))
walking_result= simplejson.load(urllib.urlopen(google_walking_url))
if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
driving_result= simplejson.load(urllib.urlopen(google_driving_url))
if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
walking_result= simplejson.load(urllib.urlopen(google_walking_url))
if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
new_event_ids = list(event_ids)
new_event_ids.pop(i+1)
new_event_ids = db_event_cloest_distance(event_ids=new_event_ids, event_type = event_type)
return db_google_driving_walking_time(new_event_ids, event_type)
try:
google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
except:
print v, id_, driving_result #need to debug for this
try:
google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
except:
google_walking_time = 9999
cur.execute("select max(index) from google_travel_time_table")
index = cur.fetchone()[0]+1
driving_result = str(driving_result).replace("'",'"')
walking_result = str(walking_result).replace("'",'"')
orig_name = orig_name.replace("'","''")
dest_name = dest_name.replace("'","''")
cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,\
dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,\
str(driving_result), str(walking_result), google_driving_time, google_walking_time))
conn.commit()
name_list.append(orig_name+" to "+ dest_name)
google_ids.append(id_)
driving_time_list.append(google_driving_time)
walking_time_list.append(google_walking_time)
else:
cur.execute("select orig_name, dest_name, google_driving_time, google_walking_time from google_travel_time_table \
where id_ = '%s'" %(id_))
orig_name, dest_name, google_driving_time, google_walking_time = cur.fetchone()
name_list.append(orig_name+" to "+ dest_name)
google_ids.append(id_)
driving_time_list.append(google_driving_time)
walking_time_list.append(google_walking_time)
conn.close()
return event_ids, google_ids, name_list, driving_time_list, walking_time_list
In [837]:
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
default = 1
n_days =3
full_day = 1
poi_coords = df_events[['coord0','coord1']]
trip_location_ids, full_trip_details =[],[]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
# print kmeans.labels_
i=0
current_events = []
big_ix = []
small_ix = []
med_ix = []
for ix, label in enumerate(kmeans.labels_):
if label == i:
time = df_events.iloc[ix].adjusted_normal_time_spent
event_ix = df_events.iloc[ix].name
current_events.append(event_ix)
if time > 180 :
big_ix.append(event_ix)
elif time >= 120 :
med_ix.append(event_ix)
else:
small_ix.append(event_ix)
# all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
big_ = df_events.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
small_ = df_events.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
medium_ = df_events.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
# trip_df, event_type = create_trip_df(big_,medium_,small_)
# tour = trip_df_cloest_distance(trip_df, event_type)
# new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
# new_trip_df = new_trip_df.iloc[new_tour]
# new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
# new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
event_ids, event_type=db_event_cloest_distance(trip_locations_id)
event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
db_address(event_ids)
values = day_trip(event_ids, county, state, default, full_day,n_days,i)
day_trip_locations.loc[len(day_trip_locations)] = values
trip_location_ids.append(values[0])
full_trip_details.extend(values[-1])
In [ ]:
def get_small_med_big_ix(county_list_info, day_labels):
In [300]:
def get_fulltrip_data_default(state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
county = find_county(state, city)
trip_location_ids, full_trip_details =[],[]
full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
if not check_full_trip_id(full_trip_id, debug):
county_list_info = db_start_location(county, state, city)
poi_coords = county_list_info[:,1:3]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
day_labels = kmeans.labels_
for i in range(n_days):
if not check_day_trip_id(day_trip, debug):
trip_locations_id = '-'.join([str(state), str(county.replace(' ','-')),str(int(default)), str(n_days),str(i)])
current_events, big_ix, small_ix, med_ix = [],[],[],[]
for ix, label in enumerate(day_labels):
if label == i:
time = county_list_info[ix,3]
event_ix = county_list_info[ix,0]
current_events.append(event_ix)
if time > 180 :
big_ix.append(ix)
elif time >= 120 :
med_ix.append(ix)
else:
small_ix.append(ix)
#need to double check this funct!
big_ = sorted_events(county_list_info, big_ix)
med_ = sorted_events(county_list_info, med_ix)
small_ = sorted_events(county_list_info, small_ix)
event_ids, event_type = create_event_id_list(big_, med_, small_)
event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type)
event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
db_address(event_ids)
details = db_day_trip(event_ids, i)
# insert to day_trip ....
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("insert into day_trip_table (trip_locations_id,full_day, default, county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" %( trip_location_id, full_day, default, county, state, details, event_type, event_ids))
conn.commit()
conn.close()
trip_location_ids.append(trip_locations_id)
full_trip_details.extend(details)
else:
print "error: already have this day, please check the next day"
trip_location_ids.append(trip_locations_id)
# call db find day trip detail
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select details from day_trip_table where trip_locations_id = '%s';"%(trip_locations_id) )
day_trip_detail = fetchall()
conn.close()
full_trip_details.extend(day_trip_detail)
full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
details = full_trip_details
user_id = "Admin"
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("insert into full_trip_table(user_id, full_trip_id,trip_location_ids, default, county, state, details, n_days) VALUES ('%s', '%s', '%s', %s, '%s', '%s', '%s', %s)" %(user_id, full_trip_id, str(trip_location_ids), default, county, state, details, n_days))
conn.commit()
conn.close()
return "finish update %s, %s into database" %(state, county)
else:
return "%s, %s already in database" %(state, county)
In [298]:
def db_day_trip(event_ids, county, state, default, full_day,n_days,i):
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
# cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
# a = cur.fetchall()
# state = a[0][0].upper()
# county = a[0][1].upper()
trip_locations_id = '-'.join([str(state), str(county.replace(' ','-')),str(int(default)), str(n_days),str(i)])
#details dict includes: id, name,address, day
cur.execute("select index, name, address from poi_detail_table where index in %s;" %(tuple(event_ids),))
a = cur.fetchall()
details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(len(a))]
conn.close()
return [trip_locations_id, full_day, default, county, state, details]
def db_day_trip(event_ids, i):
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
details = []
#details dict includes: id, name,address, day
for event_id in event_ids:
cur.execute("select index, name, address from poi_detail_table where index = %s;" %(event_id))
a = cur.fetchone()
details.append(str({'id': a[0],'name': a[1],'address': a[2], 'day': i}))
conn.close()
return details
In [ ]:
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()
In [ ]:
def extend_full_trip_details(full_trip_details):
details = {}
addresses = []
ids = []
days = []
names = []
for item in full_trip_details:
addresses.append(eval(item)['address'])
ids.append(eval(item)['id'])
days.append(eval(item)['day'])
names.append(eval(item)['name'])
details['addresses'] = addresses
details['ids'] = ids
details['days'] = days
details['names'] = names
return str(full_trip_details)
In [841]:
event_ids
Out[841]:
In [569]:
print event_ids, google_ids, name_list, driving_time_list, walking_time_list
In [604]:
init_db_tables()
In [267]:
def remove_extra_events(trip_df, df_poi_travel_time):
if sum(trip_df.adjusted_normal_time_spent)+sum(df_poi_travel_time.google_driving_time) > 480:
new_trip_df = trip_df[:-1]
new_df_poi_travel_time = df_poi_travel_time[:-1]
return remove_extra_events(new_trip_df,new_df_poi_travel_time)
else:
return trip_df, df_poi_travel_time, sum(trip_df.adjusted_normal_time_spent)+sum(df_poi_travel_time.google_driving_time)
def db_remove_extra_events(event_ids, driving_time_list,walking_time_list, max_time_spent=480):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select sum (adjusted_normal_time_spent) from poi_detail_table where index in %s;" %(tuple(event_ids),))
time_spent = cur.fetchone()[0]
conn.close()
time_spent += sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list)))
if time_spent > max_time_spent:
update_event_ids = event_ids[:-1]
update_driving_time_list = driving_time_list[:-1]
update_walking_time_list = walking_time_list[:-1]
return db_remove_extra_events(update_event_ids, update_driving_time_list, update_walking_time_list)
else:
return event_ids, driving_time_list, walking_time_list, time_spent
In [777]:
def df_addresses(new_trip_df1, new_df_poi_travel_time):
my_lst = []
print new_trip_df1.index.values
for i in new_trip_df1.index.values:
temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.orig_idx.values]
if temp_df.shape[0]>0:
address = eval(temp_df.driving_result.values[0])['origin_addresses'][0]
my_lst.append(address)
else:
try:
temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.dest_idx.values]
address = eval(temp_df.driving_result.values[0])['destination_addresses'][0]
my_lst.append(address)
except:
print new_trip_df1, new_df_poi_travel_time
return my_lst
def check_address(index):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select address from poi_detail_table where index = %s;"%(index))
a = cur.fetchone()[0]
conn.close()
if a:
return True
else:
return False
def db_address(event_ids):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
for i in event_ids[:-1]:
if not check_address(i):
cur.execute("select driving_result from google_travel_time_table where orig_idx = %s;" %(i))
a= cur.fetchone()[0]
add = ast.literal_eval(a)['origin_addresses'][0]
cur.execute("update poi_detail_table set address = '%s' where index = %s;" %(add, i))
conn.commit()
last = event_ids[-1]
if not check_address(last):
cur.execute("select driving_result from google_travel_time_table where dest_idx = %s;" %(last))
a= cur.fetchone()[0]
add = ast.literal_eval(a)['destination_addresses'][0]
cur.execute("update poi_detail_table set address = '%s' where index = %s;" %(add, last))
conn.commit()
conn.close()
In [ ]:
In [ ]:
In [822]:
from numpy import *
test_event_ids_list = append(273, event_ids)
# event_ids
print test_event_ids_list
In [831]:
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()
In [ ]:
day_trip_locations = 'San Diego, California'
f, d, n, d= search_cluster_events(df, county, state, city, 3, day_trip_locations, full_trip_table, default = True)
In [1095]:
import time
t1=time.time()
# [index, ranking,score]
a = [[1,2,3],[2,2,6],[3,3,3],[4,3,10]]
from operator import itemgetter
print sorted(a, key=lambda x: (x[1], -x[2]))
time.time()-t1
Out[1095]:
In [123]:
'''
Most important event that will call all the functions and return the day details for the trip
'''
def search_cluster_events(df, county, state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
county, df_events =cold_start_places(df, county, state, city, n_days)
poi_coords = df_events[['coord0','coord1']]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
new_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
if not check_full_trip_id(new_trip_id, debug):
trip_location_ids = []
full_trip_details = []
for i in range(n_days):
current_events = []
big_ix = []
small_ix = []
med_ix = []
for ix, label in enumerate(kmeans.labels_):
if label == i:
event_ix = poi_coords.index[ix]
current_events.append(event_ix)
if event_ix in big.index:
big_ix.append(event_ix)
elif event_ix in med.index:
med_ix.append(event_ix)
else:
small_ix.append(event_ix)
# all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
# print 'big:', big_, 'small:', small_, 'msize:', medium_
trip_df, event_type = create_trip_df(big_,medium_,small_)
# print event_type
tour = trip_df_cloest_distance(trip_df, event_type)
# print tour
new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
# print new_tour, new_trip_df
# return new_trip_df, df_poi_travel_time
new_trip_df = new_trip_df.iloc[new_tour]
new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
# print new_trip_df1
new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
# print 'total time:', total_ti
values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
day_trip_locations.loc[len(day_trip_locations)] = values
trip_location_ids.append(values[0])
full_trip_details.extend(values[-1])
df_poi_travel_info = df_poi_travel_info.append(new_df_poi_travel_time)
full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
details = extend_full_trip_details(full_trip_details)
full_trip_table.loc[len(full_trip_table)] = ["adam", full_trip_id, str(trip_location_ids), default, county, state, details, n_days]
return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info
In [36]:
def default_search_cluster_events(df, df_counties_u, county, state, big,med, small, \
temp, n_days,day_trip_locations, full_trip_table,df_poi_travel_info):
# df_poi_travel_info = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
# 'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
# 'google_walking_url','driving_result','walking_result','google_driving_time',\
# 'google_walking_time'])
poi_coords = temp[['coord0','coord1']]
kmeans = KMeans(n_clusters=n_days, random_state=0).fit(poi_coords)
# print kmeans.labels_
full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
trip_location_ids = []
full_trip_details = []
for i in range(n_days):
current_events = []
big_ix = []
small_ix = []
med_ix = []
for ix, label in enumerate(kmeans.labels_):
if label == i:
event_ix = poi_coords.index[ix]
current_events.append(event_ix)
if event_ix in big.index:
big_ix.append(event_ix)
elif event_ix in med.index:
med_ix.append(event_ix)
else:
small_ix.append(event_ix)
all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
trip_df, event_type = create_trip_df(big_,medium_,small_)
tour = trip_df_cloest_distance(trip_df, event_type)
new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
new_trip_df = new_trip_df.iloc[new_tour]
new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
day_trip_locations.loc[len(day_trip_locations)] = values
trip_location_ids.append(values[0])
full_trip_details.extend(values[-1])
# print 'trave time df \n',new_df_poi_travel_time
df_poi_travel_info = df_poi_travel_info.append(new_df_poi_travel_time)
full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
details = extend_full_trip_details(full_trip_details)
full_trip_table.loc[len(full_trip_table)] = [user_id, full_trip_id, \
str(trip_location_ids), default, county, state, details, n_days]
return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info
In [102]:
###Next Steps: Add control from the users. funt1: allow to add events,(specific name or auto add)
### auto route to the most appropirate order
###funt2: allow to reorder the events. funt3: allow to delete the events.
###funt4: allow to switch a new event-next to the switch and x mark icon,check mark to confirm the new place and auto order
###New table for the trip info...features including trip id, event place, days, specific date, trip details. (trip tour, trip)
def ajax_available_events(county, state):
county=county.upper()
state = state.title()
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select index, name from poi_detail_table where county='%s' and state='%s'" %(county,state))
poi_lst = [item for item in cur.fetchall()]
conn.close()
return poi_lst
def add_event(trip_locations_id, event_day, event_id=None, event_name=None, full_day = True, unseen_event = False):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))
(index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
if unseen_event:
index += 1
trip_locations_id = '-'.join([str(eval(i)['id']) for i in eval(detail)])+'-'+event_name.replace(' ','-')+'-'+event_day
cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id))
a = cur.fetchone()
if bool(a):
conn.close()
return trip_locations_id, a[0]
else:
cur.execute("select max(index) from day_trip_locations")
index = cur.fetchone()[0]+1
detail = list(eval(detail))
#need to make sure the type is correct for detail!
new_event = "{'address': 'None', 'id': 'None', 'day': %s, 'name': u'%s'}"%(event_day, event_name)
detail.append(new_event)
#get the right format of detail: change from list to string and remove brackets and convert quote type
new_detail = str(detail).replace('"','').replace('[','').replace(']','').replace("'",'"')
cur.execute("INSERT INTO day_trip_locations VALUES (%i, '%s',%s,%s,'%s','%s','%s');" %(index, trip_locations_id, full_day, False, county, state, new_detail))
conn.commit()
conn.close()
return trip_locations_id, detail
else:
event_ids = add_event_cloest_distance(trip_locations_id, event_id)
event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids,event_type = 'add')
trip_locations_id = '-'.join(event_ids)+'-'+event_day
cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id))
a = cur.fetchone()
if not a:
details = []
db_address(event_ids)
for item in event_ids:
cur.execute("select index, name, address from poi_detail_table where index = '%s';" %(item))
a = cur.fetchone()
detail = {'id': a[0],'name': a[1],'address': a[2], 'day': event_day}
details.append(detail)
#need to make sure event detail can append to table!
cur.execute("insert into day_trip_table (trip_locations_id,full_day, default, county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" %( trip_location_id, full_day, False, county, state, details, event_type, event_ids))
conn.commit()
conn.close()
return trip_locations_id, details
else:
conn.close()
#need to make sure type is correct.
return trip_locations_id, a[0]
def remove_event(trip_locations_id, remove_event_id, remove_event_name=None, event_day=None, full_day = True):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))
(index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
new_event_ids = ast.literal_eval(event_ids)
new_event_ids.remove(remove_event_id)
new_trip_locations_id = '-'.join(str(event_id) for event_id in new_event_ids)
cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(new_trip_locations_id))
check_id = cur.fetchone()
if check_id:
return new_trip_locations_id, check_id[-3]
detail = ast.literal_eval(detail[1:-1])
for index, trip_detail in enumerate(detail):
if ast.literal_eval(trip_detail)['id'] == remove_event_id:
remove_index = index
break
new_detail = list(detail)
new_detail.pop(remove_index)
new_detail = str(new_detail).replace("'","''")
default = False
cur.execute("select max(index) from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))
new_index = cur.fetchone()[0]
new_index+=1
cur.execute("INSERT INTO day_trip_table VALUES (%i, '%s', %s, %s, '%s', '%s', '%s', '%s','%s');" \
%(new_index, new_trip_locations_id, full_day, default, county, state, new_detail, event_type, new_event_ids))
conn.commit()
conn.close()
return new_trip_locations_id, new_detail
def event_type_time_spent(adjusted_normal_time_spent):
if adjusted_normal_time_spent > 180:
return 'big'
elif adjusted_normal_time_spent >= 120:
return 'med'
else:
return 'small'
def switch_event_list(full_trip_id, trip_locations_id, switch_event_id, switch_event_name=None, event_day=None, full_day = True):
# new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select name, city, county, state, coord0, coord1,poi_rank, adjusted_normal_time_spent from poi_detail_table where index=%s" %(switch_event_id))
name, city, county, state,coord0, coord1,poi_rank, adjusted_normal_time_spent = cur.fetchone()
event_type = event_type_time_spent(adjusted_normal_time_spent)
avialable_lst = ajax_available_events(county, state)
cur.execute("select trip_location_ids,details from full_trip_table where full_trip_id=%s" %(full_trip_id))
full_trip_detail = cur.fetchone()
full_trip_detail = ast.literal_eval(full_trip_detail)
full_trip_ids = [ast.literal_eval(item)['id'] for item in full_trip_detail]
switch_lst = []
for item in avialable_lst:
index = item[0]
if index not in full_trip_ids:
event_ids = [switch_event_id, index]
event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids, event_type='switch')
if min(driving_time_list[0], walking_time_list[0]) <= 60:
cur.execute("select poi_rank, rating, adjusted_normal_time_spent from poi_detail_table where index=%s" %(index))
target_poi_rank, target_rating, target_adjusted_normal_time_spent = cur.fetchone()
target_event_type = event_type_time_spent(target_adjusted_normal_time_spent)
switch_lst.append([target_poi_rank, target_rating, target_event_type==event_type])
#need to sort target_event_type, target_poi_rank and target_rating
return {switch_event_id: switch_lst}
def switch_event(trip_locations_id, switch_event_id, final_event_id, event_day):
new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
new_trip_locations_id, new_detail = add_event(new_trip_locations_id, event_day, final_event_id, full_day = True, unseen_event = False)
return new_trip_locations_id, new_detail
In [113]:
ajax_available_events(county='San Francisco', state = "California")
county='San Francisco'.upper()
state = "California"
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select index, name from poi_detail_table where county='%s' and state='%s'" %(county,state))
full_trip_id = 'CALIFORNIA-SAN-DIEGO-1-3'
cur.execute("select details from full_trip_table where full_trip_id='%s'" %(full_trip_id))
full_trip_detail = cur.fetchone()[0]
full_trip_detail = ast.literal_eval(full_trip_detail)
[ast.literal_eval(item)['id'] for item in full_trip_detail]
Out[113]:
In [89]:
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
remove_event_id = 3486
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))
(index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
# event_ids = ast.literal_eval(event_ids)
# print detail, '\n'
new_event_ids = ast.literal_eval(event_ids)
new_event_ids.remove(remove_event_id)
new_trip_locations_id = '-'.join(str(id_) for id_ in new_event_ids)
# event_ids.remove(remove_event_id)
detail = ast.literal_eval(detail[1:-1])
print type(detail[0])
for index, trip_detail in enumerate(detail):
if ast.literal_eval(trip_detail)['id'] == remove_event_id:
remove_index = index
break
new_detail = list(detail)
new_detail.pop(remove_index)
new_detail = str(new_detail).replace("'","''")
In [148]:
import numpy as np
#Tasks:
#0. Run the initial to debug with all the cities and counties for the poi_detail_table in hand.
#1. Continue working on add/suggest/remove features
#2. Start the new feature that allows user to generate the google map route for the day
#3. new feature that allows user to explore outside the city from a direction away from the started location
#4. get all the state and national park data into database and rework the ranking system and the poi_detail_table!
remove_event_id = 3486
#3.Travel Outside:
current_city = 'San Francisco'
current_state = 'California'
direction = 'N'
n_days = 1
#Travel outside: 1 day 2hr driving, first set up distL2 < 1.7. Find all the locations with in the range.
#And exclude the started city. Need to take care of total travel time. should be less than 5 hrs for 1 day round trip
#for 2 days: total 50% more driving time than 1 day.
#for 3 days: total 100% more driving time than 1 day
a,b =np.array([36.50641,-121.1095823]), np.array([37.0792134,-121.9502674])
def angle_between(p1, p2):
ang1 = np.arctan2(*p1[::-1])
ang2 = np.arctan2(*p2[::-1])
return np.rad2deg((ang1 - ang2) % (2 * np.pi))
def direction_from_orgin(start_coord_long, start_coord_lat, target_coord_long, target_coord_lat):
diff = [target_coord_long - start_coord_long, target_coord_lat- start_coord_lat]
angle = angle_between(diff,[0,0])
if (angle > 45) and (angle < 135):
return 'N'
elif (angle > 135) and (angle < 215):
return 'W'
elif (angle > 215) and (angle < 305):
return 'S'
else:
return 'E'
def travel_outside_coords(current_city, current_state, direction=None, n_days=1):
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
#coord_long, coord_lat
cur.execute("select coord0, coord1 from all_cities_coords where city ='%s' and state = '%s';" %(current_city, current_state))
coord0, coord1 = cur.fetchone()
#city, coord_lat, coord_long
cur.execute("select distinct city, coord0, coord1 from all_cities_coords where city !='%s' and state = '%s';" %(current_city, current_state))
coords = cur.fetchall()
conn.close()
return coords, coord0, coord1
In [266]:
#Get events outside the city!!!
target_direction = 'N'
#possible city coords, target city coord_lat, target city coord_long
coords, coord_lat, coord_long = travel_outside_coords(current_city, current_state)
#coords: city, lat, long
check_cities_info = []
for item in coords:
direction = direction_from_orgin(coord_long, coord_lat, item[2], item[1])
if (target_direction == direction) and (distL2([item[1], item[2]], [coord_lat, coord_long]) <1.7):
check_cities_info.append(item)
city_infos = []
for city, _, _ in check_cities_info:
county = None
#index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating
city_info = db_start_location(county, current_state, city)
city_infos.extend(city_info)
city_infos = np.array(city_infos)
poi_coords = city_infos[:,1:3]
n_routes = sum(1 for t in np.array(city_infos)[:,3] if t >= 120)
if (n_routes>1) and (city_infos.shape[0]>=10):
kmeans = KMeans(n_clusters=n_routes).fit(poi_coords)
elif (city_infos.shape[0]> 20) or (n_routes>1):
kmeans = KMeans(n_clusters=2).fit(poi_coords)
else:
kmeans = KMeans(n_clusters=1).fit(poi_coords)
route_labels = kmeans.labels_
print n_routes, len(route_labels), city_infos.shape
print route_labels
for i in range(n_routes):
current_events, big_ix, med_ix, small_ix = [], [],[], []
for ix, label in enumerate(route_labels):
if label == i:
time = city_infos[ix,3]
event_ix = city_infos[ix,0]
current_events.append(event_ix)
if time > 180 :
big_ix.append(ix)
elif time >= 120 :
med_ix.append(ix)
else:
small_ix.append(ix)
big_ = sorted_events(city_infos, big_ix)
med_ = sorted_events(city_infos, med_ix)
small_ = sorted_events(city_infos, small_ix)
print big_, len(big_), len(med_), len(small_)
# need to update!!!!!!!!
event_ids, event_type = create_event_id_list(big_, med_, small_)
event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type)
event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
# db_address(event_ids)
print i, event_ids
values = db_day_trip(event_ids, county, state, default, full_day,n_days,i)
In [232]:
db_start_location(county, current_state, city).shape
Out[232]:
In [50]:
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
# cur.execute("select index, name, coord0, coord1 from poi_detail_table where city !='%s' and state = '%s';" %(current_city, current_state))
cur.execute("select distinct city, state from poi_detail_table;" )
In [51]:
all_cities = cur.fetchall()
In [195]:
from geopy.geocoders import Nominatim
geolocator = Nominatim()
location = geolocator.geocode("343 Vernon St., San Francisco, California")
print location.latitude, location.longitude
In [68]:
for items in all_cities:
if cities_coords[cities_coords['state'] == items[1]][cities_coords.city == items[0]].shape[0] == 0:
location_name = ', '.join([items[0], items[1]])
location = geolocator.geocode(location_name)
cities_coords.loc[len(cities_coords)] = [999,items[0], items[1], 'US', location.latitude, location.longitude]
print cities_coords.loc(len(cities_coords))
In [33]:
cities_coords = pd.read_csv('cities_coords.csv', header=None)
In [35]:
cities_coords.columns = ['area_code','city','state','nation', 'coord0','coord1']
In [43]:
cities_coords[cities_coords['state'] == current_state][cities_coords.area_code == 415]
Out[43]:
In [66]:
[999,items[0], items[1], 'US', location.latitude, location.longitude]
Out[66]:
In [70]:
cities_coords.to_csv('all_cities_coords.csv')
In [26]:
engine = create_engine('postgresql://zoesh@localhost:5432/travel_with_friends')
new_cities_coords.to_sql('all_cities_coords',engine, if_exists = "replace")
In [2]:
import pandas as pd
cities_coords = pd.read_csv('all_cities_coords.csv')
In [269]:
cities_coords
Out[269]:
In [270]:
new_cities_coords = cities_coords[['city', 'state','nation','coord0','coord1']].drop_duplicates()
print new_cities_coords.shape
In [271]:
new_cities_coords
Out[271]:
In [14]:
new_cities_coords = new_cities_coords.drop_duplicates()
In [27]:
new_cities_coords.reset_index?
In [25]:
new_cities_coords.to_csv('all_cities_coords2.csv')
In [279]:
def calculate_initial_compass_bearing(pointA, pointB):
"""
Calculates the bearing between two points.
The formulae used is the following:
θ = atan2(sin(Δlong).cos(lat2),
cos(lat1).sin(lat2) − sin(lat1).cos(lat2).cos(Δlong))
:Parameters:
- `pointA: The tuple representing the latitude/longitude for the
first point. Latitude and longitude must be in decimal degrees
- `pointB: The tuple representing the latitude/longitude for the
second point. Latitude and longitude must be in decimal degrees
:Returns:
The bearing in degrees
:Returns Type:
float
"""
if (type(pointA) != tuple) or (type(pointB) != tuple):
raise TypeError("Only tuples are supported as arguments")
lat1 = math.radians(pointA[0])
lat2 = math.radians(pointB[0])
diffLong = math.radians(pointB[1] - pointA[1])
x = math.sin(diffLong) * math.cos(lat2)
y = math.cos(lat1) * math.sin(lat2) - (math.sin(lat1)
* math.cos(lat2) * math.cos(diffLong))
initial_bearing = math.atan2(x, y)
# Now we have the initial bearing but math.atan2 return values
# from -180° to + 180° which is not what we want for a compass bearing
# The solution is to normalize the initial bearing as shown below
initial_bearing = math.degrees(initial_bearing)
compass_bearing = (initial_bearing + 360) % 360
return compass_bearing
In [297]:
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(123))
not cur.fetchone()
Out[297]:
In [295]:
a = cur.fetchone()
In [310]:
np.concatenate(np.array([0, 91,23]),np.zeros((len(event_ids), 3)))
In [320]:
t = np.zeros((len(event_ids), 3))
points = np.vstack((np.array(['test',91,23]),np.zeros((len(event_ids), 3))))
In [322]:
points
Out[322]:
In [338]:
t = []
t.extend(['12']*2)
In [339]:
t
Out[339]:
In [ ]: