In [ ]:
import pandas as pd
import numpy as np
import requests
import psycopg2
import json
import simplejson
import urllib
import config
import ast
from operator import itemgetter
from sklearn.cluster import KMeans
from sqlalchemy import create_engine
In [ ]:
!pip install --upgrade pip
!pip install sqlalchemy
!pip install psycopg2
!pip install simplejson
!pip install config
In [ ]:
conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"
# conn_str = "dbname='travel_with_friends' user='Zoesh' host='localhost'"
In [ ]:
import math
import random
def distL2(p1, p2):
    """Compute the L2-norm (Euclidean) distance between two points.

    Parameters are two (x, y) coordinate tuples.  Fixed to unpack inside
    the body: the original `def distL2((x1,y1), (x2,y2))` tuple-parameter
    syntax is Python-2-only; this form works on both 2 and 3.

    NOTE(review): the docstring claimed the result is "rounded to the
    closest integer" per TSPLIB convention, but the code only adds 0.5 and
    never truncates (no int()).  Behavior kept as-is — confirm intent.
    """
    (x1, y1), (x2, y2) = p1, p2
    xdiff = x2 - x1
    ydiff = y2 - y1
    return math.sqrt(xdiff*xdiff + ydiff*ydiff) + .5
def distL1(p1, p2):
    """Compute the L1-norm (Manhattan) distance between two points.

    Parameters are two (x, y) coordinate tuples.  Fixed to unpack inside
    the body: the original tuple-parameter syntax is Python-2-only.

    NOTE(review): as with distL2, the TSPLIB "rounded to closest integer"
    claim is not implemented — only +0.5 is added.  Kept as-is.
    """
    (x1, y1), (x2, y2) = p1, p2
    return abs(x2 - x1) + abs(y2 - y1) + .5
def mk_matrix(coord, dist):
    """Build a symmetric distance matrix for a set of points.

    Parameters:
      coord -- list of (x, y) tuples, one per point
      dist  -- distance function taking two (x, y) tuples

    Returns (n, D) where n is the number of points and D is a dict
    mapping each ordered pair (i, j), i != j, to their distance.
    """
    n = len(coord)
    D = {}
    for i, (x1, y1) in enumerate(coord):
        for j in range(i + 1, n):
            x2, y2 = coord[j]
            d = dist((x1, y1), (x2, y2))
            # store both orientations: the matrix is symmetric
            D[i, j] = d
            D[j, i] = d
    return n, D
def read_tsplib(filename):
    """Read a TSP problem in TSPLIB format.

    NOTE: only handles 2D Euclidean (EUC_2D) or Manhattan (MAN_2D)
    instances.  Returns (n, xy_positions, D) — see mk_matrix.

    Fixes over the original: the file handle is now closed via a context
    manager (it was leaked), the unsupported-metric error raises with a
    message instead of a bare Exception after a py2 print, and the py2
    print statement is gone.
    NOTE(review): like the original, this loops forever if the file lacks
    the expected section markers or an "EOF" line — confirm inputs.
    """
    with open(filename, 'r') as f:
        line = f.readline()
        while line.find("EDGE_WEIGHT_TYPE") == -1:
            line = f.readline()
        if line.find("EUC_2D") != -1:
            dist = distL2
        elif line.find("MAN_2D") != -1:
            dist = distL1
        else:
            raise Exception("cannot deal with non-euclidean or non-manhattan distances")
        while line.find("NODE_COORD_SECTION") == -1:
            line = f.readline()
        xy_positions = []
        while 1:
            line = f.readline()
            if line.find("EOF") != -1:
                break
            (i, x, y) = line.split()
            xy_positions.append((float(x), float(y)))
    n, D = mk_matrix(xy_positions, dist)
    return n, xy_positions, D
def mk_closest(D, n):
    """Build, for every node, its neighbors sorted by increasing distance.

    Returns a list C where C[i] is [(d1, j1), (d2, j2), ...]: each tuple
    pairs a distance from node i with the corresponding other node.
    """
    return [
        sorted((D[i, j], j) for j in range(n) if j != i)
        for i in range(n)
    ]
def length(tour, D):
    """Return the total length of 'tour' under distance matrix 'D'.

    Includes the closing edge from the last city back to the first.
    """
    # closing edge first, then every consecutive pair along the tour
    total = D[tour[-1], tour[0]]
    for prev, cur in zip(tour, tour[1:]):
        total += D[cur, prev]
    return total
def randtour(n):
    """Construct a random tour of size 'n' (a shuffled permutation of 0..n-1).

    Fixed for Python 3: range() no longer returns a list, so
    random.shuffle(range(n)) raises TypeError; materialize it first.
    """
    sol = list(range(n))  # [0, 1, ..., n-1]
    random.shuffle(sol)   # in-place random permutation
    return sol
def nearest(last, unvisited, D):
    """Return the node in 'unvisited' closest to 'last' under matrix 'D'.

    Ties are broken in favor of the earliest element of 'unvisited',
    matching the original strict-less-than scan.
    """
    # min() keeps the first minimal element, so tie-breaking is identical
    # to the original explicit loop
    return min(unvisited, key=lambda node: D[last, node])
def nearest_neighbor(n, i, D):
    """Return a tour starting from city 'i', using the Nearest Neighbor heuristic.

    - start visiting city i
    - while there are unvisited cities, follow to the closest one
    - return to city i (the closing edge is implicit in length())

    Fixes: list(range(n)) so .remove() works on Python 3, and the loop
    variable no longer shadows the builtin 'next'.
    """
    unvisited = list(range(n))
    unvisited.remove(i)
    last = i
    tour = [i]
    while unvisited:
        nxt = nearest(last, unvisited, D)
        tour.append(nxt)
        unvisited.remove(nxt)
        last = nxt
    return tour
def exchange_cost(tour, i, j, D):
    """Cost variation of a 2-opt exchange on 'tour'.

    Computes the change in tour length if arcs (i, i+1) and (j, j+1) are
    removed and replaced by (i, j) and (i+1, j+1); successor indices wrap
    around the end of the tour.

    Parameters:
      tour -- a tour (list of nodes)
      i    -- position of the first arc
      j    -- position of the second arc, with j > i
    """
    wrap = len(tour)
    a = tour[i]
    b = tour[(i + 1) % wrap]
    c = tour[j]
    d = tour[(j + 1) % wrap]
    added = D[a, c] + D[b, d]
    removed = D[a, b] + D[c, d]
    return added - removed
def exchange(tour, tinv, i, j):
    """Apply a 2-opt exchange: replace arcs (i,i+1),(j,j+1) with (i,j),(i+1,j+1).

    Implemented by reversing the segment of 'tour' strictly after position
    i up to and including position j, then refreshing the position index
    'tinv' (tinv[city] == position of city in tour) for the moved cities.
    Both arguments are mutated in place.
    """
    n = len(tour)
    if i > j:
        i, j = j, i
    assert i >= 0 and i < j - 1 and j < n
    # reverse the enclosed segment via a slice assignment
    tour[i + 1:j + 1] = tour[i + 1:j + 1][::-1]
    # re-index every city whose position just changed
    for pos in range(i + 1, j + 1):
        tinv[tour[pos]] = pos
def improve(tour, z, D, C):
    """Try to improve tour 't' by exchanging arcs; return improved tour length.
    If possible, make a series of local improvements on the solution 'tour',
    using a breadth first strategy, until reaching a local optimum.

    Parameters:
      tour -- current tour (list of nodes); MUTATED in place via exchange()
      z    -- current tour length
      D    -- distance matrix, dict keyed by (i, j)
      C    -- per-node neighbor lists sorted by distance (see mk_closest)
    """
    n = len(tour)
    tinv = [0 for i in tour]
    for k in range(n):
        tinv[tour[k]] = k  # position of each city in 't'
    for i in range(n):
        a, b = tour[i], tour[(i+1)%n]
        dist_ab = D[a,b]
        improved = False
        # First pass: look for a city c close to a such that replacing
        # arcs (a,b),(c,d) by (a,c),(b,d) shortens the tour.
        for dist_ac, c in C[a]:
            if dist_ac >= dist_ab:
                break  # neighbor list is sorted: no closer candidate remains
            j = tinv[c]
            d = tour[(j+1)%n]  # successor of c in the current tour
            dist_cd = D[c,d]
            dist_bd = D[b,d]
            delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
            if delta < 0:  # exchange decreases length
                exchange(tour, tinv, i, j);
                z += delta
                improved = True
                break
        if improved:
            continue
        # Second pass (symmetric): look for a city d close to b, treating
        # d's predecessor as c.
        for dist_bd, d in C[b]:
            if dist_bd >= dist_ab:
                break
            j = tinv[d]-1  # position of d's predecessor
            if j==-1:
                j=n-1  # wrap around when d is the first city
            c = tour[j]
            dist_cd = D[c,d]
            dist_ac = D[a,c]
            delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
            if delta < 0:  # exchange decreases length
                exchange(tour, tinv, i, j);
                z += delta
                break
    return z
def localsearch(tour, z, D, C=None):
    """Obtain a local optimum starting from solution 'tour'; return its length.

    Parameters:
      tour -- initial tour (mutated in place by improve/exchange)
      z    -- length of the initial tour
      D    -- distance matrix
      C    -- optional precomputed neighbor lists (built here if omitted)

    Fix: compare against None with 'is' rather than '==' (identity, not
    equality, is the intended check).
    """
    n = len(tour)
    if C is None:
        C = mk_closest(D, n)  # sorted list of distances to each node
    # keep applying 2-opt passes until a pass yields no improvement
    while True:
        newz = improve(tour, z, D, C)
        if newz >= z:
            break
        z = newz
    return z
def multistart_localsearch(k, n, D, report=None):
    """Do k iterations of local search, starting from random solutions.

    Parameters:
      k      -- number of iterations
      n      -- number of cities
      D      -- distance matrix
      report -- if not None, called as report(z, tour) after each iteration

    Returns the best tour found and its cost.

    Fix: the original tested `z < bestz or bestz == None`, which raises
    TypeError on Python 3 on the first iteration (int < None); the None
    check must come first.
    """
    C = mk_closest(D, n)  # sorted list of distances to each node
    bestt = None
    bestz = None
    for _ in range(k):
        tour = randtour(n)
        z = length(tour, D)
        z = localsearch(tour, z, D, C)
        if bestz is None or z < bestz:
            bestz = z
            bestt = list(tour)
        if report:
            report(z, tour)
    return bestt, bestz
In [ ]:
# db_name = "travel_with_friends"
# TABLES ={}
# TABLES['full_trip_table'] = (
# "CREATE TABLE `full_trip_table` ("
# " `user_id` int(11) NOT NULL AUTO_INCREMENT,"
# " `full_trip_id` date NOT NULL,"
# " `trip_location_ids` varchar(14) NOT NULL,"
# " `default` varchar(16) NOT NULL,"
# " `county` enum('M','F') NOT NULL,"
# " `state` date NOT NULL,"
# " `details` ,"
# " `n_days`,"
# " PRIMARY KEY (`full_trip_id`)"
# ") ENGINE=InnoDB")
In [ ]:
# def create_tables():
# """ create tables in the PostgreSQL database"""
# commands = (
# """
# CREATE TABLE full_trip_table (
# index INTEGER PRIMARY KEY,
# user_id VARCHAR(225) NOT NULL,
# full_trip_id VARCHAR(225) NOT NULL,
# trip_location_ids VARCHAR(225),
# default BOOLEAN NOT NULL,
# county VARCHAR(225) NOT NULL,
# state VARCHAR(225) NOT NULL,
# details VARCHAR(MAX),
# n_days VARCHAR(225) NOT NULL
# )
# """,
# """ CREATE TABLE day_trip_table (
# trip_locations_id
# full_day
# default
# county
# state
# details
# )
# """,
# """
# CREATE TABLE poi_detail_table (
# part_id INTEGER PRIMARY KEY,
# file_extension VARCHAR(5) NOT NULL,
# drawing_data BYTEA NOT NULL,
# FOREIGN KEY (part_id)
# REFERENCES parts (part_id)
# ON UPDATE CASCADE ON DELETE CASCADE
# )
# """,
# """
# CREATE TABLE google_travel_time_table (
# index INTEGER PRIMARY KEY,
# id_ VARCHAR NOT NULL,
# orig_name VARCHAR,
# orig_idx VARCHAR,
# dest_name VARCHAR,
# dest_idx VARCHAR,
# orig_coord0 INTEGER,
# orig_coord1 INTEGER,
# dest_coord0 INTEGER,
# dest_coord1 INTEGER,
# orig_coords VARCHAR,
# dest_coords VARCHAR,
# google_driving_url VARCHAR,
# google_walking_url VARCHAR,
# driving_result VARCHAR,
# walking_result VARCHAR,
# google_driving_time INTEGER,
# google_walking_time INTEGER
# )
# """)
# conn = None
# try:
# # read the connection parameters
# params = config()
# # connect to the PostgreSQL server
# conn = psycopg2.connect(**params)
# cur = conn.cursor()
# # create table one by one
# for command in commands:
# cur.execute(command)
# # close communication with the PostgreSQL database server
# cur.close()
# # commit the changes
# conn.commit()
# except (Exception, psycopg2.DatabaseError) as error:
# print(error)
# finally:
# if conn is not None:
# conn.close()
In [ ]:
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])
# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])
# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
# 'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
# 'google_walking_url','driving_result','walking_result','google_driving_time',\
# 'google_walking_time'])
In [ ]:
# read poi details csv file
poi_detail = pd.read_csv("./step9_poi.csv", index_col=0)
poi_detail['address'] = None  # placeholder column; populated later from API results
poi_detail['rating']=poi_detail['rating'].fillna(0)  # treat missing ratings as 0
#read US city state and county csv file (pipe-delimited)
df_counties = pd.read_csv('./us_cities_states_counties.csv',sep='|')
#find counties without duplicate
df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()
In [ ]:
In [ ]:
def init_db_tables():
    """Create and seed the Postgres tables used by the trip planner.

    Builds empty DataFrames with the expected schemas, inserts one sample
    row each (San Diego example trip), then writes them plus the POI and
    county reference data to Postgres.  NOTE(review): if_exists="replace"
    drops any existing data in these tables every time this runs.
    """
    full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])
    day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details','event_type','event_ids'])
    google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
                                                      'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
                                                      'google_walking_url','driving_result','walking_result','google_driving_time',\
                                                      'google_walking_time'])
    # sample day-trip row: one clustered day of a San Diego trip
    day_trip_locations_table.loc[0] = ['CALIFORNIA-SAN-DIEGO-1-3-0', True, True, 'SAN DIEGO', 'California',
                                       ["{'address': '15500 San Pasqual Valley Rd, Escondido, CA 92027, USA', 'id': 2259, 'day': 0, 'name': u'San Diego Zoo Safari Park'}", "{'address': 'Safari Walk, Escondido, CA 92027, USA', 'id': 2260, 'day': 0, 'name': u'Meerkat'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3486, 'day': 0, 'name': u'Stone'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3487, 'day': 0, 'name': u'Stone Brewery'}", "{'address': 'Mount Woodson Trail, Poway, CA 92064, USA', 'id': 4951, 'day': 0, 'name': u'Lake Poway'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4953, 'day': 0, 'name': u'Potato Chip Rock'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4952, 'day': 0, 'name': u'Mt. Woodson'}"],
                                       'big','[2259, 2260,3486,3487,4951,4953,4952]']
    # sample cached Google Distance Matrix result between two POIs
    google_travel_time_table.loc[0] = ['439300002871', u'Moonlight Beach', 4393.0,
                                       u'Carlsbad Flower Fields', 2871.0, -117.29692141333341,
                                       33.047769600024424, -117.3177652511278, 33.124079753475236,
                                       '33.0477696,-117.296921413', '33.1240797535,-117.317765251',
                                       'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=driving&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
                                       'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=walking&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
                                       "{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '14 mins', 'value': 822}, 'distance': {'text': '10.6 km', 'value': 10637}, 'status': 'OK'}]}], 'origin_addresses': ['233 C St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
                                       "{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '2 hours 4 mins', 'value': 7457}, 'distance': {'text': '10.0 km', 'value': 10028}, 'status': 'OK'}]}], 'origin_addresses': ['498 B St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
                                       13.0, 124.0]
    # sample full-trip row: a 3-day San Diego itinerary owned by a user
    full_trip_table.loc[0] = ['gordon_lee01', 'CALIFORNIA-SAN-DIEGO-1-3',
                              "['CALIFORNIA-SAN-DIEGO-1-3-0', 'CALIFORNIA-SAN-DIEGO-1-3-1', 'CALIFORNIA-SAN-DIEGO-1-3-2']",
                              True, 'SAN DIEGO', 'California',
                              '["{\'address\': \'15500 San Pasqual Valley Rd, Escondido, CA 92027, USA\', \'id\': 2259, \'day\': 0, \'name\': u\'San Diego Zoo Safari Park\'}", "{\'address\': \'Safari Walk, Escondido, CA 92027, USA\', \'id\': 2260, \'day\': 0, \'name\': u\'Meerkat\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3486, \'day\': 0, \'name\': u\'Stone\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3487, \'day\': 0, \'name\': u\'Stone Brewery\'}", "{\'address\': \'Mount Woodson Trail, Poway, CA 92064, USA\', \'id\': 4951, \'day\': 0, \'name\': u\'Lake Poway\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4953, \'day\': 0, \'name\': u\'Potato Chip Rock\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4952, \'day\': 0, \'name\': u\'Mt. Woodson\'}", "{\'address\': \'1 Legoland Dr, Carlsbad, CA 92008, USA\', \'id\': 2870, \'day\': 1, \'name\': u\'Legoland\'}", "{\'address\': \'5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA\', \'id\': 2871, \'day\': 1, \'name\': u\'Carlsbad Flower Fields\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2089, \'day\': 1, \'name\': u\'Oceanside Pier\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2090, \'day\': 1, \'name\': u\'Pier\'}", "{\'address\': \'1016-1024 Neptune Ave, Encinitas, CA 92024, USA\', \'id\': 2872, \'day\': 1, \'name\': u\'Encinitas\'}", "{\'address\': \'625 Pan American Rd E, San Diego, CA 92101, USA\', \'id\': 147, \'day\': 2, \'name\': u\'Balboa Park\'}", "{\'address\': \'1849-1863 Zoo Pl, San Diego, CA 92101, USA\', \'id\': 152, \'day\': 2, \'name\': u\'San Diego Zoo\'}", "{\'address\': \'701-817 Coast Blvd, La Jolla, CA 92037, USA\', \'id\': 148, \'day\': 2, \'name\': u\'La Jolla\'}", "{\'address\': \'10051-10057 Pebble Beach Dr, Santee, CA 92071, USA\', \'id\': 4630, \'day\': 2, \'name\': u\'Santee Lakes\'}", "{\'address\': \'Lake Murray Bike Path, La Mesa, CA 91942, USA\', \'id\': 4545, \'day\': 2, \'name\': u\'Lake Murray\'}", "{\'address\': \'4905 Mt Helix Dr, La Mesa, CA 91941, USA\', \'id\': 4544, \'day\': 2, \'name\': u\'Mt. Helix\'}", "{\'address\': \'1720 Melrose Ave, Chula Vista, CA 91911, USA\', \'id\': 1325, \'day\': 2, \'name\': u\'Thick-billed Kingbird\'}", "{\'address\': \'711 Basswood Ave, Imperial Beach, CA 91932, USA\', \'id\': 1326, \'day\': 2, \'name\': u\'Lesser Sand-Plover\'}"]',
                              3.0]
    engine = create_engine('postgresql://zoesh@localhost:5432/travel_with_friends')
    # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
    # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')
    full_trip_table.to_sql('full_trip_table',engine, if_exists = "replace")
    day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "replace")
    google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "replace")
    poi_detail.to_sql('poi_detail_table',engine, if_exists = "replace")
    # county reference data (absolute path — machine-specific)
    df_counties = pd.read_csv('/Users/zoesh/Desktop/travel_with_friends/travel_with_friends/us_cities_states_counties.csv',sep='|')
    df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()
    df_counties_u.columns = ["city","state_abb","state","county"]
    df_counties_u.to_sql('county_table',engine, if_exists = "replace")
init_db_tables()
In [ ]:
def cold_start_places(df, county, state, city, number_days, first_day_full = True, last_day_full = True):
    """Pick the POI rows matching a resolved county, or fall back to city.

    Parameters:
      df     -- POI DataFrame with 'county'/'state'/'city' columns
      county -- pandas Series of candidate county matches (may be empty)
      state, city -- location strings used for filtering
      number_days, first_day_full, last_day_full -- accepted but unused here

    Returns (county, subset): county is the first matched county name when
    one exists (otherwise the original empty Series), subset is the
    filtered DataFrame.
    """
    matched = len(county.values) != 0
    if matched:
        county = county.values[0]
        mask = (df['county'] == county) & (df['state'] == state)
    else:
        # no county match: fall back to filtering on the city name
        mask = (df['city'] == city) & (df['state'] == state)
    return county, df[mask]
def find_county(state, city):
    """Look up the county for a (city, state) pair in county_table.

    Returns the county name, or None when there is no match.

    Fix: the original interpolated user-supplied strings straight into the
    SQL text ('%s' % value), which breaks on apostrophes (e.g. "Coeur
    d'Alene") and is SQL-injectable; use psycopg2 query parameters.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select county from county_table where city = %s and state = %s;",
                (city.title(), state.title()))
    county = cur.fetchone()
    conn.close()
    return county[0] if county else None
def db_start_location(county, state, city):
    """Fetch candidate POIs for a county (preferred) or city from Postgres.

    Returns a numpy array with columns (index, coord0, coord1,
    adjusted_normal_time_spent, poi_rank, rating).

    Fixes: parameterized queries instead of '%s' string interpolation
    (apostrophe-safe, injection-safe) and a py3-compatible print call.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    if county:
        cur.execute("select index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating "
                    "from poi_detail_table where county = %s and state = %s;",
                    (county.upper(), state))
    else:
        print("else")  # debug trace kept from the original
        cur.execute("select index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating "
                    "from poi_detail_table where city = %s and state = %s;",
                    (city, state))
    rows = cur.fetchall()
    conn.close()
    return np.array(rows)
In [ ]:
# Scratch cell: cluster San Francisco POIs into n_days k-means groups and
# bucket the events of cluster 0 by expected visit duration (minutes).
a1= db_start_location('San Francisco',"California","San Francisco")
poi_coords = a1[:,1:3]  # (coord0, coord1) columns
n_days = 1
current_events =[]
big_ix, med_ix, small_ix =[],[],[]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
i = 0  # only inspect cluster label 0
for ix, label in enumerate(kmeans.labels_):
    # print ix, label
    if label == i:
        time = a1[ix,3]  # adjusted_normal_time_spent
        event_ix = a1[ix,0]  # poi index
        current_events.append(event_ix)
        if time > 180 :
            big_ix.append(ix)   # over 3 hours: "big" event
        elif time >= 120 :
            med_ix.append(ix)   # 2-3 hours: "med" event
        else:
            small_ix.append(ix) # under 2 hours: "small" event
print big_ix, med_ix, small_ix
# big_ = a1[[big_ix],4:]
# print med_ix
# print a1
# keep (index, poi_rank, rating) columns for each bucket
big_ = a1[big_ix][:,[0,4,5]]
med_ = a1[med_ix][:,[0,4,5]]
small_ = a1[small_ix][:,[0,4,5]]
In [ ]:
# NOTE(review): sorted_events is defined in a later cell; this cell only
# works once that cell has been executed.
list_a1 = sorted_events(a1,small_ix)
In [ ]:
# Scratch: compare itemgetter vs lambda sort keys (rank asc, rating desc).
from operator import itemgetter
med_s= sorted(med_, key=itemgetter(1,2))
a = [[1,3,3],[2,2,5],[3,4,4],[4,2,2]]
# [sorted(a, key=itemgetter(1,2)) for i in range(100)]
b = sorted(a, key=itemgetter(1,2))
a.sort(key=lambda k: (k[1], -k[2]), reverse=False)
# print med_s
print a
In [ ]:
In [ ]:
In [ ]:
def default_cold_start_places(df,df_counties_u, day_trip_locations,full_trip_table,df_poi_travel_info,number_days = [1,2,3,4,5]):
    """Batch-generate default trips for a slice of counties.

    Iterates (state, county) pairs 105:150 of the county reference table;
    for each county that has POIs, buckets events by visit duration and
    builds default itineraries for each candidate trip length via
    default_search_cluster_events (defined elsewhere in this notebook).
    Results accumulate in the passed tables; always returns None.

    NOTE(review): number_days is rebound (not mutated), so the mutable
    default is safe here, but once shrunk for a small county it stays
    shrunk for all later counties in the loop — confirm that is intended.
    """
    # counts per (state, county); only the first two columns are used below
    df_c = df_counties_u.groupby(['State full','County']).count().reset_index()
    for state, county,_,_ in df_c.values[105:150]:
        temp_df = df[(df['county'] == county) & (df['state'] == state)]
        if temp_df.shape[0]!=0:
            # shrink candidate trip lengths when the county has few activities
            if sum(temp_df.adjusted_normal_time_spent) < 360:
                number_days = [1]      # under 6 hours of activities total
            elif sum(temp_df.adjusted_normal_time_spent) < 720:
                number_days = [1,2]    # under 12 hours
            # bucket by expected visit duration (minutes)
            big_events = temp_df[temp_df.adjusted_normal_time_spent > 180]
            med_events = temp_df[(temp_df.adjusted_normal_time_spent>= 120)&(temp_df.adjusted_normal_time_spent<=180)]
            small_events = temp_df[temp_df.adjusted_normal_time_spent < 120]
            for i in number_days:
                n_days = i
                full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info = \
                    default_search_cluster_events(df, df_counties_u, county, state, big_events,med_events, \
                                                  small_events, temp_df, n_days,day_trip_locations, full_trip_table,\
                                                  df_poi_travel_info)
            print county, state
            print full_trip_table.shape, len(day_trip_locations), new_trip_df1.shape, df_poi_travel_info.shape
    return None
In [ ]:
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])
# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])
# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
# 'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
# 'google_walking_url','driving_result','walking_result','google_driving_time',\
# 'google_walking_time'])
# google_travel_time_table.loc[0] = ['000000000001', "home", '0000', 'space', '0001', 999 ,999, 999.1,999.1, "999,999","999.1,999.1","http://google.com","http://google.com", "", "", 0, 0 ]
In [ ]:
# google_travel_time_table.index=google_travel_time_table.index.astype(int)
In [ ]:
# engine = create_engine('postgresql://Gon@localhost:5432/travel_with_friends')
# # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
# # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')
# full_trip_table.to_sql('full_trip_table',engine, if_exists = "append")
# day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "append")
# google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "append")
# # df.to_sql('poi_detail_table',engine, if_exists = "append")
In [ ]:
In [ ]:
def trip_df_cloest_distance(trip_df, event_type):
    """Order the POIs of a day-trip DataFrame into a short tour.

    Builds a distance matrix over the (coord0, coord1) points, seeds a
    nearest-neighbor tour from a start index that depends on event_type
    ('big': last row, 'med': second-to-last, otherwise first), then
    improves it with 2-opt local search.  Returns the tour as a list of
    positional indices into trip_df.

    Fixes: the three branches duplicated identical solve code differing
    only in the start index (now factored out), and the <3-point fallback
    returns a list (range() is lazy on Python 3).
    """
    points = trip_df[['coord0','coord1']].values.tolist()
    n, D = mk_matrix(points, distL2)  # create the distance matrix
    if len(points) < 3:
        # too few stops to optimize: keep the given order
        return list(range(len(points)))
    if event_type == 'big':
        start = trip_df.shape[0] - 1
    elif event_type == 'med':
        start = trip_df.shape[0] - 2
    else:
        start = 0
    tour = nearest_neighbor(n, start, D)  # greedy tour from the chosen start
    z = length(tour, D)
    z = localsearch(tour, z, D)
    return tour
In [ ]:
In [ ]:
def get_event_ids_list(trip_locations_id):
    """Fetch a day-trip's stored event id list and event type from Postgres.

    The event_ids column stores a Python-literal list as text; it is parsed
    back with ast.literal_eval.  Fix: parameterized query instead of '%s'
    string interpolation into the SQL text.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select event_ids,event_type from day_trip_table where trip_locations_id = %s",
                (trip_locations_id,))
    event_ids, event_type = cur.fetchone()
    event_ids = ast.literal_eval(event_ids)
    conn.close()
    return event_ids, event_type
def db_event_cloest_distance(trip_locations_id=None, event_ids=None, event_type='add', new_event_id=None):
    """Re-order a day-trip's event ids into a short tour using POI coords.

    When event_ids is not supplied (or a new event is being added), the
    stored list is loaded from day_trip_table first.  Coordinates are
    fetched per event from poi_detail_table, a nearest-neighbor + 2-opt
    tour is computed, and the ids are returned in tour order along with
    the event_type.

    Fixes: parameterized SQL (was '%i' interpolation), py3-compatible
    print, and the two identical 'add'/else solve branches collapsed into
    one.
    """
    if new_event_id or not event_ids:
        event_ids, event_type = get_event_ids_list(trip_locations_id)
    if new_event_id:
        event_ids.append(new_event_id)
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    print(event_ids)  # debug trace kept from the original
    points = np.zeros((len(event_ids), 3))
    for i, v in enumerate(event_ids):
        # ids may arrive as strings/floats; coerce to int for the lookup
        cur.execute("select index, coord0, coord1 from poi_detail_table where index = %s;",
                    (int(float(v)),))
        points[i] = cur.fetchone()
    conn.close()
    n, D = mk_matrix(points[:, 1:], distL2)
    if len(points) < 3:
        # too few stops to optimize: keep the given order
        return np.array(event_ids), event_type
    # NOTE(review): the original had separate 'add'/other branches that ran
    # identical code ("need to figure out other cases"); kept as one path.
    tour = nearest_neighbor(n, 0, D)
    z = length(tour, D)
    z = localsearch(tour, z, D)
    return np.array(event_ids)[tour], event_type
In [ ]:
# Scratch cell: sanity-check np.array round-trip of an id list.
event_ids = np.array([1,2,3])
In [ ]:
np.array(event_ids)
In [ ]:
def check_full_trip_id(full_trip_id, debug):
    """Check whether a full trip exists; return its details or a boolean.

    Returns the stored 'details' text when found and debug is falsy, True
    when found and debug is truthy, False when not found.

    Fix: parameterized query (was '%s' interpolation into the SQL text).
    NOTE(review): a later cell defines another function with this same
    name that queries day_trip_table, silently shadowing this one —
    confirm which is intended at each call site.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select details from full_trip_table where full_trip_id = %s",
                (full_trip_id,))
    a = cur.fetchone()
    conn.close()
    if a:
        return True if debug else a[0]
    return False
def check_day_trip_id(day_trip_id, debug):
    """Check whether a day trip exists; return its details or a boolean.

    Returns the stored 'details' text when found and debug is falsy, True
    when found and debug is truthy, False when not found.

    Fixes: parameterized query, and a proper name — the original reused
    the name check_full_trip_id, silently shadowing the full-trip version
    defined earlier.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select details from day_trip_table where trip_locations_id = %s",
                (day_trip_id,))
    a = cur.fetchone()
    conn.close()
    if a:
        return True if debug else a[0]
    return False

# Backward-compatible alias: existing callers resolved check_full_trip_id
# to this (day-trip) implementation because the duplicate definition won.
check_full_trip_id = check_day_trip_id
def check_travel_time_id(new_id):
    """Return True if a cached travel-time row exists for this pair id.

    Fixes: parameterized query (was '%s' interpolation) and the redundant
    `if bool(a): return True else: return False` collapsed to a single
    expression.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select google_driving_time from google_travel_time_table where id_ = %s",
                (new_id,))
    a = cur.fetchone()
    conn.close()
    return a is not None
In [ ]:
def sorted_events(info, ix):
    """Select and rank the events at rows *ix* of the POI matrix.

    Keeps the (event_id, poi_rank, rating) columns (0, 4, 5) of the chosen
    rows and orders them by rank ascending, breaking ties by rating
    descending.  Returns the ordered rows as a numpy array.
    """
    subset = info[ix][:, [0, 4, 5]]
    # rank ascending; negate rating so higher ratings sort first on ties
    ordered = sorted(subset, key=lambda row: (row[1], -row[2]))
    return np.array(ordered)
In [ ]:
# Scratch cells: inspect the big/med/small event matrices and experiment
# with rank/rating sort ordering before wiring it into create_event_id_list.
# test_ai_ids, type_= create_trip_df(big_,medium_,small_)
big_.shape, med_.shape, small_.shape
In [ ]:
med_[0,1] ,small_[0,1] , med_[0,0]
# small_[0:8,0].concatenate(medium_[0:2,0])
c = small_[0:10,]
d = med_[0:2,]
print np.vstack((c,d))
# print list(np.array(sorted(np.vstack((c,d)), key=lambda x: (x[1],-x[2])))[:,0])
print list(np.array(sorted(np.vstack((small_[0:10,:],med_)), key=lambda x: (x[1],-x[2])))[:,0])
# np.vstack((small_[0:10,],med_))
In [ ]:
# NOTE(review): create_event_id_list is defined in a later cell; run that
# cell first.
a_ids, a_type=create_event_id_list(big_,med_,small_)
# conn = psycopg2.connect(conn_str)
# cur = conn.cursor()
# for i,v in enumerate(a_ids):
# # print i, v, type(v)
# cur.execute("select index, coord0, coord1 from poi_detail_table where index = %i;"%(float(v)))
# aaa = cur.fetchone()
# # print aaa
# conn.close()
# new_a_ids, new_a_type=db_event_cloest_distance(event_ids = a_ids, event_type = a_type)
# print new_a_ids, new_a_type
In [ ]:
In [ ]:
def create_event_id_list(big_, medium_, small_):
    """Assemble a day's event-id list from the big/med/small candidate tiers.

    Each tier is an (n, 3) array of (event_id, poi_rank, rating) rows,
    already sorted by rank.  A day gets either one "big" event plus up to
    6 small ones, two "med" events plus up to 8 small ones, or a blend of
    smalls (with at most one medium) re-sorted by rank/rating.

    Returns (event_ids, event_type).

    Fixes: np.concatenate rejects the 0-d scalar big_[0, 0] — use the 1-d
    slice big_[0:1, 0]; and when every tier is empty the original left
    event_ids unbound (UnboundLocalError) — now returns an empty array.
    """
    event_type = ''
    event_ids = np.array([])  # fallback when every tier is empty
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_[0, 1] <= medium_[0, 1]):
            # best big event wins: take it plus up to 6 small fillers
            if small_.shape[0] >= 6:
                event_ids = np.concatenate((big_[0:1, 0], small_[0:6, 0]), axis=0)
            else:
                event_ids = np.concatenate((big_[0:1, 0], small_[:, 0]), axis=0)
            event_type = 'big'
        else:
            # two medium events plus up to 8 small fillers
            if small_.shape[0] >= 8:
                event_ids = np.concatenate((medium_[0:2, 0], small_[0:8, 0]), axis=0)
            else:
                event_ids = np.concatenate((medium_[0:2, 0], small_[:, 0]), axis=0)
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            event_ids = np.concatenate((medium_[0:2, 0], small_[0:8, 0]), axis=0)
        else:
            event_ids = np.concatenate((medium_[0:2, 0], small_[:, 0]), axis=0)
        event_type = 'med'
    elif medium_.shape[0] > 0:
        # one medium at most: blend with smalls, re-sorted by rank then rating
        if small_.shape[0] >= 10:
            event_ids = np.array(sorted(np.vstack((small_[0:10, :], medium_)),
                                        key=lambda x: (x[1], -x[2])))[:, 0]
        else:
            event_ids = np.array(sorted(np.vstack((small_, medium_)),
                                        key=lambda x: (x[1], -x[2])))[:, 0]
        event_type = 'small'
    else:
        if small_.shape[0] > 0:
            event_ids = small_[:, 0]
            event_type = 'small'
    return event_ids, event_type
In [ ]:
def create_trip_df(big_, medium_, small_):
    """Assemble a day's trip DataFrame from big/med/small candidate tiers.

    Same tiering logic as create_event_id_list, but operating on
    DataFrames with 'poi_rank' and 'rating' columns: one big event plus up
    to 6 smalls, two mediums plus up to 8 smalls, or a rank/rating-sorted
    blend of smalls and mediums.  Returns (trip_df, event_type).

    Fix: DataFrame.append was removed in pandas 2.0 — replaced with
    pd.concat (same row order; the big-event Series append becomes a
    one-row frame).  The duplicated truncation logic is factored into a
    single head selection per branch.
    """
    event_type = ''
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_.iloc[0].poi_rank <= medium_.iloc[0].poi_rank):
            head = small_.iloc[0:6] if small_.shape[0] >= 6 else small_
            trip_df = pd.concat([head, big_.iloc[[0]]])
            event_type = 'big'
        else:
            head = small_.iloc[0:8] if small_.shape[0] >= 8 else small_
            trip_df = pd.concat([head, medium_.iloc[0:2]])
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        head = small_.iloc[0:8] if small_.shape[0] >= 8 else small_
        trip_df = pd.concat([head, medium_.iloc[0:2]])
        event_type = 'med'
    else:
        head = small_.iloc[0:10] if small_.shape[0] >= 10 else small_
        trip_df = pd.concat([head, medium_]).sort_values(['poi_rank', 'rating'],
                                                         ascending=[True, False])
        event_type = 'small'
    return trip_df, event_type
In [ ]:
my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'
# my_key = 'AIzaSyAwx3xg6oJ0yiPV3MIunBa1kx6N7v5Tcw8'

def google_driving_walking_time(tour, trip_df, event_type):
    """Fetch driving/walking times between consecutive stops of *tour*.

    For each consecutive pair in *tour*, query the Google Distance Matrix
    API (skipping pairs already cached in google_travel_time_table), retry
    by place name on ZERO_RESULTS, drop an unresolvable destination and
    re-solve recursively on double NOT_FOUND, and persist each new result
    row to the database.

    Returns (tour, trip_df, poi_travel_time_df), the last holding the
    newly fetched rows.

    Fixes over the original:
      * `poi_travel_time_df.loc[len(df_poi_travel_time)]` referenced an
        undefined name (NameError) — uses the local frame's own length.
      * a failed driving-time parse left google_driving_time unbound and
        crashed two lines later — it now falls back to 9999 like walking.
      * `max(index) + 1` crashed with TypeError on an empty table.
      * the INSERT interpolated raw JSON (full of quotes) into the SQL
        text — now a parameterized execute.
      * unused accumulator lists (ids_, orig_names, ...) removed.
    NOTE(review): urllib.urlopen is the Python 2 API, matching the rest of
    this notebook; on Python 3 this would be urllib.request.urlopen.
    """
    columns = ['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',
               'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',
               'google_walking_url','driving_result','walking_result','google_driving_time',
               'google_walking_time']
    poi_travel_time_df = pd.DataFrame(columns=columns)
    trip_id_list = []
    for i in range(len(tour) - 1):
        orig = trip_df.loc[trip_df.index[tour[i]]]
        dest = trip_df.loc[trip_df.index[tour[i + 1]]]
        id_ = str(orig.name) + '0000' + str(dest.name)
        if check_travel_time_id(id_):
            trip_id_list.append(id_)  # already cached in the DB
            continue
        orig_name, orig_idx = orig['name'], orig.name
        dest_name, dest_idx = dest['name'], dest.name
        orig_coord0, orig_coord1 = orig['coord0'], orig['coord1']
        dest_coord0, dest_coord1 = dest['coord0'], dest['coord1']
        # Google expects "lat,lng"; coord1 is used as latitude here
        orig_coords = str(orig_coord1) + ',' + str(orig_coord0)
        dest_coords = str(dest_coord1) + ',' + str(dest_coord0)
        google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
            format(orig_coords.replace(' ', ''), dest_coords.replace(' ', ''), my_key)
        google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
            format(orig_coords.replace(' ', ''), dest_coords.replace(' ', ''), my_key)
        driving_result = simplejson.load(urllib.urlopen(google_driving_url))
        walking_result = simplejson.load(urllib.urlopen(google_walking_url))
        # retry by place name when the coordinates give no route
        if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                format(orig_name.replace(' ', '+').replace('-', '+'), dest_name.replace(' ', '+').replace('-', '+'), my_key)
            driving_result = simplejson.load(urllib.urlopen(google_driving_url))
        if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                format(orig_name.replace(' ', '+').replace('-', '+'), dest_name.replace(' ', '+').replace('-', '+'), my_key)
            walking_result = simplejson.load(urllib.urlopen(google_walking_url))
        if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
            # destination unresolvable by either mode: drop it, re-solve
            # the tour, and start the whole fetch over
            new_df = trip_df.drop(dest.name)
            new_tour = trip_df_cloest_distance(new_df, event_type)
            return google_driving_walking_time(new_tour, new_df, event_type)
        try:
            google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value'] / 60
        except (KeyError, IndexError, TypeError):
            print(driving_result)
            google_driving_time = 9999  # sentinel, matching the walking fallback
        try:
            google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value'] / 60
        except (KeyError, IndexError, TypeError):
            google_walking_time = 9999
        poi_travel_time_df.loc[len(poi_travel_time_df)] = [
            id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,
            dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,
            str(driving_result), str(walking_result), google_driving_time, google_walking_time]
        # normalize quotes so the stored text is JSON-ish, as before
        driving_result = str(driving_result).replace("'", '"')
        walking_result = str(walking_result).replace("'", '"')
        conn = psycopg2.connect(conn_str)
        cur = conn.cursor()
        cur.execute("select max(index) from google_travel_time_table")
        max_index = cur.fetchone()[0]
        index = 0 if max_index is None else max_index + 1  # empty-table guard
        cur.execute("INSERT INTO google_travel_time_table VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
                    (index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,
                     dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,
                     driving_result, walking_result, google_driving_time, google_walking_time))
        conn.commit()
        conn.close()
    return tour, trip_df, poi_travel_time_df
In [ ]:
def db_google_driving_walking_time(event_ids, event_type):
    """Fetch (and cache) Google driving/walking minutes between consecutive events.

    For each consecutive pair in ``event_ids`` a cache key "<orig>0000<dest>"
    is built.  Pairs missing from ``google_travel_time_table`` are fetched
    from the Google Distance Matrix API (by coordinates first, retried by
    place name on ZERO_RESULTS) and inserted into the cache table; cached
    pairs are read back from the table.  If a pair is NOT_FOUND in both
    modes, the destination event is dropped, the remaining ids re-routed via
    ``db_event_cloest_distance`` and the whole lookup restarts recursively.

    Returns ``(event_ids, google_ids, name_list, driving_time_list,
    walking_time_list)``; the four lists align with consecutive id pairs.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    google_ids = []
    driving_time_list = []
    walking_time_list = []
    name_list = []
    for i, v in enumerate(event_ids[:-1]):
        # Cache key for the (origin, destination) pair.
        id_ = str(v) + '0000' + str(event_ids[i+1])
        result_check_travel_time_id = check_travel_time_id(id_)
        if not result_check_travel_time_id:
            cur.execute("select name, coord0, coord1 from poi_detail_table where index = '%s'" % (v))
            orig_name, orig_coord0, orig_coord1 = cur.fetchone()
            orig_idx = v
            cur.execute("select name, coord0, coord1 from poi_detail_table where index = '%s'" % (event_ids[i+1]))
            dest_name, dest_coord0, dest_coord1 = cur.fetchone()
            dest_idx = event_ids[i+1]
            # Coordinates are sent "coord1,coord0" -- presumably "lat,lng";
            # TODO confirm column semantics in poi_detail_table.
            orig_coords = str(orig_coord1) + ',' + str(orig_coord0)
            dest_coords = str(dest_coord1) + ',' + str(dest_coord0)
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                format(orig_coords.replace(' ', ''), dest_coords.replace(' ', ''), my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                format(orig_coords.replace(' ', ''), dest_coords.replace(' ', ''), my_key)
            driving_result = simplejson.load(urllib.urlopen(google_driving_url))
            walking_result = simplejson.load(urllib.urlopen(google_walking_url))
            # Coordinate lookups sometimes fail; retry with place names.
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                    format(orig_name.replace(' ', '+').replace('-', '+'), dest_name.replace(' ', '+').replace('-', '+'), my_key)
                driving_result = simplejson.load(urllib.urlopen(google_driving_url))
            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                    format(orig_name.replace(' ', '+').replace('-', '+'), dest_name.replace(' ', '+').replace('-', '+'), my_key)
                walking_result = simplejson.load(urllib.urlopen(google_walking_url))
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                # Unreachable destination: drop it, re-route the rest, retry.
                new_event_ids = list(event_ids)
                new_event_ids.pop(i+1)
                new_event_ids = db_event_cloest_distance(event_ids=new_event_ids, event_type=event_type)
                return db_google_driving_walking_time(new_event_ids, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except (KeyError, IndexError, TypeError):
                # BUG FIX: a malformed response used to leave
                # google_driving_time unbound and crash the INSERT below;
                # fall back to the same sentinel the walking branch uses.
                print(v)
                print(id_)
                print(driving_result)
                google_driving_time = 9999
            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except (KeyError, IndexError, TypeError):
                google_walking_time = 9999
            cur.execute("select max(index) from google_travel_time_table")
            index = cur.fetchone()[0] + 1
            # Stored blobs use double quotes so they survive the
            # single-quoted SQL literal below.
            driving_result = str(driving_result).replace("'", '"')
            walking_result = str(walking_result).replace("'", '"')
            # Escape single quotes in names for the string-built INSERT.
            # NOTE(review): hand-built SQL throughout this table; left as-is
            # to match the stored formats, but parameterized queries would be
            # safer.
            orig_name = orig_name.replace("'", "''")
            dest_name = dest_name.replace("'", "''")
            cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);" % (index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,
                dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,
                str(driving_result), str(walking_result), google_driving_time, google_walking_time))
            conn.commit()
            name_list.append(orig_name + " to " + dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
        else:
            # Cached pair: read it back instead of hitting the API.
            cur.execute("select orig_name, dest_name, google_driving_time, google_walking_time from google_travel_time_table \
                where id_ = '%s'" % (id_))
            orig_name, dest_name, google_driving_time, google_walking_time = cur.fetchone()
            name_list.append(orig_name + " to " + dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
    conn.close()
    return event_ids, google_ids, name_list, driving_time_list, walking_time_list
In [ ]:
# Notebook scratch: cluster a trip's events into n_days geographic groups
# and split day i's events into big/medium/small buckets by visit length.
# NOTE(review): relies on df_events defined in an earlier cell.
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
default = 1
n_days =3
full_day = 1
poi_coords = df_events[['coord0','coord1']]
trip_location_ids, full_trip_details =[],[]
# One geographic cluster per trip day.
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
# print kmeans.labels_
i=0
current_events = []
big_ix = []
small_ix = []
med_ix = []
for ix, label in enumerate(kmeans.labels_):
    if label == i:
        time = df_events.iloc[ix].adjusted_normal_time_spent
        event_ix = df_events.iloc[ix].name
        current_events.append(event_ix)
        # Bucket by visit length: >180 minutes big, 120-180 med, else small.
        if time > 180 :
            big_ix.append(event_ix)
        elif time >= 120 :
            med_ix.append(event_ix)
        else:
            small_ix.append(event_ix)
# all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
# Rank each bucket: best poi_rank first, ties broken by higher rating.
big_ = df_events.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
small_ = df_events.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
medium_ = df_events.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
# trip_df, event_type = create_trip_df(big_,medium_,small_)
# tour = trip_df_cloest_distance(trip_df, event_type)
# new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
# new_trip_df = new_trip_df.iloc[new_tour]
# new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
# new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
# event_ids, event_type=db_event_cloest_distance(trip_locations_id)
# event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
# event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
# db_address(event_ids)
# values = day_trip(event_ids, county, state, default, full_day,n_days,i)
# day_trip_locations.loc[len(day_trip_locations)] = values
# trip_location_ids.append(values[0])
# full_trip_details.extend(values[-1])
In [ ]:
In [ ]:
def get_fulltrip_data_default(state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
    """Build and store the default n-day trip for a city, one day at a time.

    Clusters the county's POIs into ``n_days`` geographic groups, routes each
    day (closest-distance tour + cached Google travel times), trims it to fit
    an 8-hour day, and inserts rows into day_trip_table and full_trip_table.
    Already-stored days are reused.

    Returns a human-readable status string.
    NOTE(review): ``full_day`` is taken from a notebook global -- confirm.
    """
    county = find_county(state, city)
    trip_location_ids, full_trip_details = [], []
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ', '-')), str(int(default)), str(n_days)])
    if not check_full_trip_id(full_trip_id, debug):
        county_list_info = db_start_location(county, state, city)
        # BUG FIX: validate the data BEFORE slicing/clustering it (the old
        # order also hit numpy's ambiguous truth value on the array check).
        if len(county_list_info) == 0:
            return "error: county_list_info is empty"
        poi_coords = county_list_info[:, 1:3]
        kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
        for i in range(n_days):
            # BUG FIX: build the per-day id before checking it (previously an
            # undefined name 'day_trip' was passed to check_day_trip_id).
            trip_locations_id = '-'.join([str(state), str(county.replace(' ', '-')), str(int(default)), str(n_days), str(i)])
            if not check_day_trip_id(trip_locations_id, debug):
                current_events, big_ix, small_ix, med_ix = [], [], [], []
                for ix, label in enumerate(kmeans.labels_):
                    if label == i:
                        # Columns: 0 = event id, 3 = adjusted visit minutes.
                        time = county_list_info[ix, 3]
                        event_ix = county_list_info[ix, 0]
                        current_events.append(event_ix)
                        # Bucket by visit length: >180 big, 120-180 med.
                        if time > 180:
                            big_ix.append(ix)
                        elif time >= 120:
                            med_ix.append(ix)
                        else:
                            small_ix.append(ix)
                big_ = sorted_events(county_list_info, big_ix)
                med_ = sorted_events(county_list_info, med_ix)
                small_ = sorted_events(county_list_info, small_ix)
                event_ids, event_type = create_event_id_list(big_, med_, small_)
                event_ids, event_type = db_event_cloest_distance(event_ids=event_ids, event_type=event_type)
                event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids, event_type)
                event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
                db_address(event_ids)
                # values = [trip_locations_id, full_day, default, county, state, details]
                values = db_day_trip(event_ids, county, state, default, full_day, n_days, i)
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                # BUG FIX: use the id/details computed above (was an undefined
                # 'trip_location_id'/'details'), and quote "default", which is
                # a reserved word in SQL.
                cur.execute("insert into day_trip_table (trip_locations_id,full_day, \"default\", county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" % (values[0], values[1], values[2], county, state, values[5], event_type, event_ids))
                conn.commit()
                conn.close()
                trip_location_ids.append(values[0])
                full_trip_details.extend(values[-1])
            else:
                print("error: already have this day, please check the next day")
                trip_location_ids.append(trip_locations_id)
                # Reuse the stored day-trip details.
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                cur.execute("select details from day_trip_table where trip_locations_id = '%s';" % (trip_locations_id))
                # BUG FIX: fetchall() was called without its cursor.
                day_trip_detail = cur.fetchall()
                conn.close()
                full_trip_details.extend(day_trip_detail)
        full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ', '-')), str(int(default)), str(n_days)])
        details = full_trip_details
        user_id = "Admin"
        conn = psycopg2.connect(conn_str)
        cur = conn.cursor()
        # "default" quoted for the same reserved-word reason as above.
        cur.execute("insert into full_trip_table(user_id, full_trip_id,trip_location_ids, \"default\", county, state, details, n_days) VALUES ('%s', '%s', '%s', %s, '%s', '%s', '%s', %s)" % (user_id, full_trip_id, str(trip_location_ids), default, county, state, details, n_days))
        conn.commit()
        conn.close()
        return "finish update %s, %s into database" % (state, county)
    else:
        return "%s, %s already in database" % (state, county)
In [ ]:
def db_day_trip(event_ids, county, state, default, full_day, n_days, i):
    """Build the day-trip row for day ``i``.

    Returns ``[trip_locations_id, full_day, default, county, state, details]``
    where ``details`` holds one stringified dict (id, name, address, day) per
    event in ``event_ids``.
    """
    connection = psycopg2.connect(conn_str)
    cursor = connection.cursor()
    # Day id: STATE-COUNTY-default-ndays-day (spaces become dashes).
    trip_locations_id = '-'.join([str(state), str(county.replace(' ', '-')),
                                  str(int(default)), str(n_days), str(i)])
    # One detail dict per event: id, name, address plus the day number.
    cursor.execute("select index, name, address from poi_detail_table where index in %s;" % (tuple(event_ids),))
    rows = cursor.fetchall()
    details = [str({'id': row[0], 'name': row[1], 'address': row[2], 'day': i}) for row in rows]
    connection.close()
    return [trip_locations_id, full_day, default, county, state, details]
In [ ]:
# Scratch: find which (state, county) most of the given events belong to.
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
# Counts events per (state, county); the first row is the dominant county.
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()
In [ ]:
def extend_full_trip_details(full_trip_details):
    """Aggregate per-field lists from a list of stringified detail dicts.

    Each item of ``full_trip_details`` is the ``str()`` of a dict with keys
    'address', 'id', 'day' and 'name'.  Returns ``str(full_trip_details)``
    unchanged, preserving the original behaviour.

    NOTE(review): the aggregated ``details`` dict is built but never
    returned -- confirm whether ``str(details)`` was the intended result.
    """
    details = {}
    addresses, ids, days, names = [], [], [], []
    for item in full_trip_details:
        # BUG FIX: parse each serialized dict once (was four eval() calls
        # per item); literal_eval also refuses arbitrary expressions.
        parsed = ast.literal_eval(item)
        addresses.append(parsed['address'])
        ids.append(parsed['id'])
        days.append(parsed['day'])
        names.append(parsed['name'])
    details['addresses'] = addresses
    details['ids'] = ids
    details['days'] = days
    details['names'] = names
    return str(full_trip_details)
In [ ]:
# Notebook scratch: display the current value of event_ids.
event_ids
In [ ]:
# Notebook scratch: inspect the outputs of db_google_driving_walking_time.
print event_ids, google_ids, name_list, driving_time_list, walking_time_list
In [ ]:
# One-off: (re)create the database tables used by this notebook.
init_db_tables()
In [ ]:
def remove_extra_events(trip_df, df_poi_travel_time):
    """Trim events from the end of the day until the schedule fits.

    Drops the last row of both frames while total visit minutes plus total
    driving minutes exceed 480 (an 8-hour day).  Returns the trimmed frames
    together with the final total time.
    """
    total = sum(trip_df.adjusted_normal_time_spent) + sum(df_poi_travel_time.google_driving_time)
    while total > 480:
        # Drop the last event and its incoming travel leg.
        trip_df = trip_df[:-1]
        df_poi_travel_time = df_poi_travel_time[:-1]
        total = sum(trip_df.adjusted_normal_time_spent) + sum(df_poi_travel_time.google_driving_time)
    return trip_df, df_poi_travel_time, total
def db_remove_extra_events(event_ids, driving_time_list, walking_time_list):
    """Trim events from the end of the day until it fits in 480 minutes.

    Each pass re-queries poi_detail_table for the total visit time of the
    remaining events and adds, per leg, the cheaper of driving and walking.
    Returns ``(event_ids, driving_time_list, walking_time_list, time_spent)``.
    """
    while True:
        conn = psycopg2.connect(conn_str)
        cur = conn.cursor()
        cur.execute("select sum (adjusted_normal_time_spent) from poi_detail_table where index in %s;" % (tuple(event_ids),))
        time_spent = cur.fetchone()[0]
        conn.close()
        # Per leg, assume the traveller picks the faster of the two modes.
        time_spent += sum(np.minimum(np.array(driving_time_list), np.array(walking_time_list)))
        if time_spent <= 480:
            return event_ids, driving_time_list, walking_time_list, time_spent
        # Too long: drop the final event and its travel leg, then re-check.
        event_ids = event_ids[:-1]
        driving_time_list = driving_time_list[:-1]
        walking_time_list = walking_time_list[:-1]
In [ ]:
def df_addresses(new_trip_df1, new_df_poi_travel_time):
    """Return one formatted address per event row of ``new_trip_df1``.

    For each event index, prefer the origin address of a cached leg that
    starts at the event; fall back to the destination address of a leg that
    ends there (only the tour's last stop has no outgoing leg).  The returned
    list always has one entry per row so it can be assigned as a column.
    """
    my_lst = []
    print(new_trip_df1.index.values)
    for i in new_trip_df1.index.values:
        temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.orig_idx.values]
        if temp_df.shape[0] > 0:
            # Cached responses are python-literal strings; literal_eval is a
            # safe replacement for eval here.
            address = ast.literal_eval(temp_df.driving_result.values[0])['origin_addresses'][0]
            my_lst.append(address)
        else:
            try:
                temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.dest_idx.values]
                address = ast.literal_eval(temp_df.driving_result.values[0])['destination_addresses'][0]
                my_lst.append(address)
            except (IndexError, KeyError, ValueError, SyntaxError):
                # BUG FIX: previously nothing was appended on failure, so the
                # list could end up shorter than the frame and crash the
                # caller's column assignment; keep the lengths aligned.
                print(new_trip_df1)
                print(new_df_poi_travel_time)
                my_lst.append(None)
    return my_lst
def check_address(index):
    """Return True if the POI at ``index`` already has an address stored."""
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    # Parameterized query instead of string interpolation.
    cur.execute("select address from poi_detail_table where index = %s;", (index,))
    a = cur.fetchone()[0]
    conn.close()
    # NULL and empty string both mean "no address yet".
    return bool(a)
def db_address(event_ids):
    """Backfill missing POI addresses from cached Google responses.

    Every event except the last appears as the origin of some cached leg, so
    its address comes from ``origin_addresses``; the final event is looked up
    as a destination instead.  Commits one UPDATE per backfilled address.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    for i in event_ids[:-1]:
        if not check_address(i):
            cur.execute("select driving_result from google_travel_time_table where orig_idx = %s;", (i,))
            a = cur.fetchone()[0]
            add = ast.literal_eval(a)['origin_addresses'][0]
            # BUG FIX: parameterized update -- the old "'%s'" interpolation
            # produced invalid SQL for addresses containing an apostrophe.
            cur.execute("update poi_detail_table set address = %s where index = %s;", (add, i))
            conn.commit()
    last = event_ids[-1]
    if not check_address(last):
        cur.execute("select driving_result from google_travel_time_table where dest_idx = %s;", (last,))
        a = cur.fetchone()[0]
        add = ast.literal_eval(a)['destination_addresses'][0]
        cur.execute("update poi_detail_table set address = %s where index = %s;", (add, last))
        conn.commit()
    conn.close()
In [ ]:
In [ ]:
In [ ]:
# BUG FIX: 'from numpy import *' shadowed builtins (sum, abs, ...) for the
# rest of the notebook session; use the existing 'np' alias instead.
test_event_ids_list = np.append(273, event_ids)
# event_ids
print(test_event_ids_list)
In [ ]:
# Scratch (duplicate of an earlier cell): find the dominant (state, county)
# among the given events.
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Notebook scratch -- NOTE(review): 'd' is bound twice in the unpacking, so
# the first value is lost, and day_trip_locations is a string here although
# other cells use it as a DataFrame; confirm intent.
day_trip_locations = 'San Diego, California'
f, d, n, d= search_cluster_events(df, county, state, city, 3, day_trip_locations, full_trip_table, default = True)
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Micro-benchmark: time sorting [index, ranking, score] rows by ranking
# ascending, then score descending.
import time
t1=time.time()
# [index, ranking,score]
a = [[1,2,3],[2,2,6],[3,3,3],[4,3,10]]
from operator import itemgetter
print sorted(a, key=lambda x: (x[1], -x[2]))
# Elapsed wall-clock seconds (displayed by the notebook).
time.time()-t1
In [ ]:
In [ ]:
'''
Top-level entry point: calls all the helper functions and returns the
per-day details for the whole trip.
'''
def search_cluster_events(df, county, state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
    """Build an n-day trip for a city: cluster POIs per day, route each day,
    and append the results to the day/full trip tables.

    NOTE(review): this notebook version depends on names defined in other
    cells (``big``, ``med``, ``small``, ``full_day``, ``day_trip``) and has
    several latent issues flagged inline; the database-backed variant is
    ``get_fulltrip_data_default``.  Implicitly returns None when the trip id
    already exists.
    """
    county, df_events =cold_start_places(df, county, state, city, n_days)
    poi_coords = df_events[['coord0','coord1']]
    # One geographic cluster of POIs per trip day.
    kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
    new_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    if not check_full_trip_id(new_trip_id, debug):
        trip_location_ids = []
        full_trip_details = []
        for i in range(n_days):
            current_events = []
            big_ix = []
            small_ix = []
            med_ix = []
            for ix, label in enumerate(kmeans.labels_):
                if label == i:
                    event_ix = poi_coords.index[ix]
                    current_events.append(event_ix)
                    # Bucket by membership in the big/med frames.
                    # NOTE(review): 'big' and 'med' are not parameters -- they
                    # must exist as globals; confirm.
                    if event_ix in big.index:
                        big_ix.append(event_ix)
                    elif event_ix in med.index:
                        med_ix.append(event_ix)
                    else:
                        small_ix.append(event_ix)
            all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
            # Best poi_rank first; ties broken by higher rating.
            big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            # print 'big:', big_, 'small:', small_, 'msize:', medium_
            trip_df, event_type = create_trip_df(big_,medium_,small_)
            # print event_type
            # Order the day's events into a nearest-neighbour tour.
            tour = trip_df_cloest_distance(trip_df, event_type)
            # print tour
            new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
            # print new_tour, new_trip_df
            # return new_trip_df, df_poi_travel_time
            new_trip_df = new_trip_df.iloc[new_tour]
            # Drop events from the end until the day fits in 480 minutes.
            new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
            # print new_trip_df1
            new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
            # print 'total time:', total_ti
            values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
            day_trip_locations.loc[len(day_trip_locations)] = values
            trip_location_ids.append(values[0])
            full_trip_details.extend(values[-1])
            # NOTE(review): df_poi_travel_info is assigned here but never
            # initialised inside the function -- UnboundLocalError unless it
            # is meant to come from a global; confirm.
            df_poi_travel_info = df_poi_travel_info.append(new_df_poi_travel_time)
        full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
        details = extend_full_trip_details(full_trip_details)
        full_trip_table.loc[len(full_trip_table)] = ["adam", full_trip_id, str(trip_location_ids), default, county, state, details, n_days]
        return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info
In [ ]:
def default_search_cluster_events(df, df_counties_u, county, state, big,med, small, \
    temp, n_days,day_trip_locations, full_trip_table,df_poi_travel_info):
    """Default (non-cached) trip builder used by earlier notebook experiments.

    Clusters the POIs in ``temp`` into ``n_days`` groups, routes and trims
    each day, and appends the rows to ``day_trip_locations`` and
    ``full_trip_table``.  Returns ``(full_trip_table, day_trip_locations,
    new_trip_df1, df_poi_travel_info)``.

    NOTE(review): reads ``default``, ``full_day`` and ``user_id`` from
    notebook globals -- confirm they are defined when this runs.
    """
    # df_poi_travel_info = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
    #                    'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
    #                    'google_walking_url','driving_result','walking_result','google_driving_time',\
    #                    'google_walking_time'])
    poi_coords = temp[['coord0','coord1']]
    # Fixed random_state keeps the clustering reproducible across runs.
    kmeans = KMeans(n_clusters=n_days, random_state=0).fit(poi_coords)
    # print kmeans.labels_
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    trip_location_ids = []
    full_trip_details = []
    for i in range(n_days):
        current_events = []
        big_ix = []
        small_ix = []
        med_ix = []
        for ix, label in enumerate(kmeans.labels_):
            if label == i:
                event_ix = poi_coords.index[ix]
                current_events.append(event_ix)
                # Bucket by membership in the big/med frames.
                if event_ix in big.index:
                    big_ix.append(event_ix)
                elif event_ix in med.index:
                    med_ix.append(event_ix)
                else:
                    small_ix.append(event_ix)
        all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
        # Best poi_rank first; ties broken by higher rating.
        big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        trip_df, event_type = create_trip_df(big_,medium_,small_)
        # Nearest-neighbour tour, then real travel times, then trim to 480 min.
        tour = trip_df_cloest_distance(trip_df, event_type)
        new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
        new_trip_df = new_trip_df.iloc[new_tour]
        new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
        new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
        values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
        day_trip_locations.loc[len(day_trip_locations)] = values
        trip_location_ids.append(values[0])
        full_trip_details.extend(values[-1])
        # print 'trave time df \n',new_df_poi_travel_time
        df_poi_travel_info = df_poi_travel_info.append(new_df_poi_travel_time)
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    details = extend_full_trip_details(full_trip_details)
    full_trip_table.loc[len(full_trip_table)] = [user_id, full_trip_id, \
        str(trip_location_ids), default, county, state, details, n_days]
    return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info
In [ ]:
In [ ]:
In [ ]:
### Next steps: add user controls. Feature 1: allow adding events (by specific
### name or auto-added), auto-routed into the most appropriate order.
### Feature 2: allow reordering events. Feature 3: allow deleting events.
### Feature 4: allow switching to a new event -- a switch and an X-mark icon
### next to each event, with a check mark to confirm the new place and re-order.
### New table for trip info: trip id, event place, days, specific date, trip details.
def ajax_available_events(county, state):
    """Return ``[(index, name), ...]`` for every POI in the county/state.

    County is stored upper-cased and state title-cased in poi_detail_table.
    """
    county = county.upper()
    state = state.title()
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    # Parameterized query (names may contain quotes, e.g. "O'Brien County").
    cur.execute("select index, name from poi_detail_table where county=%s and state=%s", (county, state))
    # fetchall() already returns a list of tuples; the old comprehension
    # merely copied it.
    poi_lst = cur.fetchall()
    conn.close()
    return poi_lst
def add_event(trip_locations_id, event_day, event_id=None, event_name=None, full_day = True, unseen_event = False):
    """Add an event to an existing day trip and persist the new variant.

    ``unseen_event=True`` means the event is not in poi_detail_table yet and
    is stored by name with no id/address; otherwise the event is routed into
    the existing tour by distance and its travel times are fetched/cached.
    Returns ``(trip_locations_id, details)``.

    NOTE(review): several suspected bugs are flagged inline; confirm against
    the day_trip_table / day_trip_locations schemas before relying on this.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))
    (index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
    if unseen_event:
        index += 1
        # New id: existing event ids joined with the new event's name and day.
        # NOTE(review): '+' with event_day assumes it is a string -- confirm.
        trip_locations_id = '-'.join([str(eval(i)['id']) for i in eval(detail)])+'-'+event_name.replace(' ','-')+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id))
        a = cur.fetchone()
        if bool(a):
            # This variant is already stored; reuse it.
            conn.close()
            return trip_locations_id, a[0]
        else:
            cur.execute("select max(index) from day_trip_locations")
            index = cur.fetchone()[0]+1
            detail = list(eval(detail))
            #need to make sure the type is correct for detail!
            new_event = "{'address': 'None', 'id': 'None', 'day': %s, 'name': u'%s'}"%(event_day, event_name)
            detail.append(new_event)
            #get the right format of detail: change from list to string and remove brackets and convert quote type
            new_detail = str(detail).replace('"','').replace('[','').replace(']','').replace("'",'"')
            cur.execute("INSERT INTO day_trip_locations VALUES (%i, '%s',%s,%s,'%s','%s','%s');" %(index, trip_locations_id, full_day, False, county, state, new_detail))
            conn.commit()
            conn.close()
            return trip_locations_id, detail
    else:
        # Known POI: splice it into the tour at the nearest position, then
        # refresh the cached Google travel times for the new ordering.
        event_ids = add_event_cloest_distance(trip_locations_id, event_id)
        event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids,event_type = 'add')
        # NOTE(review): join fails if event_ids are ints -- sibling code uses
        # '-'.join(str(e) ...); confirm the element type.
        trip_locations_id = '-'.join(event_ids)+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id))
        a = cur.fetchone()
        if not a:
            details = []
            db_address(event_ids)
            for item in event_ids:
                cur.execute("select index, name, address from poi_detail_table where index = '%s';" %(item))
                a = cur.fetchone()
                detail = {'id': a[0],'name': a[1],'address': a[2], 'day': event_day}
                details.append(detail)
            #need to make sure event detail can append to table!
            # NOTE(review): 'trip_location_id' is undefined (typo for
            # trip_locations_id) and 'default' is a reserved SQL word -- this
            # statement likely fails; confirm.
            cur.execute("insert into day_trip_table (trip_locations_id,full_day, default, county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" %( trip_location_id, full_day, False, county, state, details, event_type, event_ids))
            conn.commit()
            conn.close()
            return trip_locations_id, details
        else:
            conn.close()
            #need to make sure type is correct.
            return trip_locations_id, a[0]
def remove_event(trip_locations_id, remove_event_id, remove_event_name=None, event_day=None, full_day = True):
    """Remove one event from a day trip and store the new variant.

    Loads the existing day-trip row, drops ``remove_event_id`` from its event
    list and detail list, and inserts the result as a new non-default row
    (reusing an already-stored variant when one exists).
    Returns ``(new_trip_locations_id, new_detail)``.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" % (trip_locations_id))
    (index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
    new_event_ids = ast.literal_eval(event_ids)
    new_event_ids.remove(remove_event_id)
    new_trip_locations_id = '-'.join(str(event_id) for event_id in new_event_ids)
    # Reuse an already-stored variant of this exact trip if present.
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" % (new_trip_locations_id))
    check_id = cur.fetchone()
    if check_id:
        # BUG FIX: the connection used to leak on this early-return path.
        conn.close()
        return new_trip_locations_id, check_id[-3]
    # Stored detail is wrapped in one extra pair of brackets; strip them
    # before parsing.  NOTE(review): confirm the stored format.
    detail = ast.literal_eval(detail[1:-1])
    for index, trip_detail in enumerate(detail):
        if ast.literal_eval(trip_detail)['id'] == remove_event_id:
            remove_index = index
            break
    new_detail = list(detail)
    new_detail.pop(remove_index)
    # Double the single quotes so the value survives the SQL string literal.
    new_detail = str(new_detail).replace("'", "''")
    # A hand-edited trip is no longer the default one.
    default = False
    cur.execute("select max(index) from day_trip_table where trip_locations_id='%s'" % (trip_locations_id))
    new_index = cur.fetchone()[0]
    new_index += 1
    cur.execute("INSERT INTO day_trip_table VALUES (%i, '%s', %s, %s, '%s', '%s', '%s', '%s','%s');" \
        % (new_index, new_trip_locations_id, full_day, default, county, state, new_detail, event_type, new_event_ids))
    conn.commit()
    conn.close()
    return new_trip_locations_id, new_detail
def event_type_time_spent(adjusted_normal_time_spent):
    """Map a visit duration in minutes to an event-size bucket.

    More than 180 minutes is 'big', 120-180 is 'med', shorter is 'small'.
    """
    minutes = adjusted_normal_time_spent
    if minutes <= 180:
        return 'med' if minutes >= 120 else 'small'
    return 'big'
def switch_event_list(full_trip_id, trip_locations_id, switch_event_id, switch_event_name=None, event_day=None, full_day = True):
    """Collect candidate replacement events for ``switch_event_id``.

    Candidates are POIs in the same county that are not already part of the
    full trip and are within 60 minutes (driving or walking) of the event
    being switched out.  Returns ``{switch_event_id: [[poi_rank, rating,
    same_size_bucket], ...]}``.

    NOTE(review): suspected bugs are flagged inline; confirm before use.
    """
    # new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select name, city, county, state, coord0, coord1,poi_rank, adjusted_normal_time_spent from poi_detail_table where index=%s" %(switch_event_id))
    name, city, county, state,coord0, coord1,poi_rank, adjusted_normal_time_spent = cur.fetchone()
    event_type = event_type_time_spent(adjusted_normal_time_spent)
    avialable_lst = ajax_available_events(county, state)
    # NOTE(review): full_trip_id is interpolated without quotes, and
    # fetchone() returns a tuple that literal_eval cannot parse -- confirm.
    cur.execute("select trip_location_ids,details from full_trip_table where full_trip_id=%s" %(full_trip_id))
    full_trip_detail = cur.fetchone()
    full_trip_detail = ast.literal_eval(full_trip_detail)
    full_trip_ids = [ast.literal_eval(item)['id'] for item in full_trip_detail]
    switch_lst = []
    for item in avialable_lst:
        index = item[0]
        if index not in full_trip_ids:
            # Travel time between the outgoing event and this candidate.
            event_ids = [switch_event_id, index]
            event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids, event_type='switch')
            if min(driving_time_list[0], walking_time_list[0]) <= 60:
                cur.execute("select poi_rank, rating, adjusted_normal_time_spent from poi_detail_table where index=%s" %(index))
                target_poi_rank, target_rating, target_adjusted_normal_time_spent = cur.fetchone()
                target_event_type = event_type_time_spent(target_adjusted_normal_time_spent)
                switch_lst.append([target_poi_rank, target_rating, target_event_type==event_type])
    #need to sort target_event_type, target_poi_rank and target_rating
    return {switch_event_id: switch_lst}
def switch_event(trip_locations_id, switch_event_id, final_event_id, event_day):
    """Replace one event with another: remove the old one, add the new one."""
    after_removal_id, _removed_detail = remove_event(trip_locations_id, switch_event_id)
    return add_event(after_removal_id, event_day, final_event_id, full_day=True, unseen_event=False)
In [ ]:
# Scratch: list available POIs for a county, then collect the event ids
# already used by a stored full trip.
ajax_available_events(county='San Francisco', state = "California")
county='San Francisco'.upper()
state = "California"
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select index, name from poi_detail_table where county='%s' and state='%s'" %(county,state))
full_trip_id = 'CALIFORNIA-SAN-DIEGO-1-3'
cur.execute("select details from full_trip_table where full_trip_id='%s'" %(full_trip_id))
full_trip_detail = cur.fetchone()[0]
full_trip_detail = ast.literal_eval(full_trip_detail)
# Ids of every event already scheduled anywhere in the full trip.
[ast.literal_eval(item)['id'] for item in full_trip_detail]
In [ ]:
# Scratch: look up a POI by name -- NOTE(review): trip_locations_id holds a
# trip id, not a name, so this query is probably a leftover experiment.
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select * from poi_detail_table where name='%s'" %(trip_locations_id))
'Blue Springs State Park' in df.name
In [ ]:
"select * from poi_detail_table where index=%s" %(remove_event_id)
In [ ]:
# Scratch prototype of remove_event(): drop one event from a stored day trip
# and rebuild the trip id and the detail list.
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
remove_event_id = 3486
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))
(index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
# event_ids = ast.literal_eval(event_ids)
# print detail, '\n'
new_event_ids = ast.literal_eval(event_ids)
new_event_ids.remove(remove_event_id)
new_trip_locations_id = '-'.join(str(id_) for id_ in new_event_ids)
# event_ids.remove(remove_event_id)
# Stored detail is wrapped in one extra pair of brackets; strip before parsing.
detail = ast.literal_eval(detail[1:-1])
print type(detail[0])
for index, trip_detail in enumerate(detail):
    if ast.literal_eval(trip_detail)['id'] == remove_event_id:
        remove_index = index
        break
new_detail = list(detail)
new_detail.pop(remove_index)
# Double the single quotes so the value survives a SQL string literal.
new_detail = str(new_detail).replace("'","''")
In [ ]:
# Scratch: preview the rebuilt trip id.
'-'.join(str(id_) for id_ in new_event_ids)
In [ ]:
#Tasks:
#0. Run the initial to debug with all the cities and counties for the poi_detail_table in hand.
#1. Continue working on add/suggest/remove features
#2. Start the new feature that allows user to generate the google map route for the day
#3. new feature that allows user to explore outside the city from a direction away from the started location
#4. get all the state and national park data into database and rework the ranking system and the poi_detail_table!
In [2]:
In [ ]: