In [ ]:
import pandas as pd
import numpy as np
import requests
import psycopg2
import json
import simplejson
import urllib
import config
import ast

from operator import itemgetter
from sklearn.cluster import KMeans
from sqlalchemy import create_engine

In [ ]:
!pip install --upgrade pip
!pip install sqlalchemy
!pip install psycopg2
!pip install simplejson
!pip install config

In [ ]:
conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"
# conn_str = "dbname='travel_with_friends' user='Zoesh' host='localhost'"

In [ ]:
import math
import random


def distL2((x1,y1), (x2,y2)):
    """Compute the L2-norm (Euclidean) distance between two points.

    The distance is rounded to the closest integer, for compatibility
    with the TSPLIB convention.

    The two points are located on coordinates (x1,y1) and (x2,y2),
    sent as parameters"""
    xdiff = x2 - x1
    ydiff = y2 - y1
    return math.sqrt(xdiff*xdiff + ydiff*ydiff) + .5


def distL1((x1,y1), (x2,y2)):
    """Compute the L1-norm (Manhattan) distance between two points.

    The distance is rounded to the closest integer, for compatibility
    with the TSPLIB convention.

    The two points are located on coordinates (x1,y1) and (x2,y2),
    sent as parameters"""
    return abs(x2-x1) + abs(y2-y1)+.5


def mk_matrix(coord, dist):
    """Compute a distance matrix for a set of points.

    Uses function 'dist' to calculate distance between
    any two points.  Parameters:
    -coord -- list of tuples with coordinates of all points, [(x1,y1),...,(xn,yn)]
    -dist -- distance function
    """
    n = len(coord)
    D = {}      # dictionary to hold n times n matrix
    for i in range(n-1):
        for j in range(i+1,n):
            [x1,y1] = coord[i]
            [x2,y2] = coord[j]
            D[i,j] = dist((x1,y1), (x2,y2))
            D[j,i] = D[i,j]
    return n,D

def read_tsplib(filename):
    "basic function for reading a TSP problem on the TSPLIB format"
    "NOTE: only works for 2D euclidean or manhattan distances"
    f = open(filename, 'r');

    line = f.readline()
    while line.find("EDGE_WEIGHT_TYPE") == -1:
        line = f.readline()

    if line.find("EUC_2D") != -1:
        dist = distL2
    elif line.find("MAN_2D") != -1:
        dist = distL1
    else:
        print "cannot deal with non-euclidean or non-manhattan distances"
        raise Exception

    while line.find("NODE_COORD_SECTION") == -1:
        line = f.readline()

    xy_positions = []
    while 1:
        line = f.readline()
        if line.find("EOF") != -1: break
        (i,x,y) = line.split()
        x = float(x)
        y = float(y)
        xy_positions.append((x,y))

    n,D = mk_matrix(xy_positions, dist)
    return n, xy_positions, D


def mk_closest(D, n):
    """Compute a sorted list of the distances for each of the nodes.

    For each node, the entry is in the form [(d1,i1), (d2,i2), ...]
    where each tuple is a pair (distance,node).
    """
    C = []
    for i in range(n):
        dlist = [(D[i,j], j) for j in range(n) if j != i]
        dlist.sort()
        C.append(dlist)
    return C


def length(tour, D):
    """Calculate the length of a tour according to distance matrix 'D'."""
    z = D[tour[-1], tour[0]]    # edge from last to first city of the tour
    for i in range(1,len(tour)):
        z += D[tour[i], tour[i-1]]      # add length of edge from city i-1 to i
    return z


def randtour(n):
    """Construct a random tour of size 'n'."""
    sol = range(n)      # set solution equal to [0,1,...,n-1]
    random.shuffle(sol) # place it in a random order
    return sol


def nearest(last, unvisited, D):
    """Return the index of the node which is closest to 'last'."""
    near = unvisited[0]
    min_dist = D[last, near]
    for i in unvisited[1:]:
        if D[last,i] < min_dist:
            near = i
            min_dist = D[last, near]
    return near


def nearest_neighbor(n, i, D):
    """Return tour starting from city 'i', using the Nearest Neighbor.

    Uses the Nearest Neighbor heuristic to construct a solution:
    - start visiting city i
    - while there are unvisited cities, follow to the closest one
    - return to city i
    """
    unvisited = range(n)
    unvisited.remove(i)
    last = i
    tour = [i]
    while unvisited != []:
        next = nearest(last, unvisited, D)
        tour.append(next)
        unvisited.remove(next)
        last = next
    return tour



def exchange_cost(tour, i, j, D):
    """Calculate the cost of exchanging two arcs in a tour.

    Determine the variation in the tour length if
    arcs (i,i+1) and (j,j+1) are removed,
    and replaced by (i,j) and (i+1,j+1)
    (note the exception for the last arc).

    Parameters:
    -t -- a tour
    -i -- position of the first arc
    -j>i -- position of the second arc
    """
    n = len(tour)
    a,b = tour[i],tour[(i+1)%n]
    c,d = tour[j],tour[(j+1)%n]
    return (D[a,c] + D[b,d]) - (D[a,b] + D[c,d])


def exchange(tour, tinv, i, j):
    """Exchange arcs (i,i+1) and (j,j+1) with (i,j) and (i+1,j+1).

    For the given tour 't', remove the arcs (i,i+1) and (j,j+1) and
    insert (i,j) and (i+1,j+1).

    This is done by inverting the sublist of cities between i and j.
    """
    n = len(tour)
    if i>j:
        i,j = j,i
    assert i>=0 and i<j-1 and j<n
    path = tour[i+1:j+1]
    path.reverse()
    tour[i+1:j+1] = path
    for k in range(i+1,j+1):
        tinv[tour[k]] = k


def improve(tour, z, D, C):
    """Try to improve tour 't' by exchanging arcs; return improved tour length.

    If possible, make a series of local improvements on the solution 'tour',
    using a breadth first strategy, until reaching a local optimum.
    """
    n = len(tour)
    tinv = [0 for i in tour]
    for k in range(n):
        tinv[tour[k]] = k  # position of each city in 't'
    for i in range(n):
        a,b = tour[i],tour[(i+1)%n]
        dist_ab = D[a,b]
        improved = False
        for dist_ac,c in C[a]:
            if dist_ac >= dist_ab:
                break
            j = tinv[c]
            d = tour[(j+1)%n]
            dist_cd = D[c,d]
            dist_bd = D[b,d]
            delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
            if delta < 0:       # exchange decreases length
                exchange(tour, tinv, i, j);
                z += delta
                improved = True
                break
        if improved:
            continue
        for dist_bd,d in C[b]:
            if dist_bd >= dist_ab:
                break
            j = tinv[d]-1
            if j==-1:
                j=n-1
            c = tour[j]
            dist_cd = D[c,d]
            dist_ac = D[a,c]
            delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
            if delta < 0:       # exchange decreases length
                exchange(tour, tinv, i, j);
                z += delta
                break
    return z


def localsearch(tour, z, D, C=None):
    """Obtain a local optimum starting from solution t; return solution length.

    Parameters:
      tour -- initial tour
      z -- length of the initial tour
      D -- distance matrix
    """
    n = len(tour)
    if C == None:
        C = mk_closest(D, n)     # create a sorted list of distances to each node
    while 1:
        newz = improve(tour, z, D, C)
        if newz < z:
            z = newz
        else:
            break
    return z


def multistart_localsearch(k, n, D, report=None):
    """Do k iterations of local search, starting from random solutions.

    Parameters:
    -k -- number of iterations
    -D -- distance matrix
    -report -- if not None, call it to print verbose output

    Returns best solution and its cost.
    """
    C = mk_closest(D, n) # create a sorted list of distances to each node
    bestt=None
    bestz=None
    for i in range(0,k):
        tour = randtour(n)
        z = length(tour, D)
        z = localsearch(tour, z, D, C)
        if z < bestz or bestz == None:
            bestz = z
            bestt = list(tour)
            if report:
                report(z, tour)

    return bestt, bestz

In [ ]:
# db_name = "travel_with_friends"
# TABLES ={}
# TABLES['full_trip_table'] = (
#     "CREATE TABLE `full_trip_table` ("
#     "  `user_id` int(11) NOT NULL AUTO_INCREMENT,"
#     "  `full_trip_id` date NOT NULL,"
#     "  `trip_location_ids` varchar(14) NOT NULL,"
#     "  `default` varchar(16) NOT NULL,"
#     "  `county` enum('M','F') NOT NULL,"
#     "  `state` date NOT NULL,"
#     "  `details` ,"
#     "  `n_days`,"
#     "  PRIMARY KEY (`full_trip_id`)"
#     ") ENGINE=InnoDB")

In [ ]:
# def create_tables():
#     """ create tables in the PostgreSQL database"""
#     commands = (
#         """
#         CREATE TABLE full_trip_table (
#             index INTEGER PRIMARY KEY,
#             user_id VARCHAR(225) NOT NULL,
#             full_trip_id VARCHAR(225) NOT NULL,
#             trip_location_ids VARCHAR(225),
#             default BOOLEAN NOT NULL,
#             county VARCHAR(225) NOT NULL,
#             state VARCHAR(225) NOT NULL,
#             details VARCHAR(MAX),
#             n_days VARCHAR(225) NOT NULL
#         )
#         """,
#         """ CREATE TABLE day_trip_table (
#                 trip_locations_id 
#                 full_day
#                 default 
#                 county 
#                 state
#                 details
#                 )
#         """,
#         """
#         CREATE TABLE poi_detail_table (
#                 part_id INTEGER PRIMARY KEY,
#                 file_extension VARCHAR(5) NOT NULL,
#                 drawing_data BYTEA NOT NULL,
#                 FOREIGN KEY (part_id)
#                 REFERENCES parts (part_id)
#                 ON UPDATE CASCADE ON DELETE CASCADE
#         )
#         """,
#         """
#         CREATE TABLE google_travel_time_table (
#                 index INTEGER PRIMARY KEY,
#                 id_ VARCHAR NOT NULL,
#                 orig_name VARCHAR,
#                 orig_idx VARCHAR,
#                 dest_name VARCHAR,
#                 dest_idx VARCHAR,
#                 orig_coord0 INTEGER,
#                 orig_coord1 INTEGER,
#                 dest_coord0 INTEGER,
#                 dest_coord1 INTEGER,
#                 orig_coords VARCHAR,
#                 dest_coords VARCHAR,
#                 google_driving_url VARCHAR,
#                 google_walking_url VARCHAR,
#                 driving_result VARCHAR,
#                 walking_result VARCHAR,
#                 google_driving_time INTEGER,
#                 google_walking_time INTEGER
                

#         )
#         """)
#     conn = None
#     try:
#         # read the connection parameters
#         params = config()
#         # connect to the PostgreSQL server
#         conn = psycopg2.connect(**params)
#         cur = conn.cursor()
#         # create table one by one
#         for command in commands:
#             cur.execute(command)
#         # close communication with the PostgreSQL database server
#         cur.close()
#         # commit the changes
#         conn.commit()
#     except (Exception, psycopg2.DatabaseError) as error:
#         print(error)
#     finally:
#         if conn is not None:
#             conn.close()

In [ ]:
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])

# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
#                                        'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
#                                        'google_walking_url','driving_result','walking_result','google_driving_time',\
#                                        'google_walking_time'])

In [ ]:
# read poi details csv file 
poi_detail = pd.read_csv("./step9_poi.csv", index_col=0)
poi_detail['address'] = None
poi_detail['rating']=poi_detail['rating'].fillna(0)
#read US city state and county csv file
df_counties = pd.read_csv('./us_cities_states_counties.csv',sep='|')
#find counties without duplicate
df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()

In [ ]:


In [ ]:
def init_db_tables():
    full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

    day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details','event_type','event_ids'])

    google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
                                           'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
                                           'google_walking_url','driving_result','walking_result','google_driving_time',\
                                           'google_walking_time'])
    day_trip_locations_table.loc[0] = ['CALIFORNIA-SAN-DIEGO-1-3-0', True, True, 'SAN DIEGO', 'California',
       ["{'address': '15500 San Pasqual Valley Rd, Escondido, CA 92027, USA', 'id': 2259, 'day': 0, 'name': u'San Diego Zoo Safari Park'}", "{'address': 'Safari Walk, Escondido, CA 92027, USA', 'id': 2260, 'day': 0, 'name': u'Meerkat'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3486, 'day': 0, 'name': u'Stone'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3487, 'day': 0, 'name': u'Stone Brewery'}", "{'address': 'Mount Woodson Trail, Poway, CA 92064, USA', 'id': 4951, 'day': 0, 'name': u'Lake Poway'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4953, 'day': 0, 'name': u'Potato Chip Rock'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4952, 'day': 0, 'name': u'Mt. Woodson'}"],
       'big','[2259, 2260,3486,3487,4951,4953,4952]']
    google_travel_time_table.loc[0] = ['439300002871', u'Moonlight Beach', 4393.0,
       u'Carlsbad Flower Fields', 2871.0, -117.29692141333341,
       33.047769600024424, -117.3177652511278, 33.124079753475236,
       '33.0477696,-117.296921413', '33.1240797535,-117.317765251',
       'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=driving&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
       'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=walking&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
       "{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '14 mins', 'value': 822}, 'distance': {'text': '10.6 km', 'value': 10637}, 'status': 'OK'}]}], 'origin_addresses': ['233 C St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
       "{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '2 hours 4 mins', 'value': 7457}, 'distance': {'text': '10.0 km', 'value': 10028}, 'status': 'OK'}]}], 'origin_addresses': ['498 B St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
       13.0, 124.0]
    full_trip_table.loc[0] = ['gordon_lee01', 'CALIFORNIA-SAN-DIEGO-1-3',
       "['CALIFORNIA-SAN-DIEGO-1-3-0', 'CALIFORNIA-SAN-DIEGO-1-3-1', 'CALIFORNIA-SAN-DIEGO-1-3-2']",
       True, 'SAN DIEGO', 'California',
       '["{\'address\': \'15500 San Pasqual Valley Rd, Escondido, CA 92027, USA\', \'id\': 2259, \'day\': 0, \'name\': u\'San Diego Zoo Safari Park\'}", "{\'address\': \'Safari Walk, Escondido, CA 92027, USA\', \'id\': 2260, \'day\': 0, \'name\': u\'Meerkat\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3486, \'day\': 0, \'name\': u\'Stone\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3487, \'day\': 0, \'name\': u\'Stone Brewery\'}", "{\'address\': \'Mount Woodson Trail, Poway, CA 92064, USA\', \'id\': 4951, \'day\': 0, \'name\': u\'Lake Poway\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4953, \'day\': 0, \'name\': u\'Potato Chip Rock\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4952, \'day\': 0, \'name\': u\'Mt. Woodson\'}", "{\'address\': \'1 Legoland Dr, Carlsbad, CA 92008, USA\', \'id\': 2870, \'day\': 1, \'name\': u\'Legoland\'}", "{\'address\': \'5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA\', \'id\': 2871, \'day\': 1, \'name\': u\'Carlsbad Flower Fields\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2089, \'day\': 1, \'name\': u\'Oceanside Pier\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2090, \'day\': 1, \'name\': u\'Pier\'}", "{\'address\': \'1016-1024 Neptune Ave, Encinitas, CA 92024, USA\', \'id\': 2872, \'day\': 1, \'name\': u\'Encinitas\'}", "{\'address\': \'625 Pan American Rd E, San Diego, CA 92101, USA\', \'id\': 147, \'day\': 2, \'name\': u\'Balboa Park\'}", "{\'address\': \'1849-1863 Zoo Pl, San Diego, CA 92101, USA\', \'id\': 152, \'day\': 2, \'name\': u\'San Diego Zoo\'}", "{\'address\': \'701-817 Coast Blvd, La Jolla, CA 92037, USA\', \'id\': 148, \'day\': 2, \'name\': u\'La Jolla\'}", "{\'address\': \'10051-10057 Pebble Beach Dr, Santee, CA 92071, USA\', \'id\': 4630, \'day\': 2, \'name\': u\'Santee Lakes\'}", "{\'address\': \'Lake Murray Bike Path, La Mesa, CA 91942, USA\', \'id\': 4545, \'day\': 2, \'name\': u\'Lake Murray\'}", "{\'address\': \'4905 Mt Helix Dr, La Mesa, CA 91941, USA\', \'id\': 4544, \'day\': 2, \'name\': u\'Mt. Helix\'}", "{\'address\': \'1720 Melrose Ave, Chula Vista, CA 91911, USA\', \'id\': 1325, \'day\': 2, \'name\': u\'Thick-billed Kingbird\'}", "{\'address\': \'711 Basswood Ave, Imperial Beach, CA 91932, USA\', \'id\': 1326, \'day\': 2, \'name\': u\'Lesser Sand-Plover\'}"]',
       3.0]
    engine = create_engine('postgresql://zoesh@localhost:5432/travel_with_friends')
    # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
    # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')

    full_trip_table.to_sql('full_trip_table',engine, if_exists = "replace")
    day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "replace")
    google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "replace")
    poi_detail.to_sql('poi_detail_table',engine, if_exists = "replace")
    
    
    df_counties = pd.read_csv('/Users/zoesh/Desktop/travel_with_friends/travel_with_friends/us_cities_states_counties.csv',sep='|')
    df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()
    df_counties_u.columns = ["city","state_abb","state","county"]
    df_counties_u.to_sql('county_table',engine, if_exists = "replace")
init_db_tables()

In [ ]:
def cold_start_places(df, county, state, city, number_days, first_day_full = True, last_day_full = True):
    
    if len(county.values) != 0:
        county = county.values[0]
        temp_df = df[(df['county'] == county) & (df['state'] == state)]
    else:
        temp_df = df[(df['city'] == city) & (df['state'] == state)]

    return county, temp_df
def find_county(state, city):
    
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select county from county_table where city = '%s' and state = '%s';" %(city.title(), state.title()))

    county = cur.fetchone()
    conn.close()
    if county:
        return county[0]
    else:
        return None
def db_start_location(county, state, city):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    if county:
        cur.execute("select index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating from poi_detail_table where county = '%s' and state = '%s'; "%(county.upper(), state))

    else:
        print "else"
        cur.execute("select index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating from poi_detail_table where city = '%s' and state = '%s'; "%(city, state))
    a = cur.fetchall()
    conn.close()
    return np.array(a)

In [ ]:
a1= db_start_location('San Francisco',"California","San Francisco")
poi_coords =  a1[:,1:3]
n_days = 1
current_events =[]
big_ix, med_ix, small_ix =[],[],[]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
i = 0
for ix, label in enumerate(kmeans.labels_):
#     print ix, label
    if label == i:
        time = a1[ix,3]
        event_ix = a1[ix,0]
        current_events.append(event_ix)
        if time > 180 :
            big_ix.append(ix)

        elif time >= 120 :
            med_ix.append(ix)
        else:
            small_ix.append(ix)
print big_ix, med_ix, small_ix
# big_ = a1[[big_ix],4:]
# print med_ix
# print a1
big_ = a1[big_ix][:,[0,4,5]]
med_ = a1[med_ix][:,[0,4,5]]
small_ = a1[small_ix][:,[0,4,5]]

In [ ]:
list_a1 = sorted_events(a1,small_ix)

In [ ]:
from operator import itemgetter
med_s= sorted(med_, key=itemgetter(1,2))
a = [[1,3,3],[2,2,5],[3,4,4],[4,2,2]]
# [sorted(a, key=itemgetter(1,2)) for i in range(100)]
b = sorted(a, key=itemgetter(1,2))
a.sort(key=lambda k: (k[1], -k[2]), reverse=False)
# print med_s
print a

In [ ]:


In [ ]:


In [ ]:
def default_cold_start_places(df,df_counties_u, day_trip_locations,full_trip_table,df_poi_travel_info,number_days = [1,2,3,4,5]):
    
    df_c = df_counties_u.groupby(['State full','County']).count().reset_index()
    for state, county,_,_ in df_c.values[105:150]:
        temp_df = df[(df['county'] == county) & (df['state'] == state)]
        if temp_df.shape[0]!=0:
            if sum(temp_df.adjusted_normal_time_spent) < 360:
                number_days = [1]
            elif sum(temp_df.adjusted_normal_time_spent) < 720:
                number_days = [1,2]
            big_events = temp_df[temp_df.adjusted_normal_time_spent > 180]
            med_events = temp_df[(temp_df.adjusted_normal_time_spent>= 120)&(temp_df.adjusted_normal_time_spent<=180)]
            small_events = temp_df[temp_df.adjusted_normal_time_spent < 120]
            for i in number_days:
                n_days = i
                full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info = \
                        default_search_cluster_events(df, df_counties_u, county, state, big_events,med_events, \
                                                      small_events, temp_df, n_days,day_trip_locations, full_trip_table,\
                                                      df_poi_travel_info)
                print county, state
                print full_trip_table.shape, len(day_trip_locations), new_trip_df1.shape, df_poi_travel_info.shape
    return None

In [ ]:
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])

# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
#                                        'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
#                                        'google_walking_url','driving_result','walking_result','google_driving_time',\
#                                        'google_walking_time'])
# google_travel_time_table.loc[0] = ['000000000001', "home", '0000', 'space', '0001', 999 ,999, 999.1,999.1, "999,999","999.1,999.1","http://google.com","http://google.com", "", "", 0, 0 ]

In [ ]:
# google_travel_time_table.index=google_travel_time_table.index.astype(int)

In [ ]:
# engine = create_engine('postgresql://Gon@localhost:5432/travel_with_friends')
# # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
# # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')

# full_trip_table.to_sql('full_trip_table',engine, if_exists = "append")
# day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "append")
# google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "append")
# # df.to_sql('poi_detail_table',engine, if_exists = "append")

In [ ]:


In [ ]:
def trip_df_cloest_distance(trip_df, event_type):
    points = trip_df[['coord0','coord1']].values.tolist()
    n, D = mk_matrix(points, distL2) # create the distance matrix
    if len(points) >= 3:
        if event_type == 'big':
            tour = nearest_neighbor(n, trip_df.shape[0]-1, D)     # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
        elif event_type == 'med':
            tour = nearest_neighbor(n, trip_df.shape[0]-2, D)     # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
        else:
            tour = nearest_neighbor(n, 0, D)     # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
        return tour
    else:
        return range(len(points))

In [ ]:


In [ ]:
def get_event_ids_list(trip_locations_id):
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select event_ids,event_type from day_trip_table where trip_locations_id = '%s' " %(trip_locations_id))
    event_ids,event_type = cur.fetchone()
    event_ids = ast.literal_eval(event_ids)
    conn.close()
    return event_ids,event_type

def db_event_cloest_distance(trip_locations_id=None,event_ids=None, event_type = 'add',new_event_id = None):
    if new_event_id or not event_ids:
        event_ids, event_type = get_event_ids_list(trip_locations_id)
        if new_event_id:
            event_ids.append(new_event_id)
            
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()

    print event_ids
    points = np.zeros((len(event_ids), 3))
    for i,v in enumerate(event_ids):
        cur.execute("select index, coord0, coord1 from poi_detail_table where index = %i;"%(float(v)))
        points[i] = cur.fetchone()
    conn.close()
    n,D = mk_matrix(points[:,1:], distL2)
    if len(points) >= 3:
        if event_type == 'add':
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            return np.array(event_ids)[tour], event_type
        #need to figure out other cases
        else:
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            return np.array(event_ids)[tour], event_type
    else:
        return np.array(event_ids), event_type

In [ ]:
event_ids = np.array([1,2,3])

In [ ]:
np.array(event_ids)

In [ ]:
def check_full_trip_id(full_trip_id, debug):
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select details from full_trip_table where full_trip_id = '%s'" %(full_trip_id)) 
    a = cur.fetchone()
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def check_full_trip_id(day_trip_id, debug):
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select details from day_trip_table where trip_locations_id = '%s'" %(day_trip_id)) 
    a = cur.fetchone()
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def check_travel_time_id(new_id):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select google_driving_time from google_travel_time_table where id_ = '%s'" %(new_id))
    a = cur.fetchone()
    conn.close()
    if bool(a):
        return True
    else:
        return False

In [ ]:
def sorted_events(info,ix):
    '''
    find the event_id, ranking and rating columns
    sorted base on ranking then rating
    
    return sorted list 
    '''
    event_ = info[ix][:,[0,4,5]]
    return np.array(sorted(event_, key=lambda x: (x[1], -x[2])))

In [ ]:
# test_ai_ids, type_= create_trip_df(big_,medium_,small_)
big_.shape, med_.shape, small_.shape

In [ ]:
med_[0,1] ,small_[0,1] , med_[0,0]
# small_[0:8,0].concatenate(medium_[0:2,0])
c = small_[0:10,]
d = med_[0:2,]
print np.vstack((c,d))
# print list(np.array(sorted(np.vstack((c,d)), key=lambda x: (x[1],-x[2])))[:,0])
print list(np.array(sorted(np.vstack((small_[0:10,:],med_)), key=lambda x: (x[1],-x[2])))[:,0])
# np.vstack((small_[0:10,],med_))

In [ ]:
a_ids, a_type=create_event_id_list(big_,med_,small_)
# conn = psycopg2.connect(conn_str)
# cur = conn.cursor()
# for i,v in enumerate(a_ids):
# #     print i, v, type(v)
#     cur.execute("select index, coord0, coord1 from poi_detail_table where index = %i;"%(float(v)))
#     aaa = cur.fetchone()
# #     print aaa
# conn.close()

# new_a_ids, new_a_type=db_event_cloest_distance(event_ids = a_ids, event_type = a_type)
# print new_a_ids, new_a_type

In [ ]:
def create_event_id_list(big_,medium_,small_):
    event_type = ''
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_[0,1] <= medium_[0,1]):
            if small_.shape[0] >= 6:
                event_ids = np.concatenate((big_[0,0],small_[0:6,0]),axis=0)  
            else:
                event_ids = np.concatenate((big_[0,0],small_[:,0]),axis=0)
            event_type = 'big'
        else:
            if small_.shape[0] >= 8:
                event_ids = np.concatenate((medium_[0:2,0],small_[0:8,0]),axis=0)
            else:
                event_ids = np.concatenate((medium_[0:2,0],small_[:,0]),axis=0)
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            event_ids = np.concatenate((medium_[0:2,0],small_[0:8,0]),axis=0)
        else:
            event_ids = np.concatenate((medium_[0:2,0],small_[:,0]),axis=0)
        event_type = 'med'
    elif medium_.shape[0]> 0:
        if small_.shape[0] >= 10:
            event_ids = np.array(sorted(np.vstack((small_[0:10,:],medium_)), key=lambda x: (x[1],-x[2])))[:,0]
        else:
            event_ids = np.array(sorted(np.vstack((small_,medium_)), key=lambda x: (x[1],-x[2])))[:,0]
        event_type = 'small'
    else:
        if small_.shape[0]> 0:
            event_ids = small_[:,0]
        event_type = 'small'
    return event_ids, event_type

In [ ]:
def create_trip_df(big_,medium_,small_):
    event_type = ''
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_.iloc[0].poi_rank <= medium_.iloc[0].poi_rank):
            if small_.shape[0] >= 6:
                trip_df = small_.iloc[0:6].append(big_.iloc[0])
            else:
                trip_df = small_.append(big_.iloc[0])
            event_type = 'big'
        else:
            if small_.shape[0] >= 8:
                trip_df = small_.iloc[0:8].append(medium_.iloc[0:2])
            else:
                trip_df = small_.append(medium_.iloc[0:2])
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            trip_df = small_.iloc[0:8].append(medium_.iloc[0:2])
        else:
            trip_df = small_.append(medium_.iloc[0:2])
        event_type = 'med'
    else:
        if small_.shape[0] >= 10:
            trip_df = small_.iloc[0:10].append(medium_).sort_values(['poi_rank', 'rating'], ascending=[True, False])
        else:
            trip_df = small_.append(medium_).sort_values(['poi_rank', 'rating'], ascending=[True, False])
        event_type = 'small'
    return trip_df, event_type

In [ ]:
my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'
# my_key = 'AIzaSyAwx3xg6oJ0yiPV3MIunBa1kx6N7v5Tcw8'
def google_driving_walking_time(tour,trip_df,event_type):
    poi_travel_time_df = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
                                   'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
                                   'google_walking_url','driving_result','walking_result','google_driving_time',\
                                   'google_walking_time'])
    ids_, orig_names,orid_idxs,dest_names,dest_idxs,orig_coord0s,orig_coord1s,dest_coord0s,dest_coord1s = [],[],[],[],[],[],[],[],[]
    orig_coordss,dest_coordss,driving_urls,walking_urls,driving_results,walking_results,driving_times,walking_times = [],[],[],[],[],[],[],[]
    trip_id_list=[]
    for i in range(len(tour)-1):
        id_ = str(trip_df.loc[trip_df.index[tour[i]]].name) + '0000'+str(trip_df.loc[trip_df.index[tour[i+1]]].name)
        
        result_check_travel_time_id = check_travel_time_id(id_)
        if not result_check_travel_time_id:
    
            orig_name = trip_df.loc[trip_df.index[tour[i]]]['name']
            orig_idx = trip_df.loc[trip_df.index[tour[i]]].name
            dest_name = trip_df.loc[trip_df.index[tour[i+1]]]['name']
            dest_idx = trip_df.loc[trip_df.index[tour[i+1]]].name
            orig_coord0 = trip_df.loc[trip_df.index[tour[i]]]['coord0']
            orig_coord1 = trip_df.loc[trip_df.index[tour[i]]]['coord1']
            dest_coord0 = trip_df.loc[trip_df.index[tour[i+1]]]['coord0']
            dest_coord1 = trip_df.loc[trip_df.index[tour[i+1]]]['coord1']
            orig_coords = str(orig_coord1)+','+str(orig_coord0)
            dest_coords = str(dest_coord1)+','+str(dest_coord0)
            
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))
                
            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))
                
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_df = trip_df.drop(trip_df.iloc[tour[i+1]].name)
                new_tour = trip_df_cloest_distance(new_df,event_type)
                return google_driving_walking_time(new_tour,new_df, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except:            
                print driving_result

            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except:
                google_walking_time = 9999
                
            
            poi_travel_time_df.loc[len(df_poi_travel_time)]=[id_,orig_name,orig_idx,dest_name,dest_idx,orig_coord0,orig_coord1,dest_coord0,\
                                   dest_coord1,orig_coords,dest_coords,google_driving_url,google_walking_url,\
                                   str(driving_result),str(walking_result),google_driving_time,google_walking_time]
            driving_result = str(driving_result).replace("'", '"')
            walking_result = str(walking_result).replace("'", '"')

            conn = psycopg2.connect(conn_str)  
            cur = conn.cursor()  
            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
#             print "startindex:",  index , type(index)
#             index += 1
#             print "end index: " ,index
            cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,\
                                   dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,\
                                   str(driving_result), str(walking_result), google_driving_time, google_walking_time))
            conn.commit()
            conn.close()
        else:
            trip_id_list.append(id_)
    
    return tour, trip_df, poi_travel_time_df

In [ ]:
def db_google_driving_walking_time(event_ids, event_type):
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    google_ids = []
    driving_time_list = []
    walking_time_list = []
    name_list = []
    for i,v in enumerate(event_ids[:-1]):
        id_ = str(v) + '0000'+str(event_ids[i+1])
        result_check_travel_time_id = check_travel_time_id(id_)
        if not result_check_travel_time_id:
            cur.execute("select name, coord0, coord1 from poi_detail_table where index = '%s'"%(v))
            orig_name, orig_coord0, orig_coord1 = cur.fetchone()
            orig_idx = v
            cur.execute("select name, coord0, coord1 from poi_detail_table where index = '%s'"%(event_ids[i+1]))
            dest_name, dest_coord0, dest_coord1 = cur.fetchone()
            dest_idx = event_ids[i+1]
            orig_coords = str(orig_coord1)+','+str(orig_coord0)
            dest_coords = str(dest_coord1)+','+str(dest_coord0)
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))
                
            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_event_ids = list(event_ids)
                new_event_ids.pop(i+1)
                new_event_ids = db_event_cloest_distance(event_ids=new_event_ids, event_type = event_type)
                return db_google_driving_walking_time(new_event_ids, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except:            
                print v, id_, driving_result #need to debug for this
            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except:
                google_walking_time = 9999
#             return event_ids, google_driving_time, google_walking_time
        
            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
#             print "startindex:",  index , type(index)
#             index += 1
#             print "end index: " ,index
            driving_result = str(driving_result).replace("'",'"')
            walking_result = str(walking_result).replace("'",'"')
            orig_name = orig_name.replace("'","''")
            dest_name = dest_name.replace("'","''")
            cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,\
                                   dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,\
                                   str(driving_result), str(walking_result), google_driving_time, google_walking_time))
#             cur.execute("select google_driving_time, google_walking_time from google_travel_time_table \
#                          where id_ = '%s'" %(id_))
            conn.commit()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
        else:
            
            cur.execute("select orig_name, dest_name, google_driving_time, google_walking_time from google_travel_time_table \
                         where id_ = '%s'" %(id_))
            orig_name, dest_name, google_driving_time, google_walking_time = cur.fetchone()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
            
    
    conn.close()
    return event_ids, google_ids, name_list, driving_time_list, walking_time_list

In [ ]:
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
default = 1
n_days =3
full_day = 1
poi_coords = df_events[['coord0','coord1']]
trip_location_ids, full_trip_details =[],[]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
# print kmeans.labels_
i=0
current_events = []
big_ix = []
small_ix = []
med_ix = []
for ix, label in enumerate(kmeans.labels_):
    if label == i:
        time = df_events.iloc[ix].adjusted_normal_time_spent
        event_ix = df_events.iloc[ix].name
        current_events.append(event_ix)
        if time > 180 :
            big_ix.append(event_ix)
        elif time >= 120 :
            med_ix.append(event_ix)
        else:
            small_ix.append(event_ix)

#         all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
big_ = df_events.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
small_ = df_events.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
medium_ = df_events.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])

# trip_df, event_type = create_trip_df(big_,medium_,small_)
# tour = trip_df_cloest_distance(trip_df, event_type)
# new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
# new_trip_df = new_trip_df.iloc[new_tour]
# new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
# new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)

# event_ids, event_type=db_event_cloest_distance(trip_locations_id)
# event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
# event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
# db_address(event_ids)
# values = day_trip(event_ids, county, state, default, full_day,n_days,i)
# day_trip_locations.loc[len(day_trip_locations)] = values
# trip_location_ids.append(values[0])
# full_trip_details.extend(values[-1])

In [ ]:


In [ ]:
def get_fulltrip_data_default(state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
    county = find_county(state, city)
    trip_location_ids, full_trip_details =[],[]
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    
    if not check_full_trip_id(full_trip_id, debug):
        county_list_info = db_start_location(county, state, city)
        poi_coords = county_list_info[:,1:3]
        kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
        if not county_list_info:
            return "error: county_list_info is empty"
        for i in range(n_days):
            
            if not check_day_trip_id(day_trip, debug):
                trip_locations_id = '-'.join([str(state), str(county.replace(' ','-')),str(int(default)), str(n_days),str(i)])
                current_events, big_ix, small_ix, med_ix = [],[],[],[]

                for ix, label in enumerate(kmeans.labels_):
                    if label == i:
                        time = county_list_info[ix,3]
                        event_ix = county_list_info[ix,0]
                        current_events.append(event_ix)
                        if time > 180 :
                            big_ix.append(ix)
                        elif time >= 120 :
                            med_ix.append(ix)
                        else:
                            small_ix.append(ix)
                
                    
                
                big_ = sorted_events(county_list_info, big_ix)
                med_ = sorted_events(county_list_info, med_ix)
                small_ = sorted_events(county_list_info, small_ix)
                
                event_ids, event_type = create_event_id_list(big_, med_, small_)
                event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type)
                event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
                event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
                db_address(event_ids)
                values = db_day_trip(event_ids, county, state, default, full_day,n_days,i)
#                 insert to day_trip ....
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                cur.execute("insert into day_trip_table (trip_locations_id,full_day, default, county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" %( trip_location_id, full_day, default, county, state, details, event_type, event_ids))
                conn.commit()
                conn.close()
                trip_location_ids.append(values[0])
                full_trip_details.extend(values[-1])

            else:
                print "error: already have this day, please check the next day"
                trip_location_ids.append(trip_locations_id)
#                 call db find day trip detail 
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                cur.execute("select details from day_trip_table where trip_locations_id = '%s';"%(trip_locations_id) )
                day_trip_detail = fetchall()
                conn.close()
                full_trip_details.extend(day_trip_detail)

        full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
        details = full_trip_details
        user_id = "Admin"
        conn = psycopg2.connect(conn_str)
        cur = conn.cursor()
        cur.execute("insert into full_trip_table(user_id, full_trip_id,trip_location_ids, default, county, state, details, n_days) VALUES ('%s', '%s', '%s', %s, '%s', '%s', '%s', %s)" %(user_id, full_trip_id, str(trip_location_ids), default, county, state, details, n_days))
        conn.commit()
        conn.close()

        return "finish update %s, %s into database" %(state, county)
    else:
        return "%s, %s already in database" %(state, county)

In [ ]:
def db_day_trip(event_ids, county, state, default, full_day,n_days,i):
    conn=psycopg2.connect(conn_str)
    cur = conn.cursor()

#     cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
#     a = cur.fetchall()
#     state = a[0][0].upper()
#     county = a[0][1].upper()

    trip_locations_id = '-'.join([str(state), str(county.replace(' ','-')),str(int(default)), str(n_days),str(i)])

    #details dict includes: id, name,address, day

    cur.execute("select index, name, address from poi_detail_table where index in %s;" %(tuple(event_ids),))
    a = cur.fetchall()
    details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(len(a))]
    conn.close()
    return [trip_locations_id, full_day, default, county, state, details]

In [ ]:
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()

In [ ]:
def extend_full_trip_details(full_trip_details):
    details = {}
    addresses = []
    ids = []
    days = []
    names = []
    for item in full_trip_details:
        addresses.append(eval(item)['address'])
        ids.append(eval(item)['id'])
        days.append(eval(item)['day'])
        names.append(eval(item)['name'])
    details['addresses'] = addresses
    details['ids'] = ids
    details['days'] = days
    details['names'] = names
    return str(full_trip_details)

In [ ]:
event_ids

In [ ]:
print event_ids, google_ids, name_list, driving_time_list, walking_time_list

In [ ]:
init_db_tables()

In [ ]:
def remove_extra_events(trip_df, df_poi_travel_time):
    if sum(trip_df.adjusted_normal_time_spent)+sum(df_poi_travel_time.google_driving_time) > 480:
        new_trip_df = trip_df[:-1]
        new_df_poi_travel_time = df_poi_travel_time[:-1]
        return remove_extra_events(new_trip_df,new_df_poi_travel_time)
    else:
        return trip_df, df_poi_travel_time, sum(trip_df.adjusted_normal_time_spent)+sum(df_poi_travel_time.google_driving_time)
def db_remove_extra_events(event_ids, driving_time_list,walking_time_list):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()   
    cur.execute("select sum (adjusted_normal_time_spent) from poi_detail_table where index in %s;" %(tuple(event_ids),))
    time_spent = cur.fetchone()[0]
    conn.close()
    time_spent += sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list)))
    if time_spent > 480:
        update_event_ids = event_ids[:-1]
        update_driving_time_list = driving_time_list[:-1]
        update_walking_time_list = walking_time_list[:-1]
        return db_remove_extra_events(update_event_ids, update_driving_time_list, update_walking_time_list)
    else:
        return event_ids, driving_time_list, walking_time_list, time_spent

In [ ]:
def df_addresses(new_trip_df1, new_df_poi_travel_time):
    my_lst = []
    print new_trip_df1.index.values
    for i in new_trip_df1.index.values:
        temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.orig_idx.values]
        if temp_df.shape[0]>0:
            address = eval(temp_df.driving_result.values[0])['origin_addresses'][0]
            my_lst.append(address)
        else:
            try:
                temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.dest_idx.values]
                address = eval(temp_df.driving_result.values[0])['destination_addresses'][0]
                my_lst.append(address)
            except:
                print new_trip_df1, new_df_poi_travel_time
    return my_lst

def check_address(index):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select address from poi_detail_table where index = %s;"%(index))
    a = cur.fetchone()[0]
    conn.close()
    if a:
        return True
    else:
        return False
def db_address(event_ids):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    for i in event_ids[:-1]:
        if not check_address(i):
            cur.execute("select driving_result from google_travel_time_table where orig_idx = %s;" %(i))
            a= cur.fetchone()[0]
            add = ast.literal_eval(a)['origin_addresses'][0]
            cur.execute("update poi_detail_table set address = '%s' where index = %s;" %(add, i))
            conn.commit()
    last = event_ids[-1]
    if not check_address(last):
        cur.execute("select driving_result from google_travel_time_table where dest_idx = %s;" %(last))
        a= cur.fetchone()[0]
        add = ast.literal_eval(a)['destination_addresses'][0]
        cur.execute("update poi_detail_table set address = '%s' where index = %s;" %(add, last))
        conn.commit()
    conn.close()

In [ ]:


In [ ]:


In [ ]:
from numpy import *

test_event_ids_list =  append(273, event_ids)
# event_ids
print test_event_ids_list

In [ ]:
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()

In [ ]:


In [ ]:


In [ ]:


In [ ]:
day_trip_locations = 'San Diego, California'

f, d, n, d= search_cluster_events(df, county, state, city, 3, day_trip_locations, full_trip_table, default = True)

In [ ]:


In [ ]:


In [ ]:


In [ ]:
import time
t1=time.time()
# [index, ranking,score]
a = [[1,2,3],[2,2,6],[3,3,3],[4,3,10]]
from operator import itemgetter
print sorted(a, key=lambda x: (x[1], -x[2]))

time.time()-t1

In [ ]:


In [ ]:
'''
Most important event that will call all the functions and return the day details for the trip
'''


def search_cluster_events(df, county, state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
    
    county, df_events =cold_start_places(df, county, state, city, n_days) 
    
    poi_coords = df_events[['coord0','coord1']]
    kmeans = KMeans(n_clusters=n_days).fit(poi_coords)

    new_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    if not check_full_trip_id(new_trip_id, debug):
        
        trip_location_ids = []
        full_trip_details = []
        for i in range(n_days):
            current_events = []
            big_ix = []
            small_ix = []
            med_ix = []
            for ix, label in enumerate(kmeans.labels_):
                if label == i:
                    event_ix = poi_coords.index[ix]
                    current_events.append(event_ix)
                    if event_ix in big.index:
                        big_ix.append(event_ix)
                    elif event_ix in med.index:
                        med_ix.append(event_ix)
                    else:
                        small_ix.append(event_ix)
            all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
            big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
    #         print 'big:', big_, 'small:', small_, 'msize:', medium_
            trip_df, event_type = create_trip_df(big_,medium_,small_)
    #         print event_type
            tour = trip_df_cloest_distance(trip_df, event_type)
    #         print tour
            new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
    #         print new_tour, new_trip_df
    #         return new_trip_df, df_poi_travel_time
            new_trip_df = new_trip_df.iloc[new_tour]
            new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
    #         print new_trip_df1
            new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
    #         print 'total time:', total_ti
            values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
            day_trip_locations.loc[len(day_trip_locations)] = values
            trip_location_ids.append(values[0])
            full_trip_details.extend(values[-1])
            df_poi_travel_info = df_poi_travel_info.append(new_df_poi_travel_time)
            
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    details = extend_full_trip_details(full_trip_details)
    full_trip_table.loc[len(full_trip_table)] = ["adam", full_trip_id, str(trip_location_ids), default, county, state, details, n_days]
    
    
    return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info

In [ ]:
def default_search_cluster_events(df, df_counties_u, county, state, big,med, small, \
                                  temp, n_days,day_trip_locations, full_trip_table,df_poi_travel_info):
#     df_poi_travel_info = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
#                                        'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
#                                        'google_walking_url','driving_result','walking_result','google_driving_time',\
#                                        'google_walking_time'])
    poi_coords = temp[['coord0','coord1']]
    kmeans = KMeans(n_clusters=n_days, random_state=0).fit(poi_coords)
#     print kmeans.labels_
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    trip_location_ids = []
    full_trip_details = []
    for i in range(n_days):
        current_events = []
        big_ix = []
        small_ix = []
        med_ix = []
        for ix, label in enumerate(kmeans.labels_):
            if label == i:
                event_ix = poi_coords.index[ix]
                current_events.append(event_ix)
                if event_ix in big.index:
                    big_ix.append(event_ix)
                elif event_ix in med.index:
                    med_ix.append(event_ix)
                else:
                    small_ix.append(event_ix)
        all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
        big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        trip_df, event_type = create_trip_df(big_,medium_,small_)
        tour = trip_df_cloest_distance(trip_df, event_type)
        new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
        new_trip_df = new_trip_df.iloc[new_tour]
        new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
        new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
        values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
        day_trip_locations.loc[len(day_trip_locations)] = values
        trip_location_ids.append(values[0])
        full_trip_details.extend(values[-1])
#         print 'trave time df \n',new_df_poi_travel_time
        df_poi_travel_info = df_poi_travel_info.append(new_df_poi_travel_time)
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    details = extend_full_trip_details(full_trip_details)
    full_trip_table.loc[len(full_trip_table)] = [user_id, full_trip_id, \
                                                 str(trip_location_ids), default, county, state, details, n_days]
    return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info

In [ ]:


In [ ]:


In [ ]:
###Next Steps: Add control from the users. funt1: allow to add events,(specific name or auto add)
### auto route to the most appropirate order
###funt2: allow to reorder the events. funt3: allow to delete the events. 
###funt4: allow to switch a new event-next to the switch and x mark icon,check mark to confirm the new place and auto order

###New table for the trip info...features including trip id, event place, days, specific date, trip details. (trip tour, trip)

def ajax_available_events(county, state):
    county=county.upper()
    state = state.title()
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select index, name from poi_detail_table where county='%s' and state='%s'" %(county,state))  
    poi_lst = [item for item in cur.fetchall()]
    conn.close()
    return poi_lst

def add_event(trip_locations_id, event_day, event_id=None, event_name=None, full_day = True, unseen_event = False):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
    (index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
    if unseen_event:
        index += 1
        trip_locations_id = '-'.join([str(eval(i)['id']) for i in eval(detail)])+'-'+event_name.replace(' ','-')+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id))
        a = cur.fetchone()
        if bool(a):
            conn.close()
            return trip_locations_id, a[0]
        else:
            cur.execute("select max(index) from day_trip_locations")
            index = cur.fetchone()[0]+1
            detail = list(eval(detail))
            #need to make sure the type is correct for detail!
            new_event = "{'address': 'None', 'id': 'None', 'day': %s, 'name': u'%s'}"%(event_day, event_name)
            detail.append(new_event)
            #get the right format of detail: change from list to string and remove brackets and convert quote type
            new_detail = str(detail).replace('"','').replace('[','').replace(']','').replace("'",'"')
            cur.execute("INSERT INTO day_trip_locations VALUES (%i, '%s',%s,%s,'%s','%s','%s');" %(index, trip_locations_id, full_day, False, county, state, new_detail))
            conn.commit()
            conn.close()
            return trip_locations_id, detail
    else:
        event_ids = add_event_cloest_distance(trip_locations_id, event_id)
        event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids,event_type = 'add')
        trip_locations_id = '-'.join(event_ids)+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id)) 
        a = cur.fetchone()
        if not a:
            details = []
            db_address(event_ids)
            for item in event_ids:
                cur.execute("select index, name, address from poi_detail_table where index = '%s';" %(item))
                a = cur.fetchone()
                detail = {'id': a[0],'name': a[1],'address': a[2], 'day': event_day}
                details.append(detail)
            #need to make sure event detail can append to table!
            cur.execute("insert into day_trip_table (trip_locations_id,full_day, default, county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" %( trip_location_id, full_day, False, county, state, details, event_type, event_ids))
            conn.commit()
            conn.close()
            return trip_locations_id, details
        else:
            conn.close()
            #need to make sure type is correct.
            return trip_locations_id, a[0]

def remove_event(trip_locations_id, remove_event_id, remove_event_name=None, event_day=None, full_day = True):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
    (index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
    new_event_ids = ast.literal_eval(event_ids)
    new_event_ids.remove(remove_event_id)
    new_trip_locations_id = '-'.join(str(event_id) for event_id in new_event_ids)
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(new_trip_locations_id))  
    check_id = cur.fetchone()
    if check_id:
        return new_trip_locations_id, check_id[-3]
    detail = ast.literal_eval(detail[1:-1])
    for index, trip_detail in enumerate(detail):
        if ast.literal_eval(trip_detail)['id'] == remove_event_id:
            remove_index = index
            break
    new_detail = list(detail)
    new_detail.pop(remove_index)
    new_detail =  str(new_detail).replace("'","''")
    default = False
    cur.execute("select max(index) from day_trip_table where trip_locations_id='%s'" %(trip_locations_id)) 
    new_index = cur.fetchone()[0]
    new_index+=1
    cur.execute("INSERT INTO day_trip_table VALUES (%i, '%s', %s, %s, '%s', '%s', '%s', '%s','%s');" \
                %(new_index, new_trip_locations_id, full_day, default, county, state, new_detail, event_type, new_event_ids))  
    conn.commit()
    conn.close()
    return new_trip_locations_id, new_detail

def event_type_time_spent(adjusted_normal_time_spent):
    if adjusted_normal_time_spent > 180:
        return 'big'
    elif adjusted_normal_time_spent >= 120:
        return 'med'
    else:
        return 'small'
def switch_event_list(full_trip_id, trip_locations_id, switch_event_id, switch_event_name=None, event_day=None, full_day = True):
#     new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select name, city, county, state, coord0, coord1,poi_rank, adjusted_normal_time_spent from poi_detail_table where index=%s" %(switch_event_id))
    name, city, county, state,coord0, coord1,poi_rank, adjusted_normal_time_spent = cur.fetchone()
    event_type = event_type_time_spent(adjusted_normal_time_spent)
    avialable_lst = ajax_available_events(county, state)
    cur.execute("select trip_location_ids,details from full_trip_table where full_trip_id=%s" %(full_trip_id))
    full_trip_detail = cur.fetchone()
    full_trip_detail = ast.literal_eval(full_trip_detail)
    full_trip_ids = [ast.literal_eval(item)['id'] for item in full_trip_detail]
    switch_lst = []
    for item in avialable_lst:
        index = item[0]
        if index not in full_trip_ids:
            event_ids = [switch_event_id, index]
            event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids, event_type='switch')
            if min(driving_time_list[0], walking_time_list[0]) <= 60:
                cur.execute("select poi_rank, rating, adjusted_normal_time_spent from poi_detail_table where index=%s" %(index))
                target_poi_rank, target_rating, target_adjusted_normal_time_spent = cur.fetchone()
                target_event_type = event_type_time_spent(target_adjusted_normal_time_spent)
                switch_lst.append([target_poi_rank, target_rating, target_event_type==event_type])
    #need to sort target_event_type, target_poi_rank and target_rating
    return {switch_event_id: switch_lst}

def switch_event(trip_locations_id, switch_event_id, final_event_id, event_day):
    new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
    new_trip_locations_id, new_detail = add_event(new_trip_locations_id, event_day, final_event_id, full_day = True, unseen_event = False)
    return new_trip_locations_id, new_detail

In [ ]:
ajax_available_events(county='San Francisco', state = "California")
county='San Francisco'.upper()
state = "California"
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
cur.execute("select index, name from poi_detail_table where county='%s' and state='%s'" %(county,state))  
full_trip_id = 'CALIFORNIA-SAN-DIEGO-1-3'
cur.execute("select details from full_trip_table where full_trip_id='%s'" %(full_trip_id))
full_trip_detail = cur.fetchone()[0]
full_trip_detail = ast.literal_eval(full_trip_detail)
[ast.literal_eval(item)['id'] for item in full_trip_detail]

In [ ]:
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
cur.execute("select * from poi_detail_table where name='%s'" %(trip_locations_id))  
    
'Blue Springs State Park' in df.name

In [ ]:
"select * from poi_detail_table where index=%s" %(remove_event_id)

In [ ]:
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
remove_event_id = 3486
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
(index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
# event_ids = ast.literal_eval(event_ids)
# print detail, '\n'
new_event_ids = ast.literal_eval(event_ids)
new_event_ids.remove(remove_event_id)
new_trip_locations_id = '-'.join(str(id_) for id_ in new_event_ids)
# event_ids.remove(remove_event_id)
detail = ast.literal_eval(detail[1:-1])
print type(detail[0])
for index, trip_detail in enumerate(detail):
    if ast.literal_eval(trip_detail)['id'] == remove_event_id:
        remove_index = index
        break
new_detail = list(detail)
new_detail.pop(remove_index)
new_detail =  str(new_detail).replace("'","''")

In [ ]:
'-'.join(str(id_) for id_ in new_event_ids)

In [ ]:
#Tasks:
#0. Run the initial to debug with all the cities and counties for the poi_detail_table in hand. 
#1. Continue working on add/suggest/remove features
#2. Start the new feature that allows user to generate the google map route for the day
#3. new feature that allows user to explore outside the city from a direction away from the started location
#4. get all the state and national park data into database and rework the ranking system and the poi_detail_table!

In [2]:



---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-2-ac2f0b50912b> in <module>()
      3 city = "Los Angles"
      4 n_days = 1
----> 5 id_, trip  = trip.get_fulltrip_data(state, city, n_days)

/Users/Gon/Desktop/travel_with_friends/city_trip.pyc in get_fulltrip_data(state, city, n_days, full_day, regular, debug)
     14         trip_location_ids, full_trip_details =[],[]
     15         county_list_info = db_start_location(county, state, city)
---> 16         time_spent = county_list_info[:,3]
     17         poi_coords = county_list_info[:,1:3]
     18         kmeans = KMeans(n_clusters=n_days).fit(poi_coords)

IndexError: too many indices for array

In [ ]: