notebook.community

Edit and run



In [18]:

    
import pandas as pd
import numpy as np
import requests
import psycopg2
import json
import simplejson
import urllib
import config
import ast

from operator import itemgetter
from sklearn.cluster import KMeans
from sqlalchemy import create_engine



In [469]:

    
!pip install --upgrade pip
!pip install sqlalchemy
!pip install psycopg2
!pip install simplejson
!pip install config









    



Requirement already up-to-date: pip in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages
Requirement already satisfied: sqlalchemy in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages
Requirement already satisfied: psycopg2 in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages
Requirement already satisfied: simplejson in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages
Requirement already satisfied: config in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages



In [33]:

    
conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"
# conn_str = "dbname='travel_with_friends' user='Zoesh' host='localhost'"



In [29]:

    
import math
import random


def distL2((x1,y1), (x2,y2)):
    """Compute the L2-norm (Euclidean) distance between two points.

    The distance is rounded to the closest integer, for compatibility
    with the TSPLIB convention.

    The two points are located on coordinates (x1,y1) and (x2,y2),
    sent as parameters"""
    xdiff = x2 - x1
    ydiff = y2 - y1
    return math.sqrt(xdiff*xdiff + ydiff*ydiff) + .5


def distL1((x1,y1), (x2,y2)):
    """Compute the L1-norm (Manhattan) distance between two points.

    The distance is rounded to the closest integer, for compatibility
    with the TSPLIB convention.

    The two points are located on coordinates (x1,y1) and (x2,y2),
    sent as parameters"""
    return abs(x2-x1) + abs(y2-y1)+.5


def mk_matrix(coord, dist):
    """Compute a distance matrix for a set of points.

    Uses function 'dist' to calculate distance between
    any two points.  Parameters:
    -coord -- list of tuples with coordinates of all points, [(x1,y1),...,(xn,yn)]
    -dist -- distance function
    """
    n = len(coord)
    D = {}      # dictionary to hold n times n matrix
    for i in range(n-1):
        for j in range(i+1,n):
            [x1,y1] = coord[i]
            [x2,y2] = coord[j]
            D[i,j] = dist((x1,y1), (x2,y2))
            D[j,i] = D[i,j]
    return n,D

def read_tsplib(filename):
    "basic function for reading a TSP problem on the TSPLIB format"
    "NOTE: only works for 2D euclidean or manhattan distances"
    f = open(filename, 'r');

    line = f.readline()
    while line.find("EDGE_WEIGHT_TYPE") == -1:
        line = f.readline()

    if line.find("EUC_2D") != -1:
        dist = distL2
    elif line.find("MAN_2D") != -1:
        dist = distL1
    else:
        print "cannot deal with non-euclidean or non-manhattan distances"
        raise Exception

    while line.find("NODE_COORD_SECTION") == -1:
        line = f.readline()

    xy_positions = []
    while 1:
        line = f.readline()
        if line.find("EOF") != -1: break
        (i,x,y) = line.split()
        x = float(x)
        y = float(y)
        xy_positions.append((x,y))

    n,D = mk_matrix(xy_positions, dist)
    return n, xy_positions, D


def mk_closest(D, n):
    """Compute a sorted list of the distances for each of the nodes.

    For each node, the entry is in the form [(d1,i1), (d2,i2), ...]
    where each tuple is a pair (distance,node).
    """
    C = []
    for i in range(n):
        dlist = [(D[i,j], j) for j in range(n) if j != i]
        dlist.sort()
        C.append(dlist)
    return C


def length(tour, D):
    """Calculate the length of a tour according to distance matrix 'D'."""
    z = D[tour[-1], tour[0]]    # edge from last to first city of the tour
    for i in range(1,len(tour)):
        z += D[tour[i], tour[i-1]]      # add length of edge from city i-1 to i
    return z


def randtour(n):
    """Construct a random tour of size 'n'."""
    sol = range(n)      # set solution equal to [0,1,...,n-1]
    random.shuffle(sol) # place it in a random order
    return sol


def nearest(last, unvisited, D):
    """Return the index of the node which is closest to 'last'."""
    near = unvisited[0]
    min_dist = D[last, near]
    for i in unvisited[1:]:
        if D[last,i] < min_dist:
            near = i
            min_dist = D[last, near]
    return near


def nearest_neighbor(n, i, D):
    """Return tour starting from city 'i', using the Nearest Neighbor.

    Uses the Nearest Neighbor heuristic to construct a solution:
    - start visiting city i
    - while there are unvisited cities, follow to the closest one
    - return to city i
    """
    unvisited = range(n)
    unvisited.remove(i)
    last = i
    tour = [i]
    while unvisited != []:
        next = nearest(last, unvisited, D)
        tour.append(next)
        unvisited.remove(next)
        last = next
    return tour



def exchange_cost(tour, i, j, D):
    """Calculate the cost of exchanging two arcs in a tour.

    Determine the variation in the tour length if
    arcs (i,i+1) and (j,j+1) are removed,
    and replaced by (i,j) and (i+1,j+1)
    (note the exception for the last arc).

    Parameters:
    -t -- a tour
    -i -- position of the first arc
    -j>i -- position of the second arc
    """
    n = len(tour)
    a,b = tour[i],tour[(i+1)%n]
    c,d = tour[j],tour[(j+1)%n]
    return (D[a,c] + D[b,d]) - (D[a,b] + D[c,d])


def exchange(tour, tinv, i, j):
    """Exchange arcs (i,i+1) and (j,j+1) with (i,j) and (i+1,j+1).

    For the given tour 't', remove the arcs (i,i+1) and (j,j+1) and
    insert (i,j) and (i+1,j+1).

    This is done by inverting the sublist of cities between i and j.
    """
    n = len(tour)
    if i>j:
        i,j = j,i
    assert i>=0 and i<j-1 and j<n
    path = tour[i+1:j+1]
    path.reverse()
    tour[i+1:j+1] = path
    for k in range(i+1,j+1):
        tinv[tour[k]] = k


def improve(tour, z, D, C):
    """Try to improve tour 't' by exchanging arcs; return improved tour length.

    If possible, make a series of local improvements on the solution 'tour',
    using a breadth first strategy, until reaching a local optimum.
    """
    n = len(tour)
    tinv = [0 for i in tour]
    for k in range(n):
        tinv[tour[k]] = k  # position of each city in 't'
    for i in range(n):
        a,b = tour[i],tour[(i+1)%n]
        dist_ab = D[a,b]
        improved = False
        for dist_ac,c in C[a]:
            if dist_ac >= dist_ab:
                break
            j = tinv[c]
            d = tour[(j+1)%n]
            dist_cd = D[c,d]
            dist_bd = D[b,d]
            delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
            if delta < 0:       # exchange decreases length
                exchange(tour, tinv, i, j);
                z += delta
                improved = True
                break
        if improved:
            continue
        for dist_bd,d in C[b]:
            if dist_bd >= dist_ab:
                break
            j = tinv[d]-1
            if j==-1:
                j=n-1
            c = tour[j]
            dist_cd = D[c,d]
            dist_ac = D[a,c]
            delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
            if delta < 0:       # exchange decreases length
                exchange(tour, tinv, i, j);
                z += delta
                break
    return z


def localsearch(tour, z, D, C=None):
    """Obtain a local optimum starting from solution t; return solution length.

    Parameters:
      tour -- initial tour
      z -- length of the initial tour
      D -- distance matrix
    """
    n = len(tour)
    if C == None:
        C = mk_closest(D, n)     # create a sorted list of distances to each node
    while 1:
        newz = improve(tour, z, D, C)
        if newz < z:
            z = newz
        else:
            break
    return z


def multistart_localsearch(k, n, D, report=None):
    """Do k iterations of local search, starting from random solutions.

    Parameters:
    -k -- number of iterations
    -D -- distance matrix
    -report -- if not None, call it to print verbose output

    Returns best solution and its cost.
    """
    C = mk_closest(D, n) # create a sorted list of distances to each node
    bestt=None
    bestz=None
    for i in range(0,k):
        tour = randtour(n)
        z = length(tour, D)
        z = localsearch(tour, z, D, C)
        if z < bestz or bestz == None:
            bestz = z
            bestt = list(tour)
            if report:
                report(z, tour)

    return bestt, bestz



In [471]:

    
# db_name = "travel_with_friends"
# TABLES ={}
# TABLES['full_trip_table'] = (
#     "CREATE TABLE `full_trip_table` ("
#     "  `user_id` int(11) NOT NULL AUTO_INCREMENT,"
#     "  `full_trip_id` date NOT NULL,"
#     "  `trip_location_ids` varchar(14) NOT NULL,"
#     "  `default` varchar(16) NOT NULL,"
#     "  `county` enum('M','F') NOT NULL,"
#     "  `state` date NOT NULL,"
#     "  `details` ,"
#     "  `n_days`,"
#     "  PRIMARY KEY (`full_trip_id`)"
#     ") ENGINE=InnoDB")



In [472]:

    
# def create_tables():
#     """ create tables in the PostgreSQL database"""
#     commands = (
#         """
#         CREATE TABLE full_trip_table (
#             index INTEGER PRIMARY KEY,
#             user_id VARCHAR(225) NOT NULL,
#             full_trip_id VARCHAR(225) NOT NULL,
#             trip_location_ids VARCHAR(225),
#             default BOOLEAN NOT NULL,
#             county VARCHAR(225) NOT NULL,
#             state VARCHAR(225) NOT NULL,
#             details VARCHAR(MAX),
#             n_days VARCHAR(225) NOT NULL
#         )
#         """,
#         """ CREATE TABLE day_trip_table (
#                 trip_locations_id 
#                 full_day
#                 default 
#                 county 
#                 state
#                 details
#                 )
#         """,
#         """
#         CREATE TABLE poi_detail_table (
#                 part_id INTEGER PRIMARY KEY,
#                 file_extension VARCHAR(5) NOT NULL,
#                 drawing_data BYTEA NOT NULL,
#                 FOREIGN KEY (part_id)
#                 REFERENCES parts (part_id)
#                 ON UPDATE CASCADE ON DELETE CASCADE
#         )
#         """,
#         """
#         CREATE TABLE google_travel_time_table (
#                 index INTEGER PRIMARY KEY,
#                 id_ VARCHAR NOT NULL,
#                 orig_name VARCHAR,
#                 orig_idx VARCHAR,
#                 dest_name VARCHAR,
#                 dest_idx VARCHAR,
#                 orig_coord0 INTEGER,
#                 orig_coord1 INTEGER,
#                 dest_coord0 INTEGER,
#                 dest_coord1 INTEGER,
#                 orig_coords VARCHAR,
#                 dest_coords VARCHAR,
#                 google_driving_url VARCHAR,
#                 google_walking_url VARCHAR,
#                 driving_result VARCHAR,
#                 walking_result VARCHAR,
#                 google_driving_time INTEGER,
#                 google_walking_time INTEGER
                

#         )
#         """)
#     conn = None
#     try:
#         # read the connection parameters
#         params = config()
#         # connect to the PostgreSQL server
#         conn = psycopg2.connect(**params)
#         cur = conn.cursor()
#         # create table one by one
#         for command in commands:
#             cur.execute(command)
#         # close communication with the PostgreSQL database server
#         cur.close()
#         # commit the changes
#         conn.commit()
#     except (Exception, psycopg2.DatabaseError) as error:
#         print(error)
#     finally:
#         if conn is not None:
#             conn.close()



In [473]:

    
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])

# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
#                                        'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
#                                        'google_walking_url','driving_result','walking_result','google_driving_time',\
#                                        'google_walking_time'])



In [87]:

    
# read poi details csv file 
poi_detail = pd.read_csv("./step9_poi.csv", index_col=0)
poi_detail['address'] = None
poi_detail['rating']=poi_detail['rating'].fillna(0)
#read US city state and county csv file
# df_counties = pd.read_csv('./us_cities_states_counties.csv',sep='|')
#find counties without duplicate
# df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()



In [34]:

    
def init_db_tables():
    full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

    day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details','event_type','event_ids'])

    google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
                                           'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
                                           'google_walking_url','driving_result','walking_result','google_driving_time',\
                                           'google_walking_time'])
    day_trip_locations_table.loc[0] = ['CALIFORNIA-SAN-DIEGO-1-3-0', True, True, 'SAN DIEGO', 'California',
       ["{'address': '15500 San Pasqual Valley Rd, Escondido, CA 92027, USA', 'id': 2259, 'day': 0, 'name': u'San Diego Zoo Safari Park'}", "{'address': 'Safari Walk, Escondido, CA 92027, USA', 'id': 2260, 'day': 0, 'name': u'Meerkat'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3486, 'day': 0, 'name': u'Stone'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3487, 'day': 0, 'name': u'Stone Brewery'}", "{'address': 'Mount Woodson Trail, Poway, CA 92064, USA', 'id': 4951, 'day': 0, 'name': u'Lake Poway'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4953, 'day': 0, 'name': u'Potato Chip Rock'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4952, 'day': 0, 'name': u'Mt. Woodson'}"],
       'big','[2259, 2260,3486,3487,4951,4953,4952]']
    google_travel_time_table.loc[0] = ['439300002871', u'Moonlight Beach', 4393.0,
       u'Carlsbad Flower Fields', 2871.0, -117.29692141333341,
       33.047769600024424, -117.3177652511278, 33.124079753475236,
       '33.0477696,-117.296921413', '33.1240797535,-117.317765251',
       'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=driving&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
       'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=walking&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
       "{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '14 mins', 'value': 822}, 'distance': {'text': '10.6 km', 'value': 10637}, 'status': 'OK'}]}], 'origin_addresses': ['233 C St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
       "{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '2 hours 4 mins', 'value': 7457}, 'distance': {'text': '10.0 km', 'value': 10028}, 'status': 'OK'}]}], 'origin_addresses': ['498 B St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
       13.0, 124.0]
    full_trip_table.loc[0] = ['gordon_lee01', 'CALIFORNIA-SAN-DIEGO-1-3',
       "['CALIFORNIA-SAN-DIEGO-1-3-0', 'CALIFORNIA-SAN-DIEGO-1-3-1', 'CALIFORNIA-SAN-DIEGO-1-3-2']",
       True, 'SAN DIEGO', 'California',
       '["{\'address\': \'15500 San Pasqual Valley Rd, Escondido, CA 92027, USA\', \'id\': 2259, \'day\': 0, \'name\': u\'San Diego Zoo Safari Park\'}", "{\'address\': \'Safari Walk, Escondido, CA 92027, USA\', \'id\': 2260, \'day\': 0, \'name\': u\'Meerkat\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3486, \'day\': 0, \'name\': u\'Stone\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3487, \'day\': 0, \'name\': u\'Stone Brewery\'}", "{\'address\': \'Mount Woodson Trail, Poway, CA 92064, USA\', \'id\': 4951, \'day\': 0, \'name\': u\'Lake Poway\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4953, \'day\': 0, \'name\': u\'Potato Chip Rock\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4952, \'day\': 0, \'name\': u\'Mt. Woodson\'}", "{\'address\': \'1 Legoland Dr, Carlsbad, CA 92008, USA\', \'id\': 2870, \'day\': 1, \'name\': u\'Legoland\'}", "{\'address\': \'5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA\', \'id\': 2871, \'day\': 1, \'name\': u\'Carlsbad Flower Fields\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2089, \'day\': 1, \'name\': u\'Oceanside Pier\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2090, \'day\': 1, \'name\': u\'Pier\'}", "{\'address\': \'1016-1024 Neptune Ave, Encinitas, CA 92024, USA\', \'id\': 2872, \'day\': 1, \'name\': u\'Encinitas\'}", "{\'address\': \'625 Pan American Rd E, San Diego, CA 92101, USA\', \'id\': 147, \'day\': 2, \'name\': u\'Balboa Park\'}", "{\'address\': \'1849-1863 Zoo Pl, San Diego, CA 92101, USA\', \'id\': 152, \'day\': 2, \'name\': u\'San Diego Zoo\'}", "{\'address\': \'701-817 Coast Blvd, La Jolla, CA 92037, USA\', \'id\': 148, \'day\': 2, \'name\': u\'La Jolla\'}", "{\'address\': \'10051-10057 Pebble Beach Dr, Santee, CA 92071, USA\', \'id\': 4630, \'day\': 2, \'name\': u\'Santee Lakes\'}", "{\'address\': \'Lake Murray Bike Path, La Mesa, CA 91942, USA\', \'id\': 4545, \'day\': 2, \'name\': u\'Lake Murray\'}", "{\'address\': \'4905 Mt Helix Dr, La Mesa, CA 91941, USA\', \'id\': 4544, \'day\': 2, \'name\': u\'Mt. Helix\'}", "{\'address\': \'1720 Melrose Ave, Chula Vista, CA 91911, USA\', \'id\': 1325, \'day\': 2, \'name\': u\'Thick-billed Kingbird\'}", "{\'address\': \'711 Basswood Ave, Imperial Beach, CA 91932, USA\', \'id\': 1326, \'day\': 2, \'name\': u\'Lesser Sand-Plover\'}"]',
       3.0]
    engine = create_engine('postgresql://zoesh@localhost:5432/travel_with_friends')
    # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
    # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')

    full_trip_table.to_sql('full_trip_table',engine, if_exists = "replace")
    day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "replace")
    google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "replace")
    poi_detail.to_sql('poi_detail_table',engine, if_exists = "replace")
    
    
    df_counties = pd.read_csv('/Users/zoesh/Desktop/travel_with_friends/travel_with_friends/us_cities_states_counties.csv',sep='|')
    df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()
    df_counties_u.columns = ["city","state_abb","state","county"]
    df_counties_u.to_sql('county_table',engine, if_exists = "replace")
init_db_tables()



In [42]:

    
def cold_start_places(df, county, state, city, number_days, first_day_full = True, last_day_full = True):
    
    if len(county.values) != 0:
        county = county.values[0]
        temp_df = df[(df['county'] == county) & (df['state'] == state)]
    else:
        temp_df = df[(df['city'] == city) & (df['state'] == state)]

    return county, temp_df
def find_county(state, city):
    
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select county from county_table where city = '%s' and state = '%s';" %(city.title(), state.title()))

    county = cur.fetchone()
    conn.close()
    if county:
        return county[0]
    else:
        return None
def db_start_location(county, state, city):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    if county:
        cur.execute("select index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating from poi_detail_table where county = '%s' and state = '%s'; "%(county.upper(), state))

    else:
#         print "else"
        cur.execute("select index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating from poi_detail_table where city = '%s' and state = '%s'; "%(city, state))
    a = cur.fetchall()
    conn.close()
    return np.array(a)



In [183]:

    
a1= db_start_location('San Francisco',"California","San Francisco")
poi_coords =  a1[:,1:3]
n_days = 1
current_events =[]
big_ix, med_ix, small_ix =[],[],[]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
i = 0
for ix, label in enumerate(kmeans.labels_):
#     print ix, label
    if label == i:
        time = a1[ix,3]
        event_ix = a1[ix,0]
        
        current_events.append(event_ix)
        if time > 180 :
            big_ix.append(ix)

        elif time >= 120 :
            med_ix.append(ix)
        else:
            small_ix.append(ix)
print big_ix, med_ix, small_ix
# big_ = a1[[big_ix],4:]
print med_ix, ('wat')
print a1[ix,0], 'wat', a1[med_ix]
print med_ix
big_ = a1[big_ix][:,[0,4,5]]
med_ = a1[med_ix][:,[0,4,5]]
small_ = a1[small_ix][:,[0,4,5]]









    



[] [2, 10, 14] [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 19, 20]
[2, 10, 14] wat
293.0 wat [[ 275.         -122.42271178   37.82661824  180.            3.            4.5       ]
 [ 283.         -122.39371165   37.79535812  120.           11.            4.5       ]
 [ 287.         -122.38973205   37.77838412  120.           15.            4.5       ]]
[2, 10, 14]



In [17]:

    
from operator import itemgetter
med_s= sorted(med_, key=itemgetter(1,2))
a = [[1,3,3],[2,2,5],[3,4,4],[4,2,2]]
# [sorted(a, key=itemgetter(1,2)) for i in range(100)]
b = sorted(a, key=itemgetter(1,2))
a.sort(key=lambda k: (k[1], -k[2]), reverse=False)
# print med_s
print a









    



[[2, 2, 5], [4, 2, 2], [1, 3, 3], [3, 4, 4]]



In [484]:

    
def default_cold_start_places(df,df_counties_u, day_trip_locations,full_trip_table,df_poi_travel_info,number_days = [1,2,3,4,5]):
    
    df_c = df_counties_u.groupby(['State full','County']).count().reset_index()
    for state, county,_,_ in df_c.values[105:150]:
        temp_df = df[(df['county'] == county) & (df['state'] == state)]
        if temp_df.shape[0]!=0:
            if sum(temp_df.adjusted_normal_time_spent) < 360:
                number_days = [1]
            elif sum(temp_df.adjusted_normal_time_spent) < 720:
                number_days = [1,2]
            big_events = temp_df[temp_df.adjusted_normal_time_spent > 180]
            med_events = temp_df[(temp_df.adjusted_normal_time_spent>= 120)&(temp_df.adjusted_normal_time_spent<=180)]
            small_events = temp_df[temp_df.adjusted_normal_time_spent < 120]
            for i in number_days:
                n_days = i
                full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info = \
                        default_search_cluster_events(df, df_counties_u, county, state, big_events,med_events, \
                                                      small_events, temp_df, n_days,day_trip_locations, full_trip_table,\
                                                      df_poi_travel_info)
                print county, state
                print full_trip_table.shape, len(day_trip_locations), new_trip_df1.shape, df_poi_travel_info.shape
    return None



In [426]:

    
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])

# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
#                                        'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
#                                        'google_walking_url','driving_result','walking_result','google_driving_time',\
#                                        'google_walking_time'])
# google_travel_time_table.loc[0] = ['000000000001', "home", '0000', 'space', '0001', 999 ,999, 999.1,999.1, "999,999","999.1,999.1","http://google.com","http://google.com", "", "", 0, 0 ]



In [427]:

    
# google_travel_time_table.index=google_travel_time_table.index.astype(int)



In [428]:

    
# engine = create_engine('postgresql://Gon@localhost:5432/travel_with_friends')
# # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
# # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')

# full_trip_table.to_sql('full_trip_table',engine, if_exists = "append")
# day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "append")
# google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "append")
# # df.to_sql('poi_detail_table',engine, if_exists = "append")



In [ ]:



In [ ]:

    
def trip_df_cloest_distance(trip_df, event_type):
    points = trip_df[['coord0','coord1']].values.tolist()
    n, D = mk_matrix(points, distL2) # create the distance matrix
    if len(points) >= 3:
        if event_type == 'big':
            tour = nearest_neighbor(n, trip_df.shape[0]-1, D)     # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
        elif event_type == 'med':
            tour = nearest_neighbor(n, trip_df.shape[0]-2, D)     # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
        else:
            tour = nearest_neighbor(n, 0, D)     # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
        return tour
    else:
        return range(len(points))



In [253]:

    
def get_event_ids_list(trip_locations_id):
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select event_ids,event_type from day_trip_table where trip_locations_id = '%s' " %(trip_locations_id))
    event_ids,event_type = cur.fetchone()
    event_ids = ast.literal_eval(event_ids)
    conn.close()
    return event_ids,event_type

def db_event_cloest_distance(trip_locations_id=None,event_ids=None, event_type = 'add',new_event_id = None):
    if new_event_id or not event_ids:
        event_ids, event_type = get_event_ids_list(trip_locations_id)
        if new_event_id:
            event_ids.append(new_event_id)
            
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()
    points = np.zeros((len(event_ids), 3))
    for i,v in enumerate(event_ids):
        cur.execute("select index, coord0, coord1 from poi_detail_table where index = %i;"%(float(v)))
        points[i] = cur.fetchone()
    conn.close()
    n,D = mk_matrix(points[:,1:], distL2)
    if len(points) >= 3:
        if event_type == 'add':
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            return np.array(event_ids)[tour], event_type
        #need to figure out other cases
        else:
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            return np.array(event_ids)[tour], event_type
    else:
        return np.array(event_ids), event_type



In [259]:

    
def check_full_trip_id(full_trip_id, debug):
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select details from full_trip_table where full_trip_id = '%s'" %(full_trip_id)) 
    a = cur.fetchone()
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def check_day_trip_id(day_trip_id, debug):
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select details from day_trip_table where trip_locations_id = '%s'" %(day_trip_id)) 
    a = cur.fetchone()
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def check_travel_time_id(new_id):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select google_driving_time from google_travel_time_table where id_ = '%s'" %(new_id))
    a = cur.fetchone()
    conn.close()
    if bool(a):
        return True
    else:
        return False



In [175]:

    
#need to check about the code!
def sorted_events(info,ix):
    '''
    find the event_id, ranking and rating columns
    sorted base on ranking then rating
    
    return sorted list 
    '''
    event_ = info[ix][:,[0,4,5]]
    return np.array(sorted(event_, key=lambda x: (x[1], -x[2])))

def sorted_outside_events(info, ix):
    '''
    find the event_id, ranking and rating columns
    sorted base on ranking then rating
    
    return sorted list 
    '''
    event_ = info[:,[0,4,5]]
    return np.array(sorted(event_, key=lambda x: (x[1], -x[2])))



In [1185]:

    
med_[0,1] ,small_[0,1] , med_[0,0]
# small_[0:8,0].concatenate(medium_[0:2,0])
c = small_[0:10,]
d = med_[0:2,]
print np.vstack((c,d))
# print list(np.array(sorted(np.vstack((c,d)), key=lambda x: (x[1],-x[2])))[:,0])
print list(np.array(sorted(np.vstack((small_[0:10,:],med_)), key=lambda x: (x[1],-x[2])))[:,0])
# np.vstack((small_[0:10,],med_))









    



[[ 273.     1.     4.5]
 [ 274.     2.     4.5]
 [ 276.     4.     0. ]
 [ 277.     5.     0. ]
 [ 278.     6.     0. ]
 [ 279.     7.     4. ]
 [ 280.     8.     0. ]
 [ 281.     9.     0. ]
 [ 282.    10.     0. ]
 [ 284.    12.     4.5]
 [ 275.     3.     4.5]
 [ 283.    11.     4.5]]
[273.0, 274.0, 275.0, 276.0, 277.0, 278.0, 279.0, 280.0, 281.0, 282.0, 283.0, 284.0, 287.0]



In [1208]:

    
a_ids, a_type=create_event_id_list(big_,med_,small_)
# conn = psycopg2.connect(conn_str)
# cur = conn.cursor()
# for i,v in enumerate(a_ids):
# #     print i, v, type(v)
#     cur.execute("select index, coord0, coord1 from poi_detail_table where index = %i;"%(float(v)))
#     aaa = cur.fetchone()
# #     print aaa
# conn.close()

# new_a_ids, new_a_type=db_event_cloest_distance(event_ids = a_ids, event_type = a_type)
# print new_a_ids, new_a_type









    



[273.0, 274.0, 276.0, 277.0, 278.0, 279.0, 280.0, 281.0, 275.0, 283.0]
[ 273.  281.  278.  277.  280.  283.  276.  274.  279.  275.] med



In [257]:

    
def create_event_id_list(big_,medium_,small_):
    event_type = ''
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_[0,1] <= medium_[0,1]):
            if small_.shape[0] >= 6:
                event_ids = list(np.concatenate((small_[0:6,0],big_[0,0]),axis=0))  
            else:
                event_ids = list(np.concatenate((small_[:,0],big_[0,0]),axis=0)) 
            event_type = 'big'
        else:
            if small_.shape[0] >= 8:
                event_ids = list(np.concatenate((small_[0:8,0],medium_[0:2,0]),axis=0))
            else:
                event_ids = list(np.concatenate((small_[:,0],medium_[0:2,0]),axis=0))
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            event_ids = list(np.concatenate((small_[0:8,0],medium_[0:2,0]),axis=0))
        else:
            event_ids = list(np.concatenate((small_[:,0],medium_[0:2,0]),axis=0))
        event_type = 'med'
    else:
        if small_.shape[0] >= 10:
            if not medium_.shape[0]:
                event_ids = list(np.array(sorted(small_[0:10,:], key=lambda x: (x[1],-x[2])))[:,0])
            else:
                event_ids = list(np.array(sorted(np.vstack((small_[0:10,:],medium_)), key=lambda x: (x[1],-x[2])))[:,0])
        else:
            if not medium_.shape[0]:
                event_ids = list(np.array(sorted(small_[0:10,:], key=lambda x: (x[1],-x[2])))[:,0])
            else:
                event_ids = list(np.array(sorted(np.vstack((small_,medium_)), key=lambda x: (x[1],-x[2])))[:,0])
        event_type = 'small'
    return event_ids, event_type



In [278]:

    
from geopy.distance import vincenty
newport_ri = (41.49008, -70.312796)
cleveland_oh = (41.499498, -81.695391)
print(vincenty(newport_ri, cleveland_oh).miles)









    



<type 'float'>



In [486]:

    
def create_trip_df(big_,medium_,small_):
    event_type = ''
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_.iloc[0].poi_rank <= medium_.iloc[0].poi_rank):
            if small_.shape[0] >= 6:
                trip_df = small_.iloc[0:6].append(big_.iloc[0])
            else:
                trip_df = small_.append(big_.iloc[0])
            event_type = 'big'
        else:
            if small_.shape[0] >= 8:
                trip_df = small_.iloc[0:8].append(medium_.iloc[0:2])
            else:
                trip_df = small_.append(medium_.iloc[0:2])
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            trip_df = small_.iloc[0:8].append(medium_.iloc[0:2])
        else:
            trip_df = small_.append(medium_.iloc[0:2])
        event_type = 'med'
    else:
        if small_.shape[0] >= 10:
            trip_df = small_.iloc[0:10].append(medium_).sort_values(['poi_rank', 'rating'], ascending=[True, False])
        else:
            trip_df = small_.append(medium_).sort_values(['poi_rank', 'rating'], ascending=[True, False])
        event_type = 'small'
    return trip_df, event_type



In [263]:

    
my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'
# my_key = 'AIzaSyAwx3xg6oJ0yiPV3MIunBa1kx6N7v5Tcw8'
def google_driving_walking_time(tour,trip_df,event_type):
    poi_travel_time_df = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
                                   'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
                                   'google_walking_url','driving_result','walking_result','google_driving_time',\
                                   'google_walking_time'])
    ids_, orig_names,orid_idxs,dest_names,dest_idxs,orig_coord0s,orig_coord1s,dest_coord0s,dest_coord1s = [],[],[],[],[],[],[],[],[]
    orig_coordss,dest_coordss,driving_urls,walking_urls,driving_results,walking_results,driving_times,walking_times = [],[],[],[],[],[],[],[]
    trip_id_list=[]
    for i in range(len(tour)-1):
        id_ = str(trip_df.loc[trip_df.index[tour[i]]].name) + '0000'+str(trip_df.loc[trip_df.index[tour[i+1]]].name)
        
        result_check_travel_time_id = check_travel_time_id(id_)
        if not result_check_travel_time_id:
    
            orig_name = trip_df.loc[trip_df.index[tour[i]]]['name']
            orig_idx = trip_df.loc[trip_df.index[tour[i]]].name
            dest_name = trip_df.loc[trip_df.index[tour[i+1]]]['name']
            dest_idx = trip_df.loc[trip_df.index[tour[i+1]]].name
            orig_coord0 = trip_df.loc[trip_df.index[tour[i]]]['coord0']
            orig_coord1 = trip_df.loc[trip_df.index[tour[i]]]['coord1']
            dest_coord0 = trip_df.loc[trip_df.index[tour[i+1]]]['coord0']
            dest_coord1 = trip_df.loc[trip_df.index[tour[i+1]]]['coord1']
            orig_coords = str(orig_coord1)+','+str(orig_coord0)
            dest_coords = str(dest_coord1)+','+str(dest_coord0)
            
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))
                
            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))
                
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_df = trip_df.drop(trip_df.iloc[tour[i+1]].name)
                new_tour = trip_df_cloest_distance(new_df,event_type)
                return google_driving_walking_time(new_tour,new_df, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except:            
                print driving_result

            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except:
                google_walking_time = 9999
                
            
            poi_travel_time_df.loc[len(df_poi_travel_time)]=[id_,orig_name,orig_idx,dest_name,dest_idx,orig_coord0,orig_coord1,dest_coord0,\
                                   dest_coord1,orig_coords,dest_coords,google_driving_url,google_walking_url,\
                                   str(driving_result),str(walking_result),google_driving_time,google_walking_time]
            driving_result = str(driving_result).replace("'", '"')
            walking_result = str(walking_result).replace("'", '"')

            conn = psycopg2.connect(conn_str)  
            cur = conn.cursor()  
            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
#             print "startindex:",  index , type(index)
#             index += 1
#             print "end index: " ,index
            cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,\
                                   dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,\
                                   str(driving_result), str(walking_result), google_driving_time, google_walking_time))
            conn.commit()
            conn.close()
        else:
            trip_id_list.append(id_)
    
    return tour, trip_df, poi_travel_time_df



In [261]:

    
def db_google_driving_walking_time(event_ids, event_type):
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    google_ids = []
    driving_time_list = []
    walking_time_list = []
    name_list = []
    for i,v in enumerate(event_ids[:-1]):
        id_ = str(v) + '0000'+str(event_ids[i+1])
        result_check_travel_time_id = check_travel_time_id(id_)
        if not result_check_travel_time_id:
            cur.execute("select name, coord0, coord1 from poi_detail_table where index = %s"%(v))
            orig_name, orig_coord0, orig_coord1 = cur.fetchone()
            orig_idx = v
            cur.execute("select name, coord0, coord1 from poi_detail_table where index = %s "%(event_ids[i+1]))
            dest_name, dest_coord0, dest_coord1 = cur.fetchone()
            dest_idx = event_ids[i+1]
            orig_coords = str(orig_coord1)+','+str(orig_coord0)
            dest_coords = str(dest_coord1)+','+str(dest_coord0)
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))
                
            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_event_ids = list(event_ids)
                new_event_ids.pop(i+1)
                new_event_ids = db_event_cloest_distance(event_ids=new_event_ids, event_type = event_type)
                return db_google_driving_walking_time(new_event_ids, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except:            
                print v, id_, driving_result #need to debug for this
            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except:
                google_walking_time = 9999
        
            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
            driving_result = str(driving_result).replace("'",'"')
            walking_result = str(walking_result).replace("'",'"')
            orig_name = orig_name.replace("'","''")
            dest_name = dest_name.replace("'","''")
            cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord0, orig_coord1, dest_coord0,\
                                   dest_coord1, orig_coords, dest_coords, google_driving_url, google_walking_url,\
                                   str(driving_result), str(walking_result), google_driving_time, google_walking_time))
            conn.commit()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
        else:
            
            cur.execute("select orig_name, dest_name, google_driving_time, google_walking_time from google_travel_time_table \
                         where id_ = '%s'" %(id_))
            orig_name, dest_name, google_driving_time, google_walking_time = cur.fetchone()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
            
    
    conn.close()
    return event_ids, google_ids, name_list, driving_time_list, walking_time_list



In [837]:

    
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
default = 1
n_days =3
full_day = 1
poi_coords = df_events[['coord0','coord1']]
trip_location_ids, full_trip_details =[],[]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
# print kmeans.labels_
i=0
current_events = []
big_ix = []
small_ix = []
med_ix = []
for ix, label in enumerate(kmeans.labels_):
    if label == i:
        time = df_events.iloc[ix].adjusted_normal_time_spent
        event_ix = df_events.iloc[ix].name
        current_events.append(event_ix)
        if time > 180 :
            big_ix.append(event_ix)
        elif time >= 120 :
            med_ix.append(event_ix)
        else:
            small_ix.append(event_ix)

#         all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
big_ = df_events.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
small_ = df_events.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
medium_ = df_events.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])

# trip_df, event_type = create_trip_df(big_,medium_,small_)
# tour = trip_df_cloest_distance(trip_df, event_type)
# new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
# new_trip_df = new_trip_df.iloc[new_tour]
# new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
# new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)

event_ids, event_type=db_event_cloest_distance(trip_locations_id)
event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
db_address(event_ids)
values = day_trip(event_ids, county, state, default, full_day,n_days,i)
day_trip_locations.loc[len(day_trip_locations)] = values
trip_location_ids.append(values[0])
full_trip_details.extend(values[-1])









    



(2259L, 'San Diego Zoo Safari Park', '15500 San Pasqual Valley Rd, Escondido, CA 92027, USA')



In [ ]:

    
def get_small_med_big_ix(county_list_info, day_labels):



In [300]:

    
def get_fulltrip_data_default(state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
    county = find_county(state, city)
    trip_location_ids, full_trip_details =[],[]
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    
    if not check_full_trip_id(full_trip_id, debug):
        county_list_info = db_start_location(county, state, city)
        poi_coords = county_list_info[:,1:3]
        kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
        day_labels = kmeans.labels_
        for i in range(n_days):
            
            if not check_day_trip_id(day_trip, debug):
                trip_locations_id = '-'.join([str(state), str(county.replace(' ','-')),str(int(default)), str(n_days),str(i)])
                current_events, big_ix, small_ix, med_ix = [],[],[],[]

                for ix, label in enumerate(day_labels):
                    if label == i:
                        time = county_list_info[ix,3]
                        event_ix = county_list_info[ix,0]
                        current_events.append(event_ix)
                        if time > 180 :
                            big_ix.append(ix)
                        elif time >= 120 :
                            med_ix.append(ix)
                        else:
                            small_ix.append(ix)
                            
                #need to double check this funct!
                big_ = sorted_events(county_list_info, big_ix)
                med_ = sorted_events(county_list_info, med_ix)
                small_ = sorted_events(county_list_info, small_ix)
                
                event_ids, event_type = create_event_id_list(big_, med_, small_)
                event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type)
                event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
                event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
                db_address(event_ids)
                details = db_day_trip(event_ids, i)
#                 insert to day_trip ....
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                cur.execute("insert into day_trip_table (trip_locations_id,full_day, default, county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" %( trip_location_id, full_day, default, county, state, details, event_type, event_ids))
                conn.commit()
                conn.close()
                trip_location_ids.append(trip_locations_id)
                full_trip_details.extend(details)

            else:
                print "error: already have this day, please check the next day"
                trip_location_ids.append(trip_locations_id)
#                 call db find day trip detail 
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                cur.execute("select details from day_trip_table where trip_locations_id = '%s';"%(trip_locations_id) )
                day_trip_detail = fetchall()
                conn.close()
                full_trip_details.extend(day_trip_detail)

        full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
        details = full_trip_details
        user_id = "Admin"
        conn = psycopg2.connect(conn_str)
        cur = conn.cursor()
        cur.execute("insert into full_trip_table(user_id, full_trip_id,trip_location_ids, default, county, state, details, n_days) VALUES ('%s', '%s', '%s', %s, '%s', '%s', '%s', %s)" %(user_id, full_trip_id, str(trip_location_ids), default, county, state, details, n_days))
        conn.commit()
        conn.close()

        return "finish update %s, %s into database" %(state, county)
    else:
        return "%s, %s already in database" %(state, county)



In [298]:

    
def db_day_trip(event_ids, county, state, default, full_day,n_days,i):
    conn=psycopg2.connect(conn_str)
    cur = conn.cursor()

#     cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
#     a = cur.fetchall()
#     state = a[0][0].upper()
#     county = a[0][1].upper()

    trip_locations_id = '-'.join([str(state), str(county.replace(' ','-')),str(int(default)), str(n_days),str(i)])

    #details dict includes: id, name,address, day

    cur.execute("select index, name, address from poi_detail_table where index in %s;" %(tuple(event_ids),))
    a = cur.fetchall()
    details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(len(a))]
    conn.close()
    return [trip_locations_id, full_day, default, county, state, details]


def db_day_trip(event_ids, i):
    conn=psycopg2.connect(conn_str)
    cur = conn.cursor()
    details = []
    #details dict includes: id, name,address, day
    for event_id in event_ids:
        cur.execute("select index, name, address from poi_detail_table where index = %s;" %(event_id))
        a = cur.fetchone()
        details.append(str({'id': a[0],'name': a[1],'address': a[2], 'day': i}))
    conn.close()
    return details



In [ ]:

    
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()



In [ ]:

    
def extend_full_trip_details(full_trip_details):
    details = {}
    addresses = []
    ids = []
    days = []
    names = []
    for item in full_trip_details:
        addresses.append(eval(item)['address'])
        ids.append(eval(item)['id'])
        days.append(eval(item)['day'])
        names.append(eval(item)['name'])
    details['addresses'] = addresses
    details['ids'] = ids
    details['days'] = days
    details['names'] = names
    return str(full_trip_details)



In [841]:

    
event_ids









    Out[841]:





array([2259, 2260, 3486, 3487, 4951, 4953, 4952])



In [569]:

    
print event_ids, google_ids, name_list, driving_time_list, walking_time_list









    



[2259 2260 3486 3487 4951 4953 4952] ['225900002260', '226000003486', '348600003487', '348700004951', '495100004953', '495300004952'] ['San Diego Zoo Safari Park to Meerkat', 'Meerkat to Stone', 'Stone to Stone Brewery', 'Stone Brewery to Lake Poway', 'Lake Poway to Potato Chip Rock', 'Potato Chip Rock to Mt. Woodson'] [3.0, 25.0, 0.0, 25.0, 25.0, 0.0] [7.0, 193.0, 0.0, 268.0, 99.0, 1.0]



In [604]:

    
init_db_tables()



In [267]:

    
def remove_extra_events(trip_df, df_poi_travel_time):
    if sum(trip_df.adjusted_normal_time_spent)+sum(df_poi_travel_time.google_driving_time) > 480:
        new_trip_df = trip_df[:-1]
        new_df_poi_travel_time = df_poi_travel_time[:-1]
        return remove_extra_events(new_trip_df,new_df_poi_travel_time)
    else:
        return trip_df, df_poi_travel_time, sum(trip_df.adjusted_normal_time_spent)+sum(df_poi_travel_time.google_driving_time)
def db_remove_extra_events(event_ids, driving_time_list,walking_time_list, max_time_spent=480):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()   
    cur.execute("select sum (adjusted_normal_time_spent) from poi_detail_table where index in %s;" %(tuple(event_ids),))
    time_spent = cur.fetchone()[0]
    conn.close()
    time_spent += sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list)))
    if time_spent > max_time_spent:
        update_event_ids = event_ids[:-1]
        update_driving_time_list = driving_time_list[:-1]
        update_walking_time_list = walking_time_list[:-1]
        return db_remove_extra_events(update_event_ids, update_driving_time_list, update_walking_time_list)
    else:
        return event_ids, driving_time_list, walking_time_list, time_spent



In [777]:

    
def df_addresses(new_trip_df1, new_df_poi_travel_time):
    my_lst = []
    print new_trip_df1.index.values
    for i in new_trip_df1.index.values:
        temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.orig_idx.values]
        if temp_df.shape[0]>0:
            address = eval(temp_df.driving_result.values[0])['origin_addresses'][0]
            my_lst.append(address)
        else:
            try:
                temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.dest_idx.values]
                address = eval(temp_df.driving_result.values[0])['destination_addresses'][0]
                my_lst.append(address)
            except:
                print new_trip_df1, new_df_poi_travel_time
    return my_lst

def check_address(index):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select address from poi_detail_table where index = %s;"%(index))
    a = cur.fetchone()[0]
    conn.close()
    if a:
        return True
    else:
        return False
def db_address(event_ids):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    for i in event_ids[:-1]:
        if not check_address(i):
            cur.execute("select driving_result from google_travel_time_table where orig_idx = %s;" %(i))
            a= cur.fetchone()[0]
            add = ast.literal_eval(a)['origin_addresses'][0]
            cur.execute("update poi_detail_table set address = '%s' where index = %s;" %(add, i))
            conn.commit()
    last = event_ids[-1]
    if not check_address(last):
        cur.execute("select driving_result from google_travel_time_table where dest_idx = %s;" %(last))
        a= cur.fetchone()[0]
        add = ast.literal_eval(a)['destination_addresses'][0]
        cur.execute("update poi_detail_table set address = '%s' where index = %s;" %(add, last))
        conn.commit()
    conn.close()



In [ ]:



In [ ]:



In [822]:

    
from numpy import *

test_event_ids_list =  append(273, event_ids)
# event_ids
print test_event_ids_list









    



[ 273 2259 2260 3486 3487 4951 4953 4952]



In [831]:

    
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()









    



CALIFORNIA



In [ ]:

    
day_trip_locations = 'San Diego, California'

f, d, n, d= search_cluster_events(df, county, state, city, 3, day_trip_locations, full_trip_table, default = True)



In [1095]:

    
import time
t1=time.time()
# [index, ranking,score]
a = [[1,2,3],[2,2,6],[3,3,3],[4,3,10]]
from operator import itemgetter
print sorted(a, key=lambda x: (x[1], -x[2]))

time.time()-t1









    



[[2, 2, 6], [1, 2, 3], [4, 3, 10], [3, 3, 3]]






    Out[1095]:





0.0008938312530517578



In [123]:

    
'''
Most important event that will call all the functions and return the day details for the trip
'''


def search_cluster_events(df, county, state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
    
    county, df_events =cold_start_places(df, county, state, city, n_days) 
    
    poi_coords = df_events[['coord0','coord1']]
    kmeans = KMeans(n_clusters=n_days).fit(poi_coords)

    new_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    if not check_full_trip_id(new_trip_id, debug):
        
        trip_location_ids = []
        full_trip_details = []
        for i in range(n_days):
            current_events = []
            big_ix = []
            small_ix = []
            med_ix = []
            for ix, label in enumerate(kmeans.labels_):
                if label == i:
                    event_ix = poi_coords.index[ix]
                    current_events.append(event_ix)
                    if event_ix in big.index:
                        big_ix.append(event_ix)
                    elif event_ix in med.index:
                        med_ix.append(event_ix)
                    else:
                        small_ix.append(event_ix)
#             all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
            big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
    #         print 'big:', big_, 'small:', small_, 'msize:', medium_
            trip_df, event_type = create_trip_df(big_,medium_,small_)
    #         print event_type
            tour = trip_df_cloest_distance(trip_df, event_type)
    #         print tour
            new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
    #         print new_tour, new_trip_df
    #         return new_trip_df, df_poi_travel_time
            new_trip_df = new_trip_df.iloc[new_tour]
            new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
    #         print new_trip_df1
            new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
    #         print 'total time:', total_ti
            values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
            day_trip_locations.loc[len(day_trip_locations)] = values
            trip_location_ids.append(values[0])
            full_trip_details.extend(values[-1])
            df_poi_travel_info = df_poi_travel_info.append(new_df_poi_travel_time)
            
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    details = extend_full_trip_details(full_trip_details)
    full_trip_table.loc[len(full_trip_table)] = ["adam", full_trip_id, str(trip_location_ids), default, county, state, details, n_days]
    
    
    return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info



In [36]:

    
def default_search_cluster_events(df, df_counties_u, county, state, big,med, small, \
                                  temp, n_days,day_trip_locations, full_trip_table,df_poi_travel_info):
#     df_poi_travel_info = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
#                                        'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
#                                        'google_walking_url','driving_result','walking_result','google_driving_time',\
#                                        'google_walking_time'])
    poi_coords = temp[['coord0','coord1']]
    kmeans = KMeans(n_clusters=n_days, random_state=0).fit(poi_coords)
#     print kmeans.labels_
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    trip_location_ids = []
    full_trip_details = []
    for i in range(n_days):
        current_events = []
        big_ix = []
        small_ix = []
        med_ix = []
        for ix, label in enumerate(kmeans.labels_):
            if label == i:
                event_ix = poi_coords.index[ix]
                current_events.append(event_ix)
                if event_ix in big.index:
                    big_ix.append(event_ix)
                elif event_ix in med.index:
                    med_ix.append(event_ix)
                else:
                    small_ix.append(event_ix)
        all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
        big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        trip_df, event_type = create_trip_df(big_,medium_,small_)
        tour = trip_df_cloest_distance(trip_df, event_type)
        new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
        new_trip_df = new_trip_df.iloc[new_tour]
        new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
        new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
        values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
        day_trip_locations.loc[len(day_trip_locations)] = values
        trip_location_ids.append(values[0])
        full_trip_details.extend(values[-1])
#         print 'trave time df \n',new_df_poi_travel_time
        df_poi_travel_info = df_poi_travel_info.append(new_df_poi_travel_time)
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    details = extend_full_trip_details(full_trip_details)
    full_trip_table.loc[len(full_trip_table)] = [user_id, full_trip_id, \
                                                 str(trip_location_ids), default, county, state, details, n_days]
    return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info



In [102]:

    
###Next Steps: Add control from the users. funt1: allow to add events,(specific name or auto add)
### auto route to the most appropirate order
###funt2: allow to reorder the events. funt3: allow to delete the events. 
###funt4: allow to switch a new event-next to the switch and x mark icon,check mark to confirm the new place and auto order

###New table for the trip info...features including trip id, event place, days, specific date, trip details. (trip tour, trip)

def ajax_available_events(county, state):
    county=county.upper()
    state = state.title()
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select index, name from poi_detail_table where county='%s' and state='%s'" %(county,state))  
    poi_lst = [item for item in cur.fetchall()]
    conn.close()
    return poi_lst

def add_event(trip_locations_id, event_day, event_id=None, event_name=None, full_day = True, unseen_event = False):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
    (index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
    if unseen_event:
        index += 1
        trip_locations_id = '-'.join([str(eval(i)['id']) for i in eval(detail)])+'-'+event_name.replace(' ','-')+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id))
        a = cur.fetchone()
        if bool(a):
            conn.close()
            return trip_locations_id, a[0]
        else:
            cur.execute("select max(index) from day_trip_locations")
            index = cur.fetchone()[0]+1
            detail = list(eval(detail))
            #need to make sure the type is correct for detail!
            new_event = "{'address': 'None', 'id': 'None', 'day': %s, 'name': u'%s'}"%(event_day, event_name)
            detail.append(new_event)
            #get the right format of detail: change from list to string and remove brackets and convert quote type
            new_detail = str(detail).replace('"','').replace('[','').replace(']','').replace("'",'"')
            cur.execute("INSERT INTO day_trip_locations VALUES (%i, '%s',%s,%s,'%s','%s','%s');" %(index, trip_locations_id, full_day, False, county, state, new_detail))
            conn.commit()
            conn.close()
            return trip_locations_id, detail
    else:
        event_ids = add_event_cloest_distance(trip_locations_id, event_id)
        event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids,event_type = 'add')
        trip_locations_id = '-'.join(event_ids)+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id)) 
        a = cur.fetchone()
        if not a:
            details = []
            db_address(event_ids)
            for item in event_ids:
                cur.execute("select index, name, address from poi_detail_table where index = '%s';" %(item))
                a = cur.fetchone()
                detail = {'id': a[0],'name': a[1],'address': a[2], 'day': event_day}
                details.append(detail)
            #need to make sure event detail can append to table!
            cur.execute("insert into day_trip_table (trip_locations_id,full_day, default, county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" %( trip_location_id, full_day, False, county, state, details, event_type, event_ids))
            conn.commit()
            conn.close()
            return trip_locations_id, details
        else:
            conn.close()
            #need to make sure type is correct.
            return trip_locations_id, a[0]

def remove_event(trip_locations_id, remove_event_id, remove_event_name=None, event_day=None, full_day = True):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
    (index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
    new_event_ids = ast.literal_eval(event_ids)
    new_event_ids.remove(remove_event_id)
    new_trip_locations_id = '-'.join(str(event_id) for event_id in new_event_ids)
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(new_trip_locations_id))  
    check_id = cur.fetchone()
    if check_id:
        return new_trip_locations_id, check_id[-3]
    detail = ast.literal_eval(detail[1:-1])
    for index, trip_detail in enumerate(detail):
        if ast.literal_eval(trip_detail)['id'] == remove_event_id:
            remove_index = index
            break
    new_detail = list(detail)
    new_detail.pop(remove_index)
    new_detail =  str(new_detail).replace("'","''")
    default = False
    cur.execute("select max(index) from day_trip_table where trip_locations_id='%s'" %(trip_locations_id)) 
    new_index = cur.fetchone()[0]
    new_index+=1
    cur.execute("INSERT INTO day_trip_table VALUES (%i, '%s', %s, %s, '%s', '%s', '%s', '%s','%s');" \
                %(new_index, new_trip_locations_id, full_day, default, county, state, new_detail, event_type, new_event_ids))  
    conn.commit()
    conn.close()
    return new_trip_locations_id, new_detail

def event_type_time_spent(adjusted_normal_time_spent):
    if adjusted_normal_time_spent > 180:
        return 'big'
    elif adjusted_normal_time_spent >= 120:
        return 'med'
    else:
        return 'small'
def switch_event_list(full_trip_id, trip_locations_id, switch_event_id, switch_event_name=None, event_day=None, full_day = True):
#     new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select name, city, county, state, coord0, coord1,poi_rank, adjusted_normal_time_spent from poi_detail_table where index=%s" %(switch_event_id))
    name, city, county, state,coord0, coord1,poi_rank, adjusted_normal_time_spent = cur.fetchone()
    event_type = event_type_time_spent(adjusted_normal_time_spent)
    avialable_lst = ajax_available_events(county, state)
    cur.execute("select trip_location_ids,details from full_trip_table where full_trip_id=%s" %(full_trip_id))
    full_trip_detail = cur.fetchone()
    full_trip_detail = ast.literal_eval(full_trip_detail)
    full_trip_ids = [ast.literal_eval(item)['id'] for item in full_trip_detail]
    switch_lst = []
    for item in avialable_lst:
        index = item[0]
        if index not in full_trip_ids:
            event_ids = [switch_event_id, index]
            event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids, event_type='switch')
            if min(driving_time_list[0], walking_time_list[0]) <= 60:
                cur.execute("select poi_rank, rating, adjusted_normal_time_spent from poi_detail_table where index=%s" %(index))
                target_poi_rank, target_rating, target_adjusted_normal_time_spent = cur.fetchone()
                target_event_type = event_type_time_spent(target_adjusted_normal_time_spent)
                switch_lst.append([target_poi_rank, target_rating, target_event_type==event_type])
    #need to sort target_event_type, target_poi_rank and target_rating
    return {switch_event_id: switch_lst}

def switch_event(trip_locations_id, switch_event_id, final_event_id, event_day):
    new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
    new_trip_locations_id, new_detail = add_event(new_trip_locations_id, event_day, final_event_id, full_day = True, unseen_event = False)
    return new_trip_locations_id, new_detail



In [113]:

    
ajax_available_events(county='San Francisco', state = "California")
county='San Francisco'.upper()
state = "California"
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
cur.execute("select index, name from poi_detail_table where county='%s' and state='%s'" %(county,state))  
full_trip_id = 'CALIFORNIA-SAN-DIEGO-1-3'
cur.execute("select details from full_trip_table where full_trip_id='%s'" %(full_trip_id))
full_trip_detail = cur.fetchone()[0]
full_trip_detail = ast.literal_eval(full_trip_detail)
[ast.literal_eval(item)['id'] for item in full_trip_detail]



In [89]:

    
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
remove_event_id = 3486
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
(index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
# event_ids = ast.literal_eval(event_ids)
# print detail, '\n'
new_event_ids = ast.literal_eval(event_ids)
new_event_ids.remove(remove_event_id)
new_trip_locations_id = '-'.join(str(id_) for id_ in new_event_ids)
# event_ids.remove(remove_event_id)
detail = ast.literal_eval(detail[1:-1])
print type(detail[0])
for index, trip_detail in enumerate(detail):
    if ast.literal_eval(trip_detail)['id'] == remove_event_id:
        remove_index = index
        break
new_detail = list(detail)
new_detail.pop(remove_index)
new_detail =  str(new_detail).replace("'","''")









    



<type 'str'>



In [148]:

    
import numpy as np
#Tasks:
#0. Run the initial to debug with all the cities and counties for the poi_detail_table in hand. 
#1. Continue working on add/suggest/remove features
#2. Start the new feature that allows user to generate the google map route for the day
#3. new feature that allows user to explore outside the city from a direction away from the started location
#4. get all the state and national park data into database and rework the ranking system and the poi_detail_table!
remove_event_id = 3486

#3.Travel Outside:
current_city = 'San Francisco'
current_state = 'California'
direction = 'N'
n_days = 1

#Travel outside: 1 day 2hr driving, first set up distL2 < 1.7.  Find all the locations with in the range. 
#And exclude the started city. Need to take care of total travel time. should be less than 5 hrs for 1 day round trip
#for 2 days: total 50% more driving time than 1 day.
#for 3 days: total 100% more driving time than 1 day 
a,b =np.array([36.50641,-121.1095823]), np.array([37.0792134,-121.9502674])
def angle_between(p1, p2):
    ang1 = np.arctan2(*p1[::-1])
    ang2 = np.arctan2(*p2[::-1])
    return np.rad2deg((ang1 - ang2) % (2 * np.pi))

def direction_from_orgin(start_coord_long,  start_coord_lat, target_coord_long, target_coord_lat):
    diff =  [target_coord_long - start_coord_long, target_coord_lat- start_coord_lat]
    angle = angle_between(diff,[0,0])
    if (angle > 45) and (angle < 135):
        return 'N'
    elif (angle > 135) and (angle < 215):
        return 'W'
    elif (angle > 215) and (angle < 305):
        return 'S'
    else:
        return 'E'
    
def travel_outside_coords(current_city, current_state, direction=None, n_days=1):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor() 
    #coord_long, coord_lat
    cur.execute("select coord0, coord1 from all_cities_coords where city ='%s' and state = '%s';" %(current_city, current_state)) 
    coord0, coord1 = cur.fetchone()
    #city, coord_lat, coord_long
    cur.execute("select distinct city, coord0, coord1 from all_cities_coords where city !='%s' and state = '%s';" %(current_city, current_state))  
    coords = cur.fetchall()     
    conn.close()
    
    return coords, coord0, coord1



In [266]:

    
#Get events outside the city!!!

target_direction = 'N'
#possible city coords, target city coord_lat, target city coord_long
coords, coord_lat, coord_long = travel_outside_coords(current_city, current_state)
#coords: city, lat, long
check_cities_info = []
for item in coords:
    direction = direction_from_orgin(coord_long,  coord_lat, item[2], item[1])
    if (target_direction == direction) and (distL2([item[1], item[2]], [coord_lat, coord_long]) <1.7):
        check_cities_info.append(item)
city_infos = []
for city, _, _ in check_cities_info:
    county = None
    #index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating
    city_info = db_start_location(county, current_state, city)
    city_infos.extend(city_info)
city_infos = np.array(city_infos)
poi_coords = city_infos[:,1:3]
n_routes = sum(1 for t in np.array(city_infos)[:,3] if t >= 120)    
if (n_routes>1) and (city_infos.shape[0]>=10):
    kmeans = KMeans(n_clusters=n_routes).fit(poi_coords)
elif (city_infos.shape[0]> 20) or (n_routes>1):
    kmeans = KMeans(n_clusters=2).fit(poi_coords)
else:
    kmeans = KMeans(n_clusters=1).fit(poi_coords)
route_labels = kmeans.labels_

print n_routes, len(route_labels), city_infos.shape
print route_labels
for i in range(n_routes):
    current_events, big_ix, med_ix, small_ix = [], [],[], []
    for ix, label in enumerate(route_labels):
        if label == i:
            time = city_infos[ix,3]
            event_ix = city_infos[ix,0]
            current_events.append(event_ix)
            if time > 180 :
                big_ix.append(ix)
            elif time >= 120 :
                med_ix.append(ix)
            else:
                small_ix.append(ix)
    big_ = sorted_events(city_infos, big_ix)
    med_ = sorted_events(city_infos, med_ix)
    small_ = sorted_events(city_infos, small_ix)
    print big_, len(big_), len(med_), len(small_)
    
    # need to update!!!!!!!!

    event_ids, event_type = create_event_id_list(big_, med_, small_)
    event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type)
    event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
    event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
#     db_address(event_ids)
    print i, event_ids
    values = db_day_trip(event_ids, county, state, default, full_day,n_days,i)









    



3 60 (60, 6)
[2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]
[[  2.71600000e+03   2.00000000e+00   4.00000000e+00]] 1 2 39
0 [ 4517.  4519.  4703.  4704.  3751.]






    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-266-97aa92aba371> in <module>()
     54 #     db_address(event_ids)
     55     print i, event_ids
---> 56     values = db_day_trip(event_ids, county, state, default, full_day,n_days,i)

NameError: name 'db_day_trip' is not defined



In [232]:

    
db_start_location(county, current_state, city).shape









    Out[232]:





(10, 6)



In [50]:

    
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
# cur.execute("select index, name, coord0, coord1 from poi_detail_table where city !='%s' and state = '%s';" %(current_city, current_state))
cur.execute("select distinct city, state from poi_detail_table;" )



In [51]:

    
all_cities = cur.fetchall()



In [195]:

    
from geopy.geocoders import Nominatim
geolocator = Nominatim()
location = geolocator.geocode("343 Vernon St., San Francisco, California")
print location.latitude, location.longitude









    



37.7186735714 -122.468147



In [68]:

    
for items in all_cities:
    if cities_coords[cities_coords['state'] == items[1]][cities_coords.city == items[0]].shape[0] == 0:
        location_name = ', '.join([items[0], items[1]])
        location = geolocator.geocode(location_name)
        cities_coords.loc[len(cities_coords)] = [999,items[0], items[1], 'US', location.latitude, location.longitude]
        print cities_coords.loc(len(cities_coords))









    



<pandas.core.indexing._LocIndexer object at 0x10bb76ed0>
<pandas.core.indexing._LocIndexer object at 0x10bffd510>
<pandas.core.indexing._LocIndexer object at 0x10c01c750>
<pandas.core.indexing._LocIndexer object at 0x10c01ce10>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10c01c750>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c5d7e10>
<pandas.core.indexing._LocIndexer object at 0x10c5d7e10>
<pandas.core.indexing._LocIndexer object at 0x10c5d7e10>
<pandas.core.indexing._LocIndexer object at 0x10c5decd0>
<pandas.core.indexing._LocIndexer object at 0x10c5decd0>
<pandas.core.indexing._LocIndexer object at 0x10c5decd0>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10610fdd0>
<pandas.core.indexing._LocIndexer object at 0x10c01ce10>
<pandas.core.indexing._LocIndexer object at 0x10c01c750>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10c01ce10>
<pandas.core.indexing._LocIndexer object at 0x10c5d75d0>
<pandas.core.indexing._LocIndexer object at 0x10c5de6d0>
<pandas.core.indexing._LocIndexer object at 0x10c5de6d0>
<pandas.core.indexing._LocIndexer object at 0x10c5de6d0>
<pandas.core.indexing._LocIndexer object at 0x10c5e8f10>
<pandas.core.indexing._LocIndexer object at 0x10c5e8990>
<pandas.core.indexing._LocIndexer object at 0x10c5d75d0>
<pandas.core.indexing._LocIndexer object at 0x10c5de6d0>
<pandas.core.indexing._LocIndexer object at 0x10c5e8990>
<pandas.core.indexing._LocIndexer object at 0x10c5e8990>
<pandas.core.indexing._LocIndexer object at 0x10c5e1f90>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e1610>
<pandas.core.indexing._LocIndexer object at 0x10610fdd0>
<pandas.core.indexing._LocIndexer object at 0x10610fdd0>
<pandas.core.indexing._LocIndexer object at 0x10c5c8a50>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e0ad0>
<pandas.core.indexing._LocIndexer object at 0x10c5e0150>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e0150>
<pandas.core.indexing._LocIndexer object at 0x10c5e0ad0>
<pandas.core.indexing._LocIndexer object at 0x10c5dd710>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e2a50>
<pandas.core.indexing._LocIndexer object at 0x10c5e2350>
<pandas.core.indexing._LocIndexer object at 0x10c5e2350>
<pandas.core.indexing._LocIndexer object at 0x10c5e2e10>
<pandas.core.indexing._LocIndexer object at 0x10c5e2e50>
<pandas.core.indexing._LocIndexer object at 0x10c5c8510>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5de550>
<pandas.core.indexing._LocIndexer object at 0x10c5de890>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5de550>
<pandas.core.indexing._LocIndexer object at 0x10c5de890>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5edf90>
<pandas.core.indexing._LocIndexer object at 0x10c5de550>
<pandas.core.indexing._LocIndexer object at 0x10c5f29d0>
<pandas.core.indexing._LocIndexer object at 0x10c5f29d0>
<pandas.core.indexing._LocIndexer object at 0x10c5f29d0>
<pandas.core.indexing._LocIndexer object at 0x10c5f4e10>
<pandas.core.indexing._LocIndexer object at 0x10c5f4d50>
<pandas.core.indexing._LocIndexer object at 0x10c5f4dd0>
<pandas.core.indexing._LocIndexer object at 0x10c5f4c50>
<pandas.core.indexing._LocIndexer object at 0x10c5f4d50>
<pandas.core.indexing._LocIndexer object at 0x10c5f4dd0>
<pandas.core.indexing._LocIndexer object at 0x10c5e1610>
<pandas.core.indexing._LocIndexer object at 0x10c5f4dd0>
<pandas.core.indexing._LocIndexer object at 0x10c5edb50>
<pandas.core.indexing._LocIndexer object at 0x10c5e27d0>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5edb50>
<pandas.core.indexing._LocIndexer object at 0x10c5edb50>
<pandas.core.indexing._LocIndexer object at 0x10c5edb50>






    



/Users/zoesh/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  from ipykernel import kernelapp as app



In [33]:

    
cities_coords = pd.read_csv('cities_coords.csv', header=None)



In [35]:

    
cities_coords.columns = ['area_code','city','state','nation', 'coord0','coord1']



In [43]:

    
cities_coords[cities_coords['state'] == current_state][cities_coords.area_code == 415]









    



/Users/zoesh/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':






    Out[43]:






  
    
      
      area_code
      city
      state
      nation
      coord0
      coord1
    
  
  
    
      834
      415
      Novato
      California
      US
      38.10742
      -122.56970
    
    
      835
      415
      San Anselmo
      California
      US
      37.97465
      -122.56164
    
    
      836
      415
      South San Francisco
      California
      US
      37.65466
      -122.40775
    
    
      837
      415
      San Rafael
      California
      US
      37.97353
      -122.53109



In [66]:

    
[999,items[0], items[1], 'US', location.latitude, location.longitude]









    Out[66]:





[999, 'Portsmouth', 'Virginia', 'US', 37.7792808, -122.4192362]



In [70]:

    
cities_coords.to_csv('all_cities_coords.csv')



In [26]:

    
engine = create_engine('postgresql://zoesh@localhost:5432/travel_with_friends')


new_cities_coords.to_sql('all_cities_coords',engine, if_exists = "replace")



In [2]:

    
import pandas as pd
cities_coords = pd.read_csv('all_cities_coords.csv')









    



/Users/zoesh/anaconda/lib/python2.7/site-packages/pandas/computation/__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used

  UserWarning)



In [269]:

    
cities_coords









    Out[269]:






  
    
      
      Unnamed: 0
      area_code
      city
      state
      nation
      coord0
      coord1
    
  
  
    
      0
      0
      201
      Bayonne
      New Jersey
      US
      40.668710
      -74.114310
    
    
      1
      1
      201
      Bergenfield
      New Jersey
      US
      40.927600
      -73.997360
    
    
      2
      2
      201
      Cliffside Park
      New Jersey
      US
      40.821490
      -73.987640
    
    
      3
      3
      201
      Englewood
      New Jersey
      US
      40.892880
      -73.972640
    
    
      4
      4
      201
      Fair Lawn
      New Jersey
      US
      40.940380
      -74.131810
    
    
      5
      5
      201
      Fort Lee
      New Jersey
      US
      40.850930
      -73.970140
    
    
      6
      6
      201
      Hackensack
      New Jersey
      US
      40.885930
      -74.043470
    
    
      7
      7
      201
      Hoboken
      New Jersey
      US
      40.743990
      -74.032360
    
    
      8
      8
      201
      Jersey City
      New Jersey
      US
      40.728160
      -74.077640
    
    
      9
      9
      201
      Kearny
      New Jersey
      US
      40.768430
      -74.145420
    
    
      10
      10
      201
      North Bergen
      New Jersey
      US
      40.804270
      -74.012080
    
    
      11
      11
      201
      Paramus
      New Jersey
      US
      40.944540
      -74.075420
    
    
      12
      12
      201
      Ridgewood
      New Jersey
      US
      40.979260
      -74.116530
    
    
      13
      13
      201
      Teaneck
      New Jersey
      US
      40.897600
      -74.015970
    
    
      14
      14
      201
      Union City
      New Jersey
      US
      40.779550
      -74.023750
    
    
      15
      15
      201
      West New York
      New Jersey
      US
      40.787880
      -74.014310
    
    
      16
      16
      202
      Washington
      District of Columbia
      US
      38.895110
      -77.036370
    
    
      17
      17
      203
      Bridgeport
      Connecticut
      US
      41.167040
      -73.204830
    
    
      18
      18
      203
      Danbury
      Connecticut
      US
      41.394820
      -73.454010
    
    
      19
      19
      203
      East Haven
      Connecticut
      US
      41.276210
      -72.868430
    
    
      20
      20
      203
      Meriden
      Connecticut
      US
      41.538150
      -72.807040
    
    
      21
      21
      203
      Milford
      Connecticut
      US
      41.222320
      -73.056500
    
    
      22
      22
      203
      Naugatuck
      Connecticut
      US
      41.485930
      -73.050660
    
    
      23
      23
      203
      New Haven
      Connecticut
      US
      41.308150
      -72.928160
    
    
      24
      24
      203
      North Haven
      Connecticut
      US
      41.390930
      -72.859540
    
    
      25
      25
      203
      Norwalk
      Connecticut
      US
      41.117600
      -73.407900
    
    
      26
      26
      203
      Shelton
      Connecticut
      US
      41.316490
      -73.093160
    
    
      27
      27
      203
      Stamford
      Connecticut
      US
      41.053430
      -73.538730
    
    
      28
      28
      203
      Stratford
      Connecticut
      US
      41.184540
      -73.133170
    
    
      29
      29
      203
      Trumbull
      Connecticut
      US
      41.242870
      -73.200670
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2820
      2820
      999
      Warwick
      Rhode Island
      US
      41.700202
      -71.416111
    
    
      2821
      2821
      999
      Winter Garden
      Florida
      US
      28.550269
      -81.592597
    
    
      2822
      2822
      999
      Maricopa
      Arizona
      US
      33.348830
      -112.491230
    
    
      2823
      2823
      999
      Compton
      California
      US
      33.894882
      -118.226043
    
    
      2824
      2824
      999
      Charleston
      West Virginia
      US
      38.350599
      -81.633281
    
    
      2825
      2825
      999
      Concord
      New Hampshire
      US
      43.207106
      -71.537021
    
    
      2826
      2826
      999
      St. Paul
      Minnesota
      US
      44.950404
      -93.101503
    
    
      2827
      2827
      999
      Buckeye
      Arizona
      US
      33.370275
      -112.583867
    
    
      2828
      2828
      999
      Glendale
      California
      US
      34.142498
      -118.248596
    
    
      2829
      2829
      999
      Jurupa Valley
      California
      US
      33.979847
      -117.451575
    
    
      2830
      2830
      999
      Fort Myers
      Florida
      US
      26.640628
      -81.872308
    
    
      2831
      2831
      999
      Rockwall
      Texas
      US
      32.931234
      -96.459709
    
    
      2832
      2832
      999
      Eastvale
      California
      US
      33.976680
      -117.559844
    
    
      2833
      2833
      999
      Menifee
      California
      US
      33.686443
      -117.177044
    
    
      2834
      2834
      999
      St. Petersburg
      Florida
      US
      27.770380
      -82.669508
    
    
      2835
      2835
      999
      Miami
      Florida
      US
      25.774266
      -80.193659
    
    
      2836
      2836
      999
      Boston
      Massachusetts
      US
      42.360482
      -71.059568
    
    
      2837
      2837
      999
      Goodyear
      Arizona
      US
      33.435609
      -112.357912
    
    
      2838
      2838
      999
      Bentonville
      Arkansas
      US
      36.372854
      -94.208817
    
    
      2839
      2839
      999
      Raleigh
      North Carolina
      US
      35.780398
      -78.639099
    
    
      2840
      2840
      999
      Canton
      Ohio
      US
      40.798952
      -81.378444
    
    
      2841
      2841
      999
      Las Vegas
      Nevada
      US
      36.166286
      -115.149225
    
    
      2842
      2842
      999
      Springfield
      Massachusetts
      US
      42.101483
      -72.589811
    
    
      2843
      2843
      999
      Bend
      Oregon
      US
      44.058173
      -121.315309
    
    
      2844
      2844
      999
      Miami Beach
      Florida
      US
      25.788144
      -80.127270
    
    
      2845
      2845
      999
      San Francisco
      California
      US
      37.779281
      -122.419236
    
    
      2846
      2846
      999
      Eugene
      Oregon
      US
      44.101181
      -123.152384
    
    
      2847
      2847
      999
      Albany
      New York
      US
      42.651167
      -73.754968
    
    
      2848
      2848
      999
      Kent
      Washington
      US
      47.382690
      -122.227027
    
    
      2849
      2849
      999
      Cleveland
      Ohio
      US
      41.505161
      -81.693444
    
  

2850 rows × 7 columns



In [270]:

    
new_cities_coords = cities_coords[['city', 'state','nation','coord0','coord1']].drop_duplicates()
print new_cities_coords.shape



In [271]:

    
new_cities_coords









    Out[271]:






  
    
      
      city
      state
      nation
      coord0
      coord1
    
  
  
    
      0
      Bayonne
      New Jersey
      US
      40.668710
      -74.114310
    
    
      1
      Bergenfield
      New Jersey
      US
      40.927600
      -73.997360
    
    
      2
      Cliffside Park
      New Jersey
      US
      40.821490
      -73.987640
    
    
      3
      Englewood
      New Jersey
      US
      40.892880
      -73.972640
    
    
      4
      Fair Lawn
      New Jersey
      US
      40.940380
      -74.131810
    
    
      5
      Fort Lee
      New Jersey
      US
      40.850930
      -73.970140
    
    
      6
      Hackensack
      New Jersey
      US
      40.885930
      -74.043470
    
    
      7
      Hoboken
      New Jersey
      US
      40.743990
      -74.032360
    
    
      8
      Jersey City
      New Jersey
      US
      40.728160
      -74.077640
    
    
      9
      Kearny
      New Jersey
      US
      40.768430
      -74.145420
    
    
      10
      North Bergen
      New Jersey
      US
      40.804270
      -74.012080
    
    
      11
      Paramus
      New Jersey
      US
      40.944540
      -74.075420
    
    
      12
      Ridgewood
      New Jersey
      US
      40.979260
      -74.116530
    
    
      13
      Teaneck
      New Jersey
      US
      40.897600
      -74.015970
    
    
      14
      Union City
      New Jersey
      US
      40.779550
      -74.023750
    
    
      15
      West New York
      New Jersey
      US
      40.787880
      -74.014310
    
    
      16
      Washington
      District of Columbia
      US
      38.895110
      -77.036370
    
    
      17
      Bridgeport
      Connecticut
      US
      41.167040
      -73.204830
    
    
      18
      Danbury
      Connecticut
      US
      41.394820
      -73.454010
    
    
      19
      East Haven
      Connecticut
      US
      41.276210
      -72.868430
    
    
      20
      Meriden
      Connecticut
      US
      41.538150
      -72.807040
    
    
      21
      Milford
      Connecticut
      US
      41.222320
      -73.056500
    
    
      22
      Naugatuck
      Connecticut
      US
      41.485930
      -73.050660
    
    
      23
      New Haven
      Connecticut
      US
      41.308150
      -72.928160
    
    
      24
      North Haven
      Connecticut
      US
      41.390930
      -72.859540
    
    
      25
      Norwalk
      Connecticut
      US
      41.117600
      -73.407900
    
    
      26
      Shelton
      Connecticut
      US
      41.316490
      -73.093160
    
    
      27
      Stamford
      Connecticut
      US
      41.053430
      -73.538730
    
    
      28
      Stratford
      Connecticut
      US
      41.184540
      -73.133170
    
    
      29
      Trumbull
      Connecticut
      US
      41.242870
      -73.200670
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      2820
      Warwick
      Rhode Island
      US
      41.700202
      -71.416111
    
    
      2821
      Winter Garden
      Florida
      US
      28.550269
      -81.592597
    
    
      2822
      Maricopa
      Arizona
      US
      33.348830
      -112.491230
    
    
      2823
      Compton
      California
      US
      33.894882
      -118.226043
    
    
      2824
      Charleston
      West Virginia
      US
      38.350599
      -81.633281
    
    
      2825
      Concord
      New Hampshire
      US
      43.207106
      -71.537021
    
    
      2826
      St. Paul
      Minnesota
      US
      44.950404
      -93.101503
    
    
      2827
      Buckeye
      Arizona
      US
      33.370275
      -112.583867
    
    
      2828
      Glendale
      California
      US
      34.142498
      -118.248596
    
    
      2829
      Jurupa Valley
      California
      US
      33.979847
      -117.451575
    
    
      2830
      Fort Myers
      Florida
      US
      26.640628
      -81.872308
    
    
      2831
      Rockwall
      Texas
      US
      32.931234
      -96.459709
    
    
      2832
      Eastvale
      California
      US
      33.976680
      -117.559844
    
    
      2833
      Menifee
      California
      US
      33.686443
      -117.177044
    
    
      2834
      St. Petersburg
      Florida
      US
      27.770380
      -82.669508
    
    
      2835
      Miami
      Florida
      US
      25.774266
      -80.193659
    
    
      2836
      Boston
      Massachusetts
      US
      42.360482
      -71.059568
    
    
      2837
      Goodyear
      Arizona
      US
      33.435609
      -112.357912
    
    
      2838
      Bentonville
      Arkansas
      US
      36.372854
      -94.208817
    
    
      2839
      Raleigh
      North Carolina
      US
      35.780398
      -78.639099
    
    
      2840
      Canton
      Ohio
      US
      40.798952
      -81.378444
    
    
      2841
      Las Vegas
      Nevada
      US
      36.166286
      -115.149225
    
    
      2842
      Springfield
      Massachusetts
      US
      42.101483
      -72.589811
    
    
      2843
      Bend
      Oregon
      US
      44.058173
      -121.315309
    
    
      2844
      Miami Beach
      Florida
      US
      25.788144
      -80.127270
    
    
      2845
      San Francisco
      California
      US
      37.779281
      -122.419236
    
    
      2846
      Eugene
      Oregon
      US
      44.101181
      -123.152384
    
    
      2847
      Albany
      New York
      US
      42.651167
      -73.754968
    
    
      2848
      Kent
      Washington
      US
      47.382690
      -122.227027
    
    
      2849
      Cleveland
      Ohio
      US
      41.505161
      -81.693444
    
  

1958 rows × 5 columns



In [14]:

    
new_cities_coords = new_cities_coords.drop_duplicates()



In [27]:

    
new_cities_coords.reset_index?



In [25]:

    
new_cities_coords.to_csv('all_cities_coords2.csv')



In [279]:

    
def calculate_initial_compass_bearing(pointA, pointB):
    """
    Calculates the bearing between two points.
    The formulae used is the following:
        θ = atan2(sin(Δlong).cos(lat2),
                  cos(lat1).sin(lat2) − sin(lat1).cos(lat2).cos(Δlong))
    :Parameters:
      - `pointA: The tuple representing the latitude/longitude for the
        first point. Latitude and longitude must be in decimal degrees
      - `pointB: The tuple representing the latitude/longitude for the
        second point. Latitude and longitude must be in decimal degrees
    :Returns:
      The bearing in degrees
    :Returns Type:
      float
    """
    if (type(pointA) != tuple) or (type(pointB) != tuple):
        raise TypeError("Only tuples are supported as arguments")

    lat1 = math.radians(pointA[0])
    lat2 = math.radians(pointB[0])

    diffLong = math.radians(pointB[1] - pointA[1])

    x = math.sin(diffLong) * math.cos(lat2)
    y = math.cos(lat1) * math.sin(lat2) - (math.sin(lat1)
            * math.cos(lat2) * math.cos(diffLong))

    initial_bearing = math.atan2(x, y)

    # Now we have the initial bearing but math.atan2 return values
    # from -180° to + 180° which is not what we want for a compass bearing
    # The solution is to normalize the initial bearing as shown below
    initial_bearing = math.degrees(initial_bearing)
    compass_bearing = (initial_bearing + 360) % 360

    return compass_bearing



In [297]:

    
conn = psycopg2.connect(conn_str)   
cur = conn.cursor() 
cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(123)) 
not cur.fetchone()









    Out[297]:





True



In [295]:

    
a = cur.fetchone()



In [310]:

    
np.concatenate(np.array([0, 91,23]),np.zeros((len(event_ids), 3)))









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-310-67c82e74fc34> in <module>()
----> 1 np.concatenate(np.array([0, 91,23]),np.zeros((len(event_ids), 3)))

TypeError: only integer scalar arrays can be converted to a scalar index



In [320]:

    
t = np.zeros((len(event_ids), 3))
points = np.vstack((np.array(['test',91,23]),np.zeros((len(event_ids), 3))))



In [322]:

    
points









    Out[322]:





array([['test', '91', '23'],
       ['0.0', '0.0', '0.0'],
       ['0.0', '0.0', '0.0'],
       ['0.0', '0.0', '0.0'],
       ['0.0', '0.0', '0.0'],
       ['0.0', '0.0', '0.0']], 
      dtype='|S32')



In [338]:

    
t = []
t.extend(['12']*2)



In [339]:

    
t









    Out[339]:





['12', '12']



In [ ]:

	area_code	city	state	nation	coord0	coord1
834	415	Novato	California	US	38.10742	-122.56970
835	415	San Anselmo	California	US	37.97465	-122.56164
836	415	South San Francisco	California	US	37.65466	-122.40775
837	415	San Rafael	California	US	37.97353	-122.53109

	Unnamed: 0	area_code	city	state	nation	coord0	coord1
0	0	201	Bayonne	New Jersey	US	40.668710	-74.114310
1	1	201	Bergenfield	New Jersey	US	40.927600	-73.997360
2	2	201	Cliffside Park	New Jersey	US	40.821490	-73.987640
3	3	201	Englewood	New Jersey	US	40.892880	-73.972640
4	4	201	Fair Lawn	New Jersey	US	40.940380	-74.131810
5	5	201	Fort Lee	New Jersey	US	40.850930	-73.970140
6	6	201	Hackensack	New Jersey	US	40.885930	-74.043470
7	7	201	Hoboken	New Jersey	US	40.743990	-74.032360
8	8	201	Jersey City	New Jersey	US	40.728160	-74.077640
9	9	201	Kearny	New Jersey	US	40.768430	-74.145420
10	10	201	North Bergen	New Jersey	US	40.804270	-74.012080
11	11	201	Paramus	New Jersey	US	40.944540	-74.075420
12	12	201	Ridgewood	New Jersey	US	40.979260	-74.116530
13	13	201	Teaneck	New Jersey	US	40.897600	-74.015970
14	14	201	Union City	New Jersey	US	40.779550	-74.023750
15	15	201	West New York	New Jersey	US	40.787880	-74.014310
16	16	202	Washington	District of Columbia	US	38.895110	-77.036370
17	17	203	Bridgeport	Connecticut	US	41.167040	-73.204830
18	18	203	Danbury	Connecticut	US	41.394820	-73.454010
19	19	203	East Haven	Connecticut	US	41.276210	-72.868430
20	20	203	Meriden	Connecticut	US	41.538150	-72.807040
21	21	203	Milford	Connecticut	US	41.222320	-73.056500
22	22	203	Naugatuck	Connecticut	US	41.485930	-73.050660
23	23	203	New Haven	Connecticut	US	41.308150	-72.928160
24	24	203	North Haven	Connecticut	US	41.390930	-72.859540
25	25	203	Norwalk	Connecticut	US	41.117600	-73.407900
26	26	203	Shelton	Connecticut	US	41.316490	-73.093160
27	27	203	Stamford	Connecticut	US	41.053430	-73.538730
28	28	203	Stratford	Connecticut	US	41.184540	-73.133170
29	29	203	Trumbull	Connecticut	US	41.242870	-73.200670
...	...	...	...	...	...	...	...
2820	2820	999	Warwick	Rhode Island	US	41.700202	-71.416111
2821	2821	999	Winter Garden	Florida	US	28.550269	-81.592597
2822	2822	999	Maricopa	Arizona	US	33.348830	-112.491230
2823	2823	999	Compton	California	US	33.894882	-118.226043
2824	2824	999	Charleston	West Virginia	US	38.350599	-81.633281
2825	2825	999	Concord	New Hampshire	US	43.207106	-71.537021
2826	2826	999	St. Paul	Minnesota	US	44.950404	-93.101503
2827	2827	999	Buckeye	Arizona	US	33.370275	-112.583867
2828	2828	999	Glendale	California	US	34.142498	-118.248596
2829	2829	999	Jurupa Valley	California	US	33.979847	-117.451575
2830	2830	999	Fort Myers	Florida	US	26.640628	-81.872308
2831	2831	999	Rockwall	Texas	US	32.931234	-96.459709
2832	2832	999	Eastvale	California	US	33.976680	-117.559844
2833	2833	999	Menifee	California	US	33.686443	-117.177044
2834	2834	999	St. Petersburg	Florida	US	27.770380	-82.669508
2835	2835	999	Miami	Florida	US	25.774266	-80.193659
2836	2836	999	Boston	Massachusetts	US	42.360482	-71.059568
2837	2837	999	Goodyear	Arizona	US	33.435609	-112.357912
2838	2838	999	Bentonville	Arkansas	US	36.372854	-94.208817
2839	2839	999	Raleigh	North Carolina	US	35.780398	-78.639099
2840	2840	999	Canton	Ohio	US	40.798952	-81.378444
2841	2841	999	Las Vegas	Nevada	US	36.166286	-115.149225
2842	2842	999	Springfield	Massachusetts	US	42.101483	-72.589811
2843	2843	999	Bend	Oregon	US	44.058173	-121.315309
2844	2844	999	Miami Beach	Florida	US	25.788144	-80.127270
2845	2845	999	San Francisco	California	US	37.779281	-122.419236
2846	2846	999	Eugene	Oregon	US	44.101181	-123.152384
2847	2847	999	Albany	New York	US	42.651167	-73.754968
2848	2848	999	Kent	Washington	US	47.382690	-122.227027
2849	2849	999	Cleveland	Ohio	US	41.505161	-81.693444