In [18]:
import pandas as pd
import numpy as np
import requests
import psycopg2
import json
import simplejson
import urllib
import config
import ast

from operator import itemgetter
from sklearn.cluster import KMeans
from sqlalchemy import create_engine

In [469]:
!pip install --upgrade pip
!pip install sqlalchemy
!pip install psycopg2
!pip install simplejson
!pip install config


Requirement already up-to-date: pip in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages
Requirement already satisfied: sqlalchemy in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages
Requirement already satisfied: psycopg2 in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages
Requirement already satisfied: simplejson in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages
Requirement already satisfied: config in /Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages

In [33]:
# PostgreSQL DSN for the local travel_with_friends database.
# NOTE(review): credentials/host are hardcoded in the notebook; consider
# reading them from environment variables or a config file.
conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"
# conn_str = "dbname='travel_with_friends' user='Zoesh' host='localhost'"

In [29]:
import math
import random


def distL2(p1, p2):
    """Compute the L2-norm (Euclidean) distance between two points.

    Parameters:
    - p1, p2 -- (x, y) coordinate 2-sequences.

    Note: despite the TSPLIB lineage of this code, the result is NOT
    truncated to an integer -- the +.5 offset survives from the TSPLIB
    rounding idiom int(sqrt(...) + .5), but int() was dropped, presumably
    because this notebook feeds fractional lat/lon coordinates where
    integer rounding would collapse all distances to 0 -- TODO confirm.

    The Python-2-only tuple-parameter syntax ``def distL2((x1,y1),(x2,y2))``
    was replaced with explicit unpacking so the function also parses under
    Python 3; callers are unaffected (arguments are passed positionally).
    """
    (x1, y1) = p1
    (x2, y2) = p2
    xdiff = x2 - x1
    ydiff = y2 - y1
    return math.sqrt(xdiff*xdiff + ydiff*ydiff) + .5


def distL1(p1, p2):
    """Compute the L1-norm (Manhattan) distance between two points.

    Parameters:
    - p1, p2 -- (x, y) coordinate 2-sequences.

    Note: as with distL2, the value is NOT truncated to an integer; only
    the +.5 offset of the TSPLIB rounding idiom remains -- TODO confirm
    this asymmetry with the TSPLIB convention is intended.

    Python-2-only tuple-parameter syntax replaced with explicit unpacking
    for Python 3 compatibility; call sites are unchanged.
    """
    (x1, y1) = p1
    (x2, y2) = p2
    return abs(x2-x1) + abs(y2-y1) + .5


def mk_matrix(coord, dist):
    """Build a symmetric distance matrix for a set of points.

    Parameters:
    - coord -- list of (x, y) tuples, one per point
    - dist -- two-argument distance function called as dist((x1,y1), (x2,y2))

    Returns (n, D): n is the number of points and D is a dict keyed by
    (i, j) index pairs for every i != j, with D[i,j] == D[j,i].
    """
    n = len(coord)
    D = {}
    for i, (x1, y1) in enumerate(coord):
        for j in range(i + 1, n):
            x2, y2 = coord[j]
            d = dist((x1, y1), (x2, y2))
            D[i, j] = d
            D[j, i] = d
    return n, D

def read_tsplib(filename):
    "basic function for reading a TSP problem on the TSPLIB format"
    "NOTE: only works for 2D euclidean or manhattan distances"
    f = open(filename, 'r');

    # scan the header for the distance type line
    line = f.readline()
    while line.find("EDGE_WEIGHT_TYPE") == -1:
        line = f.readline()

    # pick the matching distance function (EUC_2D -> L2, MAN_2D -> L1)
    if line.find("EUC_2D") != -1:
        dist = distL2
    elif line.find("MAN_2D") != -1:
        dist = distL1
    else:
        print "cannot deal with non-euclidean or non-manhattan distances"
        raise Exception

    # skip forward to the coordinate section
    while line.find("NODE_COORD_SECTION") == -1:
        line = f.readline()

    # read "<id> <x> <y>" records until the EOF marker line
    xy_positions = []
    while 1:
        line = f.readline()
        if line.find("EOF") != -1: break
        (i,x,y) = line.split()
        x = float(x)
        y = float(y)
        xy_positions.append((x,y))

    # NOTE(review): the file handle is never closed -- consider f.close()
    # or a 'with' block.
    n,D = mk_matrix(xy_positions, dist)
    return n, xy_positions, D


def mk_closest(D, n):
    """For each node, list its neighbours ordered from nearest to farthest.

    Parameters:
    - D -- distance matrix keyed by (i, j) node pairs
    - n -- number of nodes

    Returns C where C[i] is a list of (distance, node) tuples for every
    node j != i, sorted ascending by distance.
    """
    return [
        sorted((D[i, j], j) for j in range(n) if j != i)
        for i in range(n)
    ]


def length(tour, D):
    """Calculate the length of a closed tour according to distance matrix 'D'.

    Sums the edges between consecutive cities plus the closing edge from
    the last city back to the first.  D is indexed with the same (later,
    earlier) key order as the original implementation, so asymmetric
    matrices behave identically.
    """
    closing_edge = D[tour[-1], tour[0]]
    inner_edges = sum(D[tour[i], tour[i - 1]] for i in range(1, len(tour)))
    return closing_edge + inner_edges


def randtour(n):
    """Construct a random tour visiting nodes 0..n-1.

    Fix: materialize the range as a list ('list(range(n))') so that
    random.shuffle -- which needs a mutable sequence -- also works under
    Python 3, where range() returns an immutable lazy object.  Behavior
    under Python 2 is unchanged.
    """
    sol = list(range(n))    # [0, 1, ..., n-1]
    random.shuffle(sol)     # in-place random permutation
    return sol


def nearest(last, unvisited, D):
    """Return the unvisited node closest to 'last' according to matrix D.

    Ties are broken in favour of the node appearing earlier in
    'unvisited', matching the original scan-and-replace loop (min() keeps
    the first minimal element).
    """
    return min(unvisited, key=lambda node: D[last, node])


def nearest_neighbor(n, i, D):
    """Return tour starting from city 'i', using the Nearest Neighbor heuristic.

    Uses the Nearest Neighbor heuristic to construct a solution:
    - start visiting city i
    - while there are unvisited cities, follow to the closest one
    - return to city i (implicitly, via the closing edge)

    Fixes:
    - 'list(range(n))' instead of bare 'range(n)': .remove() does not
      exist on Python 3 range objects (behavior identical on Python 2)
    - loop variable renamed 'nxt' to stop shadowing the builtin 'next'
    """
    unvisited = list(range(n))
    unvisited.remove(i)
    last = i
    tour = [i]
    while unvisited:
        nxt = nearest(last, unvisited, D)   # greedy step to closest city
        tour.append(nxt)
        unvisited.remove(nxt)
        last = nxt
    return tour



def exchange_cost(tour, i, j, D):
    """Cost delta of a 2-opt exchange on 'tour'.

    Removing arcs (i,i+1) and (j,j+1) and inserting (i,j) and (i+1,j+1)
    changes the tour length by the returned amount (negative means the
    exchange shortens the tour).  Positions wrap around, so the arc after
    the last position connects back to position 0.

    Parameters:
    -tour -- a tour
    -i -- position of the first arc
    -j>i -- position of the second arc
    """
    n = len(tour)
    a = tour[i]
    b = tour[(i + 1) % n]
    c = tour[j]
    d = tour[(j + 1) % n]
    removed = D[a, b] + D[c, d]
    added = D[a, c] + D[b, d]
    return added - removed


def exchange(tour, tinv, i, j):
    """Exchange arcs (i,i+1) and (j,j+1) with (i,j) and (i+1,j+1).

    For the given tour 't', remove the arcs (i,i+1) and (j,j+1) and
    insert (i,j) and (i+1,j+1).

    This is done by inverting the sublist of cities between i and j.
    """
    n = len(tour)
    if i>j:
        i,j = j,i
    assert i>=0 and i<j-1 and j<n
    path = tour[i+1:j+1]
    path.reverse()
    tour[i+1:j+1] = path
    for k in range(i+1,j+1):
        tinv[tour[k]] = k


def improve(tour, z, D, C):
    """Try to improve tour 't' by exchanging arcs; return improved tour length.

    If possible, make a series of local improvements on the solution 'tour',
    using a breadth first strategy, until reaching a local optimum.

    Parameters:
      tour -- current tour (list of cities; modified in place via exchange)
      z -- current tour length
      D -- distance matrix keyed by (city, city) pairs
      C -- per-city neighbour lists sorted by distance (see mk_closest)
    """
    n = len(tour)
    tinv = [0 for i in tour]
    for k in range(n):
        tinv[tour[k]] = k  # position of each city in 't'
    for i in range(n):
        # arc (a,b) currently leaves position i
        a,b = tour[i],tour[(i+1)%n]
        dist_ab = D[a,b]
        improved = False
        # try moves (a,b),(c,d) -> (a,c),(b,d); C[a] is sorted by
        # distance, so once dist_ac >= dist_ab no further gain is possible
        for dist_ac,c in C[a]:
            if dist_ac >= dist_ab:
                break
            j = tinv[c]
            d = tour[(j+1)%n]
            dist_cd = D[c,d]
            dist_bd = D[b,d]
            delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
            if delta < 0:       # exchange decreases length
                exchange(tour, tinv, i, j);
                z += delta
                improved = True
                break
        if improved:
            continue
        # symmetric case: scan neighbours of b for a profitable (b,d) arc
        for dist_bd,d in C[b]:
            if dist_bd >= dist_ab:
                break
            # position of the arc *entering* d (wrapping at the start)
            j = tinv[d]-1
            if j==-1:
                j=n-1
            c = tour[j]
            dist_cd = D[c,d]
            dist_ac = D[a,c]
            delta = (dist_ac + dist_bd) - (dist_ab + dist_cd)
            if delta < 0:       # exchange decreases length
                exchange(tour, tinv, i, j);
                z += delta
                break
    return z


def localsearch(tour, z, D, C=None):
    """Drive 'tour' to a 2-opt local optimum; return the final tour length.

    Parameters:
      tour -- initial tour (improved in place)
      z -- length of the initial tour
      D -- distance matrix
      C -- optional precomputed sorted neighbour lists (see mk_closest);
           built on demand when omitted

    Repeatedly calls improve() until it yields no shorter tour.

    Fix: 'C is None' replaces 'C == None' (identity check is the correct
    idiom; '==' can be hijacked by __eq__ on container-like C values).
    """
    n = len(tour)
    if C is None:
        C = mk_closest(D, n)     # create a sorted list of distances to each node
    while True:
        newz = improve(tour, z, D, C)
        if newz < z:
            z = newz
        else:
            break
    return z


def multistart_localsearch(k, n, D, report=None):
    """Do k iterations of local search, starting from random solutions.

    Parameters:
    -k -- number of iterations (restarts)
    -n -- number of nodes
    -D -- distance matrix
    -report -- if not None, called as report(z, tour) on each improvement

    Returns best solution and its cost.

    Fix: the None guard now runs *before* the comparison
    ('bestz is None or z < bestz').  The original 'z < bestz or
    bestz == None' evaluated 'z < None' on the first iteration, which
    raises TypeError on Python 3 and relied on arbitrary ordering of
    None on Python 2.
    """
    C = mk_closest(D, n) # create a sorted list of distances to each node
    bestt = None
    bestz = None
    for _ in range(k):
        tour = randtour(n)
        z = length(tour, D)
        z = localsearch(tour, z, D, C)
        if bestz is None or z < bestz:
            bestz = z
            bestt = list(tour)
            if report:
                report(z, tour)

    return bestt, bestz

In [471]:
# db_name = "travel_with_friends"
# TABLES ={}
# TABLES['full_trip_table'] = (
#     "CREATE TABLE `full_trip_table` ("
#     "  `user_id` int(11) NOT NULL AUTO_INCREMENT,"
#     "  `full_trip_id` date NOT NULL,"
#     "  `trip_location_ids` varchar(14) NOT NULL,"
#     "  `default` varchar(16) NOT NULL,"
#     "  `county` enum('M','F') NOT NULL,"
#     "  `state` date NOT NULL,"
#     "  `details` ,"
#     "  `n_days`,"
#     "  PRIMARY KEY (`full_trip_id`)"
#     ") ENGINE=InnoDB")

In [472]:
# def create_tables():
#     """ create tables in the PostgreSQL database"""
#     commands = (
#         """
#         CREATE TABLE full_trip_table (
#             index INTEGER PRIMARY KEY,
#             user_id VARCHAR(225) NOT NULL,
#             full_trip_id VARCHAR(225) NOT NULL,
#             trip_location_ids VARCHAR(225),
#             default BOOLEAN NOT NULL,
#             county VARCHAR(225) NOT NULL,
#             state VARCHAR(225) NOT NULL,
#             details VARCHAR(MAX),
#             n_days VARCHAR(225) NOT NULL
#         )
#         """,
#         """ CREATE TABLE day_trip_table (
#                 trip_locations_id 
#                 full_day
#                 default 
#                 county 
#                 state
#                 details
#                 )
#         """,
#         """
#         CREATE TABLE poi_detail_table (
#                 part_id INTEGER PRIMARY KEY,
#                 file_extension VARCHAR(5) NOT NULL,
#                 drawing_data BYTEA NOT NULL,
#                 FOREIGN KEY (part_id)
#                 REFERENCES parts (part_id)
#                 ON UPDATE CASCADE ON DELETE CASCADE
#         )
#         """,
#         """
#         CREATE TABLE google_travel_time_table (
#                 index INTEGER PRIMARY KEY,
#                 id_ VARCHAR NOT NULL,
#                 orig_name VARCHAR,
#                 orig_idx VARCHAR,
#                 dest_name VARCHAR,
#                 dest_idx VARCHAR,
#                 orig_coord0 INTEGER,
#                 orig_coord1 INTEGER,
#                 dest_coord0 INTEGER,
#                 dest_coord1 INTEGER,
#                 orig_coords VARCHAR,
#                 dest_coords VARCHAR,
#                 google_driving_url VARCHAR,
#                 google_walking_url VARCHAR,
#                 driving_result VARCHAR,
#                 walking_result VARCHAR,
#                 google_driving_time INTEGER,
#                 google_walking_time INTEGER
                

#         )
#         """)
#     conn = None
#     try:
#         # read the connection parameters
#         params = config()
#         # connect to the PostgreSQL server
#         conn = psycopg2.connect(**params)
#         cur = conn.cursor()
#         # create table one by one
#         for command in commands:
#             cur.execute(command)
#         # close communication with the PostgreSQL database server
#         cur.close()
#         # commit the changes
#         conn.commit()
#     except (Exception, psycopg2.DatabaseError) as error:
#         print(error)
#     finally:
#         if conn is not None:
#             conn.close()

In [473]:
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])

# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
#                                        'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
#                                        'google_walking_url','driving_result','walking_result','google_driving_time',\
#                                        'google_walking_time'])

In [87]:
# read poi details csv file 
poi_detail = pd.read_csv("./step9_poi.csv", index_col=0)
# placeholder column -- presumably filled in later from geocoding results;
# TODO confirm
poi_detail['address'] = None
# missing ratings become 0 so numeric sorting/comparison works downstream
poi_detail['rating']=poi_detail['rating'].fillna(0)
#read US city state and county csv file
# df_counties = pd.read_csv('./us_cities_states_counties.csv',sep='|')
#find counties without duplicate
# df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()

In [34]:
def init_db_tables():
    """(Re)create the app's Postgres tables from pandas DataFrames.

    Builds empty frames for full trips, day trips and Google travel
    times, seeds each with one example row, and writes them -- plus the
    poi_detail frame and the US city/county lookup -- to Postgres with
    if_exists="replace" (destructive: existing tables are dropped).

    SECURITY NOTE(review): the seeded Google Maps URLs embed a hardcoded
    API key -- it is committed with this notebook and should be revoked
    and moved to configuration.
    NOTE(review): hardcoded absolute path under /Users/zoesh/ makes this
    non-portable; prefer a configurable data directory.
    """
    full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

    day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details','event_type','event_ids'])

    google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
                                           'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
                                           'google_walking_url','driving_result','walking_result','google_driving_time',\
                                           'google_walking_time'])
    # seed row: one default San Diego day trip
    day_trip_locations_table.loc[0] = ['CALIFORNIA-SAN-DIEGO-1-3-0', True, True, 'SAN DIEGO', 'California',
       ["{'address': '15500 San Pasqual Valley Rd, Escondido, CA 92027, USA', 'id': 2259, 'day': 0, 'name': u'San Diego Zoo Safari Park'}", "{'address': 'Safari Walk, Escondido, CA 92027, USA', 'id': 2260, 'day': 0, 'name': u'Meerkat'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3486, 'day': 0, 'name': u'Stone'}", "{'address': '1999 Citracado Parkway, Escondido, CA 92029, USA', 'id': 3487, 'day': 0, 'name': u'Stone Brewery'}", "{'address': 'Mount Woodson Trail, Poway, CA 92064, USA', 'id': 4951, 'day': 0, 'name': u'Lake Poway'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4953, 'day': 0, 'name': u'Potato Chip Rock'}", "{'address': '17130 Mt Woodson Rd, Ramona, CA 92065, USA', 'id': 4952, 'day': 0, 'name': u'Mt. Woodson'}"],
       'big','[2259, 2260,3486,3487,4951,4953,4952]']
    # seed row: one cached driving/walking time pair between two POIs
    google_travel_time_table.loc[0] = ['439300002871', u'Moonlight Beach', 4393.0,
       u'Carlsbad Flower Fields', 2871.0, -117.29692141333341,
       33.047769600024424, -117.3177652511278, 33.124079753475236,
       '33.0477696,-117.296921413', '33.1240797535,-117.317765251',
       'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=driving&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
       'https://maps.googleapis.com/maps/api/distancematrix/json?origins=33.0477696,-117.296921413&destinations=33.1240797535,-117.317765251&mode=walking&language=en-EN&sensor=false&key=AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE',
       "{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '14 mins', 'value': 822}, 'distance': {'text': '10.6 km', 'value': 10637}, 'status': 'OK'}]}], 'origin_addresses': ['233 C St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
       "{'status': 'OK', 'rows': [{'elements': [{'duration': {'text': '2 hours 4 mins', 'value': 7457}, 'distance': {'text': '10.0 km', 'value': 10028}, 'status': 'OK'}]}], 'origin_addresses': ['498 B St, Encinitas, CA 92024, USA'], 'destination_addresses': ['5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA']}",
       13.0, 124.0]
    # seed row: one default 3-day full trip referencing the day trips above
    full_trip_table.loc[0] = ['gordon_lee01', 'CALIFORNIA-SAN-DIEGO-1-3',
       "['CALIFORNIA-SAN-DIEGO-1-3-0', 'CALIFORNIA-SAN-DIEGO-1-3-1', 'CALIFORNIA-SAN-DIEGO-1-3-2']",
       True, 'SAN DIEGO', 'California',
       '["{\'address\': \'15500 San Pasqual Valley Rd, Escondido, CA 92027, USA\', \'id\': 2259, \'day\': 0, \'name\': u\'San Diego Zoo Safari Park\'}", "{\'address\': \'Safari Walk, Escondido, CA 92027, USA\', \'id\': 2260, \'day\': 0, \'name\': u\'Meerkat\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3486, \'day\': 0, \'name\': u\'Stone\'}", "{\'address\': \'1999 Citracado Parkway, Escondido, CA 92029, USA\', \'id\': 3487, \'day\': 0, \'name\': u\'Stone Brewery\'}", "{\'address\': \'Mount Woodson Trail, Poway, CA 92064, USA\', \'id\': 4951, \'day\': 0, \'name\': u\'Lake Poway\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4953, \'day\': 0, \'name\': u\'Potato Chip Rock\'}", "{\'address\': \'17130 Mt Woodson Rd, Ramona, CA 92065, USA\', \'id\': 4952, \'day\': 0, \'name\': u\'Mt. Woodson\'}", "{\'address\': \'1 Legoland Dr, Carlsbad, CA 92008, USA\', \'id\': 2870, \'day\': 1, \'name\': u\'Legoland\'}", "{\'address\': \'5754-5780 Paseo Del Norte, Carlsbad, CA 92008, USA\', \'id\': 2871, \'day\': 1, \'name\': u\'Carlsbad Flower Fields\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2089, \'day\': 1, \'name\': u\'Oceanside Pier\'}", "{\'address\': \'211-359 The Strand N, Oceanside, CA 92054, USA\', \'id\': 2090, \'day\': 1, \'name\': u\'Pier\'}", "{\'address\': \'1016-1024 Neptune Ave, Encinitas, CA 92024, USA\', \'id\': 2872, \'day\': 1, \'name\': u\'Encinitas\'}", "{\'address\': \'625 Pan American Rd E, San Diego, CA 92101, USA\', \'id\': 147, \'day\': 2, \'name\': u\'Balboa Park\'}", "{\'address\': \'1849-1863 Zoo Pl, San Diego, CA 92101, USA\', \'id\': 152, \'day\': 2, \'name\': u\'San Diego Zoo\'}", "{\'address\': \'701-817 Coast Blvd, La Jolla, CA 92037, USA\', \'id\': 148, \'day\': 2, \'name\': u\'La Jolla\'}", "{\'address\': \'10051-10057 Pebble Beach Dr, Santee, CA 92071, USA\', \'id\': 4630, \'day\': 2, \'name\': u\'Santee Lakes\'}", "{\'address\': \'Lake Murray 
Bike Path, La Mesa, CA 91942, USA\', \'id\': 4545, \'day\': 2, \'name\': u\'Lake Murray\'}", "{\'address\': \'4905 Mt Helix Dr, La Mesa, CA 91941, USA\', \'id\': 4544, \'day\': 2, \'name\': u\'Mt. Helix\'}", "{\'address\': \'1720 Melrose Ave, Chula Vista, CA 91911, USA\', \'id\': 1325, \'day\': 2, \'name\': u\'Thick-billed Kingbird\'}", "{\'address\': \'711 Basswood Ave, Imperial Beach, CA 91932, USA\', \'id\': 1326, \'day\': 2, \'name\': u\'Lesser Sand-Plover\'}"]',
       3.0]
    engine = create_engine('postgresql://zoesh@localhost:5432/travel_with_friends')
    # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
    # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')

    # "replace" drops and recreates each table on every call
    full_trip_table.to_sql('full_trip_table',engine, if_exists = "replace")
    day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "replace")
    google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "replace")
    poi_detail.to_sql('poi_detail_table',engine, if_exists = "replace")
    
    
    # US city -> county lookup used by find_county()
    df_counties = pd.read_csv('/Users/zoesh/Desktop/travel_with_friends/travel_with_friends/us_cities_states_counties.csv',sep='|')
    df_counties_u = df_counties.drop('City alias',axis = 1).drop_duplicates()
    df_counties_u.columns = ["city","state_abb","state","county"]
    df_counties_u.to_sql('county_table',engine, if_exists = "replace")
init_db_tables()

In [42]:
def cold_start_places(df, county, state, city, number_days, first_day_full = True, last_day_full = True):
    """Select the candidate POI rows for a cold-start itinerary.

    When 'county' (array-like with .values, e.g. a pandas Series) is
    non-empty, its first value is taken as the county name and df is
    filtered on county + state; otherwise the filter falls back to
    city + state.  number_days / first_day_full / last_day_full are
    currently unused by this function.

    Returns (county, filtered_df): county is the scalar county name, or
    the original empty object when no county was supplied.
    """
    if len(county.values):
        county = county.values[0]
        mask = (df['county'] == county) & (df['state'] == state)
    else:
        mask = (df['city'] == city) & (df['state'] == state)
    return county, df[mask]
def find_county(state, city):
    """Look up the county for a (state, city) pair in county_table.

    Returns the county name string, or None when the pair is unknown.

    Fixes:
    - parameterized query instead of Python %-interpolation into the SQL
      string (injection-prone, and broken for names containing a quote)
    - connection closed in a finally block so it is released even when
      the query raises
    """
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        cur.execute(
            "select county from county_table where city = %s and state = %s;",
            (city.title(), state.title()),
        )
        county = cur.fetchone()
    finally:
        conn.close()
    if county:
        return county[0]
    else:
        return None
def db_start_location(county, state, city):
    """Fetch candidate POIs for a county (preferred) or a city within a state.

    Returns a numpy array whose rows are
    (index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating).

    Fixes:
    - parameterized queries replace the original quoted-%s interpolation
      (SQL injection / quoting bugs)
    - connection closed in a finally block
    """
    select_cols = ("select index, coord0, coord1, adjusted_normal_time_spent, "
                   "poi_rank, rating from poi_detail_table ")
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        if county:
            cur.execute(select_cols + "where county = %s and state = %s;",
                        (county.upper(), state))
        else:
            cur.execute(select_cols + "where city = %s and state = %s;",
                        (city, state))
        rows = cur.fetchall()
    finally:
        conn.close()
    return np.array(rows)

In [183]:
# Scratch cell: cluster San Francisco POIs into n_days groups and bucket
# cluster 0's events by expected time spent (minutes):
#   > 180 -> big, 120..180 -> med, < 120 -> small.
# NOTE(review): first argument of db_start_location is the county -- here
# 'San Francisco' county; presumably intentional, verify.
a1= db_start_location('San Francisco',"California","San Francisco")
poi_coords =  a1[:,1:3]     # (coord0, coord1) columns
n_days = 1
current_events =[]
big_ix, med_ix, small_ix =[],[],[]
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
i = 0   # only cluster 0 is examined here
for ix, label in enumerate(kmeans.labels_):
#     print ix, label
    if label == i:
        time = a1[ix,3]         # adjusted_normal_time_spent
        event_ix = a1[ix,0]     # poi index
        
        current_events.append(event_ix)
        if time > 180 :
            big_ix.append(ix)

        elif time >= 120 :
            med_ix.append(ix)
        else:
            small_ix.append(ix)
print big_ix, med_ix, small_ix
# big_ = a1[[big_ix],4:]
print med_ix, ('wat')
print a1[ix,0], 'wat', a1[med_ix]
print med_ix
# keep (event_id, poi_rank, rating) columns per bucket
big_ = a1[big_ix][:,[0,4,5]]
med_ = a1[med_ix][:,[0,4,5]]
small_ = a1[small_ix][:,[0,4,5]]


[] [2, 10, 14] [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 19, 20]
[2, 10, 14] wat
293.0 wat [[ 275.         -122.42271178   37.82661824  180.            3.            4.5       ]
 [ 283.         -122.39371165   37.79535812  120.           11.            4.5       ]
 [ 287.         -122.38973205   37.77838412  120.           15.            4.5       ]]
[2, 10, 14]

In [17]:
# Scratch cell: compare sorting strategies for (id, rank, rating) rows.
# NOTE(review): operator.itemgetter is already imported in the first
# cell; med_ comes from the clustering cell above.
from operator import itemgetter
med_s= sorted(med_, key=itemgetter(1,2))
a = [[1,3,3],[2,2,5],[3,4,4],[4,2,2]]
# [sorted(a, key=itemgetter(1,2)) for i in range(100)]
b = sorted(a, key=itemgetter(1,2))
# sort by 2nd element ascending, 3rd element descending
a.sort(key=lambda k: (k[1], -k[2]), reverse=False)
# print med_s
print a


[[2, 2, 5], [4, 2, 2], [1, 3, 3], [3, 4, 4]]

In [484]:
def default_cold_start_places(df,df_counties_u, day_trip_locations,full_trip_table,df_poi_travel_info,number_days = [1,2,3,4,5]):
    """Precompute default trips for a batch of counties.

    Iterates over (state, county) pairs from df_counties_u -- only the
    hardcoded slice [105:150], presumably a manual batch window; TODO
    confirm -- buckets each county's POIs by time spent, and generates
    default trips for each day count via default_search_cluster_events
    (defined elsewhere in this project).

    NOTE(review): number_days is a mutable default argument *and* is
    reassigned inside the loop, so a county with few activities shrinks
    the day range for every later county in the same call -- looks
    unintended; verify.

    Always returns None; results accumulate through the passed-in tables.
    """
    df_c = df_counties_u.groupby(['State full','County']).count().reset_index()
    for state, county,_,_ in df_c.values[105:150]:
        temp_df = df[(df['county'] == county) & (df['state'] == state)]
        if temp_df.shape[0]!=0:
            # under 6h of total activity -> 1 day; under 12h -> up to 2 days
            if sum(temp_df.adjusted_normal_time_spent) < 360:
                number_days = [1]
            elif sum(temp_df.adjusted_normal_time_spent) < 720:
                number_days = [1,2]
            # bucket POIs by expected time spent (minutes)
            big_events = temp_df[temp_df.adjusted_normal_time_spent > 180]
            med_events = temp_df[(temp_df.adjusted_normal_time_spent>= 120)&(temp_df.adjusted_normal_time_spent<=180)]
            small_events = temp_df[temp_df.adjusted_normal_time_spent < 120]
            for i in number_days:
                n_days = i
                full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info = \
                        default_search_cluster_events(df, df_counties_u, county, state, big_events,med_events, \
                                                      small_events, temp_df, n_days,day_trip_locations, full_trip_table,\
                                                      df_poi_travel_info)
                print county, state
                print full_trip_table.shape, len(day_trip_locations), new_trip_df1.shape, df_poi_travel_info.shape
    return None

In [426]:
# full_trip_table = pd.DataFrame(columns =['user_id', 'full_trip_id', 'trip_location_ids', 'default', 'county', 'state', 'details', 'n_days'])

# day_trip_locations_table = pd.DataFrame(columns =['trip_locations_id','full_day', 'default', 'county', 'state','details'])

# google_travel_time_table = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
#                                        'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
#                                        'google_walking_url','driving_result','walking_result','google_driving_time',\
#                                        'google_walking_time'])
# google_travel_time_table.loc[0] = ['000000000001', "home", '0000', 'space', '0001', 999 ,999, 999.1,999.1, "999,999","999.1,999.1","http://google.com","http://google.com", "", "", 0, 0 ]

In [427]:
# google_travel_time_table.index=google_travel_time_table.index.astype(int)

In [428]:
# engine = create_engine('postgresql://Gon@localhost:5432/travel_with_friends')
# # full_trip_table = pd.read_csv('./full_trip_table.csv', index_col= 0)
# # full_trip_table.to_sql('full_trip_table', engine,if_exists='append')

# full_trip_table.to_sql('full_trip_table',engine, if_exists = "append")
# day_trip_locations_table.to_sql('day_trip_table',engine, if_exists = "append")
# google_travel_time_table.to_sql('google_travel_time_table',engine, if_exists = "append")
# # df.to_sql('poi_detail_table',engine, if_exists = "append")

In [ ]:


In [ ]:
def trip_df_cloest_distance(trip_df, event_type):
    """Order the rows of trip_df into a short travel tour.

    Builds a distance matrix over the 'coord0'/'coord1' columns, grows a
    greedy nearest-neighbor tour from an event-type-dependent start row,
    then refines it with 2-opt local search.

    Parameters:
    - trip_df -- frame with 'coord0' and 'coord1' columns
    - event_type -- 'big' starts the tour at the last row (the big
      event), 'med' at the second-to-last, anything else at row 0

    Returns a sequence of row positions (the tour); for fewer than 3
    points, the identity order.

    Refactor: the original three branches duplicated the
    construct/length/localsearch sequence and differed only in the start
    index, so that index is now computed once.
    """
    points = trip_df[['coord0','coord1']].values.tolist()
    n, D = mk_matrix(points, distL2) # create the distance matrix
    if len(points) < 3:
        return range(len(points))
    if event_type == 'big':
        start = trip_df.shape[0] - 1
    elif event_type == 'med':
        start = trip_df.shape[0] - 2
    else:
        start = 0
    tour = nearest_neighbor(n, start, D)  # greedy tour from 'start'
    z = length(tour, D)
    z = localsearch(tour, z, D)           # 2-opt refinement
    return tour

In [253]:
def get_event_ids_list(trip_locations_id):
    """Fetch (event_ids, event_type) for one day trip from day_trip_table.

    event_ids is stored as a Python-literal string and parsed with
    ast.literal_eval (safe, unlike eval).

    Fixes: parameterized query replaces %-interpolated SQL; connection is
    closed in a finally block.
    """
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        cur.execute(
            "select event_ids,event_type from day_trip_table "
            "where trip_locations_id = %s",
            (trip_locations_id,),
        )
        event_ids, event_type = cur.fetchone()
    finally:
        conn.close()
    return ast.literal_eval(event_ids), event_type

def db_event_cloest_distance(trip_locations_id=None, event_ids=None, event_type='add', new_event_id=None):
    """Re-order a day's event ids into a short tour over their coordinates.

    When new_event_id is given (or event_ids is missing) the stored list
    is loaded from the database first; a new event is appended before
    ordering.

    Returns (ordered_event_ids_as_np_array, event_type); with fewer than
    3 events the original order is kept.

    Fixes:
    - parameterized SQL instead of the original '%i' interpolation
      (the int(float(v)) cast reproduces its truncation of float ids)
    - the 'add' and non-'add' branches were byte-identical duplicates,
      so they are merged; the TODO about differentiating them remains
    - connection closed in a finally block
    """
    if new_event_id or not event_ids:
        event_ids, event_type = get_event_ids_list(trip_locations_id)
        if new_event_id:
            event_ids.append(new_event_id)

    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        points = np.zeros((len(event_ids), 3))
        for i, v in enumerate(event_ids):
            cur.execute(
                "select index, coord0, coord1 from poi_detail_table where index = %s;",
                (int(float(v)),),
            )
            points[i] = cur.fetchone()
    finally:
        conn.close()

    if len(points) < 3:
        return np.array(event_ids), event_type
    # TODO: event-type-specific tour construction; all types currently
    # share the same greedy + 2-opt ordering (original branches were
    # identical).
    n, D = mk_matrix(points[:, 1:], distL2)
    tour = nearest_neighbor(n, 0, D)    # greedy tour from the first event
    z = length(tour, D)
    z = localsearch(tour, z, D)         # 2-opt refinement
    return np.array(event_ids)[tour], event_type

In [259]:
def check_full_trip_id(full_trip_id, debug):
    """Check whether full_trip_id exists in full_trip_table.

    Returns the stored 'details' value when found and debug is falsy,
    True when found and debug is truthy, and False when absent.

    Fixes: parameterized query replaces %-interpolated SQL; connection
    closed in a finally block.
    """
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        cur.execute("select details from full_trip_table where full_trip_id = %s",
                    (full_trip_id,))
        row = cur.fetchone()
    finally:
        conn.close()
    if row:
        return True if debug else row[0]
    return False

def check_day_trip_id(day_trip_id, debug):
    """Check whether day_trip_id exists in day_trip_table.

    Returns the stored 'details' value when found and debug is falsy,
    True when found and debug is truthy, and False when absent.
    (Mirrors check_full_trip_id for the day-trip table.)

    Fixes: parameterized query replaces %-interpolated SQL; connection
    closed in a finally block.
    """
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        cur.execute("select details from day_trip_table where trip_locations_id = %s",
                    (day_trip_id,))
        row = cur.fetchone()
    finally:
        conn.close()
    if row:
        return True if debug else row[0]
    return False

def check_travel_time_id(new_id):
    """Return True when google_travel_time_table has a row with id_ == new_id.

    Fixes: parameterized query replaces %-interpolated SQL; connection
    closed in a finally block; 'row is not None' replaces the redundant
    bool(a) branch (fetchone returns None or a non-empty tuple).
    """
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        cur.execute("select google_driving_time from google_travel_time_table where id_ = %s",
                    (new_id,))
        row = cur.fetchone()
    finally:
        conn.close()
    return row is not None

In [175]:
#need to check about the code!
def sorted_events(info,ix):
    '''
    find the event_id, ranking and rating columns
    sorted base on ranking then rating
    
    return sorted list 
    '''
    event_ = info[ix][:,[0,4,5]]
    return np.array(sorted(event_, key=lambda x: (x[1], -x[2])))

def sorted_outside_events(info, ix):
    '''
    Keep the event_id (col 0), poi_rank (col 4) and rating (col 5)
    columns of every row in 'info', ordered by ascending rank and,
    within equal ranks, by descending rating.  'ix' is accepted for
    signature parity with sorted_events but is unused.

    Returns the ordered rows as a numpy array.
    '''
    picked = info[:, [0, 4, 5]]
    ordered = sorted(picked, key=lambda row: (row[1], -row[2]))
    return np.array(ordered)

In [1185]:
# Scratch cell: experiment with stacking small/medium rows and sorting
# by (poi_rank asc, rating desc); depends on med_/small_ from the
# clustering cell above.
med_[0,1] ,small_[0,1] , med_[0,0]
# small_[0:8,0].concatenate(medium_[0:2,0])
c = small_[0:10,]
d = med_[0:2,]
print np.vstack((c,d))
# print list(np.array(sorted(np.vstack((c,d)), key=lambda x: (x[1],-x[2])))[:,0])
print list(np.array(sorted(np.vstack((small_[0:10,:],med_)), key=lambda x: (x[1],-x[2])))[:,0])
# np.vstack((small_[0:10,],med_))
# np.vstack((small_[0:10,],med_))


[[ 273.     1.     4.5]
 [ 274.     2.     4.5]
 [ 276.     4.     0. ]
 [ 277.     5.     0. ]
 [ 278.     6.     0. ]
 [ 279.     7.     4. ]
 [ 280.     8.     0. ]
 [ 281.     9.     0. ]
 [ 282.    10.     0. ]
 [ 284.    12.     4.5]
 [ 275.     3.     4.5]
 [ 283.    11.     4.5]]
[273.0, 274.0, 275.0, 276.0, 277.0, 278.0, 279.0, 280.0, 281.0, 282.0, 283.0, 284.0, 287.0]

In [1208]:
# Scratch cell: build an event-id list from the big_/med_/small_ pools;
# the commented-out code verified each id resolves in poi_detail_table
# and re-ordered the ids by travel distance.
a_ids, a_type=create_event_id_list(big_,med_,small_)
# conn = psycopg2.connect(conn_str)
# cur = conn.cursor()
# for i,v in enumerate(a_ids):
# #     print i, v, type(v)
#     cur.execute("select index, coord0, coord1 from poi_detail_table where index = %i;"%(float(v)))
#     aaa = cur.fetchone()
# #     print aaa
# conn.close()

# new_a_ids, new_a_type=db_event_cloest_distance(event_ids = a_ids, event_type = a_type)
# print new_a_ids, new_a_type


[273.0, 274.0, 276.0, 277.0, 278.0, 279.0, 280.0, 281.0, 275.0, 283.0]
[ 273.  281.  278.  277.  280.  283.  276.  274.  279.  275.] med

In [257]:
def create_event_id_list(big_,medium_,small_):
    """Assemble a day's ordered event-id list from big/medium/small pools.

    Each pool is an (n, 3) numpy array of (event_id, poi_rank, rating)
    rows, assumed pre-sorted by (rank asc, rating desc).

    Selection policy:
    - one big event (when present and, vs the best medium, at least as
      highly ranked -- or when there are fewer than 2 mediums) plus up
      to 6 small events -> 'big'
    - else two medium events plus up to 8 small events -> 'med'
    - else up to 10 small events merged with any mediums and re-sorted
      by (rank asc, rating desc) -> 'small'

    Returns (event_ids, event_type).

    Bug fix: the original passed the 0-d scalar big_[0,0] to
    np.concatenate, which modern numpy rejects ("zero-dimensional arrays
    cannot be concatenated"); big_[0:1,0] is the equivalent one-element
    array slice.
    """
    event_type = ''
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_[0,1] <= medium_[0,1]):
            if small_.shape[0] >= 6:
                event_ids = list(np.concatenate((small_[0:6,0],big_[0:1,0]),axis=0))
            else:
                event_ids = list(np.concatenate((small_[:,0],big_[0:1,0]),axis=0))
            event_type = 'big'
        else:
            if small_.shape[0] >= 8:
                event_ids = list(np.concatenate((small_[0:8,0],medium_[0:2,0]),axis=0))
            else:
                event_ids = list(np.concatenate((small_[:,0],medium_[0:2,0]),axis=0))
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            event_ids = list(np.concatenate((small_[0:8,0],medium_[0:2,0]),axis=0))
        else:
            event_ids = list(np.concatenate((small_[:,0],medium_[0:2,0]),axis=0))
        event_type = 'med'
    else:
        if small_.shape[0] >= 10:
            if not medium_.shape[0]:
                event_ids = list(np.array(sorted(small_[0:10,:], key=lambda x: (x[1],-x[2])))[:,0])
            else:
                event_ids = list(np.array(sorted(np.vstack((small_[0:10,:],medium_)), key=lambda x: (x[1],-x[2])))[:,0])
        else:
            if not medium_.shape[0]:
                event_ids = list(np.array(sorted(small_[0:10,:], key=lambda x: (x[1],-x[2])))[:,0])
            else:
                event_ids = list(np.array(sorted(np.vstack((small_,medium_)), key=lambda x: (x[1],-x[2])))[:,0])
        event_type = 'small'
    return event_ids, event_type

In [278]:
# Sanity-check great-circle distance between two known points with geopy.
# NOTE(review): geopy removed `vincenty` in geopy 2.0; on modern versions
# use geopy.distance.geodesic instead -- confirm the installed version.
from geopy.distance import vincenty
newport_ri = (41.49008, -70.312796)
cleveland_oh = (41.499498, -81.695391)
print(vincenty(newport_ri, cleveland_oh).miles)


<type 'float'>

In [486]:
def create_trip_df(big_,medium_,small_):
    """Assemble one day's trip DataFrame from big/medium/small POI pools.

    Each input DataFrame is pre-sorted by (poi_rank asc, rating desc).
    Returns (trip_df, event_type) with event_type in {'big','med','small'}.

    Uses pd.concat instead of DataFrame.append (removed in pandas 2.0);
    behaviour is identical on older versions.
    """
    event_type = ''
    if big_.shape[0] >= 1:
        # Anchor on the single best big event when it outranks the best
        # medium event (or there are not two mediums to use instead).
        if (medium_.shape[0] < 2) or (big_.iloc[0].poi_rank <= medium_.iloc[0].poi_rank):
            if small_.shape[0] >= 6:
                trip_df = pd.concat([small_.iloc[0:6], big_.iloc[[0]]])
            else:
                trip_df = pd.concat([small_, big_.iloc[[0]]])
            event_type = 'big'
        else:
            if small_.shape[0] >= 8:
                trip_df = pd.concat([small_.iloc[0:8], medium_.iloc[0:2]])
            else:
                trip_df = pd.concat([small_, medium_.iloc[0:2]])
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            trip_df = pd.concat([small_.iloc[0:8], medium_.iloc[0:2]])
        else:
            trip_df = pd.concat([small_, medium_.iloc[0:2]])
        event_type = 'med'
    else:
        # No big event and fewer than two mediums: fill the day with small
        # events (plus any mediums), re-sorted by rank then rating.
        if small_.shape[0] >= 10:
            trip_df = pd.concat([small_.iloc[0:10], medium_]).sort_values(['poi_rank', 'rating'], ascending=[True, False])
        else:
            trip_df = pd.concat([small_, medium_]).sort_values(['poi_rank', 'rating'], ascending=[True, False])
        event_type = 'small'
    return trip_df, event_type

In [263]:
my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'  # FIXME(security): hardcoded API key -- load from env/config, do not commit it
# my_key = 'AIzaSyAwx3xg6oJ0yiPV3MIunBa1kx6N7v5Tcw8'
def google_driving_walking_time(tour,trip_df,event_type):
    """Fetch Google Distance Matrix driving/walking minutes for each leg of `tour`.

    tour       -- list of positional indices into trip_df, in visit order
    trip_df    -- POI DataFrame with 'name', 'coord0' and 'coord1' columns
    event_type -- day type ('big'/'med'/'small'), forwarded when a POI must
                  be dropped and the tour re-solved

    Legs already cached (check_travel_time_id) are skipped.  If a leg is
    NOT_FOUND for both travel modes, the destination POI is dropped, the
    tour is re-solved via trip_df_cloest_distance and the function recurses.

    Returns (tour, trip_df, poi_travel_time_df).
    """
    poi_travel_time_df = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
                                   'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
                                   'google_walking_url','driving_result','walking_result','google_driving_time',\
                                   'google_walking_time'])
    trip_id_list = []
    for i in range(len(tour)-1):
        # Leg id is "<orig_df_index>0000<dest_df_index>".
        id_ = str(trip_df.loc[trip_df.index[tour[i]]].name) + '0000' + str(trip_df.loc[trip_df.index[tour[i+1]]].name)

        if not check_travel_time_id(id_):
            orig_row = trip_df.loc[trip_df.index[tour[i]]]
            dest_row = trip_df.loc[trip_df.index[tour[i+1]]]
            orig_name = orig_row['name']
            orig_idx = orig_row.name
            dest_name = dest_row['name']
            dest_idx = dest_row.name
            orig_coord0 = orig_row['coord0']
            orig_coord1 = orig_row['coord1']
            dest_coord0 = dest_row['coord0']
            dest_coord1 = dest_row['coord1']
            # Google expects "lat,lng"; coord1 is used as latitude here.
            orig_coords = str(orig_coord1)+','+str(orig_coord0)
            dest_coords = str(dest_coord1)+','+str(dest_coord0)

            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))

            # Coordinates occasionally give ZERO_RESULTS; retry with names.
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))

            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))

            # Both modes unroutable: drop the destination POI, re-solve the
            # tour and start over.
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_df = trip_df.drop(trip_df.iloc[tour[i+1]].name)
                new_tour = trip_df_cloest_distance(new_df,event_type)
                return google_driving_walking_time(new_tour,new_df, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except Exception:
                # BUGFIX: log the payload but keep a sentinel so the inserts
                # below do not hit an undefined name.
                print(driving_result)
                google_driving_time = 9999
            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except Exception:
                google_walking_time = 9999

            # BUGFIX: the original indexed with len(df_poi_travel_time), an
            # undefined global; the accumulator itself is the length source.
            poi_travel_time_df.loc[len(poi_travel_time_df)]=[id_,orig_name,orig_idx,dest_name,dest_idx,orig_coord0,orig_coord1,dest_coord0,\
                                   dest_coord1,orig_coords,dest_coords,google_driving_url,google_walking_url,\
                                   str(driving_result),str(walking_result),google_driving_time,google_walking_time]

            conn = psycopg2.connect(conn_str)
            cur = conn.cursor()
            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
            # Parameterized insert: no hand-rolled quoting / injection risk.
            # Values that were previously quoted in the format string are
            # passed as str() to keep the stored representation the same.
            cur.execute("INSERT INTO google_travel_time_table VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
                        (index, id_, orig_name, str(orig_idx), dest_name, str(dest_idx), str(orig_coord0), str(orig_coord1), str(dest_coord0),
                         str(dest_coord1), orig_coords, dest_coords, google_driving_url, google_walking_url,
                         str(driving_result).replace("'", '"'), str(walking_result).replace("'", '"'),
                         google_driving_time, google_walking_time))
            conn.commit()
            conn.close()
        else:
            trip_id_list.append(id_)

    return tour, trip_df, poi_travel_time_df

In [261]:
def db_google_driving_walking_time(event_ids, event_type):
    """Resolve driving/walking minutes for each consecutive pair in event_ids.

    Cached legs are read from google_travel_time_table; missing legs are
    fetched from the Google Distance Matrix API and inserted.  If a leg is
    NOT_FOUND in both modes, the offending event is removed, the order is
    re-solved via db_event_cloest_distance and the function recurses.

    Returns (event_ids, google_ids, name_list, driving_time_list,
    walking_time_list), the last four aligned one entry per leg.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    google_ids = []
    driving_time_list = []
    walking_time_list = []
    name_list = []
    for i,v in enumerate(event_ids[:-1]):
        id_ = str(v) + '0000'+str(event_ids[i+1])
        if not check_travel_time_id(id_):
            # float() casts numpy scalars so psycopg2 can adapt them.
            cur.execute("select name, coord0, coord1 from poi_detail_table where index = %s", (float(v),))
            orig_name, orig_coord0, orig_coord1 = cur.fetchone()
            orig_idx = v
            cur.execute("select name, coord0, coord1 from poi_detail_table where index = %s", (float(event_ids[i+1]),))
            dest_name, dest_coord0, dest_coord1 = cur.fetchone()
            dest_idx = event_ids[i+1]
            # Google expects "lat,lng"; coord1 is used as latitude here.
            orig_coords = str(orig_coord1)+','+str(orig_coord0)
            dest_coords = str(dest_coord1)+','+str(dest_coord0)
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            # Coordinates occasionally give ZERO_RESULTS; retry with names.
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))

            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_event_ids = list(event_ids)
                new_event_ids.pop(i+1)
                # BUGFIX: db_event_cloest_distance returns (ids, type); the
                # original assigned the whole tuple to new_event_ids.
                new_event_ids, event_type = db_event_cloest_distance(event_ids=new_event_ids, event_type=event_type)
                conn.close()
                return db_google_driving_walking_time(new_event_ids, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except Exception:
                # BUGFIX: keep a sentinel so the insert below does not hit an
                # undefined name; log the payload for debugging.
                print((v, id_, driving_result))
                google_driving_time = 9999
            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except Exception:
                google_walking_time = 9999

            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
            # Parameterized insert: the manual apostrophe-doubling of names
            # is no longer needed (and no longer corrupts stored names).
            cur.execute("INSERT INTO google_travel_time_table VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
                        (index, id_, orig_name, str(orig_idx), dest_name, str(dest_idx), str(orig_coord0), str(orig_coord1), str(dest_coord0),
                         str(dest_coord1), orig_coords, dest_coords, google_driving_url, google_walking_url,
                         str(driving_result).replace("'", '"'), str(walking_result).replace("'", '"'),
                         google_driving_time, google_walking_time))
            conn.commit()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
        else:
            # Cached leg: read the stored times instead of calling Google.
            cur.execute("select orig_name, dest_name, google_driving_time, google_walking_time from google_travel_time_table \
                         where id_ = %s", (id_,))
            orig_name, dest_name, google_driving_time, google_walking_time = cur.fetchone()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)

    conn.close()
    return event_ids, google_ids, name_list, driving_time_list, walking_time_list

In [837]:
# End-to-end smoke test of the day-trip pipeline for one San Diego day.
# Requires df_events, county, state, day_trip_locations and the db_* helper
# functions to already exist in the kernel.
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
default = 1
n_days =3
full_day = 1
poi_coords = df_events[['coord0','coord1']]
trip_location_ids, full_trip_details =[],[]
# Cluster POIs into n_days geographic groups; each cluster becomes one day.
kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
# print kmeans.labels_
i=0
current_events = []
big_ix = []
small_ix = []
med_ix = []
# Partition day-i events by time budget: >180 min big, 120-180 medium,
# otherwise small.
for ix, label in enumerate(kmeans.labels_):
    if label == i:
        time = df_events.iloc[ix].adjusted_normal_time_spent
        event_ix = df_events.iloc[ix].name
        current_events.append(event_ix)
        if time > 180 :
            big_ix.append(event_ix)
        elif time >= 120 :
            med_ix.append(event_ix)
        else:
            small_ix.append(event_ix)

#         all_big = big.sort_values(['poi_rank', 'rating'], ascending=[True, False])
# Rank each pool by poi_rank ascending, then rating descending.
big_ = df_events.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
small_ = df_events.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
medium_ = df_events.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])

# trip_df, event_type = create_trip_df(big_,medium_,small_)
# tour = trip_df_cloest_distance(trip_df, event_type)
# new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
# new_trip_df = new_trip_df.iloc[new_tour]
# new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
# new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)

# NOTE(review): elsewhere db_event_cloest_distance is called with
# event_ids/event_type keyword arguments (see the get_fulltrip cell);
# confirm it also accepts a trip-locations id positionally.
event_ids, event_type=db_event_cloest_distance(trip_locations_id)
event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
db_address(event_ids)
values = day_trip(event_ids, county, state, default, full_day,n_days,i)
day_trip_locations.loc[len(day_trip_locations)] = values
trip_location_ids.append(values[0])
full_trip_details.extend(values[-1])


(2259L, 'San Diego Zoo Safari Park', '15500 San Pasqual Valley Rd, Escondido, CA 92027, USA')

In [ ]:
def get_small_med_big_ix(county_list_info, day_labels):
    """Partition county_list_info rows into small/medium/big index lists per
    day label.

    Stub: the original cell had a def with no body (a SyntaxError); raise
    until the split logic (see get_fulltrip_data_default) is factored out.
    """
    raise NotImplementedError("get_small_med_big_ix is not implemented yet")
In [300]:
def get_fulltrip_data_default(state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
    """Build (or detect an existing) default full trip for `city`, one
    KMeans cluster of POIs per day, persisting day and full-trip rows.

    FIXME(review): `full_day` is read from the kernel's global namespace
    (it is never defined here); make sure it is set before calling.

    Returns a status string describing whether the trip was inserted or
    already present.
    """
    county = find_county(state, city)
    trip_location_ids, full_trip_details =[],[]
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])

    if not check_full_trip_id(full_trip_id, debug):
        county_list_info = db_start_location(county, state, city)
        poi_coords = county_list_info[:,1:3]
        kmeans = KMeans(n_clusters=n_days).fit(poi_coords)
        day_labels = kmeans.labels_
        for i in range(n_days):
            # BUGFIX: build the day id before checking it -- the original
            # passed the undefined name `day_trip` to check_day_trip_id and
            # only defined trip_locations_id afterwards.
            trip_locations_id = '-'.join([str(state), str(county.replace(' ','-')),str(int(default)), str(n_days),str(i)])
            if not check_day_trip_id(trip_locations_id, debug):
                current_events, big_ix, small_ix, med_ix = [],[],[],[]

                # Split day-i events by time budget: >180 min big,
                # 120-180 medium, otherwise small.
                for ix, label in enumerate(day_labels):
                    if label == i:
                        time = county_list_info[ix,3]
                        event_ix = county_list_info[ix,0]
                        current_events.append(event_ix)
                        if time > 180 :
                            big_ix.append(ix)
                        elif time >= 120 :
                            med_ix.append(ix)
                        else:
                            small_ix.append(ix)

                #need to double check this funct!
                big_ = sorted_events(county_list_info, big_ix)
                med_ = sorted_events(county_list_info, med_ix)
                small_ = sorted_events(county_list_info, small_ix)

                event_ids, event_type = create_event_id_list(big_, med_, small_)
                event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type)
                event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
                event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
                db_address(event_ids)
                details = db_day_trip(event_ids, i)
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                # Parameterized insert; "default" is an SQL reserved word so
                # the column identifier is quoted.  (Original also referenced
                # the typo name trip_location_id here.)
                cur.execute('insert into day_trip_table (trip_locations_id, full_day, "default", county, state, details, event_type, event_ids) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)',
                            (trip_locations_id, full_day, default, county, state, str(details), event_type, str(event_ids)))
                conn.commit()
                conn.close()
                trip_location_ids.append(trip_locations_id)
                full_trip_details.extend(details)

            else:
                print("error: already have this day, please check the next day")
                trip_location_ids.append(trip_locations_id)
                # Reuse the stored day-trip details.
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                cur.execute("select details from day_trip_table where trip_locations_id = %s;", (trip_locations_id,))
                # BUGFIX: original called bare fetchall() (undefined name).
                day_trip_detail = cur.fetchall()
                conn.close()
                full_trip_details.extend(day_trip_detail)

        full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
        details = full_trip_details
        user_id = "Admin"
        conn = psycopg2.connect(conn_str)
        cur = conn.cursor()
        cur.execute('insert into full_trip_table(user_id, full_trip_id, trip_location_ids, "default", county, state, details, n_days) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)',
                    (user_id, full_trip_id, str(trip_location_ids), default, county, state, str(details), n_days))
        conn.commit()
        conn.close()

        return "finish update %s, %s into database" %(state, county)
    else:
        return "%s, %s already in database" %(state, county)

In [298]:
def db_day_trip(event_ids, county, state, default, full_day,n_days,i):
    """Build the day_trip row values for day `i` of a trip.

    NOTE(review): a second, 2-argument db_day_trip is defined below in the
    same cell and shadows this one; rename one of them if both are needed.

    Returns [trip_locations_id, full_day, default, county, state, details]
    where details is a list of stringified {'id','name','address','day'}
    dicts, one per POI.
    """
    conn=psycopg2.connect(conn_str)
    cur = conn.cursor()

    trip_locations_id = '-'.join([str(state), str(county.replace(' ','-')),str(int(default)), str(n_days),str(i)])

    # Parameterized IN-list (psycopg2 expands a tuple); ids cast to plain
    # ints so numpy scalars adapt cleanly.
    cur.execute("select index, name, address from poi_detail_table where index in %s;",
                (tuple(int(e) for e in event_ids),))
    rows = cur.fetchall()
    details = [str({'id': row[0],'name': row[1],'address': row[2], 'day': i}) for row in rows]
    conn.close()
    return [trip_locations_id, full_day, default, county, state, details]


def db_day_trip(event_ids, i):
    """Return day-`i` detail strings ({'id','name','address','day'}) for
    each POI in event_ids, preserving the given visit order (one query per
    id keeps the ordering, unlike a single IN-list query)."""
    conn=psycopg2.connect(conn_str)
    cur = conn.cursor()
    details = []
    for event_id in event_ids:
        # Parameterized query; int() casts numpy scalars for the adapter.
        cur.execute("select index, name, address from poi_detail_table where index = %s;",
                    (int(event_id),))
        a = cur.fetchone()
        details.append(str({'id': a[0],'name': a[1],'address': a[2], 'day': i}))
    conn.close()
    return details

In [ ]:
# Ad-hoc check: find the dominant (state, county) for a set of POI ids.
# NOTE(review): string-formatted SQL is acceptable in a scratch cell, but
# use a parameterized query in library code.
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()

In [ ]:
def extend_full_trip_details(full_trip_details):
    """Collect address/id/day/name columns from a list of stringified
    detail dicts.

    NOTE(review): the assembled `details` dict is built but never returned;
    the function returns str(full_trip_details) unchanged, matching the
    original behaviour.  Callers may have wanted str(details) -- confirm
    before changing the return value.
    """
    details = {}
    addresses, ids, days, names = [], [], [], []
    for item in full_trip_details:
        # Parse once with ast.literal_eval (safe on data, unlike eval, and
        # avoids re-parsing the same string four times).
        parsed = ast.literal_eval(item)
        addresses.append(parsed['address'])
        ids.append(parsed['id'])
        days.append(parsed['day'])
        names.append(parsed['name'])
    details['addresses'] = addresses
    details['ids'] = ids
    details['days'] = days
    details['names'] = names
    return str(full_trip_details)

In [841]:
# Display the final, trimmed event ordering produced by the pipeline above.
event_ids


Out[841]:
array([2259, 2260, 3486, 3487, 4951, 4953, 4952])

In [569]:
# Dump the leg-by-leg results of the travel-time lookup for inspection.
print event_ids, google_ids, name_list, driving_time_list, walking_time_list


[2259 2260 3486 3487 4951 4953 4952] ['225900002260', '226000003486', '348600003487', '348700004951', '495100004953', '495300004952'] ['San Diego Zoo Safari Park to Meerkat', 'Meerkat to Stone', 'Stone to Stone Brewery', 'Stone Brewery to Lake Poway', 'Lake Poway to Potato Chip Rock', 'Potato Chip Rock to Mt. Woodson'] [3.0, 25.0, 0.0, 25.0, 25.0, 0.0] [7.0, 193.0, 0.0, 268.0, 99.0, 1.0]

In [604]:
# Initialise the project's database tables (function defined elsewhere in
# the notebook/module).
init_db_tables()

In [267]:
def remove_extra_events(trip_df, df_poi_travel_time):
    """Trim trailing events until the day fits an 8-hour (480 minute) budget.

    Repeatedly drops the last row of both the POI frame and the travel-time
    frame while total visit time plus driving time exceeds 480 minutes.
    Returns (trip_df, df_poi_travel_time, total_minutes).
    """
    total_minutes = sum(trip_df.adjusted_normal_time_spent) + sum(df_poi_travel_time.google_driving_time)
    while total_minutes > 480:
        trip_df = trip_df[:-1]
        df_poi_travel_time = df_poi_travel_time[:-1]
        total_minutes = sum(trip_df.adjusted_normal_time_spent) + sum(df_poi_travel_time.google_driving_time)
    return trip_df, df_poi_travel_time, total_minutes
def db_remove_extra_events(event_ids, driving_time_list,walking_time_list, max_time_spent=480):
    """Trim trailing events until visit time plus travel time fits the budget.

    Total = sum of adjusted_normal_time_spent over event_ids (from the DB)
    plus, per leg, the faster of driving vs walking.  While the total
    exceeds max_time_spent, drop the last event and its trailing leg and
    recurse.

    Returns (event_ids, driving_time_list, walking_time_list, time_spent).
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    # Parameterized IN-list; ids cast to plain ints so numpy scalars adapt.
    cur.execute("select sum (adjusted_normal_time_spent) from poi_detail_table where index in %s;",
                (tuple(int(i) for i in event_ids),))
    time_spent = cur.fetchone()[0] or 0  # sum() is NULL when no rows match
    conn.close()
    # Assume the traveller takes the faster mode on each leg.
    time_spent += sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list)))
    if time_spent > max_time_spent:
        update_event_ids = event_ids[:-1]
        update_driving_time_list = driving_time_list[:-1]
        update_walking_time_list = walking_time_list[:-1]
        return db_remove_extra_events(update_event_ids, update_driving_time_list, update_walking_time_list)
    else:
        return event_ids, driving_time_list, walking_time_list, time_spent

In [777]:
def df_addresses(new_trip_df1, new_df_poi_travel_time):
    """Look up a formatted address for every POI in new_trip_df1.

    Prefers a travel-time row where the POI was the origin
    ('origin_addresses'); falls back to a row where it was the destination
    ('destination_addresses').  Returns the addresses in index order.
    """
    my_lst = []
    print(new_trip_df1.index.values)
    for i in new_trip_df1.index.values:
        temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.orig_idx.values]
        if temp_df.shape[0]>0:
            # ast.literal_eval replaces eval(): the stored result is data,
            # not trusted code.
            address = ast.literal_eval(temp_df.driving_result.values[0])['origin_addresses'][0]
            my_lst.append(address)
        else:
            try:
                temp_df = new_df_poi_travel_time[i == new_df_poi_travel_time.dest_idx.values]
                address = ast.literal_eval(temp_df.driving_result.values[0])['destination_addresses'][0]
                my_lst.append(address)
            except Exception:
                # No travel-time row mentions this POI; dump state for debugging.
                print(new_trip_df1)
                print(new_df_poi_travel_time)
    return my_lst

def check_address(index):
    """Return True if the POI at `index` already has a non-empty address
    stored in poi_detail_table."""
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    # Parameterized query instead of string formatting.
    cur.execute("select address from poi_detail_table where index = %s;", (int(index),))
    a = cur.fetchone()[0]
    conn.close()
    # Truthiness matches the original if/else: None or '' -> False.
    return bool(a)
def db_address(event_ids):
    """Backfill poi_detail_table.address for each event id from the cached
    Google Distance Matrix results.

    Every event except the last appears as an origin of some leg, so its
    address comes from 'origin_addresses'; the last event is looked up as a
    destination.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    for i in event_ids[:-1]:
        if not check_address(i):
            cur.execute("select driving_result from google_travel_time_table where orig_idx = %s;", (int(i),))
            a = cur.fetchone()[0]
            add = ast.literal_eval(a)['origin_addresses'][0]
            # BUGFIX: the old "'%s'"-formatted UPDATE broke on any address
            # containing an apostrophe; parameterization handles quoting.
            cur.execute("update poi_detail_table set address = %s where index = %s;", (add, int(i)))
            conn.commit()
    last = event_ids[-1]
    if not check_address(last):
        cur.execute("select driving_result from google_travel_time_table where dest_idx = %s;", (int(last),))
        a = cur.fetchone()[0]
        add = ast.literal_eval(a)['destination_addresses'][0]
        cur.execute("update poi_detail_table set address = %s where index = %s;", (add, int(last)))
        conn.commit()
    conn.close()

In [ ]:


In [ ]:


In [822]:
# NOTE(review): star import pollutes the namespace (this `append` is
# numpy.append); prefer np.append, since numpy is already imported as np.
from numpy import *

# Prepend POI 273 to the current event ids for a cross-county test case.
test_event_ids_list =  append(273, event_ids)
# event_ids
print test_event_ids_list


[ 273 2259 2260 3486 3487 4951 4953 4952]

In [831]:
# Re-run of the dominant (state, county) lookup for test_event_ids_list
# (same as the cell above; expected output: CALIFORNIA).
conn=psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("select state, county, count(*) AS count from poi_detail_table where index in %s GROUP BY state, county order by count desc;" %(tuple(test_event_ids_list),))
a = cur.fetchall()
print a[0][0].upper()
# details = [str({'id': a[x][0],'name': a[x][1],'address': a[x][2], 'day': i}) for x in range(a)]
conn.close()


CALIFORNIA

In [ ]:
day_trip_locations = 'San Diego, California'

# NOTE(review): `d` is bound twice in this unpacking (the first value is
# discarded); also `df`, `county`, `state`, `city` and `full_trip_table`
# must already exist in the kernel for this call to run.
f, d, n, d= search_cluster_events(df, county, state, city, 3, day_trip_locations, full_trip_table, default = True)

In [1095]:
# Micro-benchmark: sorting [index, ranking, score] triples by
# (ranking asc, score desc) -- the ordering used when selecting POIs.
import time
t1=time.time()
# [index, ranking,score]
a = [[1,2,3],[2,2,6],[3,3,3],[4,3,10]]
from operator import itemgetter
print sorted(a, key=lambda x: (x[1], -x[2]))

time.time()-t1


[[2, 2, 6], [1, 2, 3], [4, 3, 10], [3, 3, 3]]
Out[1095]:
0.0008938312530517578

In [123]:
'''
Most important event that will call all the functions and return the day details for the trip
'''


def search_cluster_events(df, county, state, city, n_days, day_trip_locations = True, full_trip_table = True, default = True, debug = True):
    """Cluster a city's POIs into n_days day-trips and record the full trip.

    FIXME(review): `big`, `med`, `small`, `full_day` and `day_trip` are read
    from the kernel's global namespace and must exist before calling; the
    day_trip_locations/full_trip_table parameters default to True but are
    used as DataFrames -- pass real tables.

    Returns (full_trip_table, day_trip_locations, new_trip_df1,
    df_poi_travel_info).
    """
    county, df_events =cold_start_places(df, county, state, city, n_days)

    poi_coords = df_events[['coord0','coord1']]
    kmeans = KMeans(n_clusters=n_days).fit(poi_coords)

    # BUGFIX: initialise accumulators unconditionally so the code after the
    # if-branch cannot hit NameError/UnboundLocalError when the trip id
    # already exists (the original defined these inside the branch and read
    # df_poi_travel_info before any assignment).
    trip_location_ids = []
    full_trip_details = []
    df_poi_travel_info = pd.DataFrame(columns =['id_','orig_name','orig_idx','dest_name','dest_idx','orig_coord0','orig_coord1',\
                                       'dest_coord0','dest_coord1','orig_coords','dest_coords','google_driving_url',\
                                       'google_walking_url','driving_result','walking_result','google_driving_time',\
                                       'google_walking_time'])

    new_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    if not check_full_trip_id(new_trip_id, debug):
        for i in range(n_days):
            current_events, big_ix, small_ix, med_ix = [], [], [], []
            # Collect this day's events and bucket them by which global
            # pool (big/med/small) they belong to.
            for ix, label in enumerate(kmeans.labels_):
                if label == i:
                    event_ix = poi_coords.index[ix]
                    current_events.append(event_ix)
                    if event_ix in big.index:
                        big_ix.append(event_ix)
                    elif event_ix in med.index:
                        med_ix.append(event_ix)
                    else:
                        small_ix.append(event_ix)
            big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
            trip_df, event_type = create_trip_df(big_,medium_,small_)
            tour = trip_df_cloest_distance(trip_df, event_type)
            new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
            new_trip_df = new_trip_df.iloc[new_tour]
            new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
            new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
            values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
            day_trip_locations.loc[len(day_trip_locations)] = values
            trip_location_ids.append(values[0])
            full_trip_details.extend(values[-1])
            # pd.concat replaces DataFrame.append (removed in pandas 2.0).
            df_poi_travel_info = pd.concat([df_poi_travel_info, new_df_poi_travel_time])

    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    details = extend_full_trip_details(full_trip_details)
    full_trip_table.loc[len(full_trip_table)] = ["adam", full_trip_id, str(trip_location_ids), default, county, state, details, n_days]

    # NOTE(review): if the trip already existed, new_trip_df1 is undefined
    # here -- callers should then only rely on the first two return values.
    return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info

In [36]:
def default_search_cluster_events(df, df_counties_u, county, state, big,med, small, \
                                  temp, n_days,day_trip_locations, full_trip_table,df_poi_travel_info):
    """Cluster `temp`'s POIs into n_days days, build each day's trip, and
    append the results to day_trip_locations / full_trip_table.

    FIXME(review): `default`, `full_day` and `user_id` are read from the
    kernel's global namespace; define them before calling.

    Returns (full_trip_table, day_trip_locations, new_trip_df1,
    df_poi_travel_info).
    """
    poi_coords = temp[['coord0','coord1']]
    # Fixed random_state keeps the day clustering reproducible.
    kmeans = KMeans(n_clusters=n_days, random_state=0).fit(poi_coords)
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    trip_location_ids = []
    full_trip_details = []
    for i in range(n_days):
        current_events = []
        big_ix = []
        small_ix = []
        med_ix = []
        # Bucket this day's events by which pool (big/med/small) they are in.
        for ix, label in enumerate(kmeans.labels_):
            if label == i:
                event_ix = poi_coords.index[ix]
                current_events.append(event_ix)
                if event_ix in big.index:
                    big_ix.append(event_ix)
                elif event_ix in med.index:
                    med_ix.append(event_ix)
                else:
                    small_ix.append(event_ix)
        # (dropped the unused `all_big` computation from the original)
        big_ = big.loc[big_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        small_ = small.loc[small_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        medium_ = med.loc[med_ix].sort_values(['poi_rank', 'rating'], ascending=[True, False])
        trip_df, event_type = create_trip_df(big_,medium_,small_)
        tour = trip_df_cloest_distance(trip_df, event_type)
        new_tour, new_trip_df, df_poi_travel_time = google_driving_walking_time(tour,trip_df,event_type)
        new_trip_df = new_trip_df.iloc[new_tour]
        new_trip_df1,new_df_poi_travel_time,total_time = remove_extra_events(new_trip_df, df_poi_travel_time)
        new_trip_df1['address'] = df_addresses(new_trip_df1, new_df_poi_travel_time)
        values = day_trip(new_trip_df1, county, state, default, full_day,n_days,i)
        day_trip_locations.loc[len(day_trip_locations)] = values
        trip_location_ids.append(values[0])
        full_trip_details.extend(values[-1])
        # pd.concat replaces DataFrame.append (removed in pandas 2.0).
        df_poi_travel_info = pd.concat([df_poi_travel_info, new_df_poi_travel_time])
    full_trip_id = '-'.join([str(state.upper()), str(county.upper().replace(' ','-')),str(int(default)), str(n_days)])
    details = extend_full_trip_details(full_trip_details)
    full_trip_table.loc[len(full_trip_table)] = [user_id, full_trip_id, \
                                                 str(trip_location_ids), default, county, state, details, n_days]
    return full_trip_table, day_trip_locations, new_trip_df1, df_poi_travel_info

In [102]:
###Next Steps: Add controls for the users. func1: allow adding events (by specific name or auto-add)
### and auto-route them into the most appropriate order.
###func2: allow reordering the events. func3: allow deleting the events.
###func4: allow switching in a new event - next to the switch an x-mark icon cancels, a check mark confirms the new place and auto-orders.

###New table for the trip info... features including trip id, event place, days, specific date, trip details. (trip tour, trip)

def ajax_available_events(county, state):
    """Return [(index, name), ...] for every POI in the given county/state.

    poi_detail_table stores county upper-cased and state title-cased, so the
    arguments are normalized before querying.
    """
    county = county.upper()
    state = state.title()
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        # BUG FIX: the query was built with string interpolation, which was
        # open to SQL injection and broke on names containing a quote.
        cur.execute(
            "select index, name from poi_detail_table "
            "where county=%s and state=%s", (county, state))
        # fetchall() already returns a list of row tuples.
        poi_lst = cur.fetchall()
        return poi_lst
    finally:
        conn.close()

def add_event(trip_locations_id, event_day, event_id=None, event_name=None, full_day = True, unseen_event = False):
    """Add one event to an existing day trip and persist the new itinerary.

    Parameters
    ----------
    trip_locations_id : str -- id of the existing row in day_trip_table
    event_day : day label stored in each event's detail dict
    event_id : POI index to add (used when unseen_event is False)
    event_name : free-text place name (used when unseen_event is True)
    full_day : overwritten below by the value stored in day_trip_table
    unseen_event : True when the place is not in poi_detail_table

    Returns (new_trip_locations_id, details).
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    # Parameterized queries throughout: the old string interpolation was
    # open to SQL injection and broke on values containing quotes.
    cur.execute("select * from day_trip_table where trip_locations_id=%s",
                (trip_locations_id,))
    (index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
    if unseen_event:
        index += 1
        # Build a deterministic id from the existing event ids plus the new name.
        # ast.literal_eval replaces eval: detail comes from the database.
        trip_locations_id = '-'.join([str(ast.literal_eval(i)['id']) for i in ast.literal_eval(detail)])+'-'+event_name.replace(' ','-')+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id=%s",
                    (trip_locations_id,))
        a = cur.fetchone()
        if bool(a):
            conn.close()
            return trip_locations_id, a[0]
        else:
            cur.execute("select max(index) from day_trip_locations")
            index = cur.fetchone()[0]+1
            detail = list(ast.literal_eval(detail))
            # Unseen events have no POI id/address yet; they are filled later.
            new_event = "{'address': 'None', 'id': 'None', 'day': %s, 'name': u'%s'}"%(event_day, event_name)
            detail.append(new_event)
            # day_trip_locations stores details as one string: strip the list
            # brackets and switch to the quoting style the front end expects.
            new_detail = str(detail).replace('"','').replace('[','').replace(']','').replace("'",'"')
            cur.execute("INSERT INTO day_trip_locations VALUES (%s, %s, %s, %s, %s, %s, %s);",
                        (index, trip_locations_id, full_day, False, county, state, new_detail))
            conn.commit()
            conn.close()
            return trip_locations_id, detail
    else:
        event_ids = add_event_cloest_distance(trip_locations_id, event_id)
        event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids,event_type = 'add')
        trip_locations_id = '-'.join(event_ids)+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id=%s",
                    (trip_locations_id,))
        a = cur.fetchone()
        if not a:
            details = []
            db_address(event_ids)
            for item in event_ids:
                cur.execute("select index, name, address from poi_detail_table where index = %s;", (item,))
                a = cur.fetchone()
                detail = {'id': a[0],'name': a[1],'address': a[2], 'day': event_day}
                details.append(detail)
            # BUG FIX: this insert previously referenced the undefined name
            # `trip_location_id` (NameError).  Also, "default" is a reserved
            # word in Postgres and must be quoted in the column list.
            cur.execute('insert into day_trip_table (trip_locations_id, full_day, "default", county, state, details, event_type, event_ids) '
                        'VALUES (%s, %s, %s, %s, %s, %s, %s, %s)',
                        (trip_locations_id, full_day, False, county, state,
                         str(details), event_type, str(event_ids)))
            conn.commit()
            conn.close()
            return trip_locations_id, details
        else:
            conn.close()
            # NOTE(review): a[0] is the stored details string, not a parsed
            # list — callers of the other branch get a list; confirm intended.
            return trip_locations_id, a[0]

def remove_event(trip_locations_id, remove_event_id, remove_event_name=None, event_day=None, full_day = True):
    """Remove one event from a day trip and insert the reduced trip as a new row.

    Returns (new_trip_locations_id, new_detail).  If a trip with the reduced
    id already exists, its stored details are returned instead of inserting.

    Raises ValueError if remove_event_id is not present in the trip details.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select * from day_trip_table where trip_locations_id=%s",
                (trip_locations_id,))
    (index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
    new_event_ids = ast.literal_eval(event_ids)
    new_event_ids.remove(remove_event_id)
    new_trip_locations_id = '-'.join(str(event_id) for event_id in new_event_ids)
    cur.execute("select * from day_trip_table where trip_locations_id=%s",
                (new_trip_locations_id,))
    check_id = cur.fetchone()
    if check_id:
        # BUG FIX: this early return used to leak the connection.
        conn.close()
        return new_trip_locations_id, check_id[-3]
    # detail is stored wrapped in one extra pair of quotes; strip them first.
    detail = ast.literal_eval(detail[1:-1])
    # Locate the event to drop (each element is itself a dict string).
    # The loop variable no longer shadows the outer `index`.
    remove_index = None
    for ix, trip_detail in enumerate(detail):
        if ast.literal_eval(trip_detail)['id'] == remove_event_id:
            remove_index = ix
            break
    if remove_index is None:
        conn.close()
        raise ValueError('event %r not found in trip details' % (remove_event_id,))
    new_detail = list(detail)
    new_detail.pop(remove_index)
    # The quote doubling only existed to embed the string in a SQL literal;
    # with a parameterized insert we store the plain string, but keep
    # returning the doubled form for backward compatibility with callers.
    new_detail_plain = str(new_detail)
    default = False
    cur.execute("select max(index) from day_trip_table where trip_locations_id=%s",
                (trip_locations_id,))
    new_index = cur.fetchone()[0]
    new_index += 1
    cur.execute("INSERT INTO day_trip_table VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);",
                (new_index, new_trip_locations_id, full_day, default, county, state,
                 new_detail_plain, event_type, str(new_event_ids)))
    conn.commit()
    conn.close()
    return new_trip_locations_id, new_detail_plain.replace("'", "''")

def event_type_time_spent(adjusted_normal_time_spent):
    """Classify a POI by its expected visit length in minutes.

    Returns 'small' below 120, 'med' from 120 up to and including 180,
    and 'big' above 180.
    """
    minutes = adjusted_normal_time_spent
    if minutes < 120:
        return 'small'
    if minutes <= 180:
        return 'med'
    return 'big'
def switch_event_list(full_trip_id, trip_locations_id, switch_event_id, switch_event_name=None, event_day=None, full_day = True):
    """Collect candidate replacements for switch_event_id.

    A candidate must be in the same county/state, not already part of the
    full trip, and within 60 minutes (driving or walking) of the event being
    replaced.  Returns {switch_event_id: [[poi_rank, rating,
    same_event_type_flag], ...]} for the caller to rank.
    """
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select name, city, county, state, coord0, coord1, poi_rank, adjusted_normal_time_spent "
                "from poi_detail_table where index=%s", (switch_event_id,))
    name, city, county, state, coord0, coord1, poi_rank, adjusted_normal_time_spent = cur.fetchone()
    event_type = event_type_time_spent(adjusted_normal_time_spent)
    avialable_lst = ajax_available_events(county, state)
    # BUG FIX: full_trip_id is a string, so the old unquoted interpolation
    # produced invalid SQL; and fetchone() returns a row tuple, which
    # ast.literal_eval cannot parse — take the details column explicitly.
    cur.execute("select trip_location_ids, details from full_trip_table where full_trip_id=%s",
                (full_trip_id,))
    row = cur.fetchone()
    full_trip_detail = ast.literal_eval(row[1])
    full_trip_ids = [ast.literal_eval(item)['id'] for item in full_trip_detail]
    switch_lst = []
    for item in avialable_lst:
        index = item[0]
        if index not in full_trip_ids:
            event_ids = [switch_event_id, index]
            event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids, event_type='switch')
            # Keep only candidates reachable within an hour.
            if min(driving_time_list[0], walking_time_list[0]) <= 60:
                cur.execute("select poi_rank, rating, adjusted_normal_time_spent "
                            "from poi_detail_table where index=%s", (index,))
                target_poi_rank, target_rating, target_adjusted_normal_time_spent = cur.fetchone()
                target_event_type = event_type_time_spent(target_adjusted_normal_time_spent)
                switch_lst.append([target_poi_rank, target_rating, target_event_type==event_type])
    # BUG FIX: the connection was never closed.
    conn.close()
    # TODO: sort candidates by (same type, poi_rank, rating) before returning.
    return {switch_event_id: switch_lst}

def switch_event(trip_locations_id, switch_event_id, final_event_id, event_day):
    """Replace switch_event_id with final_event_id in the given day trip.

    Implemented as a remove followed by an add; returns the
    (trip_locations_id, detail) pair produced by the add step.
    """
    reduced_id, _ = remove_event(trip_locations_id, switch_event_id)
    return add_event(reduced_id, event_day, final_event_id,
                     full_day=True, unseen_event=False)

In [113]:
# Scratch cell: sanity-check ajax_available_events and manually inspect the
# event ids stored for one full trip.
ajax_available_events(county='San Francisco', state = "California")
# poi_detail_table stores county upper-cased.
county='San Francisco'.upper()
state = "California"
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
cur.execute("select index, name from poi_detail_table where county='%s' and state='%s'" %(county,state))  
full_trip_id = 'CALIFORNIA-SAN-DIEGO-1-3'
cur.execute("select details from full_trip_table where full_trip_id='%s'" %(full_trip_id))
full_trip_detail = cur.fetchone()[0]
# details is stored as the string form of a list of dict strings.
full_trip_detail = ast.literal_eval(full_trip_detail)
# Display the POI id of every event in the trip (cell output below).
[ast.literal_eval(item)['id'] for item in full_trip_detail]


Out[113]:
[2259,
 2260,
 3486,
 3487,
 4951,
 4953,
 4952,
 2870,
 2871,
 2089,
 2090,
 2872,
 147,
 152,
 148,
 4630,
 4545,
 4544,
 1325,
 1326]

In [89]:
# Scratch cell: step through remove_event's logic by hand for one trip.
trip_locations_id = 'CALIFORNIA-SAN-DIEGO-1-3-0'
remove_event_id = 3486
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
(index, trip_locations_id, full_day, default, county, state, detail, event_type, event_ids) = cur.fetchone()
# event_ids = ast.literal_eval(event_ids)
# print detail, '\n'
new_event_ids = ast.literal_eval(event_ids)
new_event_ids.remove(remove_event_id)
new_trip_locations_id = '-'.join(str(id_) for id_ in new_event_ids)
# event_ids.remove(remove_event_id)
# detail is wrapped in one extra pair of quotes; strip them before parsing.
detail = ast.literal_eval(detail[1:-1])
print type(detail[0])
# Find the position of the event to drop (each element is a dict string).
for index, trip_detail in enumerate(detail):
    if ast.literal_eval(trip_detail)['id'] == remove_event_id:
        remove_index = index
        break
new_detail = list(detail)
new_detail.pop(remove_index)
# Double single quotes so the string can be embedded in a SQL literal.
new_detail =  str(new_detail).replace("'","''")


<type 'str'>

In [148]:
import numpy as np
# Tasks:
# 0. Run the initial pipeline to debug with all the cities and counties in poi_detail_table.
# 1. Continue working on add/suggest/remove features.
# 2. New feature: let the user generate the Google Maps route for the day.
# 3. New feature: let the user explore outside the city in a chosen direction from the start location.
# 4. Get all state and national park data into the database; rework the ranking system and poi_detail_table.
remove_event_id = 3486

# 3. Travel outside:
current_city = 'San Francisco'
current_state = 'California'
direction = 'N'
n_days = 1

# Travel outside: 1 day, ~2 hr driving; first filter with distL2 < 1.7 (in degrees). Find all
# locations within that range, excluding the start city. Total travel time should stay under
# 5 hrs for a 1-day round trip.
# For 2 days: 50% more total driving time than 1 day.
# For 3 days: 100% more total driving time than 1 day.
a,b =np.array([36.50641,-121.1095823]), np.array([37.0792134,-121.9502674])
def angle_between(p1, p2):
    """Angle in degrees, wrapped into [0, 360), from p2's direction to p1's.

    Each point is treated as (x, y); its direction is arctan2(y, x).
    """
    first = np.arctan2(p1[1], p1[0])
    second = np.arctan2(p2[1], p2[0])
    return np.rad2deg((first - second) % (2 * np.pi))

def direction_from_orgin(start_coord_long,  start_coord_lat, target_coord_long, target_coord_lat):
    """Rough compass direction from the start point to the target point.

    The displacement vector's angle (via angle_between, in [0, 360)) is
    bucketed into 'N', 'W', 'S' or 'E'.

    BUG FIX: the original strict inequalities left gaps, so the exact
    boundary angles 45/135/215/305 all fell through to 'E'; the intervals
    are now half-open.  NOTE(review): the 215/305 cut points look like they
    were meant to be 225/315 for symmetric quadrants — confirm before
    changing, since it would alter existing classifications.
    """
    diff =  [target_coord_long - start_coord_long, target_coord_lat- start_coord_lat]
    angle = angle_between(diff,[0,0])
    if 45 < angle <= 135:
        return 'N'
    elif 135 < angle <= 215:
        return 'W'
    elif 215 < angle <= 305:
        return 'S'
    else:
        return 'E'
    
def travel_outside_coords(current_city, current_state, direction=None, n_days=1):
    """Fetch the coordinates needed to plan a trip outside the current city.

    Returns (coords, coord0, coord1): coords is a list of distinct
    (city, coord0, coord1) rows for every OTHER city in the state, and
    (coord0, coord1) is the current city's own position.  `direction` and
    `n_days` are accepted for interface compatibility but unused here.
    """
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        # Parameterized queries: the old string interpolation broke on city
        # names containing quotes and was open to SQL injection.
        cur.execute("select coord0, coord1 from all_cities_coords where city=%s and state=%s;",
                    (current_city, current_state))
        coord0, coord1 = cur.fetchone()
        cur.execute("select distinct city, coord0, coord1 from all_cities_coords "
                    "where city != %s and state = %s;",
                    (current_city, current_state))
        coords = cur.fetchall()
    finally:
        # Close the connection even if a query fails.
        conn.close()
    return coords, coord0, coord1

In [266]:
# Get events outside the city: pick candidate cities in the target direction,
# cluster their POIs into day routes with KMeans, then build a trip per route.

target_direction = 'N'
# possible city coords, target city coord_lat, target city coord_long
coords, coord_lat, coord_long = travel_outside_coords(current_city, current_state)
# coords rows: (city, lat, long)
check_cities_info = []
for item in coords:
    direction = direction_from_orgin(coord_long,  coord_lat, item[2], item[1])
    # Keep cities in the requested direction within ~1.7 degrees (L2).
    if (target_direction == direction) and (distL2([item[1], item[2]], [coord_lat, coord_long]) <1.7):
        check_cities_info.append(item)
city_infos = []
for city, _, _ in check_cities_info:
    county = None
    # db_start_location rows: index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating
    city_info = db_start_location(county, current_state, city)
    city_infos.extend(city_info)
city_infos = np.array(city_infos)
poi_coords = city_infos[:,1:3]
# One route per "med-or-bigger" event (>= 120 min visit time).
n_routes = sum(1 for t in np.array(city_infos)[:,3] if t >= 120)    
if (n_routes>1) and (city_infos.shape[0]>=10):
    kmeans = KMeans(n_clusters=n_routes).fit(poi_coords)
elif (city_infos.shape[0]> 20) or (n_routes>1):
    kmeans = KMeans(n_clusters=2).fit(poi_coords)
else:
    kmeans = KMeans(n_clusters=1).fit(poi_coords)
route_labels = kmeans.labels_

print n_routes, len(route_labels), city_infos.shape
print route_labels
for i in range(n_routes):
    # Partition this route's events into big/med/small visit-time buckets.
    current_events, big_ix, med_ix, small_ix = [], [],[], []
    for ix, label in enumerate(route_labels):
        if label == i:
            time = city_infos[ix,3]
            event_ix = city_infos[ix,0]
            current_events.append(event_ix)
            if time > 180 :
                big_ix.append(ix)
            elif time >= 120 :
                med_ix.append(ix)
            else:
                small_ix.append(ix)
    big_ = sorted_events(city_infos, big_ix)
    med_ = sorted_events(city_infos, med_ix)
    small_ = sorted_events(city_infos, small_ix)
    print big_, len(big_), len(med_), len(small_)
    
    # need to update!!!!!!!!

    event_ids, event_type = create_event_id_list(big_, med_, small_)
    event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type)
    event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_google_driving_walking_time(event_ids, event_type)
    event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_extra_events(event_ids, driving_time_list, walking_time_list)
#     db_address(event_ids)
    print i, event_ids
    # NOTE(review): db_day_trip is undefined here (see the NameError in the
    # cell output below) — this pipeline step was never completed.
    values = db_day_trip(event_ids, county, state, default, full_day,n_days,i)


3 60 (60, 6)
[2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 2 2 2 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]
[[  2.71600000e+03   2.00000000e+00   4.00000000e+00]] 1 2 39
0 [ 4517.  4519.  4703.  4704.  3751.]
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-266-97aa92aba371> in <module>()
     54 #     db_address(event_ids)
     55     print i, event_ids
---> 56     values = db_day_trip(event_ids, county, state, default, full_day,n_days,i)

NameError: name 'db_day_trip' is not defined

In [232]:
db_start_location(county, current_state, city).shape


Out[232]:
(10, 6)

In [50]:
# Pull every distinct (city, state) pair that has POIs, to cross-check
# against the cities_coords table below.
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
# cur.execute("select index, name, coord0, coord1 from poi_detail_table where city !='%s' and state = '%s';" %(current_city, current_state))
cur.execute("select distinct city, state from poi_detail_table;" )

In [51]:
all_cities = cur.fetchall()

In [195]:
# Geocode a street address with OpenStreetMap's Nominatim service (geopy).
from geopy.geocoders import Nominatim
geolocator = Nominatim()
location = geolocator.geocode("343 Vernon St., San Francisco, California")
print location.latitude, location.longitude


37.7186735714 -122.468147

In [68]:
for items in all_cities:
    if cities_coords[cities_coords['state'] == items[1]][cities_coords.city == items[0]].shape[0] == 0:
        location_name = ', '.join([items[0], items[1]])
        location = geolocator.geocode(location_name)
        cities_coords.loc[len(cities_coords)] = [999,items[0], items[1], 'US', location.latitude, location.longitude]
        print cities_coords.loc(len(cities_coords))


<pandas.core.indexing._LocIndexer object at 0x10bb76ed0>
<pandas.core.indexing._LocIndexer object at 0x10bffd510>
<pandas.core.indexing._LocIndexer object at 0x10c01c750>
<pandas.core.indexing._LocIndexer object at 0x10c01ce10>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10c01c750>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c57dd10>
<pandas.core.indexing._LocIndexer object at 0x10c5d7e10>
<pandas.core.indexing._LocIndexer object at 0x10c5d7e10>
<pandas.core.indexing._LocIndexer object at 0x10c5d7e10>
<pandas.core.indexing._LocIndexer object at 0x10c5decd0>
<pandas.core.indexing._LocIndexer object at 0x10c5decd0>
<pandas.core.indexing._LocIndexer object at 0x10c5decd0>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10610fdd0>
<pandas.core.indexing._LocIndexer object at 0x10c01ce10>
<pandas.core.indexing._LocIndexer object at 0x10c01c750>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10c279790>
<pandas.core.indexing._LocIndexer object at 0x10c01ce10>
<pandas.core.indexing._LocIndexer object at 0x10c5d75d0>
<pandas.core.indexing._LocIndexer object at 0x10c5de6d0>
<pandas.core.indexing._LocIndexer object at 0x10c5de6d0>
<pandas.core.indexing._LocIndexer object at 0x10c5de6d0>
<pandas.core.indexing._LocIndexer object at 0x10c5e8f10>
<pandas.core.indexing._LocIndexer object at 0x10c5e8990>
<pandas.core.indexing._LocIndexer object at 0x10c5d75d0>
<pandas.core.indexing._LocIndexer object at 0x10c5de6d0>
<pandas.core.indexing._LocIndexer object at 0x10c5e8990>
<pandas.core.indexing._LocIndexer object at 0x10c5e8990>
<pandas.core.indexing._LocIndexer object at 0x10c5e1f90>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e1610>
<pandas.core.indexing._LocIndexer object at 0x10610fdd0>
<pandas.core.indexing._LocIndexer object at 0x10610fdd0>
<pandas.core.indexing._LocIndexer object at 0x10c5c8a50>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e0ad0>
<pandas.core.indexing._LocIndexer object at 0x10c5e0150>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e0150>
<pandas.core.indexing._LocIndexer object at 0x10c5e0ad0>
<pandas.core.indexing._LocIndexer object at 0x10c5dd710>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e1ed0>
<pandas.core.indexing._LocIndexer object at 0x10c5e2a50>
<pandas.core.indexing._LocIndexer object at 0x10c5e2350>
<pandas.core.indexing._LocIndexer object at 0x10c5e2350>
<pandas.core.indexing._LocIndexer object at 0x10c5e2e10>
<pandas.core.indexing._LocIndexer object at 0x10c5e2e50>
<pandas.core.indexing._LocIndexer object at 0x10c5c8510>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5de550>
<pandas.core.indexing._LocIndexer object at 0x10c5de890>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5de550>
<pandas.core.indexing._LocIndexer object at 0x10c5de890>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5edf90>
<pandas.core.indexing._LocIndexer object at 0x10c5de550>
<pandas.core.indexing._LocIndexer object at 0x10c5f29d0>
<pandas.core.indexing._LocIndexer object at 0x10c5f29d0>
<pandas.core.indexing._LocIndexer object at 0x10c5f29d0>
<pandas.core.indexing._LocIndexer object at 0x10c5f4e10>
<pandas.core.indexing._LocIndexer object at 0x10c5f4d50>
<pandas.core.indexing._LocIndexer object at 0x10c5f4dd0>
<pandas.core.indexing._LocIndexer object at 0x10c5f4c50>
<pandas.core.indexing._LocIndexer object at 0x10c5f4d50>
<pandas.core.indexing._LocIndexer object at 0x10c5f4dd0>
<pandas.core.indexing._LocIndexer object at 0x10c5e1610>
<pandas.core.indexing._LocIndexer object at 0x10c5f4dd0>
<pandas.core.indexing._LocIndexer object at 0x10c5edb50>
<pandas.core.indexing._LocIndexer object at 0x10c5e27d0>
<pandas.core.indexing._LocIndexer object at 0x10c5ddc50>
<pandas.core.indexing._LocIndexer object at 0x10c5edb50>
<pandas.core.indexing._LocIndexer object at 0x10c5edb50>
<pandas.core.indexing._LocIndexer object at 0x10c5edb50>
/Users/zoesh/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  from ipykernel import kernelapp as app

In [33]:
cities_coords = pd.read_csv('cities_coords.csv', header=None)

In [35]:
cities_coords.columns = ['area_code','city','state','nation', 'coord0','coord1']

In [43]:
# BUG FIX: chained boolean indexing mis-aligns indexes and triggered the
# UserWarning in the output; combine both conditions into one mask.
cities_coords[(cities_coords['state'] == current_state) & (cities_coords.area_code == 415)]


/Users/zoesh/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':
Out[43]:
area_code city state nation coord0 coord1
834 415 Novato California US 38.10742 -122.56970
835 415 San Anselmo California US 37.97465 -122.56164
836 415 South San Francisco California US 37.65466 -122.40775
837 415 San Rafael California US 37.97353 -122.53109

In [66]:
[999,items[0], items[1], 'US', location.latitude, location.longitude]


Out[66]:
[999, 'Portsmouth', 'Virginia', 'US', 37.7792808, -122.4192362]

In [70]:
cities_coords.to_csv('all_cities_coords.csv')

In [26]:
engine = create_engine('postgresql://zoesh@localhost:5432/travel_with_friends')


new_cities_coords.to_sql('all_cities_coords',engine, if_exists = "replace")

In [2]:
import pandas as pd
cities_coords = pd.read_csv('all_cities_coords.csv')


/Users/zoesh/anaconda/lib/python2.7/site-packages/pandas/computation/__init__.py:19: UserWarning: The installed version of numexpr 2.4.4 is not supported in pandas and will be not be used

  UserWarning)

In [269]:
cities_coords


Out[269]:
Unnamed: 0 area_code city state nation coord0 coord1
0 0 201 Bayonne New Jersey US 40.668710 -74.114310
1 1 201 Bergenfield New Jersey US 40.927600 -73.997360
2 2 201 Cliffside Park New Jersey US 40.821490 -73.987640
3 3 201 Englewood New Jersey US 40.892880 -73.972640
4 4 201 Fair Lawn New Jersey US 40.940380 -74.131810
5 5 201 Fort Lee New Jersey US 40.850930 -73.970140
6 6 201 Hackensack New Jersey US 40.885930 -74.043470
7 7 201 Hoboken New Jersey US 40.743990 -74.032360
8 8 201 Jersey City New Jersey US 40.728160 -74.077640
9 9 201 Kearny New Jersey US 40.768430 -74.145420
10 10 201 North Bergen New Jersey US 40.804270 -74.012080
11 11 201 Paramus New Jersey US 40.944540 -74.075420
12 12 201 Ridgewood New Jersey US 40.979260 -74.116530
13 13 201 Teaneck New Jersey US 40.897600 -74.015970
14 14 201 Union City New Jersey US 40.779550 -74.023750
15 15 201 West New York New Jersey US 40.787880 -74.014310
16 16 202 Washington District of Columbia US 38.895110 -77.036370
17 17 203 Bridgeport Connecticut US 41.167040 -73.204830
18 18 203 Danbury Connecticut US 41.394820 -73.454010
19 19 203 East Haven Connecticut US 41.276210 -72.868430
20 20 203 Meriden Connecticut US 41.538150 -72.807040
21 21 203 Milford Connecticut US 41.222320 -73.056500
22 22 203 Naugatuck Connecticut US 41.485930 -73.050660
23 23 203 New Haven Connecticut US 41.308150 -72.928160
24 24 203 North Haven Connecticut US 41.390930 -72.859540
25 25 203 Norwalk Connecticut US 41.117600 -73.407900
26 26 203 Shelton Connecticut US 41.316490 -73.093160
27 27 203 Stamford Connecticut US 41.053430 -73.538730
28 28 203 Stratford Connecticut US 41.184540 -73.133170
29 29 203 Trumbull Connecticut US 41.242870 -73.200670
... ... ... ... ... ... ... ...
2820 2820 999 Warwick Rhode Island US 41.700202 -71.416111
2821 2821 999 Winter Garden Florida US 28.550269 -81.592597
2822 2822 999 Maricopa Arizona US 33.348830 -112.491230
2823 2823 999 Compton California US 33.894882 -118.226043
2824 2824 999 Charleston West Virginia US 38.350599 -81.633281
2825 2825 999 Concord New Hampshire US 43.207106 -71.537021
2826 2826 999 St. Paul Minnesota US 44.950404 -93.101503
2827 2827 999 Buckeye Arizona US 33.370275 -112.583867
2828 2828 999 Glendale California US 34.142498 -118.248596
2829 2829 999 Jurupa Valley California US 33.979847 -117.451575
2830 2830 999 Fort Myers Florida US 26.640628 -81.872308
2831 2831 999 Rockwall Texas US 32.931234 -96.459709
2832 2832 999 Eastvale California US 33.976680 -117.559844
2833 2833 999 Menifee California US 33.686443 -117.177044
2834 2834 999 St. Petersburg Florida US 27.770380 -82.669508
2835 2835 999 Miami Florida US 25.774266 -80.193659
2836 2836 999 Boston Massachusetts US 42.360482 -71.059568
2837 2837 999 Goodyear Arizona US 33.435609 -112.357912
2838 2838 999 Bentonville Arkansas US 36.372854 -94.208817
2839 2839 999 Raleigh North Carolina US 35.780398 -78.639099
2840 2840 999 Canton Ohio US 40.798952 -81.378444
2841 2841 999 Las Vegas Nevada US 36.166286 -115.149225
2842 2842 999 Springfield Massachusetts US 42.101483 -72.589811
2843 2843 999 Bend Oregon US 44.058173 -121.315309
2844 2844 999 Miami Beach Florida US 25.788144 -80.127270
2845 2845 999 San Francisco California US 37.779281 -122.419236
2846 2846 999 Eugene Oregon US 44.101181 -123.152384
2847 2847 999 Albany New York US 42.651167 -73.754968
2848 2848 999 Kent Washington US 47.382690 -122.227027
2849 2849 999 Cleveland Ohio US 41.505161 -81.693444

2850 rows × 7 columns


In [270]:
# Drop the area_code column and de-duplicate: one row per unique city/state.
new_cities_coords = cities_coords[['city', 'state','nation','coord0','coord1']].drop_duplicates()
print new_cities_coords.shape


(1958, 5)

In [271]:
new_cities_coords


Out[271]:
city state nation coord0 coord1
0 Bayonne New Jersey US 40.668710 -74.114310
1 Bergenfield New Jersey US 40.927600 -73.997360
2 Cliffside Park New Jersey US 40.821490 -73.987640
3 Englewood New Jersey US 40.892880 -73.972640
4 Fair Lawn New Jersey US 40.940380 -74.131810
5 Fort Lee New Jersey US 40.850930 -73.970140
6 Hackensack New Jersey US 40.885930 -74.043470
7 Hoboken New Jersey US 40.743990 -74.032360
8 Jersey City New Jersey US 40.728160 -74.077640
9 Kearny New Jersey US 40.768430 -74.145420
10 North Bergen New Jersey US 40.804270 -74.012080
11 Paramus New Jersey US 40.944540 -74.075420
12 Ridgewood New Jersey US 40.979260 -74.116530
13 Teaneck New Jersey US 40.897600 -74.015970
14 Union City New Jersey US 40.779550 -74.023750
15 West New York New Jersey US 40.787880 -74.014310
16 Washington District of Columbia US 38.895110 -77.036370
17 Bridgeport Connecticut US 41.167040 -73.204830
18 Danbury Connecticut US 41.394820 -73.454010
19 East Haven Connecticut US 41.276210 -72.868430
20 Meriden Connecticut US 41.538150 -72.807040
21 Milford Connecticut US 41.222320 -73.056500
22 Naugatuck Connecticut US 41.485930 -73.050660
23 New Haven Connecticut US 41.308150 -72.928160
24 North Haven Connecticut US 41.390930 -72.859540
25 Norwalk Connecticut US 41.117600 -73.407900
26 Shelton Connecticut US 41.316490 -73.093160
27 Stamford Connecticut US 41.053430 -73.538730
28 Stratford Connecticut US 41.184540 -73.133170
29 Trumbull Connecticut US 41.242870 -73.200670
... ... ... ... ... ...
2820 Warwick Rhode Island US 41.700202 -71.416111
2821 Winter Garden Florida US 28.550269 -81.592597
2822 Maricopa Arizona US 33.348830 -112.491230
2823 Compton California US 33.894882 -118.226043
2824 Charleston West Virginia US 38.350599 -81.633281
2825 Concord New Hampshire US 43.207106 -71.537021
2826 St. Paul Minnesota US 44.950404 -93.101503
2827 Buckeye Arizona US 33.370275 -112.583867
2828 Glendale California US 34.142498 -118.248596
2829 Jurupa Valley California US 33.979847 -117.451575
2830 Fort Myers Florida US 26.640628 -81.872308
2831 Rockwall Texas US 32.931234 -96.459709
2832 Eastvale California US 33.976680 -117.559844
2833 Menifee California US 33.686443 -117.177044
2834 St. Petersburg Florida US 27.770380 -82.669508
2835 Miami Florida US 25.774266 -80.193659
2836 Boston Massachusetts US 42.360482 -71.059568
2837 Goodyear Arizona US 33.435609 -112.357912
2838 Bentonville Arkansas US 36.372854 -94.208817
2839 Raleigh North Carolina US 35.780398 -78.639099
2840 Canton Ohio US 40.798952 -81.378444
2841 Las Vegas Nevada US 36.166286 -115.149225
2842 Springfield Massachusetts US 42.101483 -72.589811
2843 Bend Oregon US 44.058173 -121.315309
2844 Miami Beach Florida US 25.788144 -80.127270
2845 San Francisco California US 37.779281 -122.419236
2846 Eugene Oregon US 44.101181 -123.152384
2847 Albany New York US 42.651167 -73.754968
2848 Kent Washington US 47.382690 -122.227027
2849 Cleveland Ohio US 41.505161 -81.693444

1958 rows × 5 columns


In [14]:
new_cities_coords = new_cities_coords.drop_duplicates()

In [27]:
new_cities_coords.reset_index?

In [25]:
new_cities_coords.to_csv('all_cities_coords2.csv')

In [279]:
def calculate_initial_compass_bearing(pointA, pointB):
    """
    Initial great-circle bearing from pointA towards pointB.

    Both arguments must be (latitude, longitude) tuples in decimal degrees.
    The bearing is

        theta = atan2(sin(dlong) * cos(lat2),
                      cos(lat1) * sin(lat2) - sin(lat1) * cos(lat2) * cos(dlong))

    normalized from atan2's (-180, 180] range onto a 0-360 compass.

    :Parameters:
      - `pointA: (lat, lon) tuple of the first point, decimal degrees
      - `pointB: (lat, lon) tuple of the second point, decimal degrees
    :Returns:
      The bearing in degrees
    :Returns Type:
      float
    :Raises:
      TypeError if either argument is not a tuple
    """
    if (type(pointA) != tuple) or (type(pointB) != tuple):
        raise TypeError("Only tuples are supported as arguments")

    lat_a = math.radians(pointA[0])
    lat_b = math.radians(pointB[0])
    delta_long = math.radians(pointB[1] - pointA[1])

    east = math.sin(delta_long) * math.cos(lat_b)
    north = (math.cos(lat_a) * math.sin(lat_b)
             - math.sin(lat_a) * math.cos(lat_b) * math.cos(delta_long))

    # atan2 yields (-pi, pi]; shift into [0, 360) for a compass reading.
    return (math.degrees(math.atan2(east, north)) + 360) % 360

In [297]:
conn = psycopg2.connect(conn_str)   
cur = conn.cursor() 
cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(123)) 
not cur.fetchone()


Out[297]:
True

In [295]:
a = cur.fetchone()

In [310]:
np.concatenate(np.array([0, 91,23]),np.zeros((len(event_ids), 3)))


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-310-67c82e74fc34> in <module>()
----> 1 np.concatenate(np.array([0, 91,23]),np.zeros((len(event_ids), 3)))

TypeError: only integer scalar arrays can be converted to a scalar index

In [320]:
t = np.zeros((len(event_ids), 3))
points = np.vstack((np.array(['test',91,23]),np.zeros((len(event_ids), 3))))

In [322]:
points


Out[322]:
array([['test', '91', '23'],
       ['0.0', '0.0', '0.0'],
       ['0.0', '0.0', '0.0'],
       ['0.0', '0.0', '0.0'],
       ['0.0', '0.0', '0.0'],
       ['0.0', '0.0', '0.0']], 
      dtype='|S32')

In [338]:
t = []
t.extend(['12']*2)

In [339]:
t


Out[339]:
['12', '12']

In [ ]: