In [30]:
import pandas as pd
import time
import numpy as np
import requests
import psycopg2
import json
import simplejson
import urllib
import config
import ast
import bs4
import pprint
import progressbar
from pymongo import MongoClient
from geopy.geocoders import Nominatim
from bs4 import BeautifulSoup as BS
from operator import itemgetter
from sklearn.cluster import KMeans
from sqlalchemy import create_engine

In [ ]:
!pip install --upgrade progressbar2

In [17]:
# conn_str = "dbname='travel_with_friends' user='Gon' host='localhost'"
conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"

In [ ]:
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
# cur.execute("select index, name, coord0, coord1 from poi_detail_table where city !='%s' and state = '%s';" %(current_city, current_state))
cur.execute("select distinct city, state from poi_detail_table;" )
all_cities = cur.fetchall()

In [ ]:
all_cities[0][0]

In [ ]:
cities_coords = pd.read_csv('cities_coords.csv', header=None)
cities_coords.columns = ['area_code', 'city','state','nation', 'coord0','coord1']
cities_coords = cities_coords[['city','state','nation', 'coord0','coord1']].drop_duplicates()
cities_coords.reset_index(drop = True, inplace = True)

In [ ]:
geolocator = Nominatim()

for items in all_cities:
    if cities_coords[cities_coords['state'] == items[1]][cities_coords.city == items[0]].shape[0] == 0:
        location_name = ', '.join([items[0], items[1]])
        try:
            location = geolocator.geocode(location_name)
            cities_coords.loc[len(cities_coords)] = [items[0], items[1], 'US', location.latitude, location.longitude]
        except:
            "error, rest"
            time.sleep(20)
            print" start again"
            
            
#         print cities_coords.loc(len(cities_coords))

In [ ]:
cities_coords.to_csv('all_cities_coords.csv')

In [ ]:
engine = create_engine('postgresql://Gon@localhost:5432/travel_with_friends')
cities_coords.to_sql('all_cities_coords',engine, if_exists = "replace")

In [ ]:
import us_state_abbrevation as abb
state_abb_dict = abb.abb2state
state_abb_dict['CA']
# print state_abb_dict.keys()[state_abb_dict.values().index('CA')]

In [ ]:
from googleplaces import GooglePlaces, types, lang
YOUR_API_KEY = 'AIzaSyDMbpmHBLl7dTOXUOMZP7Vi3zbMJlByEKM'
google_places = GooglePlaces(YOUR_API_KEY)

In [ ]:
import geocoder
api_key1 = 'AIzaSyCrgwS_L75NfO9qzIKG8L0ox7zGw81BpRU'
api_key2 = 'AIzaSyBwh4WqOIVJGJuKkmzpQxlkjahgx6qzimk'
api_key3 = 'AIzaSyA25LW2CRcD9mSmiAWBYSPOSoiKP_m2plQ'
api_key4 = 'AIzaSyB3l2Trzm4LnrC0nyUwwoM9803Fuwf0my4'
api_key5 = 'AIzaSyDj0yH_35G1zMq5uYPF6X0ogkHYcLsNN1w'
add = ' 497 lakeside drive'
g = geocoder.google(add, key = api_key5)

In [ ]:
g.ok

In [5]:
import json
with open('api_key_list.config') as api_key_list_file:
    api_key_list = json.load(api_key_list_file)
api_key_list['api_key_list']

In [27]:
api_key_list


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-27-b50e54f781a6> in <module>()
----> 1 api_key_list

NameError: name 'api_key_list' is not defined

In [ ]:
df_tmp = pd.read_csv('test_poi_detail_df_100.csv', index_col = 0)
# df_tmp.to_csv('test_poi_detail_df_100.csv', index_col=None)
df_tmp.head()
s.find(text ="Recommended length of visit:")
#         visit_length = s.find(text ="Recommended length of visit:").parent.next_sibling

In [ ]:
client = MongoClient()
db = client.zoeshrm
db.TripAdvisor_state_park.count()

In [ ]:
from web_scraping_tripadvisor import state_park_web as web
state_park_pages = db.TripAdvisor_state_park.find()
poi_detail_state_park_df, error_message_df = web(state_park_pages)

In [ ]:
import re
page = db.TripAdvisor.find_one({'city': 'San Francisco, California'})
search_visit_length = re.compile('Recommended length of visit:')
s = BS(page['html'], "html.parser")
#index
#name
input_list, error_message = [],[]
state_abb_error, state_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error = 0,0,0,0,0,0,0,0
latitude, longitude, geo_content = None, None, None
#     print name
url = page['url']
name = s.find('h1', attrs = {'class':'heading_name'}).text.strip()

#street_address
street_address = s.find('span', attrs = {'class':'street-address'}).text.strip()
#city
city = s.find('span', attrs = {'property':'addressLocality'}).text.strip()
#state
state_abb = s.find('span', attrs = {'property':'addressRegion'}).text.strip()
if state_abb:
    try:
        # state = state_abb_dict.keys()[state_abb_dict.values().index(state_abb)]
        state = abb2state_dict[state_abb]
    except:
        state_abb_error = 1
        state = state_abb
else:
    state_error =1
    state_abb = None
    state = None
#postal_code
postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()
#country
if s.find('span', attrs = {'property':'addressCountry'}).get('content'):
    country = s.find('span',{'property':'addressCountry'}).get('content')
elif s.find('span',{'property':'addressCountry'}).get('content') == None:
    country = s.find('span',{'property':'addressCountry'}).text.strip()
else:
    country = 'United States'
#address
if state:
    full_address = street_address+', '+city+', '+state+', '+postal_code[:5]+', '+country
else:
    address_error =1
    full_address = street_address+', '+city+', '+postal_code[:5]+', '+country
# if (name in name_lst) and (full_address in full_address_lst):
#     continue
# else:
#     name_lst.append(name)
#     full_address_lst.append(full_address)
#coord
try:
    latitude, longitude, geo_content = find_latlng(full_address, name)
except:
    geo_error =1
    latitude, longitude, geo_content = None, None, None

#num_reviews
try:
    num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')
    if num_reviews == None:
        num_reviews = s.find('a', {'property': "reviewCount"}).get('content')    
except:
    num_reviews = 0
    review_error=1    
#review_score
try:
    review_score = s.find('div', attrs = {'class': 'heading_rating separator'}).find('img').get('content')
    if review_score == None:
        review_score = s.find('a', {'property': "ratingValue"}).get('content')
except:
    review_score = 0 
    score_error =1
#ranking
try:
    ranking = s.find('b', attrs = {'class':'rank_text wrap'}).text.strip().replace('#',"")
except:
    ranking = 999
    ranking_error=1
#tag
try:
    tags = ", ".join(label.text.strip() for label in s.select('div.detail > a') + s.select('span.collapse.hidden > a'))
except:
    tags = None
    tag_error =1
#visit_length
if s.find('b', text =search_visit_length):
    raw_visit_length = s.find('b', text =search_visit_length).next_sibling.strip()
else:
    raw_visit_length = None
#fee
if s.find(text= "Fee:"):
    fee = s.find(text= "Fee:").parent.next_sibling.upper()
else:
    fee = 'NO'
#description
if s.find('div', attrs = {'class': "listing_details"}):
    description = s.find('div', attrs = {'class': "listing_details"}).text.strip()
else:
    description = None
# error_message = [len(poi_detail_state_park_df), name, url,state_abb_error, state_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error]
# error_message_df.loc[len(poi_detail_state_park_df)] =error_message


# input_list = [len(poi_detail_state_park_df), name, street_address, city, state_abb, state, postal_code, country, full_address, latitude, longitude, num_reviews, review_score, ranking, tags, visit_length, fee, description, url, geo_content]
# poi_detail_state_park_df.loc[len(poi_detail_state_park_df)] = input_list

In [ ]:
import re
search_visit_length = re.compile('Recommended length of visit:')
test = s.find('b', text =search_visit_length).next_sibling.strip()
tests

In [28]:
import json
with open('api_key_list.config') as f:
    data = json.load(f)

In [29]:
data['api_key_list']


Out[29]:
{u'api_key_list': [u'AIzaSyCrgwS_L75NfO9qzIKG8L0ox7zGw81BpRU',
  u'AIzaSyBwh4WqOIVJGJuKkmzpQxlkjahgx6qzimk',
  u'AIzaSyA25LW2CRcD9mSmiAWBYSPOSoiKP_m2plQ',
  u'AIzaSyB3l2Trzm4LnrC0nyUwwoM9803Fuwf0my4',
  u'AIzaSyDj0yH_35G1zMq5uYPF6X0ogkHYcLsNN1w',
  u'AIzaSyADO1wlwWDW-XaNwQ-p50Q3yMRxtRascdU']}

In [35]:
df_poi = pd.read_csv('poi_detail_df.csv', index_col = 0)

In [36]:



Out[36]:
index name street_address city state_abb state postal_code country address coord_lat coord_long num_reviews review_score ranking tag raw_visit_length fee description url geo_content
0 0.0 Central Park 59th to 110th Street New York City NY New York 10022 United States 59th to 110th Street, New York City, New York,... NaN NaN 92215 4.5 1 Points of Interest & Landmarks, Nature & Parks... More than 3 hours No For more than 150 years, visitors have flocked... http://www.tripadvisor.com/Attraction_Review-g... NaN
1 1.0 The National 9/11 Memorial & Museum 180 Greenwich St New York City NY New York 10007-0089 United States 180 Greenwich St, New York City, New York, 100... NaN NaN 61712 4.5 2 Specialty Museums, Points of Interest & Landma... 1-2 hours No The National September 11 Memorial & Museum is... http://www.tripadvisor.com/Attraction_Review-g... NaN
2 2.0 The Metropolitan Museum of Art 1000 5th Ave New York City NY New York 10028-0198 United States 1000 5th Ave, New York City, New York, 10028, ... NaN NaN 40443 5.0 3 Points of Interest & Landmarks, Art Museums, M... 2-3 hours Yes At New York City's most visited museum and att... http://www.tripadvisor.com/Attraction_Review-g... NaN
3 3.0 Top of the Rock Observation Deck 30 Rockefeller Center New York City NY New York NaN United States 30 Rockefeller Center, New York City, New York... NaN NaN 57358 4.5 4 Observation Decks & Towers, Lookouts, Sights &... NaN Yes Top of the Rock Observation Deck, the newly op... http://www.tripadvisor.com/Attraction_Review-g... NaN
4 4.0 Manhattan Skyline NaN New York City NY New York NaN United States , New York City, New York, , United States NaN NaN 15918 5.0 5 Points of Interest & Landmarks, Sights & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
5 5.0 Grand Central Terminal 89 E 42nd Street New York City NY New York 10017-5503 United States 89 E 42nd Street, New York City, New York, 100... NaN NaN 28478 4.5 7 Architectural Buildings, Points of Interest & ... 1-2 hours Unknown Grand Central Terminal is the most extraordina... http://www.tripadvisor.com/Attraction_Review-g... NaN
6 6.0 The High Line NaN New York City NY New York 10014 United States , New York City, New York, 10014, United States NaN NaN 42015 4.5 8 Scenic Walking Areas, Nature & Parks, Sights &... 2-3 hours No The High Line is an elevated railway transform... http://www.tripadvisor.com/Attraction_Review-g... NaN
7 7.0 Broadway NaN New York City NY New York NaN United States , New York City, New York, , United States NaN NaN 22816 4.5 9 Points of Interest & Landmarks, Sights & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
8 8.0 Frick Collection 1 E. 70th St. New York City NY New York 10021-4994 United States 1 E. 70th St., New York City, New York, 10021,... NaN NaN 7179 4.5 10 Art Museums, Museums NaN Unknown Henry Frick once resided in this 18th-century ... http://www.tripadvisor.com/Attraction_Review-g... NaN
9 9.0 Empire State Building 350 Fifth Avenue New York City NY New York 10118-0110 United States 350 Fifth Avenue, New York City, New York, 101... NaN NaN 52446 4.5 11 Architectural Buildings, Observation Decks & T... 1-2 hours Yes The world-famous Empire State Building offers ... http://www.tripadvisor.com/Attraction_Review-g... NaN
10 10.0 Bryant Park NaN New York City NY New York 10017 United States , New York City, New York, 10017, United States NaN NaN 15170 4.5 14 Nature & Parks 2-3 hours No Bryant Park has a French-style merry-go-round,... http://www.tripadvisor.com/Attraction_Review-g... NaN
11 11.0 Statue of Liberty NaN New York City NY New York 10004 United States , New York City, New York, 10004, United States NaN NaN 26918 4.5 15 Monuments & Statues, Points of Interest & Land... More than 3 hours Yes The Statue of Liberty Enlightening the World w... http://www.tripadvisor.com/Attraction_Review-g... NaN
12 12.0 Quan Gio Bar & Grill 14241 Euclid St # C11 Garden Grove CA California 92843-4979 United States 14241 Euclid St # C11, Garden Grove, Californi... NaN NaN 0 0.0 9 Bars & Clubs, Nightlife NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
13 13.0 One World Observatory - World Trade Center 1 World Trade Center New York City NY New York 10007-0089 United States 1 World Trade Center, New York City, New York,... NaN NaN 13948 4.5 16 Observation Decks & Towers, Architectural Buil... 1-2 hours Unknown Start at the top of the tallest building in th... http://www.tripadvisor.com/Attraction_Review-g... NaN
14 14.0 St. Patrick's Cathedral 460 Madison Avenue New York City NY New York 10022 United States 460 Madison Avenue, New York City, New York, 1... NaN NaN 8125 4.5 17 Churches & Cathedrals, Sights & Landmarks NaN Unknown Enormous 13th-century-style Gothic church on M... http://www.tripadvisor.com/Attraction_Review-g... NaN
15 15.0 Ground Zero Museum Workshop 420 W 14th St New York City NY New York 10014-1064 United States 420 W 14th St, New York City, New York, 10014,... NaN NaN 2658 5.0 18 Specialty Museums, Museums 1-2 hours Unknown Ground Zero Museum Workshop: Images & Artifact... http://www.tripadvisor.com/Attraction_Review-g... NaN
16 16.0 Staten Island Ferry 4 South Street New York City NY New York 10004 United States 4 South Street, New York City, New York, 10004... NaN NaN 14354 4.5 19 Ferries, Transportation NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
17 17.0 Radio City Music Hall 1260 6th Avenue (Avenue of the Americas) New York City NY New York 10020 United States 1260 6th Avenue (Avenue of the Americas), New ... NaN NaN 7025 4.5 20 Theaters, Points of Interest & Landmarks, Conc... 2-3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
18 18.0 Rockefeller Center 45 Rockefeller Plaza New York City NY New York 10111-0100 United States 45 Rockefeller Plaza, New York City, New York,... NaN NaN 16707 4.5 21 Points of Interest & Landmarks, Architectural ... 1-2 hours No Rockefeller Center is a national historic land... http://www.tripadvisor.com/Attraction_Review-g... NaN
19 19.0 New York Public Library 476 5th Ave New York, NY 10018 New York City NY New York 10018 United States 476 5th Ave New York, NY 10018, New York City,... NaN NaN 9547 4.5 23 Specialty Museums, Museums NaN Unknown Two marble lions mark the entrance to this Bea... http://www.tripadvisor.com/Attraction_Review-g... NaN
20 20.0 Madison Square Garden 4 Penn Plaza New York City NY New York 10121-0078 United States 4 Penn Plaza, New York City, New York, 10121, ... NaN NaN 6500 4.5 24 Arenas & Stadiums, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
21 21.0 The Met Cloisters 99 Margaret Corbin Drive New York City NY New York 10040 United States 99 Margaret Corbin Drive, New York City, New Y... NaN NaN 4064 4.5 25 Sacred & Religious Sites, Art Museums, Museums... More than 3 hours Yes Branch of the Metropolitan Museum of Art that ... http://www.tripadvisor.com/Attraction_Review-g... NaN
22 22.0 Tenement Museum 103 Orchard Street New York City NY New York 10002 United States 103 Orchard Street, New York City, New York, 1... NaN NaN 4846 4.5 26 Specialty Museums, Museums 1-2 hours Unknown The Tenement Museum preserves the history of i... http://www.tripadvisor.com/Attraction_Review-g... NaN
23 23.0 Times Square Broadway New York City NY New York 10036 United States Broadway, New York City, New York, 10036, Unit... NaN NaN 30873 4.5 27 Neighborhoods, Points of Interest & Landmarks,... 1-2 hours Unknown Central area in NYC that has many shops, resta... http://www.tripadvisor.com/Attraction_Review-g... NaN
24 24.0 The Museum of Modern Art (MoMA) 11 West 53rd Street New York City NY New York 10019 United States 11 West 53rd Street, New York City, New York, ... NaN NaN 12160 4.5 29 Art Museums, Museums 2-3 hours Yes Located in the heart of midtown Manhattan, The... http://www.tripadvisor.com/Attraction_Review-g... NaN
25 25.0 Cistra Usa Gifts and Crafts 12535 South Harbor Blvd Garden Grove CA California 92840 United States 12535 South Harbor Blvd, Garden Grove, Califor... NaN NaN 0 0.0 8 Gift & Specialty Shops, Shopping <1 hour Unknown Handmade handbags and purses from leather, dec... http://www.tripadvisor.com/Attraction_Review-g... NaN
26 26.0 The Getty Center 1200 Getty Center Dr Los Angeles CA California 90049 United States 1200 Getty Center Dr, Los Angeles, California,... NaN NaN 10328 5.0 1 Specialty Museums, Museums 2-3 hours Yes Spectacular museum boasts a most impressive co... http://www.tripadvisor.com/Attraction_Review-g... NaN
27 27.0 Universal Studios Hollywood 100 Universal City Plaza Los Angeles CA California 91608-1002 United States 100 Universal City Plaza, Los Angeles, Califor... NaN NaN 24681 4.5 2 Theme Parks, Water & Amusement Parks NaN Unknown Get ready for the ultimate Hollywood experienc... http://www.tripadvisor.com/Attraction_Review-g... NaN
28 28.0 Griffith Observatory 2800 E. Observatory Rd. Los Angeles CA California 90027-1299 United States 2800 E. Observatory Rd., Los Angeles, Californ... NaN NaN 12585 4.5 3 Observation Decks & Towers, Observatories & Pl... More than 3 hours No A public observatory owned and operated by the... http://www.tripadvisor.com/Attraction_Review-g... NaN
29 29.0 California Science Center 700 Exposition Park Drive Los Angeles CA California 90037-1254 United States 700 Exposition Park Drive, Los Angeles, Califo... NaN NaN 2864 4.5 5 Science Museums, Museums 2-3 hours No Space Shuttle Endeavour on display in the Cali... http://www.tripadvisor.com/Attraction_Review-g... NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
15315 15315.0 Sri Sri Radha Krishna Temple 311 West 8500 South Spanish Fork UT Utah 84660 United States 311 West 8500 South, Spanish Fork, Utah, 84660... NaN NaN 20 4.0 2 Sacred & Religious Sites, Sights & Landmarks 1-2 hours Unknown South of Spanish Fork, Utah, in a semi rural e... http://www.tripadvisor.com/Attraction_Review-g... NaN
15316 15316.0 Spanish Fork River Park HWY 6 Spanish Fork Canyon Spanish Fork UT Utah 84660 United States HWY 6 Spanish Fork Canyon, Spanish Fork, Utah,... NaN NaN 3 4.5 3 Mountains, Bodies of Water, Fun & Games, Natur... More than 3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15317 15317.0 Beckman Mill 11600 S County Road H Beloit WI Wisconsin 53511-9203 United States 11600 S County Road H, Beloit, Wisconsin, 5351... NaN NaN 40 4.5 1 Historic Sites, Sights & Landmarks 1-2 hours Unknown The mill is open on Saturdays and Sundays, 1-4... http://www.tripadvisor.com/Attraction_Review-g... NaN
15318 15318.0 Beloit Snappers 2301 Skyline Dr Beloit WI Wisconsin 53511-2651 United States 2301 Skyline Dr, Beloit, Wisconsin, 53511, Uni... NaN NaN 51 4.5 2 Arenas & Stadiums, Sights & Landmarks 2-3 hours Unknown The Beloit Snappers are a Minor League Basebal... http://www.tripadvisor.com/Attraction_Review-g... NaN
15319 15319.0 Beloit Farmer's market 100 State St Beloit WI Wisconsin 53511-6234 United States 100 State St, Beloit, Wisconsin, 53511, United... NaN NaN 13 5.0 3 Farmers Markets, Food & Drink, Shopping 1-2 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15320 15320.0 Angel Museum NaN Beloit WI Wisconsin NaN United States , Beloit, Wisconsin, , United States NaN NaN 25 4.0 4 Specialty Museums, Museums NaN Unknown The world's largest angel museum, featuring th... http://www.tripadvisor.com/Attraction_Review-g... NaN
15321 15321.0 Logan Museum of Anthropology 700 College St Beloit WI Wisconsin 53511-5509 United States 700 College St, Beloit, Wisconsin, 53511, Unit... NaN NaN 19 4.0 5 History Museums, Museums NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15322 15322.0 Turtle Island Playground 1530 Riverside Drive Beloit WI Wisconsin NaN United States 1530 Riverside Drive, Beloit, Wisconsin, , Uni... NaN NaN 14 4.5 6 Playgrounds, Nature & Parks, Fun & Games NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15323 15323.0 The Apple Hut 1718 W Walters Road Beloit WI Wisconsin 53511 United States 1718 W Walters Road, Beloit, Wisconsin, 53511,... NaN NaN 7 4.5 7 Farms, Sights & Landmarks <1 hour Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15324 15324.0 Pohlman Field 2301 Skyline Dr. Beloit WI Wisconsin 53511 United States 2301 Skyline Dr., Beloit, Wisconsin, 53511, Un... NaN NaN 13 4.5 8 Arenas & Stadiums, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15325 15325.0 Beach at Panama City NaN Panama City Beach FL Florida NaN United States , Panama City Beach, Florida, , United States NaN NaN 3822 4.5 1 Beaches, Nature & Parks, Outdoor Activities NaN Unknown This 27-mile long beach offers a wide variety ... http://www.tripadvisor.com/Attraction_Review-g... NaN
15326 15326.0 Panama City Beach Winery 8730 Thomas Dr Panama City Beach FL Florida 32408 United States 8730 Thomas Dr, Panama City Beach, Florida, 32... NaN NaN 1032 5.0 2 Wineries & Vineyards, Food & Drink NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15327 15327.0 Water Planet Mailing: 203 Greenwood Dr Panama City Beach FL Florida 32407 United States Mailing: 203 Greenwood Dr, Panama City Beach, ... NaN NaN 414 5.0 4 Educational sites, Sights & Landmarks NaN Unknown Water Planet offers a variety of dolphin swim ... http://www.tripadvisor.com/Attraction_Review-g... NaN
15328 15328.0 Pier Park 600 Pier Park Dr Ste 125 Panama City Beach FL Florida 32413-2179 United States 600 Pier Park Dr Ste 125, Panama City Beach, F... NaN NaN 1658 4.5 5 Shopping Malls, Shopping NaN Unknown Pier Park located in Panama City Beach has bec... http://www.tripadvisor.com/Attraction_Review-g... NaN
15329 15329.0 Gulf World Marine Park 15412 Front Beach Rd Panama City Beach FL Florida 32413-2502 United States 15412 Front Beach Rd, Panama City Beach, Flori... NaN NaN 1643 4.5 6 Gardens, Nature & Parks, Zoos & Aquariums 1-2 hours Unknown Our daily shows featuring Dolphins, Sea Lions,... http://www.tripadvisor.com/Attraction_Review-g... NaN
15330 15330.0 Shell Island NaN Panama City Beach FL Florida NaN United States , Panama City Beach, Florida, , United States NaN NaN 1416 4.5 7 Islands, Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15331 15331.0 Coconut Creek Family Fun Park 9807 Front Beach Rd Panama City Beach FL Florida 32407-4134 United States 9807 Front Beach Rd, Panama City Beach, Florid... NaN NaN 407 4.5 8 Theme Parks, Water & Amusement Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15332 15332.0 Ripley's Believe It Or Not 9907 Front Beach Rd Panama City Beach FL Florida 32407-4136 United States 9907 Front Beach Rd, Panama City Beach, Florid... NaN NaN 627 4.0 9 Specialty Museums, Museums NaN Unknown Ripley's Believe It or Not!(R) Panama City Bea... http://www.tripadvisor.com/Attraction_Review-g... NaN
15333 15333.0 Shipwreck Island Waterpark 12201 Hutchison Blvd Panama City Beach FL Florida 32407-3424 United States 12201 Hutchison Blvd, Panama City Beach, Flori... NaN NaN 395 4.5 10 Water Parks, Water & Amusement Parks NaN Unknown Plunge, ride, whirl, dunk, swim - whew! At the... http://www.tripadvisor.com/Attraction_Review-g... NaN
15334 15334.0 Conservation Park 100 Conservation Drive Panama City Beach FL Florida NaN United States 100 Conservation Drive, Panama City Beach, Flo... NaN NaN 204 4.5 11 Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15335 15335.0 ZooWorld Zoological Conservatory 9008 Front Beach Rd Panama City Beach FL Florida 32407-4235 United States 9008 Front Beach Rd, Panama City Beach, Florid... NaN NaN 446 4.0 12 Nature & Parks, Outdoor Activities, Zoos & Aqu... NaN Unknown Home to over 350 animals and 259 species of pl... http://www.tripadvisor.com/Attraction_Review-g... NaN
15336 15336.0 Russell Fields City Pier 12213 Front Beach Rd Panama City Beach FL Florida 32407-3407 United States 12213 Front Beach Rd, Panama City Beach, Flori... NaN NaN 175 4.5 16 Piers & Boardwalks, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15337 15337.0 M.B. Miller County Pier 12213 Front Beach Rd Panama City Beach FL Florida 32407-3407 United States 12213 Front Beach Rd, Panama City Beach, Flori... NaN NaN 92 4.5 18 Piers & Boardwalks, Points of Interest & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15338 15338.0 Race City PCB 9523 Front Beach Rd Panama City Beach FL Florida 32407-4149 United States 9523 Front Beach Rd, Panama City Beach, Florid... NaN NaN 261 3.5 21 Theme Parks, Water & Amusement Parks 1-2 hours Unknown Come and experience the thrills of the largest... http://www.tripadvisor.com/Attraction_Review-g... NaN
15339 15339.0 Frank Brown Park 16200 Panama City Beach Pkwy Panama City Beach FL Florida 32413 United States 16200 Panama City Beach Pkwy, Panama City Beac... NaN NaN 33 5.0 23 Nature & Wildlife Areas, Fun & Games, Nature &... More than 3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15340 15340.0 Rick Seltzer Park 7419 Thomas Dr Panama City Beach FL Florida 32408-7585 United States 7419 Thomas Dr, Panama City Beach, Florida, 32... NaN NaN 56 4.5 24 Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15341 15341.0 Slingshot & Indy Speedway 8762 Thomas Dr Panama City Beach FL Florida 32408-4000 United States 8762 Thomas Dr, Panama City Beach, Florida, 32... NaN NaN 42 4.5 25 Auto Race Tracks, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15342 15342.0 Nivol Brewery 473 N Richard Jackson Blvd Panama City Beach FL Florida 32407-3647 United States 473 N Richard Jackson Blvd, Panama City Beach,... NaN NaN 20 5.0 26 Breweries, Food & Drink NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15343 15343.0 Panama City Beach Library 12500 Hutchison Blvd Panama City Beach FL Florida 32407-3239 United States 12500 Hutchison Blvd, Panama City Beach, Flori... NaN NaN 32 4.5 27 Libraries, Traveler Resources NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... NaN
15344 15344.0 Gator Country Alligator Park 6523 Hwy 79 Panama City Beach FL Florida NaN United States 6523 Hwy 79, Panama City Beach, Florida, , Uni... NaN NaN 72 4.0 28 NaN NaN Unknown Gator Country Alligator Park at Airboat Advent... http://www.tripadvisor.com/Attraction_Review-g... NaN

15345 rows × 20 columns


In [263]:
search_fee = re.compile('Fee:')
df_poi = pd.read_csv('test_poi_detail_df.csv', index_col= 0)
poi_pages = db.TripAdvisor.find()
fee_lst = []
cnt = 0
for page in poi_pages:
    s = BS(page['html'], "html.parser")
    if s.find('b', text= search_fee):
        fee = s.find('b',text= search_fee).next_sibling.strip()
    else:
        fee = 'Unknown'
    fee_lst.append(fee)
    cnt+=1
    if cnt%100 ==0 :
        print '#items in fee lst: ',len(fee_lst)


#items in fee lst:  100
#items in fee lst:  200
#items in fee lst:  300
#items in fee lst:  400
#items in fee lst:  500
#items in fee lst:  600
#items in fee lst:  700
#items in fee lst:  800
#items in fee lst:  900
#items in fee lst:  1000
#items in fee lst:  1100
#items in fee lst:  1200
#items in fee lst:  1300
#items in fee lst:  1400
#items in fee lst:  1500
#items in fee lst:  1600
#items in fee lst:  1700
#items in fee lst:  1800
#items in fee lst:  1900
#items in fee lst:  2000
#items in fee lst:  2100
#items in fee lst:  2200
#items in fee lst:  2300
#items in fee lst:  2400
#items in fee lst:  2500
#items in fee lst:  2600
#items in fee lst:  2700
#items in fee lst:  2800
#items in fee lst:  2900
#items in fee lst:  3000
#items in fee lst:  3100
#items in fee lst:  3200
#items in fee lst:  3300
#items in fee lst:  3400
#items in fee lst:  3500
#items in fee lst:  3600
#items in fee lst:  3700
#items in fee lst:  3800
#items in fee lst:  3900
#items in fee lst:  4000
#items in fee lst:  4100
#items in fee lst:  4200
#items in fee lst:  4300
#items in fee lst:  4400
#items in fee lst:  4500
#items in fee lst:  4600
#items in fee lst:  4700
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-263-a3f77e65920d> in <module>()
      5 cnt = 0
      6 for page in poi_pages:
----> 7     s = BS(page['html'], "html.parser")
      8     if s.find('b', text= search_fee):
      9         fee = s.find('b',text= search_fee).next_sibling.strip()

/Users/zoesh/anaconda/lib/python2.7/site-packages/bs4/__init__.pyc in __init__(self, markup, features, builder, parse_only, from_encoding, exclude_encodings, **kwargs)
    213             if is_file:
    214                 if isinstance(markup, unicode):
--> 215                     markup = markup.encode("utf8")
    216                 warnings.warn(
    217                     '"%s" looks like a filename, not markup. You should'

/Users/zoesh/anaconda/lib/python2.7/site-packages/bs4/__init__.pyc in _feed(self)
    237 
    238     def __copy__(self):
--> 239         copy = type(self)(
    240             self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
    241         )

/Users/zoesh/anaconda/lib/python2.7/site-packages/bs4/builder/_htmlparser.pyc in feed(self, markup)
    162     def feed(self, markup):
    163         args, kwargs = self.parser_args
--> 164         parser = BeautifulSoupHTMLParser(*args, **kwargs)
    165         parser.soup = self.soup
    166         try:

/Users/zoesh/anaconda/lib/python2.7/HTMLParser.pyc in feed(self, data)
    115         """
    116         self.rawdata = self.rawdata + data
--> 117         self.goahead(0)
    118 
    119     def close(self):

/Users/zoesh/anaconda/lib/python2.7/HTMLParser.pyc in goahead(self, end)
    159             if startswith('<', i):
    160                 if starttagopen.match(rawdata, i): # < + letter
--> 161                     k = self.parse_starttag(i)
    162                 elif startswith("</", i):
    163                     k = self.parse_endtag(i)

/Users/zoesh/anaconda/lib/python2.7/HTMLParser.pyc in parse_starttag(self, i)
    296 
    297         while k < endpos:
--> 298             m = attrfind.match(rawdata, k)
    299             if not m:
    300                 break

KeyboardInterrupt: 

In [ ]:
fee_lst

In [ ]:
error_message_df.to_csv('error_message.csv', encoding=('utf-8'))
poi_detail_state_park_df.to_csv("poi_detail_state_park.csv", encoding=('utf-8'))

In [ ]:
try:
    poi_additional_detail = poi_detail_state_park[['index','name','url','address','geo_content']]

    geo_content_detail=poi_detail_state_park.pop('geo_content')
except:
    None

In [ ]:
db.geo_content.drop()
db.geo_content.insert_many(poi_additional_detail.to_dict('records'))
poi_detail_state_park.to_sql('poi_detail_state_park_table',engine, if_exists = "replace")

In [ ]:
print poi_detail_state_park_df.shape, error_message_df.shape

In [ ]:
error_message_df.columns

In [ ]:


In [ ]:
# !pip install geocoder

In [64]:
def find_latlng(full_address, name):
    g_address = geocoder.google(full_address)
    if g_address.ok:
        latitude= g_address.lat
        longitude = g_address.lng
        return latitude, longitude, g_address.content
    
    g_name = geocoder.google(name)
    if g_name.ok:
        latitude= g_name.lat
        longitude = g_name.lng
        return latitude, longitude, g_name.content
    else:
        latitude = None
        longitude = None
        return latitude, longitude, None
def find_latlng(full_address, name, key):
    g_address = geocoder.google(full_address, key = key)
    if g_address.content['status'] == 'OVER_QUERY_LIMIT':
        return False
    if g_address.ok:
        latitude= g_address.lat
        longitude = g_address.lng
        return [latitude, longitude, g_address.content]
    
    g_name = geocoder.google(name, key = key)
    if g_name.content['status'] == 'OVER_QUERY_LIMIT':
        return False
    if g_name.ok:
        latitude= g_name.lat
        longitude = g_name.lng
        return [latitude, longitude, g_name.content]
    else:
        latitude = None
        longitude = None
        return [latitude, longitude, None]
with open('api_key_list.config') as api_key_list_file:
    api_key_list = json.load(api_key_list_file)
api_key = api_key_list['api_key_list']

In [ ]:
def find_geo_location(full_address, name):
    query_result = google_places.nearby_search(location= full_address, keyword=name)
    if len(query_result.places) >0:
        best_result = query_result.places[0]
        latitude = best_result.geo_location["lat"]
        longitude = best_result.geo_location["lng"]
        google_result_name = best_result.name

        return latitude, longitude, google_result_name
    else:
        print name, "google API cant find here."
        return None, None, None

In [ ]:
poi_detail_state_park=pd.DataFrame(columns=['index','name','street_address','city','state_abb','state','postal_code','country','address','coord_lat','coord_long','num_reviews','review_score','ranking','tag','visit_length','fee','description','url',"geo_content"])

In [ ]:
error_message_df = pd.DataFrame(columns=['index','name','url','state_abb_error','address_error','geo_error','review_error','score_error','ranking_error','tag_error'])

In [ ]:
# poi_detail_state_park2=pd.DataFrame(columns=['index','name','street_address','city','state_abb','state','postal_code','country','address','coord_lat','coord_long','num_reviews','review_score','ranking','tag','visit_length','fee','description'])

In [ ]:
state_park_pages = db.TripAdvisor_state_park.find()
index = 0
for page in state_park_pages[len(poi_detail_state_park):]:
    s = BS(page['html'], "html.parser")
    #index
    #name
    error_message = []
    state_abb_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error = 0,0,0,0,0,0,0
    input_list = []
#     print name

    url = page['url']
    name = s.find('h1', attrs = {'class':'heading_name'}).text.strip()

    #street_address
    street_address = s.find('span', attrs = {'class':'street-address'}).text.strip()
    #city
    city = s.find('span', attrs = {'property':'addressLocality'}).text.strip()

    #state
    state_abb = s.find('span', attrs = {'property':'addressRegion'}).text.strip()
    if state_abb:
        try:
            state = state_abb_dict[state_abb]
        except:
            state_abb_error = 1
            state = state_abb
    else:
        state_abb = None
        state = None
    #postal_code
    postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()
    #country
    if s.find('span', attrs = {'property':'addressCountry'}).get('content'):
        country = s.find('span',{'property':'addressCountry'}).get('content')
    elif s.find('span',{'property':'addressCountry'}).get('content') == None:
        country = s.find('span',{'property':'addressCountry'}).text.strip()
    else:
        country = 'United States'
    #address
    if state_abb:
        full_address = street_address+', '+city+', '+state_abb+', '+postal_code[:5]+', '+country
    else:
        address_error =1
        full_address = street_address+', '+city+', '+postal_code[:5]+', '+country

    #coord
    try:
        latitude, longitude, geo_content = find_latlng(full_address, name)
    except:
        geo_error =1
        latitude, longitude, geo_content = None, None, None
#         break
    #num_reviews
    try:
        num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')
        if num_reviews == None:
            num_reviews = s.find('a', {'property': "reviewCount"}).get('content')    
    except:
        num_reviews = 0
        review_error=1    
    #review_score
    try:
        review_score = s.find('div', attrs = {'class': 'heading_rating separator'}).find('img').get('content')
        if review_score == None:
            review_score = s.find('a', {'property': "ratingValue"}).get('content')
    except:
        review_score = 0 
        score_error =1
    #ranking
    try:
        ranking = s.find('b', attrs = {'class':'rank_text wrap'}).text.strip().replace('#',"")
    except:
        ranking = 999
        ranking_error=1
    #tag
    try:
        tags = ", ".join(label.text.strip() for label in s.select('div.detail > a') + s.select('span.collapse.hidden > a'))
    except:
        tags = None
        tag_error =1
    #visit_length
    if s.find(text ="Recommended length of visit:"):
        visit_length = s.find(text ="Recommended length of visit:").parent.next_sibling
    else:
        visit_length = None
    #fee
    if s.find(text= "Fee:"):
        fee = s.find(text= "Fee:").parent.next_sibling.upper()
    else:
        fee = 'NO'
    #description
    if s.find('div', attrs = {'class': "listing_details"}):
        description = s.find('div', attrs = {'class': "listing_details"}).text.strip()
    else:
        description = None

    input_list = [index, name, street_address, city, state_abb, state, postal_code, country, full_address, latitude, longitude, num_reviews, review_score, ranking, tags, visit_length, fee, description, url, geo_content]
    poi_detail_state_park.loc[len(poi_detail_state_park)] = input_list
    
    error_message = [index, name, url,state_abb_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error]
    error_message_df.loc[len(poi_detail_state_park)] =error_message
    index += 1
#     time.sleep(1)

In [ ]:
poi_detail_state_park.shape

In [ ]:
url_df.shape

In [ ]:
import web_scraping_tripadvisor as web

In [ ]:
error_message_df.to_csv('error_message.csv', encoding=('utf-8'))
poi_detail_state_park.to_csv("poi_detail_state_park.csv", encoding=('utf-8'))

In [ ]:
try:
    poi_additional_detail = poi_detail_state_park[['index','name','url','address','geo_content']]

    geo_content_detail=poi_detail_state_park.pop('geo_content')
except:
    None

In [ ]:
db.geo_content.insert_many(poi_additional_detail.to_dict('records'))
poi_detail_state_park.to_sql('poi_detail_state_park_table',engine, if_exists = "replace")

In [ ]:
# poi_detail_state_park[poi_detail_state_park['name']== 'Jessie M. Honeyman Memorial State Park']

In [ ]:
# poi_detail_state_park.loc[2065]

In [ ]:
# poi_detail_state_park.drop(poi_detail_state_park.index[2065:], inplace = True)

In [ ]:
poi_detail_state_park.to_csv("poi_detail_state_park.csv", encoding=('utf-8'))

In [ ]:
poi_detail_state_park = pd.read_csv('poi_detail_state_park.csv')

In [78]:
# poi_detail_df = pd.read_csv('poi_detail_df_coords_apr_24.csv', index_col = 0)
# np.isnan(poi_detail_df.coord_lat[0])
i = 0
for index in poi_detail_df[np.isnan(poi_detail_df.coord_long)].index:
    full_address = poi_detail_df.loc[index].address
    name = poi_detail_df.loc[index]['name']
    result_longlat = find_latlng(full_address, name, api_key[i])
    [latitude, longitude, geo_content] = result_longlat 
    poi_detail_df.set_value(index, 'coord_long', longitude)
    poi_detail_df.set_value(index, 'coord_lat', latitude)
    poi_detail_df.set_value(index, 'geo_content', geo_content)
#     print result_longlat

In [1]:
poi_detail_df.coord_long[5182]


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-15bff6a7fb6b> in <module>()
----> 1 poi_detail_df.coord_long[5182]

NameError: name 'poi_detail_df' is not defined

In [86]:
new_poi_df = poi_detail_df[~np.isnan(poi_detail_df.coord_long)]

In [88]:
new_poi_df.drop('index',axis =1 , inplace=True)


/Users/zoesh/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [94]:
new_poi_df.reset_index(inplace=True)

In [96]:
new_poi_df.drop('index', axis = 1, inplace=True)


/Users/zoesh/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':

In [98]:
new_poi_df.to_csv('poi_detail_v2.csv', index_label=None)

In [ ]:
pd.read_csv('poi_detail_v2')

In [ ]:
update_idx = poi_detail_state_park[poi_detail_state_park.coord_long == incorrect_long].index.values
for index in update_idx:
    full_address = poi_detail_state_park.loc[index].address
    name = poi_detail_state_park.loc[index].name
    try:
        print 'start index: ', index
        latitude, longitude, geo_content = find_latlng(full_address, name)
        poi_detail_state_park.set_value(index, 'coord_long', longitude)
        poi_detail_state_park.set_value(index, 'coord_lat', latitude)
        poi_detail_state_park.set_value(index, 'geo_content', geo_content)
        print poi_detail_state_park.loc[index][['coord_long','coord_lat','geo_content']]
    except:
        print 'why', index
        break

In [ ]:


In [ ]:
poi_detail_state_park.to_csv('poi_detail_state_park_v2.csv', index=False)

In [ ]:
poi_additional_detail = poi_detail_state_park[['index','name','url','address','geo_content']]

In [ ]:
geo_content_detail=poi_detail_state_park.pop('geo_content')

In [ ]:
poi_detail_state_park['geo_content'] = geo_content_detail

In [ ]:
db.geo_content.insert_many(poi_additional_detail.to_dict('records'))

In [ ]:
poi_detail_state_park.to_sql('poi_detail_state_park_table',engine, if_exists = "replace")

In [ ]:
htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g35805-d1134861-Reviews-Cloud_Gate-Chicago_Illinois.html'
htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g60713-d127854-Reviews-San_Francisco_Zoo-San_Francisco_California.html'
htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g60750-d104122-Reviews-San_Diego_Zoo-San_Diego_California.html'
htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g60713-d102523-Reviews-Alcatraz_Island-San_Francisco_California.html'
# htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g32474-d4236729-Reviews-Harmony_Headlands_State_Park-Harmony_San_Luis_Obispo_County_California.html'
# htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g42926-d142814-Reviews-Cannon_Valley_Trail-Cannon_Falls_Minnesota.html'
# htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g42891-d126627-Reviews-Paul_Bunyan_State_Trail-Brainerd_Minnesota.html'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

r=requests.get(htmlurl,headers=headers)
s = BS(r.text, 'html.parser')


# for div in s.find('div', attrs = {'class' : "separator" }):
#     for tag in div.:
#         if tag.name == 'div' and tag.get('class', '') == ['detail']:
#             print tag.text
#     for item in div.contents:
# #         print item
#         if type(item)== 'bs4.element.Tag' and item.name == "detail":
#             print 1234567890
st = time.time()
for div in s.findAll("div", {"class": "separator"}):
    for tag in div.contents:
        if isinstance(tag, bs4.element.Tag) and tag.get('class',"") == ['detail'] :
            tags =  tag.text.encode('utf8').strip()
print time.time() - st
tags

In [ ]:
# s.find('span',{'property':'addressCountry'}).get('content')
# s.select('span[property="addressCountry"]').get('content')

In [ ]:
#index

#name
name = s.find('h1', attrs = {'class':'heading_name'}).text.strip()
#city
city = s.find('span', attrs = {'property':'addressLocality'}).text.strip()
street_address = s.find('span', attrs = {'class':'street-address'}).text.strip()
#state
state_abb = s.find('span', attrs = {'property':'addressRegion'}).text.strip()
# state = state_abb_dict.keys()[state_abb_dict.values().index(state_abb)]
postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()

#country
country = s.find('span',{'property':'addressCountry'}).get('content')
#address
full_address = street_address+', '+city+', '+state_abb+', '+postal_code+', '+country

# from geopy.geocoders import Nominatim
# geolocator = Nominatim()
# location =geolocator.geocode(street_address+', '+city+', '+state_abb+', '+country)
# #coord_lat
# coord_lat = location.latitude 
# #coord_long
# coord_long =location.longitude
#num_reviews
# num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')

#review_score
# review_score = s.find('div', attrs = {'class': 'heading_rating separator'}).find('img').get('content')

#ranking
ranking = s.find('b', attrs = {'class':'rank_text wrap'}).text.strip().replace('#',"")

#tag
tags = ", ".join(label.text for label in s.select('div.detail > a') + s.select('span[class="collapse hidden"] > a'))

#visit_length
# visit_length = s.find(text ="Recommended length of visit:").parent.next_sibling

# #fee
# fee = s.find(text= "Fee:").parent.next_sibling

#description
description = s.find('div', attrs = {'class': "listing_details"}).text.strip()

In [ ]:
st =time.time()
d =", ".join(label.text.strip() for label in s.select('div.listing_details'))
# print d 
ed = time.time() -st
print ed

In [ ]:
st =time.time()
s.find('div', attrs = {'class': "listing_details"}).text.strip()
ed = time.time() -st
print ed

In [ ]:
# s.select('span.hidden.collapse > a')
postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()
print postal_code[:5]

In [ ]:
# num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')
t1 = time.time()
s.select('a[property="reviewCount"]')[0].get("content")
t2 = time.time()
s.find('a', {'property': "reviewCount"}).get('content')
et = time.time()
print et -t1, et-t2

In [ ]:
!pip install python-google-places

In [ ]:
from googleplaces import GooglePlaces, types, lang

YOUR_API_KEY = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'
google_places = GooglePlaces(YOUR_API_KEY)
print name, full_address
address1 = "393 County Road 174, Grove Hill, AL, 35975, United States"
query_result = google_places.nearby_search(location = address1, keyword=name)
query_result

In [ ]:


In [ ]:
name, full_address

In [ ]:
# s.select('div[class="detail"] > a')

In [ ]:
# <span class="collapse hidden">, <a href="/Attractions-g60713-Activities-c57-t68-San_Francisco_California.html">Nature &amp; Wildlife Areas</a></span>

In [ ]:
# detail = {}
# addition_info = s.find('div', attrs = {'class':'details_wrapper'}).text.strip('\n').replace("\n\n","\n").split('\n')
# # if addition_info[0] == 'Description':
# #     print addition_info[1]
# addition_info

# for info in addition_info:
#     info_list = info.split(':')
#     if info_list[0]=="Fee":
#         details["Fee"] = info_list[1]
#     else:
#         details["length of visit"] = info_list[1]
# details

In [ ]:
# fee = s.find('div', {'class':'details_wrapper'})
# fee
# length_visit = s.find(text ="Recommended length of visit:").parent.next_sibling
# length_visit
# fee = s.find(text= "Fee:").parent.next_sibling
# fee

In [ ]:
# description = s.find('div', attrs = {'class': "listing_details"}).text.strip()
# print description

In [ ]:
len(query_result.places)

In [ ]:
## different api try

#     try:
#         YOUR_API_KEY = 'AIzaSyDMbpmHBLl7dTOXUOMZP7Vi3zbMJlByEKM'
#         google_places = GooglePlaces(YOUR_API_KEY)
#         latitude, longitude, google_result_name =  find_geo_location(full_address, name)
#     except:
#         print "API error, try different key"
#         time.sleep(20)
#         try:
#             YOUR_API_KEY = 'AIzaSyAwx3xg6oJ0yiPV3MIunBa1kx6N7v5Tcw8'
#             google_places = GooglePlaces(YOUR_API_KEY)
#             latitude, longitude, google_result_name =  find_geo_location(full_address, name)
#         except:
#             print "both Key dont work"
#             print" location not found: ", name, "address : ", full_address
#             break
#     if location:
#         #coord_lat
#         poi_detail_state_park['coord_lat'] = location.latitude 
#         #coord_long
#         poi_detail_state_park['coord_long'] =location.longitude
#     else:
#         print" location not found: ", name, "address : ", full_address

In [ ]:
state_abb_error_ix = error_message_df[error_message_df['state_abb_error']==1]['index']
address_error_ix = error_message_df[error_message_df['address_error']==1]['index']

In [ ]:
# poi_detail_state_park_df.ix[state_abb_error_ix][['state_abb','state','country']]
poi_detail_state_park_df.ix[address_error_ix][['address','country']]

In [ ]:
error_message_df.columns

In [ ]:
poi_detail_state_park_df.columns

In [ ]:


In [ ]:
# poi_detail_state_park.fee[poi_detail_state_park.fee == 'NO']

In [ ]:
poi_detail_state_park.shape

In [ ]:
err = error_message_df[error_message_df.review_error == 1].index

In [ ]:
for i, link in enumerate(poi_detail_state_park_df.ix[err][['name','url']].url):
    print i, link

In [ ]:
error_message_df.tail()

In [ ]:
poi_detail_state_park_df.drop_duplicates('coord_lat').shape

In [299]:
# htmlurl = 'https://en.wikipedia.org/wiki/List_of_areas_in_the_United_States_National_Park_System'
htmlurl= 'https://en.wikipedia.org/wiki/List_of_national_parks_of_the_United_States'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

r=requests.get(htmlurl,headers=headers)
s = BS(r.text, 'html.parser')

In [333]:
national_park_US_df= pd.DataFrame(columns=["name","state"])
name, state =None, None
table =  s.find('table', {"class" : "wikitable"})
for row in table.findAll("tr")[1:]:
    if row.find('th', {'scope':"row"}) != None:
        name = row.find('th', {'scope':"row"}).next_element.get('title')
    cells = row.findAll("td")
    #For each "tr", assign each "td" to a variable.
    if len(cells) == 6:
        state = cells[1].find(text=True)
    national_park_US_df.loc[len(national_park_US_df)] = [name, state]

In [357]:
national_park_US_df


Out[357]:
name state
0 Acadia National Park Maine
1 National Park of American Samoa American Samoa
2 Arches National Park Utah
3 Badlands National Park South Dakota
4 Big Bend National Park Texas
5 Biscayne National Park Florida
6 Black Canyon of the Gunnison National Park Colorado
7 Bryce Canyon National Park Utah
8 Canyonlands National Park Utah
9 Capitol Reef National Park Utah
10 Carlsbad Caverns National Park New Mexico
11 Channel Islands National Park California
12 Congaree National Park South Carolina
13 Crater Lake National Park Oregon
14 Cuyahoga Valley National Park Ohio
15 Death Valley National Park California
16 Denali National Park and Preserve Alaska
17 Dry Tortugas National Park Florida
18 Everglades National Park Florida
19 Gates of the Arctic National Park and Preserve Alaska
20 Glacier National Park (U.S.) Montana
21 Glacier Bay National Park and Preserve Alaska
22 Grand Canyon National Park Arizona
23 Grand Teton National Park Wyoming
24 Great Basin National Park Nevada
25 Great Sand Dunes National Park and Preserve Colorado
26 Great Smoky Mountains National Park Tennessee
27 Guadalupe Mountains National Park Texas
28 Haleakalā National Park Hawaii
29 Hawaii Volcanoes National Park Hawaii
30 Hot Springs National Park Arkansas
31 Isle Royale National Park Michigan
32 Joshua Tree National Park California
33 Katmai National Park and Preserve Alaska
34 Kenai Fjords National Park Alaska
35 Kings Canyon National Park California
36 Kobuk Valley National Park Alaska
37 Lake Clark National Park and Preserve Alaska
38 Lassen Volcanic National Park California
39 Mammoth Cave National Park Kentucky
40 Mesa Verde National Park Colorado
41 Mount Rainier National Park Washington
42 North Cascades National Park Washington
43 Olympic National Park Washington
44 Petrified Forest National Park Arizona
45 Pinnacles National Park California
46 Redwood National and State Parks California
47 Rocky Mountain National Park Colorado
48 Saguaro National Park Arizona
49 Sequoia National Park California
50 Shenandoah National Park Virginia
51 Theodore Roosevelt National Park North Dakota
52 Virgin Islands National Park United States Virgin Islands
53 Voyageurs National Park Minnesota
54 Wind Cave National Park South Dakota
55 Wrangell–St. Elias National Park and Preserve Alaska
56 Yellowstone National Park Wyoming
57 Yosemite National Park California
58 Zion National Park Utah

In [358]:
for index in national_park_US_df.index:
    keyword = national_park_US_df.name[index].replace(' ','+')+"+"+national_park_US_df.state[index].replace(' ','+')
#     keyword = national_park_US_df.name[index].replace(' ','+')
    trip_url = "https://www.tripadvisor.com/Search?q=" +keyword+"&queryParsed=true&searchSessionId"
#     headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#     r=requests.get(trip_url,headers=headers)
#     test_s = BS(r.text, 'html.parser')
    print index, trip_url


0 https://www.tripadvisor.com/Search?q=Acadia+National+Park+Maine&queryParsed=true&searchSessionId
1 https://www.tripadvisor.com/Search?q=National+Park+of+American+Samoa+American+Samoa&queryParsed=true&searchSessionId
2 https://www.tripadvisor.com/Search?q=Arches+National+Park+Utah&queryParsed=true&searchSessionId
3 https://www.tripadvisor.com/Search?q=Badlands+National+Park+South+Dakota&queryParsed=true&searchSessionId
4 https://www.tripadvisor.com/Search?q=Big+Bend+National+Park+Texas&queryParsed=true&searchSessionId
5 https://www.tripadvisor.com/Search?q=Biscayne+National+Park+Florida&queryParsed=true&searchSessionId
6 https://www.tripadvisor.com/Search?q=Black+Canyon+of+the+Gunnison+National+Park+Colorado&queryParsed=true&searchSessionId
7 https://www.tripadvisor.com/Search?q=Bryce+Canyon+National+Park+Utah&queryParsed=true&searchSessionId
8 https://www.tripadvisor.com/Search?q=Canyonlands+National+Park+Utah&queryParsed=true&searchSessionId
9 https://www.tripadvisor.com/Search?q=Capitol+Reef+National+Park+Utah&queryParsed=true&searchSessionId
10 https://www.tripadvisor.com/Search?q=Carlsbad+Caverns+National+Park+New+Mexico&queryParsed=true&searchSessionId
11 https://www.tripadvisor.com/Search?q=Channel+Islands+National+Park+California&queryParsed=true&searchSessionId
12 https://www.tripadvisor.com/Search?q=Congaree+National+Park+South+Carolina&queryParsed=true&searchSessionId
13 https://www.tripadvisor.com/Search?q=Crater+Lake+National+Park+Oregon&queryParsed=true&searchSessionId
14 https://www.tripadvisor.com/Search?q=Cuyahoga+Valley+National+Park+Ohio&queryParsed=true&searchSessionId
15 https://www.tripadvisor.com/Search?q=Death+Valley+National+Park+California&queryParsed=true&searchSessionId
16 https://www.tripadvisor.com/Search?q=Denali+National+Park+and+Preserve+Alaska&queryParsed=true&searchSessionId
17 https://www.tripadvisor.com/Search?q=Dry+Tortugas+National+Park+Florida&queryParsed=true&searchSessionId
18 https://www.tripadvisor.com/Search?q=Everglades+National+Park+Florida&queryParsed=true&searchSessionId
19 https://www.tripadvisor.com/Search?q=Gates+of+the+Arctic+National+Park+and+Preserve+Alaska&queryParsed=true&searchSessionId
20 https://www.tripadvisor.com/Search?q=Glacier+National+Park+(U.S.)+Montana&queryParsed=true&searchSessionId
21 https://www.tripadvisor.com/Search?q=Glacier+Bay+National+Park+and+Preserve+Alaska&queryParsed=true&searchSessionId
22 https://www.tripadvisor.com/Search?q=Grand+Canyon+National+Park+Arizona&queryParsed=true&searchSessionId
23 https://www.tripadvisor.com/Search?q=Grand+Teton+National+Park+Wyoming&queryParsed=true&searchSessionId
24 https://www.tripadvisor.com/Search?q=Great+Basin+National+Park+Nevada&queryParsed=true&searchSessionId
25 https://www.tripadvisor.com/Search?q=Great+Sand+Dunes+National+Park+and+Preserve+Colorado&queryParsed=true&searchSessionId
26 https://www.tripadvisor.com/Search?q=Great+Smoky+Mountains+National+Park+Tennessee&queryParsed=true&searchSessionId
27 https://www.tripadvisor.com/Search?q=Guadalupe+Mountains+National+Park+Texas&queryParsed=true&searchSessionId
28 https://www.tripadvisor.com/Search?q=Haleakalā+National+Park+Hawaii&queryParsed=true&searchSessionId
29 https://www.tripadvisor.com/Search?q=Hawaii+Volcanoes+National+Park+Hawaii&queryParsed=true&searchSessionId
30 https://www.tripadvisor.com/Search?q=Hot+Springs+National+Park+Arkansas&queryParsed=true&searchSessionId
31 https://www.tripadvisor.com/Search?q=Isle+Royale+National+Park+Michigan&queryParsed=true&searchSessionId
32 https://www.tripadvisor.com/Search?q=Joshua+Tree+National+Park+California&queryParsed=true&searchSessionId
33 https://www.tripadvisor.com/Search?q=Katmai+National+Park+and+Preserve+Alaska&queryParsed=true&searchSessionId
34 https://www.tripadvisor.com/Search?q=Kenai+Fjords+National+Park+Alaska&queryParsed=true&searchSessionId
35 https://www.tripadvisor.com/Search?q=Kings+Canyon+National+Park+California&queryParsed=true&searchSessionId
36 https://www.tripadvisor.com/Search?q=Kobuk+Valley+National+Park+Alaska&queryParsed=true&searchSessionId
37 https://www.tripadvisor.com/Search?q=Lake+Clark+National+Park+and+Preserve+Alaska&queryParsed=true&searchSessionId
38 https://www.tripadvisor.com/Search?q=Lassen+Volcanic+National+Park+California&queryParsed=true&searchSessionId
39 https://www.tripadvisor.com/Search?q=Mammoth+Cave+National+Park+Kentucky&queryParsed=true&searchSessionId
40 https://www.tripadvisor.com/Search?q=Mesa+Verde+National+Park+Colorado&queryParsed=true&searchSessionId
41 https://www.tripadvisor.com/Search?q=Mount+Rainier+National+Park+Washington&queryParsed=true&searchSessionId
42 https://www.tripadvisor.com/Search?q=North+Cascades+National+Park+Washington&queryParsed=true&searchSessionId
43 https://www.tripadvisor.com/Search?q=Olympic+National+Park+Washington&queryParsed=true&searchSessionId
44 https://www.tripadvisor.com/Search?q=Petrified+Forest+National+Park+Arizona&queryParsed=true&searchSessionId
45 https://www.tripadvisor.com/Search?q=Pinnacles+National+Park+California&queryParsed=true&searchSessionId
46 https://www.tripadvisor.com/Search?q=Redwood+National+and+State+Parks+California&queryParsed=true&searchSessionId
47 https://www.tripadvisor.com/Search?q=Rocky+Mountain+National+Park+Colorado&queryParsed=true&searchSessionId
48 https://www.tripadvisor.com/Search?q=Saguaro+National+Park+Arizona&queryParsed=true&searchSessionId
49 https://www.tripadvisor.com/Search?q=Sequoia+National+Park+California&queryParsed=true&searchSessionId
50 https://www.tripadvisor.com/Search?q=Shenandoah+National+Park+Virginia&queryParsed=true&searchSessionId
51 https://www.tripadvisor.com/Search?q=Theodore+Roosevelt+National+Park+North+Dakota&queryParsed=true&searchSessionId
52 https://www.tripadvisor.com/Search?q=Virgin+Islands+National+Park+United+States+Virgin+Islands&queryParsed=true&searchSessionId
53 https://www.tripadvisor.com/Search?q=Voyageurs+National+Park+Minnesota&queryParsed=true&searchSessionId
54 https://www.tripadvisor.com/Search?q=Wind+Cave+National+Park+South+Dakota&queryParsed=true&searchSessionId
55 https://www.tripadvisor.com/Search?q=Wrangell–St.+Elias+National+Park+and+Preserve+Alaska&queryParsed=true&searchSessionId
56 https://www.tripadvisor.com/Search?q=Yellowstone+National+Park+Wyoming&queryParsed=true&searchSessionId
57 https://www.tripadvisor.com/Search?q=Yosemite+National+Park+California&queryParsed=true&searchSessionId
58 https://www.tripadvisor.com/Search?q=Zion+National+Park+Utah&queryParsed=true&searchSessionId

In [361]:
import wikipedia
wiki =  wikipedia.page('List_of_national_parks_of_the_United_States')

In [ ]:


In [ ]:


In [298]:
https://www.tripadvisor.com/Search?q=Acadia+National+Park&geo=28940&queryParsed=true&searchSessionId=F658A1719FACDE7E30D13912D3D1B3381492826820567ssid


Out[298]:
<bound method WikipediaPage.section of <WikipediaPage 'List of national parks of the United States'>>

In [ ]:
https://www.tripadvisor.com/Search?q=Pinnacles+national+park&queryParsed=true&searchSessionId

In [ ]:
https://www.tripadvisor.com/Search?q=Acadia+National+Park&queryParsed=true&searchSessionId

In [347]:
test =national_park_US_df.name[0].replace(" ", "+")

In [349]:
trip_url = "https://www.tripadvisor.com/Search?q=" +test+"&queryParsed=true&searchSessionId"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
r=requests.get(trip_url,headers=headers)
test_s = BS(r.text, 'html.parser')

In [418]:
# trip_url_30 = "https://www.tripadvisor.com/Search?geo=191&redirect&q=national+parks&uiOrigin=MASTHEAD&ssrc=A&returnTo=__2F__Tourism__2D__g143030__2D__Great__5F__Basin__5F__National__5F__Park__5F__Nevada__2D__Vacations__2E__html&pid=3825&startTime=1492837392267&searchSessionId=F658A1719FACDE7E30D13912D3D1B3381492834657203ssid#&ssrc=g&o=0"
# trip_url_60 = "https://www.tripadvisor.com/Search?geo=191&redirect&q=national+parks&uiOrigin=MASTHEAD&ssrc=A&returnTo=__2F__Tourism__2D__g143030__2D__Great__5F__Basin__5F__National__5F__Park__5F__Nevada__2D__Vacations__2E__html&pid=3825&startTime=1492837392267&searchSessionId=F658A1719FACDE7E30D13912D3D1B3381492834657203ssid#&ssrc=g&o=30"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
r=requests.get(trip_url_30,headers=headers)
# trip_30 = BS(r.text, 'html.parser')
# r=requests.get(trip_url_60,headers=headers)
# trip_60 = BS(r.text, 'html.parser')

import urllib

trip_30_html = urllib.urlopen("trip_30.html").read()
trip_60_html = urllib.urlopen("trip_60.html").read()
trip_30 = BS(trip_30_html, 'html.parser')
trip_60 = BS(trip_60_html, 'html.parser')

In [491]:
import re
to_do = re.compile("Things to do")
# trip_search_result = pd.DataFrame(columns=["name","url"])



for poi in trip_60.findAll('div', {"class": "title"}):
    name = poi.text
    for child in poi.next_siblings:
        if child.find(text=to_do) != None:
            url =  child.find(text=to_do).parent.get('href')
            
    trip_search_result.loc[len(trip_search_result)] = [name, url]
    
# for link in trip_30.findAll(text = to_do):
#     print link.parent.get('href')

#     name = poi.text
#     url = poi.get('onclick').replace("ta.setEvtCookie('Search_Results_Page', 'POI_Name', '', 0, '", "").replace("')","")

In [671]:
# trip_search_result

In [493]:
# # for url in trip_search_result.url:
    
# url = trip_search_result.url[0]
# headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
# r=requests.get(url,headers=headers)
# s = BS(r.text, 'html.parser')

In [663]:
def request_s(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    r=requests.get(url,headers=headers)
    s = BS(r.text, 'html.parser')
    return s
def thing_to_do_in_national_park(s):
    thing_to_do = pd.DataFrame(columns=["national_park_name","activate_name","url","num_reviews","score","ranking","tags"])
    national_park_name = s.find('h1', {"id": "HEADING"}).text.strip('\n').replace("Things to Do in ","")
    print "park name: ",national_park_name
    for activate in s.findAll('div', {"class":"listing_title"}):
        activate_name = activate.text.strip()
        url ="https://www.tripadvisor.com"+ activate.find('a').get("href")
        if activate.find_next('div', {"class":"rs rating"}) ==None:
            score, num_reviews = 0, 0
        else:
            score = activate.find_next('div', {"class":"rs rating"}).find('span').get('alt').replace(" of 5 bubbles","")
            num_reviews = activate.find_next('div', {"class":"rs rating"}).find('span', {'class': "more"}).text.strip().replace("reviews","")
        ranking = activate.find_next('div', {'class':"popRanking wrap"}).text.strip().replace("#","")[0]
        if activate.find_next('div',{'class':"tag_line"}).find('span') == None:
            tags = None
        else:
            tags = activate.find_next('div',{'class':"tag_line"}).find('span').text
        list_thing = [national_park_name, activate_name, url, num_reviews, score, ranking, tags]
        thing_to_do.loc[len(thing_to_do)] = list_thing
    return thing_to_do

In [672]:
poi_detail_national_park_df = pd.DataFrame(columns=["national_park_name","activate_name","url","num_reviews","score","ranking","tags"])
for url in trip_search_result.url:
    thing_to_do_page = request_s(url)
    result =  (thing_to_do_in_national_park(thing_to_do_page))
    poi_detail_national_park_df = poi_detail_national_park_df.append(result, ignore_index=True)
    time.sleep(5)


park name:  Acadia National Park
park name:  Yosemite National Park
park name:  Yellowstone National Park
park name:  Zion National Park
park name:  Glacier National Park
park name:  Grand Canyon National Park
park name:  Olympic National Park
park name:  Shenandoah National Park
park name:  Hawaii Volcanoes National Park
park name:  Everglades National Park
park name:  Redwood National Park
park name:  Arches National Park
park name:  Death Valley National Park
park name:  Badlands National Park
park name:  Haleakala National Park
park name:  Rocky Mountain National Park
park name:  Bryce Canyon National Park
park name:  Grand Teton National Park
park name:  Canyonlands National Park
park name:  Joshua Tree National Park
park name:  Great Smoky Mountains National Park
park name:  Big Bend National Park
park name:  Mount Rainier National Park
park name:  Mammoth Cave National Park
park name:  Crater Lake National Park
park name:  Mesa Verde National Park
park name:  North Cascades National Park
park name:  Saguaro National Park
park name:  Carlsbad Caverns National Park
park name:  Sequoia and Kings Canyon National Park
park name:  Lassen Volcanic National Park
park name:  Capitol Reef National Park
park name:  Voyageurs National Park
park name:  Denali National Park and Preserve
park name:  Biscayne National Park
park name:  Channel Islands National Park
park name:  Petrified Forest National Park
park name:  Dry Tortugas National Park
park name:  Guadalupe Mountains National Park
park name:  Isle Royale National Park
park name:  Wind Cave National Park
park name:  Theodore Roosevelt National Park
park name:  Great Basin National Park
park name:  Kenai Fjords National Park
park name:  Great Smoky Mountains National Park
park name:  National Park
park name:  Katmai National Park and Preserve
park name:  Glacier Bay National Park and Preserve
park name:  Lake Clark National Park and Preserve
park name:  Kobuk Valley National Park
park name:  Black Canyon Of The Gunnison National Park
park name:  Great Sand Dunes National Park & Preserve
park name:  Allenspark
park name:  Wawona
park name:  Crystal Mountain
park name:  Wrangell-St Elias National Park and Preserve
park name:  Moose
park name:  McCarthy
park name:  Lajitas
park name:  Gates Of The Arctic National Park and Preserve

In [673]:
poi_detail_national_park_df.to_csv('poi_detail_national_park_df.csv',encoding=('utf-8'))
poi_detail_national_park_df


Out[673]:
national_park_name activate_name url num_reviews score ranking tags
0 Acadia National Park Cadillac Mountain https://www.tripadvisor.com/Attraction_Review-... 2,143 5 1 Mountains
1 Acadia National Park Park Loop Road https://www.tripadvisor.com/Attraction_Review-... 1,035 4.5 2 Scenic Drives
2 Acadia National Park Carriage Roads https://www.tripadvisor.com/Attraction_Review-... 383 5 3 Biking Trails
3 Acadia National Park Beehive Trail https://www.tripadvisor.com/Attraction_Review-... 350 5 4 Hiking Trails
4 Acadia National Park Jordan Pond https://www.tripadvisor.com/Attraction_Review-... 728 4.5 5 Bodies of Water
5 Acadia National Park Acadia National Park's Hulls Cove Visitors Center https://www.tripadvisor.com/Attraction_Review-... 605 4.5 6 Visitor Centers
6 Acadia National Park Sand Beach https://www.tripadvisor.com/Attraction_Review-... 539 4.5 7 Beaches
7 Acadia National Park Gorham Mountain Trail https://www.tripadvisor.com/Attraction_Review-... 193 5 8 Hiking Trails
8 Acadia National Park Thunder Hole https://www.tripadvisor.com/Attraction_Review-... 594 4 9 Geologic Formations
9 Acadia National Park Bubble Rock https://www.tripadvisor.com/Attraction_Review-... 214 4.5 1 Hiking Trails
10 Acadia National Park Otter Cliff https://www.tripadvisor.com/Attraction_Review-... 175 4.5 1 Geologic Formations
11 Acadia National Park Great Head Trail https://www.tripadvisor.com/Attraction_Review-... 139 5 1 Hiking Trails
12 Acadia National Park Precipice Trail https://www.tripadvisor.com/Attraction_Review-... 171 5 1 Hiking Trails
13 Acadia National Park Heart of Acadia Loop https://www.tripadvisor.com/Attraction_Review-... 75 5 1 Hiking Trails
14 Acadia National Park Beech Mountain Trail https://www.tripadvisor.com/Attraction_Review-... 68 4.5 1 Hiking Trails
15 Acadia National Park Echo Lake Beach https://www.tripadvisor.com/Attraction_Review-... 86 4.5 1 Beaches
16 Acadia National Park Little Hunters Beach https://www.tripadvisor.com/Attraction_Review-... 45 4.5 1 Beaches
17 Acadia National Park Horseback Riding Tours (1) https://www.tripadvisor.com/Attractions-g14301... 45 4.5 1 None
18 Acadia National Park Horse-Drawn Carriage Tours (1) https://www.tripadvisor.com/Attractions-g14301... 45 4.5 1 None
19 Acadia National Park Day Mountain https://www.tripadvisor.com/Attraction_Review-... 19 4.5 2 Mountains
20 Acadia National Park Wild Gardens of Acadia https://www.tripadvisor.com/Attraction_Review-... 25 4 2 Gardens
21 Acadia National Park Jordan Cliffs Trail https://www.tripadvisor.com/Attraction_Review-... 8 5 2 Nature & Wildlife Areas
22 Acadia National Park The Bowl Trail https://www.tripadvisor.com/Attraction_Review-... 10 4.5 2 Hiking Trails
23 Acadia National Park Pemetic Mountain https://www.tripadvisor.com/Attraction_Review-... 8 5 2 Mountains
24 Acadia National Park Thompson Island Information Center https://www.tripadvisor.com/Attraction_Review-... 11 4.5 2 Visitor Centers
25 Acadia National Park Schoodic Institute at Acadia National Park https://www.tripadvisor.com/Attraction_Review-... 7 4.5 2 Nature & Wildlife Areas
26 Acadia National Park Acadia's Birds https://www.tripadvisor.com/Attraction_Review-... 9 4.5 2 Nature & Wildlife Areas
27 Acadia National Park The Gorge Path https://www.tripadvisor.com/Attraction_Review-... 7 4.5 2 Scenic Walking Areas
28 Acadia National Park Sieur de Monts Spring https://www.tripadvisor.com/Attraction_Review-... 10 4 2 Nature & Wildlife Areas
29 Acadia National Park Great Meadow Walk https://www.tripadvisor.com/Attraction_Review-... 8 4 3 Historic Walking Areas
... ... ... ... ... ... ... ...
1005 Moose Snake River Overlook https://www.tripadvisor.com/Attraction_Review-... 41 4.5 6 Lookouts
1006 Moose Chapel of the Sacred Heart https://www.tripadvisor.com/Attraction_Review-... 48 4.5 7 Points of Interest & Landmarks
1007 Moose Snake River https://www.tripadvisor.com/Attraction_Review-... 25 5 8 Bodies of Water
1008 Moose National Parks Float Trips https://www.tripadvisor.com/Attraction_Review-... 25 4.5 9 Boat Tours
1009 Moose Dornan's Scenic Float Trips https://www.tripadvisor.com/Attraction_Review-... 35 4.5 1 River Rafting & Tubing
1010 Moose Glacier View Turnout https://www.tripadvisor.com/Attraction_Review-... 12 4.5 1 Geologic Formations
1011 Moose Snake River Bridge https://www.tripadvisor.com/Attraction_Review-... 8 4.5 1 Bridges
1012 Moose Lost Creek Ranch Adventures https://www.tripadvisor.com/Attraction_Review-... 6 5 1 Sightseeing Tours
1013 Moose Murie Centre https://www.tripadvisor.com/Attraction_Review-... 5 4.5 1 Specialty Museums
1014 Moose DZONE Skydiving https://www.tripadvisor.com/Attraction_Review-... 1 review 1 1 Scuba & Snorkeling
1015 McCarthy St. Elias Alpine Guides Day Adventures https://www.tripadvisor.com/Attraction_Review-... 508 5 1 Sightseeing Tours
1016 McCarthy McCarthy River Tours & Outfitters - Day Tours https://www.tripadvisor.com/Attraction_Review-... 98 5 2 River Rafting & Tubing
1017 McCarthy Wrangell Mountain Air - Day Trips https://www.tripadvisor.com/Attraction_Review-... 97 5 3 Air Tours
1018 McCarthy Wild Alpine - Day Tours https://www.tripadvisor.com/Attraction_Review-... 43 5 4 National Parks
1019 McCarthy Copper Oar Rafting Day Trip https://www.tripadvisor.com/Attraction_Review-... 39 5 5 River Rafting & Tubing
1020 McCarthy Alaska Boreal Canopy Adventures https://www.tripadvisor.com/Attraction_Review-... 1 review 5 6 Other Outdoor Activities
1021 McCarthy Kennecott Trading Company https://www.tripadvisor.com/Attraction_Review-... 0 0 7 Gift & Specialty Shops
1022 Lajitas El Camino Del Rio\n\n\n\n#taplc_dmo_attribute_... https://www.tripadvisor.com/Attraction_Review-... 32 5 1 Scenic Drives
1023 Lajitas Lajitas Golf Resort & Spa\n\n\n\n#taplc_dmo_at... https://www.tripadvisor.com/Attraction_Review-... 38 4.5 2 Golf Courses
1024 Lajitas Closed Canyon\n\n\n\n#taplc_dmo_attribute_to_c... https://www.tripadvisor.com/Attraction_Review-... 25 4.5 3 Hiking Trails
1025 Lajitas Lajitas Zipline\n\n\n\n#taplc_dmo_attribute_to... https://www.tripadvisor.com/Attraction_Review-... 22 5 4 Zipline & Aerial Adventure Parks
1026 Lajitas Hoodoos Trailhead\n\n\n\n#taplc_dmo_attribute_... https://www.tripadvisor.com/Attraction_Review-... 14 4 5 Hiking Trails
1027 Lajitas Abandoned Contrabando Movie Set\n\n\n\n#taplc_... https://www.tripadvisor.com/Attraction_Review-... 13 3.5 6 Points of Interest & Landmarks
1028 Gates Of The Arctic National Park and Preserve Koyukuk River https://www.tripadvisor.com/Attraction_Review-... 1 review 5 1 Bodies of Water
1029 Gates Of The Arctic National Park and Preserve Kobuk Wild River https://www.tripadvisor.com/Attraction_Review-... 1 review 5 2 Bodies of Water
1030 Gates Of The Arctic National Park and Preserve John River https://www.tripadvisor.com/Attraction_Review-... 1 review 4 3 Bodies of Water
1031 Gates Of The Arctic National Park and Preserve Kugururok River https://www.tripadvisor.com/Attraction_Review-... 0 0 4 Bodies of Water
1032 Gates Of The Arctic National Park and Preserve Tinayguk River https://www.tripadvisor.com/Attraction_Review-... 0 0 5 Bodies of Water
1033 Gates Of The Arctic National Park and Preserve Frigid Crags https://www.tripadvisor.com/Attraction_Review-... 0 0 6 Mountains
1034 Gates Of The Arctic National Park and Preserve Boreal Mountain https://www.tripadvisor.com/Attraction_Review-... 0 0 7 Mountains

1035 rows × 7 columns


In [6]:
df_city_coords_path = '/Users/zoesh/Desktop/travel_with_friends/travel_with_friends/all_cities_coords.csv'

In [71]:
cities_coords = pd.read_csv(df_city_coords_path)
cities_coords = cities_coords[['city', 'state','nation','coord0','coord1']].drop_duplicates()

In [7]:
import pandas as pd
cities_coords = pd.read_csv(df_city_coords_path)

In [15]:
import psycopg2
conn = psycopg2.connect(conn_str)
cur = conn.cursor()
cur.execute("drop table all_cities_coords.csv;")
conn.commit()
conn.close()


---------------------------------------------------------------------------
ProgrammingError                          Traceback (most recent call last)
<ipython-input-15-2768d474ea54> in <module>()
      2 conn = psycopg2.connect(conn_str)
      3 cur = conn.cursor()
----> 4 cur.execute("drop table all_cities_coords.csv;")
      5 conn.commit()
      6 conn.close()

ProgrammingError: schema "all_cities_coords" does not exist

In [17]:
poi_detail_df


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-8dfedce762d5> in <module>()
----> 1 poi_detail_df

NameError: name 'poi_detail_df' is not defined

In [243]:
poi_detail_path = '/Users/zoesh/Desktop/travel_with_friends/travel_with_friends/new_poi_df_v2.csv'

new_poi_df = pd.read_csv(poi_detail_path, index_col=0)

In [244]:
new_poi_df[new_poi_df['name'] == 'Black Sheep Bike Rental']


Out[244]:
name street_address city state_abb state postal_code country address coord_lat coord_long ... ranking tag raw_visit_length fee description url geo_content poi_type adjusted_visit_length county
6712 Black Sheep Bike Rental 900 Doolittle Dr San Leandro CA California 94577 United States 900 Doolittle Dr, San Leandro, California, 945... 37.718742 -122.189289 ... 1 Gear Rentals, Outdoor Activities More than 3 hours Unknown Black Sheep Adventures (formerly UDC) offers p... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... OutdoorActivities 360 ALAMEDA

1 rows × 22 columns


In [75]:
new_poi_df[(new_poi_df.raw_visit_length.astype(str) == 'nan') & (new_poi_df.city == 'New York City')]


Out[75]:
name street_address city state_abb state postal_code country address coord_lat coord_long num_reviews review_score ranking tag raw_visit_length fee description url geo_content poi_type
3 Top of the Rock Observation Deck 30 Rockefeller Center New York City NY New York NaN United States 30 Rockefeller Center, New York City, New York... 40.758823 -73.979398 57358 4.5 4 Observation Decks & Towers, Lookouts, Sights &... NaN Yes Top of the Rock Observation Deck, the newly op... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... None
4 Manhattan Skyline NaN New York City NY New York NaN United States , New York City, New York, , United States 40.712784 -74.005941 15918 5.0 5 Points of Interest & Landmarks, Sights & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... None
7 Broadway NaN New York City NY New York NaN United States , New York City, New York, , United States 40.712784 -74.005941 22816 4.5 9 Points of Interest & Landmarks, Sights & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... None
8 Frick Collection 1 E. 70th St. New York City NY New York 10021-4994 United States 1 E. 70th St., New York City, New York, 10021,... 40.771181 -73.967350 7179 4.5 10 Art Museums, Museums NaN Unknown Henry Frick once resided in this 18th-century ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... None
14 St. Patrick's Cathedral 460 Madison Avenue New York City NY New York 10022 United States 460 Madison Avenue, New York City, New York, 1... 40.758814 -73.976858 8125 4.5 17 Churches & Cathedrals, Sights & Landmarks NaN Unknown Enormous 13th-century-style Gothic church on M... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... None
16 Staten Island Ferry 4 South Street New York City NY New York 10004 United States 4 South Street, New York City, New York, 10004... 40.701009 -74.013025 14354 4.5 19 Ferries, Transportation NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... None
19 New York Public Library 476 5th Ave New York, NY 10018 New York City NY New York 10018 United States 476 5th Ave New York, NY 10018, New York City,... 40.753182 -73.982253 9547 4.5 23 Specialty Museums, Museums NaN Unknown Two marble lions mark the entrance to this Bea... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... None
20 Madison Square Garden 4 Penn Plaza New York City NY New York 10121-0078 United States 4 Penn Plaza, New York City, New York, 10121, ... 40.750354 -73.993371 6500 4.5 24 Arenas & Stadiums, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... None

In [203]:
#set type!
new_poi_df['poi_type'] = new_poi_df.tag
for index in new_poi_df.index:
#     if str(new_poi_df.raw_visit_length[index]) == 'nan':
    try:
        if 'Museums' in new_poi_df.poi_type[index]:
            #hrs spent: 120min
# #             print new_poi_df.poi_type[index]
            new_poi_df.set_value(index, 'poi_type', 'Museum')
        elif 'Theme Parks' in new_poi_df.poi_type[index]:
            #hrs spent: 480min
#             print new_poi_df['name'][index]
            new_poi_df.set_value(index, 'poi_type', 'ThemePark')
        elif 'Zoos' in new_poi_df.poi_type[index]:
#             print new_poi_df['name'][index]
            new_poi_df.set_value(index, 'poi_type', 'Zoo')
        elif 'Parks' in new_poi_df.poi_type[index]:
            if 'National Historical Park' in new_poi_df['name'][index]:
                #hrs spent: 90min
                new_poi_df.set_value(index, 'poi_type', 'NationalHistoricalPark')
            elif 'National Aquarium' in new_poi_df['name'][index]:
                #hrs spent: 180min
                new_poi_df.set_value(index, 'poi_type', 'NationalAquarium')
            elif 'National Memorial' in new_poi_df['name'][index]:
                #hrs spent: 120min
                new_poi_df.set_value(index, 'poi_type', 'NationalMemorial')
            elif 'National Wildlife Refuge' in new_poi_df['name'][index]:
                #hrs spent: 90min
                new_poi_df.set_value(index, 'poi_type', 'NationalWildlifeRefuge')
            elif 'National Monument' in new_poi_df['name'][index]:
                #hrs spent: 120min
                new_poi_df.set_value(index, 'poi_type', 'NationalMonument')
            elif 'National Forest' in new_poi_df['name'][index]:
                #hrs spent: 180min
                new_poi_df.set_value(index, 'poi_type', 'NationalForest')
            elif 'National Beach' in new_poi_df['name'][index]:
                #hrs spent: 180min
                new_poi_df.set_value(index, 'poi_type', 'NationalBeach')
            elif 'State Park' in new_poi_df['name'][index]:
                #hrs spent: 120 min?
                new_poi_df.set_value(index, 'poi_type', 'StatePark')
            else: 
                #regional parks and gardens: hrs spent:60min
                new_poi_df.set_value(index, 'poi_type', 'Park')
        elif 'State Park' in new_poi_df['name'][index]:
            new_poi_df.set_value(index, 'poi_type', 'StatePark')
        elif 'Nightlife' in new_poi_df.poi_type[index]:
#             print new_poi_df['name'][index]
            new_poi_df.set_value(index, 'poi_type', 'Nightlife')
        elif 'Landmark' in new_poi_df.poi_type[index]:
            #hrs spent: 30 min
            new_poi_df.set_value(index, 'poi_type', 'Landmark')
        elif 'Shopping' in new_poi_df.poi_type[index]:
            #hrs spent: 60 min?
            if 'Shopping' in new_poi_df['name'][index]:
                new_poi_df.set_value(index, 'poi_type', 'ShoppingMall')
            else:
            #hrs spent: 30 min?
                new_poi_df.set_value(index, 'poi_type', 'Shopping')
        elif 'Theater' in new_poi_df.poi_type[index]:
            #hrs spent: 120 min
            new_poi_df.set_value(index, 'poi_type', 'Theater')
        elif 'Casino' in new_poi_df.poi_type[index]:
            #hrs spent: 90 min
            new_poi_df.set_value(index, 'poi_type', 'Casino')
        elif 'Stadium' in new_poi_df['name'][index]:
            #hrs spent: 15 min
            new_poi_df.set_value(index, 'poi_type', 'Stadium') 
#         elif 'Stadium' in new_poi_df['name'][index]:
#             #hrs spent: 15 min
#             new_poi_df.set_value(index, 'poi_type', 'Stadium') 
        elif 'Outdoor Activities' in new_poi_df.poi_type[index]:
            #hrs spent: 150 min
            new_poi_df.set_value(index, 'poi_type', 'OutdoorActivities') 
        elif 'Food' in new_poi_df.poi_type[index]:
            #hrs spent: 60 min
            new_poi_df.set_value(index, 'poi_type', 'Food') 
        elif 'Visitor Center' in new_poi_df.poi_type[index]:
            #hrs spent: 15 min
            new_poi_df.set_value(index, 'poi_type', 'VisotorCenter')
        elif 'Spa' in new_poi_df.poi_type[index]:
            #hrs spent: 120 min
            new_poi_df.set_value(index, 'poi_type', 'Spa')
        elif 'Games' in new_poi_df.poi_type[index]:
            #hrs spent: 90 min
            new_poi_df.set_value(index, 'poi_type', 'Game')
        elif 'Libraries' in new_poi_df.poi_type[index]:
            #hrs spent: 15 min
            new_poi_df.set_value(index, 'poi_type', 'Library')
        elif 'Tours' in new_poi_df.poi_type[index]:
            #hrs spent: 120 min
            new_poi_df.set_value(index, 'poi_type', 'Tour')
        elif 'Transportation' in new_poi_df.poi_type[index]:
            #hrs spent: 60 min
            new_poi_df.set_value(index, 'poi_type', 'Transportation')
        elif 'Show' in new_poi_df.poi_type[index]:
            #hrs spent: 180 min
            new_poi_df.set_value(index, 'poi_type', 'Show')
        else:
            #hrs spent: 15 min
            new_poi_df.set_value(index, 'poi_type', 'Other')
#             print new_poi_df['name'][index], new_poi_df['url'][index]
#             print new_poi_df['name'][index]
#             print new_poi_df.poi_type[index]
    except:
        #hrs spent 15min
        new_poi_df.set_value(index, 'poi_type', 'Other')

In [332]:
new_poi_df['adjusted_visit_length'] = None
for index in new_poi_df.index:
    if new_poi_df.raw_visit_length[index] == '1-2 hours':
        new_poi_df.set_value(index, 'adjusted_visit_length', 120)
    elif new_poi_df.raw_visit_length[index] == '<1 hour':
        new_poi_df.set_value(index, 'adjusted_visit_length', 60)
    elif new_poi_df.raw_visit_length[index] == '2-3 hours':
        new_poi_df.set_value(index, 'adjusted_visit_length', 180)
    elif new_poi_df.raw_visit_length[index] == 'More than 3 hours':   
        new_poi_df.set_value(index, 'adjusted_visit_length', 360)
    elif str(new_poi_df.raw_visit_length[index]) == 'nan':
        if 'Museum' == new_poi_df.poi_type[index]:
            #hrs spent: 120min
            new_poi_df.set_value(index, 'adjusted_visit_length', 120)
        elif 'ThemePark' == new_poi_df.poi_type[index]:
            #hrs spent: 480min
            new_poi_df.set_value(index, 'adjusted_visit_length', 480)
        elif 'Zoo' == new_poi_df.poi_type[index]:
#             hrs spent: 120min
            new_poi_df.set_value(index, 'adjusted_visit_length', 120)
        elif 'NationalHistoricalPark' == new_poi_df.poi_type[index]:
            #hrs spent: 90min
            new_poi_df.set_value(index, 'adjusted_visit_length', 90)
        elif 'NationalAquarium'  == new_poi_df.poi_type[index]:
            #hrs spent: 180min
            new_poi_df.set_value(index, 'adjusted_visit_length', 180)
        elif 'NationalMemorial'  == new_poi_df.poi_type[index]:
            #hrs spent: 120min
            new_poi_df.set_value(index, 'adjusted_visit_length', 120)
        elif 'NationalWildlifeRefuge'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 90)
        elif 'NationalMonument'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 120)
        elif 'NationalForest'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 180)
        elif 'NationalBeach'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 180)
        elif 'StatePark'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 120)
        elif 'Park'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 60)
        elif 'Nightlife'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 60)
        elif 'Landmark'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 30)
        elif 'Shopping'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 30)
        elif 'ShoppingMall'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 60)
        elif 'Theater'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 60)
        elif 'Casino'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 90)
        elif 'Stadium'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 15)
        elif 'OutdoorActivities'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 150)
        elif 'Food'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 60)
        elif 'VisotorCenter'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 15)
        elif 'Spa'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 120)
        elif 'Game'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 90)
        elif 'Library'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 15)
        elif 'Tour'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 120)
        elif 'Transportation'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 60)
        elif 'Show'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 180)
        elif 'Other'  == new_poi_df.poi_type[index]:
            new_poi_df.set_value(index, 'adjusted_visit_length', 15)

In [338]:
new_poi_df_v2 = new_poi_df[new_poi_df.review_score>0].reset_index().drop('index',axis =1)

In [357]:
new_poi_df_v2.to_csv('new_poi_df_v2.csv')

In [356]:
new_poi_df_v2 = new_poi_df_v2[new_poi_df_v2['name'] != "Cliff's Variety"].reset_index().drop('index',axis =1)

In [321]:
import psycopg2
import simplejson
import numpy as np
from distance import *

conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"
my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'

def find_county(state, city):
    '''
    Only valid within the U.S.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    city = city.replace('_',' ')
    cur.execute("select county from county_table where city = '%s' and state = '%s';" %(city.title(), state.title()))

    county = cur.fetchone()
    conn.close()
    if county:
        return county[0]
    else:
        return None
county_list= []
county = None
for index in new_poi_df.index:
    try:
        if (new_poi_df.city[index] == 'New York City') and (new_poi_df.state[index] == 'New York'):
            county = 'NEW YORK'
        elif "administrative_area_level_2" in new_poi_df.geo_content[index]:
            geo_content = ast.literal_eval(new_poi_df.geo_content[index])['results']
            for info in geo_content[0]['address_components']:
                if info['types'][0] == "administrative_area_level_2":
                    county = info['short_name'].replace(' County', '').upper().encode('utf-8').strip()
        else:
            county = find_county(new_poi_df.state[index], new_poi_df.city[index])
    except:
        county = None
    county_list.append(county)
new_poi_df['county'] = county_list

In [322]:
new_poi_df.to_csv('new_poi_df_v2.csv')

In [292]:
index = 4
"administrative_area_level_2" in new_poi_df.geo_content[4]
print new_poi_df.state[index], new_poi_df.city[index]
find_county(new_poi_df.state[index], new_poi_df.city[index])


New York New York City

In [309]:
new_poi_df['county'].value_counts()


Out[309]:
LOS ANGELES          419
ORANGE               381
COOK                 249
SAN DIEGO            241
MARICOPA             219
RIVERSIDE            199
SANTA CLARA          179
ALAMEDA              170
MIDDLESEX            166
WASHINGTON           166
BROWARD              157
KING                 150
SAN BERNARDINO       148
MIAMI-DADE           148
TARRANT              147
JEFFERSON            140
DALLAS               134
MONTGOMERY           129
PALM BEACH           118
CONTRA COSTA         116
HENNEPIN             113
CLARK                107
ESSEX                 98
SALT LAKE             96
JACKSON               94
BENTON                89
FRANKLIN              87
POLK                  84
CUYAHOGA              84
SACRAMENTO            83
                    ... 
CRAWFORD               1
S                      1
CLINTON                1
SAN PATRICIO           1
CENTRAL HIGHLANDS      1
ROBERTSON              1
LEXINGTON              1
NOBLE                  1
LEAVENWORTH            1
LAWRENCE               1
SIERRA                 1
MARTIN                 1
BARBOUR                1
PENDER                 1
ANDREW                 1
CADDO                  1
VAN BUREN              1
HAWKINS                1
HOPKINS                1
MORRIS                 1
GREENUP                1
IREDELL                1
WEST SUSSEX            1
HAMPSHIRE              1
NICOLLET               1
PITTSYLVANIA           1
KANDIYOHI              1
COLLIER                1
NOME                   1
INDIAN RIVER           1
Name: county, dtype: int64

In [285]:
for info in geo_content[0]['address_components']:
    if info['types'][0] == "administrative_area_level_2":
        county = info['short_name'].replace(' County', '').upper()
print county


New York

In [323]:
new_poi_df


Out[323]:
name street_address city state_abb state postal_code country address coord_lat coord_long ... ranking tag raw_visit_length fee description url geo_content poi_type adjusted_visit_length county
0 Central Park 59th to 110th Street New York City NY New York 10022 United States 59th to 110th Street, New York City, New York,... 40.782865 -73.965355 ... 1 Points of Interest & Landmarks, Nature & Parks... More than 3 hours No For more than 150 years, visitors have flocked... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 360 NEW YORK
1 The National 9/11 Memorial & Museum 180 Greenwich St New York City NY New York 10007-0089 United States 180 Greenwich St, New York City, New York, 100... 40.711415 -74.012479 ... 2 Specialty Museums, Points of Interest & Landma... 1-2 hours No The National September 11 Memorial & Museum is... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 120 NEW YORK
2 The Metropolitan Museum of Art 1000 5th Ave New York City NY New York 10028-0198 United States 1000 5th Ave, New York City, New York, 10028, ... 40.779166 -73.962928 ... 3 Points of Interest & Landmarks, Art Museums, M... 2-3 hours Yes At New York City's most visited museum and att... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 180 NEW YORK
3 Top of the Rock Observation Deck 30 Rockefeller Center New York City NY New York NaN United States 30 Rockefeller Center, New York City, New York... 40.758823 -73.979398 ... 4 Observation Decks & Towers, Lookouts, Sights &... NaN Yes Top of the Rock Observation Deck, the newly op... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 NEW YORK
4 Manhattan Skyline NaN New York City NY New York NaN United States , New York City, New York, , United States 40.712784 -74.005941 ... 5 Points of Interest & Landmarks, Sights & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 NEW YORK
5 Grand Central Terminal 89 E 42nd Street New York City NY New York 10017-5503 United States 89 E 42nd Street, New York City, New York, 100... 40.752496 -73.977302 ... 7 Architectural Buildings, Points of Interest & ... 1-2 hours Unknown Grand Central Terminal is the most extraordina... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 120 NEW YORK
6 The High Line NaN New York City NY New York 10014 United States , New York City, New York, 10014, United States 40.736614 -74.009447 ... 8 Scenic Walking Areas, Nature & Parks, Sights &... 2-3 hours No The High Line is an elevated railway transform... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 180 NEW YORK
7 Broadway NaN New York City NY New York NaN United States , New York City, New York, , United States 40.712784 -74.005941 ... 9 Points of Interest & Landmarks, Sights & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 NEW YORK
8 Frick Collection 1 E. 70th St. New York City NY New York 10021-4994 United States 1 E. 70th St., New York City, New York, 10021,... 40.771181 -73.967350 ... 10 Art Museums, Museums NaN Unknown Henry Frick once resided in this 18th-century ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 120 NEW YORK
9 Empire State Building 350 Fifth Avenue New York City NY New York 10118-0110 United States 350 Fifth Avenue, New York City, New York, 101... 40.748541 -73.985763 ... 11 Architectural Buildings, Observation Decks & T... 1-2 hours Yes The world-famous Empire State Building offers ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 120 NEW YORK
10 Bryant Park NaN New York City NY New York 10017 United States , New York City, New York, 10017, United States 40.751985 -73.969780 ... 14 Nature & Parks 2-3 hours No Bryant Park has a French-style merry-go-round,... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 180 NEW YORK
11 Statue of Liberty NaN New York City NY New York 10004 United States , New York City, New York, 10004, United States 40.703870 -74.013854 ... 15 Monuments & Statues, Points of Interest & Land... More than 3 hours Yes The Statue of Liberty Enlightening the World w... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 360 NEW YORK
12 Quan Gio Bar & Grill 14241 Euclid St # C11 Garden Grove CA California 92843-4979 United States 14241 Euclid St # C11, Garden Grove, Californi... 33.756149 -117.938499 ... 9 Bars & Clubs, Nightlife NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Nightlife 60 ORANGE
13 One World Observatory - World Trade Center 1 World Trade Center New York City NY New York 10007-0089 United States 1 World Trade Center, New York City, New York,... 40.713649 -74.008713 ... 16 Observation Decks & Towers, Architectural Buil... 1-2 hours Unknown Start at the top of the tallest building in th... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 120 NEW YORK
14 St. Patrick's Cathedral 460 Madison Avenue New York City NY New York 10022 United States 460 Madison Avenue, New York City, New York, 1... 40.758814 -73.976858 ... 17 Churches & Cathedrals, Sights & Landmarks NaN Unknown Enormous 13th-century-style Gothic church on M... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 NEW YORK
15 Ground Zero Museum Workshop 420 W 14th St New York City NY New York 10014-1064 United States 420 W 14th St, New York City, New York, 10014,... 40.741231 -74.006701 ... 18 Specialty Museums, Museums 1-2 hours Unknown Ground Zero Museum Workshop: Images & Artifact... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 120 NEW YORK
16 Staten Island Ferry 4 South Street New York City NY New York 10004 United States 4 South Street, New York City, New York, 10004... 40.701009 -74.013025 ... 19 Ferries, Transportation NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Transportation 60 NEW YORK
17 Radio City Music Hall 1260 6th Avenue (Avenue of the Americas) New York City NY New York 10020 United States 1260 6th Avenue (Avenue of the Americas), New ... 40.760012 -73.979946 ... 20 Theaters, Points of Interest & Landmarks, Conc... 2-3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 180 NEW YORK
18 Rockefeller Center 45 Rockefeller Plaza New York City NY New York 10111-0100 United States 45 Rockefeller Plaza, New York City, New York,... 40.759179 -73.977816 ... 21 Points of Interest & Landmarks, Architectural ... 1-2 hours No Rockefeller Center is a national historic land... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 120 NEW YORK
19 New York Public Library 476 5th Ave New York, NY 10018 New York City NY New York 10018 United States 476 5th Ave New York, NY 10018, New York City,... 40.753182 -73.982253 ... 23 Specialty Museums, Museums NaN Unknown Two marble lions mark the entrance to this Bea... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 120 NEW YORK
20 Madison Square Garden 4 Penn Plaza New York City NY New York 10121-0078 United States 4 Penn Plaza, New York City, New York, 10121, ... 40.750354 -73.993371 ... 24 Arenas & Stadiums, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 NEW YORK
21 The Met Cloisters 99 Margaret Corbin Drive New York City NY New York 10040 United States 99 Margaret Corbin Drive, New York City, New Y... 40.864848 -73.931808 ... 25 Sacred & Religious Sites, Art Museums, Museums... More than 3 hours Yes Branch of the Metropolitan Museum of Art that ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 360 NEW YORK
22 Tenement Museum 103 Orchard Street New York City NY New York 10002 United States 103 Orchard Street, New York City, New York, 1... 40.718793 -73.990070 ... 26 Specialty Museums, Museums 1-2 hours Unknown The Tenement Museum preserves the history of i... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 120 NEW YORK
23 Times Square Broadway New York City NY New York 10036 United States Broadway, New York City, New York, 10036, Unit... 40.760262 -73.993287 ... 27 Neighborhoods, Points of Interest & Landmarks,... 1-2 hours Unknown Central area in NYC that has many shops, resta... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 120 NEW YORK
24 The Museum of Modern Art (MoMA) 11 West 53rd Street New York City NY New York 10019 United States 11 West 53rd Street, New York City, New York, ... 40.761326 -73.977401 ... 29 Art Museums, Museums 2-3 hours Yes Located in the heart of midtown Manhattan, The... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 180 NEW YORK
25 Cistra Usa Gifts and Crafts 12535 South Harbor Blvd Garden Grove CA California 92840 United States 12535 South Harbor Blvd, Garden Grove, Califor... 33.781074 -117.915270 ... 8 Gift & Specialty Shops, Shopping <1 hour Unknown Handmade handbags and purses from leather, dec... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Shopping 60 ORANGE
26 The Getty Center 1200 Getty Center Dr Los Angeles CA California 90049 United States 1200 Getty Center Dr, Los Angeles, California,... 34.079033 -118.475106 ... 1 Specialty Museums, Museums 2-3 hours Yes Spectacular museum boasts a most impressive co... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 180 LOS ANGELES
27 Universal Studios Hollywood 100 Universal City Plaza Los Angeles CA California 91608-1002 United States 100 Universal City Plaza, Los Angeles, Califor... 34.137868 -118.352863 ... 2 Theme Parks, Water & Amusement Parks NaN Unknown Get ready for the ultimate Hollywood experienc... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... ThemePark 480 LOS ANGELES
28 Griffith Observatory 2800 E. Observatory Rd. Los Angeles CA California 90027-1299 United States 2800 E. Observatory Rd., Los Angeles, Californ... 34.118375 -118.300354 ... 3 Observation Decks & Towers, Observatories & Pl... More than 3 hours No A public observatory owned and operated by the... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 360 LOS ANGELES
29 California Science Center 700 Exposition Park Drive Los Angeles CA California 90037-1254 United States 700 Exposition Park Drive, Los Angeles, Califo... 34.015788 -118.286209 ... 5 Science Museums, Museums 2-3 hours No Space Shuttle Endeavour on display in the Cali... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 180 LOS ANGELES
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
15306 Sri Sri Radha Krishna Temple 311 West 8500 South Spanish Fork UT Utah 84660 United States 311 West 8500 South, Spanish Fork, Utah, 84660... 40.075693 -111.662100 ... 2 Sacred & Religious Sites, Sights & Landmarks 1-2 hours Unknown South of Spanish Fork, Utah, in a semi rural e... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 120 UTAH
15307 Spanish Fork River Park HWY 6 Spanish Fork Canyon Spanish Fork UT Utah 84660 United States HWY 6 Spanish Fork Canyon, Spanish Fork, Utah,... 40.083844 -111.596030 ... 3 Mountains, Bodies of Water, Fun & Games, Natur... More than 3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 360 UTAH
15308 Beckman Mill 11600 S County Road H Beloit WI Wisconsin 53511-9203 United States 11600 S County Road H, Beloit, Wisconsin, 5351... 42.511646 -89.169087 ... 1 Historic Sites, Sights & Landmarks 1-2 hours Unknown The mill is open on Saturdays and Sundays, 1-4... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 120 ROCK
15309 Beloit Snappers 2301 Skyline Dr Beloit WI Wisconsin 53511-2651 United States 2301 Skyline Dr, Beloit, Wisconsin, 53511, Uni... 42.534982 -89.008666 ... 2 Arenas & Stadiums, Sights & Landmarks 2-3 hours Unknown The Beloit Snappers are a Minor League Basebal... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 180 ROCK
15310 Beloit Farmer's market 100 State St Beloit WI Wisconsin 53511-6234 United States 100 State St, Beloit, Wisconsin, 53511, United... 42.497522 -89.036993 ... 3 Farmers Markets, Food & Drink, Shopping 1-2 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Shopping 120 ROCK
15311 Angel Museum NaN Beloit WI Wisconsin NaN United States , Beloit, Wisconsin, , United States 42.508348 -89.031777 ... 4 Specialty Museums, Museums NaN Unknown The world's largest angel museum, featuring th... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 120 ROCK
15312 Logan Museum of Anthropology 700 College St Beloit WI Wisconsin 53511-5509 United States 700 College St, Beloit, Wisconsin, 53511, Unit... 42.503033 -89.030905 ... 5 History Museums, Museums NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 120 ROCK
15313 Turtle Island Playground 1530 Riverside Drive Beloit WI Wisconsin NaN United States 1530 Riverside Drive, Beloit, Wisconsin, , Uni... 42.514672 -89.032560 ... 6 Playgrounds, Nature & Parks, Fun & Games NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 60 ROCK
15314 The Apple Hut 1718 W Walters Road Beloit WI Wisconsin 53511 United States 1718 W Walters Road, Beloit, Wisconsin, 53511,... 42.565790 -89.058348 ... 7 Farms, Sights & Landmarks <1 hour Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 60 ROCK
15315 Pohlman Field 2301 Skyline Dr. Beloit WI Wisconsin 53511 United States 2301 Skyline Dr., Beloit, Wisconsin, 53511, Un... 42.534982 -89.008666 ... 8 Arenas & Stadiums, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 ROCK
15316 Beach at Panama City NaN Panama City Beach FL Florida NaN United States , Panama City Beach, Florida, , United States 30.176591 -85.805488 ... 1 Beaches, Nature & Parks, Outdoor Activities NaN Unknown This 27-mile long beach offers a wide variety ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 60 BAY
15317 Panama City Beach Winery 8730 Thomas Dr Panama City Beach FL Florida 32408 United States 8730 Thomas Dr, Panama City Beach, Florida, 32... 30.168712 -85.789188 ... 2 Wineries & Vineyards, Food & Drink NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Food 60 BAY
15318 Water Planet Mailing: 203 Greenwood Dr Panama City Beach FL Florida 32407 United States Mailing: 203 Greenwood Dr, Panama City Beach, ... 33.954048 -118.372252 ... 4 Educational sites, Sights & Landmarks NaN Unknown Water Planet offers a variety of dolphin swim ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 LOS ANGELES
15319 Pier Park 600 Pier Park Dr Ste 125 Panama City Beach FL Florida 32413-2179 United States 600 Pier Park Dr Ste 125, Panama City Beach, F... 30.221396 -85.871701 ... 5 Shopping Malls, Shopping NaN Unknown Pier Park located in Panama City Beach has bec... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Shopping 30 BAY
15320 Gulf World Marine Park 15412 Front Beach Rd Panama City Beach FL Florida 32413-2502 United States 15412 Front Beach Rd, Panama City Beach, Flori... 30.211304 -85.867198 ... 6 Gardens, Nature & Parks, Zoos & Aquariums 1-2 hours Unknown Our daily shows featuring Dolphins, Sea Lions,... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Zoo 120 BAY
15321 Shell Island NaN Panama City Beach FL Florida NaN United States , Panama City Beach, Florida, , United States 30.176591 -85.805488 ... 7 Islands, Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 60 BAY
15322 Coconut Creek Family Fun Park 9807 Front Beach Rd Panama City Beach FL Florida 32407-4134 United States 9807 Front Beach Rd, Panama City Beach, Florid... 30.178373 -85.798288 ... 8 Theme Parks, Water & Amusement Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... ThemePark 480 BAY
15323 Ripley's Believe It Or Not 9907 Front Beach Rd Panama City Beach FL Florida 32407-4136 United States 9907 Front Beach Rd, Panama City Beach, Florid... 30.178254 -85.799376 ... 9 Specialty Museums, Museums NaN Unknown Ripley's Believe It or Not!(R) Panama City Bea... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum 120 BAY
15324 Shipwreck Island Waterpark 12201 Hutchison Blvd Panama City Beach FL Florida 32407-3424 United States 12201 Hutchison Blvd, Panama City Beach, Flori... 30.194957 -85.828282 ... 10 Water Parks, Water & Amusement Parks NaN Unknown Plunge, ride, whirl, dunk, swim - whew! At the... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 60 BAY
15325 Conservation Park 100 Conservation Drive Panama City Beach FL Florida NaN United States 100 Conservation Drive, Panama City Beach, Flo... 30.278178 -85.915006 ... 11 Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 60 BAY
15326 ZooWorld Zoological Conservatory 9008 Front Beach Rd Panama City Beach FL Florida 32407-4235 United States 9008 Front Beach Rd, Panama City Beach, Florid... 30.181829 -85.787587 ... 12 Nature & Parks, Outdoor Activities, Zoos & Aqu... NaN Unknown Home to over 350 animals and 259 species of pl... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Zoo 120 BAY
15327 Russell Fields City Pier 12213 Front Beach Rd Panama City Beach FL Florida 32407-3407 United States 12213 Front Beach Rd, Panama City Beach, Flori... 30.189771 -85.830854 ... 16 Piers & Boardwalks, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 BAY
15328 M.B. Miller County Pier 12213 Front Beach Rd Panama City Beach FL Florida 32407-3407 United States 12213 Front Beach Rd, Panama City Beach, Flori... 30.189771 -85.830854 ... 18 Piers & Boardwalks, Points of Interest & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 BAY
15329 Race City PCB 9523 Front Beach Rd Panama City Beach FL Florida 32407-4149 United States 9523 Front Beach Rd, Panama City Beach, Florid... 30.179894 -85.794784 ... 21 Theme Parks, Water & Amusement Parks 1-2 hours Unknown Come and experience the thrills of the largest... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... ThemePark 120 BAY
15330 Frank Brown Park 16200 Panama City Beach Pkwy Panama City Beach FL Florida 32413 United States 16200 Panama City Beach Pkwy, Panama City Beac... 30.229834 -85.877462 ... 23 Nature & Wildlife Areas, Fun & Games, Nature &... More than 3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 360 BAY
15331 Rick Seltzer Park 7419 Thomas Dr Panama City Beach FL Florida 32408-7585 United States 7419 Thomas Dr, Panama City Beach, Florida, 32... 30.159265 -85.777395 ... 24 Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park 60 BAY
15332 Slingshot & Indy Speedway 8762 Thomas Dr Panama City Beach FL Florida 32408-4000 United States 8762 Thomas Dr, Panama City Beach, Florida, 32... 30.169577 -85.791476 ... 25 Auto Race Tracks, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark 30 BAY
15333 Nivol Brewery 473 N Richard Jackson Blvd Panama City Beach FL Florida 32407-3647 United States 473 N Richard Jackson Blvd, Panama City Beach,... 30.189038 -85.821273 ... 26 Breweries, Food & Drink NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Food 60 BAY
15334 Panama City Beach Library 12500 Hutchison Blvd Panama City Beach FL Florida 32407-3239 United States 12500 Hutchison Blvd, Panama City Beach, Flori... 30.197243 -85.830504 ... 27 Libraries, Traveler Resources NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Library 15 BAY
15335 Gator Country Alligator Park 6523 Hwy 79 Panama City Beach FL Florida NaN United States 6523 Hwy 79, Panama City Beach, Florida, , Uni... 30.291566 -85.861393 ... 28 NaN NaN Unknown Gator Country Alligator Park at Airboat Advent... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Other 15 BAY

15336 rows × 22 columns


In [329]:
new_poi_df.review_score.value_counts()


Out[329]:
4.5    7541
4.0    2985
5.0    2471
0.0    1029
3.5     760
3.0     357
2.5      84
2.0      49
1.0      47
1.5      13
Name: review_score, dtype: int64

In [331]:
new_poi_df.loc[363]


Out[331]:
name                                           Palace of Fine Arts Theatre
street_address                                            3301 Lyon Street
city                                                         San Francisco
state_abb                                                               CA
state                                                           California
postal_code                                                          94123
country                                                      United States
address                  3301 Lyon Street, San Francisco, California, 9...
coord_lat                                                          37.8029
coord_long                                                        -122.449
num_reviews                                                           3670
review_score                                                           4.5
ranking                                                                 10
tag                                             Theaters, Concerts & Shows
raw_visit_length                                                       NaN
fee                                                                Unknown
description              Designed to look like a Roman ruin, this golde...
url                      http://www.tripadvisor.com/Attraction_Review-g...
geo_content              {u'status': u'OK', u'results': [{u'geometry': ...
poi_type                                                           Theater
adjusted_visit_length                                                  120
county                                                       SAN FRANCISCO
Name: 363, dtype: object

In [284]:
for info in geo_content[0]['address_components']:
    if info['types'][0] == "administrative_area_level_2":
        print info['types']


[u'administrative_area_level_2', u'political']

In [261]:
import ast
ast.literal_eval(new_poi_df.geo_content[0])['results']


Out[261]:
[{u'address_components': [{u'long_name': u'Central Park',
    u'short_name': u'Central Park',
    u'types': [u'establishment', u'park', u'point_of_interest']},
   {u'long_name': u'Manhattan',
    u'short_name': u'Manhattan',
    u'types': [u'political', u'sublocality', u'sublocality_level_1']},
   {u'long_name': u'New York',
    u'short_name': u'New York',
    u'types': [u'locality', u'political']},
   {u'long_name': u'New York County',
    u'short_name': u'New York County',
    u'types': [u'administrative_area_level_2', u'political']},
   {u'long_name': u'New York',
    u'short_name': u'NY',
    u'types': [u'administrative_area_level_1', u'political']},
   {u'long_name': u'United States',
    u'short_name': u'US',
    u'types': [u'country', u'political']},
   {u'long_name': u'10024',
    u'short_name': u'10024',
    u'types': [u'postal_code']}],
  u'formatted_address': u'Central Park, New York, NY 10024, USA',
  u'geometry': {u'location': {u'lat': 40.7828647, u'lng': -73.9653551},
   u'location_type': u'APPROXIMATE',
   u'viewport': {u'northeast': {u'lat': 40.7842136802915,
     u'lng': -73.96400611970849},
    u'southwest': {u'lat': 40.7815157197085, u'lng': -73.96670408029149}}},
  u'place_id': u'ChIJ4zGFAZpYwokRGUGph3Mf37k',
  u'types': [u'establishment', u'park', u'point_of_interest']}]

In [195]:
for index in new_poi_df.index:
    if str(new_poi_df.raw_visit_length[index]) == 'nan':


Out[195]:
name street_address city state_abb state postal_code country address coord_lat coord_long num_reviews review_score ranking tag raw_visit_length fee description url geo_content poi_type
0 Central Park 59th to 110th Street New York City NY New York 10022 United States 59th to 110th Street, New York City, New York,... 40.782865 -73.965355 92215 4.5 1 Points of Interest & Landmarks, Nature & Parks... More than 3 hours No For more than 150 years, visitors have flocked... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
1 The National 9/11 Memorial & Museum 180 Greenwich St New York City NY New York 10007-0089 United States 180 Greenwich St, New York City, New York, 100... 40.711415 -74.012479 61712 4.5 2 Specialty Museums, Points of Interest & Landma... 1-2 hours No The National September 11 Memorial & Museum is... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
2 The Metropolitan Museum of Art 1000 5th Ave New York City NY New York 10028-0198 United States 1000 5th Ave, New York City, New York, 10028, ... 40.779166 -73.962928 40443 5.0 3 Points of Interest & Landmarks, Art Museums, M... 2-3 hours Yes At New York City's most visited museum and att... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
3 Top of the Rock Observation Deck 30 Rockefeller Center New York City NY New York NaN United States 30 Rockefeller Center, New York City, New York... 40.758823 -73.979398 57358 4.5 4 Observation Decks & Towers, Lookouts, Sights &... NaN Yes Top of the Rock Observation Deck, the newly op... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
4 Manhattan Skyline NaN New York City NY New York NaN United States , New York City, New York, , United States 40.712784 -74.005941 15918 5.0 5 Points of Interest & Landmarks, Sights & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
5 Grand Central Terminal 89 E 42nd Street New York City NY New York 10017-5503 United States 89 E 42nd Street, New York City, New York, 100... 40.752496 -73.977302 28478 4.5 7 Architectural Buildings, Points of Interest & ... 1-2 hours Unknown Grand Central Terminal is the most extraordina... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
6 The High Line NaN New York City NY New York 10014 United States , New York City, New York, 10014, United States 40.736614 -74.009447 42015 4.5 8 Scenic Walking Areas, Nature & Parks, Sights &... 2-3 hours No The High Line is an elevated railway transform... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
7 Broadway NaN New York City NY New York NaN United States , New York City, New York, , United States 40.712784 -74.005941 22816 4.5 9 Points of Interest & Landmarks, Sights & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
8 Frick Collection 1 E. 70th St. New York City NY New York 10021-4994 United States 1 E. 70th St., New York City, New York, 10021,... 40.771181 -73.967350 7179 4.5 10 Art Museums, Museums NaN Unknown Henry Frick once resided in this 18th-century ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
9 Empire State Building 350 Fifth Avenue New York City NY New York 10118-0110 United States 350 Fifth Avenue, New York City, New York, 101... 40.748541 -73.985763 52446 4.5 11 Architectural Buildings, Observation Decks & T... 1-2 hours Yes The world-famous Empire State Building offers ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
10 Bryant Park NaN New York City NY New York 10017 United States , New York City, New York, 10017, United States 40.751985 -73.969780 15170 4.5 14 Nature & Parks 2-3 hours No Bryant Park has a French-style merry-go-round,... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
11 Statue of Liberty NaN New York City NY New York 10004 United States , New York City, New York, 10004, United States 40.703870 -74.013854 26918 4.5 15 Monuments & Statues, Points of Interest & Land... More than 3 hours Yes The Statue of Liberty Enlightening the World w... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
12 Quan Gio Bar & Grill 14241 Euclid St # C11 Garden Grove CA California 92843-4979 United States 14241 Euclid St # C11, Garden Grove, Californi... 33.756149 -117.938499 0 0.0 9 Bars & Clubs, Nightlife NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Nightlife
13 One World Observatory - World Trade Center 1 World Trade Center New York City NY New York 10007-0089 United States 1 World Trade Center, New York City, New York,... 40.713649 -74.008713 13948 4.5 16 Observation Decks & Towers, Architectural Buil... 1-2 hours Unknown Start at the top of the tallest building in th... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
14 St. Patrick's Cathedral 460 Madison Avenue New York City NY New York 10022 United States 460 Madison Avenue, New York City, New York, 1... 40.758814 -73.976858 8125 4.5 17 Churches & Cathedrals, Sights & Landmarks NaN Unknown Enormous 13th-century-style Gothic church on M... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15 Ground Zero Museum Workshop 420 W 14th St New York City NY New York 10014-1064 United States 420 W 14th St, New York City, New York, 10014,... 40.741231 -74.006701 2658 5.0 18 Specialty Museums, Museums 1-2 hours Unknown Ground Zero Museum Workshop: Images & Artifact... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
16 Staten Island Ferry 4 South Street New York City NY New York 10004 United States 4 South Street, New York City, New York, 10004... 40.701009 -74.013025 14354 4.5 19 Ferries, Transportation NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Transportation
17 Radio City Music Hall 1260 6th Avenue (Avenue of the Americas) New York City NY New York 10020 United States 1260 6th Avenue (Avenue of the Americas), New ... 40.760012 -73.979946 7025 4.5 20 Theaters, Points of Interest & Landmarks, Conc... 2-3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
18 Rockefeller Center 45 Rockefeller Plaza New York City NY New York 10111-0100 United States 45 Rockefeller Plaza, New York City, New York,... 40.759179 -73.977816 16707 4.5 21 Points of Interest & Landmarks, Architectural ... 1-2 hours No Rockefeller Center is a national historic land... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
19 New York Public Library 476 5th Ave New York, NY 10018 New York City NY New York 10018 United States 476 5th Ave New York, NY 10018, New York City,... 40.753182 -73.982253 9547 4.5 23 Specialty Museums, Museums NaN Unknown Two marble lions mark the entrance to this Bea... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
20 Madison Square Garden 4 Penn Plaza New York City NY New York 10121-0078 United States 4 Penn Plaza, New York City, New York, 10121, ... 40.750354 -73.993371 6500 4.5 24 Arenas & Stadiums, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
21 The Met Cloisters 99 Margaret Corbin Drive New York City NY New York 10040 United States 99 Margaret Corbin Drive, New York City, New Y... 40.864848 -73.931808 4064 4.5 25 Sacred & Religious Sites, Art Museums, Museums... More than 3 hours Yes Branch of the Metropolitan Museum of Art that ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
22 Tenement Museum 103 Orchard Street New York City NY New York 10002 United States 103 Orchard Street, New York City, New York, 1... 40.718793 -73.990070 4846 4.5 26 Specialty Museums, Museums 1-2 hours Unknown The Tenement Museum preserves the history of i... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
23 Times Square Broadway New York City NY New York 10036 United States Broadway, New York City, New York, 10036, Unit... 40.760262 -73.993287 30873 4.5 27 Neighborhoods, Points of Interest & Landmarks,... 1-2 hours Unknown Central area in NYC that has many shops, resta... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
24 The Museum of Modern Art (MoMA) 11 West 53rd Street New York City NY New York 10019 United States 11 West 53rd Street, New York City, New York, ... 40.761326 -73.977401 12160 4.5 29 Art Museums, Museums 2-3 hours Yes Located in the heart of midtown Manhattan, The... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
25 Cistra Usa Gifts and Crafts 12535 South Harbor Blvd Garden Grove CA California 92840 United States 12535 South Harbor Blvd, Garden Grove, Califor... 33.781074 -117.915270 0 0.0 8 Gift & Specialty Shops, Shopping <1 hour Unknown Handmade handbags and purses from leather, dec... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Shopping
26 The Getty Center 1200 Getty Center Dr Los Angeles CA California 90049 United States 1200 Getty Center Dr, Los Angeles, California,... 34.079033 -118.475106 10328 5.0 1 Specialty Museums, Museums 2-3 hours Yes Spectacular museum boasts a most impressive co... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
27 Universal Studios Hollywood 100 Universal City Plaza Los Angeles CA California 91608-1002 United States 100 Universal City Plaza, Los Angeles, Califor... 34.137868 -118.352863 24681 4.5 2 Theme Parks, Water & Amusement Parks NaN Unknown Get ready for the ultimate Hollywood experienc... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... ThemePark
28 Griffith Observatory 2800 E. Observatory Rd. Los Angeles CA California 90027-1299 United States 2800 E. Observatory Rd., Los Angeles, Californ... 34.118375 -118.300354 12585 4.5 3 Observation Decks & Towers, Observatories & Pl... More than 3 hours No A public observatory owned and operated by the... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
29 California Science Center 700 Exposition Park Drive Los Angeles CA California 90037-1254 United States 700 Exposition Park Drive, Los Angeles, Califo... 34.015788 -118.286209 2864 4.5 5 Science Museums, Museums 2-3 hours No Space Shuttle Endeavour on display in the Cali... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
15306 Sri Sri Radha Krishna Temple 311 West 8500 South Spanish Fork UT Utah 84660 United States 311 West 8500 South, Spanish Fork, Utah, 84660... 40.075693 -111.662100 20 4.0 2 Sacred & Religious Sites, Sights & Landmarks 1-2 hours Unknown South of Spanish Fork, Utah, in a semi rural e... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15307 Spanish Fork River Park HWY 6 Spanish Fork Canyon Spanish Fork UT Utah 84660 United States HWY 6 Spanish Fork Canyon, Spanish Fork, Utah,... 40.083844 -111.596030 3 4.5 3 Mountains, Bodies of Water, Fun & Games, Natur... More than 3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
15308 Beckman Mill 11600 S County Road H Beloit WI Wisconsin 53511-9203 United States 11600 S County Road H, Beloit, Wisconsin, 5351... 42.511646 -89.169087 40 4.5 1 Historic Sites, Sights & Landmarks 1-2 hours Unknown The mill is open on Saturdays and Sundays, 1-4... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15309 Beloit Snappers 2301 Skyline Dr Beloit WI Wisconsin 53511-2651 United States 2301 Skyline Dr, Beloit, Wisconsin, 53511, Uni... 42.534982 -89.008666 51 4.5 2 Arenas & Stadiums, Sights & Landmarks 2-3 hours Unknown The Beloit Snappers are a Minor League Basebal... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15310 Beloit Farmer's market 100 State St Beloit WI Wisconsin 53511-6234 United States 100 State St, Beloit, Wisconsin, 53511, United... 42.497522 -89.036993 13 5.0 3 Farmers Markets, Food & Drink, Shopping 1-2 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Shopping
15311 Angel Museum NaN Beloit WI Wisconsin NaN United States , Beloit, Wisconsin, , United States 42.508348 -89.031777 25 4.0 4 Specialty Museums, Museums NaN Unknown The world's largest angel museum, featuring th... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
15312 Logan Museum of Anthropology 700 College St Beloit WI Wisconsin 53511-5509 United States 700 College St, Beloit, Wisconsin, 53511, Unit... 42.503033 -89.030905 19 4.0 5 History Museums, Museums NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
15313 Turtle Island Playground 1530 Riverside Drive Beloit WI Wisconsin NaN United States 1530 Riverside Drive, Beloit, Wisconsin, , Uni... 42.514672 -89.032560 14 4.5 6 Playgrounds, Nature & Parks, Fun & Games NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
15314 The Apple Hut 1718 W Walters Road Beloit WI Wisconsin 53511 United States 1718 W Walters Road, Beloit, Wisconsin, 53511,... 42.565790 -89.058348 7 4.5 7 Farms, Sights & Landmarks <1 hour Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15315 Pohlman Field 2301 Skyline Dr. Beloit WI Wisconsin 53511 United States 2301 Skyline Dr., Beloit, Wisconsin, 53511, Un... 42.534982 -89.008666 13 4.5 8 Arenas & Stadiums, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15316 Beach at Panama City NaN Panama City Beach FL Florida NaN United States , Panama City Beach, Florida, , United States 30.176591 -85.805488 3822 4.5 1 Beaches, Nature & Parks, Outdoor Activities NaN Unknown This 27-mile long beach offers a wide variety ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
15317 Panama City Beach Winery 8730 Thomas Dr Panama City Beach FL Florida 32408 United States 8730 Thomas Dr, Panama City Beach, Florida, 32... 30.168712 -85.789188 1032 5.0 2 Wineries & Vineyards, Food & Drink NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Food
15318 Water Planet Mailing: 203 Greenwood Dr Panama City Beach FL Florida 32407 United States Mailing: 203 Greenwood Dr, Panama City Beach, ... 33.954048 -118.372252 414 5.0 4 Educational sites, Sights & Landmarks NaN Unknown Water Planet offers a variety of dolphin swim ... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15319 Pier Park 600 Pier Park Dr Ste 125 Panama City Beach FL Florida 32413-2179 United States 600 Pier Park Dr Ste 125, Panama City Beach, F... 30.221396 -85.871701 1658 4.5 5 Shopping Malls, Shopping NaN Unknown Pier Park located in Panama City Beach has bec... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Shopping
15320 Gulf World Marine Park 15412 Front Beach Rd Panama City Beach FL Florida 32413-2502 United States 15412 Front Beach Rd, Panama City Beach, Flori... 30.211304 -85.867198 1643 4.5 6 Gardens, Nature & Parks, Zoos & Aquariums 1-2 hours Unknown Our daily shows featuring Dolphins, Sea Lions,... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Zoo
15321 Shell Island NaN Panama City Beach FL Florida NaN United States , Panama City Beach, Florida, , United States 30.176591 -85.805488 1416 4.5 7 Islands, Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
15322 Coconut Creek Family Fun Park 9807 Front Beach Rd Panama City Beach FL Florida 32407-4134 United States 9807 Front Beach Rd, Panama City Beach, Florid... 30.178373 -85.798288 407 4.5 8 Theme Parks, Water & Amusement Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... ThemePark
15323 Ripley's Believe It Or Not 9907 Front Beach Rd Panama City Beach FL Florida 32407-4136 United States 9907 Front Beach Rd, Panama City Beach, Florid... 30.178254 -85.799376 627 4.0 9 Specialty Museums, Museums NaN Unknown Ripley's Believe It or Not!(R) Panama City Bea... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Museum
15324 Shipwreck Island Waterpark 12201 Hutchison Blvd Panama City Beach FL Florida 32407-3424 United States 12201 Hutchison Blvd, Panama City Beach, Flori... 30.194957 -85.828282 395 4.5 10 Water Parks, Water & Amusement Parks NaN Unknown Plunge, ride, whirl, dunk, swim - whew! At the... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
15325 Conservation Park 100 Conservation Drive Panama City Beach FL Florida NaN United States 100 Conservation Drive, Panama City Beach, Flo... 30.278178 -85.915006 204 4.5 11 Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
15326 ZooWorld Zoological Conservatory 9008 Front Beach Rd Panama City Beach FL Florida 32407-4235 United States 9008 Front Beach Rd, Panama City Beach, Florid... 30.181829 -85.787587 446 4.0 12 Nature & Parks, Outdoor Activities, Zoos & Aqu... NaN Unknown Home to over 350 animals and 259 species of pl... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Zoo
15327 Russell Fields City Pier 12213 Front Beach Rd Panama City Beach FL Florida 32407-3407 United States 12213 Front Beach Rd, Panama City Beach, Flori... 30.189771 -85.830854 175 4.5 16 Piers & Boardwalks, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15328 M.B. Miller County Pier 12213 Front Beach Rd Panama City Beach FL Florida 32407-3407 United States 12213 Front Beach Rd, Panama City Beach, Flori... 30.189771 -85.830854 92 4.5 18 Piers & Boardwalks, Points of Interest & Landm... NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15329 Race City PCB 9523 Front Beach Rd Panama City Beach FL Florida 32407-4149 United States 9523 Front Beach Rd, Panama City Beach, Florid... 30.179894 -85.794784 261 3.5 21 Theme Parks, Water & Amusement Parks 1-2 hours Unknown Come and experience the thrills of the largest... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... ThemePark
15330 Frank Brown Park 16200 Panama City Beach Pkwy Panama City Beach FL Florida 32413 United States 16200 Panama City Beach Pkwy, Panama City Beac... 30.229834 -85.877462 33 5.0 23 Nature & Wildlife Areas, Fun & Games, Nature &... More than 3 hours Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
15331 Rick Seltzer Park 7419 Thomas Dr Panama City Beach FL Florida 32408-7585 United States 7419 Thomas Dr, Panama City Beach, Florida, 32... 30.159265 -85.777395 56 4.5 24 Nature & Parks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Park
15332 Slingshot & Indy Speedway 8762 Thomas Dr Panama City Beach FL Florida 32408-4000 United States 8762 Thomas Dr, Panama City Beach, Florida, 32... 30.169577 -85.791476 42 4.5 25 Auto Race Tracks, Sights & Landmarks NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Landmark
15333 Nivol Brewery 473 N Richard Jackson Blvd Panama City Beach FL Florida 32407-3647 United States 473 N Richard Jackson Blvd, Panama City Beach,... 30.189038 -85.821273 20 5.0 26 Breweries, Food & Drink NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Food
15334 Panama City Beach Library 12500 Hutchison Blvd Panama City Beach FL Florida 32407-3239 United States 12500 Hutchison Blvd, Panama City Beach, Flori... 30.197243 -85.830504 32 4.5 27 Libraries, Traveler Resources NaN Unknown NaN http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Library
15335 Gator Country Alligator Park 6523 Hwy 79 Panama City Beach FL Florida NaN United States 6523 Hwy 79, Panama City Beach, Florida, , Uni... 30.291566 -85.861393 72 4.0 28 NaN NaN Unknown Gator Country Alligator Park at Airboat Advent... http://www.tripadvisor.com/Attraction_Review-g... {u'status': u'OK', u'results': [{u'geometry': ... Other

15336 rows × 20 columns


In [324]:
np.minimum?

In [325]:
test = []
test.extend([1,2,3])

In [326]:
test


Out[326]:
[1, 2, 3]

In [361]:
labels = '1 2 1 1 1 0 3 1 2 1 0 0 1 2 3 1 1 1 1 2 1 0 1 2 2 1'
labels = map(int,labels.split(' '))

In [376]:
from collections import Counter
c = Counter(labels)

In [181]:
########helpers.py
import psycopg2
import simplejson
import numpy as np
from distance import *
from collections import Counter
# conn_str = "dbname='travel_with_friends' user='Gon' host='localhost'"
my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'
my_key = "AIzaSyAA9Te-Dpi6ruT3SDpaZzVXQtlRshf_jsk"
my_key = "AIzaSyB6KDP3b55OlvCHb8U4PyVMfkLvNJEBidM"
my_key = "AIzaSyDQexgUGlMYF5pkQ3w42xQoEqsTONfMI-I"
def check_valid_state(state):
    '''
    Only valid within the U.S.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    state = state.replace('_',' ')
    cur.execute("select distinct state from poi_detail_table_v2 where state = '%s';" %(state.title()))
    c = cur.fetchone()
    return bool(c)
    
def check_valid_city(city,state):
    '''
    Only valid within the U.S.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    state = state.replace('_',' ')
    city = city.replace('_',' ')
    cur.execute("select distinct city, state from poi_detail_table_v2 where city = '%s' and state = '%s';" %(city.title(), state.title()))
    c = cur.fetchone()
    return bool(c)

def find_county(state, city):
    '''
    Only valid within the U.S.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    city = city.replace('_',' ')
    cur.execute("select distinct county from county_table where city = '%s' and state = '%s';" %(city.title(), state.title()))

    county = cur.fetchone()
    conn.close()
    if county:
        return county[0]
    else:
        return None

def db_start_location(county, state, city):
    '''
    Get numpy array of county related POIs.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    if county:
        cur.execute("select index, coord_lat, coord_long, adjusted_visit_length, ranking, review_score, num_reviews from poi_detail_table_v2     where county = '%s' and state = '%s'; "%(county.upper(), state.title()))
    else:
        cur.execute("select index, coord_lat, coord_long, adjusted_visit_length, ranking, review_score, num_reviews from poi_detail_table_v2     where city = '%s' and state = '%s'; "%(city.title(), state.title()))
    a = cur.fetchall()
    conn.close()
    return np.array(a)

def get_event_ids_list(trip_locations_id):
    '''
    Input: trip_locations_id
    Output: evnet_ids, event_type = ['big', 'small', 'med', 'add',]
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select event_ids,event_type from day_trip_table where trip_locations_id = '%s' " %(trip_locations_id))
    event_ids,event_type = cur.fetchone()
    event_ids = ast.literal_eval(event_ids)
    conn.close()
    return event_ids,event_type


def db_event_cloest_distance(trip_locations_id=None,event_ids=None, event_type = 'add',new_event_id = None, city_name =None):
    '''
    Get matrix cloest distance
    '''
    if new_event_id or not event_ids:
        event_ids, event_type = get_event_ids_list(trip_locations_id)
        if new_event_id:
            event_ids.append(new_event_id)
            
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()
    points=[]
    # points = np.zeros((len(event_ids), 3))
    for i,v in enumerate(event_ids):
        cur.execute("select index, coord_lat, coord_long, city , ranking from poi_detail_table_v2   where index = %i;"%(float(v)))
        points.append(cur.fetchone())
    conn.close()

    points = check_NO_1(points, city_name)
    # print 'db_distance',points
    n,D = mk_matrix(points[:,1:3], geopy_dist)
    if len(points) >= 3:
        if event_type == 'add':
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            return np.array(event_ids)[tour], event_type
        #need to figure out other cases
        else:
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            return np.array(event_ids)[tour], event_type
    else:
        return np.array(event_ids), event_type

def check_NO_1(poi_list, city_name):
    city_name = city_name.replace('_',' ')
    if len(poi_list)==1:
        return np.array(poi_list)
    for i, poi in enumerate(poi_list):
        if (poi[3] == city_name) and (poi[4]==1):
            number_one =poi_list.pop(i)
            return np.vstack((np.array(number_one),np.array(poi_list)))
    return np.array(poi_list)


def check_full_trip_id(full_trip_id, debug):
    '''
    Check full trip id exist or not.  
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select details from full_trip_table where full_trip_id = '%s'" %(full_trip_id)) 
    a = cur.fetchone()
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def check_day_trip_id(day_trip_id, debug):
    '''
    Check day trip id exist or not.  
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select details from day_trip_table where trip_locations_id = '%s'" %(day_trip_id)) 
    a = cur.fetchone()
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def check_travel_time_id(new_id):
    '''
    Check google driving time exisit or not for the 2 point poi id.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select google_driving_time from google_travel_time_table where id_field = '%s'" %(new_id))
    a = cur.fetchone()
    conn.close()
    if bool(a):
        return True
    else:
        return False

#May need to improve by adding #reviews in this. :)
def sorted_events(info,ix):
    '''
    find the event_id, ranking and review_score, num_reviews columns
    sorted base on ranking then review_score, num_reviews
    
    return sorted list 
    '''
    event_ = info[ix][:,[0,4,5,6]]
    return np.array(sorted(event_, key=lambda x: (x[1], -x[3], -x[2])))

#Need to make this more efficient
def create_event_id_list(big_,medium_,small_):
    # print big_,medium_,small_
    event_type = ''
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_[0,1] <= medium_[0,1]):
            if small_.shape[0] >= 6:
                event_ids = list(np.concatenate((big_[:1,0], small_[0:6,0]),axis=0))  
            elif small_.shape[0]>0:
                event_ids = list(np.concatenate((big_[:1,0], small_[:,0]),axis=0)) 
            else:
                event_ids = list(np.array(sorted(big_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
            event_type = 'big'
        else:
            if small_.shape[0] >= 8:
                event_ids = list(np.concatenate((medium_[0:2,0], small_[0:8,0]),axis=0))
            elif small_.shape[0]>0:
                event_ids = list(np.concatenate((medium_[0:2,0], small_[:,0]),axis=0))
            else:
                event_ids = list(np.array(sorted(medium_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            event_ids = list(np.concatenate((medium_[0:2,0], small_[0:8,0]),axis=0))
        elif small_.shape[0]>0:
            event_ids = list(np.concatenate((medium_[0:2,0], small_[:,0]),axis=0))
        else:
            event_ids = list(np.array(sorted(medium_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
        event_type = 'med'
    else:
        if small_.shape[0] >= 10:
            if medium_.shape[0]==0:
                event_ids = list(np.array(sorted(small_[0:10,:], key=lambda x: (x[1],-x[2])))[:,0])
            else:
                event_ids = list(np.array(sorted(np.vstack((medium_[:1,:], small_[0:10,:])), key=lambda x: (x[1],-x[2])))[:,0])
        elif small_.shape[0] > 0:
            if medium_.shape[0]==0:
                event_ids = list(np.array(sorted(small_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
            else:
                event_ids = list(np.array(sorted(np.vstack((medium_, small_)), key=lambda x: (x[1],-x[2])))[:,0])

        else:
            event_ids = list(np.array(sorted(medium_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
        event_type = 'small'
    # else:

    return event_ids, event_type

def db_google_driving_walking_time(event_ids, event_type):
    '''
    Get estimated travel time from google api.  
    Limit 1000 calls per day.
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    google_ids = []
    driving_time_list = []
    walking_time_list = []
    name_list = []
    for i,v in enumerate(event_ids[:-1]):
        id_ = str(v) + '0000'+str(event_ids[i+1])
        result_check_travel_time_id = check_travel_time_id(id_)
        if not result_check_travel_time_id:
            cur.execute("select name, coord_lat, coord_long from poi_detail_table_v2 where index = %s"%(v))
            orig_name, orig_coord_lat, orig_coord_long = cur.fetchone()
            orig_idx = v
            cur.execute("select name, coord_lat, coord_long from poi_detail_table_v2 where index = %s "%(event_ids[i+1]))
            dest_name, dest_coord_lat, dest_coord_long = cur.fetchone()
            dest_idx = event_ids[i+1]
            orig_coords = str(orig_coord_lat)+','+str(orig_coord_long)
            dest_coords = str(dest_coord_lat)+','+str(dest_coord_long)
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)

            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))

            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                print 'wrror driving?', google_driving_url
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))
                
            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                print 'error walking?',google_walking_url
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_event_ids = list(event_ids)
                new_event_ids.pop(i+1)
                new_event_ids = db_event_cloest_distance(event_ids=new_event_ids, event_type = event_type)
                return db_google_driving_walking_time(new_event_ids, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except:            
                print v, id_, driving_result #need to debug for this
            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except:
                google_walking_time = 9999
            # print 'google_driving time: ', google_driving_time
            
            google_driving_url = google_driving_url.replace("'s","%27")
            google_walking_url = google_walking_url.replace("'s","%27")

            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
            driving_result = str(driving_result).replace("'",'"')
            walking_result = str(walking_result).replace("'",'"')
            orig_name = orig_name.replace("'","''")
            dest_name = dest_name.replace("'","''")

            cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord_lat, orig_coord_long, dest_coord_lat,\
                                   dest_coord_long, orig_coords, dest_coords, google_driving_url, google_walking_url,\
                                   str(driving_result), str(walking_result), google_driving_time, google_walking_time))
            conn.commit()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
        else:
            
            cur.execute("select orig_name, dest_name, google_driving_time, google_walking_time from google_travel_time_table \
                         where id_field = '%s'" %(id_))
            orig_name, dest_name, google_driving_time, google_walking_time = cur.fetchone()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
    conn.close()
    # return event_ids, google_ids, name_list, driving_time_list, walking_time_list
    return event_ids, driving_time_list, walking_time_list

def db_remove_extra_events(event_ids, driving_time_list,walking_time_list, max_time_spent=600):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()   
    if len(event_ids) > 1:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index IN %s;" %(tuple(event_ids),))
        time_spent = cur.fetchone()[0]
        conn.close()
    else:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index = %s;" %(event_ids))
        time_spent = cur.fetchone()[0]
        conn.close()
    travel_time = int(sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list))))
    time_spent = int(time_spent) + travel_time
    if time_spent > max_time_spent:
        update_event_ids = event_ids[:-1]
        update_driving_time_list = driving_time_list[:-1]
        update_walking_time_list = walking_time_list[:-1]
        return db_remove_extra_events(update_event_ids, update_driving_time_list, update_walking_time_list)
    else:
        return event_ids, driving_time_list, walking_time_list, time_spent

def db_adjust_events(event_ids, driving_time_list,walking_time_list, not_visited_poi_lst, event_type, city, max_time_spent=600):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()   
    if len(event_ids) > 1:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index IN %s;" %(tuple(event_ids),))
        time_spent = cur.fetchone()[0]
        conn.close()
    else:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index = %s;" %(event_ids[0]))
        time_spent = cur.fetchone()[0]
        conn.close()
    travel_time = int(sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list))))
    time_spent = int(time_spent) + travel_time
    if time_spent > max_time_spent:
        update_event_ids = event_ids[:-1]
        update_driving_time_list = driving_time_list[:-1]
        update_walking_time_list = walking_time_list[:-1]
        not_visited_poi_lst.append(event_ids[-1])
        return db_adjust_events(update_event_ids, update_driving_time_list, update_walking_time_list,not_visited_poi_lst, event_type, city)
    elif (time_spent < max_time_spent - 240) and bool(not_visited_poi_lst):
        event_ids = list(event_ids)
        event_ids.extend(not_visited_poi_lst)
        event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type, city_name = city)
        event_ids, driving_time_list, walking_time_list = \
            db_google_driving_walking_time(event_ids, event_type)
        return db_adjust_events(event_ids, driving_time_list, walking_time_list, [], event_type, city)
    else:
        return event_ids, driving_time_list, walking_time_list, time_spent, not_visited_poi_lst


def db_day_trip_details(event_ids, i):
    conn=psycopg2.connect(conn_str)
    cur = conn.cursor()
    details = []
    #details dict includes: id, name,address, day
    for event_id in event_ids:
        cur.execute("select index, name, address, coord_lat, coord_long from poi_detail_table_v2 where index = %s;" %(event_id))
        a = cur.fetchone()
        details.append(str({'id': a[0],'name': a[1],'address': a[2], 'day': i, 'coord_lat': a[3], 'coord_long': a[4]}))
    conn.close()
    
    return details

def check_address(index):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select address from poi_detail_table_v2     where index = %s;"%(index))
    a = cur.fetchone()[0]
    conn.close()
    if a:
        return True
    else:
        return False

def db_address(event_ids):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    for i in event_ids[:-1]:
        if not check_address(i):
            cur.execute("select driving_result from google_travel_time_table where orig_idx = %s;" %(i))
            a= cur.fetchone()[0]
            add = ast.literal_eval(a)['origin_addresses'][0]
            cur.execute("update poi_detail_table_v2  set address = '%s' where index = %s;" %(add, i))
            conn.commit()
    last = event_ids[-1]
    if not check_address(last):
        cur.execute("select driving_result from google_travel_time_table where dest_idx = %s;" %(last))
        a= cur.fetchone()[0]
        add = ast.literal_eval(a)['destination_addresses'][0]
        cur.execute("update poi_detail_table_v2  set address = '%s' where index = %s;" %(add, last))
        conn.commit()
    conn.close()

def kmeans_leabels_day_order(day_labels):
    return [k for k, v in Counter(day_labels).most_common()]

In [ ]:


In [231]:
# -*- coding: utf-8 -*-
import psycopg2
import ast
import numpy as np
import simplejson
import urllib
# from helpers import *
conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"

def ajax_available_events(county, state):
    county=county.upper()
    state = state.title()
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select index, name from poi_detail_table_v2 where county='%s' and state='%s'" %(county,state))  
    poi_lst = [item for item in cur.fetchall()]
    conn.close()
    return poi_lst

def add_event(trip_locations_id, event_day, new_event_id=None, event_name=None, full_day = True, unseen_event = False):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
    (index, trip_locations_id, full_day, regular, county, state, detail, event_type, event_ids) = cur.fetchone()
    if unseen_event:
        index += 1
        trip_locations_id = '-'.join([str(eval(i)['id']) for i in eval(detail)])+'-'+event_name.replace(' ','-')+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id))
        a = cur.fetchone()
        if bool(a):
            conn.close()
            return trip_locations_id, a[0]
        else:
            cur.execute("select max(index) from day_trip_locations")
            index = cur.fetchone()[0]+1
            detail = list(eval(detail))
            #need to make sure the type is correct for detail!
            new_event = "{'address': 'None', 'id': 'None', 'day': %s, 'name': u'%s'}"%(event_day, event_name)
            detail.append(new_event)
            #get the right format of detail: change from list to string and remove brackets and convert quote type
            new_detail = str(detail).replace('"','').replace('[','').replace(']','').replace("'",'"')
            cur.execute("INSERT INTO day_trip_locations VALUES (%i, '%s',%s,%s,'%s','%s','%s');" %(index, trip_locations_id, full_day, False, county, state, new_detail))
            conn.commit()
            conn.close()
            return trip_locations_id, detail
    else:
        event_ids = db_event_cloest_distance(trip_locations_id, new_event_id)
        event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids,event_type = 'add')
        trip_locations_id = '-'.join(event_ids)+'-'+event_day
        cur.execute("select details from day_trip_locations where trip_locations_id='%s'" %(trip_locations_id)) 
        if not cur.fetchone():
            details = []
            db_address(event_ids)
            for item in event_ids:
                cur.execute("select index, name, address from poi_detail_table_v2 where index = '%s';" %(item))
                a = cur.fetchone()
                detail = {'id': a[0],'name': a[1],'address': a[2], 'day': event_day}
                details.append(detail)
            #need to make sure event detail can append to table!
            cur.execute("insert into day_trip_table (trip_locations_id,full_day, regular, county, state, details, event_type, event_ids) VALUES ( '%s', %s, %s, '%s', '%s', '%s', '%s', '%s')" %( trip_location_id, full_day, False, county, state, details, event_type, event_ids))
            conn.commit()
            conn.close()
            return trip_locations_id, details
        else:
            conn.close()
            #need to make sure type is correct.
            return trip_locations_id, a[0]

def remove_event(trip_locations_id, remove_event_id, remove_event_name=None, event_day=None, full_day = True):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(trip_locations_id))  
    (index, trip_locations_id, full_day, regular, county, state, detail, event_type, event_ids) = cur.fetchone()
    new_event_ids = ast.literal_eval(event_ids)
    new_event_ids.remove(remove_event_id)
    new_trip_locations_id = '-'.join(str(event_id) for event_id in new_event_ids)
    cur.execute("select * from day_trip_table where trip_locations_id='%s'" %(new_trip_locations_id))  
    check_id = cur.fetchone()
    if check_id:
        return new_trip_locations_id, check_id[-3]
    detail = ast.literal_eval(detail[1:-1])
    for index, trip_detail in enumerate(detail):
        if ast.literal_eval(trip_detail)['id'] == remove_event_id:
            remove_index = index
            break
    new_detail = list(detail)
    new_detail.pop(remove_index)
    new_detail =  str(new_detail).replace("'","''")
    regular = False
    cur.execute("select max(index) from day_trip_table where trip_locations_id='%s'" %(trip_locations_id)) 
    new_index = cur.fetchone()[0]
    new_index+=1
    cur.execute("INSERT INTO day_trip_table VALUES (%i, '%s', %s, %s, '%s', '%s', '%s', '%s','%s');" \
                %(new_index, new_trip_locations_id, full_day, regular, county, state, new_detail, event_type, new_event_ids))  
    conn.commit()
    conn.close()
    return new_trip_locations_id, new_detail

def event_type_time_spent(adjusted_normal_time_spent):
    if adjusted_normal_time_spent > 180:
        return 'big'
    elif adjusted_normal_time_spent >= 120:
        return 'med'
    else:
        return 'small'

def switch_event_list(full_trip_id, trip_locations_id, switch_event_id, switch_event_name=None, event_day=None, full_day = True):
#     new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select name, city, county, state, coord_lat, coord_long,ranking, adjusted_visit_length from poi_detail_table_v2 where index=%s" %(switch_event_id))
    name, city, county, state,coord_lat, coord_long,poi_rank, adjusted_normal_time_spent = cur.fetchone()
    event_type = event_type_time_spent(adjusted_normal_time_spent)
    avialable_lst = ajax_available_events(county, state)
    cur.execute("select trip_location_ids,details from full_trip_table where full_trip_id=%s" %(full_trip_id))
    full_trip_detail = cur.fetchone()
    full_trip_detail = ast.literal_eval(full_trip_detail)
    full_trip_ids = [ast.literal_eval(item)['id'] for item in full_trip_detail]
    switch_lst = []
    for item in avialable_lst:
        index = item[0]
        if index not in full_trip_ids:
            event_ids = [switch_event_id, index]
            event_ids, google_ids, name_list, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids, event_type='switch')
            if min(driving_time_list[0], walking_time_list[0]) <= 60:
                cur.execute("select ranking, review_score, adjusted_visit_length from poi_detail_table_v2 where index=%s" %(index))
                target_poi_rank, target_rating, target_adjusted_normal_time_spent = cur.fetchone()
                target_event_type = event_type_time_spent(target_adjusted_normal_time_spent)
                switch_lst.append([target_poi_rank, target_rating, target_event_type==event_type])
    #need to sort target_event_type, target_poi_rank and target_rating
    return {switch_event_id: switch_lst}

def switch_event(trip_locations_id, switch_event_id, final_event_id, event_day):
    new_trip_locations_id, new_detail = remove_event(trip_locations_id, switch_event_id)
    new_trip_locations_id, new_detail = add_event(new_trip_locations_id, event_day, final_event_id, full_day = True, unseen_event = False)
    return new_trip_locations_id, new_detail

def angle_between(p1, p2):
    ang1 = np.arctan2(*p1[::-1])
    ang2 = np.arctan2(*p2[::-1])
    return np.rad2deg((ang1 - ang2) % (2 * np.pi))

def calculate_initial_compass_bearing(pointA, pointB):
    """
    Calculates the bearing between two points.
    The formulae used is the following:
    theta = atan2(sin(delta(long)).cos(lat2),
                  cos(lat1).sin(lat2) − sin(lat1).cos(lat2).cos(delta(long)))
    :Parameters:
      - `pointA: The tuple representing the latitude/longitude for the
        first point. Latitude and longitude must be in decimal degrees
      - `pointB: The tuple representing the latitude/longitude for the
        second point. Latitude and longitude must be in decimal degrees
    :Returns:
      The bearing in degrees
    :Returns Type:
      float
    """
    if (type(pointA) != tuple) or (type(pointB) != tuple):
        raise TypeError("Only tuples are supported as arguments")

    lat1 = math.radians(pointA[0])
    lat2 = math.radians(pointB[0])

    diffLong = math.radians(pointB[1] - pointA[1])

    x = math.sin(diffLong) * math.cos(lat2)
    y = math.cos(lat1) * math.sin(lat2) - (math.sin(lat1)
            * math.cos(lat2) * math.cos(diffLong))

    initial_bearing = math.atan2(x, y)

    # Now we have the initial bearing but math.atan2 return values
    # from -180° to + 180° which is not what we want for a compass bearing
    # The solution is to normalize the initial bearing as shown below
    initial_bearing = math.degrees(initial_bearing)
    compass_bearing = (initial_bearing + 360) % 360

    return compass_bearing

def direction_from_orgin(start_coord_long,  start_coord_lat, target_coord_long, target_coord_lat):
    angle = calculate_initial_compass_bearing((start_coord_lat, start_coord_long), (target_coord_lat, target_coord_long))
    if (angle > 45) and (angle < 135):
        return 'E'
    elif (angle > 135) and (angle < 215):
        return 'S'
    elif (angle > 215) and (angle < 305):
        return 'W'
    else:
        return 'N'
    
def travel_outside_coords(current_city, current_state, direction=None, n_days=1):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor() 
    #coord_long, coord_lat
    cur.execute("select index, coord_lat, coord_long from all_cities_coords_table where city ='%s' and state = '%s';" %(current_city, current_state)) 
    id_, coord_lat, coord_long = cur.fetchone()
    #city, coord_lat, coord_long
    cur.execute("select distinct city, coord_lat, coord_long from all_cities_coords_table where city !='%s' and state = '%s';" %(current_city, current_state))  
    coords = cur.fetchall()     
    conn.close()
    
    return id_, coords, coord_lat, coord_long

def check_outside_trip_id(outside_trip_id, debug):
    '''
    Check outside trip id exist or not.  
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select outside_trip_id from outside_trip_table where outside_trip_id = '%s'" %(outside_trip_id)) 
    a = cur.fetchone()
    print 'outside stuff id', a, bool(a)
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def db_outside_route_trip_details(event_ids, route_i):
    conn=psycopg2.connect(conn_str)
    cur = conn.cursor()
    details = []
    #details dict includes: id, name,address, day
    for event_id in event_ids:
        cur.execute("select index, name, address, coord_lat, coord_long from poi_detail_table_v2 where index = %s;" %(event_id))
        a = cur.fetchone()
        details.append({'id': a[0],'name': a[1],'address': a[2], 'coord_lat': a[3], 'coord_long':a[4], 'route': route_i})
    conn.close()
    return details

def db_outside_google_driving_walking_time(city_id, start_coord_lat, start_coord_long, event_ids, event_type, origin_city, origin_state):
    '''
    Get estimated travel time from google api.  
    Limit 1000 calls per day.
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    google_ids = []
    driving_time_list = []
    walking_time_list = []
    name_list = []
    city_to_poi_id = str(city_id) + '0000'+str(event_ids[0])
    if not check_city_to_poi(city_to_poi_id):
        cur.execute("select name, coord_lat, coord_long from poi_detail_table_v2 where index = %s "%(event_ids[0]))
        dest_name, dest_coord_lat, dest_coord_long = cur.fetchone()
        orig_coords = str(start_coord_lat)+','+str(start_coord_long)
        dest_coords = str(dest_coord_lat)+','+str(dest_coord_long)
        google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
        google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
        driving_result= simplejson.load(urllib.urlopen(google_driving_url))
        walking_result= simplejson.load(urllib.urlopen(google_walking_url))
        orig_name = origin_city.upper().replace(' ','+').replace('-','+') + '+' + origin_state.upper().replace(' ','+').replace('-','+')
        if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                format(orig_name,dest_name.replace(' ','+').replace('-','+'),my_key)
            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
        if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name,dest_name.replace(' ','+').replace('-','+'),my_key)
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))
        if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
            new_event_ids = list(event_ids)
            new_event_ids.pop(0)
            new_event_ids = db_outside_event_cloest_distance(start_coord_lat, start_coord_long, event_ids=new_event_ids, event_type = event_type)
            return db_outside_google_driving_walking_time(city_id, start_coord_lat, start_coord_long, new_event_ids, event_type,origin_city, origin_state)
        try:
            city_to_poi_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
        except:            
            print city, state, dest_name, driving_result #need to debug for this
        try:
            city_to_poi_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
        except:
            city_to_poi_walking_time = 9999 

        '''
        Need to work on rest of it!
        '''
        cur.execute("select max(index) from  google_city_to_poi_table")
        index = cur.fetchone()[0]+1
        driving_result = str(driving_result).replace("'",'"')
        walking_result = str(walking_result).replace("'",'"')
        orig_name = orig_name.replace("'","''")
        dest_name = dest_name.replace("'","''")
        cur.execute("INSERT INTO google_city_to_poi_table VALUES (%i, %s, %i, '%s','%s', '%s','%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);" \
                    %(index, city_to_poi_id, city_id, origin_city.replace("'","''"), origin_state, orig_name, dest_name, event_ids[0], start_coord_lat, start_coord_long, dest_coord_lat,\
                   dest_coord_long, orig_coords, dest_coords, google_driving_url, google_walking_url,\
                   str(driving_result), str(walking_result), city_to_poi_driving_time,city_to_poi_walking_time))
        conn.commit()
        name_list.extend([orig_name+" to "+ dest_name,dest_name+" to "+ orig_name])
        google_ids.extend([city_to_poi_id]*2)
        driving_time_list.extend([city_to_poi_driving_time]*2)
        walking_time_list.extend([city_to_poi_walking_time]*2)
    else:
        cur.execute("select orig_name, dest_name, city_to_poi_driving_time, city_to_poi_walking_time from google_city_to_poi_table \
                    where city_to_poi_id = %s " %(city_to_poi_id))
        orig_name, dest_name, city_to_poi_driving_time, city_to_poi_walking_time = cur.fetchone()
        name_list.append(orig_name+" to "+ dest_name)
        google_ids.extend([city_to_poi_id]*2)
        driving_time_list.extend([city_to_poi_driving_time]*2)
        walking_time_list.extend([city_to_poi_walking_time]*2)
    
    for i,v in enumerate(event_ids[:-1]):
        id_ = str(v) + '0000'+str(event_ids[i+1])
        result_check_travel_time_id = check_travel_time_id(id_)
        if not result_check_travel_time_id:
            cur.execute("select name, coord_lat, coord_long from poi_detail_table_v2 where index = %s"%(v))
            orig_name, orig_coord_lat, orig_coord_long = cur.fetchone()
            orig_idx = v
            cur.execute("select name, coord_lat, coord_long from poi_detail_table_v2 where index = %s "%(event_ids[i+1]))
            dest_name, dest_coord_lat, dest_coord_long = cur.fetchone()
            dest_idx = event_ids[i+1]
            orig_coords = str(orig_coord_lat)+','+str(orig_coord_long)
            dest_coords = str(dest_coord_lat)+','+str(dest_coord_long)
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
                
            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))
                
            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_event_ids = list(event_ids)
                new_event_ids.pop(i+1)
                new_event_ids = db_event_cloest_distance(event_ids=new_event_ids, event_type = event_type)
                return db_google_driving_walking_time(new_event_ids, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except:            
                print v, id_, driving_result #need to debug for this
            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except:
                google_walking_time = 9999
        
            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
            driving_result = str(driving_result).replace("'",'"')
            walking_result = str(walking_result).replace("'",'"')
            orig_name = orig_name.replace("'","''")
            dest_name = dest_name.replace("'","''")
            cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord_lat, orig_coord_long, dest_coord_long,\
                                   dest_coord_long, orig_coords, dest_coords, google_driving_url, google_walking_url,\
                                   str(driving_result), str(walking_result), google_driving_time, google_walking_time))
            conn.commit()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
        else:
            
            cur.execute("select orig_name, dest_name, google_driving_time, google_walking_time from google_travel_time_table \
                         where id_field = '%s'" %(id_))
            orig_name, dest_name, google_driving_time, google_walking_time = cur.fetchone()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
    conn.close()
    return event_ids, google_ids, name_list, driving_time_list, walking_time_list

def db_outside_event_cloest_distance(coord_lat, coord_long, trip_locations_id=None,event_ids=None, event_type = 'add',new_event_id = None):
    '''
    Get matrix cloest distance
    '''
    if new_event_id or not event_ids:
        event_ids, event_type = get_event_ids_list(trip_locations_id)
        if new_event_id:
            event_ids.append(new_event_id)
            
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()
    points = np.zeros((len(event_ids), 3))
    for i,v in enumerate(event_ids):
        cur.execute("select index, coord_lat, coord_long from poi_detail_table_v2 where index = %i;"%(float(v)))
        points[i] = cur.fetchone()
    conn.close()
    points = np.vstack((np.array([0, coord_lat, coord_long]),points))
    n,D = mk_matrix(points[:,1:], geopy_dist)
    if len(points) >= 3:
        if event_type == 'add':
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            tour = np.array(tour[1:])-1
            event_ids = np.array(event_ids)
            return np.array(event_ids)[tour[1:]], event_type
        #need to figure out other cases
        else:
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            tour = np.array(tour[1:])-1
            event_ids = np.array(event_ids)
            return event_ids[tour], event_type
    else:
        return np.array(event_ids), event_type

def check_city_to_poi(city_to_poi_id):
    conn = psycopg2.connect(conn_str)   
    cur = conn.cursor()   
    cur.execute("select index from google_city_to_poi_table \
                    where city_to_poi_id = %s " %(city_to_poi_id))
    a = cur.fetchone()
    conn.close()
    if bool(a):
        return True
    else:
        return False

def db_remove_outside_extra_events(event_ids, driving_time_list,walking_time_list, max_time_spent=600):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()  
    if len(event_ids) == 1:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index = %s;" %(event_ids[0]))
    else:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index IN %s;" %(tuple(event_ids),))
    total_travel_time = sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list)))
    time_spent = float(cur.fetchone()[0]) + float(total_travel_time)
    conn.close()
    if len(event_ids) == 1:
        return event_ids, driving_time_list, walking_time_list, time_spent
    if time_spent > max_time_spent:
        update_event_ids = event_ids[:-1]
        update_driving_time_list = driving_time_list[:-1]
        update_walking_time_list = walking_time_list[:-1]
        return db_remove_extra_events(update_event_ids, update_driving_time_list, update_walking_time_list)
    else:
        return event_ids, driving_time_list, walking_time_list, time_spent

In [232]:
#Get events outside the city!!!
import numpy as np
from distance import *
'''
Outside trip table: user_id, outside_trip_id, route_ids, origin_city, state, direction, n_days, default, full_day, details 
outside route table: route_id, event_id_lst, event_type, origin_city, state, direction, details, default, 
'''
# target_direction = 'N'
# origin_city = 'San Francisco'
# origin_state = 'California'
# conn_str = "dbname='travel_with_friends' user='zoesh' host='localhost'"

def outside_trip_poi(origin_city, origin_state, target_direction = 'N', n_days = 1, \
                    full_day = True, regular = True, debug = True, user_id = 'admin'):
    outside_trip_id = '-'.join([str(origin_state.upper().replace(' ','-')), str(origin_city.upper().replace(' ','-')), \
                        target_direction,str(int(regular)), str(n_days)])
    if not check_outside_trip_id(outside_trip_id, debug):
        furthest_len = 140
        if n_days == 1:
            furthest_len = 140
        #possible city coords, target city coord_lat, target city coord_long
        city_id, coords, coord_lat, coord_long = travel_outside_coords(origin_city, origin_state)
        #coords: city, lat, long
        check_cities_info = []
        for item in coords:
            direction = direction_from_orgin(coord_long,  coord_lat, item[2], item[1])
            if (target_direction == direction) and (geopy_dist((item[1], item[2]), (coord_lat, coord_long)) < furthest_len):
                check_cities_info.append(item)
        city_infos = []
        for city, _, _ in check_cities_info:
            county = None
            #index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating
            city_info = db_start_location(county, origin_state, city)
            city_infos.extend(city_info)
        city_infos = np.array(city_infos)
        poi_coords = city_infos[:,1:3]
        n_routes = sum(1 for t in np.array(city_infos)[:,3] if t >= 120)/10
        if (n_routes>1) and (city_infos.shape[0]>=10):
            kmeans = KMeans(n_clusters=n_routes).fit(poi_coords)
        elif (city_infos.shape[0]> 20) or (n_routes>1):
            kmeans = KMeans(n_clusters=2).fit(poi_coords)
        else:
            kmeans = KMeans(n_clusters=1).fit(poi_coords)
        route_labels = kmeans.labels_
        # print n_routes, len(route_labels), city_infos.shape
        # print route_labels
        outside_route_ids, outside_trip_details,event_id_lst =[],[],[]
        for i in range(n_routes):
            current_events, big_ix, med_ix, small_ix = [], [],[], []
            for ix, label in enumerate(route_labels):
                if label == i:
                    time = city_infos[ix,3]
                    event_ix = city_infos[ix,0]
                    current_events.append(event_ix)
                    if time > 180 :
                        big_ix.append(ix)
                    elif time >= 120 :
                        med_ix.append(ix)
                    else:
                        small_ix.append(ix)
            big_ = sorted_events(city_infos, big_ix)
            med_ = sorted_events(city_infos, med_ix)
            small_ = sorted_events(city_infos, small_ix)
            # need to update!!!!!!!!
            event_ids, event_type = create_event_id_list(big_, med_, small_)
            event_ids, event_type = db_outside_event_cloest_distance(coord_lat, coord_long, event_ids = event_ids, event_type = event_type)
            event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_outside_google_driving_walking_time(city_id, coord_lat,coord_long, event_ids, event_type, origin_city = origin_city, origin_state = origin_state)
            #why bug????
            event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_outside_extra_events(event_ids, driving_time_list, walking_time_list)
            outside_route_id = outside_trip_id + '-'+str(i)
#             values = db_outside_route_trip_details(outside_route_id, event_ids, origin_city, origin_state, regular, full_day,n_days,i)
            
            details = db_outside_route_trip_details(event_ids,i)
            conn = psycopg2.connect(conn_str)
            cur = conn.cursor()
            cur.execute('select max(index) from outside_route_table;')
            new_index = cur.fetchone()[0] + 1
            cur.execute("insert into outside_route_table (index, outside_route_id, full_day, regular, origin_city, origin_state, target_direction, details, event_type, event_ids, route_num) \
                        VALUES (%s, '%s', %s, %s, '%s', '%s', '%s', '%s', '%s', '%s', %s);" \
                        %(new_index, outside_route_id, full_day, regular, origin_city, origin_state, target_direction, str(details).replace("'","''"), event_type, str(event_ids) , i))
            conn.commit()
            conn.close()
            outside_route_ids.append(outside_route_id)
            outside_trip_details.extend(details)
            event_id_lst.extend(event_ids)

        username = "zoesh"
        conn = psycopg2.connect(conn_str)
        cur = conn.cursor()
        cur.execute('SELECT MAX(index) from outside_trip_table;')
        new_index = cur.fetchone()[0] +1
        cur.execute("INSERT into outside_trip_table(index, username, outside_trip_id, outside_route_ids, event_id_lst, origin_city, origin_state, target_direction, n_routes, regular, full_day, details) \
                     VALUES (%s,'%s', '%s', '%s','%s', '%s', '%s', '%s', %s,%s,%s,'%s');" \
                     %(new_index, username, outside_trip_id, str(outside_route_ids).replace("'","''"), str(event_id_lst), origin_city, origin_state, target_direction, n_routes, regular, full_day, str(outside_trip_details).replace("'","''")))
        conn.commit()
        conn.close()
        print "finish update %s, %s, direction %s into database" %(origin_state, origin_city, target_direction)
        return outside_trip_id, outside_trip_details
    else:
        print "ALERT: %s, %s, direction %s already in database" %(origin_state, origin_city, target_direction)
        conn = psycopg2.connect(conn_str)
        cur = conn.cursor()
        cur.execute("SELECT DISTINCT outside_trip_id, details FROM outside_trip_table WHERE outside_trip_id = '%s';" %(outside_trip_id))
        outside_trip_id, details = cur.fetchone()
        details = ast.literal_eval(details)
        conn.close()
        return outside_trip_id, details

In [234]:
outside_trip_poi(origin_city, origin_state, target_direction = 'E', n_days = 1, \
                    full_day = True, regular = True, debug = True, user_id = 'zoesh')


outside stuff id None False
finish update California, San Francisco, direction E into database
Out[234]:
('CALIFORNIA-SAN-FRANCISCO-E-1-1',
 [{'address': '2546 4th St, Ceres, California, 95307, United States',
   'coord_lat': 37.5938161,
   'coord_long': -120.9532209,
   'id': 12121L,
   'name': 'Feet and Facials',
   'route': 0},
  {'address': '2800 W Monte Vista Ave, Turlock, California, 95380, United States',
   'coord_lat': 37.5176816,
   'coord_long': -120.8822119,
   'id': 8453L,
   'name': 'Monte Vista Crossings Shopping Center',
   'route': 0},
  {'address': '426 E Main St, Turlock, California, 95380, United States',
   'coord_lat': 37.496365,
   'coord_long': -120.8435831,
   'id': 8457L,
   'name': "Staley's Club",
   'route': 0},
  {'address': '108 S Center St, Turlock, California, 95380, United States',
   'coord_lat': 37.4948783,
   'coord_long': -120.84514,
   'id': 8454L,
   'name': 'Turlock Historical Society Museum',
   'route': 0},
  {'address': '10 E Main St, Turlock, California, 95380, United States',
   'coord_lat': 37.4938367,
   'coord_long': -120.8460041,
   'id': 8455L,
   'name': 'WellingtonsLateNight',
   'route': 0},
  {'address': '18012 Bollinger Canyon Road, San Ramon, California, 94583, United States',
   'coord_lat': 37.7832876,
   'coord_long': -122.0004474,
   'id': 8027L,
   'name': 'Las Trampas Regional Wilderness',
   'route': 1},
  {'address': ', Danville, California, 94526, United States',
   'coord_lat': 37.8006948,
   'coord_long': -121.9814354,
   'id': 12768L,
   'name': "Eugene O'Neill National Historic Site Tao House",
   'route': 1},
  {'address': ', Dublin, California, , United States',
   'coord_lat': 37.7021521,
   'coord_long': -121.9357918,
   'id': 11065L,
   'name': 'Iron Horse Trail',
   'route': 1},
  {'address': '2701 Prospect Park, Rancho Cordova, California, 95670, United States',
   'coord_lat': 38.5887816,
   'coord_long': -121.2837514,
   'id': 8703L,
   'name': "Sacramento Children's Museum",
   'route': 2},
  {'address': '11327 Folsom Blvd Ste 160, Rancho Cordova, California, 95742, United States',
   'coord_lat': 38.6114591,
   'coord_long': -121.2604113,
   'id': 8706L,
   'name': 'Sky High Sports',
   'route': 2},
  {'address': '12401 Folsom Blvd, Rancho Cordova, California, 95742, United States',
   'coord_lat': 38.6300264,
   'coord_long': -121.21526,
   'id': 8704L,
   'name': 'Monster Mini Golf',
   'route': 2},
  {'address': '10 College Parkway, Folsom, California, 95630, United States',
   'coord_lat': 38.6628526,
   'coord_long': -121.129444,
   'id': 8122L,
   'name': 'Harris Center',
   'route': 2},
  {'address': '17500 Mulholland Dr, Brentwood, California, 90049, United States',
   'coord_lat': 34.129972,
   'coord_long': -118.4857005,
   'id': 10635L,
   'name': 'LA96C',
   'route': 3},
  {'address': '2601 Monarch St, Alameda, California, 94501, United States',
   'coord_lat': 37.7876017,
   'coord_long': -122.3092454,
   'id': 7828L,
   'name': 'St. George Spirits',
   'route': 4},
  {'address': 'Lakeside Park, Oakland, California, 94612, United States',
   'coord_lat': 37.8113159,
   'coord_long': -122.2682245,
   'id': 1161L,
   'name': 'Lake Merritt',
   'route': 4},
  {'address': '201 University Ave, Berkeley, California, 94710, United States',
   'coord_lat': 37.8645225,
   'coord_long': -122.3139434,
   'id': 4871L,
   'name': 'Berkeley Marina',
   'route': 4},
  {'address': ', Berkeley, California, 94720, United States',
   'coord_lat': 37.870151,
   'coord_long': -122.2594606,
   'id': 4869L,
   'name': 'University of California, Berkeley',
   'route': 4},
  {'address': 'entrances at Wildcat Canyon Rd & Grizzly Peak Blvd, Berkeley, California, 94701, United States',
   'coord_lat': 37.892767,
   'coord_long': -122.242451,
   'id': 4868L,
   'name': 'Tilden Regional Park',
   'route': 4},
  {'address': '200 Centennial Dr, Berkeley, California, 94720, United States',
   'coord_lat': 37.8751552,
   'coord_long': -122.2386789,
   'id': 4870L,
   'name': 'UC Botanical Garden at Berkeley',
   'route': 4},
  {'address': '7867 Redwood Rd, Oakland, California, 94619, United States',
   'coord_lat': 37.8019369,
   'coord_long': -122.1444725,
   'id': 1160L,
   'name': 'Redwood Regional Park',
   'route': 4},
  {'address': 'Woodbridge Road, Lodi, California, , United States',
   'coord_lat': 38.1627849,
   'coord_long': -121.4123433,
   'id': 9292L,
   'name': 'Isenberg Crane Reserve',
   'route': 5},
  {'address': '1973 W Turner Rd, Lodi, California, 95242, United States',
   'coord_lat': 38.1492832,
   'coord_long': -121.3408729,
   'id': 9289L,
   'name': "Jessie's Grove Winery",
   'route': 5},
  {'address': '2545 W. Turner Road, Lodi, California, 95242, United States',
   'coord_lat': 38.1460334,
   'coord_long': -121.3077366,
   'id': 9286L,
   'name': 'Lodi Wine & Visitor Center',
   'route': 5},
  {'address': '1101 West Turner Road, Lodi, California, 95240, United States',
   'coord_lat': 38.146277,
   'coord_long': -121.292847,
   'id': 9287L,
   'name': 'Lodi Lake Park',
   'route': 5},
  {'address': '203 School Street, Lodi, California, 95240, United States',
   'coord_lat': 38.1313741,
   'coord_long': -121.2741807,
   'id': 9295L,
   'name': 'The Dancing Fox Winery',
   'route': 5},
  {'address': '2 N Sacramento St, Lodi, California, 95240, United States',
   'coord_lat': 38.134763,
   'coord_long': -121.2718869,
   'id': 9290L,
   'name': 'World of Wonders Science Museum',
   'route': 5},
  {'address': '2551 Harbor St, Pittsburg, California, 94565, United States',
   'coord_lat': 38.0090309,
   'coord_long': -121.8849573,
   'id': 8877L,
   'name': 'Small World Park',
   'route': 6},
  {'address': '5175 Somersville Rd, Antioch, California, 94509, United States',
   'coord_lat': 37.958309,
   'coord_long': -121.863356,
   'id': 5417L,
   'name': 'Black Diamond Mines Regional Preserve',
   'route': 6},
  {'address': '1200 Frederickson Lane, Antioch, California, 94509, United States',
   'coord_lat': 37.965515,
   'coord_long': -121.822328,
   'id': 5418L,
   'name': 'Contra Loma Regional Park',
   'route': 6},
  {'address': '602 W 2nd St, Antioch, California, 94509, United States',
   'coord_lat': 38.0171923,
   'coord_long': -121.8140055,
   'id': 5420L,
   'name': 'El Campanil Theatre',
   'route': 6},
  {'address': '7430 W. Sandy Mush Rd, Merced, California, 93635, United States',
   'coord_lat': 37.1860426,
   'coord_long': -120.6262407,
   'id': 7363L,
   'name': 'Merced National Wildlife Refuge',
   'route': 7},
  {'address': '1603 V St, Merced, California, 95340, United States',
   'coord_lat': 37.3062626,
   'coord_long': -120.4991535,
   'id': 7376L,
   'name': 'R-Bar',
   'route': 7},
  {'address': '1445 W 18th St, Merced, California, 95340, United States',
   'coord_lat': 37.3078953,
   'coord_long': -120.4965852,
   'id': 7375L,
   'name': 'Roll-er Land',
   'route': 7},
  {'address': '1201 N Pershing Ave, Stockton, California, 95203, United States',
   'coord_lat': 37.9602519,
   'coord_long': -121.31431,
   'id': 1637L,
   'name': 'The Haggin Museum',
   'route': 8},
  {'address': '404 W Fremont St, Stockton, California, 95203, United States',
   'coord_lat': 37.95608,
   'coord_long': -121.298084,
   'id': 1640L,
   'name': 'Banner Island Ballpark',
   'route': 8},
  {'address': '248 W Fremont St, Stockton, California, 95203, United States',
   'coord_lat': 37.956949,
   'coord_long': -121.2956797,
   'id': 1639L,
   'name': 'Stockton Arena',
   'route': 8},
  {'address': '445 West Weber Avenue, Stockton, California, 95202, United States',
   'coord_lat': 37.9528065,
   'coord_long': -121.2971925,
   'id': 1643L,
   'name': 'Downtown Stockton Marina and Joan Darrah Promenade',
   'route': 8},
  {'address': '402 W Weber Ave, Stockton, California, 95203, United States',
   'coord_lat': 37.9518502,
   'coord_long': -121.2967728,
   'id': 1642L,
   'name': "Children's Museum of Stockton",
   'route': 8},
  {'address': '242 E Main St, Stockton, California, 95202, United States',
   'coord_lat': 37.9526154,
   'coord_long': -121.2875095,
   'id': 1638L,
   'name': 'Historic Bob Hope Theatre',
   'route': 8},
  {'address': ', Stockton, California, 95205, United States',
   'coord_lat': 37.9642589,
   'coord_long': -121.2569138,
   'id': 1648L,
   'name': "Stockton Certified Farmer's Market",
   'route': 8},
  {'address': '1601 Civic Dr, Walnut Creek, California, 94596, United States',
   'coord_lat': 37.9009276,
   'coord_long': -122.0634936,
   'id': 8822L,
   'name': 'Lesher Center for the Arts',
   'route': 9},
  {'address': '1931 First Ave., Walnut Creek, California, , United States',
   'coord_lat': 37.923506,
   'coord_long': -122.0757024,
   'id': 8821L,
   'name': 'Lindsay Wildlife Museum',
   'route': 9},
  {'address': '1552 Bancroft Rd, Walnut Creek, California, 94598, United States',
   'coord_lat': 37.923495,
   'coord_long': -122.036327,
   'id': 8823L,
   'name': 'The Ruth Bancroft Garden',
   'route': 9},
  {'address': '3215 Willow Pass Rd, Concord, California, 94519, United States',
   'coord_lat': 37.981879,
   'coord_long': -122.0186636,
   'id': 4468L,
   'name': 'Contra Costa Canal Trail',
   'route': 9},
  {'address': '900 Doolittle Dr, San Leandro, California, 94577, United States',
   'coord_lat': 37.7187416,
   'coord_long': -122.1892887,
   'id': 6712L,
   'name': 'Black Sheep Bike Rental',
   'route': 10},
  {'address': 'Monarch Bay Drive, San Leandro, California, 94577, United States',
   'coord_lat': 37.6958571,
   'coord_long': -122.1866837,
   'id': 6713L,
   'name': 'Marina Park',
   'route': 10},
  {'address': 'Foothill Blvd., Hayward, California, , United States',
   'coord_lat': 37.6822412,
   'coord_long': -122.0883995,
   'id': 3643L,
   'name': 'Mural Arts Program of Hayward',
   'route': 10},
  {'address': '22373 N 3rd St, Hayward, California, 94546, United States',
   'coord_lat': 37.6802183,
   'coord_long': -122.0801537,
   'id': 3641L,
   'name': 'Hayward Japanese Gardens',
   'route': 10},
  {'address': '715 N Central Ave, Tracy, California, 95376, United States',
   'coord_lat': 37.7358372,
   'coord_long': -121.4260339,
   'id': 6979L,
   'name': 'Grand Theatre for the Performing Arts',
   'route': 11},
  {'address': '1885 N MacArthur Dr, Tracy, California, 95376, United States',
   'coord_lat': 37.7472769,
   'coord_long': -121.412508,
   'id': 6981L,
   'name': 'Morgan Territory Brewing',
   'route': 11},
  {'address': 'I-205 to MacArthur Blvd. Exit, Tracy, California, , United States',
   'coord_lat': 37.7628538,
   'coord_long': -121.4133086,
   'id': 6984L,
   'name': 'Tracy Outlet Center',
   'route': 11},
  {'address': '1077 Milo Candini Drive, Manteca, California, 95337, United States',
   'coord_lat': 37.787642,
   'coord_long': -121.2577032,
   'id': 8239L,
   'name': 'Big League Dreams',
   'route': 11},
  {'address': '3443 Laguna Blvd Ste 115, Elk Grove, California, 95758, United States',
   'coord_lat': 38.4283719,
   'coord_long': -121.4642258,
   'id': 3450L,
   'name': 'SURF XTREME',
   'route': 12},
  {'address': '3443 Laguna Blvd, Elk Grove, California, 95758, United States',
   'coord_lat': 38.4283719,
   'coord_long': -121.4642258,
   'id': 3455L,
   'name': "Coach's Classic Bar & Grill",
   'route': 12},
  {'address': 'Laguna and West Stockton Boulevards, Elk Grove, California, 95758, United States',
   'coord_lat': 38.4230572,
   'coord_long': -121.4000722,
   'id': 3456L,
   'name': 'Laguna Gateway',
   'route': 12},
  {'address': ', Elk Grove, California, , United States',
   'coord_lat': 38.4087993,
   'coord_long': -121.3716178,
   'id': 3451L,
   'name': 'Elk Grove Creek Trail',
   'route': 12},
  {'address': '9039 Elk Grove Blvd, Elk Grove, California, 95624, United States',
   'coord_lat': 38.4092089,
   'coord_long': -121.3651729,
   'id': 3458L,
   'name': "Bob's Club",
   'route': 12},
  {'address': '1188 Vineyard Ave, Pleasanton, California, 94566, United States',
   'coord_lat': 37.653654,
   'coord_long': -121.8208221,
   'id': 8041L,
   'name': 'Rubino Estates Winery',
   'route': 13},
  {'address': '1188 Vineyard Ave, Pleasanton, California, 94566, United States',
   'coord_lat': 37.653654,
   'coord_long': -121.8208221,
   'id': 8042L,
   'name': 'Mitchell Katz Winery',
   'route': 13},
  {'address': '400 Vineyard Ave, Pleasanton, California, 94566, United States',
   'coord_lat': 37.6512874,
   'coord_long': -121.806848,
   'id': 8035L,
   'name': 'Ruby Hill Winery',
   'route': 13},
  {'address': '7000 Del Valle Road, Livermore, California, 94550, United States',
   'coord_lat': 37.5683361,
   'coord_long': -121.6870831,
   'id': 6954L,
   'name': 'Del Valle Regional Park',
   'route': 13},
  {'address': '18600 W Corral Hollow Rd, Tracy, California, 95375, United States',
   'coord_lat': 37.6334837,
   'coord_long': -121.5435773,
   'id': 6982L,
   'name': 'Carnegie State Vehicular Recreation Area',
   'route': 13},
  {'address': '2100 Isherwood Way, Fremont, California, 94536, United States',
   'coord_lat': 37.5794842,
   'coord_long': -122.0074961,
   'id': 2329L,
   'name': 'Quarry Lakes Regional Recreation Area',
   'route': 14},
  {'address': 'Sailway Drive, Fremont, California, , United States',
   'coord_lat': 37.5482594,
   'coord_long': -121.9672729,
   'id': 2325L,
   'name': 'Central Park',
   'route': 14},
  {'address': 'Stanford Avenue, Fremont, California, , United States',
   'coord_lat': 37.5033366,
   'coord_long': -121.9122436,
   'id': 2326L,
   'name': 'Mission Peak Regional Preserve',
   'route': 14},
  {'address': '3100 Calaveras Rd, Milpitas, California, 95035, United States',
   'coord_lat': 37.4480512,
   'coord_long': -121.8487485,
   'id': 8524L,
   'name': 'Ed Levin County Park',
   'route': 14},
  {'address': '4545 Felter Rd, Milpitas, California, 95035, United States',
   'coord_lat': 37.4405015,
   'coord_long': -121.8328232,
   'id': 8525L,
   'name': 'Big Dog Vineyards',
   'route': 14}])

In [230]:
import psycopg2
import simplejson
import numpy as np
from distance import *
from collections import Counter
# conn_str = "dbname='travel_with_friends' user='Gon' host='localhost'"
# my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'
# my_key = "AIzaSyCwIBKkWAkAcPjpO840fGPc1vnmK7h2UnY"
# my_key1 = "AIzaSyBrYcGsb8kIlIfa011bSbVF8X4NueqzZBo"
def check_valid_state(state):
    '''
    Only valid within the U.S.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    state = state.replace('_',' ')
    cur.execute("select distinct state from poi_detail_table_v2 where state = '%s';" %(state.title()))
    c = cur.fetchone()
    return bool(c)
    
def check_valid_city(city,state):
    '''
    Only valid within the U.S.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    state = state.replace('_',' ')
    city = city.replace('_',' ')
    cur.execute("select distinct city, state from poi_detail_table_v2 where city = '%s' and state = '%s';" %(city.title(), state.title()))
    c = cur.fetchone()
    return bool(c)

def find_county(state, city):
    '''
    Only valid within the U.S.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    city = city.replace('_',' ')
    cur.execute("select distinct county from county_table where city = '%s' and state = '%s';" %(city.title(), state.title()))

    county = cur.fetchone()
    conn.close()
    if county:
        return county[0]
    else:
        return None

def db_start_location(county, state, city):
    '''
    Get numpy array of county related POIs.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    if county:
        cur.execute("select index, coord_lat, coord_long, adjusted_visit_length, ranking, review_score, num_reviews from poi_detail_table_v2     where county = '%s' and state = '%s'; "%(county.upper(), state.title()))
    else:
        cur.execute("select index, coord_lat, coord_long, adjusted_visit_length, ranking, review_score, num_reviews from poi_detail_table_v2     where city = '%s' and state = '%s'; "%(city.title(), state.title()))
    a = cur.fetchall()
    conn.close()
    return np.array(a)


def get_event_ids_list(trip_locations_id):
    '''
    Input: trip_locations_id
    Output: evnet_ids, event_type = ['big', 'small', 'med', 'add',]
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select event_ids,event_type from day_trip_table where trip_locations_id = '%s' " %(trip_locations_id))
    event_ids,event_type = cur.fetchone()
    event_ids = litast.eral_eval(event_ids)
    conn.close()
    return event_ids,event_type


def db_event_cloest_distance(trip_locations_id=None,event_ids=None, event_type = 'add',new_event_id = None, city_name =None):
    '''
    Get matrix cloest distance
    '''
    if new_event_id or not event_ids:
        event_ids, event_type = get_event_ids_list(trip_locations_id)
        if new_event_id:
            event_ids.append(new_event_id)
            
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()
    points=[]
    # points = np.zeros((len(event_ids), 3))
    for i,v in enumerate(event_ids):
        cur.execute("select index, coord_lat, coord_long, city , ranking from poi_detail_table_v2   where index = %i;"%(float(v)))
        points.append(cur.fetchone())
    conn.close()

    points = check_NO_1(points, city_name)
    # print 'db_distance',points
    n,D = mk_matrix(points[:,1:3], geopy_dist)
    if len(points) >= 3:
        if event_type == 'add':
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            return np.array(event_ids)[tour], event_type
        #need to figure out other cases
        else:
            tour = nearest_neighbor(n, 0, D)
            # create a greedy tour, visiting city 'i' first
            z = length(tour, D)
            z = localsearch(tour, z, D)
            return np.array(event_ids)[tour], event_type
    else:
        return np.array(event_ids), event_type

def check_NO_1(poi_list, city_name):
    city_name = city_name.replace('_',' ')
    if len(poi_list)==1:
        return np.array(poi_list)
    for i, poi in enumerate(poi_list):
        if (poi[3] == city_name) and (poi[4]==1):
            number_one =poi_list.pop(i)
            return np.vstack((np.array(number_one),np.array(poi_list)))
    return np.array(poi_list)


def check_full_trip_id(full_trip_id, debug):
    '''
    Check full trip id exist or not.  
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select details from full_trip_table where full_trip_id = '%s'" %(full_trip_id)) 
    a = cur.fetchone()
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def check_day_trip_id(day_trip_id, debug):
    '''
    Check day trip id exist or not.  
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    cur.execute("select details from day_trip_table where trip_locations_id = '%s'" %(day_trip_id)) 
    a = cur.fetchone()
    conn.close()
    if bool(a):
        if not debug: 
            return a[0]
        else:
            return True
    else:
        return False

def check_travel_time_id(new_id):
    '''
    Check google driving time exisit or not for the 2 point poi id.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select google_driving_time from google_travel_time_table where id_field = '%s'" %(new_id))
    a = cur.fetchone()
    conn.close()
    if bool(a):
        return True
    else:
        return False

#May need to improve by adding #reviews in this. :)
def sorted_events(info,ix):
    '''
    find the event_id, ranking and review_score, num_reviews columns
    sorted base on ranking then review_score, num_reviews
    
    return sorted list 
    '''
    event_ = info[ix][:,[0,4,5,6]]
    return np.array(sorted(event_, key=lambda x: (x[1], -x[3], -x[2])))

#Need to make this more efficient
def create_event_id_list(big_,medium_,small_):
    # print big_,medium_,small_
    event_type = ''
    if big_.shape[0] >= 1:
        if (medium_.shape[0] < 2) or (big_[0,1] <= medium_[0,1]):
            if small_.shape[0] >= 6:
                event_ids = list(np.concatenate((big_[:1,0], small_[0:6,0]),axis=0))  
            elif small_.shape[0]>0:
                event_ids = list(np.concatenate((big_[:1,0], small_[:,0]),axis=0)) 
            else:
                event_ids = list(np.array(sorted(big_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
            event_type = 'big'
        else:
            if small_.shape[0] >= 8:
                event_ids = list(np.concatenate((medium_[0:2,0], small_[0:8,0]),axis=0))
            elif small_.shape[0]>0:
                event_ids = list(np.concatenate((medium_[0:2,0], small_[:,0]),axis=0))
            else:
                event_ids = list(np.array(sorted(medium_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
            event_type = 'med'
    elif medium_.shape[0] >= 2:
        if small_.shape[0] >= 8:
            event_ids = list(np.concatenate((medium_[0:2,0], small_[0:8,0]),axis=0))
        elif small_.shape[0]>0:
            event_ids = list(np.concatenate((medium_[0:2,0], small_[:,0]),axis=0))
        else:
            event_ids = list(np.array(sorted(medium_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
        event_type = 'med'
    else:
        if small_.shape[0] >= 10:
            if medium_.shape[0]==0:
                event_ids = list(np.array(sorted(small_[0:10,:], key=lambda x: (x[1],-x[2])))[:,0])
            else:
                event_ids = list(np.array(sorted(np.vstack((medium_[:1,:], small_[0:10,:])), key=lambda x: (x[1],-x[2])))[:,0])
        elif small_.shape[0] > 0:
            if medium_.shape[0]==0:
                event_ids = list(np.array(sorted(small_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
            else:
                event_ids = list(np.array(sorted(np.vstack((medium_, small_)), key=lambda x: (x[1],-x[2])))[:,0])

        else:
            event_ids = list(np.array(sorted(medium_[0:,:], key=lambda x: (x[1],-x[2])))[:,0])
        event_type = 'small'
    # else:

    return event_ids, event_type

def db_google_driving_walking_time(event_ids, event_type):
    '''
    Get estimated travel time from google api.  
    Limit 1000 calls per day.
    '''
    conn = psycopg2.connect(conn_str)  
    cur = conn.cursor()  
    google_ids = []
    driving_time_list = []
    walking_time_list = []
    name_list = []
    for i,v in enumerate(event_ids[:-1]):
        id_ = str(v) + '0000'+str(event_ids[i+1])
        result_check_travel_time_id = check_travel_time_id(id_)
        if not result_check_travel_time_id:
            cur.execute("select name, coord_lat, coord_long from poi_detail_table_v2 where index = %s"%(v))
            orig_name, orig_coord_lat, orig_coord_long = cur.fetchone()
            orig_idx = v
            cur.execute("select name, coord_lat, coord_long from poi_detail_table_v2 where index = %s "%(event_ids[i+1]))
            dest_name, dest_coord_lat, dest_coord_long = cur.fetchone()
            dest_idx = event_ids[i+1]
            orig_coords = str(orig_coord_lat)+','+str(orig_coord_long)
            dest_coords = str(dest_coord_lat)+','+str(dest_coord_long)
            google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)
            google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                    format(orig_coords.replace(' ',''),dest_coords.replace(' ',''),my_key)

            driving_result= simplejson.load(urllib.urlopen(google_driving_url))
            walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if driving_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_driving_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=driving&language=en-EN&sensor=false&key={2}".\
                                    format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                driving_result= simplejson.load(urllib.urlopen(google_driving_url))
                
            if walking_result['rows'][0]['elements'][0]['status'] == 'ZERO_RESULTS':
                google_walking_url = "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&mode=walking&language=en-EN&sensor=false&key={2}".\
                                        format(orig_name.replace(' ','+').replace('-','+'),dest_name.replace(' ','+').replace('-','+'),my_key)
                walking_result= simplejson.load(urllib.urlopen(google_walking_url))
            if (driving_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND') and (walking_result['rows'][0]['elements'][0]['status'] == 'NOT_FOUND'):
                new_event_ids = list(event_ids)
                new_event_ids.pop(i+1)
                new_event_ids = db_event_cloest_distance(event_ids=new_event_ids, event_type = event_type)
                return db_google_driving_walking_time(new_event_ids, event_type)
            try:
                google_driving_time = driving_result['rows'][0]['elements'][0]['duration']['value']/60
            except:            
                print v, id_, driving_result #need to debug for this
            try:
                google_walking_time = walking_result['rows'][0]['elements'][0]['duration']['value']/60
            except:
                google_walking_time = 9999
            # print 'google_driving time: ', google_driving_time
            
            google_driving_url = google_driving_url.replace("'s","%27")
            google_walking_url = google_walking_url.replace("'s","%27")

            cur.execute("select max(index) from  google_travel_time_table")
            index = cur.fetchone()[0]+1
            driving_result = str(driving_result).replace("'",'"')
            walking_result = str(walking_result).replace("'",'"')
            orig_name = orig_name.replace("'","''")
            dest_name = dest_name.replace("'","''")

            cur.execute("INSERT INTO google_travel_time_table VALUES (%i, '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s','%s', '%s', '%s', '%s', '%s', '%s', %s, %s);"%(index, id_, orig_name, orig_idx, dest_name, dest_idx, orig_coord_lat, orig_coord_long, dest_coord_lat,\
                                   dest_coord_long, orig_coords, dest_coords, google_driving_url, google_walking_url,\
                                   str(driving_result), str(walking_result), google_driving_time, google_walking_time))
            conn.commit()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
        else:
            
            cur.execute("select orig_name, dest_name, google_driving_time, google_walking_time from google_travel_time_table \
                         where id_field = '%s'" %(id_))
            orig_name, dest_name, google_driving_time, google_walking_time = cur.fetchone()
            name_list.append(orig_name+" to "+ dest_name)
            google_ids.append(id_)
            driving_time_list.append(google_driving_time)
            walking_time_list.append(google_walking_time)
    conn.close()
    # return event_ids, google_ids, name_list, driving_time_list, walking_time_list
    return event_ids, driving_time_list, walking_time_list

def db_remove_extra_events(event_ids, driving_time_list,walking_time_list, max_time_spent=600):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()   
    if len(event_ids) > 1:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index IN %s;" %(tuple(event_ids),))
        time_spent = cur.fetchone()[0]
        conn.close()
    else:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index = %s;" %(event_ids))
        time_spent = cur.fetchone()[0]
        conn.close()
    travel_time = int(sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list))))
    time_spent = int(time_spent) + travel_time
    if time_spent > max_time_spent:
        update_event_ids = event_ids[:-1]
        update_driving_time_list = driving_time_list[:-1]
        update_walking_time_list = walking_time_list[:-1]
        return db_remove_extra_events(update_event_ids, update_driving_time_list, update_walking_time_list)
    else:
        return event_ids, driving_time_list, walking_time_list, time_spent

def db_adjust_events(event_ids, driving_time_list,walking_time_list, not_visited_poi_lst, event_type, city, max_time_spent=600):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()   
    if len(event_ids) > 1:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index IN %s;" %(tuple(event_ids),))
        time_spent = cur.fetchone()[0]
        conn.close()
    else:
        cur.execute("SELECT DISTINCT SUM(adjusted_visit_length) FROM poi_detail_table_v2 WHERE index = %s;" %(event_ids[0]))
        time_spent = cur.fetchone()[0]
        conn.close()
    travel_time = int(sum(np.minimum(np.array(driving_time_list),np.array(walking_time_list))))
    time_spent = int(time_spent) + travel_time
    if time_spent > max_time_spent:
        update_event_ids = event_ids[:-1]
        update_driving_time_list = driving_time_list[:-1]
        update_walking_time_list = walking_time_list[:-1]
        not_visited_poi_lst.append(event_ids[-1])
        return db_adjust_events(update_event_ids, update_driving_time_list, update_walking_time_list,not_visited_poi_lst, event_type, city)
    elif (time_spent < max_time_spent - 240) and (len(not_visited_poi_lst)>1):
        event_ids = list(event_ids)
        event_ids.extend(not_visited_poi_lst)
        event_ids, event_type = db_event_cloest_distance(event_ids = event_ids, event_type = event_type, city_name = city)
        event_ids, driving_time_list, walking_time_list = db_google_driving_walking_time(event_ids, event_type)
        return db_adjust_events(event_ids, driving_time_list, walking_time_list, [], event_type, city)
    else:
        return event_ids, driving_time_list, walking_time_list, time_spent, not_visited_poi_lst


def db_day_trip_details(event_ids, i):
    conn=psycopg2.connect(conn_str)
    cur = conn.cursor()
    details = []
    #details dict includes: id, name,address, day
    for event_id in event_ids:
        cur.execute("select index, name, address, coord_lat, coord_long from poi_detail_table_v2 where index = %s;" %(event_id))
        a = cur.fetchone()
        details.append(str({'id': a[0],'name': a[1],'address': a[2], 'day': i, 'coord_lat': a[3], 'coord_long': a[4]}))
    conn.close()
    
    return details

def check_address(index):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    cur.execute("select address from poi_detail_table_v2     where index = %s;"%(index))
    a = cur.fetchone()[0]
    conn.close()
    if a:
        return True
    else:
        return False

def db_address(event_ids):
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    for i in event_ids[:-1]:
        if not check_address(i):
            cur.execute("select driving_result from google_travel_time_table where orig_idx = %s;" %(i))
            a= cur.fetchone()[0]
            add = ast.literal_eval(a)['origin_addresses'][0]
            cur.execute("update poi_detail_table_v2  set address = '%s' where index = %s;" %(add, i))
            conn.commit()
    last = event_ids[-1]
    if not check_address(last):
        cur.execute("select driving_result from google_travel_time_table where dest_idx = %s;" %(last))
        a= cur.fetchone()[0]
        add = ast.literal_eval(a)['destination_addresses'][0]
        cur.execute("update poi_detail_table_v2  set address = '%s' where index = %s;" %(add, last))
        conn.commit()
    conn.close()

def kmeans_leabels_day_order(day_labels):
    return [k for k, v in Counter(day_labels).most_common()]

In [ ]: