In [1]:
import pandas as pd
import time
import numpy as np
import requests
import psycopg2
import json
import simplejson
import urllib
import config
import ast
import bs4
import pprint
import progressbar
from pymongo import MongoClient
from geopy.geocoders import Nominatim
from bs4 import BeautifulSoup as BS
from operator import itemgetter
from sklearn.cluster import KMeans
from sqlalchemy import create_engine

In [ ]:
!pip install --upgrade progressbar2

In [ ]:
conn_str = "dbname='travel_with_friends' user='Gon' host='localhost'"
# conn_str = "dbname='travel_with_friends' user='Zoesh' host='localhost'"

In [ ]:
conn = psycopg2.connect(conn_str)   
cur = conn.cursor()   
# cur.execute("select index, name, coord0, coord1 from poi_detail_table where city !='%s' and state = '%s';" %(current_city, current_state))
cur.execute("select distinct city, state from poi_detail_table;" )
all_cities = cur.fetchall()

In [ ]:
all_cities[0][0]

In [ ]:
cities_coords = pd.read_csv('cities_coords.csv', header=None)
cities_coords.columns = ['area_code', 'city','state','nation', 'coord0','coord1']
cities_coords = cities_coords[['city','state','nation', 'coord0','coord1']].drop_duplicates()
cities_coords.reset_index(drop = True, inplace = True)

In [ ]:
geolocator = Nominatim()

for items in all_cities:
    if cities_coords[cities_coords['state'] == items[1]][cities_coords.city == items[0]].shape[0] == 0:
        location_name = ', '.join([items[0], items[1]])
        try:
            location = geolocator.geocode(location_name)
            cities_coords.loc[len(cities_coords)] = [items[0], items[1], 'US', location.latitude, location.longitude]
        except:
            "error, rest"
            time.sleep(20)
            print" start again"
            
            
#         print cities_coords.loc(len(cities_coords))

In [ ]:
cities_coords.to_csv('all_cities_coords.csv')

In [ ]:
engine = create_engine('postgresql://Gon@localhost:5432/travel_with_friends')
cities_coords.to_sql('all_cities_coords',engine, if_exists = "replace")

In [ ]:
import us_state_abbrevation as abb
state_abb_dict = abb.abb2state
state_abb_dict['CA']
# print state_abb_dict.keys()[state_abb_dict.values().index('CA')]

In [ ]:
from googleplaces import GooglePlaces, types, lang
YOUR_API_KEY = 'AIzaSyDMbpmHBLl7dTOXUOMZP7Vi3zbMJlByEKM'
google_places = GooglePlaces(YOUR_API_KEY)

In [ ]:
import geocoder
api_key1 = 'AIzaSyCrgwS_L75NfO9qzIKG8L0ox7zGw81BpRU'
api_key2 = 'AIzaSyBwh4WqOIVJGJuKkmzpQxlkjahgx6qzimk'
api_key3 = 'AIzaSyA25LW2CRcD9mSmiAWBYSPOSoiKP_m2plQ'
api_key4 = 'AIzaSyB3l2Trzm4LnrC0nyUwwoM9803Fuwf0my4'
api_key5 = 'AIzaSyDj0yH_35G1zMq5uYPF6X0ogkHYcLsNN1w'
add = ' 497 lakeside drive'
g = geocoder.google(add, key = api_key5)

In [ ]:
g.ok

In [ ]:
import json
with open('api_key_list.config') as api_key_list_file:
    api_key_list = json.load(api_key_list_file)
api_key_list['api_key_list']

In [ ]:
api_key_list

In [ ]:
df_tmp = pd.read_csv('test_poi_detail_df_100.csv', index_col = 0)
# df_tmp.to_csv('test_poi_detail_df_100.csv', index_col=None)
df_tmp.head()
s.find(text ="Recommended length of visit:")
#         visit_length = s.find(text ="Recommended length of visit:").parent.next_sibling

In [ ]:
client = MongoClient()
db = client.zoeshrm
db.TripAdvisor_state_park.count()

In [ ]:
from web_scraping_tripadvisor import state_park_web as web
state_park_pages = db.TripAdvisor_state_park.find()
poi_detail_state_park_df, error_message_df = web(state_park_pages)

In [ ]:
import re
page = db.TripAdvisor.find_one({'city': 'San Francisco, California'})
search_visit_length = re.compile('Recommended length of visit:')
s = BS(page['html'], "html.parser")
#index
#name
input_list, error_message = [],[]
state_abb_error, state_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error = 0,0,0,0,0,0,0,0
latitude, longitude, geo_content = None, None, None
#     print name
url = page['url']
name = s.find('h1', attrs = {'class':'heading_name'}).text.strip()

#street_address
street_address = s.find('span', attrs = {'class':'street-address'}).text.strip()
#city
city = s.find('span', attrs = {'property':'addressLocality'}).text.strip()
#state
state_abb = s.find('span', attrs = {'property':'addressRegion'}).text.strip()
if state_abb:
    try:
        # state = state_abb_dict.keys()[state_abb_dict.values().index(state_abb)]
        state = abb2state_dict[state_abb]
    except:
        state_abb_error = 1
        state = state_abb
else:
    state_error =1
    state_abb = None
    state = None
#postal_code
postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()
#country
if s.find('span', attrs = {'property':'addressCountry'}).get('content'):
    country = s.find('span',{'property':'addressCountry'}).get('content')
elif s.find('span',{'property':'addressCountry'}).get('content') == None:
    country = s.find('span',{'property':'addressCountry'}).text.strip()
else:
    country = 'United States'
#address
if state:
    full_address = street_address+', '+city+', '+state+', '+postal_code[:5]+', '+country
else:
    address_error =1
    full_address = street_address+', '+city+', '+postal_code[:5]+', '+country
# if (name in name_lst) and (full_address in full_address_lst):
#     continue
# else:
#     name_lst.append(name)
#     full_address_lst.append(full_address)
#coord
try:
    latitude, longitude, geo_content = find_latlng(full_address, name)
except:
    geo_error =1
    latitude, longitude, geo_content = None, None, None

#num_reviews
try:
    num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')
    if num_reviews == None:
        num_reviews = s.find('a', {'property': "reviewCount"}).get('content')    
except:
    num_reviews = 0
    review_error=1    
#review_score
try:
    review_score = s.find('div', attrs = {'class': 'heading_rating separator'}).find('img').get('content')
    if review_score == None:
        review_score = s.find('a', {'property': "ratingValue"}).get('content')
except:
    review_score = 0 
    score_error =1
#ranking
try:
    ranking = s.find('b', attrs = {'class':'rank_text wrap'}).text.strip().replace('#',"")
except:
    ranking = 999
    ranking_error=1
#tag
try:
    tags = ", ".join(label.text.strip() for label in s.select('div.detail > a') + s.select('span.collapse.hidden > a'))
except:
    tags = None
    tag_error =1
#visit_length
if s.find('b', text =search_visit_length):
    raw_visit_length = s.find('b', text =search_visit_length).next_sibling.strip()
else:
    raw_visit_length = None
#fee
if s.find(text= "Fee:"):
    fee = s.find(text= "Fee:").parent.next_sibling.upper()
else:
    fee = 'NO'
#description
if s.find('div', attrs = {'class': "listing_details"}):
    description = s.find('div', attrs = {'class': "listing_details"}).text.strip()
else:
    description = None
# error_message = [len(poi_detail_state_park_df), name, url,state_abb_error, state_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error]
# error_message_df.loc[len(poi_detail_state_park_df)] =error_message


# input_list = [len(poi_detail_state_park_df), name, street_address, city, state_abb, state, postal_code, country, full_address, latitude, longitude, num_reviews, review_score, ranking, tags, visit_length, fee, description, url, geo_content]
# poi_detail_state_park_df.loc[len(poi_detail_state_park_df)] = input_list

In [ ]:
import re
search_visit_length = re.compile('Recommended length of visit:')
test = s.find('b', text =search_visit_length).next_sibling.strip()
tests

In [ ]:
import json
with open('api_key_list.config') as f:
    data = json.load(f)

In [ ]:
data['api_key_list']

In [ ]:
df_poi = pd.read_csv('poi_detail_df.csv', index_col = 0)

In [ ]:


In [ ]:
search_fee = re.compile('Fee:')
df_poi = pd.read_csv('test_poi_detail_df.csv', index_col= 0)
poi_pages = db.TripAdvisor.find()
fee_lst = []
cnt = 0
for page in poi_pages:
    s = BS(page['html'], "html.parser")
    if s.find('b', text= search_fee):
        fee = s.find('b',text= search_fee).next_sibling.strip()
    else:
        fee = 'Unknown'
    fee_lst.append(fee)
    cnt+=1
    if cnt%100 ==0 :
        print '#items in fee lst: ',len(fee_lst)

In [ ]:
fee_lst

In [ ]:
error_message_df.to_csv('error_message.csv', encoding=('utf-8'))
poi_detail_state_park_df.to_csv("poi_detail_state_park.csv", encoding=('utf-8'))

In [ ]:
try:
    poi_additional_detail = poi_detail_state_park[['index','name','url','address','geo_content']]

    geo_content_detail=poi_detail_state_park.pop('geo_content')
except:
    None

In [ ]:
db.geo_content.drop()
db.geo_content.insert_many(poi_additional_detail.to_dict('records'))
poi_detail_state_park.to_sql('poi_detail_state_park_table',engine, if_exists = "replace")

In [ ]:
print poi_detail_state_park_df.shape, error_message_df.shape

In [ ]:
error_message_df.columns

In [ ]:


In [ ]:
# !pip install geocoder

In [ ]:
def find_latlng(full_address, name):
    g_address = geocoder.google(full_address)
    if g_address.ok:
        latitude= g_address.lat
        longitude = g_address.lng
        return latitude, longitude, g_address.content
    
    g_name = geocoder.google(name)
    if g_name.ok:
        latitude= g_name.lat
        longitude = g_name.lng
        return latitude, longitude, g_name.content
    else:
        latitude = None
        longitude = None
        return latitude, longitude, None

In [ ]:
def find_geo_location(full_address, name):
    query_result = google_places.nearby_search(location= full_address, keyword=name)
    if len(query_result.places) >0:
        best_result = query_result.places[0]
        latitude = best_result.geo_location["lat"]
        longitude = best_result.geo_location["lng"]
        google_result_name = best_result.name

        return latitude, longitude, google_result_name
    else:
        print name, "google API cant find here."
        return None, None, None

In [ ]:
poi_detail_state_park=pd.DataFrame(columns=['index','name','street_address','city','state_abb','state','postal_code','country','address','coord_lat','coord_long','num_reviews','review_score','ranking','tag','visit_length','fee','description','url',"geo_content"])

In [ ]:
error_message_df = pd.DataFrame(columns=['index','name','url','state_abb_error','address_error','geo_error','review_error','score_error','ranking_error','tag_error'])

In [ ]:
# poi_detail_state_park2=pd.DataFrame(columns=['index','name','street_address','city','state_abb','state','postal_code','country','address','coord_lat','coord_long','num_reviews','review_score','ranking','tag','visit_length','fee','description'])

In [ ]:
state_park_pages = db.TripAdvisor_state_park.find()
index = 0
for page in state_park_pages[len(poi_detail_state_park):]:
    s = BS(page['html'], "html.parser")
    #index
    #name
    error_message = []
    state_abb_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error = 0,0,0,0,0,0,0
    input_list = []
#     print name

    url = page['url']
    name = s.find('h1', attrs = {'class':'heading_name'}).text.strip()

    #street_address
    street_address = s.find('span', attrs = {'class':'street-address'}).text.strip()
    #city
    city = s.find('span', attrs = {'property':'addressLocality'}).text.strip()

    #state
    state_abb = s.find('span', attrs = {'property':'addressRegion'}).text.strip()
    if state_abb:
        try:
            state = state_abb_dict[state_abb]
        except:
            state_abb_error = 1
            state = state_abb
    else:
        state_abb = None
        state = None
    #postal_code
    postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()
    #country
    if s.find('span', attrs = {'property':'addressCountry'}).get('content'):
        country = s.find('span',{'property':'addressCountry'}).get('content')
    elif s.find('span',{'property':'addressCountry'}).get('content') == None:
        country = s.find('span',{'property':'addressCountry'}).text.strip()
    else:
        country = 'United States'
    #address
    if state_abb:
        full_address = street_address+', '+city+', '+state_abb+', '+postal_code[:5]+', '+country
    else:
        address_error =1
        full_address = street_address+', '+city+', '+postal_code[:5]+', '+country

    #coord
    try:
        latitude, longitude, geo_content = find_latlng(full_address, name)
    except:
        geo_error =1
        latitude, longitude, geo_content = None, None, None
#         break
    #num_reviews
    try:
        num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')
        if num_reviews == None:
            num_reviews = s.find('a', {'property': "reviewCount"}).get('content')    
    except:
        num_reviews = 0
        review_error=1    
    #review_score
    try:
        review_score = s.find('div', attrs = {'class': 'heading_rating separator'}).find('img').get('content')
        if review_score == None:
            review_score = s.find('a', {'property': "ratingValue"}).get('content')
    except:
        review_score = 0 
        score_error =1
    #ranking
    try:
        ranking = s.find('b', attrs = {'class':'rank_text wrap'}).text.strip().replace('#',"")
    except:
        ranking = 999
        ranking_error=1
    #tag
    try:
        tags = ", ".join(label.text.strip() for label in s.select('div.detail > a') + s.select('span.collapse.hidden > a'))
    except:
        tags = None
        tag_error =1
    #visit_length
    if s.find(text ="Recommended length of visit:"):
        visit_length = s.find(text ="Recommended length of visit:").parent.next_sibling
    else:
        visit_length = None
    #fee
    if s.find(text= "Fee:"):
        fee = s.find(text= "Fee:").parent.next_sibling.upper()
    else:
        fee = 'NO'
    #description
    if s.find('div', attrs = {'class': "listing_details"}):
        description = s.find('div', attrs = {'class': "listing_details"}).text.strip()
    else:
        description = None

    input_list = [index, name, street_address, city, state_abb, state, postal_code, country, full_address, latitude, longitude, num_reviews, review_score, ranking, tags, visit_length, fee, description, url, geo_content]
    poi_detail_state_park.loc[len(poi_detail_state_park)] = input_list
    
    error_message = [index, name, url,state_abb_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error]
    error_message_df.loc[len(poi_detail_state_park)] =error_message
    index += 1
#     time.sleep(1)

In [ ]:
poi_detail_state_park.shape

In [ ]:
url_df.shape

In [ ]:
import web_scraping_tripadvisor as web

In [ ]:
error_message_df.to_csv('error_message.csv', encoding=('utf-8'))
poi_detail_state_park.to_csv("poi_detail_state_park.csv", encoding=('utf-8'))

In [ ]:
try:
    poi_additional_detail = poi_detail_state_park[['index','name','url','address','geo_content']]

    geo_content_detail=poi_detail_state_park.pop('geo_content')
except:
    None

In [ ]:
db.geo_content.insert_many(poi_additional_detail.to_dict('records'))
poi_detail_state_park.to_sql('poi_detail_state_park_table',engine, if_exists = "replace")

In [ ]:
# poi_detail_state_park[poi_detail_state_park['name']== 'Jessie M. Honeyman Memorial State Park']

In [ ]:
# poi_detail_state_park.loc[2065]

In [ ]:
# poi_detail_state_park.drop(poi_detail_state_park.index[2065:], inplace = True)

In [ ]:
poi_detail_state_park.to_csv("poi_detail_state_park.csv", encoding=('utf-8'))

In [ ]:
poi_detail_state_park = pd.read_csv('poi_detail_state_park.csv')

In [ ]:
poi_detail_df = pd.read_csv('poi_detail_coords_1000.csv', index_col = 0)
# np.isnan(poi_detail_df.coord_lat[0])
poi_detail_df.coord_lat[:100]

In [ ]:
update_idx = poi_detail_state_park[poi_detail_state_park.coord_long == incorrect_long].index.values
for index in update_idx:
    full_address = poi_detail_state_park.loc[index].address
    name = poi_detail_state_park.loc[index].name
    try:
        print 'start index: ', index
        latitude, longitude, geo_content = find_latlng(full_address, name)
        poi_detail_state_park.set_value(index, 'coord_long', longitude)
        poi_detail_state_park.set_value(index, 'coord_lat', latitude)
        poi_detail_state_park.set_value(index, 'geo_content', geo_content)
        print poi_detail_state_park.loc[index][['coord_long','coord_lat','geo_content']]
    except:
        print 'why', index
        break

In [ ]:
poi_detail_state_park.to_csv('poi_detail_state_park_v2.csv', index=False)

In [ ]:
poi_additional_detail = poi_detail_state_park[['index','name','url','address','geo_content']]

In [ ]:
geo_content_detail=poi_detail_state_park.pop('geo_content')

In [ ]:
poi_detail_state_park['geo_content'] = geo_content_detail

In [ ]:
db.geo_content.insert_many(poi_additional_detail.to_dict('records'))

In [ ]:
poi_detail_state_park.to_sql('poi_detail_state_park_table',engine, if_exists = "replace")

In [ ]:
htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g35805-d1134861-Reviews-Cloud_Gate-Chicago_Illinois.html'
htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g60713-d127854-Reviews-San_Francisco_Zoo-San_Francisco_California.html'
htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g60750-d104122-Reviews-San_Diego_Zoo-San_Diego_California.html'
htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g60713-d102523-Reviews-Alcatraz_Island-San_Francisco_California.html'
# htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g32474-d4236729-Reviews-Harmony_Headlands_State_Park-Harmony_San_Luis_Obispo_County_California.html'
# htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g42926-d142814-Reviews-Cannon_Valley_Trail-Cannon_Falls_Minnesota.html'
# htmlurl = 'https://www.tripadvisor.com/Attraction_Review-g42891-d126627-Reviews-Paul_Bunyan_State_Trail-Brainerd_Minnesota.html'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

r=requests.get(htmlurl,headers=headers)
s = BS(r.text, 'html.parser')


# for div in s.find('div', attrs = {'class' : "separator" }):
#     for tag in div.:
#         if tag.name == 'div' and tag.get('class', '') == ['detail']:
#             print tag.text
#     for item in div.contents:
# #         print item
#         if type(item)== 'bs4.element.Tag' and item.name == "detail":
#             print 1234567890
st = time.time()
for div in s.findAll("div", {"class": "separator"}):
    for tag in div.contents:
        if isinstance(tag, bs4.element.Tag) and tag.get('class',"") == ['detail'] :
            tags =  tag.text.encode('utf8').strip()
print time.time() - st
tags

In [ ]:
# s.find('span',{'property':'addressCountry'}).get('content')
# s.select('span[property="addressCountry"]').get('content')

In [ ]:
#index

#name
name = s.find('h1', attrs = {'class':'heading_name'}).text.strip()
#city
city = s.find('span', attrs = {'property':'addressLocality'}).text.strip()
street_address = s.find('span', attrs = {'class':'street-address'}).text.strip()
#state
state_abb = s.find('span', attrs = {'property':'addressRegion'}).text.strip()
# state = state_abb_dict.keys()[state_abb_dict.values().index(state_abb)]
postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()

#country
country = s.find('span',{'property':'addressCountry'}).get('content')
#address
full_address = street_address+', '+city+', '+state_abb+', '+postal_code+', '+country

# from geopy.geocoders import Nominatim
# geolocator = Nominatim()
# location =geolocator.geocode(street_address+', '+city+', '+state_abb+', '+country)
# #coord_lat
# coord_lat = location.latitude 
# #coord_long
# coord_long =location.longitude
#num_reviews
# num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')

#review_score
# review_score = s.find('div', attrs = {'class': 'heading_rating separator'}).find('img').get('content')

#ranking
ranking = s.find('b', attrs = {'class':'rank_text wrap'}).text.strip().replace('#',"")

#tag
tags = ", ".join(label.text for label in s.select('div.detail > a') + s.select('span[class="collapse hidden"] > a'))

#visit_length
# visit_length = s.find(text ="Recommended length of visit:").parent.next_sibling

# #fee
# fee = s.find(text= "Fee:").parent.next_sibling

#description
description = s.find('div', attrs = {'class': "listing_details"}).text.strip()

In [ ]:
st =time.time()
d =", ".join(label.text.strip() for label in s.select('div.listing_details'))
# print d 
ed = time.time() -st
print ed

In [ ]:
st =time.time()
s.find('div', attrs = {'class': "listing_details"}).text.strip()
ed = time.time() -st
print ed

In [ ]:
# s.select('span.hidden.collapse > a')
postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()
print postal_code[:5]

In [ ]:
# num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')
t1 = time.time()
s.select('a[property="reviewCount"]')[0].get("content")
t2 = time.time()
s.find('a', {'property': "reviewCount"}).get('content')
et = time.time()
print et -t1, et-t2

In [ ]:
!pip install python-google-places

In [ ]:
from googleplaces import GooglePlaces, types, lang

YOUR_API_KEY = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'
google_places = GooglePlaces(YOUR_API_KEY)
print name, full_address
address1 = "393 County Road 174, Grove Hill, AL, 35975, United States"
query_result = google_places.nearby_search(location = address1, keyword=name)
query_result

In [ ]:


In [ ]:
name, full_address

In [ ]:
# s.select('div[class="detail"] > a')

In [ ]:
# <span class="collapse hidden">, <a href="/Attractions-g60713-Activities-c57-t68-San_Francisco_California.html">Nature &amp; Wildlife Areas</a></span>

In [ ]:
# detail = {}
# addition_info = s.find('div', attrs = {'class':'details_wrapper'}).text.strip('\n').replace("\n\n","\n").split('\n')
# # if addition_info[0] == 'Description':
# #     print addition_info[1]
# addition_info

# for info in addition_info:
#     info_list = info.split(':')
#     if info_list[0]=="Fee":
#         details["Fee"] = info_list[1]
#     else:
#         details["length of visit"] = info_list[1]
# details

In [ ]:
# fee = s.find('div', {'class':'details_wrapper'})
# fee
# length_visit = s.find(text ="Recommended length of visit:").parent.next_sibling
# length_visit
# fee = s.find(text= "Fee:").parent.next_sibling
# fee

In [ ]:
# description = s.find('div', attrs = {'class': "listing_details"}).text.strip()
# print description

In [ ]:
len(query_result.places)

In [ ]:
## different api try

#     try:
#         YOUR_API_KEY = 'AIzaSyDMbpmHBLl7dTOXUOMZP7Vi3zbMJlByEKM'
#         google_places = GooglePlaces(YOUR_API_KEY)
#         latitude, longitude, google_result_name =  find_geo_location(full_address, name)
#     except:
#         print "API error, try different key"
#         time.sleep(20)
#         try:
#             YOUR_API_KEY = 'AIzaSyAwx3xg6oJ0yiPV3MIunBa1kx6N7v5Tcw8'
#             google_places = GooglePlaces(YOUR_API_KEY)
#             latitude, longitude, google_result_name =  find_geo_location(full_address, name)
#         except:
#             print "both Key dont work"
#             print" location not found: ", name, "address : ", full_address
#             break
#     if location:
#         #coord_lat
#         poi_detail_state_park['coord_lat'] = location.latitude 
#         #coord_long
#         poi_detail_state_park['coord_long'] =location.longitude
#     else:
#         print" location not found: ", name, "address : ", full_address

In [ ]:
state_abb_error_ix = error_message_df[error_message_df['state_abb_error']==1]['index']
address_error_ix = error_message_df[error_message_df['address_error']==1]['index']

In [ ]:
# poi_detail_state_park_df.ix[state_abb_error_ix][['state_abb','state','country']]
poi_detail_state_park_df.ix[address_error_ix][['address','country']]

In [ ]:
error_message_df.columns

In [ ]:
poi_detail_state_park_df.columns

In [ ]:


In [ ]:
# poi_detail_state_park.fee[poi_detail_state_park.fee == 'NO']

In [ ]:
poi_detail_state_park.shape

In [ ]:
err = error_message_df[error_message_df.review_error == 1].index

In [ ]:
for i, link in enumerate(poi_detail_state_park_df.ix[err][['name','url']].url):
    print i, link

In [ ]:
error_message_df.tail()

In [ ]:
poi_detail_state_park_df.drop_duplicates('coord_lat').shape

In [ ]:
# htmlurl = 'https://en.wikipedia.org/wiki/List_of_areas_in_the_United_States_National_Park_System'
htmlurl= 'https://en.wikipedia.org/wiki/List_of_national_parks_of_the_United_States'
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

r=requests.get(htmlurl,headers=headers)
s = BS(r.text, 'html.parser')

In [ ]:
import re


name, state =None, None
table =  s.find('table', {"class" : "wikitable"})
# col_name =  [x.text for x in table.findAll("th",{"scope":"col"})]
# num_col = len(col_name)

# wiki_table= pd.DataFrame(columns=col_name)
national_park_US_df2 = pd.DataFrame(columns = ["name","state","description"])
for row in table.findAll("tr")[1:]:
    if row.find('th', {'scope':"row"}) != None:
        name = row.find('th', {'scope':"row"}).next_element.get('title')
    cells = row.findAll("td")
    #For each "tr", assign each "td" to a variable.
    if len(cells) == 6:
        state = cells[1].find(text=True)
        des = str("".join(cells[5].findAll(text=True)).encode('utf8'))
        description = re.sub(r"\[\d+\]","",des)

    national_park_US_df2.loc[len(national_park_US_df2)] = [name, state, description]

In [ ]:
# "".join(national_park_US_df2.desciption[0])

In [ ]:
for index in national_park_US_df.index:
    keyword = national_park_US_df.name[index].replace(' ','+')+"+"+national_park_US_df.state[index].replace(' ','+')
#     keyword = national_park_US_df.name[index].replace(' ','+')
    trip_url = "https://www.tripadvisor.com/Search?q=" +keyword+"&queryParsed=true&searchSessionId"
#     headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#     r=requests.get(trip_url,headers=headers)
#     test_s = BS(r.text, 'html.parser')
#     print index, trip_url

In [ ]:
import wikipedia
wiki =  wikipedia.page('List_of_national_parks_of_the_United_States')

In [ ]:


In [ ]:


In [ ]:
https://www.tripadvisor.com/Search?q=Acadia+National+Park&geo=28940&queryParsed=true&searchSessionId=F658A1719FACDE7E30D13912D3D1B3381492826820567ssid

In [ ]:
https://www.tripadvisor.com/Search?q=Pinnacles+national+park&queryParsed=true&searchSessionId

In [ ]:
https://www.tripadvisor.com/Search?q=Acadia+National+Park&queryParsed=true&searchSessionId

In [ ]:
test =national_park_US_df.name[0].replace(" ", "+")

In [ ]:
trip_url = "https://www.tripadvisor.com/Search?q=" +test+"&queryParsed=true&searchSessionId"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
r=requests.get(trip_url,headers=headers)
test_s = BS(r.text, 'html.parser')

In [ ]:
# trip_url_30 = "https://www.tripadvisor.com/Search?geo=191&redirect&q=national+parks&uiOrigin=MASTHEAD&ssrc=A&returnTo=__2F__Tourism__2D__g143030__2D__Great__5F__Basin__5F__National__5F__Park__5F__Nevada__2D__Vacations__2E__html&pid=3825&startTime=1492837392267&searchSessionId=F658A1719FACDE7E30D13912D3D1B3381492834657203ssid#&ssrc=g&o=0"
# trip_url_60 = "https://www.tripadvisor.com/Search?geo=191&redirect&q=national+parks&uiOrigin=MASTHEAD&ssrc=A&returnTo=__2F__Tourism__2D__g143030__2D__Great__5F__Basin__5F__National__5F__Park__5F__Nevada__2D__Vacations__2E__html&pid=3825&startTime=1492837392267&searchSessionId=F658A1719FACDE7E30D13912D3D1B3381492834657203ssid#&ssrc=g&o=30"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
r=requests.get(trip_url_30,headers=headers)
# trip_30 = BS(r.text, 'html.parser')
# r=requests.get(trip_url_60,headers=headers)
# trip_60 = BS(r.text, 'html.parser')

import urllib

trip_30_html = urllib.urlopen("trip_30.html").read()
trip_60_html = urllib.urlopen("trip_60.html").read()
trip_30 = BS(trip_30_html, 'html.parser')
trip_60 = BS(trip_60_html, 'html.parser')

In [ ]:
import re
to_do = re.compile("Things to do")
# trip_search_result = pd.DataFrame(columns=["name","url"])



for poi in trip_60.findAll('div', {"class": "title"}):
    name = poi.text
    for child in poi.next_siblings:
        if child.find(text=to_do) != None:
            url =  child.find(text=to_do).parent.get('href')
            
    trip_search_result.loc[len(trip_search_result)] = [name, url]
    
# for link in trip_30.findAll(text = to_do):
#     print link.parent.get('href')

#     name = poi.text
#     url = poi.get('onclick').replace("ta.setEvtCookie('Search_Results_Page', 'POI_Name', '', 0, '", "").replace("')","")

In [ ]:
# trip_search_result

In [ ]:
# # for url in trip_search_result.url:
    
# url = trip_search_result.url[0]
# headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
# r=requests.get(url,headers=headers)
# s = BS(r.text, 'html.parser')

In [121]:
def request_s(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    r=requests.get(url,headers=headers)
    s = BS(r.text, 'html.parser')
    return s
def thing_to_do(s):
    thing_to_do = pd.DataFrame(columns=["national_park_name","activate_name","url","num_reviews","score","ranking","tags"])
    national_park_name = s.find('h1', {"id": "HEADING"}).text.strip('\n').replace("Things to Do in ","")
    print "park name: ",national_park_name
    for activate in s.findAll('div', {"class":"listing_title"}):
        activate_name = activate.text.strip()
        url ="https://www.tripadvisor.com"+ activate.find('a').get("href")
        if activate.find_next('div', {"class":"rs rating"}) ==None:
            score, num_reviews = 0, 0
        else:
            score = activate.find_next('div', {"class":"rs rating"}).find('span').get('alt').replace(" of 5 bubbles","")
            num_reviews = activate.find_next('div', {"class":"rs rating"}).find('span', {'class': "more"}).text.strip().replace("reviews","")
        ranking = activate.find_next('div', {'class':"popRanking wrap"}).text.strip().replace("#","")[0]
        if activate.find_next('div',{'class':"tag_line"}).find('span') == None:
            tags = None
        else:
            tags = activate.find_next('div',{'class':"tag_line"}).find('span').text
        list_thing = [national_park_name, activate_name, url, num_reviews, score, ranking, tags]
        thing_to_do.loc[len(thing_to_do)] = list_thing
    return thing_to_do

In [ ]:
thing_to_do_national_park_df = pd.DataFrame(columns=["national_park_name","activate_name","url","num_reviews","score","ranking","tags"])
for url in national_park_US_df.url:
    thing_to_do_page = request_s(url)
    result =  (thing_to_do(thing_to_do_page))
    thing_to_do_national_park_df = thing_to_do_national_park_df.append(result, ignore_index=True)
    time.sleep(2)

In [ ]:
thing_to_do_national_park_df.to_csv('poi_detail_national_park_todo_df.csv',encoding=('utf-8'))
name_list = set(thing_to_do_national_park_df.national_park_name)

In [ ]:


In [ ]:


In [ ]:
['index','name','street_address','city','state_abb','state','postal_code','country','address','coord_lat','coord_long','num_reviews','review_score','ranking','tag','raw_visit_length','fee','description','url',"geo_content"]
# national_park_US_df["tags"] = "National Park"
# national_park_US_df["url"] = None
# national_park_US_df.rename(columns={'desciption': 'description'}, inplace=True)
# national_park_US_df["index"] = national_park_US_df.index
national_park_US_df.columns

In [ ]:
national_park_US_df = national_park_US_df[['index','name','street_address','city','state_abb','state','postal_code','country','address','coord_lat','coord_long','num_reviews','review_score','ranking','tag','raw_visit_length','fee','description','url',"geo_content", "check"]]
small_national_park = national_park_US_df[national_park_US_df.check == 0]
national_park_US_df.to_csv("poi_detail_national_park.csv", encoding=('utf-8'))

In [ ]:


In [ ]:
small_national_park.ix[1].url = "https://www.tripadvisor.com/Attractions-g143045-Activities-National_Park_of_American_Samoa_Tutuila.html"

In [ ]:
should_be_big = small_national_park.ix[[12, 14, 30, 45]]

In [ ]:
# small_national_park
# 1,49,52
small_national_park_df=pd.DataFrame(columns=['index','name','street_address','city','state_abb','state','postal_code','country','address','coord_lat','coord_long','num_reviews','review_score','ranking','tag','raw_visit_length','fee','description','url',"geo_content"])
error_message_df = pd.DataFrame(columns=['index','name','url','state_abb_error', 'state_error','address_error','geo_error','review_error','score_error','ranking_error','tag_error']) 
search_visit_length = re.compile('Recommended length of visit:')
search_fee = re.compile('Fee:')
cnt = 0
name_lst = []
full_address_lst = []
api_i = 0
for url in small_nation_park.url:
    s = request_s(url)
    input_list, error_message = [],[]
    state_abb_error, state_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error = 0,0,0,0,0,0,0,0
    latitude, longitude, geo_content = None, None, None

    name = s.find('h1', attrs = {'class':'heading_name'}).text.strip()

    #street_address
    street_address = s.find('span', attrs = {'class':'street-address'}).text.strip()
    #city
    city = s.find('span', attrs = {'property':'addressLocality'}).text.strip()
    #state
    state_abb = s.find('span', attrs = {'property':'addressRegion'}).text.strip()
    if state_abb:
        try:
            # state = state_abb_dict.keys()[state_abb_dict.values().index(state_abb)]
            state = abb2state_dict[state_abb]
        except:
            state_abb_error = 1
            state = state_abb
    else:
        state_error =1
        state_abb = None
        state = None
    #postal_code
    postal_code = s.find('span', attrs = {'property':'postalCode'}).text.strip()
    #country
    if s.find('span', attrs = {'property':'addressCountry'}).get('content'):
        country = s.find('span',{'property':'addressCountry'}).get('content')
    elif s.find('span',{'property':'addressCountry'}).get('content') == None:
        country = s.find('span',{'property':'addressCountry'}).text.strip()
    else:
        country = 'United States'
    #address
    if state:
        full_address = street_address+', '+city+', '+state+', '+postal_code[:5]+', '+country
    else:
        address_error =1
        full_address = street_address+', '+city+', '+postal_code[:5]+', '+country
    if (name in name_lst) and (full_address in full_address_lst):
        continue
    else:
        name_lst.append(name)
        full_address_lst.append(full_address)
    try:
#         latitude, longitude, geo_content = find_latlng(full_address, name, 1)
        result_longlat = find_latlng(full_address, name, 1)
        while result_longlat == False:
            api_i+=1
            result_longlat = find_latlng(full_address, name, 1)
    except:
        geo_error =1
        latitude, longitude, geo_content = None, None, None

    [latitude, longitude, geo_content] = result_longlat
    #num_reviews
    try:
        num_reviews = s.find('div', attrs = {'class': 'rs rating'}).find('a').get('content')

    except:
        try:
            num_reviews = s.find('a', {'property': "reviewCount"}).get('content')
        except:
            num_reviews = 0
            review_error=1    
    #review_score
    try:
        review_score = s.find('div', attrs = {'class': 'heading_rating separator'}).find('img').get('content')
    except:
        try:
            review_score = s.find('span', {'property': "ratingValue"}).get('content')
        except:
            review_score = 0 
            score_error =1
    #ranking
    try:
        ranking = s.find('b', attrs = {'class':'rank_text wrap'}).text.strip().replace('#',"")
    except:
        ranking = 999
        ranking_error=1
    #tag
    try:
        tags = ", ".join(label.text.strip() for label in s.select('div.detail > a') + s.select('span.collapse.hidden > a'))
    except:
        tags = None
        tag_error =1
    #visit_length
    if s.find('b', text =search_visit_length):
        raw_visit_length = s.find('b', text =search_visit_length).next_sibling.strip()
    else:
        raw_visit_length = None
    #fee
    if s.find('b', text= search_fee):
        fee = s.find('b',text= search_fee).next_sibling.strip()
    else:
        fee = 'Unknown'
    #description
    if s.find('div', attrs = {'class': "listing_details"}):
        description = s.find('div', attrs = {'class': "listing_details"}).text.strip()
    else:
        description = None
    error_message = [len(small_national_park_df), name, url,state_abb_error, state_error, address_error, geo_error, review_error, score_error, ranking_error, tag_error]
    error_message_df.loc[len(small_national_park_df)] =error_message


    input_list = [len(small_national_park_df), name, street_address, city, state_abb, state, postal_code, country, full_address, latitude, longitude, num_reviews, review_score, ranking, tags, raw_visit_length, fee, description, url, geo_content]
    small_national_park_df.loc[len(small_national_park_df)] = input_list

In [ ]:
for u in small_national_park_df.url:
    print u

In [ ]:
small_national_park_df.shape

In [ ]:
national_park_US_df =national_park_US_df.drop("check",1)

In [ ]:
national_park_US_df.to_csv("poi_detail_national_park.csv", encoding=('utf-8'))

In [ ]:


In [ ]:
# national_park_US_df["url"][national_park_US_df.index == 52] = "https://www.tripadvisor.com/Attractions-g147411-Activities-Virgin_Islands_National_Park_St_John_U_S_Virgin_Islands.html"
name_list=[]
for name in national_park_US_df["name"]:
    name_list.append(name.split(",",1)[0])
national_park_US_df["name"]=name_list

In [ ]:
national_park_US_df[national_park_US_df.check ==0]
#small 12 14 30 45
small_nation_park = national_park_US_df.ix[[12 ,14, 30, 45]]
national_park_US_df = national_park_US_df.drop(national_park_US_df.index[[12,14,30,45]])

In [ ]:
for address in national_park_US_df.address:
    for address2 in trip_search_result.address:
        if address == address2:
            national_park_US_df.name[national_park_US_df.address == address] = trip_search_result.name[trip_search_result.address ==address2].values[0]
#             national_park_US_df.url[national_park_US_df.address == address] = trip_search_result.url[trip_search_result.address ==address2].values[0]

In [ ]:
trip_search_result.name[trip_search_result.address =="Arches National Park, Utah, USA"].values[0]
# trip_search_result[(trip_search_result.address =="Arches National Park, Utah, USA")].index

In [ ]:
# address, lat, lng = [],[],[]

# park name:  Acadia National Park
# park name:  National Park of American Samoa
list1,list2 =[],[]
# street_address, city, state_abb, postal_code, country,geo_content = [],[],[],[],[],[]

g = geocoder.google("National Park of American Samoa")
name = "National Park of American Samoa"
city = g.city
street_address = g.street_long
state_abb = g.state
state = g.state_long
postal_code=g.postal
country=g.country_long
geo_content=g.content
full_address = g.address
latitude =g.lat
longitude = g.lng
num_reviews, review_score, ranking = None, None ,None
tags = "National Park"
raw_visit_length, fee = None, None 
# url = "https://www.tripadvisor.com/Attractions-g143010-Activities-Acadia_National_Park_Mount_Desert_Island_Maine.html"
url ="https://www.tripadvisor.com/Attractions-g143045-Activities-National_Park_of_American_Samoa_Tutuila.html"

In [ ]:
national_park_US_df

In [ ]:
# list2 = [1, name, street_address, city, state_abb, state, postal_code, country, full_address, latitude, longitude, num_reviews, review_score, ranking, tags, raw_visit_length, fee, description, url, geo_content, 1]

In [ ]:
# national_park_US_df["address"] = address
# national_park_US_df["lat"] = lat
# national_park_US_df["lng"] = lng
national_park_US_df["city"] = city
national_park_US_df["street_address"] = street_address
national_park_US_df["state_abb"] = state_abb
national_park_US_df["postal_code"] = postal_code
national_park_US_df["country"] = country
national_park_US_df["geo_content"] = geo_content

In [ ]:
# national_park_US_df

In [ ]:


In [ ]:
address_match = []
for address in national_park_US_df.address:
    for address2 in trip_search_result.address:
        if address == address2:
            address_match.append(address)

In [ ]:


In [ ]:
check_list = []
for add in trip_search_result.address:
    if add in address_match:
        check_list.append(1)
    else:
        check_list.append(0)
trip_search_result['check'] = check_list

In [ ]:
# national_park_US_df[national_park_US_df.check ==0]

In [ ]:
# trip_search_result[trip_search_result.check == 0]

In [ ]:
# trip_search_result.name
# for link in trip_search_result.ix[[52,54,58,53,45,57,56]].url:
#     print link

In [ ]:
# pop_list = [52,54,58,53,45,57,56]
# trip_search_result = trip_search_result.drop(pop_list).sort()

In [ ]:
national_park_US_df

In [ ]:
new_poi_df= pd.read_csv("new_poi_df.csv", index_col=0)

In [ ]:
new_poi_df.poi_type

In [98]:
national_park_US_df.to_csv("poi_detail_national_park.csv", encoding = ('utf-8'))
national_park_US_df = pd.read_csv("poi_detail_national_park.csv", encoding = ('utf-8'), index_col=0)

In [100]:
state_park_df2.to_csv("poi_detail_national_park.csv", encoding = ('utf-8'))
state_park_df = pd.read_csv("poi_detail_national_park.csv", encoding = ('utf-8'), index_col=0)

In [101]:
US_park= pd.concat([state_park_df, national_park_US_df])

In [104]:
US_park.to_csv("poi_detail_us_park.csv", encoding = ('utf-8'))

In [ ]:


In [2]:
import web_scraping_tripadvisor as web
client = MongoClient()
db = client.zoeshrm
db_html = db.TripAdvisor_state_park.find()
print db_html.count()
state_park_df2, error_state_park_df2 = web.state_park_web(db_html)
state_park_df2.shape


2647
Out[2]:
(2647, 23)

In [41]:
sorted(state_park_df2.area)


Out[41]:
[33.209456321338145,
 33.209457674359349,
 33.20945823627752,
 33.209458562723732,
 33.209458612501955,
 33.209458883391179,
 33.209459529930015,
 33.209459565633686,
 33.209459813843239,
 33.209459854299318,
 33.209459925072125,
 33.20945995241204,
 33.209459956788066,
 33.209459979970795,
 33.209459991374715,
 33.209459992788865,
 33.209460012291231,
 33.209460012457249,
 33.209460050254691,
 33.20946005633003,
 33.209460073057706,
 33.209460076224211,
 33.209460119454789,
 33.209460151368894,
 33.209460160166351,
 33.209460171492012,
 33.209460176201262,
 33.209460184436423,
 33.209460194023229,
 33.209460198396357,
 33.209460207860722,
 33.209460230746785,
 33.209460234140664,
 33.209460235752942,
 33.209460236991255,
 33.209460241440212,
 33.209460253401396,
 33.209460253401396,
 33.209460253923559,
 33.209460256502389,
 33.209460259434671,
 33.20946030467379,
 33.209460305748841,
 33.209460306930836,
 33.209460308886406,
 33.209460309157627,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315935594,
 33.209460315979321,
 33.209460315979321,
 33.209460315979321,
 33.209460315979321,
 33.209460315979321,
 33.209460315979321,
 33.209460315979321,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316023055,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 33.209460316110523,
 ...]

In [67]:
area_list=[]
for x in state_park_df2.area:
    if x<34:
        area_list.append(60)
    elif x<500:
        area_list.append(120)
    elif x<2000:
        area_list.append(180)
    else:
        area_list.append(240)
for x in [60,120,180,240]:
    print x,": ", area_list.count(x)
state_park_df2["time_base_on_area"] = area_list


60 :  1937
120 :  370
180 :  294
240 :  46

In [93]:
# state_park_df2["adjusted_visit_length"][state_park_df2.adjusted_visit_length ==0] =state_park_df2["time_base_on_area"][state_park_df2.adjusted_visit_length ==0]
# state_park_df2["poi_type"] = "StatePark"
# state_park_df2.drop("time_base_on_area", 1, inplace=True)
# state_park_df2.drop("area", 1, inplace=True)
# state_park_df2.drop("index", 1, inplace=True)

state_park_df2.columns


Out[93]:
Index([u'name', u'street_address', u'city', u'county', u'state_abb', u'state',
       u'postal_code', u'country', u'address', u'coord_lat', u'coord_long',
       u'num_reviews', u'review_score', u'ranking', u'tag',
       u'raw_visit_length', u'fee', u'description', u'url', u'geo_content',
       u'adjusted_visit_length', u'poi_type'],
      dtype='object')

In [105]:
import psycopg2
import simplejson
import numpy as np
from distance import *

conn_str = "dbname='travel_with_friends' user='Gon' host='localhost'"
my_key = 'AIzaSyDJh9EWCA_v0_B3SvjzjUA3OSVYufPJeGE'

def find_county(state, city):
    '''
    Only valid within the U.S.
    '''
    conn = psycopg2.connect(conn_str)
    cur = conn.cursor()
    city = city.replace('_',' ')
    cur.execute("select county from poi_detail_table where city = '%s' and state = '%s';" %(city.title(), state.title()))

    county = cur.fetchone()
    conn.close()
    if county:
        return county[0]
    else:
        return None
county_list= []
county = None
for index in national_park_US_df.index:
    try:
        if (national_park_US_df.city[index] == 'New York City') and (national_park_US_df.state[index] == 'New York'):
            county = 'NEW YORK'
        elif "administrative_area_level_2" in national_park_US_df.geo_content[index]:
            geo_content = ast.literal_eval(national_park_US_df.geo_content[index])['results']
            for info in geo_content[0]['address_components']:
                if info['types'][0] == "administrative_area_level_2":
                    county = info['short_name'].replace(' County', '').upper().encode('utf-8').strip()
        else:
            county = find_county(national_park_US_df.state[index], national_park_US_df.city[index])
    except:
        county = None
    county_list.append(county)
national_park_US_df['county'] = county_list

In [119]:
for x in range(len(state_park_df)):
    if state_park_df.tag[x].find("State Parks")==-1:
        print state_park_df.tag[x]


Historic Sites, Sights & Landmarks
Caverns & Caves, Nature & Parks
Nature & Parks
Nature & Parks
Bridges, Sights & Landmarks
Nature & Wildlife Areas, Nature & Parks
Specialty Museums, Museums
Nature & Wildlife Areas, Nature & Parks
Specialty Museums, Museums
History Museums, Museums
Historic Sites, Nature & Wildlife Areas, Nature & Parks, Sights & Landmarks
Bars & Clubs, Nightlife
Hiking Trails, Outdoor Activities, Nature & Parks
Bodies of Water, Nature & Parks
Specialty Museums, Museums
Nature & Wildlife Areas, Nature & Parks
Nature & Parks
Gardens, Nature & Parks
Specialty Museums, Museums
Theaters, Concerts & Shows
Other Outdoor Activities, Outdoor Activities
Beaches, Outdoor Activities, Nature & Parks
Nature & Parks
Nature & Wildlife Areas, Nature & Parks
Historic Sites, Sights & Landmarks
Beaches, Outdoor Activities, Nature & Parks
Horseback Riding Tours, Nature & Wildlife Areas, Outdoor Activities, Nature & Parks
Nature & Wildlife Areas, Nature & Parks
Specialty Museums, Museums
History Museums, Specialty Museums, Museums
Specialty Museums, Museums
Nature & Parks
Beaches, Nature & Parks, Outdoor Activities
Beaches, Nature & Parks, Outdoor Activities
Motorcycle Trails, Off-Road & ATV Trails, Outdoor Activities, Nature & Parks
Nature & Wildlife Areas, Nature & Parks
Bars & Clubs, Nightlife
Historic Walking Areas, Nature & Parks, Sights & Landmarks
Sacred & Religious Sites, Sights & Landmarks
Caverns & Caves, Nature & Parks
Nature & Parks
Hiking Trails, Nature & Parks, Outdoor Activities
Beaches, Nature & Parks, Outdoor Activities
Nature & Parks
Hiking Trails, Nature & Parks, Outdoor Activities
Beaches, Nature & Parks, Outdoor Activities
Beaches, Outdoor Activities, Nature & Parks
Bodies of Water, Nature & Parks
Beaches, Nature & Parks, Outdoor Activities
Historic Sites, Sights & Landmarks
Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Beaches, Nature & Parks, Outdoor Activities
Theme Parks, Water & Amusement Parks
Hiking Trails, Canyons, Nature & Parks, Outdoor Activities
Theaters, Concerts & Shows
Castles, Historic Sites, Sights & Landmarks, Monuments & Statues
Gift & Specialty Shops, Shopping
Nature & Parks
Golf Courses, Outdoor Activities
Theme Parks, Disney Parks & Activities, Water & Amusement Parks
Nature & Wildlife Areas, Nature & Parks
Nature & Parks
Neighborhoods, Sights & Landmarks
Points of Interest & Landmarks, Neighborhoods, Sights & Landmarks
Bodies of Water, Nature & Parks
Historic Sites, Sights & Landmarks
Historic Sites, Sights & Landmarks
Beaches, Outdoor Activities, Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Beaches, Nature & Parks, Outdoor Activities
Beaches, Outdoor Activities, Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Nature & Parks
Bodies of Water, Nature & Wildlife Areas, Nature & Parks
Government Buildings, History Museums, Sights & Landmarks, Museums
Beaches, Nature & Parks, Outdoor Activities
Specialty Museums, Museums
Beaches, Nature & Parks, Outdoor Activities
Beaches, Nature & Parks, Outdoor Activities
Beaches, Outdoor Activities, Nature & Parks
Specialty Museums, Museums
Nature & Parks
Beaches, Hiking Trails, Nature & Parks, Boat Tours & Water Sports, Tours, Outdoor Activities
Beaches, Outdoor Activities, Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Lighthouses, Sights & Landmarks
Beaches, Nature & Parks, Outdoor Activities
Nature & Wildlife Areas, Nature & Parks
Lighthouses, Sights & Landmarks
Outdoor Activities, Nature & Parks, Zoos & Aquariums
Scenic Drives, Sights & Landmarks, Outdoor Activities
Bars & Clubs, Nightlife
Beaches, Nature & Parks, Outdoor Activities
History Museums, Museums
Scenic Railroads, Tours
Specialty Museums, Points of Interest & Landmarks, Museums, Sights & Landmarks, Monuments & Statues, Historic Sites
Beaches, Outdoor Activities, Nature & Parks
Nature & Wildlife Areas, Nature & Parks, Bodies of Water
Gift & Specialty Shops, Shopping
Beaches, Outdoor Activities, Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Canyons, Biking Trails, Nature & Parks, Outdoor Activities
Scenic Drives, Sights & Landmarks, Outdoor Activities
Government Buildings, Sights & Landmarks
Beaches, Outdoor Activities, Nature & Parks
Points of Interest & Landmarks, Sights & Landmarks
Nature & Wildlife Areas, Nature & Parks
History Museums, Museums
Hiking Trails, Outdoor Activities, Nature & Parks
Beaches, Nature & Parks, Outdoor Activities
Other Outdoor Activities, Outdoor Activities
Architectural Buildings, Nature & Parks, Sights & Landmarks, Historic Sites
Bodies of Water, Educational sites, Nature & Parks, Sights & Landmarks, Nature & Wildlife Areas
Beaches, Outdoor Activities, Nature & Parks
Beaches, Outdoor Activities, Nature & Parks
Nature & Wildlife Areas, Nature & Parks
Movie Theaters, Fun & Games
Beaches, Nature & Wildlife Areas, Nature & Parks, Outdoor Activities
Gardens, Nature & Parks
Canyons, Nature & Parks
Architectural Buildings, Historic Sites, Sights & Landmarks, Monuments & Statues, Points of Interest & Landmarks
Beaches, Outdoor Activities, Nature & Parks
Other Outdoor Activities, Outdoor Activities
Nature & Parks
Hiking Trails, Nature & Parks, Outdoor Activities
Beaches, Nature & Parks, Outdoor Activities
Nature & Parks
Mountains, Nature & Parks
Nature & Parks
Nature & Parks
Nature & Parks
Points of Interest & Landmarks, Nature & Parks, Sights & Landmarks, Scenic Walking Areas
Waterfalls, Nature & Parks
Nature & Parks
Nature & Wildlife Areas, Nature & Parks
Gardens, Ski & Snowboard Areas, Outdoor Activities, Nature & Parks
Nature & Parks
Hiking Trails, Outdoor Activities, Nature & Parks
Nature & Parks
Monuments & Statues, Points of Interest & Landmarks, Sights & Landmarks
Hiking Trails, Biking Trails, Nature & Parks, Outdoor Activities
Nature & Parks
Hiking Trails, Nature & Parks, Outdoor Activities
Beaches, Nature & Parks, Outdoor Activities
Specialty Museums, Museums
Room Escape Games, Fun & Games
Battlefields, Sights & Landmarks
Nature & Parks
National Parks, Nature & Parks
Bodies of Water, Nature & Parks
Ski & Snowboard Areas, Outdoor Activities, Nature & Parks
Farms, Sights & Landmarks
National Parks, Nature & Parks
Nature & Parks
Game & Entertainment Centers, Fun & Games
Forests, Nature & Parks
Geologic Formations, Nature & Parks
Hiking Trails, Nature & Parks, Outdoor Activities, Bodies of Water
Nature & Parks
Nature & Parks
Hiking Trails, Nature & Parks, Outdoor Activities
Nature & Parks
Nature & Parks
Hiking Trails, Outdoor Activities, Nature & Parks
Gift & Specialty Shops, Shopping
Points of Interest & Landmarks, Sights & Landmarks
Other Outdoor Activities, Outdoor Activities
Forests, Nature & Parks
Forests, Nature & Parks
Forests, Nature & Parks
Forests, Nature & Parks
Nature & Wildlife Areas, Nature & Parks
Nature & Wildlife Areas, Bodies of Water, Nature & Parks
Nature & Parks
Historic Sites, Sights & Landmarks
Bars & Clubs, Dance Clubs & Discos, Nightlife
Scenic Railroads, Tours
History Museums, Walking Tours, Sights & Landmarks, Museums, Tours, Historic Sites, Factory Tours, Historical & Heritage Tours
Gardens, Nature & Wildlife Areas, Nature & Parks, Outdoor Activities, Hiking Trails
Islands, Nature & Parks
Architectural Buildings, Observation Decks & Towers, Sights & Landmarks, Points of Interest & Landmarks
Historic Sites, Sights & Landmarks
Historic Sites, Sights & Landmarks
Nature & Wildlife Areas, Nature & Parks
History Museums, Museums
Hiking Trails, Outdoor Activities, Nature & Parks
Historic Sites, Sights & Landmarks
Bodies of Water, Biking Trails, Nature & Parks, Outdoor Activities, Nature & Wildlife Areas, Equestrian Trails, Hiking Trails
Historic Sites, Sights & Landmarks
Bodies of Water, Outdoor Activities, Fun & Games, Nature & Parks, Hiking Trails, Playgrounds, Nature & Wildlife Areas
National Parks, Nature & Parks
Golf Courses, Outdoor Activities
Specialty Museums, Museums
Art Museums, Museums
Historic Sites, Sights & Landmarks
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-119-cee462a1229b> in <module>()
      1 for x in range(len(state_park_df)):
----> 2     if state_park_df.tag[x].find("State Parks")==-1:
      3         print state_park_df.tag[x]
      4 

AttributeError: 'float' object has no attribute 'find'

In [65]:
import matplotlib.pyplot as plt
plt.figure()
%matplotlib inline
state_park_df2[state_park_df2.area >34]["area"].plot.hist(bins=1000,xlim=[35,2000])


Out[65]:
<matplotlib.axes._subplots.AxesSubplot at 0x1335684d0>

In [ ]:
import geocoder
g= geocoder.google("yellowstone national park")

In [ ]:
box = g.bbox
g.bbox

In [ ]:
find_area(box)

In [ ]:
from math import cos, radians
def find_area(box):
#     to make thing simple, we use 111.111 
#     we assume the distance:
#     Latitude: 1 deg = 110.574 km
#     Longitude: 1 deg = 111.320*cos(latitude) km
#     if we need more accuracy number, we need to use different approach.
#     ex. using Shapely to calculate polygon/ WGS84 formula
    lat = (box["southwest"][0]-box["northeast"][0])*110.574
    lng = 111.320*cos(radians(box["southwest"][1]-box["northeast"][1]))
    return abs(lat*lng)

In [ ]:
from math import sin, cos, sqrt, atan2, radians

# approximate radius of earth in km
R = 6373.0

lat1 = radians(52.2296756)
lon1 = radians(21.0122287)
lat2 = radians(52.406374)
lon2 = radians(16.9251681)

dlon = lon2 - lon1
dlat = lat2 - lat1

a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
c = 2 * atan2(sqrt(a), sqrt(1 - a))

distance = R * c

print("Result:", distance)
print("Should be:", 278.546, "km")

In [ ]:


In [ ]:


In [ ]:
def raw_to_adjust_time(raw):
    adjusted_time =0
    if raw == "1-2 hours":
        adjusted_time = 120
    if raw == "2-3 hours":
        adjusted_time = 180
    if raw == "More than 3 hours":
        adjusted_time = 360
    if raw == "<1 hour":
        adjusted_time = 60
    return adjusted_time

In [ ]:
conn_str = "dbname='travel_with_friends' user='Gon' host='localhost'"

event_ids = [353,   355,   354,   360,  972,   361,   356,   357,  352,   359]
conn = psycopg2.connect(conn_str)  
cur = conn.cursor()

# points = np.zeros((len(event_ids), 5))
points = []
for i,v in enumerate(event_ids):
#     print i, v
    cur.execute("select index, coord0, coord1, city , poi_rank from poi_detail_table   where index = %i;"%(float(v)))
    a = cur.fetchone()
    points.append(list(a))    
#     points[i] = cur.fetchone()
conn.close()
# points = np.array(points)
# print points

def check_NO_1(poi_list, city_name):
    for i, poi in enumerate(poi_list):
        if poi[3] == city_name and poi[4]==1:
            number_one =poi_list.pop(i)
            return np.vstack((np.array(number_one),np.array(poi_list)))
    return poi_list

In [ ]:
new_points= check_NO_1(points, "Detroit")

In [ ]:
print new_points

In [123]:
us_park_df = pd.read_csv("poi_detail_us_park.csv",encoding = ('utf-8'), index_col=0)
poi_detail_v2 = pd.read_csv("new_poi_df_v2.csv" ,encoding = ('utf-8'), index_col=0)

In [126]:
poi_detail_v2.shape, us_park_df.shape


Out[126]:
((14306, 22), (2706, 22))

In [128]:
poi_detail_table_final_v1= pd.concat([poi_detail_v2, us_park_df])

In [132]:
poi_detail_table_final_v1.to_csv("poi_detail_table_final_v1.csv", encoding = ('utf-8'))

In [134]:
import os
os.getcwd()


Out[134]:
'/Users/Gon/Desktop/travel_with_friends'

In [135]:
user = pd.read_csv("auth_user.csv")

In [138]:
from sqlalchemy import create_engine

engine = create_engine('postgresql://{}@localhost:5432/travel_with_friends'.format("Gon"))
user.to_sql('auth_user',engine, if_exists = "replace")

In [9]:
poi = pd.read_csv("poi_detail_table_final_v1.csv", encoding = ('utf-8'), index_col =0)

In [157]:
# poi[["num_reviews","review_score"]].fillna(0)
poi[["num_reviews","review_score"]] = poi[["num_reviews","review_score"]].fillna(0)

In [332]:
poi2.to_csv("poi_detail_table_final_v1.csv", encoding = ('utf-8'))

In [1]:
print "github go back to 53294e143b1a3565eedf3854f25b5b6ecee7b813"


github go back to 53294e143b1a3565eedf3854f25b5b6ecee7b813

In [10]:
poi.shape


Out[10]:
(17012, 22)

In [14]:
poi.columns


Out[14]:
Index([u'address', u'adjusted_visit_length', u'city', u'coord_lat',
       u'coord_long', u'country', u'county', u'description', u'fee',
       u'geo_content', u'name', u'num_reviews', u'poi_type', u'postal_code',
       u'ranking', u'raw_visit_length', u'review_score', u'state',
       u'state_abb', u'street_address', u'tag', u'url'],
      dtype='object')

In [98]:
poi2 = poi.drop_duplicates(subset = ["name","address"])

In [99]:
poi2.shape


Out[99]:
(16654, 22)

In [76]:
poi2.shape


Out[76]:
(16298, 22)

In [84]:
new_index = poi2.index.tolist()
type(new_index)


Out[84]:
list

In [85]:
poi3= poi[~poi.index.isin(new_index)]

In [86]:
poi3.shape


Out[86]:
(358, 22)

In [90]:
problem_name_list=[]
problem_index_list =[]
for i in poi3.index.tolist():
    for j in poi3.index.tolist():
        if poi3.name[i] == poi3.name[j]:
            if (poi3.url[i]!=poi3.url[j]) :
                problem_name_list.append(poi3.name[i])
                problem_index_list.append(i)

In [91]:
# poi3.ix[problem_index_list]

In [108]:
a = list(set(problem_name_list))
len(a)


Out[108]:
8

In [215]:
address need fix 
add col icon_img_url after push on s3


Out[215]:
0        59th to 110th Street, New York City, New York,...
1        180 Greenwich St, New York City, New York, 100...
2        1000 5th Ave, New York City, New York, 10028, ...
3        30 Rockefeller Center, New York City, New York...
4               , New York City, New York, , United States
5        89 E 42nd Street, New York City, New York, 100...
6          , New York City, New York, 10014, United States
7               , New York City, New York, , United States
8        1 E. 70th St., New York City, New York, 10021,...
9        350 Fifth Avenue, New York City, New York, 101...
10         , New York City, New York, 10017, United States
11         , New York City, New York, 10004, United States
12       1 World Trade Center, New York City, New York,...
13       460 Madison Avenue, New York City, New York, 1...
14       420 W 14th St, New York City, New York, 10014,...
15       4 South Street, New York City, New York, 10004...
16       1260 6th Avenue (Avenue of the Americas), New ...
17       45 Rockefeller Plaza, New York City, New York,...
18       476 5th Ave New York, NY 10018, New York City,...
19       4 Penn Plaza, New York City, New York, 10121, ...
20       99 Margaret Corbin Drive, New York City, New Y...
21       103 Orchard Street, New York City, New York, 1...
22       Broadway, New York City, New York, 10036, Unit...
23       11 West 53rd Street, New York City, New York, ...
24       1200 Getty Center Dr, Los Angeles, California,...
25       100 Universal City Plaza, Los Angeles, Califor...
26       2800 E. Observatory Rd., Los Angeles, Californ...
27       700 Exposition Park Drive, Los Angeles, Califo...
28       15151 Bledsoe St, Los Angeles, California, 913...
29       4730 Crystal Springs Dr, Los Angeles, Californ...
                               ...                        
16624    Hawaiʻi Volcanoes National Park, Hawaii Volcan...
16625    101 Reserve St, Hot Springs, AR, 71901, United...
16626             Isle Royale National Park, Michigan, USA
16627           Joshua Tree National Park, California, USA
16628    Katmai National Park and Preserve, King Salmon...
16629              Kenai Fjords National Park, Alaska, USA
16630          Kings Canyon National Park, California, USA
16631    Kobuk Valley National Park, 171 3rd Ave, Kotze...
16632    Lake Clark National Park and Preserve, General...
16633       Lassen Volcanic National Park, California, USA
16634            Mammoth Cave National Park, Kentucky, USA
16635        Mesa Verde National Park, Mesa Verde, CO, USA
16636         Mount Rainier National Park, Washington, USA
16637        North Cascades National Park, Washington, USA
16638    Olympic National Park, 3002 Mt Angeles Rd, Por...
16639         Petrified Forest National Park, Arizona, USA
16640    5000 CA-Hwy 146, Paicines, CA, 95043, United S...
16641    Redwood National and State Parks, California, USA
16642          Rocky Mountain National Park, Colorado, USA
16643                  Saguaro National Park, Arizona, USA
16644               Sequoia National Park, California, USA
16645              Shenandoah National Park, Virginia, USA
16646    Theodore Roosevelt National Park, North Dakota...
16647          Virgin Islands National Park, St John, USVI
16648    Voyageurs National Park, 360 Minnesota 11, Int...
16649           Wind Cave National Park, South Dakota, USA
16650    Wrangell-St. Elias National Park & Preserve, A...
16651             Yellowstone National Park, United States
16652              Yosemite National Park, California, USA
16653                        Zion National Park, Utah, USA
Name: address, dtype: object

In [217]:
import os
import time
img_list = os.listdir("img_file/")

In [214]:
def save_img(path, img_url):
    f = open(path,'wb')
    f.write(urllib.urlopen(img_url).read())
    f.close()
    
item_done = len(os.listdir("img_file/"))-1
if item_done < 0:
    item_done = 0
    
for i, link in enumerate(poi2.url[item_done:]):
    try:
        s = request_s(link)
        try:
            img_url = s.select('img[class="centeredImg"]')[0]["src"]
        except IndexError:
            print link 
            pass
        path = 'img_file/'+str(item_done+i)+".jpg"
        save_img(path, img_url)
        time.sleep(5)
    except:
        time.sleep(300)
        s = request_s(link)
        try:
            img_url = s.select('img[class="centeredImg"]')[0]["src"]
        except IndexError:
            print link 
            pass
        path = 'img_file/'+str(item_done+i)+".jpg"
        save_img(path, img_url)
        time.sleep(10)


http://www.tripadvisor.com/Attraction_Review-g58026-d7692129-Reviews-The_Fresh_Market-Norfolk_Virginia.html
http://www.tripadvisor.com/Attraction_Review-g40424-d169779-Reviews-Eldorado_Resort_Casino_Shreveport-Shreveport_Louisiana.html
http://www.tripadvisor.com/Attraction_Review-g46890-d5835626-Reviews-Bennigan_s_Vineland-Vineland_New_Jersey.html
http://www.tripadvisor.com/Attraction_Review-g43242-d3279793-Reviews-Fur_Ever_Wild-Lakeville_Minnesota.html
https://www.tripadvisor.com/Attraction_Review-g33799-d12122510-Reviews-Haddam_Island_State_Park-Haddam_Connecticut.html?t=33799
https://www.tripadvisor.com/Attraction_Review-g33754-d11707436-Reviews-Old_Furnace_State_Park-Danielson_Killingly_Mystic_Country_Connecticut.html?t=33813
https://www.tripadvisor.com/Attraction_Review-g43640-d7161772-Reviews-Garden_Island_State_Recreation_Area-Williams_Minnesota.html?t=1
https://www.tripadvisor.com/Attraction_Review-g28968-d210301-Reviews-Federation_Forest_State_Park-Washington.html?t=1
https://www.tripadvisor.com/Attractions-g143010-Activities-Acadia_National_Park_Mount_Desert_Island_Maine.html
https://www.tripadvisor.com/Attractions-g143045-Activities-National_Park_of_American_Samoa_Tutuila.html
https://www.tripadvisor.com/Attractions-g143011-Activities-Arches_National_Park_Utah.html
https://www.tripadvisor.com/Attractions-g143012-Activities-Badlands_National_Park_South_Dakota.html
https://www.tripadvisor.com/Attractions-g60733-Activities-Big_Bend_National_Park_Texas.html
https://www.tripadvisor.com/Attractions-g143013-Activities-Biscayne_National_Park_Florida.html
https://www.tripadvisor.com/Attractions-g143014-Activities-Black_Canyon_Of_The_Gunnison_National_Park_Colorado.html
https://www.tripadvisor.com/Attractions-g143015-Activities-Bryce_Canyon_National_Park_Utah.html
https://www.tripadvisor.com/Attractions-g143016-Activities-Canyonlands_National_Park_Utah.html
https://www.tripadvisor.com/Attractions-g143017-Activities-Capitol_Reef_National_Park_Utah.html
https://www.tripadvisor.com/Attractions-g143018-Activities-Carlsbad_Caverns_National_Park_New_Mexico.html
https://www.tripadvisor.com/Attractions-g143019-Activities-Channel_Islands_National_Park_California.html
https://www.tripadvisor.com/Attractions-g143020-Activities-Crater_Lake_National_Park_Oregon.html
https://www.tripadvisor.com/Attractions-g143021-Activities-Death_Valley_National_Park_California.html
https://www.tripadvisor.com/Attractions-g143022-Activities-Denali_National_Park_and_Preserve_Alaska.html
https://www.tripadvisor.com/Attractions-g143023-Activities-Dry_Tortugas_National_Park_Florida_Keys_Florida.html
https://www.tripadvisor.com/Attractions-g143024-Activities-Everglades_National_Park_Florida.html
https://www.tripadvisor.com/Attractions-g143025-Activities-Gates_Of_The_Arctic_National_Park_and_Preserve_Alaska.html
https://www.tripadvisor.com/Attractions-g143026-Activities-Glacier_National_Park_Montana.html
https://www.tripadvisor.com/Attractions-g143027-Activities-Glacier_Bay_National_Park_and_Preserve_Alaska.html
https://www.tripadvisor.com/Attractions-g143028-Activities-Grand_Canyon_National_Park_Arizona.html
https://www.tripadvisor.com/Attractions-g143029-Activities-Grand_Teton_National_Park_Wyoming.html
https://www.tripadvisor.com/Attractions-g143030-Activities-Great_Basin_National_Park_Nevada.html
https://www.tripadvisor.com/Attractions-g1511447-Activities-Great_Sand_Dunes_National_Park_Preserve_Colorado.html
https://www.tripadvisor.com/Attractions-g143031-Activities-Great_Smoky_Mountains_National_Park_Tennessee.html
https://www.tripadvisor.com/Attractions-g143032-Activities-Guadalupe_Mountains_National_Park_Texas.html
https://www.tripadvisor.com/Attractions-g143033-Activities-Haleakala_National_Park_Maui_Hawaii.html
https://www.tripadvisor.com/Attractions-g143034-Activities-Hawaii_Volcanoes_National_Park_Island_of_Hawaii_Hawaii.html
https://www.tripadvisor.com/Attractions-g143036-Activities-Isle_Royale_National_Park_Michigan.html
https://www.tripadvisor.com/Attractions-g143037-Activities-Joshua_Tree_National_Park_California.html
https://www.tripadvisor.com/Attractions-g143038-Activities-Katmai_National_Park_and_Preserve_Alaska.html
https://www.tripadvisor.com/Attractions-g143039-Activities-Kenai_Fjords_National_Park_Alaska.html
https://www.tripadvisor.com/Attractions-g143050-Activities-Sequoia_and_Kings_Canyon_National_Park_California.html
https://www.tripadvisor.com/Attractions-g143040-Activities-Kobuk_Valley_National_Park_Alaska.html
https://www.tripadvisor.com/Attractions-g143041-Activities-Lake_Clark_National_Park_and_Preserve_Alaska.html
https://www.tripadvisor.com/Attractions-g143042-Activities-Lassen_Volcanic_National_Park_California.html
https://www.tripadvisor.com/Attractions-g143043-Activities-Mammoth_Cave_National_Park_Kentucky.html
https://www.tripadvisor.com/Attractions-g60900-Activities-Mesa_Verde_National_Park_Colorado.html
https://www.tripadvisor.com/Attractions-g143044-Activities-Mount_Rainier_National_Park_Washington.html
https://www.tripadvisor.com/Attractions-g143046-Activities-North_Cascades_National_Park_Washington.html
https://www.tripadvisor.com/Attractions-g143047-Activities-Olympic_National_Park_Washington.html
https://www.tripadvisor.com/Attractions-g60932-Activities-Petrified_Forest_National_Park_Arizona.html
https://www.tripadvisor.com/Attractions-g2193168-Activities-Redwood_National_Park_California.html
https://www.tripadvisor.com/Attractions-g143048-Activities-Rocky_Mountain_National_Park_Colorado.html
https://www.tripadvisor.com/Attractions-g143049-Activities-Saguaro_National_Park_Arizona.html
https://www.tripadvisor.com/Attractions-g143050-Activities-Sequoia_and_Kings_Canyon_National_Park_California.html
https://www.tripadvisor.com/Attractions-g143051-Activities-Shenandoah_National_Park_Virginia.html
https://www.tripadvisor.com/Attractions-g143052-Activities-Theodore_Roosevelt_National_Park_North_Dakota.html
https://www.tripadvisor.com/Attractions-g147411-Activities-Virgin_Islands_National_Park_St_John_U_S_Virgin_Islands.html
https://www.tripadvisor.com/Attractions-g143054-Activities-Voyageurs_National_Park_Minnesota.html
https://www.tripadvisor.com/Attractions-g143055-Activities-Wind_Cave_National_Park_South_Dakota.html
https://www.tripadvisor.com/Attractions-g143056-Activities-Wrangell_St_Elias_National_Park_and_Preserve_Alaska.html
https://www.tripadvisor.com/Attractions-g60999-Activities-Yellowstone_National_Park_Wyoming.html
https://www.tripadvisor.com/Attractions-g61000-Activities-Yosemite_National_Park_California.html
https://www.tripadvisor.com/Attractions-g143057-Activities-Zion_National_Park_Utah.html

In [211]:
# img_url = s.findAll("img", class_="centeredImg")
img_url = s.select('img[class="centeredImg"]')

In [224]:
img_id =[]
for img_name in img_list[1:]:
    img_id.append(int(img_name.replace(".jpg","")))
# img_id

In [136]:
# import urllib
# f = open('img_file/00000001.jpg','wb')
# f.write(urllib.urlopen(img_url).read())
# f.close()

In [229]:
import cv2
import numpy as np
wrong_img = []
for img_name in img_list[1:]:
    pic = cv2.imread("img_file/"+img_name)
    if (pic.shape[0] < 10 )or (pic.shape[1]< 10):
        wrong_img.append(img_name)


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-229-9bf3e78714be> in <module>()
----> 1 import cv2
      2 import numpy as np
      3 wrong_img = []
      4 for img_name in img_list[1:]:
      5     pic = cv2.imread("img_file/"+img_name)

ImportError: No module named cv2

In [232]:
import os
import time
import numpy as np
from matplotlib import pyplot as plt

img_list = os.listdir("img_file/")
wrong_img = []
for img_name in img_list[1:]:
	path = "img_file/"+img_name
	pic = plt.imread(path, 0)

	if type(pic) == "NoneType":
		print(img_name)

	if (pic.shape[0] < 10 )or (pic.shape[1]< 10):
		wrong_img.append(img_name)
print(wrong_img)


/Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')
['1000.jpg', '10017.jpg', '10023.jpg', '10024.jpg', '10025.jpg', '10026.jpg', '10027.jpg', '10029.jpg', '10033.jpg', '10038.jpg', '10039.jpg', '10040.jpg', '10041.jpg', '10042.jpg', '10043.jpg', '10060.jpg', '10064.jpg', '10066.jpg', '10073.jpg', '10074.jpg', '10075.jpg', '10078.jpg', '10079.jpg', '10080.jpg', '10081.jpg', '10088.jpg', '10089.jpg', '10094.jpg', '10096.jpg', '10098.jpg', '10099.jpg', '10102.jpg', '10103.jpg', '10105.jpg', '10108.jpg', '10114.jpg', '10115.jpg', '10117.jpg', '10118.jpg', '10119.jpg', '10120.jpg', '10121.jpg', '10122.jpg', '10123.jpg', '10124.jpg', '10126.jpg', '10127.jpg', '10129.jpg', '10130.jpg', '10139.jpg', '10140.jpg', '10141.jpg', '10143.jpg', '10145.jpg', '10147.jpg', '10148.jpg', '10161.jpg', '10167.jpg', '10170.jpg', '10171.jpg', '10172.jpg', '10173.jpg', '10184.jpg', '10188.jpg', '10201.jpg', '10204.jpg', '10205.jpg', '10221.jpg', '10235.jpg', '10237.jpg', '10238.jpg', '10241.jpg', '10242.jpg', '10246.jpg', '10247.jpg', '10248.jpg', '10251.jpg', '10260.jpg', '10261.jpg', '10263.jpg', '10265.jpg', '10267.jpg', '10278.jpg', '10279.jpg', '10281.jpg', '10282.jpg', '10284.jpg', '10286.jpg', '10292.jpg', '10293.jpg', '10295.jpg', '10297.jpg', '10320.jpg', '10326.jpg', '10328.jpg', '10329.jpg', '10330.jpg', '10331.jpg', '10337.jpg', '10342.jpg', '10345.jpg', '10346.jpg', '10347.jpg', '10348.jpg', '10352.jpg', '10353.jpg', '10355.jpg', '10356.jpg', '10357.jpg', '10361.jpg', '10364.jpg', '10373.jpg', '10375.jpg', '10376.jpg', '10377.jpg', '10389.jpg', '10390.jpg', '10393.jpg', '10399.jpg', '10400.jpg', '10401.jpg', '10402.jpg', '10403.jpg', '10407.jpg', '10410.jpg', '10413.jpg', '10415.jpg', '10422.jpg', '10428.jpg', '10429.jpg', '10431.jpg', '10432.jpg', '10441.jpg', '10443.jpg', '10444.jpg', '10446.jpg', '10452.jpg', '10453.jpg', '10459.jpg', '10460.jpg', '10461.jpg', '10462.jpg', '10463.jpg', '10470.jpg', '10482.jpg', '10484.jpg', '10485.jpg', '10488.jpg', '10489.jpg', '10495.jpg', '10497.jpg', '10499.jpg', '10500.jpg', '10501.jpg', '10502.jpg', '10503.jpg', '10505.jpg', '10506.jpg', '10507.jpg', '10524.jpg', '10527.jpg', '10532.jpg', '10534.jpg', '10535.jpg', '10536.jpg', '10544.jpg', '10556.jpg', '10557.jpg', '10559.jpg', '10560.jpg', '10562.jpg', '10564.jpg', '10570.jpg', '10572.jpg', '10574.jpg', '10575.jpg', '10576.jpg', '10577.jpg', '10580.jpg', '10583.jpg', '10584.jpg', '10585.jpg', '10586.jpg', '10590.jpg', '10591.jpg', '10593.jpg', '10604.jpg', '10609.jpg', '10611.jpg', '10614.jpg', '10616.jpg', '10618.jpg', '10621.jpg', '10622.jpg', '10623.jpg', '10624.jpg', '10625.jpg', '10626.jpg', '10627.jpg', '10631.jpg', '10632.jpg', '10634.jpg', '10636.jpg', '10637.jpg', '10638.jpg', '10639.jpg', '10640.jpg', '10641.jpg', '10642.jpg', '10643.jpg', '10652.jpg', '10655.jpg', '10656.jpg', '10670.jpg', '10671.jpg', '10673.jpg', '10674.jpg', '10680.jpg', '10681.jpg', '10682.jpg', '10683.jpg', '10684.jpg', '10690.jpg', '10694.jpg', '10695.jpg', '10696.jpg', '10703.jpg', '10709.jpg', '10715.jpg', '10719.jpg', '10721.jpg', '10722.jpg', '10739.jpg', '10740.jpg', '10748.jpg', '10749.jpg', '10750.jpg', '10753.jpg', '10761.jpg', '10764.jpg', '10765.jpg', '10766.jpg', '10767.jpg', '10768.jpg', '10776.jpg', '10778.jpg', '10798.jpg', '10814.jpg', '10815.jpg', '10822.jpg', '10823.jpg', '10826.jpg', '10833.jpg', '10834.jpg', '10835.jpg', '10836.jpg', '10837.jpg', '10840.jpg', '10842.jpg', '10844.jpg', '10846.jpg', '10850.jpg', '10853.jpg', '10854.jpg', '10855.jpg', '10857.jpg', '10858.jpg', '10859.jpg', '10866.jpg', '10867.jpg', '10868.jpg', '10869.jpg', '10870.jpg', '10878.jpg', '10879.jpg', '10881.jpg', '10885.jpg', '10886.jpg', '10887.jpg', '10888.jpg', '10890.jpg', '10891.jpg', '10921.jpg', '10922.jpg', '10927.jpg', '10928.jpg', '10932.jpg', '10933.jpg', '10934.jpg', '10935.jpg', '10941.jpg', '10943.jpg', '10944.jpg', '10962.jpg', '10964.jpg', '10975.jpg', '10977.jpg', '10978.jpg', '10981.jpg', '11006.jpg', '11019.jpg', '11027.jpg', '11028.jpg', '11042.jpg', '11043.jpg', '11044.jpg', '11047.jpg', '11050.jpg', '11052.jpg', '11053.jpg', '11054.jpg', '11057.jpg', '11058.jpg', '11059.jpg', '11061.jpg', '11070.jpg', '11087.jpg', '11088.jpg', '11089.jpg', '11090.jpg', '11091.jpg', '11092.jpg', '11093.jpg', '11094.jpg', '11101.jpg', '11102.jpg', '11103.jpg', '11107.jpg', '11108.jpg', '11113.jpg', '11118.jpg', '11119.jpg', '11120.jpg', '11121.jpg', '11122.jpg', '11123.jpg', '11126.jpg', '11159.jpg', '11164.jpg', '11170.jpg', '11174.jpg', '11176.jpg', '11183.jpg', '11187.jpg', '11203.jpg', '11204.jpg', '11205.jpg', '11207.jpg', '11208.jpg', '11209.jpg', '11210.jpg', '11224.jpg', '11227.jpg', '11228.jpg', '11229.jpg', '11251.jpg', '11253.jpg', '11258.jpg', '11259.jpg', '11260.jpg', '11261.jpg', '11262.jpg', '11273.jpg', '11274.jpg', '11289.jpg', '11291.jpg', '11294.jpg', '11313.jpg', '11314.jpg', '11315.jpg', '11317.jpg', '11318.jpg', '11329.jpg', '11332.jpg', '11339.jpg', '11340.jpg', '11341.jpg', '11349.jpg', '11350.jpg', '11352.jpg', '11354.jpg', '11355.jpg', '11360.jpg', '11362.jpg', '11363.jpg', '11382.jpg', '11383.jpg', '11399.jpg', '11402.jpg', '11404.jpg', '11405.jpg', '11406.jpg', '11407.jpg', '11408.jpg', '11409.jpg', '11412.jpg', '11413.jpg', '11414.jpg', '11415.jpg', '11417.jpg', '11418.jpg', '11419.jpg', '11420.jpg', '11433.jpg', '11436.jpg', '11438.jpg', '11439.jpg', '11448.jpg', '11455.jpg', '11477.jpg', '11483.jpg', '11484.jpg', '11491.jpg', '11495.jpg', '11496.jpg', '11497.jpg', '11498.jpg', '11504.jpg', '11507.jpg', '11509.jpg', '11511.jpg', '11512.jpg', '11519.jpg', '11520.jpg', '11521.jpg', '11531.jpg', '11532.jpg', '11534.jpg', '11538.jpg', '11540.jpg', '11546.jpg', '11550.jpg', '11552.jpg', '11555.jpg', '11563.jpg', '11568.jpg', '11570.jpg', '11572.jpg', '11573.jpg', '11592.jpg', '11594.jpg', '11598.jpg', '11603.jpg', '11608.jpg', '11646.jpg', '11647.jpg', '11650.jpg', '11654.jpg', '11656.jpg', '11658.jpg', '11660.jpg', '11667.jpg', '11668.jpg', '11681.jpg', '11693.jpg', '11703.jpg', '11707.jpg', '11708.jpg', '11710.jpg', '11712.jpg', '11714.jpg', '11715.jpg', '11722.jpg', '11725.jpg', '11726.jpg', '11736.jpg', '11742.jpg', '11743.jpg', '11766.jpg', '11777.jpg', '11780.jpg', '11782.jpg', '11791.jpg', '11792.jpg', '11793.jpg', '11794.jpg', '11795.jpg', '11797.jpg', '11798.jpg', '11799.jpg', '11801.jpg', '11815.jpg', '11816.jpg', '11818.jpg', '11819.jpg', '11820.jpg', '11833.jpg', '11845.jpg', '11853.jpg', '11854.jpg', '11856.jpg', '11858.jpg', '11859.jpg', '11860.jpg', '11861.jpg', '11862.jpg', '11867.jpg', '11869.jpg', '11870.jpg', '11871.jpg', '11872.jpg', '11873.jpg', '11900.jpg', '11904.jpg', '11905.jpg', '11907.jpg', '11908.jpg', '11910.jpg', '11912.jpg', '11914.jpg', '11916.jpg', '11917.jpg', '11918.jpg', '11921.jpg', '11923.jpg', '11927.jpg', '11928.jpg', '11929.jpg', '11931.jpg', '11932.jpg', '11936.jpg', '11938.jpg', '11946.jpg', '11947.jpg', '11949.jpg', '11950.jpg', '11951.jpg', '11952.jpg', '11959.jpg', '11961.jpg', '11962.jpg', '11995.jpg', '11996.jpg', '11997.jpg', '12007.jpg', '12009.jpg', '12010.jpg', '12012.jpg', '12013.jpg', '12029.jpg', '12030.jpg', '12032.jpg', '12033.jpg', '12034.jpg', '12036.jpg', '12037.jpg', '12038.jpg', '12041.jpg', '12045.jpg', '12046.jpg', '12047.jpg', '12051.jpg', '12053.jpg', '12054.jpg', '12055.jpg', '12056.jpg', '12066.jpg', '12068.jpg', '12070.jpg', '12071.jpg', '12072.jpg', '12073.jpg', '12080.jpg', '12085.jpg', '12086.jpg', '12087.jpg', '12088.jpg', '12091.jpg', '12092.jpg', '12093.jpg', '12095.jpg', '12103.jpg', '12104.jpg', '12105.jpg', '12106.jpg', '12107.jpg', '12108.jpg', '12115.jpg', '12117.jpg', '12119.jpg', '12120.jpg', '12121.jpg', '12123.jpg', '12125.jpg', '12132.jpg', '12137.jpg', '12139.jpg', '12140.jpg', '12154.jpg', '12155.jpg', '12169.jpg', '12170.jpg', '12172.jpg', '12207.jpg', '12209.jpg', '12238.jpg', '12239.jpg', '12242.jpg', '12248.jpg', '12249.jpg', '12250.jpg', '12251.jpg', '12268.jpg', '12272.jpg', '12286.jpg', '12287.jpg', '12301.jpg', '12311.jpg', '12315.jpg', '12316.jpg', '12332.jpg', '12333.jpg', '12335.jpg', '12340.jpg', '12342.jpg', '12343.jpg', '12344.jpg', '12345.jpg', '12346.jpg', '12348.jpg', '12349.jpg', '12359.jpg', '12365.jpg', '12367.jpg', '12374.jpg', '12376.jpg', '12377.jpg', '12389.jpg', '12390.jpg', '12391.jpg', '12400.jpg', '12402.jpg', '12403.jpg', '12409.jpg', '12411.jpg', '12414.jpg', '12416.jpg', '12417.jpg', '12418.jpg', '12423.jpg', '12426.jpg', '12436.jpg', '12438.jpg', '12439.jpg', '12440.jpg', '12442.jpg', '12451.jpg', '12452.jpg', '12461.jpg', '12465.jpg', '12466.jpg', '12467.jpg', '12469.jpg', '12471.jpg', '12472.jpg', '12476.jpg', '12477.jpg', '12506.jpg', '12512.jpg', '12513.jpg', '12514.jpg', '12517.jpg', '12518.jpg', '12519.jpg', '12520.jpg', '12553.jpg', '12556.jpg', '12557.jpg', '12558.jpg', '12568.jpg', '12591.jpg', '12593.jpg', '12605.jpg', '12606.jpg', '12630.jpg', '12634.jpg', '12635.jpg', '12638.jpg', '12640.jpg', '12654.jpg', '12669.jpg', '12670.jpg', '12674.jpg', '12675.jpg', '12677.jpg', '12681.jpg', '12682.jpg', '12689.jpg', '12690.jpg', '12693.jpg', '12694.jpg', '12695.jpg', '12698.jpg', '12701.jpg', '12702.jpg', '12703.jpg', '12704.jpg', '12714.jpg', '12725.jpg', '12728.jpg', '12732.jpg', '12733.jpg', '12734.jpg', '12748.jpg', '12754.jpg', '12756.jpg', '12763.jpg', '12765.jpg', '12766.jpg', '12772.jpg', '12773.jpg', '12775.jpg', '12776.jpg', '12778.jpg', '12786.jpg', '12787.jpg', '12798.jpg', '12799.jpg', '12801.jpg', '12805.jpg', '12806.jpg', '12808.jpg', '12810.jpg', '12839.jpg', '12845.jpg', '12848.jpg', '12851.jpg', '12853.jpg', '12855.jpg', '12857.jpg', '12866.jpg', '12867.jpg', '12868.jpg', '12873.jpg', '12874.jpg', '12875.jpg', '12886.jpg', '12887.jpg', '12889.jpg', '12891.jpg', '12893.jpg', '12901.jpg', '12903.jpg', '12904.jpg', '12908.jpg', '12910.jpg', '12912.jpg', '12913.jpg', '12918.jpg', '12923.jpg', '12929.jpg', '12931.jpg', '12932.jpg', '1295.jpg', '12970.jpg', '12971.jpg', '12975.jpg', '12989.jpg', '12992.jpg', '12994.jpg', '12997.jpg', '13023.jpg', '13024.jpg', '13039.jpg', '13041.jpg', '13042.jpg', '13046.jpg', '13047.jpg', '13050.jpg', '13054.jpg', '13055.jpg', '13057.jpg', '13058.jpg', '13060.jpg', '13062.jpg', '13063.jpg', '13067.jpg', '13069.jpg', '13070.jpg', '13072.jpg', '13074.jpg', '13075.jpg', '13077.jpg', '13078.jpg', '13082.jpg', '13086.jpg', '13090.jpg', '13091.jpg', '1310.jpg', '13106.jpg', '13123.jpg', '13124.jpg', '13126.jpg', '13127.jpg', '13128.jpg', '13130.jpg', '13131.jpg', '13132.jpg', '13133.jpg', '13134.jpg', '13135.jpg', '13137.jpg', '13138.jpg', '13139.jpg', '13140.jpg', '13142.jpg', '13143.jpg', '13144.jpg', '13159.jpg', '13166.jpg', '13173.jpg', '13174.jpg', '13178.jpg', '1318.jpg', '13182.jpg', '13184.jpg', '13185.jpg', '13186.jpg', '13187.jpg', '1319.jpg', '13194.jpg', '13195.jpg', '13196.jpg', '13197.jpg', '13199.jpg', '13201.jpg', '13205.jpg', '13206.jpg', '13207.jpg', '13208.jpg', '13209.jpg', '13215.jpg', '13218.jpg', '1322.jpg', '13221.jpg', '13222.jpg', '13223.jpg', '13224.jpg', '13226.jpg', '13231.jpg', '13235.jpg', '13236.jpg', '13237.jpg', '13243.jpg', '13246.jpg', '13247.jpg', '13248.jpg', '13251.jpg', '13254.jpg', '13262.jpg', '13265.jpg', '13277.jpg', '13283.jpg', '13284.jpg', '13285.jpg', '13286.jpg', '13293.jpg', '13294.jpg', '13295.jpg', '13296.jpg', '13297.jpg', '13300.jpg', '13307.jpg', '13314.jpg', '13315.jpg', '13317.jpg', '13337.jpg', '13344.jpg', '13352.jpg', '13354.jpg', '13355.jpg', '13357.jpg', '13358.jpg', '13359.jpg', '13360.jpg', '13361.jpg', '13365.jpg', '13369.jpg', '13372.jpg', '13379.jpg', '13381.jpg', '13382.jpg', '13387.jpg', '13390.jpg', '13392.jpg', '13393.jpg', '13395.jpg', '13406.jpg', '13408.jpg', '13413.jpg', '13417.jpg', '13418.jpg', '13419.jpg', '13439.jpg', '13455.jpg', '13463.jpg', '13464.jpg', '13466.jpg', '13467.jpg', '13475.jpg', '13480.jpg', '13482.jpg', '13489.jpg', '13492.jpg', '13493.jpg', '13494.jpg', '13495.jpg', '13497.jpg', '13498.jpg', '13503.jpg', '13505.jpg', '13509.jpg', '13511.jpg', '13512.jpg', '13518.jpg', '13520.jpg', '13521.jpg', '13522.jpg', '13524.jpg', '13526.jpg', '13527.jpg', '13529.jpg', '13531.jpg', '13534.jpg', '13536.jpg', '13544.jpg', '13549.jpg', '13550.jpg', '13563.jpg', '13575.jpg', '13576.jpg', '13577.jpg', '13578.jpg', '13589.jpg', '13591.jpg', '13592.jpg', '13593.jpg', '13594.jpg', '13605.jpg', '13615.jpg', '13616.jpg', '13620.jpg', '13621.jpg', '13623.jpg', '13625.jpg', '13627.jpg', '13636.jpg', '13638.jpg', '13639.jpg', '13640.jpg', '13641.jpg', '13642.jpg', '13644.jpg', '1365.jpg', '13651.jpg', '13653.jpg', '13660.jpg', '13663.jpg', '13664.jpg', '13669.jpg', '13672.jpg', '1368.jpg', '1370.jpg', '13704.jpg', '13706.jpg', '13707.jpg', '13708.jpg', '13709.jpg', '1371.jpg', '13710.jpg', '13718.jpg', '1372.jpg', '13723.jpg', '13728.jpg', '1373.jpg', '13730.jpg', '13731.jpg', '13732.jpg', '13734.jpg', '13735.jpg', '13752.jpg', '13753.jpg', '13756.jpg', '13763.jpg', '13765.jpg', '13766.jpg', '13767.jpg', '13769.jpg', '13771.jpg', '13774.jpg', '13798.jpg', '13807.jpg', '13811.jpg', '13817.jpg', '13823.jpg', '13825.jpg', '13826.jpg', '13833.jpg', '13836.jpg', '13843.jpg', '13846.jpg', '13856.jpg', '13862.jpg', '13866.jpg', '13867.jpg', '13871.jpg', '13872.jpg', '13882.jpg', '13885.jpg', '13895.jpg', '13898.jpg', '13900.jpg', '13901.jpg', '13902.jpg', '13922.jpg', '13926.jpg', '13974.jpg', '13992.jpg', '13993.jpg', '13994.jpg', '13995.jpg', '14007.jpg', '14008.jpg', '14012.jpg', '14013.jpg', '14016.jpg', '14023.jpg', '14027.jpg', '14029.jpg', '14030.jpg', '14032.jpg', '14035.jpg', '14037.jpg', '14038.jpg', '14039.jpg', '14040.jpg', '14041.jpg', '14042.jpg', '14043.jpg', '14045.jpg', '14046.jpg', '14049.jpg', '14052.jpg', '14066.jpg', '14068.jpg', '14070.jpg', '14071.jpg', '14083.jpg', '14085.jpg', '14086.jpg', '14089.jpg', '14090.jpg', '14108.jpg', '14110.jpg', '14119.jpg', '14120.jpg', '14130.jpg', '14131.jpg', '14133.jpg', '14138.jpg', '14150.jpg', '14152.jpg', '14153.jpg', '14166.jpg', '14168.jpg', '14169.jpg', '14170.jpg', '14172.jpg', '14193.jpg', '14198.jpg', '14200.jpg', '14201.jpg', '14218.jpg', '14230.jpg', '14232.jpg', '14234.jpg', '14235.jpg', '14236.jpg', '14239.jpg', '14240.jpg', '14241.jpg', '14243.jpg', '14244.jpg', '14252.jpg', '14256.jpg', '14258.jpg', '14263.jpg', '14264.jpg', '14265.jpg', '14269.jpg', '14271.jpg', '14274.jpg', '14277.jpg', '14284.jpg', '14322.jpg', '14358.jpg', '14362.jpg', '14369.jpg', '1439.jpg', '1440.jpg', '1441.jpg', '1442.jpg', '14460.jpg', '14510.jpg', '14526.jpg', '14562.jpg', '14572.jpg', '14591.jpg', '14613.jpg', '14623.jpg', '14635.jpg', '14636.jpg', '14637.jpg', '14643.jpg', '14648.jpg', '14667.jpg', '14669.jpg', '14686.jpg', '1473.jpg', '14747.jpg', '1475.jpg', '1476.jpg', '14816.jpg', '14842.jpg', '14843.jpg', '14851.jpg', '14857.jpg', '14863.jpg', '14872.jpg', '14875.jpg', '14889.jpg', '14894.jpg', '14918.jpg', '14936.jpg', '14939.jpg', '14947.jpg', '14948.jpg', '14952.jpg', '14958.jpg', '14964.jpg', '14965.jpg', '14967.jpg', '14969.jpg', '14973.jpg', '14974.jpg', '14977.jpg', '14978.jpg', '14982.jpg', '14985.jpg', '14986.jpg', '14987.jpg', '14990.jpg', '14991.jpg', '14993.jpg', '14994.jpg', '14996.jpg', '14999.jpg', '15001.jpg', '15004.jpg', '15046.jpg', '15093.jpg', '15103.jpg', '15137.jpg', '15148.jpg', '15155.jpg', '15182.jpg', '15186.jpg', '15201.jpg', '15203.jpg', '15208.jpg', '1523.jpg', '15251.jpg', '15266.jpg', '15268.jpg', '15279.jpg', '15299.jpg', '15316.jpg', '15335.jpg', '15336.jpg', '15340.jpg', '15372.jpg', '15373.jpg', '15375.jpg', '15378.jpg', '15379.jpg', '15396.jpg', '15398.jpg', '15415.jpg', '15451.jpg', '15453.jpg', '15458.jpg', '15482.jpg', '15490.jpg', '15492.jpg', '15501.jpg', '15504.jpg', '15505.jpg', '15512.jpg', '15514.jpg', '15524.jpg', '15546.jpg', '15570.jpg', '15583.jpg', '15589.jpg', '15592.jpg', '15596.jpg', '15602.jpg', '15634.jpg', '15640.jpg', '15652.jpg', '15663.jpg', '15688.jpg', '15695.jpg', '15712.jpg', '15718.jpg', '15772.jpg', '15776.jpg', '15777.jpg', '15786.jpg', '15794.jpg', '15804.jpg', '15809.jpg', '15838.jpg', '15839.jpg', '15891.jpg', '15913.jpg', '15914.jpg', '15922.jpg', '15967.jpg', '15973.jpg', '15979.jpg', '16000.jpg', '16010.jpg', '16012.jpg', '16020.jpg', '16023.jpg', '16030.jpg', '16080.jpg', '16084.jpg', '16134.jpg', '16142.jpg', '16197.jpg', '16209.jpg', '16230.jpg', '16282.jpg', '16285.jpg', '16297.jpg', '16315.jpg', '16326.jpg', '16339.jpg', '16391.jpg', '16397.jpg', '16425.jpg', '1646.jpg', '16460.jpg', '16467.jpg', '1647.jpg', '16476.jpg', '16479.jpg', '1649.jpg', '1651.jpg', '16512.jpg', '16518.jpg', '16537.jpg', '16562.jpg', '16568.jpg', '16591.jpg', '1720.jpg', '1723.jpg', '1730.jpg', '1747.jpg', '1775.jpg', '1780.jpg', '1782.jpg', '1783.jpg', '1784.jpg', '1826.jpg', '1828.jpg', '1873.jpg', '1884.jpg', '1889.jpg', '1893.jpg', '1909.jpg', '1987.jpg', '1990.jpg', '2000.jpg', '2001.jpg', '2097.jpg', '2099.jpg', '2100.jpg', '2103.jpg', '2116.jpg', '2118.jpg', '2119.jpg', '2121.jpg', '2122.jpg', '2141.jpg', '2145.jpg', '2165.jpg', '2172.jpg', '2175.jpg', '2178.jpg', '2179.jpg', '2180.jpg', '2183.jpg', '2184.jpg', '2186.jpg', '2198.jpg', '2199.jpg', '2234.jpg', '2244.jpg', '2248.jpg', '2275.jpg', '2285.jpg', '2287.jpg', '2289.jpg', '2290.jpg', '2291.jpg', '2292.jpg', '2294.jpg', '2319.jpg', '2321.jpg', '2322.jpg', '2410.jpg', '2412.jpg', '2413.jpg', '2414.jpg', '2416.jpg', '2440.jpg', '2455.jpg', '2468.jpg', '2472.jpg', '2527.jpg', '2539.jpg', '2547.jpg', '2556.jpg', '2558.jpg', '2563.jpg', '2590.jpg', '2609.jpg', '2618.jpg', '2619.jpg', '2639.jpg', '2640.jpg', '2664.jpg', '2667.jpg', '2669.jpg', '2692.jpg', '2698.jpg', '2708.jpg', '2711.jpg', '2713.jpg', '2723.jpg', '2739.jpg', '2742.jpg', '2743.jpg', '2746.jpg', '2748.jpg', '2767.jpg', '2813.jpg', '2817.jpg', '2818.jpg', '2821.jpg', '2836.jpg', '2841.jpg', '2845.jpg', '2848.jpg', '2860.jpg', '2862.jpg', '2863.jpg', '2864.jpg', '2865.jpg', '2867.jpg', '2888.jpg', '2893.jpg', '2917.jpg', '2971.jpg', '2972.jpg', '3010.jpg', '3012.jpg', '3013.jpg', '3056.jpg', '3059.jpg', '3061.jpg', '3065.jpg', '3083.jpg', '3084.jpg', '3085.jpg', '3086.jpg', '3088.jpg', '3089.jpg', '3098.jpg', '3103.jpg', '3106.jpg', '3118.jpg', '3126.jpg', '3127.jpg', '3141.jpg', '3207.jpg', '3224.jpg', '3225.jpg', '3232.jpg', '3236.jpg', '3237.jpg', '3284.jpg', '3286.jpg', '3298.jpg', '3299.jpg', '3300.jpg', '3301.jpg', '3323.jpg', '3330.jpg', '3346.jpg', '3357.jpg', '3375.jpg', '3395.jpg', '3401.jpg', '3404.jpg', '3437.jpg', '3438.jpg', '3439.jpg', '3441.jpg', '3448.jpg', '3449.jpg', '3451.jpg', '3453.jpg', '3454.jpg', '3457.jpg', '3458.jpg', '3459.jpg', '3460.jpg', '3461.jpg', '3489.jpg', '3508.jpg', '3510.jpg', '3512.jpg', '3513.jpg', '3516.jpg', '3517.jpg', '3532.jpg', '3539.jpg', '3546.jpg', '3549.jpg', '3558.jpg', '3562.jpg', '3572.jpg', '3574.jpg', '3575.jpg', '3602.jpg', '3603.jpg', '3604.jpg', '3609.jpg', '3610.jpg', '3612.jpg', '3613.jpg', '3616.jpg', '3617.jpg', '3618.jpg', '3619.jpg', '3620.jpg', '3649.jpg', '3652.jpg', '3663.jpg', '3667.jpg', '3679.jpg', '3680.jpg', '3681.jpg', '3694.jpg', '3698.jpg', '3700.jpg', '3701.jpg', '3756.jpg', '3768.jpg', '3771.jpg', '3776.jpg', '3778.jpg', '3792.jpg', '3796.jpg', '3797.jpg', '3812.jpg', '3813.jpg', '3835.jpg', '3837.jpg', '3843.jpg', '3849.jpg', '3850.jpg', '3852.jpg', '3865.jpg', '3882.jpg', '3887.jpg', '3889.jpg', '3899.jpg', '3901.jpg', '3902.jpg', '3929.jpg', '3935.jpg', '3936.jpg', '3937.jpg', '3939.jpg', '3999.jpg', '4055.jpg', '4059.jpg', '4065.jpg', '4067.jpg', '4069.jpg', '4070.jpg', '4071.jpg', '4072.jpg', '4073.jpg', '4074.jpg', '4089.jpg', '4090.jpg', '4102.jpg', '4112.jpg', '4118.jpg', '4120.jpg', '4121.jpg', '4122.jpg', '4126.jpg', '4127.jpg', '4128.jpg', '4129.jpg', '4130.jpg', '4131.jpg', '4147.jpg', '4148.jpg', '4149.jpg', '4150.jpg', '4151.jpg', '4152.jpg', '4156.jpg', '4159.jpg', '4160.jpg', '4161.jpg', '4163.jpg', '4199.jpg', '4200.jpg', '4201.jpg', '4204.jpg', '4207.jpg', '4208.jpg', '4209.jpg', '4239.jpg', '4240.jpg', '4259.jpg', '4291.jpg', '4292.jpg', '4301.jpg', '4302.jpg', '4335.jpg', '4336.jpg', '4337.jpg', '4339.jpg', '4340.jpg', '4355.jpg', '4357.jpg', '4363.jpg', '4366.jpg', '4367.jpg', '4375.jpg', '4376.jpg', '4405.jpg', '4407.jpg', '4409.jpg', '4411.jpg', '4413.jpg', '4414.jpg', '4415.jpg', '4417.jpg', '4418.jpg', '4419.jpg', '4425.jpg', '4427.jpg', '4430.jpg', '4443.jpg', '4444.jpg', '4447.jpg', '4455.jpg', '4463.jpg', '4464.jpg', '4466.jpg', '4476.jpg', '4487.jpg', '4507.jpg', '4523.jpg', '4525.jpg', '4528.jpg', '4530.jpg', '4531.jpg', '4535.jpg', '4546.jpg', '4551.jpg', '4552.jpg', '4553.jpg', '4557.jpg', '4559.jpg', '4560.jpg', '4561.jpg', '4574.jpg', '4576.jpg', '4578.jpg', '4586.jpg', '4587.jpg', '4588.jpg', '4591.jpg', '4592.jpg', '4593.jpg', '4615.jpg', '4616.jpg', '4617.jpg', '4649.jpg', '4651.jpg', '4657.jpg', '4658.jpg', '4659.jpg', '4660.jpg', '4680.jpg', '4681.jpg', '4683.jpg', '4685.jpg', '4695.jpg', '4715.jpg', '4717.jpg', '4718.jpg', '4719.jpg', '4729.jpg', '4731.jpg', '4732.jpg', '4733.jpg', '4734.jpg', '4735.jpg', '4754.jpg', '4758.jpg', '4778.jpg', '4784.jpg', '4785.jpg', '4797.jpg', '4799.jpg', '4801.jpg', '4802.jpg', '4807.jpg', '4808.jpg', '4809.jpg', '4811.jpg', '4812.jpg', '4866.jpg', '4892.jpg', '4895.jpg', '4907.jpg', '4908.jpg', '4914.jpg', '4916.jpg', '4935.jpg', '4936.jpg', '4941.jpg', '4944.jpg', '4954.jpg', '4964.jpg', '4966.jpg', '4977.jpg', '4990.jpg', '4993.jpg', '5010.jpg', '5016.jpg', '5018.jpg', '5019.jpg', '5020.jpg', '5021.jpg', '5022.jpg', '5023.jpg', '5028.jpg', '5035.jpg', '5038.jpg', '5040.jpg', '5041.jpg', '5043.jpg', '5044.jpg', '5045.jpg', '5081.jpg', '5082.jpg', '5083.jpg', '5086.jpg', '5087.jpg', '5088.jpg', '5089.jpg', '5090.jpg', '5095.jpg', '5117.jpg', '5118.jpg', '5144.jpg', '5145.jpg', '5146.jpg', '5147.jpg', '5154.jpg', '5158.jpg', '5159.jpg', '5162.jpg', '5177.jpg', '5178.jpg', '5179.jpg', '5182.jpg', '5185.jpg', '5186.jpg', '5188.jpg', '5189.jpg', '5190.jpg', '5193.jpg', '5194.jpg', '5197.jpg', '5201.jpg', '5207.jpg', '5209.jpg', '5210.jpg', '5230.jpg', '5232.jpg', '5233.jpg', '5235.jpg', '5239.jpg', '5240.jpg', '5241.jpg', '5251.jpg', '5254.jpg', '5270.jpg', '5289.jpg', '5292.jpg', '5297.jpg', '5298.jpg', '5300.jpg', '5301.jpg', '5303.jpg', '5324.jpg', '5325.jpg', '5349.jpg', '5350.jpg', '5351.jpg', '5352.jpg', '5358.jpg', '5362.jpg', '5363.jpg', '5365.jpg', '5366.jpg', '5367.jpg', '5368.jpg', '5379.jpg', '5381.jpg', '5383.jpg', '5384.jpg', '5385.jpg', '5386.jpg', '5388.jpg', '5420.jpg', '5423.jpg', '5425.jpg', '5426.jpg', '5427.jpg', '5428.jpg', '5430.jpg', '5460.jpg', '5461.jpg', '5462.jpg', '5479.jpg', '5480.jpg', '5483.jpg', '5489.jpg', '5492.jpg', '5493.jpg', '5495.jpg', '5497.jpg', '5499.jpg', '5500.jpg', '5510.jpg', '5512.jpg', '5513.jpg', '5514.jpg', '5515.jpg', '5535.jpg', '5536.jpg', '5539.jpg', '5540.jpg', '5544.jpg', '5547.jpg', '5554.jpg', '5557.jpg', '5564.jpg', '5565.jpg', '5566.jpg', '5580.jpg', '5581.jpg', '5597.jpg', '5604.jpg', '5605.jpg', '5606.jpg', '5607.jpg', '5660.jpg', '5665.jpg', '5666.jpg', '5667.jpg', '5668.jpg', '5672.jpg', '5675.jpg', '5692.jpg', '5698.jpg', '5701.jpg', '5702.jpg', '5703.jpg', '5724.jpg', '5751.jpg', '5771.jpg', '5773.jpg', '5775.jpg', '5799.jpg', '5802.jpg', '5817.jpg', '5831.jpg', '5854.jpg', '5856.jpg', '5858.jpg', '5861.jpg', '5862.jpg', '5863.jpg', '5864.jpg', '5865.jpg', '5866.jpg', '5877.jpg', '5880.jpg', '5881.jpg', '5900.jpg', '5908.jpg', '5913.jpg', '5916.jpg', '5919.jpg', '5920.jpg', '5921.jpg', '5922.jpg', '5951.jpg', '6004.jpg', '6018.jpg', '6020.jpg', '6021.jpg', '6031.jpg', '6034.jpg', '6036.jpg', '6043.jpg', '6044.jpg', '6048.jpg', '6057.jpg', '6063.jpg', '6064.jpg', '6072.jpg', '6074.jpg', '6081.jpg', '6082.jpg', '6088.jpg', '6089.jpg', '6090.jpg', '6101.jpg', '6126.jpg', '6131.jpg', '6132.jpg', '6135.jpg', '6136.jpg', '6137.jpg', '6161.jpg', '6163.jpg', '6175.jpg', '6176.jpg', '6177.jpg', '6181.jpg', '6201.jpg', '6206.jpg', '6207.jpg', '6208.jpg', '6209.jpg', '6210.jpg', '6211.jpg', '6220.jpg', '6233.jpg', '6235.jpg', '6236.jpg', '6237.jpg', '6238.jpg', '6239.jpg', '6240.jpg', '6250.jpg', '6252.jpg', '6253.jpg', '6254.jpg', '6255.jpg', '6276.jpg', '6283.jpg', '6286.jpg', '6288.jpg', '6296.jpg', '6299.jpg', '6301.jpg', '6302.jpg', '6303.jpg', '6304.jpg', '6308.jpg', '6318.jpg', '6320.jpg', '6321.jpg', '6325.jpg', '6351.jpg', '6356.jpg', '6363.jpg', '6366.jpg', '6367.jpg', '6368.jpg', '6372.jpg', '6373.jpg', '6374.jpg', '6403.jpg', '6404.jpg', '6405.jpg', '6409.jpg', '6412.jpg', '6413.jpg', '6420.jpg', '6421.jpg', '6438.jpg', '6442.jpg', '6446.jpg', '6447.jpg', '6448.jpg', '6449.jpg', '6451.jpg', '6453.jpg', '6454.jpg', '6455.jpg', '6486.jpg', '6491.jpg', '6492.jpg', '6493.jpg', '6494.jpg', '6509.jpg', '6512.jpg', '6514.jpg', '6517.jpg', '6519.jpg', '6520.jpg', '6538.jpg', '6547.jpg', '6549.jpg', '6551.jpg', '6558.jpg', '6560.jpg', '6561.jpg', '6562.jpg', '6582.jpg', '6585.jpg', '6587.jpg', '6589.jpg', '6590.jpg', '6591.jpg', '6603.jpg', '6604.jpg', '6605.jpg', '6606.jpg', '6608.jpg', '6631.jpg', '6639.jpg', '6643.jpg', '6649.jpg', '6652.jpg', '6653.jpg', '6661.jpg', '6673.jpg', '6675.jpg', '6676.jpg', '6677.jpg', '6687.jpg', '6688.jpg', '6692.jpg', '6693.jpg', '6694.jpg', '6695.jpg', '6696.jpg', '6697.jpg', '6698.jpg', '6709.jpg', '6719.jpg', '6721.jpg', '6723.jpg', '6733.jpg', '6744.jpg', '6748.jpg', '6749.jpg', '6751.jpg', '6762.jpg', '6763.jpg', '6764.jpg', '6813.jpg', '6814.jpg', '6815.jpg', '6817.jpg', '6827.jpg', '6828.jpg', '6833.jpg', '6834.jpg', '6837.jpg', '6840.jpg', '6841.jpg', '6842.jpg', '6849.jpg', '6853.jpg', '6860.jpg', '6871.jpg', '6875.jpg', '6876.jpg', '6877.jpg', '6880.jpg', '6881.jpg', '6916.jpg', '6917.jpg', '6918.jpg', '6919.jpg', '6929.jpg', '6930.jpg', '6931.jpg', '6933.jpg', '6939.jpg', '6941.jpg', '6942.jpg', '6943.jpg', '6944.jpg', '6945.jpg', '6948.jpg', '6949.jpg', '6950.jpg', '6952.jpg', '6965.jpg', '6983.jpg', '6985.jpg', '6987.jpg', '6988.jpg', '6997.jpg', '7007.jpg', '7011.jpg', '7014.jpg', '7037.jpg', '7038.jpg', '7039.jpg', '7046.jpg', '7048.jpg', '7049.jpg', '7050.jpg', '7055.jpg', '7066.jpg', '7076.jpg', '7077.jpg', '7079.jpg', '7083.jpg', '7085.jpg', '7088.jpg', '7097.jpg', '7109.jpg', '7111.jpg', '7113.jpg', '7118.jpg', '7121.jpg', '7127.jpg', '7131.jpg', '7141.jpg', '7142.jpg', '7143.jpg', '7144.jpg', '7145.jpg', '7146.jpg', '7147.jpg', '7161.jpg', '7167.jpg', '7171.jpg', '7174.jpg', '7188.jpg', '7189.jpg', '7195.jpg', '7196.jpg', '7198.jpg', '7215.jpg', '7218.jpg', '7227.jpg', '7229.jpg', '7230.jpg', '7250.jpg', '7251.jpg', '7254.jpg', '7255.jpg', '7277.jpg', '7278.jpg', '7283.jpg', '7294.jpg', '7297.jpg', '7299.jpg', '7300.jpg', '7301.jpg', '7302.jpg', '7313.jpg', '7314.jpg', '7315.jpg', '7317.jpg', '7318.jpg', '7319.jpg', '7320.jpg', '7321.jpg', '7326.jpg', '7328.jpg', '7329.jpg', '7332.jpg', '7333.jpg', '7334.jpg', '7336.jpg', '7337.jpg', '7359.jpg', '7360.jpg', '7361.jpg', '7362.jpg', '7371.jpg', '7373.jpg', '7374.jpg', '7375.jpg', '7376.jpg', '7383.jpg', '7384.jpg', '7385.jpg', '7392.jpg', '7393.jpg', '7394.jpg', '7399.jpg', '7403.jpg', '7405.jpg', '7406.jpg', '7407.jpg', '7409.jpg', '7410.jpg', '7411.jpg', '7418.jpg', '7419.jpg', '7420.jpg', '7422.jpg', '7428.jpg', '7431.jpg', '7433.jpg', '7436.jpg', '7438.jpg', '7439.jpg', '7473.jpg', '7474.jpg', '7476.jpg', '7486.jpg', '7493.jpg', '7494.jpg', '7511.jpg', '7516.jpg', '7517.jpg', '7526.jpg', '7528.jpg', '7530.jpg', '7531.jpg', '7537.jpg', '7539.jpg', '7542.jpg', '7552.jpg', '7554.jpg', '7558.jpg', '7560.jpg', '7561.jpg', '7565.jpg', '7568.jpg', '7569.jpg', '7573.jpg', '7574.jpg', '7575.jpg', '7577.jpg', '7578.jpg', '7579.jpg', '7584.jpg', '7599.jpg', '7601.jpg', '7603.jpg', '7606.jpg', '7607.jpg', '7608.jpg', '7610.jpg', '7620.jpg', '7623.jpg', '7626.jpg', '7638.jpg', '7640.jpg', '7661.jpg', '7663.jpg', '7664.jpg', '7678.jpg', '7694.jpg', '7696.jpg', '7698.jpg', '7699.jpg', '7700.jpg', '7703.jpg', '7704.jpg', '7711.jpg', '7712.jpg', '7713.jpg', '7716.jpg', '7764.jpg', '7768.jpg', '7807.jpg', '7811.jpg', '7820.jpg', '7836.jpg', '7838.jpg', '7839.jpg', '7840.jpg', '7841.jpg', '7850.jpg', '7851.jpg', '7852.jpg', '7853.jpg', '7855.jpg', '7859.jpg', '7862.jpg', '7863.jpg', '7864.jpg', '7875.jpg', '7877.jpg', '7884.jpg', '7885.jpg', '7900.jpg', '7901.jpg', '7920.jpg', '7926.jpg', '7927.jpg', '7931.jpg', '7932.jpg', '7933.jpg', '7934.jpg', '7935.jpg', '7936.jpg', '7938.jpg', '7939.jpg', '7943.jpg', '7945.jpg', '7946.jpg', '7947.jpg', '7948.jpg', '7949.jpg', '7950.jpg', '7951.jpg', '7952.jpg', '7953.jpg', '7960.jpg', '7961.jpg', '7963.jpg', '7964.jpg', '7965.jpg', '7983.jpg', '7985.jpg', '8004.jpg', '8013.jpg', '8015.jpg', '8021.jpg', '8024.jpg', '8025.jpg', '8033.jpg', '8046.jpg', '8061.jpg', '8063.jpg', '8067.jpg', '8071.jpg', '8073.jpg', '8076.jpg', '8080.jpg', '8082.jpg', '8083.jpg', '8084.jpg', '8085.jpg', '8088.jpg', '8106.jpg', '8107.jpg', '8109.jpg', '8113.jpg', '8114.jpg', '8118.jpg', '8119.jpg', '8129.jpg', '8131.jpg', '8133.jpg', '8134.jpg', '8136.jpg', '8137.jpg', '8139.jpg', '8142.jpg', '8144.jpg', '8145.jpg', '8153.jpg', '8154.jpg', '8155.jpg', '8162.jpg', '8163.jpg', '8164.jpg', '8166.jpg', '8167.jpg', '8181.jpg', '8182.jpg', '8185.jpg', '8186.jpg', '8193.jpg', '8201.jpg', '8203.jpg', '8206.jpg', '8209.jpg', '8212.jpg', '8213.jpg', '8215.jpg', '8216.jpg', '8217.jpg', '8218.jpg', '8220.jpg', '8234.jpg', '8235.jpg', '8236.jpg', '8242.jpg', '8243.jpg', '8244.jpg', '8246.jpg', '8257.jpg', '8258.jpg', '8260.jpg', '8261.jpg', '8265.jpg', '8269.jpg', '8270.jpg', '8271.jpg', '8273.jpg', '8274.jpg', '8290.jpg', '8304.jpg', '8312.jpg', '8314.jpg', '8317.jpg', '8322.jpg', '8328.jpg', '8329.jpg', '8331.jpg', '8335.jpg', '8336.jpg', '8342.jpg', '8345.jpg', '8355.jpg', '8356.jpg', '8363.jpg', '8364.jpg', '8367.jpg', '8378.jpg', '8383.jpg', '8387.jpg', '8388.jpg', '8389.jpg', '8393.jpg', '8394.jpg', '8396.jpg', '8398.jpg', '8425.jpg', '8428.jpg', '8429.jpg', '8430.jpg', '8431.jpg', '8433.jpg', '8434.jpg', '8442.jpg', '8446.jpg', '8447.jpg', '8448.jpg', '8450.jpg', '8451.jpg', '8454.jpg', '8455.jpg', '8456.jpg', '8457.jpg', '8468.jpg', '8469.jpg', '8470.jpg', '8473.jpg', '8476.jpg', '8486.jpg', '8490.jpg', '8492.jpg', '8493.jpg', '8539.jpg', '8540.jpg', '8541.jpg', '8568.jpg', '8570.jpg', '8583.jpg', '8584.jpg', '8585.jpg', '8591.jpg', '8593.jpg', '8613.jpg', '8614.jpg', '8644.jpg', '8646.jpg', '8658.jpg', '8660.jpg', '8663.jpg', '8664.jpg', '8665.jpg', '8666.jpg', '8669.jpg', '8670.jpg', '8704.jpg', '8705.jpg', '8706.jpg', '8707.jpg', '8713.jpg', '8722.jpg', '8723.jpg', '8733.jpg', '8735.jpg', '8740.jpg', '8742.jpg', '8743.jpg', '8744.jpg', '8760.jpg', '8774.jpg', '8775.jpg', '8778.jpg', '8779.jpg', '8784.jpg', '8788.jpg', '8789.jpg', '8804.jpg', '8806.jpg', '8808.jpg', '8811.jpg', '8812.jpg', '8814.jpg', '8815.jpg', '8818.jpg', '8819.jpg', '8827.jpg', '8831.jpg', '8857.jpg', '8872.jpg', '8874.jpg', '8876.jpg', '8877.jpg', '8891.jpg', '8905.jpg', '8906.jpg', '891.jpg', '892.jpg', '8920.jpg', '8921.jpg', '8946.jpg', '8948.jpg', '8963.jpg', '8965.jpg', '8970.jpg', '8972.jpg', '8973.jpg', '898.jpg', '8981.jpg', '8984.jpg', '8985.jpg', '8993.jpg', '900.jpg', '9000.jpg', '9004.jpg', '901.jpg', '9011.jpg', '9014.jpg', '9018.jpg', '9020.jpg', '9021.jpg', '9023.jpg', '9024.jpg', '9025.jpg', '9026.jpg', '9027.jpg', '9028.jpg', '9029.jpg', '9030.jpg', '9031.jpg', '9034.jpg', '9035.jpg', '9043.jpg', '9047.jpg', '9048.jpg', '9049.jpg', '9054.jpg', '9062.jpg', '9064.jpg', '9069.jpg', '9083.jpg', '9085.jpg', '9087.jpg', '9088.jpg', '9128.jpg', '9129.jpg', '9132.jpg', '9149.jpg', '9150.jpg', '9161.jpg', '9167.jpg', '9168.jpg', '9170.jpg', '9171.jpg', '9172.jpg', '9173.jpg', '9177.jpg', '9178.jpg', '9179.jpg', '9180.jpg', '9182.jpg', '9183.jpg', '9189.jpg', '9192.jpg', '9193.jpg', '9209.jpg', '9224.jpg', '9226.jpg', '9227.jpg', '9228.jpg', '9230.jpg', '9250.jpg', '9258.jpg', '9261.jpg', '9270.jpg', '9271.jpg', '9272.jpg', '9273.jpg', '9274.jpg', '9275.jpg', '9279.jpg', '9281.jpg', '9300.jpg', '9310.jpg', '9343.jpg', '9344.jpg', '9345.jpg', '9346.jpg', '9348.jpg', '9352.jpg', '9354.jpg', '9358.jpg', '9361.jpg', '9363.jpg', '9364.jpg', '9369.jpg', '9375.jpg', '9405.jpg', '9406.jpg', '9407.jpg', '9409.jpg', '9472.jpg', '9473.jpg', '9474.jpg', '9476.jpg', '9480.jpg', '9482.jpg', '9483.jpg', '9486.jpg', '9487.jpg', '9488.jpg', '9489.jpg', '9490.jpg', '9491.jpg', '9501.jpg', '9503.jpg', '9505.jpg', '9507.jpg', '9508.jpg', '9509.jpg', '9524.jpg', '9525.jpg', '9527.jpg', '9530.jpg', '9532.jpg', '9534.jpg', '9535.jpg', '9536.jpg', '9538.jpg', '9539.jpg', '9541.jpg', '9557.jpg', '9560.jpg', '9561.jpg', '9563.jpg', '9581.jpg', '9582.jpg', '9583.jpg', '9584.jpg', '9619.jpg', '9621.jpg', '9623.jpg', '9624.jpg', '9626.jpg', '9648.jpg', '9653.jpg', '9654.jpg', '9660.jpg', '9661.jpg', '9663.jpg', '9666.jpg', '9667.jpg', '9668.jpg', '9669.jpg', '9674.jpg', '9675.jpg', '9678.jpg', '9679.jpg', '9699.jpg', '9700.jpg', '9703.jpg', '9706.jpg', '9711.jpg', '9716.jpg', '9717.jpg', '9718.jpg', '9719.jpg', '9720.jpg', '9722.jpg', '9724.jpg', '9733.jpg', '9736.jpg', '9737.jpg', '9738.jpg', '9739.jpg', '9742.jpg', '9743.jpg', '9744.jpg', '9748.jpg', '9750.jpg', '9751.jpg', '9753.jpg', '9754.jpg', '9755.jpg', '9756.jpg', '9757.jpg', '9766.jpg', '9767.jpg', '9768.jpg', '9770.jpg', '9787.jpg', '9789.jpg', '9790.jpg', '9797.jpg', '9799.jpg', '9804.jpg', '9809.jpg', '9810.jpg', '9813.jpg', '9814.jpg', '9815.jpg', '9816.jpg', '9818.jpg', '9820.jpg', '9824.jpg', '9825.jpg', '9826.jpg', '9828.jpg', '9829.jpg', '9851.jpg', '9853.jpg', '9854.jpg', '9864.jpg', '9865.jpg', '9866.jpg', '9878.jpg', '9883.jpg', '9885.jpg', '9887.jpg', '9888.jpg', '9896.jpg', '9897.jpg', '9899.jpg', '9900.jpg', '9901.jpg', '9902.jpg', '9903.jpg', '9908.jpg', '9913.jpg', '9934.jpg', '9937.jpg', '9940.jpg', '9942.jpg', '9946.jpg', '9947.jpg', '9948.jpg', '9961.jpg', '9966.jpg', '9986.jpg', '9987.jpg', '9990.jpg', '9991.jpg', '9992.jpg']

In [233]:
len(wrong_img)


Out[233]:
2697

In [247]:
for i in poi2[poi2.index.isin([1000,10017])].url:
    print i


http://www.tripadvisor.com/Attraction_Review-g31281-d8423934-Reviews-Sun_Splash-Mesa_Arizona.html
http://www.tripadvisor.com/Attraction_Review-g52970-d1954982-Reviews-Lancaster_Mennonite_Historical_Society-Lancaster_Lancaster_County_Pennsylvania.html

In [251]:
wrong_page = []
with open ("wrong_page.txt", "r") as f:
    for line in f.readlines():
        wrong_page.append(line.replace("\n",""))

In [258]:
for i in poi2[poi2.url.isin(wrong_page)].index:
    wrong_img.append(i)

In [263]:
len(wrong_img)


Out[263]:
2760

In [262]:
wrong_img_id = []
for i in wrong_img:
    wrong_img_id.append(str(i).replace(".jpg",""))

In [266]:
with open ("wrong_img_id.txt", "w") as f:
    for i in wrong_img_id:
        f.write(i+"\n")

In [ ]:


In [ ]: