In [2]:
import pandas as pd
import numpy as py
In [82]:
# poi = pd.read_csv("poi_detail_table_final_v2.csv", encoding = ('utf-8'), index_col =0)
# Write the in-memory `poi` frame to the v2 CSV as UTF-8. The matching read above is
# commented out, so `poi` must already have been defined by another cell.
poi.to_csv("poi_detail_table_final_v2.csv", encoding = ('utf-8'))
In [67]:
# Manually correct the coordinates of POI 5051.
# Assign through a single .loc call on the frame: the chained form
# `poi.coord_lat.loc[5051] = ...` writes into an intermediate Series and is not
# guaranteed to update `poi` itself.
poi.loc[5051, "coord_lat"] = 34.2337769
poi.loc[5051, "coord_long"] = -77.9469075
# 34.2337769, -77.9469075
In [75]:
# Display the row labelled 10120 (the closing bracket was missing).
poi.loc[10120]
Out[75]:
In [80]:
# Replace every "'O" in the name column with "%27O" (URL-encoded apostrophe).
# The result is assigned back to the column rather than using inplace=True on an
# attribute-accessed Series, which may modify a temporary and leave `poi` unchanged.
poi["name"] = poi["name"].replace({"'O": "%27O"}, regex=True)
In [79]:
# Name, county and city of every poi2 row whose name contains "Yosemite".
poi2.loc[poi2["name"].str.contains("Yosemite"), ["name", "county", "city"]]
Out[79]:
In [58]:
# Name, county and city of every poi row named exactly "Riverwalk".
poi.loc[poi["name"] == "Riverwalk", ["name", "county", "city"]]
Out[58]:
In [68]:
# Display row 5051 of `poi` (the row whose coord_lat / coord_long are set by hand in this notebook).
poi.loc[5051]
Out[68]:
In [40]:
# Replace apostrophes with "%27" in the string fields of row 4619.
# `poi2.loc[4619]` yields a separate Series, so an inplace replace on it never
# reaches `poi2`; compute the replaced row and assign it back explicitly.
poi2.loc[4619] = poi2.loc[4619].replace("[']", "%27", regex=True)
In [41]:
# Display row 4619 of `poi2` to inspect the result of the apostrophe replacement.
poi2.loc[4619]
Out[41]:
In [ ]:
In [263]:
# Index labels of every POI whose poi_type contains "Tour".
a = poi[poi.poi_type.str.contains("Tour")].index
# Flag those rows as not interesting. One .loc call on the frame replaces the
# chained `poi.interesting.loc[a] = False`, which assigns into an intermediate
# Series and is not guaranteed to update `poi`.
poi.loc[a, "interesting"] = False
In [235]:
# Index labels of StatePark rows whose name does not contain "Park".
# Both conditions are combined into one mask; the original applied a full-length
# mask to an already-filtered frame, which relies on implicit reindexing.
SP = poi[(poi.poi_type == "StatePark") & ~poi.name.str.contains("Park")].index
# Of those rows, show the ones whose tag does not mention "Park" either
# (a missing tag counts as "does not contain"). The mask is built on the same
# SP rows so it aligns with the frame it filters.
poi.loc[SP][~poi.tag.loc[SP].str.contains("Park", na=False)][['name','tag','poi_type', 'num_reviews', 'ranking']]
# poi.poi_type.loc[a.index] = "Game"
Out[235]:
In [237]:
# Show the poi_type column of every row currently labelled "Unuse_theater".
poi[poi.poi_type == "Unuse_theater"].poi_type
Out[237]:
In [202]:
# Index labels of rows that look like museums by name and tag but are not typed
# "Museum". The three conditions are combined into one mask instead of chaining
# full-length masks over progressively filtered frames.
# NOTE(review): in 'Museum*|Historic Site*' the `*` quantifies only the preceding
# letter, so this also matches "Museu" / "Historic Sit" — confirm that is intended.
aa = poi[
    poi.name.str.contains('Museum*|Historic Site*')
    & (poi.poi_type != "Museum")
    & poi.tag.str.contains("Museum", na = False)
].index
# Current poi_type of those rows, read with one .loc call on the frame.
poi.loc[aa, "poi_type"]
Out[202]:
In [153]:
# Transportation rows whose name contains "Airport", selected with one combined
# mask (the original applied the second full-length mask to a filtered frame).
poi[(poi.poi_type == "Transportation") & poi.name.str.contains("Airport")]
# poi['poi_type'].loc[a] = "Unuse_transportation"
Out[153]:
In [128]:
# Index labels of Theater rows with fewer than 10 reviews, selected with one
# combined mask (the original applied the second full-length mask to a filtered frame).
a = poi[(poi.poi_type == "Theater") & (poi.num_reviews < 10)].index
# [['name','tag','poi_type', 'num_reviews', 'ranking']]
# poi['poi_type'].loc[a] = "Unuse_theater"
In [60]:
# Rows whose name (case-insensitive) and tag both contain "Tour".
# Element-wise `&` is required: Python's `and` asks for the truth value of a whole
# Series and raises ValueError. Missing tags are treated as non-matching, as in the
# other tag filters in this notebook.
poi[poi.name.str.contains('Tour', case=False) & poi.tag.str.contains('Tour', na=False)][['name','tag','poi_type']]
In [96]:
# Rows whose name contains "Tour".
a = poi[poi.name.str.contains('Tour')]
# Shape (rows, columns) of the subset of `a` whose tag also contains "Tour".
b= a[a.tag.str.contains('Tour')].shape
# Display that shape.
b
# [['name','tag','poi_type']]
# poi["poi_type"].loc[b] = "Tour"
Out[96]:
In [97]:
poi[poi.name.str.contains('Tour')][['name','tag','poi_type']]
Out[97]:
In [7]:
# for i in poi.img_url[0:2]:
# print i
# Create (or overwrite) the img_url column with the integer placeholder 0 in every row.
poi["img_url"] = 0
In [9]:
# Point every POI at its image on S3; the file is named after the row's index label.
# The whole column is built in one assignment. The original per-row chained form
# `poi["img_url"].loc[i] = ...` writes into an intermediate Series (not guaranteed
# to reach `poi`) and was slow enough to need progress prints, which are dropped.
poi["img_url"] = [
    "https://s3.amazonaws.com/travel-with-friends/img_file/" + str(i) + ".jpg"
    for i in poi.index
]
In [4]:
# Print the first two index labels of `poi`.
# Uses the print() call form (valid on Python 2 and 3), as the img_url cell already does.
for i in poi.index[0:2]:
    print(str(i))
In [ ]:
def _insert_empty_outside_trip(outside_trip_id, origin_city, origin_state,
                               target_direction, regular, full_day):
    """Insert a placeholder row into outside_trip_table for a trip with no POIs."""
    username_id = 1
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        cur.execute('SELECT MAX(index) from outside_trip_table;')
        # MAX() is NULL (None) on an empty table; start numbering at 1 in that case.
        new_index = (cur.fetchone()[0] or 0) + 1
        # Values are bound by the driver instead of being %-formatted into the SQL
        # text, so quotes in city/state names cannot break or alter the statement.
        cur.execute(
            "INSERT into outside_trip_table(index, username_id, outside_trip_id, outside_route_ids, "
            "event_id_lst, origin_city, origin_state, target_direction, n_routes, regular, full_day, details) "
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);",
            (new_index, str(username_id), outside_trip_id, '[]', '[]', origin_city, origin_state,
             target_direction, 0, regular, full_day, '[]'))
        conn.commit()
    finally:
        conn.close()


def _delete_outside_route(outside_route_id):
    """Delete any existing outside_route_table rows with this route id."""
    conn = psycopg2.connect(conn_str)
    try:
        cur = conn.cursor()
        # The original formatted the id into the SQL without quotes, which is not a
        # valid comparison against a string id; bind it as a parameter instead.
        cur.execute('DELETE FROM outside_route_table WHERE outside_route_id = %s;',
                    (outside_route_id,))
        conn.commit()
    finally:
        conn.close()


def outside_trip_poi(origin_city, origin_state, target_direction = 'N', n_days = 1,
                     full_day = True, regular = True, debug = True, user_id = 'admin'):
    """Build themed out-of-town routes from an origin city in one direction.

    The trip id is "<STATE>-<CITY>-<direction>-<int(regular)>-<n_days>" with spaces
    replaced by dashes. Candidate POIs are clustered with KMeans on columns 1:3 of
    ``city_infos``; each cluster becomes one route, which is then passed through
    the ordering/pruning helpers and ``assign_theme``.

    Parameters
    ----------
    origin_city, origin_state : str
        Origin of the trip; also used to build the trip id.
    target_direction : str
        Direction passed to ``travel_outside_with_direction``.
    n_days : int
        Only used in the trip id; the POI lookup is always made with ``n_days=1``.
    full_day, regular : bool
        Stored with each route; ``regular`` is also part of the trip id.
    debug
        Forwarded to ``check_outside_trip_id``.
    user_id
        Accepted for compatibility; not used.

    Returns
    -------
    list or None
        One list per route: the value returned by ``assign_theme`` extended with
        ``[outside_route_id, full_day, regular, origin_city, origin_state,
        target_direction, details (quote-escaped str), event_type, str(event_ids),
        route number, route_theme[0]]``.
        ``None`` when the trip id already exists (that branch was never implemented;
        the database read-back for it is disabled) or when no POIs are found (a
        placeholder row is inserted into outside_trip_table first).
    """
    outside_trip_id = '-'.join([
        str(origin_state.upper().replace(' ', '-')),
        str(origin_city.upper().replace(' ', '-')),
        target_direction, str(int(regular)), str(n_days)])
    if check_outside_trip_id(outside_trip_id, debug):
        return None

    # The original set 140 for every n_days value, so a single assignment suffices.
    furthest_len = 140
    # NOTE(review): n_days is hard-coded to 1 here although the function takes an
    # n_days argument — confirm whether it should be passed through.
    city_id, coord_lat, coord_long, city_infos = travel_outside_with_direction(
        origin_city, origin_state, target_direction, furthest_len, n_days=1)

    if len(city_infos) <= 0:
        _insert_empty_outside_trip(outside_trip_id, origin_city, origin_state,
                                   target_direction, regular, full_day)
        print("finish update None for %s, %s, direction %s into database"
              % (origin_state, origin_city, target_direction))
        return None

    # The 2-D slicing below needs an ndarray; this is a no-op when it already is one.
    city_infos = np.asarray(city_infos)
    poi_coords = city_infos[:, 1:3]
    # One route per ten POIs of at least 120 (column 3). Floor division keeps the
    # count an integer on Python 3 as well as Python 2.
    n_routes = sum(1 for t in city_infos[:, 3] if t >= 120) // 10
    if (n_routes > 1) and (city_infos.shape[0] >= 10):
        n_clusters = n_routes
    elif (city_infos.shape[0] > 20) or (n_routes > 1):
        n_clusters = 2
    else:
        n_clusters = 1
    route_labels = KMeans(n_clusters=n_clusters).fit(poi_coords).labels_

    details_theme = []
    # Iterate over the clusters actually fitted. The original looped over
    # range(n_routes), which skipped clusters (or every route) whenever the
    # fallback branches above fitted 1 or 2 clusters with n_routes of 0 or 1.
    for i in range(n_clusters):
        big_ix, med_ix, small_ix = [], [], []
        for ix, label in enumerate(route_labels):
            if label != i:
                continue
            visit_time = city_infos[ix, 3]
            if visit_time > 180:
                big_ix.append(ix)
            elif visit_time >= 120:
                med_ix.append(ix)
            else:
                small_ix.append(ix)

        big_ = sorted_outside_events(city_infos, big_ix)
        med_ = sorted_outside_events(city_infos, med_ix)
        small_ = sorted_outside_events(city_infos, small_ix)

        # need to update!!!!!!!!
        event_ids, event_type = create_outside_event_id_list(big_, med_, small_)
        event_ids, event_type = db_outside_event_cloest_distance(
            coord_lat, coord_long, event_ids = event_ids, event_type = event_type)
        event_ids, google_ids, name_list, driving_time_list, walking_time_list = \
            db_outside_google_driving_walking_time(
                city_id, coord_lat, coord_long, event_ids, event_type,
                origin_city = origin_city, origin_state = origin_state)
        event_ids, driving_time_list, walking_time_list, total_time_spent = \
            db_remove_outside_extra_events(event_ids, driving_time_list, walking_time_list)

        outside_route_id = outside_trip_id + '-' + str(i)
        if check_outside_route_id(outside_route_id):
            _delete_outside_route(outside_route_id)

        # Fetched once; the original called this twice with identical arguments.
        details = db_outside_route_trip_details(event_ids, i)
        # NOTE(review): route_theme is indexed and extended as a list below, but the
        # assign_theme defined in this notebook returns a single theme name —
        # confirm which assign_theme is in scope when this runs.
        route_theme = assign_theme(details)
        info = [outside_route_id, full_day, regular, origin_city, origin_state, target_direction,
                str(details).replace("'", "''"), event_type, str(event_ids), i, route_theme[0]]
        route_theme.extend(info)
        details_theme.append(route_theme)

    # The original had an unreachable `clean_details(details_theme)` call after this
    # return; the notebook applies that cleaning to the returned value in later cells.
    return details_theme
In [ ]:
import pprint
import numpy as np
from distance import *
from outside_helpers import *
In [ ]:
# Driver cell: build the outside trip for San Francisco heading south.
direct = ["E","S","W","N"]
target_direction = direct[1]
origin_city = 'San Francisco'
origin_state = 'California'
# print() call form with one pre-formatted string gives the same output on Python 2 and 3.
print("%s %s" % (origin_city, origin_state))
details= outside_trip_poi(origin_city,origin_state, target_direction)
In [ ]:
# Display `details`, the value returned by outside_trip_poi in the driver cell.
details
In [ ]:
# details_array= np.array(details)
# Cast columns 1 and 3 to float and write them back. The builtin `float` / `int`
# replace the `np.float` / `np.int` aliases, which were removed from NumPy.
# NOTE(review): `details_array` is not created in this cell (the line above is
# commented out); it must already exist from another cell.
details_array[:,[1,3]] = details_array[:,[1,3]].astype(float)
# Displayed only — the int-cast column 2 is not stored back.
details_array[:,2].astype(int)
In [ ]:
# Scratch version of the theme selection: keep every national/theme park row,
# then the first row of each remaining theme after sorting.
details_array= np.array(details)
final = []
used =[]
for count, i, in enumerate(details_array):
    if (i[0] == "national_park") or (i[0] == "theme_park"):
        final.append(i[4])
        used.append(count)
details_array = np.delete(details_array, used, axis = 0)
# Sort key casts with the builtin int/float; the np.int / np.float aliases used
# originally were removed from NumPy.
a= np.array(sorted(details_array, key=lambda x: (x[2].astype(int), -x[1].astype(float), x[3].astype(float))))
theme_select_dict={}
backup =[]
# a[:,0:4]
for count, i in enumerate(a):
    if i[0] not in theme_select_dict:
        theme_select_dict[i[0]] = 1
        final.append(i[4:])
    else:
        backup.extend(i[4:])
In [ ]:
# Display `final`, the list accumulated by the theme-selection scratch cell.
final
In [ ]:
def clean_details(details_theme):
    """Select the route details to keep: all park routes, then one route per theme.

    Each row of ``details_theme`` is read positionally: ``row[0]`` is the theme
    name, ``row[1]``, ``row[2]`` and ``row[3]`` are numeric sort fields and
    ``row[4]`` is an iterable of detail items. (The original inline comment
    describes the sort as "ranking, num_review, review_score" — presumably
    row[2], row[1], row[3]; confirm against the producer of these rows.)

    Rows are handled as plain Python sequences. The original converted them with
    ``np.array(details_theme)``, which turns mixed text/number rows into strings
    (so ``-x[1]`` fails) and rejects rows whose ``row[4]`` lists differ in length.

    Returns a flat list: the ``row[4]`` items of every "national_park" /
    "theme_park" row in input order, followed by the ``row[4]`` items of the
    first row of each other theme after sorting by
    ``(row[2], -row[1], -row[3])``. Rows that lose to an earlier row of the same
    theme are dropped (the original collected them in a ``backup`` list that was
    never returned).
    """
    rows = list(details_theme)
    final = []
    remaining = []
    for row in rows:
        if (row[0] == "national_park") or (row[0] == "theme_park"):
            final.extend(row[4])      # parks are always kept
        else:
            remaining.append(row)

    remaining.sort(key=lambda x: (x[2], -x[1], -x[3]))

    seen_themes = set()
    for row in remaining:
        if row[0] not in seen_themes:
            seen_themes.add(row[0])   # only the best-sorted row of each theme is used
            final.extend(row[4])
    return final
In [ ]:
In [ ]:
def assign_theme(details):
    """Return the name of the dominant theme among the POIs in ``details``.

    Every item of ``details`` must provide "poi_type", "adjusted_visit_length",
    "num_reviews" and "ranking". A POI counts towards each theme whose list in
    the module-level ``theme_list_dict`` contains its poi_type. The theme with the
    largest summed visit length wins; when the top two sums are equal the one
    with more summed reviews wins, and if those are equal too the first is chosen
    only when its smallest ranking value is strictly lower than the second's.
    """
    themes = ("family", "lifestyle", "nature", "cultural",
              "theme_park", "national_park", "other_list")
    total_time = dict.fromkeys(themes, 0)
    total_reviews = dict.fromkeys(themes, 0)
    best_ranking = dict.fromkeys(themes, -1)   # -1 means "no POI seen yet"

    # Pull the four fields out of every POI first.
    records = [
        (poi["poi_type"], poi["adjusted_visit_length"], poi["num_reviews"], poi["ranking"])
        for poi in details
    ]

    for poi_type, visit_length, num_reviews, ranking in records:
        for theme, member_types in theme_list_dict.items():
            if poi_type not in member_types:
                continue
            total_time[theme] += int(visit_length)
            total_reviews[theme] += int(num_reviews)
            rank = int(ranking)
            if best_ranking[theme] < 0:
                best_ranking[theme] = rank
            else:
                best_ranking[theme] = min(best_ranking[theme], rank)

    ordered = sort_dict(total_time)            # (total, theme) pairs, largest first
    top_total, top_theme = ordered[0]
    next_total, next_theme = ordered[1]

    if top_total != next_total:
        return top_theme
    # Tie on total time: more reviews wins, then the lower (better) ranking.
    if total_reviews[top_theme] > total_reviews[next_theme]:
        return top_theme
    if total_reviews[top_theme] < total_reviews[next_theme]:
        return next_theme
    if best_ranking[top_theme] < best_ranking[next_theme]:
        return top_theme
    return next_theme
In [ ]:
In [ ]:
def sort_dict(input_dict):
    """Return the dict's entries as (value, key) tuples, sorted in descending order."""
    return sorted(((value, key) for key, value in input_dict.items()), reverse=True)
In [ ]:
# Print the theme chosen for `details`; print() call form works on Python 2 and 3.
print(assign_theme(details))
In [ ]:
# Maps each theme name to the poi_type values that belong to it; assign_theme
# looks a POI's poi_type up in these lists.
# NOTE(review): "Show" is listed twice under "lifestyle", and "VisotorCenter" is
# spelled this way — confirm it matches the poi_type values in the data before changing it.
theme_list_dict = {
"family" : ["Park","Zoo","Game"],
"lifestyle" : ["Nightlife","Shopping","Theater","Food","Spa","Casino","Show","ShoppingMall","Show"],
"nature" : ["StatePark","NationalWildlifeRefuge","NationalHistoricalPark","NationalForest","NationalMonument","NationalMemorial"],
"cultural" : ["Landmark", "Museum","OutdoorActivities","Library","Stadium"],
"theme_park" : ["ThemePark"],
"national_park" : ["NationalPark"],
"other_list" : ["Other","VisotorCenter","Transportation","Tour"]
}
In [ ]:
# Scratch per-POI-type constants; not referenced elsewhere in the visible notebook.
# NOTE(review): units are not stated — presumably visit length in minutes; confirm.
museum =300
theater =60
landmark = 60
visitorCenter =15
In [ ]:
# Start a zeroed counter for each of the six named themes.
theme_list = ["family","lifestyle","nature","cultural","theme_park","national_park"]
theme = dict.fromkeys(theme_list, 0)
In [ ]:
# Load the v1 POI table from CSV (UTF-8), using the first column as the index.
poi = pd.read_csv("poi_detail_table_final_v1.csv", encoding=('utf-8'), index_col= 0)
In [ ]:
# Distinct poi_type values, UTF-8 encoded.
poi_type_u= set(poi.poi_type.str.encode('utf8'))
# All poi_type values (one per row, duplicates kept), UTF-8 encoded.
poi_type= list(poi.poi_type.str.encode('utf8'))
In [ ]:
from collections import Counter
# Occurrences of each poi_type value.
counts = Counter(poi_type)
# Total number of counted values, summed directly instead of with a manual loop.
add = sum(counts.values())
# print() call form works on Python 2 and 3.
print(add)
# print counts.items()[0][1]
In [ ]:
# d = {'key': 'value'}
# word_count_dict = {}
# for key, values in d.items():
# if values[1] in temp_dict:
# temp_dict[values[1]] = temp_dict[values[1]] + 1
# else:
# temp_dict[values[1]] = 1
In [ ]:
# from collections import defaultdict
# d = defaultdict(int)
# for word in poi_type:
# d[word] += 1
In [ ]:
# Frequency of every distinct poi_type. The list is counted once with Counter
# rather than rescanned with poi_type.count() for each distinct value.
from collections import Counter
_poi_type_counts = Counter(poi_type)
poi_type_dict = {t: _poi_type_counts[t] for t in poi_type_u}
In [ ]:
def sortFreqDict(freqdict):
    """Return (frequency, key) tuples for every entry of freqdict, highest first."""
    pairs = []
    for key in freqdict:
        pairs.append((freqdict[key], key))
    return sorted(pairs, reverse=True)
In [ ]:
# Display the poi_type frequencies as (count, poi_type) pairs, most frequent first.
sortFreqDict(poi_type_dict)
In [ ]:
# Names of all rows typed "NationalHistoricalPark".
poi[poi.poi_type == "NationalHistoricalPark"].name
In [ ]:
In [ ]:
# Print the column/dtype/non-null summary of `poi`.
poi.info()
In [ ]:
# conn_str = "dbname='travel_with_friends' user='Gon' host='localhost'"
# Read the connection settings from the local JSON config file, then build the
# SQLAlchemy engine from its "engine" entry.
# NOTE(review): `json` and `create_engine` are not imported in any visible cell —
# confirm they are provided elsewhere (e.g. by one of the star imports).
with open('api_key_list.config') as key_file:
    api_key_list = json.load(key_file)

conn_str = api_key_list["conn_str"]
engine_str = api_key_list["engine"]
engine = create_engine(engine_str)
In [ ]:
# import psycopg2
# conn = psycopg2.connect(conn_str)
# cur = conn.cursor()
# poi.to_sql('poi_detail_table_final_v1',engine, index=True, if_exists = "replace")
# conn.close
In [ ]:
# Scratch list mixing floats, ints and a nested one-element list.
a= [1.5,2,3,4,5.5,["hel"]]
In [ ]:
# `a` mixes scalars with a nested list, so the array must be created with
# dtype=object; current NumPy raises ValueError for such ragged input otherwise.
a= np.array(a, dtype=object)
In [ ]:
# Select the elements at positions 0, 4 and 5 of `a` with a list of indices.
a[[0,4,5]]
In [17]:
import helpers
from us_state_abbrevation import *
def check_state(origin_state):
    """Return origin_state if helpers accepts it, else look it up as an abbreviation.

    A value rejected by ``helpers.check_valid_state`` is converted to an
    upper-case string and used as a key into ``abb2state``; an unknown key
    raises KeyError.
    """
    if helpers.check_valid_state(origin_state):
        return origin_state
    abbreviation = str(origin_state).upper()
    return abb2state[abbreviation]
# Try check_state on a mixed-case abbreviation, printing the value and its type
# before and after. print() with one pre-formatted string works on Python 2 and 3.
state = u'Ca'
print("%s %s" % (state, type(state)))
state = check_state(state)
print("%s %s" % (state, type(state)))
In [269]:
# Demonstration: assigning a dict to a second name does not copy it.
dict1 = {'first':'hello', 'second':'world'}
dict2 = dict1 # dict2 is bound to the same dict object as dict1 (no copy is made)
# Mutating through dict1 is therefore visible through dict2 as well.
dict1['first'] = 'bye2'
In [270]:
# Display dict2, which refers to the same dict that was modified through dict1.
dict2
Out[270]:
In [268]:
# Demonstration: rebinding a name does not affect other names bound to the old value.
a=1
b=a
# `a` is rebound to a new object; `b` still refers to 1.
a=2
b
Out[268]:
In [ ]: