In [2]:
import pandas as pd
import numpy as py

In [82]:
# poi = pd.read_csv("poi_detail_table_final_v2.csv", encoding = ('utf-8'), index_col =0)
poi.to_csv("poi_detail_table_final_v2.csv", encoding = ('utf-8'))

In [67]:
poi.coord_lat.loc[5051] = 34.2337769
poi.coord_long.loc[5051] = -77.9469075

# 34.2337769, -77.9469075

In [75]:
poi.loc[10120


Out[75]:
address                  1030 North Clark Street, Orland Park, Illinois...
adjusted_visit_length                                                   90
city                                                           Orland Park
coord_lat                                                          41.9013
coord_long                                                        -87.6318
country                                                      United States
county                                                                COOK
description                                                            NaN
fee                                                                Unknown
geo_content              {u'status': u'OK', u'results': [{u'geometry': ...
name                                                    Dave & Buster%27s 
num_reviews                                                             37
poi_type                                                              Game
postal_code                                                            NaN
ranking                                                                  3
raw_visit_length                                                       NaN
review_score                                                           3.5
state                                                             Illinois
state_abb                                                               IL
street_address                                     1030 North Clark Street
tag                              Game & Entertainment Centers, Fun & Games
url                      http://www.tripadvisor.com/Attraction_Review-g...
icon_url                 https://s3.amazonaws.com/travel-with-friends/i...
check_full_address                                                       0
img_url                  https://s3.amazonaws.com/travel-with-friends/i...
interesting                                                           True
Name: 10120, dtype: object

In [80]:
poi.name.replace({ "'O" : "%27O" }, regex=True, inplace = True)

In [79]:
poi2[poi2.name.str.contains("Yosemite")][["name", "county", "city"]]


Out[79]:
name county city
7366 Lake Yosemite MERCED Merced
16652 Yosemite National Park NaN NaN

In [58]:
poi[poi.name == "Riverwalk"][["name", "county", "city"]]


Out[58]:
name county city
350 Riverwalk DUVAL Jacksonville
1670 Riverwalk HAMILTON Cincinnati
2796 Riverwalk RICHMOND Augusta
3890 Riverwalk DUPAGE Naperville
5051 Riverwalk BEXAR Wilmington
10184 Riverwalk PALM BEACH Jupiter
10225 Riverwalk DUBUQUE Dubuque
11131 Riverwalk MANATEE Bradenton

In [68]:
poi.loc[5051]


Out[68]:
address                  From Nutt St to Nunn Street, Wilmington, North...
adjusted_visit_length                                                   30
city                                                            Wilmington
coord_lat                                                          34.2338
coord_long                                                        -77.9469
country                                                      United States
county                                                               BEXAR
description                                                            NaN
fee                                                                Unknown
geo_content              {u'status': u'OK', u'results': [{u'geometry': ...
name                                                             Riverwalk
num_reviews                                                            790
poi_type                                                          Landmark
postal_code                                                          28401
ranking                                                                  6
raw_visit_length                                                       NaN
review_score                                                           4.5
state                                                       North Carolina
state_abb                                                               NC
street_address                                 From Nutt St to Nunn Street
tag                      Points of Interest & Landmarks, Sights & Landm...
url                      http://www.tripadvisor.com/Attraction_Review-g...
icon_url                 https://s3.amazonaws.com/travel-with-friends/i...
check_full_address                                                       1
img_url                  https://s3.amazonaws.com/travel-with-friends/i...
interesting                                                           True
Name: 5051, dtype: object

In [40]:
poi2.loc[4619].replace("[']","%27", inplace = True, regex = True)

In [41]:
poi2.loc[4619]


Out[41]:
address                  4900 Marie P Debartolo Way, Santa Clara, Calif...
adjusted_visit_length                                                  360
city                                                           Santa Clara
coord_lat                                                          37.4028
coord_long                                                        -121.971
country                                                      United States
county                                                         SANTA CLARA
description                                                            NaN
fee                                                                Unknown
geo_content              {u'status': u'OK', u'results': [{u'geometry': ...
name                                                        Levi's Stadium
num_reviews                                                            671
poi_type                                                          Landmark
postal_code                                                     95054-1100
ranking                                                                  1
raw_visit_length                                         More than 3 hours
review_score                                                             4
state                                                           California
state_abb                                                               CA
street_address                                  4900 Marie P Debartolo Way
tag                                  Arenas & Stadiums, Sights & Landmarks
url                      http://www.tripadvisor.com/Attraction_Review-g...
icon_url                 https://s3.amazonaws.com/travel-with-friends/i...
check_full_address                                                       1
img_url                  https://s3.amazonaws.com/travel-with-friends/i...
interesting                                                           True
Name: 4619, dtype: object

In [ ]:


In [263]:
a = poi[poi.poi_type.str.contains("Tour")][['name','tag','poi_type', 'num_reviews', 'ranking','interesting']].index
poi.interesting.loc[a] = False

In [235]:
SP = poi[poi.poi_type == "StatePark"][~poi.name.str.contains("Park")].index
poi.loc[SP][~poi.tag.str.contains("Park", na=False)][['name','tag','poi_type', 'num_reviews', 'ranking']]

# poi.poi_type.loc[a.index] = "Game"


/Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':
/Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages/ipykernel/__main__.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  from ipykernel import kernelapp as app
Out[235]:
name tag poi_type num_reviews ranking

In [237]:
poi[poi.poi_type == "Unuse_theater"].poi_type


Out[237]:
901      Unuse_theater
1365     Unuse_theater
1438     Unuse_theater
1476     Unuse_theater
1529     Unuse_theater
1987     Unuse_theater
1997     Unuse_theater
2121     Unuse_theater
2167     Unuse_theater
2337     Unuse_theater
2694     Unuse_theater
2697     Unuse_theater
2746     Unuse_theater
2820     Unuse_theater
3061     Unuse_theater
3118     Unuse_theater
3154     Unuse_theater
3284     Unuse_theater
3372     Unuse_theater
3437     Unuse_theater
3511     Unuse_theater
3559     Unuse_theater
3602     Unuse_theater
3616     Unuse_theater
3617     Unuse_theater
3776     Unuse_theater
3824     Unuse_theater
3837     Unuse_theater
3853     Unuse_theater
3888     Unuse_theater
             ...      
13130    Unuse_theater
13196    Unuse_theater
13197    Unuse_theater
13212    Unuse_theater
13217    Unuse_theater
13231    Unuse_theater
13266    Unuse_theater
13296    Unuse_theater
13297    Unuse_theater
13314    Unuse_theater
13330    Unuse_theater
13361    Unuse_theater
13379    Unuse_theater
13395    Unuse_theater
13451    Unuse_theater
13462    Unuse_theater
13529    Unuse_theater
13548    Unuse_theater
13594    Unuse_theater
13615    Unuse_theater
13636    Unuse_theater
13673    Unuse_theater
13866    Unuse_theater
13927    Unuse_theater
14032    Unuse_theater
14085    Unuse_theater
14131    Unuse_theater
14160    Unuse_theater
14167    Unuse_theater
14198    Unuse_theater
Name: poi_type, dtype: object

In [202]:
aa = poi[poi.name.str.contains('Museum*|Historic Site*')][poi.poi_type != "Museum"][poi.tag.str.contains("Museum", na = False)][['name','tag','poi_type', 'num_reviews', 'ranking']].index
poi["poi_type"].loc[aa]


/Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':
Out[202]:
Series([], Name: poi_type, dtype: object)

In [153]:
poi[poi.poi_type == "Transportation"][poi.name.str.contains("Airport")]
# poi['poi_type'].loc[a] = "Unuse_transportation"


/Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':
Out[153]:
address adjusted_visit_length city coord_lat coord_long country county description fee geo_content ... raw_visit_length review_score state state_abb street_address tag url icon_url check_full_address img_url

0 rows × 25 columns


In [128]:
a = poi[poi.poi_type == "Theater"][poi.num_reviews < 10].index
# [['name','tag','poi_type', 'num_reviews', 'ranking']]
# poi['poi_type'].loc[a] = "Unuse_theater"


/Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':

In [60]:
poi[(poi.name.str.contains('Tour', case=False) and poi.tag.str.contains('Tour'))][['name','tag','poi_type']]


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-60-c050deb251a6> in <module>()
----> 1 poi[(poi.name.str.contains('Tour', case=False) and poi.tag.str.contains('Tour'))][['name','tag','poi_type']]

/Users/Gon/anaconda3/envs/python2/lib/python2.7/site-packages/pandas/core/generic.pyc in __nonzero__(self)
    915         raise ValueError("The truth value of a {0} is ambiguous. "
    916                          "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
--> 917                          .format(self.__class__.__name__))
    918 
    919     __bool__ = __nonzero__

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [96]:
a = poi[poi.name.str.contains('Tour')]
b= a[a.tag.str.contains('Tour')].shape
b
# [['name','tag','poi_type']]
# poi["poi_type"].loc[b] = "Tour"


Out[96]:
(37, 25)

In [97]:
poi[poi.name.str.contains('Tour')][['name','tag','poi_type']]


Out[97]:
name tag poi_type
566 Stranahan's Colorado Whiskey Tour Distilleries, Food & Drink Food
2186 Scooters Sunday In-House Tournament Bars & Clubs, Nightlife Nightlife
2188 Mack's Fish Camp - Tours Eco Tours, Fishing Charters & Tours, Tours, Ou... Tour
5095 Miami Brew Tours Beer Tastings & Tours, Tours, Food & Drink Tour
5422 NorCal Destinations Private Day Tours Private Tours, Tours Tour
5487 Palm Bay Kayaks Eco Tours Kayaking & Canoeing, Tours, Outdoor Activities... Tour
5548 Warner Bros. Studio Tour Hollywood Theme Parks, Water & Amusement Parks ThemePark
5803 Lake Mirror Historic Tour Historic Walking Areas, Points of Interest & L... Landmark
6006 Celebrity Helicopters Day Tours Helicopter Tours, Tours Tour
6168 Mercedes-Benz US International Visitor Center ... Visitor Centers, Traveler Resources VisotorCenter
6931 Suffolk Division of Tourism Great Dismal Swamp... Nature & Wildlife Tours, Outdoor Activities, T... Tour
8184 AZ Jeep Tours 4WD, ATV & Off-Road Tours, Outdoor Activities,... Tour
8246 Adventure West Motorcycle Tours Day Tours Motorcycle Tours, Tours Tour
8645 Tour 18 Dallas Golf Courses, Sightseeing Tours, Outdoor Activ... Tour
9056 Middle Mountain Foundation Interpretive Progra... Hiking & Camping Tours, Outdoor Activities, Tours Tour
9759 Green Motion Segway Tours Segway Tours, Tours Tour
10258 Historic Los Angeles Arboretum Tour Cultural Tours, Walking Tours, Tours, Historic... Tour
10302 Farm Tours of Ocala Ranches, Farms, Sights & Landmarks Landmark
10394 Vino 301 Wine Concierge - Tours Wine Tours & Tastings, Private Tours, Food & D... Tour
10422 Good Times Travel Day Tours Private Tours, Tours Tour
10430 Geo Savvy Tours- Day Tours Sightseeing Tours, Tours Tour
11083 Rickshaw Rick's Tours & Taxi Taxis & Shuttles, Cultural Tours, Transportati... Tour
11609 Historic Walking Tour Downtown Wilson Walking Tours, Historical & Heritage Tours, Tours Tour
11711 Mongol Global Tour Sightseeing Tours, Tours Tour
11924 SculptureTour Salina Art Galleries, Shopping, Museums Museum
12103 KBC Tours Day Tours Sightseeing Tours, Tours Tour
12117 Tomorrow's Transport Segway Tours Segway Tours, Tours Tour
12770 Extraordinary Tour Services City Tours, Private Tours, Food & Drink, Tours... Tour
12878 Danville Historical Society's Guided Walking Tour Historical & Heritage Tours, Tours Tour
13170 Arizona ATV Adventure Tours 4WD, ATV & Off-Road Tours, Outdoor Activities,... Tour
13190 North Bay Winery Tours Wine Tours & Tastings, Food & Drink, Tours Tour
13193 Mr. Zin's Wine Tours - Tours Wine Tours & Tastings, Private Tours, Food & D... Tour
13373 Burleson Historical Ghost Tour Ghost & Vampire Tours, Tours Tour
13390 Betsy & Tacy's House Tours & Gift Shop History Museums, Museums Museum
13464 Cedar Falls Tourism Center Visitor Centers, Traveler Resources VisotorCenter
13782 Valley Zipline Tours Zipline & Aerial Adventure Parks, Outdoor Acti... Tour
14140 Chatt Glide Tours Private Tours, Tours Tour
14199 Diamond Jack's River Tours River Rafting & Tubing, Boat Tours & Water Spo... Tour
14232 Bay Area Adventure Tours Sightseeing Tours, Tours Tour
14249 Apache Trail Tours Hiking & Camping Tours, Tours, Outdoor Activities Tour
14832 Big Bus Tours New York City Tours, Sightseeing Tours, Tours, Hop-On H... Tour
14842 Bretton Woods ATV Tour 4WD, ATV & Off-Road Tours, Outdoor Activities,... Tour
15133 Northwoods Outfitters - Day Tours River Rafting & Tubing, Fishing Charters & Tou... Tour
15376 Shore Trips & Tours Day Trips, Sightseeing Tours, Tours, Outdoor A... Tour
15503 Montana Fly Fishing Guides - Day Tours Fishing Charters & Tours, Tours, Outdoor Activ... Tour
16043 Extranomical Tours City Tours, Day Trips, Food & Drink, Tours, Pr... Tour
16438 Reedham Ferry Touring Park Nature & Parks StatePark

In [7]:
# for i in poi.img_url[0:2]:
#     print i
poi["img_url"] = 0

In [9]:
for i in poi.index:
    
    poi["img_url"].loc[int(i)] = "https://s3.amazonaws.com/travel-with-friends/img_file/"+ str(i) + ".jpg"
    if i%100 ==0:
        print("already finish " + str(i) +" images")


already finish 0 images
already finish 100 images
already finish 200 images
already finish 300 images
already finish 400 images
already finish 500 images
already finish 600 images
already finish 700 images
already finish 800 images
already finish 900 images
already finish 1000 images
already finish 1100 images
already finish 1200 images
already finish 1300 images
already finish 1400 images
already finish 1500 images
already finish 1600 images
already finish 1700 images
already finish 1800 images
already finish 1900 images
already finish 2000 images
already finish 2100 images
already finish 2200 images
already finish 2300 images
already finish 2400 images
already finish 2500 images
already finish 2600 images
already finish 2700 images
already finish 2800 images
already finish 2900 images
already finish 3000 images
already finish 3100 images
already finish 3200 images
already finish 3300 images
already finish 3400 images
already finish 3500 images
already finish 3600 images
already finish 3700 images
already finish 3800 images
already finish 3900 images
already finish 4000 images
already finish 4100 images
already finish 4200 images
already finish 4300 images
already finish 4400 images
already finish 4500 images
already finish 4600 images
already finish 4700 images
already finish 4800 images
already finish 4900 images
already finish 5000 images
already finish 5100 images
already finish 5200 images
already finish 5300 images
already finish 5400 images
already finish 5500 images
already finish 5600 images
already finish 5700 images
already finish 5800 images
already finish 5900 images
already finish 6000 images
already finish 6100 images
already finish 6200 images
already finish 6300 images
already finish 6400 images
already finish 6500 images
already finish 6600 images
already finish 6700 images
already finish 6800 images
already finish 6900 images
already finish 7000 images
already finish 7100 images
already finish 7200 images
already finish 7300 images
already finish 7400 images
already finish 7500 images
already finish 7600 images
already finish 7700 images
already finish 7800 images
already finish 7900 images
already finish 8000 images
already finish 8100 images
already finish 8200 images
already finish 8300 images
already finish 8400 images
already finish 8500 images
already finish 8600 images
already finish 8700 images
already finish 8800 images
already finish 8900 images
already finish 9000 images
already finish 9100 images
already finish 9200 images
already finish 9300 images
already finish 9400 images
already finish 9500 images
already finish 9600 images
already finish 9700 images
already finish 9800 images
already finish 9900 images
already finish 10000 images
already finish 10100 images
already finish 10200 images
already finish 10300 images
already finish 10400 images
already finish 10500 images
already finish 10600 images
already finish 10700 images
already finish 10800 images
already finish 10900 images
already finish 11000 images
already finish 11100 images
already finish 11200 images
already finish 11300 images
already finish 11400 images
already finish 11500 images
already finish 11600 images
already finish 11700 images
already finish 11800 images
already finish 11900 images
already finish 12000 images
already finish 12100 images
already finish 12200 images
already finish 12300 images
already finish 12400 images
already finish 12500 images
already finish 12600 images
already finish 12700 images
already finish 12800 images
already finish 12900 images
already finish 13000 images
already finish 13100 images
already finish 13200 images
already finish 13300 images
already finish 13400 images
already finish 13500 images
already finish 13600 images
already finish 13700 images
already finish 13800 images
already finish 13900 images
already finish 14000 images
already finish 14100 images
already finish 14200 images
already finish 14300 images
already finish 14400 images
already finish 14500 images
already finish 14600 images
already finish 14700 images
already finish 14800 images
already finish 14900 images
already finish 15000 images
already finish 15100 images
already finish 15200 images
already finish 15300 images
already finish 15400 images
already finish 15500 images
already finish 15600 images
already finish 15700 images
already finish 15800 images
already finish 15900 images
already finish 16000 images
already finish 16100 images
already finish 16200 images
already finish 16300 images
already finish 16400 images
already finish 16500 images
already finish 16600 images

In [4]:
for i in poi.index[0:2]:
    print str(i)


0
1

In [ ]:
def outside_trip_poi(origin_city, origin_state, target_direction = 'N', n_days = 1, \
                    full_day = True, regular = True, debug = True, user_id = 'admin'):
    outside_trip_id = '-'.join([str(origin_state.upper().replace(' ','-')), str(origin_city.upper().replace(' ','-')), \
                        target_direction,str(int(regular)), str(n_days)])
    if not check_outside_trip_id(outside_trip_id, debug):
        furthest_len = 140
        if n_days == 1:
            furthest_len = 140
        #possible city coords, target city coord_lat, target city coord_long
        # city_id, coords, coord_lat, coord_long = travel_outside_coords(origin_city, origin_state)
        #coords: city, lat, long
        # check_cities_info = []
        # for item in coords:
        #     direction = direction_from_orgin(coord_long,  coord_lat, item[2], item[1])
        #     if (target_direction == direction) and (geopy_dist((item[1], item[2]), (coord_lat, coord_long)) < furthest_len):
        #         check_cities_info.append(item)
        # city_infos = []
        # for city, _, _ in check_cities_info:
        #     county = None
        #     #index, coord0, coord1, adjusted_normal_time_spent, poi_rank, rating
        #     city_info = db_start_location(county, origin_state, city)
        #     city_infos.extend(city_info)
        city_id, coord_lat, coord_long, city_infos = travel_outside_with_direction(origin_city, origin_state, target_direction, furthest_len, n_days=1)
        if len(city_infos)<=0:
            username_id = 1
            conn = psycopg2.connect(conn_str)
            cur = conn.cursor()
            cur.execute('SELECT MAX(index) from outside_trip_table;')
            new_index = cur.fetchone()[0] +1
            cur.execute("INSERT into outside_trip_table(index, username_id, outside_trip_id, outside_route_ids, event_id_lst, origin_city, origin_state, target_direction, n_routes, regular, full_day, details) \
                         VALUES (%s,'%s', '%s', '%s','%s', '%s', '%s', '%s', %s,%s,%s,'%s');" \
                         %(new_index, username_id, outside_trip_id, '[]', '[]', origin_city, origin_state, target_direction, 0, regular, full_day, '[]'))
            conn.commit()
            conn.close()
            print "finish update None for %s, %s, direction %s into database" %(origin_state, origin_city, target_direction)
            return None
        # city_infos = np.array(city_infos)
        poi_coords = city_infos[:,1:3]
        n_routes = sum(1 for t in np.array(city_infos)[:,3] if t >= 120)/10
        if (n_routes>1) and (city_infos.shape[0]>=10):
            kmeans = KMeans(n_clusters=n_routes).fit(poi_coords)
        elif (city_infos.shape[0]> 20) or (n_routes>1):
            kmeans = KMeans(n_clusters=2).fit(poi_coords)
        else:
            kmeans = KMeans(n_clusters=1).fit(poi_coords)
        route_labels = kmeans.labels_
        # print n_routes, len(route_labels), city_infos.shape
        # print route_labels
        outside_route_ids, outside_trip_details,event_id_lst ,details_theme =[],[],[],[]
        for i in range(n_routes):
            current_events, big_ix, med_ix, small_ix = [], [],[], []
            for ix, label in enumerate(route_labels):
                if label == i:
                    time = city_infos[ix,3]
                    event_ix = city_infos[ix,0]
                    current_events.append(event_ix)
                    if time > 180 :
                        big_ix.append(ix)
                    elif time >= 120 :
                        med_ix.append(ix)
                    else:
                        small_ix.append(ix)
            big_ = sorted_outside_events(city_infos, big_ix)
            med_ = sorted_outside_events(city_infos, med_ix)
            small_ = sorted_outside_events(city_infos, small_ix)
            # need to update!!!!!!!!
            event_ids, event_type = create_outside_event_id_list(big_, med_, small_)
            event_ids, event_type = db_outside_event_cloest_distance(coord_lat, coord_long, event_ids = event_ids, event_type = event_type)
            event_ids, google_ids, name_list, driving_time_list, walking_time_list =db_outside_google_driving_walking_time(city_id, coord_lat,coord_long, event_ids, event_type, origin_city = origin_city, origin_state = origin_state)
            #why bug????
            event_ids, driving_time_list, walking_time_list, total_time_spent = db_remove_outside_extra_events(event_ids, driving_time_list, walking_time_list)
            outside_route_id = outside_trip_id + '-'+str(i)
#             values = db_outside_route_trip_details(outside_route_id, event_ids, origin_city, origin_state, regular, full_day,n_days,i)
            if check_outside_route_id(outside_route_id):
                conn = psycopg2.connect(conn_str)
                cur = conn.cursor()
                cur.execute('DELETE FROM outside_route_table WHERE outside_route_id = %s;' %(outside_route_id))
                conn.commit()
                conn.close()
                
            details = db_outside_route_trip_details(event_ids,i)
#             conn = psycopg2.connect(conn_str)
#             cur = conn.cursor()
#             cur.execute('select max(index) from outside_route_table;')
#             new_index = cur.fetchone()[0] + 1
#             cur.execute("insert into outside_route_table (index, outside_route_id, full_day, regular, origin_city, origin_state, target_direction, details, event_type, event_ids, route_num) \
#                         VALUES (%s, '%s', %s, %s, '%s', '%s', '%s', '%s', '%s', '%s', %s);" \
#                         %(new_index, outside_route_id, full_day, regular, origin_city, origin_state, target_direction, str(details).replace("'","''"), event_type, str(event_ids) , i))
#             conn.commit()
#             conn.close()
            details = db_outside_route_trip_details(event_ids,i)

            route_theme = assign_theme(details)
            info = [outside_route_id, full_day, regular, origin_city, origin_state, target_direction, str(details).replace("'","''"), event_type, str(event_ids) , i, route_theme[0]]
            route_theme.extend(info)
            
            details_theme.append(route_theme)
#             outside_route_ids.append(outside_route_id)
            # outside_trip_details.extend(details)
#             event_id_lst.extend(event_ids)
        return details_theme
        info_to_psql = clean_details(details_theme)
        return info_to_psql
#         username_id = 1
# #         conn = psycopg2.connect(conn_str)
# #         cur = conn.cursor()
# #         cur.execute('SELECT MAX(index) from outside_trip_table;')
# #         new_index = cur.fetchone()[0] +1
# #         cur.execute("INSERT into outside_trip_table(index, username_id, outside_trip_id, outside_route_ids, event_id_lst, origin_city, origin_state, target_direction, n_routes, regular, full_day, details) \
# #                      VALUES (%s,'%s', '%s', '%s','%s', '%s', '%s', '%s', %s,%s,%s,'%s');" \
# #                      %(new_index, username_id, outside_trip_id, str(outside_route_ids).replace("'","''"), str(event_id_lst), origin_city, origin_state, target_direction, n_routes, regular, full_day, str(outside_trip_details).replace("'","''")))
# #         conn.commit()
# #         conn.close()
#         print "finish update %s, %s, direction %s into database" %(origin_state, origin_city, target_direction)
#         return outside_trip_id, outside_trip_details
#     else:
#         print "ALERT: %s, %s, direction %s already in database" %(origin_state, origin_city, target_direction)
#         conn = psycopg2.connect(conn_str)
#         cur = conn.cursor()
#         cur.execute("SELECT DISTINCT outside_trip_id, details FROM outside_trip_table WHERE outside_trip_id = '%s';" %(outside_trip_id))
#         outside_trip_id, details = cur.fetchone()
#         details = ast.literal_eval(details)
#         conn.close()
#         return outside_trip_id, details

In [ ]:
import pprint
import numpy as np
from distance import *
from outside_helpers import *

In [ ]:
direct = ["E","S","W","N"]
target_direction = direct[1]
origin_city = 'San Francisco'
origin_state = 'California'
print origin_city, origin_state

details= outside_trip_poi(origin_city,origin_state, target_direction)

In [ ]:
details

In [ ]:
# details_array= np.array(details)

details_array[:,[1,3]] = details_array[:,[1,3]].astype(np.float)
details_array[:,2].astype(np.int)

In [ ]:
details_array= np.array(details)

final = []
used =[]
for count, i, in enumerate(details_array):
    if (i[0] == "national_park") or (i[0] == "theme_park"):
        final.append(i[4])
        used.append(count)
details_array = np.delete(details_array, used, axis = 0)
a= np.array(sorted(details_array, key=lambda x: (x[2].astype(np.int), -x[1].astype(np.float), x[3].astype(np.float))))

theme_select_dict={}
backup =[]
# a[:,0:4]
for count, i in enumerate(a):
    if i[0] not in theme_select_dict:
        theme_select_dict[i[0]] = 1
        final.append(i[4:])
    else:
        backup.extend(i[4:])

In [ ]:
final

In [ ]:
def clean_details(details_theme):
    details_array= np.array(details_theme)
    final = []
    used =[]
    for count, i, in enumerate(details_array):  
        if (i[0] == "national_park") or (i[0] == "theme_park"):  #select all national park & theme park 
            final.extend(i[4]) #save all details together
            used.append(count) #save the count of those park
    details_array = np.delete(details_array, used, axis = 0)  #remove them from array
    a= np.array(sorted(details_array, key=lambda x: (x[2], -x[1], -x[3] ))) #sorted the item base on ranking, num_review, review_score
    
    theme_select_dict={}
    backup =[]
    # a[:,0:4]
    for count, i in enumerate(a):
        if i[0] not in theme_select_dict:
            theme_select_dict[i[0]] = 1 #use dict to check for only one of each theme is choose
            final.extend(i[4])
        else:
            backup.extend(i[4]) #backup for those didnt use 
    return final

In [ ]:


In [ ]:
def assign_theme(details):
    assign_dict={"family" : 0,"lifestyle": 0,"nature": 0,"cultural": 0,"theme_park": 0,"national_park": 0,"other_list": 0}
    
    assign_dict2={"family" : 0,"lifestyle": 0,"nature": 0,"cultural": 0,"theme_park": 0,"national_park": 0,"other_list": 0}
    
    assign_dict3={"family" : -1,"lifestyle": -1,"nature": -1,"cultural": -1,"theme_park": -1,"national_park": -1,"other_list": -1}

    #create a list for each poi
    all_type=[]
    for i in details:
        all_type.append([i["poi_type"],i["adjusted_visit_length"], i["num_reviews"], i["ranking"]])

    for i in all_type:
        for key, value in theme_list_dict.items():
            if i[0] in value: #locate the theme 
                assign_dict[key] += int(i[1]) #total time of theme
                assign_dict2[key] += int(i[2]) #total # of review of theme
                if assign_dict3[key] <0:
                    assign_dict3[key] = int(i[3])
                else:
                    assign_dict3[key] = min(assign_dict3[key], int(i[3]))
    assign_dict = sort_dict(assign_dict) #order descending 
    
    if assign_dict[0][0] == assign_dict[1][0]: #check if the total time is same 
        if assign_dict2[assign_dict[0][1]] > assign_dict2[assign_dict[1][1]]:  #check number of review
            return assign_dict[0][1]
        elif assign_dict2[assign_dict[0][1]] < assign_dict2[assign_dict[1][1]]:
            return assign_dict[1][1]
        elif assign_dict3[assign_dict[0][1]] < assign_dict3[assign_dict[1][1]]: #check for ranking
            return assign_dict[0][1]
        else:
            return assign_dict[1][1]

            
    return assign_dict[0][1]

In [ ]:


In [ ]:
def sort_dict(input_dict):
	temp_dict = [(input_dict[key], key) for key in input_dict]
	temp_dict.sort(reverse = True)
	return temp_dict

In [ ]:
print assign_theme(details)

In [ ]:
theme_list_dict = {
    "family" : ["Park","Zoo","Game"],
    "lifestyle" : ["Nightlife","Shopping","Theater","Food","Spa","Casino","Show","ShoppingMall","Show"],
    "nature" : ["StatePark","NationalWildlifeRefuge","NationalHistoricalPark","NationalForest","NationalMonument","NationalMemorial"],
    "cultural" : ["Landmark", "Museum","OutdoorActivities","Library","Stadium"],
    "theme_park" : ["ThemePark"],
    "national_park" : ["NationalPark"],
    "other_list" : ["Other","VisotorCenter","Transportation","Tour"]
    }

In [ ]:
museum =300
theater =60
landmark = 60
visitorCenter =15

In [ ]:
theme={}
theme_list = ["family","lifestyle","nature","cultural","theme_park","national_park"]
for i in theme_list:
    theme[i]= 0

In [ ]:
poi = pd.read_csv("poi_detail_table_final_v1.csv", encoding=('utf-8'), index_col= 0)

In [ ]:
poi_type_u= set(poi.poi_type.str.encode('utf8'))
poi_type= list(poi.poi_type.str.encode('utf8'))

In [ ]:
from collections import Counter
counts = Counter(poi_type)
add = 0
for i in counts.items():
    add += i[1]
print add
# print counts.items()[0][1]

In [ ]:
# d = {'key': 'value'}
# word_count_dict = {}
# for key, values in d.items():
#     if values[1] in temp_dict:
#         temp_dict[values[1]] = temp_dict[values[1]] + 1
#     else:
#         temp_dict[values[1]] = 1

In [ ]:
# from collections import defaultdict
# d = defaultdict(int)

# for word in poi_type:
#     d[word] += 1

In [ ]:
poi_type_dict={}
for i in poi_type_u:
    poi_type_dict[i] = poi_type.count(i)

In [ ]:
def sortFreqDict(freqdict):
    aux = [(freqdict[key], key) for key in freqdict]
    aux.sort(reverse=True)
#     aux.reverse()
    return aux

In [ ]:
sortFreqDict(poi_type_dict)

In [ ]:
poi[poi.poi_type == "NationalHistoricalPark"].name

In [ ]:


In [ ]:
poi.info()

In [ ]:
# conn_str = "dbname='travel_with_friends' user='Gon' host='localhost'"
with open('api_key_list.config') as key_file:
    api_key_list = json.load(key_file)
conn_str = api_key_list["conn_str"]
engine_str = api_key_list["engine"]
engine = create_engine(engine_str)

In [ ]:
# import psycopg2
# conn = psycopg2.connect(conn_str)   
# cur = conn.cursor()
# poi.to_sql('poi_detail_table_final_v1',engine, index=True, if_exists = "replace")
# conn.close

In [ ]:
a= [1.5,2,3,4,5.5,["hel"]]

In [ ]:
a= np.array(a)

In [ ]:
a[[0,4,5]]

In [17]:
import helpers
from us_state_abbrevation import *

def check_state(origin_state):
    if not helpers.check_valid_state(origin_state):
        origin_state = abb2state[str(origin_state).upper()]
    return origin_state
state = u'Ca'
print state, type(state)
state = check_state(state)
print state, type(state)


Ca <type 'unicode'>
California <type 'str'>

In [269]:
dict1 = {'first':'hello', 'second':'world'}
dict2 = dict1 # pointer assignation mechanism
dict1['first'] = 'bye2'

In [270]:
dict2


Out[270]:
{'first': 'bye2', 'second': 'world'}

In [268]:
a=1
b=a
a=2
b


Out[268]:
1

In [ ]: