In [20]:
import pandas as pd
import igraph as ig
def get_dwell_time_df(db_connection):
foreigners = pd.read_sql("""
SELECT
cust_id,
prev_cust_id,
tower_id,
prev_tower_id,
dwell_time,
near_airport,
in_florence_comune
FROM optourism.foreigners_daytripper_dwell_time
""", con=db_connection)
foreigners['key'] = ((foreigners['tower_id'] != foreigners['prev_tower_id']) | (foreigners['cust_id'] != foreigners['prev_cust_id'])).astype(int).cumsum()
groups = foreigners.groupby(['cust_id', 'tower_id', 'near_airport', 'in_florence_comune', 'key'], sort=False)['dwell_time'].sum().reset_index()
transitions = groups.loc[groups['dwell_time'] >= pd.Timedelta('20 minutes')]
transitions['in_florence'] = transitions.apply(lambda x: (x['in_florence_comune'] | x['near_airport']), axis=1)
del transitions['in_florence_comune']
del transitions['key']
curated = transitions.loc[(transitions['in_florence'] == True) |
((transitions['cust_id'] == transitions['cust_id'].shift(1)) &
(transitions['in_florence'] != transitions['in_florence'].shift(1))) |
((transitions['cust_id'] == transitions['cust_id'].shift(-1)) &
(transitions['in_florence'] != transitions['in_florence'].shift(-1)))]
return curated
In [10]:
import sys
sys.path.append('../../src/')
from utils.database import dbutils
connection = dbutils.connect()
In [24]:
tower_vertices = pd.read_sql("""
SELECT DISTINCT cdr.tower_id, cdr.lat, cdr.lon, towers.region_name
FROM optourism.foreigners_daytripper_dwell_time as cdr
JOIN optourism.cdr_labeled_towers as towers
ON towers.id = cdr.tower_id
""", con=connection)
# tower_vertices['tower_id'] = tower_vertices['tower_id'].apply(
# lambda x: 'tower-%s' % x)
In [23]:
foreigners = get_dwell_time_df(connection)
# foreigners['tower_id'] = foreigners['tower_id'].apply(
# lambda x: 'tower-%s' % x)
In [25]:
foreigners['tower_region'] = foreigners.apply(lambda x: tower_vertices.loc[tower_vertices['tower_id'] == x['tower_id']].iloc[0]['region_name'] if x['in_florence'] == False else x['tower_id'], axis=1)
In [39]:
def get_most_common_location(dataframe):
grouped = dataframe.groupby(['tower_region']).count().reset_index()
grouped.rename(columns={'dwell_time': 'weight'}, inplace=True)
grouped = grouped.sort_values('weight', ascending=False)
return grouped.filter(['tower_region', 'weight'], axis=1)
In [41]:
# foreigners['tower_id'] = foreigners['tower_id'].apply(
# lambda x: 'tower-%s' % x)
#filtered_0 = foreigners.loc[foreigners['in_florence'] == True]
filtered_0 = foreigners
filtered_0['prev_tower_id'] = filtered_0['tower_id'].shift(1)
filtered_0.loc[(filtered_0['cust_id'] != filtered_0['cust_id'].shift(1)), 'prev_tower_id'] = 'source'
filtered = filtered_0.loc[filtered_0['prev_tower_id'] != 'source']
ending = filtered_0.groupby(['cust_id']).last()
most_common_end_nodes = get_most_common_location(ending)
print(most_common_end_nodes.head(40))
beginning = filtered_0.groupby(['cust_id']).first()
most_common_start_nodes = get_most_common_location(beginning)
print(most_common_start_nodes.head(40))
grouping = filtered.groupby(['tower_id', 'prev_tower_id']).count().reset_index()
grouping.rename(columns={'cust_id': 'weight'}, inplace=True)
del grouping['near_airport']
del grouping['dwell_time']
# graph = ig.Graph()
# #print(tower_vertices)
# graph.add_vertices(tower_vertices.shape[0])
# graph.vs['name'] = tower_vertices['tower_id']
# graph.vs['x'] = tower_vertices['lat']
# graph.vs['y'] = tower_vertices['lon']
# edges = zip(grouping['prev_tower_id'], grouping['tower_id'])
# graph.add_edges(edges)
# graph.es['weight'] = grouping['weight']
# graph.es.select(weight_lt=20).delete()
# visual_style = {'vertex_color': 'black',
# 'vertex_size': 3,
# 'edge_width': [.002 * i for i in
# graph.es["weight"]]}
# ig.plot(graph, bbox=(800, 800), **visual_style)
In [ ]:
#