In [20]:
import pandas as pd
import igraph as ig

def get_dwell_time_df(db_connection):
    foreigners = pd.read_sql("""
    SELECT
      cust_id,
      prev_cust_id,
      tower_id,
      prev_tower_id,
      dwell_time,
      near_airport,
      in_florence_comune
    FROM optourism.foreigners_daytripper_dwell_time
    """, con=db_connection)

    foreigners['key'] = ((foreigners['tower_id'] != foreigners['prev_tower_id']) | (foreigners['cust_id'] != foreigners['prev_cust_id'])).astype(int).cumsum()
    groups = foreigners.groupby(['cust_id', 'tower_id', 'near_airport', 'in_florence_comune', 'key'], sort=False)['dwell_time'].sum().reset_index()

    transitions = groups.loc[groups['dwell_time'] >= pd.Timedelta('20 minutes')]
    transitions['in_florence'] = transitions.apply(lambda x: (x['in_florence_comune'] | x['near_airport']), axis=1)
    del transitions['in_florence_comune']
    del transitions['key']

    curated = transitions.loc[(transitions['in_florence'] == True) |
                              ((transitions['cust_id'] == transitions['cust_id'].shift(1)) &
                               (transitions['in_florence'] != transitions['in_florence'].shift(1))) |
                              ((transitions['cust_id'] == transitions['cust_id'].shift(-1)) &
                              (transitions['in_florence'] != transitions['in_florence'].shift(-1)))]
    return curated

In [10]:
import sys
sys.path.append('../../src/')
from utils.database import dbutils

connection = dbutils.connect()


<connection object at 0x7fe124369050; dsn: 'host=db.dssg.io password=xxxxxxxxxxxxx user=optourism_db dbname=optourism port=5432', closed: 0>

In [24]:
tower_vertices = pd.read_sql("""
    SELECT DISTINCT cdr.tower_id, cdr.lat, cdr.lon, towers.region_name
    FROM optourism.foreigners_daytripper_dwell_time as cdr
    JOIN optourism.cdr_labeled_towers as towers
    ON towers.id = cdr.tower_id
""", con=connection)

# tower_vertices['tower_id'] = tower_vertices['tower_id'].apply(
#     lambda x: 'tower-%s' % x)

In [23]:
foreigners = get_dwell_time_df(connection)
# foreigners['tower_id'] = foreigners['tower_id'].apply(
#     lambda x: 'tower-%s' % x)


/home/clozano/.conda/envs/SpatialEnv2.7/lib/python2.7/site-packages/ipykernel_launcher.py:21: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [25]:
foreigners['tower_region'] = foreigners.apply(lambda x: tower_vertices.loc[tower_vertices['tower_id'] == x['tower_id']].iloc[0]['region_name'] if x['in_florence'] == False else x['tower_id'], axis=1)

In [39]:
def get_most_common_location(dataframe):
    grouped = dataframe.groupby(['tower_region']).count().reset_index()
    grouped.rename(columns={'dwell_time': 'weight'}, inplace=True)
    grouped = grouped.sort_values('weight', ascending=False)

    return grouped.filter(['tower_region', 'weight'], axis=1)

In [41]:
# foreigners['tower_id'] = foreigners['tower_id'].apply(
#     lambda x: 'tower-%s' % x)

#filtered_0 = foreigners.loc[foreigners['in_florence'] == True]
filtered_0 = foreigners
filtered_0['prev_tower_id'] = filtered_0['tower_id'].shift(1)
filtered_0.loc[(filtered_0['cust_id'] != filtered_0['cust_id'].shift(1)), 'prev_tower_id'] = 'source'
filtered = filtered_0.loc[filtered_0['prev_tower_id'] != 'source']


ending = filtered_0.groupby(['cust_id']).last()
most_common_end_nodes = get_most_common_location(ending)
print(most_common_end_nodes.head(40))

beginning = filtered_0.groupby(['cust_id']).first()
most_common_start_nodes = get_most_common_location(beginning)
print(most_common_start_nodes.head(40))

grouping = filtered.groupby(['tower_id', 'prev_tower_id']).count().reset_index()
grouping.rename(columns={'cust_id': 'weight'}, inplace=True)
del grouping['near_airport']
del grouping['dwell_time']

# graph = ig.Graph()

# #print(tower_vertices)
# graph.add_vertices(tower_vertices.shape[0])
# graph.vs['name'] = tower_vertices['tower_id']
# graph.vs['x'] = tower_vertices['lat']
# graph.vs['y'] = tower_vertices['lon']

# edges = zip(grouping['prev_tower_id'], grouping['tower_id'])
# graph.add_edges(edges)
# graph.es['weight'] = grouping['weight']
# graph.es.select(weight_lt=20).delete()

# visual_style = {'vertex_color': 'black',
#                 'vertex_size': 3,
#                 'edge_width': [.002 * i for i in
#                                graph.es["weight"]]}

# ig.plot(graph, bbox=(800, 800), **visual_style)


           tower_region  weight
84              Toscana    3529
77                Lazio     616
75       Emilia-Romagna     424
44                   55     297
87               Veneto     294
37                   47     202
1                     2     190
21                   27     166
78              Liguria     165
15                   20     146
86               Umbria     145
79            Lombardia     137
8                     9     129
34                   43     121
32                   40     119
23                   29      81
68                   82      73
46                   58      69
63                   77      60
25                   31      54
70                   84      43
67                   81      39
16                   21      36
74             Campania      34
58                   72      22
53                   66      20
0                     1      20
81             Piemonte      18
45                   56      16
14                   17      14
60                   74      12
5                     6      11
85  Trentino-Alto Adige      11
54                   67      10
49                   62      10
31                   39      10
7                     8       9
19                   25       9
80               Marche       8
55                   69       8
             tower_region  weight
89                Toscana    3419
81                  Lazio     597
79         Emilia-Romagna     493
92                 Veneto     307
47                     55     275
41                     47     191
16                     20     169
82                Liguria     163
22                     27     159
34                     40     153
1                       2     145
83              Lombardia     134
91                 Umbria     127
37                     43     112
71                     82      97
8                       9      85
49                     58      73
70                     81      72
24                     29      63
67                     77      57
0                       1      55
26                     31      44
73                     84      37
78               Campania      33
17                     21      27
80  Friuli Venezia Giulia      26
56                     66      24
14                     17      23
85               Piemonte      23
33                     39      20
48                     56      20
62                     72      19
20                     25      16
5                       6      15
53                     62      13
90    Trentino-Alto Adige      13
25                     30      12
32                     38      12
84                 Marche      11
44                     52      11

In [ ]:
#