In [ ]:
"""
Identificar pares OD que representam os movimentos mais relevantes, nomeadamente os movimentos pendulares
Toma os transbordos como parte de uma viagem... Para um conjunto de validacoes A, B e C,
se B for um transbordo, o destino de A e C
"""

In [3]:
from senpy.pgsql.tables.w             import create_table_by_query
from senpy.pgsql.tables.w             import replace_null_with_other_col_value
from senpy.pgsql.fields               import drop_column
from senpy.pgsql.fields               import columns_to_timestamp
from senpy.pgsql.tables._del          import drop_where_cols_are_same
from senpy.pgsql.q.count              import sel_where_groupByIs
from senpy.transportation.bustops     import get_isValidDestination_table
from senpy.transportation.bustops     import get_nearStopTable
from senpy.topsql                     import pgsql_to_xls
from senpy.tabledata.conversion.toxls import pgsql_to_xls

In [4]:
# Input parameters
conParam = {
    "HOST" : "localhost", "PORT" : "5432", "PASSWORD" : "admin",
    "USER" : "postgres", "DATABASE" : "valcarris_v2"
}

FILTER_DAY = [
    '2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12', '2018-01-15',
    '2018-01-16', '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-22', '2018-01-23',
    '2018-01-24', '2018-01-25', '2018-01-26', '2018-01-29', '2018-01-30', '2018-01-31'
]

TABLES_SCHEMA = {
    "VALIDATIONS" : {
        "TNAME"  : "id_pares_od",
        "DAY"    : "dia",
        "HOUR"   : "hora",
        "MINUTE" : "minuto",
        "SECOND" : "segundo",
        "ROUTE"  : "id_percurso",
        "STOP"   : "paragem",
        "CLIENT" : "ncartao"
    },
    "ISOCHRONES" : {
        "TNAME"  : "isocronas",
        "STOP"   : "id",
        "GEOM"   : "geom"
    },
    "STOPS" : {
        "TNAME" : "percursos_geom_v2",
        "ROUTE" : "fid_percurso",
        "STOP"  : "paragem",
        "GEOM"  : "geom"
    }
}

stopsByRoute      = 'percursos_geom_v2'
stopsByRouteID    = 'fid_percurso'
stopsByRouteStop  = 'paragem'
stopsByRouteOrder = 'ordem'
stopsByRouteGeom  = 'geom'

stopsTime = "stops_time_dist"
stopsTime_Route = "fid_route"
stopsTime_Duration = "time_duration"
stopsTime_sA = "stop_a"
stopsTime_sB = "stop_b"

OUT_TABLE = "great_pairs_od"

In [ ]:
# From BUS entrances, erase cases where there is only one validation per day and card
tabledata = sel_where_groupByIs(
    conParam, TABLES_SCHEMA["VALIDATIONS"]["TNAME"],
    [TABLES_SCHEMA["VALIDATIONS"]["CLIENT"], TABLES_SCHEMA["VALIDATIONS"]["DAY"]],
    ">", "1", 'p_{}'.format(TABLES_SCHEMA["VALIDATIONS"]["TNAME"]),
    filterWhere=None if not FILTER_DAY else " OR ".join(
        "{}.{}='{}'".format(
            TABLES_SCHEMA["VALIDATIONS"]["TNAME"],
            TABLES_SCHEMA["VALIDATIONS"]["DAY"], x
        ) for x in FILTER_DAY
    )
)

In [ ]:
# Convert day, hours, minutes and seconds to timestamp
tabledata = columns_to_timestamp(
    conParam, tabledata,
    TABLES_SCHEMA["VALIDATIONS"]["DAY"], TABLES_SCHEMA["VALIDATIONS"]["HOUR"],
    TABLES_SCHEMA["VALIDATIONS"]["MINUTE"], TABLES_SCHEMA["VALIDATIONS"]["SECOND"], "time_val",
    "p_{}_ts".format(TABLES_SCHEMA["VALIDATIONS"]["TNAME"]), selColumns=[
        TABLES_SCHEMA["VALIDATIONS"]["ROUTE"], TABLES_SCHEMA["VALIDATIONS"]["STOP"],
        TABLES_SCHEMA["VALIDATIONS"]["CLIENT"], TABLES_SCHEMA["VALIDATIONS"]["DAY"]
    ]
)

In [ ]:
tabledata = "p_p_id_pares_od_ts"

In [ ]:
# Assign destinations to origins

QUERY = (
    "SELECT *, "
    "first_value({routeF}) OVER(PARTITION BY {cardF}, {dayF} "
        "ORDER BY {dayF}, time_val) AS tmpv, "
    "first_value({stopF}) OVER(PARTITION BY {cardF}, {dayF} "
        "ORDER BY {dayF}, time_val) AS tmpst "
    "FROM ("
        "SELECT *, "
        "row_number() OVER(PARTITION BY {cardF}, {dayF} "
            "ORDER BY {cardF}, {dayF}, time_val) AS valorder, "
        "lead({routeF}) OVER(PARTITION BY {cardF}, {dayF} "
            "ORDER BY {cardF}, {dayF}, time_val) AS dest_route, "
        "lead({stopF}) OVER(PARTITION BY {cardF}, {dayF} "
            "ORDER BY {cardF}, {dayF}, time_val) AS dest_stop, "
        "lead(time_val) OVER(PARTITION BY {cardF}, {dayF} "
            "ORDER BY {cardF}, {dayF}, time_val) AS arrival_time "
        "FROM {t} ORDER BY {cardF}, {dayF}, time_val"
    ") AS sub_tbl"
).format(
    dayF   = TABLES_SCHEMA["VALIDATIONS"]["DAY"],
    hourF  = TABLES_SCHEMA["VALIDATIONS"]["HOUR"],
    minF   = TABLES_SCHEMA["VALIDATIONS"]["MINUTE"],
    secF   = TABLES_SCHEMA["VALIDATIONS"]["SECOND"],
    t      = tabledata,
    stopF  = TABLES_SCHEMA["VALIDATIONS"]["STOP"],
    cardF  = TABLES_SCHEMA["VALIDATIONS"]["CLIENT"],
    routeF = TABLES_SCHEMA["VALIDATIONS"]["ROUTE"]
)

print QUERY

# Create matrix OD
create_table_by_query(conParam, "p_matrix_od", QUERY)

In [ ]:
# Update null values
replace_null_with_other_col_value(
    conParam, "p_matrix_od", 'dest_route', 'tmpv'
)

replace_null_with_other_col_value(
    conParam, "p_matrix_od", 'dest_stop', 'tmpst'
)

In [ ]:
drop_column(conParam, "p_matrix_od", ["tmpv", "tmpst"])

In [ ]:
# It is possible to find rows in which origins and destinations are the same
# Delete rows where the explained happens
drop_where_cols_are_same(
    conParam, "p_matrix_od",
    TABLES_SCHEMA["VALIDATIONS"]["STOP"], 'dest_stop'
)

In [ ]:
"""
See if destinations are valid.
See if second validation could be a destination of the first validation
"""

# For that we need to know which isochrones intersects a specific points
# Create a table AS (stop_a_route | stop_b | bool_intersect)
# In the future it will be possible to relate p_matrix od with this new table and know at once
# if the destination is possible.

destIsValidTbl = get_isValidDestination_table(
    conParam, TABLES_SCHEMA["STOPS"]["TNAME"],
    TABLES_SCHEMA["STOPS"]["STOP"],
    TABLES_SCHEMA["STOPS"]["GEOM"],
    TABLES_SCHEMA["STOPS"]["ROUTE"],
    TABLES_SCHEMA["ISOCHRONES"]["TNAME"],
    TABLES_SCHEMA["ISOCHRONES"]["STOP"],
    TABLES_SCHEMA["ISOCHRONES"]["GEOM"],
    'p_int_valtable'
)

In [ ]:
"""
See if destinations are valid.
See if second validation could be a destination of the first validation
Delete invalid destinations
"""

q = (
    "SELECT p_matrix_od.*, {isDestValidT}.bintersect "
    "FROM p_matrix_od INNER JOIN {isDestValidT} "
    "ON p_matrix_od.{routeF} = {isDestValidT}.{stoRouteF} AND "
    "p_matrix_od.dest_stop = {isDestValidT}.stop_b "
    "WHERE bintersect = 1"
).format(
    routeF       = TABLES_SCHEMA["VALIDATIONS"]["ROUTE"],
    stoRouteF    = TABLES_SCHEMA["STOPS"]["ROUTE"],
    isDestValidT = destIsValidTbl
)

create_table_by_query(conParam, "p_matrix_od_valid", q)

In [ ]:
"""
Get real destinations
"""

# First, create an auxiliar table with the distance between all stops and the
# near stop of another route
nearStopsTbl = get_nearStopTable(
    conParam,
    TABLES_SCHEMA["STOPS"]["TNAME"],
    TABLES_SCHEMA["STOPS"]["STOP"],
    TABLES_SCHEMA["STOPS"]["GEOM"],
    TABLES_SCHEMA["STOPS"]["ROUTE"],
    "p_near_stops"
)

In [ ]:
"""
Get real destinations and compare interval between validations and the trip duration
"""

checkTimeInt = (
    "SELECT foo.*, {stopTimeT}.{duration}, "
    "(arrival_time - time_val) AS deltaval, "
    "ABS(("
        "EXTRACT(hour FROM ((arrival_time - time_val) - {stopTimeT}.{duration})) + "
        "(EXTRACT(minute FROM ((arrival_time - time_val) - {stopTimeT}.{duration})) / 60.0) + "
        "(EXTRACT(second FROM ((arrival_time - time_val) - {stopTimeT}.{duration})) / 3600.0)"
    ")) AS time_compare FROM ("
        "SELECT p_matrix_od_valid.*, {nearStops}.stop_a AS real_dest, {nearStops}.distance "
        "FROM p_matrix_od_valid INNER JOIN {nearStops} ON "
        "p_matrix_od_valid.{validRoute} = {nearStops}.{stopsRoute} AND "
        "p_matrix_od_valid.dest_stop = {nearStops}.stop_b"
    ") AS foo INNER JOIN {stopTimeT} ON foo.{validRoute} = {stopTimeT}.{stopTimeRoute} AND "
    "foo.{stopF} = {stopTimeT}.{stopACol} AND foo.real_dest = {stopTimeT}.{stopBCol}"
).format(
    stopTimeT=stopsTime, duration=stopsTime_Duration,
    validRoute=routeField, stopsRoute=stopsByRouteID,
    stopTimeRoute=stopsTime_Route, stopF=stopField,
    stopACol=stopsTime_sA, stopBCol=stopsTime_sB,
    nearStops=nearStopsTbl
)

create_table_by_query(conParam, "p_matrix_od_time_val", checkTimeInt)

In [4]:
def get_case(n, N, current_q):
    if n+1 == N:
        _q = (
            "CASE "
                "WHEN LAG(time_compare, {a}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) < 0.5 THEN "
                "{subQ} ELSE valorder "
            "END AS trip"
        ).format(
            subQ=current_q, a=N-n, cardF=clientField, dayF=dayField
        )
    else:
        _q = (
            "CASE "
                "WHEN LAG(time_compare, {a}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) < 0.5 THEN "
                "{subQ} ELSE LAG(valorder, {e}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) "
            "END"
        ).format(subQ=current_q, a=N-n, e=N-n-1, cardF=clientField, dayF=dayField)
    
    return _q

def recursive_query(caseN):
    for i in range(caseN):
        if not i:
            q = (
                "CASE "
                    "WHEN LAG(time_compare, {a}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) < 0.5 THEN "
                    "LAG(valorder, {a}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) ELSE "
                    "LAG(valorder, {e}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) "
                "END"
            ).format(a=caseN, e=caseN-1, cardF=clientField, dayF=dayField)
        else:
            q = get_case(i, caseN, q)
    return q


q = (
    "SELECT *, CASE WHEN time_compare < 0.5 THEN 1 ELSE 0 END as transbordo, "
    "{cases} FROM p_matrix_od_time_val "
    "ORDER BY {cardF}, {dayF}, valorder"
).format(cases=recursive_query(10), cardF=clientField, dayF=dayField)

create_table_by_query(conParam, "p_matrix_id_trips", q)

In [5]:
# Get real matrix od

qMatrixOd = (
    "SELECT origin, redestination AS destination, COUNT(origin) AS ntrips FROM ("
        "SELECT route_origin, origin, dest_route, psdestination, redestination FROM ("
            "SELECT FIRST_VALUE({routeF}) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF}, {dayF}, time_val) AS route_origin, "
            "FIRST_VALUE({stopF}) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF}, {dayF}, time_val) AS origin, "
            "FIRST_VALUE(dest_route) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF} DESC, {dayF} DESC, time_val DESC) AS dest_route, "
            "FIRST_VALUE(dest_stop) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF} DESC, {dayF} DESC, time_val DESC) AS psdestination, "
            "FIRST_VALUE(real_dest) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF} DESC, {dayF} DESC, time_val DESC) AS redestination "
            "FROM p_matrix_id_trips "
            "ORDER BY {cardF}, {dayF}, time_val"
        ") AS foo "
        "GROUP BY route_origin, origin, dest_route, psdestination, redestination"
    ") AS foo2 "
    "GROUP BY origin, redestination"
).format(
    routeF=routeField, cardF=clientField, dayF=dayField, stopF=stopField
)

print qMatrixOd

create_table_by_query(conParam, OUT_TABLE, qMatrixOd)


SELECT origin, redestination AS destination, COUNT(origin) AS ntrips FROM (SELECT route_origin, origin, dest_route, psdestination, redestination FROM (SELECT FIRST_VALUE(fid_percurso) OVER(PARTITION BY ncartao, dia, trip ORDER BY ncartao, dia, time_val) AS route_origin, FIRST_VALUE(paragem) OVER(PARTITION BY ncartao, dia, trip ORDER BY ncartao, dia, time_val) AS origin, FIRST_VALUE(dest_route) OVER(PARTITION BY ncartao, dia, trip ORDER BY ncartao DESC, dia DESC, time_val DESC) AS dest_route, FIRST_VALUE(dest_stop) OVER(PARTITION BY ncartao, dia, trip ORDER BY ncartao DESC, dia DESC, time_val DESC) AS psdestination, FIRST_VALUE(real_dest) OVER(PARTITION BY ncartao, dia, trip ORDER BY ncartao DESC, dia DESC, time_val DESC) AS redestination FROM p_matrix_id_trips ORDER BY ncartao, dia, time_val) AS foo GROUP BY route_origin, origin, dest_route, psdestination, redestination) AS foo2 GROUP BY origin, redestination

In [ ]: