In [ ]:
"""
Identificar pares OD que representam os movimentos mais relevantes, nomeadamente os movimentos pendulares
Toma os transbordos como parte de uma viagem... Para um conjunto de validacoes A, B e C,
se B for um transbordo, o destino de A e C
"""
In [3]:
from senpy.pgsql.tables.w import create_table_by_query
from senpy.pgsql.tables.w import replace_null_with_other_col_value
from senpy.pgsql.fields import drop_column
from senpy.pgsql.fields import columns_to_timestamp
from senpy.pgsql.tables._del import drop_where_cols_are_same
from senpy.pgsql.q.count import sel_where_groupByIs
from senpy.transportation.bustops import get_isValidDestination_table
from senpy.transportation.bustops import get_nearStopTable
from senpy.topsql import pgsql_to_xls
from senpy.tabledata.conversion.toxls import pgsql_to_xls
In [4]:
# Input parameters
conParam = {
"HOST" : "localhost", "PORT" : "5432", "PASSWORD" : "admin",
"USER" : "postgres", "DATABASE" : "valcarris_v2"
}
FILTER_DAY = [
'2018-01-08', '2018-01-09', '2018-01-10', '2018-01-11', '2018-01-12', '2018-01-15',
'2018-01-16', '2018-01-17', '2018-01-18', '2018-01-19', '2018-01-22', '2018-01-23',
'2018-01-24', '2018-01-25', '2018-01-26', '2018-01-29', '2018-01-30', '2018-01-31'
]
TABLES_SCHEMA = {
"VALIDATIONS" : {
"TNAME" : "id_pares_od",
"DAY" : "dia",
"HOUR" : "hora",
"MINUTE" : "minuto",
"SECOND" : "segundo",
"ROUTE" : "id_percurso",
"STOP" : "paragem",
"CLIENT" : "ncartao"
},
"ISOCHRONES" : {
"TNAME" : "isocronas",
"STOP" : "id",
"GEOM" : "geom"
},
"STOPS" : {
"TNAME" : "percursos_geom_v2",
"ROUTE" : "fid_percurso",
"STOP" : "paragem",
"GEOM" : "geom"
}
}
stopsByRoute = 'percursos_geom_v2'
stopsByRouteID = 'fid_percurso'
stopsByRouteStop = 'paragem'
stopsByRouteOrder = 'ordem'
stopsByRouteGeom = 'geom'
stopsTime = "stops_time_dist"
stopsTime_Route = "fid_route"
stopsTime_Duration = "time_duration"
stopsTime_sA = "stop_a"
stopsTime_sB = "stop_b"
OUT_TABLE = "great_pairs_od"
In [ ]:
# From BUS entrances, erase cases where there is only one validation per day and card
tabledata = sel_where_groupByIs(
conParam, TABLES_SCHEMA["VALIDATIONS"]["TNAME"],
[TABLES_SCHEMA["VALIDATIONS"]["CLIENT"], TABLES_SCHEMA["VALIDATIONS"]["DAY"]],
">", "1", 'p_{}'.format(TABLES_SCHEMA["VALIDATIONS"]["TNAME"]),
filterWhere=None if not FILTER_DAY else " OR ".join(
"{}.{}='{}'".format(
TABLES_SCHEMA["VALIDATIONS"]["TNAME"],
TABLES_SCHEMA["VALIDATIONS"]["DAY"], x
) for x in FILTER_DAY
)
)
In [ ]:
# Convert day, hours, minutes and seconds to timestamp
tabledata = columns_to_timestamp(
conParam, tabledata,
TABLES_SCHEMA["VALIDATIONS"]["DAY"], TABLES_SCHEMA["VALIDATIONS"]["HOUR"],
TABLES_SCHEMA["VALIDATIONS"]["MINUTE"], TABLES_SCHEMA["VALIDATIONS"]["SECOND"], "time_val",
"p_{}_ts".format(TABLES_SCHEMA["VALIDATIONS"]["TNAME"]), selColumns=[
TABLES_SCHEMA["VALIDATIONS"]["ROUTE"], TABLES_SCHEMA["VALIDATIONS"]["STOP"],
TABLES_SCHEMA["VALIDATIONS"]["CLIENT"], TABLES_SCHEMA["VALIDATIONS"]["DAY"]
]
)
In [ ]:
tabledata = "p_p_id_pares_od_ts"
In [ ]:
# Assign destinations to origins
QUERY = (
"SELECT *, "
"first_value({routeF}) OVER(PARTITION BY {cardF}, {dayF} "
"ORDER BY {dayF}, time_val) AS tmpv, "
"first_value({stopF}) OVER(PARTITION BY {cardF}, {dayF} "
"ORDER BY {dayF}, time_val) AS tmpst "
"FROM ("
"SELECT *, "
"row_number() OVER(PARTITION BY {cardF}, {dayF} "
"ORDER BY {cardF}, {dayF}, time_val) AS valorder, "
"lead({routeF}) OVER(PARTITION BY {cardF}, {dayF} "
"ORDER BY {cardF}, {dayF}, time_val) AS dest_route, "
"lead({stopF}) OVER(PARTITION BY {cardF}, {dayF} "
"ORDER BY {cardF}, {dayF}, time_val) AS dest_stop, "
"lead(time_val) OVER(PARTITION BY {cardF}, {dayF} "
"ORDER BY {cardF}, {dayF}, time_val) AS arrival_time "
"FROM {t} ORDER BY {cardF}, {dayF}, time_val"
") AS sub_tbl"
).format(
dayF = TABLES_SCHEMA["VALIDATIONS"]["DAY"],
hourF = TABLES_SCHEMA["VALIDATIONS"]["HOUR"],
minF = TABLES_SCHEMA["VALIDATIONS"]["MINUTE"],
secF = TABLES_SCHEMA["VALIDATIONS"]["SECOND"],
t = tabledata,
stopF = TABLES_SCHEMA["VALIDATIONS"]["STOP"],
cardF = TABLES_SCHEMA["VALIDATIONS"]["CLIENT"],
routeF = TABLES_SCHEMA["VALIDATIONS"]["ROUTE"]
)
print QUERY
# Create matrix OD
create_table_by_query(conParam, "p_matrix_od", QUERY)
In [ ]:
# Update null values
replace_null_with_other_col_value(
conParam, "p_matrix_od", 'dest_route', 'tmpv'
)
replace_null_with_other_col_value(
conParam, "p_matrix_od", 'dest_stop', 'tmpst'
)
In [ ]:
drop_column(conParam, "p_matrix_od", ["tmpv", "tmpst"])
In [ ]:
# It is possible to find rows in which origins and destinations are the same
# Delete rows where the explained happens
drop_where_cols_are_same(
conParam, "p_matrix_od",
TABLES_SCHEMA["VALIDATIONS"]["STOP"], 'dest_stop'
)
In [ ]:
"""
See if destinations are valid.
See if second validation could be a destination of the first validation
"""
# For that we need to know which isochrones intersects a specific points
# Create a table AS (stop_a_route | stop_b | bool_intersect)
# In the future it will be possible to relate p_matrix od with this new table and know at once
# if the destination is possible.
destIsValidTbl = get_isValidDestination_table(
conParam, TABLES_SCHEMA["STOPS"]["TNAME"],
TABLES_SCHEMA["STOPS"]["STOP"],
TABLES_SCHEMA["STOPS"]["GEOM"],
TABLES_SCHEMA["STOPS"]["ROUTE"],
TABLES_SCHEMA["ISOCHRONES"]["TNAME"],
TABLES_SCHEMA["ISOCHRONES"]["STOP"],
TABLES_SCHEMA["ISOCHRONES"]["GEOM"],
'p_int_valtable'
)
In [ ]:
"""
See if destinations are valid.
See if second validation could be a destination of the first validation
Delete invalid destinations
"""
q = (
"SELECT p_matrix_od.*, {isDestValidT}.bintersect "
"FROM p_matrix_od INNER JOIN {isDestValidT} "
"ON p_matrix_od.{routeF} = {isDestValidT}.{stoRouteF} AND "
"p_matrix_od.dest_stop = {isDestValidT}.stop_b "
"WHERE bintersect = 1"
).format(
routeF = TABLES_SCHEMA["VALIDATIONS"]["ROUTE"],
stoRouteF = TABLES_SCHEMA["STOPS"]["ROUTE"],
isDestValidT = destIsValidTbl
)
create_table_by_query(conParam, "p_matrix_od_valid", q)
In [ ]:
"""
Get real destinations
"""
# First, create an auxiliar table with the distance between all stops and the
# near stop of another route
nearStopsTbl = get_nearStopTable(
conParam,
TABLES_SCHEMA["STOPS"]["TNAME"],
TABLES_SCHEMA["STOPS"]["STOP"],
TABLES_SCHEMA["STOPS"]["GEOM"],
TABLES_SCHEMA["STOPS"]["ROUTE"],
"p_near_stops"
)
In [ ]:
"""
Get real destinations and compare interval between validations and the trip duration
"""
checkTimeInt = (
"SELECT foo.*, {stopTimeT}.{duration}, "
"(arrival_time - time_val) AS deltaval, "
"ABS(("
"EXTRACT(hour FROM ((arrival_time - time_val) - {stopTimeT}.{duration})) + "
"(EXTRACT(minute FROM ((arrival_time - time_val) - {stopTimeT}.{duration})) / 60.0) + "
"(EXTRACT(second FROM ((arrival_time - time_val) - {stopTimeT}.{duration})) / 3600.0)"
")) AS time_compare FROM ("
"SELECT p_matrix_od_valid.*, {nearStops}.stop_a AS real_dest, {nearStops}.distance "
"FROM p_matrix_od_valid INNER JOIN {nearStops} ON "
"p_matrix_od_valid.{validRoute} = {nearStops}.{stopsRoute} AND "
"p_matrix_od_valid.dest_stop = {nearStops}.stop_b"
") AS foo INNER JOIN {stopTimeT} ON foo.{validRoute} = {stopTimeT}.{stopTimeRoute} AND "
"foo.{stopF} = {stopTimeT}.{stopACol} AND foo.real_dest = {stopTimeT}.{stopBCol}"
).format(
stopTimeT=stopsTime, duration=stopsTime_Duration,
validRoute=routeField, stopsRoute=stopsByRouteID,
stopTimeRoute=stopsTime_Route, stopF=stopField,
stopACol=stopsTime_sA, stopBCol=stopsTime_sB,
nearStops=nearStopsTbl
)
create_table_by_query(conParam, "p_matrix_od_time_val", checkTimeInt)
In [4]:
def get_case(n, N, current_q):
if n+1 == N:
_q = (
"CASE "
"WHEN LAG(time_compare, {a}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) < 0.5 THEN "
"{subQ} ELSE valorder "
"END AS trip"
).format(
subQ=current_q, a=N-n, cardF=clientField, dayF=dayField
)
else:
_q = (
"CASE "
"WHEN LAG(time_compare, {a}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) < 0.5 THEN "
"{subQ} ELSE LAG(valorder, {e}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) "
"END"
).format(subQ=current_q, a=N-n, e=N-n-1, cardF=clientField, dayF=dayField)
return _q
def recursive_query(caseN):
for i in range(caseN):
if not i:
q = (
"CASE "
"WHEN LAG(time_compare, {a}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) < 0.5 THEN "
"LAG(valorder, {a}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) ELSE "
"LAG(valorder, {e}) OVER(PARTITION BY {cardF}, {dayF} ORDER BY {cardF}, {dayF}, time_val) "
"END"
).format(a=caseN, e=caseN-1, cardF=clientField, dayF=dayField)
else:
q = get_case(i, caseN, q)
return q
q = (
"SELECT *, CASE WHEN time_compare < 0.5 THEN 1 ELSE 0 END as transbordo, "
"{cases} FROM p_matrix_od_time_val "
"ORDER BY {cardF}, {dayF}, valorder"
).format(cases=recursive_query(10), cardF=clientField, dayF=dayField)
create_table_by_query(conParam, "p_matrix_id_trips", q)
In [5]:
# Get real matrix od
qMatrixOd = (
"SELECT origin, redestination AS destination, COUNT(origin) AS ntrips FROM ("
"SELECT route_origin, origin, dest_route, psdestination, redestination FROM ("
"SELECT FIRST_VALUE({routeF}) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF}, {dayF}, time_val) AS route_origin, "
"FIRST_VALUE({stopF}) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF}, {dayF}, time_val) AS origin, "
"FIRST_VALUE(dest_route) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF} DESC, {dayF} DESC, time_val DESC) AS dest_route, "
"FIRST_VALUE(dest_stop) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF} DESC, {dayF} DESC, time_val DESC) AS psdestination, "
"FIRST_VALUE(real_dest) OVER(PARTITION BY {cardF}, {dayF}, trip ORDER BY {cardF} DESC, {dayF} DESC, time_val DESC) AS redestination "
"FROM p_matrix_id_trips "
"ORDER BY {cardF}, {dayF}, time_val"
") AS foo "
"GROUP BY route_origin, origin, dest_route, psdestination, redestination"
") AS foo2 "
"GROUP BY origin, redestination"
).format(
routeF=routeField, cardF=clientField, dayF=dayField, stopF=stopField
)
print qMatrixOd
create_table_by_query(conParam, OUT_TABLE, qMatrixOd)
In [ ]: