In [ ]:
"""
Get all circulations from GTFS and associate this circulations to
the information about the nfrota used to do the route. These last elements
are in OTHER_DATABASE in a table with BUS entrances.
"""
In [ ]:
import os
import pandas
from senpy import goToList
from senpy.pgsql.tables import copy_fromdb_todb
from senpy.pgsql.tables._del import del_tables
from senpy.pgsql.data.w import create_table_by_query
from senpy.pgsql.data.w import update_table
from senpy.pgsql.transportation.gtfs.circ import name_circulations
from senpy.tblconv.toxls import pgsql_to_xls
In [ ]:
conGTFS_DB = {
"HOST" : "localhost", "PORT" : "5432", "PASSWORD" : "admin",
"USER" : "postgres", "DATABASE" : "gtfs_carris"
}
conOTHER_DB = {
"HOST" : "localhost", "PORT" : "5432", "PASSWORD" : "admin",
"USER" : "postgres", "DATABASE" : "valcarris_v2"
}
GTFS_SCHEMA = {
"STOP_TIMES" : {
"TNAME" : "stop_times",
"TRIP" : "trip_id",
"STOP" : "stop_id",
"SEQUENCE" : "stop_sequence",
"DEPARTURE" : "departure_time"
},
"TRIPS" : {
"TNAME" : "trips",
"TRIP" : "trip_id",
"SERVICE" : "service_id"
},
"CALENDAR" : {
"TNAME" : "calendar_dates",
"SERVICE" : "service_id",
"DATE" : "date"
}
}
OTHER_SCHEMA = {
"ENTRANCES" : {
"TNAME" : "val_18jan_7as31",
"ROUTE" : ["route_id"],
"STOP" : "paragem",
"TIME" : "time",
"CAR" : "nfrota",
"DAY" : "dia"
},
"STOPS" : {
"TNAME" : "percursos_geom_v2",
"ROUTE" : ["fid_percurso"],
"SEQUENCE" : "ordem",
"STOP" : "paragem"
}
}
# OTHER_SCHEMA["STOPS"]["ROUTE"] must have the same order than OTHER_SCHEMA["ENTRANCES"]["ROUTE"]
# Entrances table must have entrances for one GTFS Day (eg: from 04:00:00 to 31:00:00)
GTFS_DAY = 20180308
ENTRANCES_DAY = "2018-01-18"
ouxls = r'D:\TRENMO_JASP\circulacoes_quinta3.xlsx'
In [ ]:
onGoingTbl = []
In [ ]:
# We have to databases: GTFS AND other with database entrances
# Send all Relevant data of the GTFS db to the OTHER DATABASE
tbls = [GTFS_SCHEMA[k]["TNAME"] for k in GTFS_SCHEMA]
copy_fromdb_todb(conGTFS_DB, conOTHER_DB, tbls)
onGoingTbl += tbls
In [ ]:
# ID circulations and save result in a new table
service_schema = {
"TRIPS" : GTFS_SCHEMA["TRIPS"], "CALENDAR" : GTFS_SCHEMA["CALENDAR"],
"FILTER_DAY" : GTFS_DAY
}
id_circulations = name_circulations(
conOTHER_DB, GTFS_SCHEMA["STOP_TIMES"], OTHER_SCHEMA["STOPS"],
"circ_id", serviceSchema=service_schema
)
In [ ]:
id_circulations = "circ_id"
onGoingTbl.append(id_circulations)
In [ ]:
# Sanitize DEPARTURE_TIME FIELD
create_table_by_query(conOTHER_DB, "stop_times_sanitize", (
"SELECT {trip}, {stop}, {sequence}, "
"to_char(TO_TIMESTAMP('{day}' || ' ' || {depTime}, 'YYYY-MM-DD HH24:MI:SS'), 'YYYY-MM-DD HH24:MI:SS') AS {depTime} "
"FROM {tbl}"
).format(
tbl = GTFS_SCHEMA["STOP_TIMES"]["TNAME"],
trip = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
stop = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
sequence = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"],
depTime = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
day = ENTRANCES_DAY
))
In [ ]:
GTFS_SCHEMA["STOP_TIMES"]["TNAME"] = "stop_times_sanitize"
onGoingTbl.append("stop_times_sanitize")
In [ ]:
# In stop_times, add meta about the route (add route id)
q = (
"SELECT {sT}.{tripId}, {sT}.{stopId}, {sT}.{stopSq}, "
"{sT}.{depTime}, {idCirc}.{idRoute} FROM {sT} "
"LEFT JOIN {idCirc} "
"ON {sT}.{tripId} = {idCirc}.fid "
"INNER JOIN ("
"SELECT {tripsTbl}.{tripsTripId}, {calenTbl}.{calenDate} "
"FROM {tripsTbl} INNER JOIN {calenTbl} ON "
"{tripsTbl}.{tripsServId} = {calenTbl}.{calenServId}"
") AS trip_service "
"ON {sT}.{tripId} = trip_service.{tripsTripId} "
"WHERE trip_service.{calenDate} = {day}"
).format(
sT = "stop_times_sanitize",
tripId = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
stopId = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
stopSq = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"],
depTime = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
idCirc = id_circulations,
idRoute = ", ".join(OTHER_SCHEMA["STOPS"]["ROUTE"]),
tripsTbl = GTFS_SCHEMA["TRIPS"]["TNAME"],
tripsTripId = GTFS_SCHEMA["TRIPS"]["TRIP"],
tripsServId = GTFS_SCHEMA["TRIPS"]["SERVICE"],
calenTbl = GTFS_SCHEMA["CALENDAR"]["TNAME"],
calenServId = GTFS_SCHEMA["CALENDAR"]["SERVICE"],
calenDate = GTFS_SCHEMA["CALENDAR"]["DATE"],
day = GTFS_DAY
)
create_table_by_query(conOTHER_DB, "stop_times_fid", q)
In [ ]:
update_table(
conOTHER_DB, "stop_times_fid",
{x : "\'notfound\'" for x in OTHER_SCHEMA["STOPS"]["ROUTE"]},
{x : "NULL" for x in OTHER_SCHEMA["STOPS"]["ROUTE"]},
logic_operator='AND'
)
In [ ]:
onGoingTbl.append("stop_times_fid")
In [ ]:
# Sanitize time column in entrances table
q = (
"SELECT {valRou}, {car}, {stop}, "
"to_char(TO_TIMESTAMP({dayF} || ' ' || {tempo}, 'YYYY-MM-DD HH24:MI:SS'), 'YYYY-MM-DD HH24:MI:SS') AS {tempo} "
"FROM {tbl}"
).format(
tbl = OTHER_SCHEMA["ENTRANCES"]["TNAME"],
valRou = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
car = OTHER_SCHEMA["ENTRANCES"]["CAR"],
stop = OTHER_SCHEMA["ENTRANCES"]["STOP"],
tempo = OTHER_SCHEMA["ENTRANCES"]["TIME"],
dayF = OTHER_SCHEMA["ENTRANCES"]["DAY"]
)
new_entrances_table = "{}_san".format(OTHER_SCHEMA["ENTRANCES"]["TNAME"])
create_table_by_query(conOTHER_DB, new_entrances_table, q)
In [ ]:
OTHER_SCHEMA["ENTRANCES"]["TNAME"] = new_entrances_table
onGoingTbl.append(new_entrances_table)
In [ ]:
# Get a table with the potencial stop_time for one validation
# For a entrance with IDROUTE 26B|0|ASC will be created a row
# for each stop_time of the IDROUTE 26B|0|ASC
Q = (
"SELECT {valTbl}.{valRou}, {valTbl}.{car}, {valTbl}.{stop}, "
"{valTbl}.{tempo}, {stopTimeTbl}.{tripId}, "
"{stopTimeTbl}.{stopId}, {stopTimeTbl}.{depTime} "
"FROM {valTbl} INNER JOIN {stopTimeTbl} "
"ON {relRoute} AND "
"{valTbl}.{stop} = {stopTimeTbl}.{stopId}"
).format(
valTbl = OTHER_SCHEMA["ENTRANCES"]["TNAME"],
valRou = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
car = OTHER_SCHEMA["ENTRANCES"]["CAR"],
stop = OTHER_SCHEMA["ENTRANCES"]["STOP"],
tempo = OTHER_SCHEMA["ENTRANCES"]["TIME"],
stopTimeTbl = "stop_times_fid",
tripId = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
stopId = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
depTime = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
stopsRou = ", ".join(OTHER_SCHEMA["STOPS"]["ROUTE"]),
relRoute = " AND ".join([
"{}.{} = {}.{}".format(
OTHER_SCHEMA["ENTRANCES"]["TNAME"],
OTHER_SCHEMA["ENTRANCES"]["ROUTE"][i],
"stop_times_fid",
OTHER_SCHEMA["STOPS"]["ROUTE"][i],
) for i in range(len(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]))
])
)
create_table_by_query(conOTHER_DB, "val_stoptime_all_possibilities", Q)
In [ ]:
onGoingTbl.append("val_stoptime_all_possibilities")
In [ ]:
# From the table val_stoptime_all_possibilities
# Get the correct circulation for each entrance
timeDifference = (
"(SELECT *, "
"("
"(EXTRACT(day FROM "
"(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
") * 24) + "
"EXTRACT(hour FROM "
"(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
") + (EXTRACT(minute FROM "
"(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
") / 60.0) + (EXTRACT(second FROM "
"(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
") / 3600.0)"
") AS tinterval "
"FROM val_stoptime_all_possibilities) AS tmdf"
).format(
timeVal=OTHER_SCHEMA["ENTRANCES"]["TIME"],
timeDep=GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"]
)
q = (
"SELECT * FROM ("
"SELECT *, "
"CASE "
"WHEN tinterval_abs = MIN(tinterval_abs) OVER (PARTITION BY "
"{rouId}, {carId}, {stopId}, {timeVal}) "
"THEN 1 ELSE 0 "
"END AS iscirc "
"FROM ("
"SELECT *, ABS(tinterval) AS tinterval_abs "
"FROM {tmDifT} "
"WHERE tinterval >= -0.167 AND tinterval < 0.55"
") AS valcirctmp"
") AS foo WHERE iscirc = 1"
).format(
rouId = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
carId = OTHER_SCHEMA["ENTRANCES"]["CAR"],
stopId = OTHER_SCHEMA["ENTRANCES"]["STOP"],
timeVal = OTHER_SCHEMA["ENTRANCES"]["TIME"],
tmDifT = timeDifference
)
create_table_by_query(conOTHER_DB, "val_circ", q)
In [ ]:
onGoingTbl.append("val_circ")
In [ ]:
"""
Delete cases when one entrance has more than
one stop_time with a equal Minimum difference between
the validation time
"""
q = (
"SELECT val_circ.* "
"FROM val_circ INNER JOIN ("
"SELECT {rouId}, {stop}, {tempo}, COUNT({stop}) AS conta "
"FROM val_circ "
"GROUP BY {rouId}, {stop}, {tempo}"
") AS foo "
"ON {rouIdJ} AND val_circ.{stop} = foo.{stop} "
"AND val_circ.{tempo} = foo.{tempo} "
"WHERE (conta > 1 AND tinterval < tinterval_abs) "
"OR conta = 1"
).format(
rouId = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
stop = OTHER_SCHEMA["ENTRANCES"]["STOP"],
tempo = OTHER_SCHEMA["ENTRANCES"]["TIME"],
rouIdJ = " AND ".join(["val_circ.{i} = foo.{i}".format(
i=x) for x in OTHER_SCHEMA["ENTRANCES"]["ROUTE"]])
)
create_table_by_query(conOTHER_DB, "val_circ_clean", q)
In [ ]:
"""
Produce final result
Exclude cases when one circulation has more than one vehicle
When on circulation has more than one vehicle,
select vehicle with more entrances
"""
q = (
"SELECT {id_circT}.*, foo3.{carFld}, foo3.n_{carFld}, foo3.n_entrances, "
"((foo3.n_{carFld} / foo3.n_entrances) * 100) AS perc "
"FROM {id_circT} LEFT JOIN ("
"SELECT * FROM ("
"SELECT {stopTimeRouId}, {stopTimeTripId}, "
"CASE "
"WHEN count_val = MAX(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) "
"THEN {carFld} ELSE NULL "
"END AS {carFld}, "
"CASE "
"WHEN count_val = MAX(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) "
"THEN MAX(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) ELSE NULL "
"END AS n_{carFld}, "
"CASE "
"WHEN count_val = MAX(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) "
"THEN SUM(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) ELSE NULL "
"END AS n_entrances "
"FROM ("
"SELECT {stopTimeRouId}, {stopTimeTripId}, {carFld}, COUNT({carFld}) AS count_val "
"FROM val_circ_clean "
"GROUP BY {stopTimeRouId}, {stopTimeTripId}, {carFld}"
") AS foo"
") AS foo2 "
"WHERE {carFld} IS NOT NULL"
") AS foo3 ON {id_circT}.fid = foo3.{stopTimeTripId}"
).format(
id_circT = id_circulations,
carFld = OTHER_SCHEMA["ENTRANCES"]["CAR"],
stopTimeRouId = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
stopTimeTripId = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
)
FINAL_TABLE = os.path.splitext(os.path.basename(ouxls))[0]
create_table_by_query(conOTHER_DB, FINAL_TABLE, q)
In [ ]:
pgsql_to_xls(FINAL_TABLE, ouxls, dic_con=conOTHER_DB)
In [ ]:
onGoingTbl.append(FINAL_TABLE)
In [ ]:
print onGoingTbl
In [ ]: