In [ ]:
"""
Get all circulations from GTFS and associate this circulations to
the information about the nfrota used to do the route. These last elements
are in OTHER_DATABASE in a table with BUS entrances.
"""

In [ ]:
import os
import pandas

from senpy                                import goToList
from senpy.pgsql.tables                   import copy_fromdb_todb
from senpy.pgsql.tables._del              import del_tables
from senpy.pgsql.data.w                   import create_table_by_query
from senpy.pgsql.data.w                   import update_table
from senpy.pgsql.transportation.gtfs.circ import name_circulations
from senpy.tblconv.toxls                  import pgsql_to_xls

In [ ]:
conGTFS_DB = {
    "HOST" : "localhost", "PORT" : "5432", "PASSWORD" : "admin",
    "USER" : "postgres", "DATABASE" : "gtfs_carris"
}

conOTHER_DB = {
    "HOST" : "localhost", "PORT" : "5432", "PASSWORD" : "admin",
    "USER" : "postgres", "DATABASE" : "valcarris_v2"
}

GTFS_SCHEMA = {
    "STOP_TIMES" : {
        "TNAME"     : "stop_times",
        "TRIP"      : "trip_id",
        "STOP"      : "stop_id",
        "SEQUENCE"  : "stop_sequence",
        "DEPARTURE" : "departure_time"
    },
    "TRIPS" : {
        "TNAME"   : "trips",
        "TRIP"    : "trip_id",
        "SERVICE" : "service_id"
    },
    "CALENDAR" : {
        "TNAME"   : "calendar_dates",
        "SERVICE" : "service_id",
        "DATE"    : "date"
    }
}

OTHER_SCHEMA = {
    "ENTRANCES" : {
        "TNAME" : "val_18jan_7as31",
        "ROUTE" : ["route_id"],
        "STOP"  : "paragem",
        "TIME"  : "time",
        "CAR"   : "nfrota",
        "DAY"   : "dia"
    },
    "STOPS" : {
        "TNAME"    : "percursos_geom_v2",
        "ROUTE"    : ["fid_percurso"],
        "SEQUENCE" : "ordem",
        "STOP"     : "paragem"
    }
}

# OTHER_SCHEMA["STOPS"]["ROUTE"] must have the same order than OTHER_SCHEMA["ENTRANCES"]["ROUTE"]

# Entrances table must have entrances for one GTFS Day (eg: from 04:00:00 to 31:00:00)

GTFS_DAY = 20180308
ENTRANCES_DAY = "2018-01-18"

ouxls = r'D:\TRENMO_JASP\circulacoes_quinta3.xlsx'

In [ ]:
onGoingTbl = []

In [ ]:
# We have to databases: GTFS AND other with database entrances
# Send all Relevant data of the GTFS db to the OTHER DATABASE
tbls = [GTFS_SCHEMA[k]["TNAME"] for k in GTFS_SCHEMA]
copy_fromdb_todb(conGTFS_DB, conOTHER_DB, tbls)
onGoingTbl += tbls

In [ ]:
# ID circulations and save result in a new table
service_schema = {
    "TRIPS" : GTFS_SCHEMA["TRIPS"], "CALENDAR" : GTFS_SCHEMA["CALENDAR"],
    "FILTER_DAY" : GTFS_DAY
}

id_circulations = name_circulations(
    conOTHER_DB, GTFS_SCHEMA["STOP_TIMES"], OTHER_SCHEMA["STOPS"],
    "circ_id", serviceSchema=service_schema
)

In [ ]:
id_circulations = "circ_id"
onGoingTbl.append(id_circulations)

In [ ]:
# Sanitize DEPARTURE_TIME FIELD
create_table_by_query(conOTHER_DB, "stop_times_sanitize", (
    "SELECT {trip}, {stop}, {sequence}, "
    "to_char(TO_TIMESTAMP('{day}' || ' ' || {depTime}, 'YYYY-MM-DD HH24:MI:SS'), 'YYYY-MM-DD HH24:MI:SS') AS {depTime} "
    "FROM {tbl}"
).format(
    tbl      = GTFS_SCHEMA["STOP_TIMES"]["TNAME"],
    trip     = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    stop     = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
    sequence = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"],
    depTime  = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
    day      = ENTRANCES_DAY
))

In [ ]:
GTFS_SCHEMA["STOP_TIMES"]["TNAME"] = "stop_times_sanitize"
onGoingTbl.append("stop_times_sanitize")

In [ ]:
# In stop_times, add meta about the route (add route id)

q = (
    "SELECT {sT}.{tripId}, {sT}.{stopId}, {sT}.{stopSq}, "
    "{sT}.{depTime}, {idCirc}.{idRoute} FROM {sT} "
    "LEFT JOIN {idCirc} "
    "ON {sT}.{tripId} = {idCirc}.fid "
    "INNER JOIN ("
        "SELECT {tripsTbl}.{tripsTripId}, {calenTbl}.{calenDate} "
        "FROM {tripsTbl} INNER JOIN {calenTbl} ON "
        "{tripsTbl}.{tripsServId} = {calenTbl}.{calenServId}"
    ") AS trip_service "
    "ON {sT}.{tripId} = trip_service.{tripsTripId} "
    "WHERE trip_service.{calenDate} = {day}"
).format(
    sT          = "stop_times_sanitize",
    tripId      = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    stopId      = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
    stopSq      = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"],
    depTime     = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
    idCirc      = id_circulations,
    idRoute     = ", ".join(OTHER_SCHEMA["STOPS"]["ROUTE"]),
    tripsTbl    = GTFS_SCHEMA["TRIPS"]["TNAME"],
    tripsTripId = GTFS_SCHEMA["TRIPS"]["TRIP"],
    tripsServId = GTFS_SCHEMA["TRIPS"]["SERVICE"],
    calenTbl    = GTFS_SCHEMA["CALENDAR"]["TNAME"],
    calenServId = GTFS_SCHEMA["CALENDAR"]["SERVICE"],
    calenDate   = GTFS_SCHEMA["CALENDAR"]["DATE"],
    day         = GTFS_DAY
)

create_table_by_query(conOTHER_DB, "stop_times_fid", q)

In [ ]:
update_table(
    conOTHER_DB, "stop_times_fid",
    {x : "\'notfound\'" for x in OTHER_SCHEMA["STOPS"]["ROUTE"]},
    {x : "NULL"         for x in OTHER_SCHEMA["STOPS"]["ROUTE"]},
    logic_operator='AND'
)

In [ ]:
onGoingTbl.append("stop_times_fid")

In [ ]:
# Sanitize time column in entrances table

q = (
    "SELECT {valRou}, {car}, {stop}, "
    "to_char(TO_TIMESTAMP({dayF} || ' ' || {tempo}, 'YYYY-MM-DD HH24:MI:SS'), 'YYYY-MM-DD HH24:MI:SS') AS {tempo} "
    "FROM {tbl}"
).format(
    tbl    = OTHER_SCHEMA["ENTRANCES"]["TNAME"],
    valRou = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
    car    = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    stop   = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo  = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    dayF   = OTHER_SCHEMA["ENTRANCES"]["DAY"]
)

new_entrances_table = "{}_san".format(OTHER_SCHEMA["ENTRANCES"]["TNAME"])
create_table_by_query(conOTHER_DB, new_entrances_table, q)

In [ ]:
OTHER_SCHEMA["ENTRANCES"]["TNAME"] = new_entrances_table
onGoingTbl.append(new_entrances_table)

In [ ]:
# Get a table with the potencial stop_time for one validation
# For a entrance with IDROUTE 26B|0|ASC will be created a row
# for each stop_time of the IDROUTE 26B|0|ASC

Q = (
    "SELECT {valTbl}.{valRou}, {valTbl}.{car}, {valTbl}.{stop}, "
    "{valTbl}.{tempo}, {stopTimeTbl}.{tripId}, "
    "{stopTimeTbl}.{stopId}, {stopTimeTbl}.{depTime} "
    "FROM {valTbl} INNER JOIN {stopTimeTbl} "
    "ON {relRoute} AND "
    "{valTbl}.{stop} = {stopTimeTbl}.{stopId}"
).format(
    valTbl      = OTHER_SCHEMA["ENTRANCES"]["TNAME"],
    valRou      = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
    car         = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    stop        = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo       = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    stopTimeTbl = "stop_times_fid",
    tripId      = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    stopId      = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
    depTime     = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
    stopsRou    = ", ".join(OTHER_SCHEMA["STOPS"]["ROUTE"]),
    relRoute    = " AND ".join([
        "{}.{} = {}.{}".format(
            OTHER_SCHEMA["ENTRANCES"]["TNAME"],
            OTHER_SCHEMA["ENTRANCES"]["ROUTE"][i],
            "stop_times_fid",
            OTHER_SCHEMA["STOPS"]["ROUTE"][i],
        ) for i in range(len(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]))
    ])
)

create_table_by_query(conOTHER_DB, "val_stoptime_all_possibilities", Q)

In [ ]:
onGoingTbl.append("val_stoptime_all_possibilities")

In [ ]:
# From the table val_stoptime_all_possibilities
# Get the correct circulation for each entrance

timeDifference = (
    "(SELECT *, "
    "("
        "(EXTRACT(day FROM "
            "(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
        ") * 24) + "
        "EXTRACT(hour FROM "
            "(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
        ") + (EXTRACT(minute FROM "
            "(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
        ") / 60.0) + (EXTRACT(second FROM "
            "(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
        ") / 3600.0)"
    ") AS tinterval "
    "FROM val_stoptime_all_possibilities) AS tmdf"
).format(
    timeVal=OTHER_SCHEMA["ENTRANCES"]["TIME"],
    timeDep=GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"]
)

q = (
    "SELECT * FROM ("
        "SELECT *, "
        "CASE "
            "WHEN tinterval_abs = MIN(tinterval_abs) OVER (PARTITION BY "
                "{rouId}, {carId}, {stopId}, {timeVal}) "
            "THEN 1 ELSE 0 "
        "END AS iscirc "
        "FROM ("
            "SELECT *, ABS(tinterval) AS tinterval_abs "
            "FROM {tmDifT} "
            "WHERE tinterval >= -0.167 AND tinterval < 0.55"
        ") AS valcirctmp"
    ") AS foo WHERE iscirc = 1"
).format(
    rouId   = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
    carId   = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    stopId  = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    timeVal = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    tmDifT  = timeDifference
)

create_table_by_query(conOTHER_DB, "val_circ", q)

In [ ]:
onGoingTbl.append("val_circ")

In [ ]:
"""
Delete cases when one entrance has more than
one stop_time with a equal Minimum difference between
the validation time
"""

q = (
    "SELECT val_circ.* "
    "FROM val_circ INNER JOIN ("
        "SELECT {rouId}, {stop}, {tempo}, COUNT({stop}) AS conta "
        "FROM val_circ "
        "GROUP BY {rouId}, {stop}, {tempo}"
    ") AS foo "
    "ON {rouIdJ} AND val_circ.{stop} = foo.{stop} "
    "AND val_circ.{tempo} = foo.{tempo} "
    "WHERE (conta > 1 AND tinterval < tinterval_abs) "
    "OR conta = 1"
).format(
    rouId  = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
    stop   = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo  = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    rouIdJ = " AND ".join(["val_circ.{i} = foo.{i}".format(
        i=x) for x in OTHER_SCHEMA["ENTRANCES"]["ROUTE"]])
)

create_table_by_query(conOTHER_DB, "val_circ_clean", q)

In [ ]:
"""
Produce final result

Exclude cases when one circulation has more than one vehicle
When on circulation has more than one vehicle,
select vehicle with more entrances
"""

q = (
    "SELECT {id_circT}.*, foo3.{carFld}, foo3.n_{carFld}, foo3.n_entrances, "
    "((foo3.n_{carFld} / foo3.n_entrances) * 100) AS perc "
    "FROM {id_circT} LEFT JOIN ("
        "SELECT * FROM ("
            "SELECT {stopTimeRouId}, {stopTimeTripId}, "
            "CASE "
                "WHEN count_val = MAX(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) "
                "THEN {carFld} ELSE NULL "
            "END AS {carFld}, "
            "CASE "
                "WHEN count_val = MAX(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) "
                "THEN MAX(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) ELSE NULL "
            "END AS n_{carFld}, "
            "CASE "
                "WHEN count_val = MAX(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) "
                "THEN SUM(count_val) OVER (PARTITION BY {stopTimeRouId}, {stopTimeTripId}) ELSE NULL "
            "END AS n_entrances "
            "FROM ("
                "SELECT {stopTimeRouId}, {stopTimeTripId}, {carFld}, COUNT({carFld}) AS count_val "
                "FROM val_circ_clean "
                "GROUP BY {stopTimeRouId}, {stopTimeTripId}, {carFld}"
            ") AS foo"
        ") AS foo2 "
        "WHERE {carFld} IS NOT NULL"
    ") AS foo3 ON {id_circT}.fid = foo3.{stopTimeTripId}"
).format(
    id_circT       = id_circulations,
    carFld         = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    stopTimeRouId  = ", ".join(OTHER_SCHEMA["ENTRANCES"]["ROUTE"]),
    stopTimeTripId = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    
)

FINAL_TABLE = os.path.splitext(os.path.basename(ouxls))[0]
create_table_by_query(conOTHER_DB, FINAL_TABLE, q)

In [ ]:
pgsql_to_xls(FINAL_TABLE, ouxls, dic_con=conOTHER_DB)

In [ ]:
onGoingTbl.append(FINAL_TABLE)

In [ ]:
print onGoingTbl

In [ ]: