In [ ]:
"""
For each Bus entrance, ID the trip from GTFS

O tamanho da tabela inicial pode nao ser
identico ao da tabela final porque as validacoes sao apagadas
caso a diferenca entre o momento da validacao e o tempo de saida
mais proximo do primeiro se verificar superior a 30 minutos
"""

In [ ]:
import os
import pandas

from senpy                                import goToList
from senpy.pgsql.tables                   import copy_fromdb_todb
from senpy.pgsql.tables._del              import del_tables
from senpy.frompsql                       import sql_query
from senpy.pgsql.tables.w                 import create_table_by_query
from senpy.pgsql.tables.w                 import update_table
from senpy.toxls                          import pgsql_to_xls
from senpy.pgsql.transportation.gtfs.circ import name_circulations

In [ ]:
conGTFS_DB = {
    "HOST" : "localhost", "PORT" : "5432", "PASSWORD" : "admin",
    "USER" : "postgres", "DATABASE" : "gtfs_carris"
}

conOTHER_DB = {
    "HOST" : "localhost", "PORT" : "5432", "PASSWORD" : "admin",
    "USER" : "postgres", "DATABASE" : "valcarris_v2"
}

GTFS_SCHEMA = {
    "STOP_TIMES" : {
        "TNAME"     : "stop_times",
        "TRIP"      : "trip_id",
        "STOP"      : "stop_id",
        "SEQUENCE"  : "stop_sequence",
        "DEPARTURE" : "departure_time"
    },
    "TRIPS" : {
        "TNAME"   : "trips",
        "TRIP"    : "trip_id",
        "SERVICE" : "service_id"
    },
    "CALENDAR" : {
        "TNAME"   : "calendar_dates",
        "SERVICE" : "service_id",
        "DATE"    : "date"
    }
}

OTHER_SCHEMA = {
    "ENTRANCES" : {
        "TNAME"    : "val_17jan_7as31",
        "ROUTE"    : ["route_id"],
        "STOP"     : "paragem",
        "TIME"     : "time",
        "CAR"      : "nfrota",
        "DAY"      : "dia",
        "SEQUENCE" : "ordem",
        "CLIENT"   : "ncartao"
    },
    "STOPS" : {
        "TNAME"    : "percursos_geom_v2",
        "ROUTE"    : ["fid_percurso"],
        "SEQUENCE" : "ordem",
        "STOP"     : "paragem"
    }
}

# OTHER_SCHEMA["STOPS"]["ROUTE"] must have the same order than OTHER_SCHEMA["ENTRANCES"]["ROUTE"]

# Entrances table must have entrances for one GTFS Day (eg: from 04:00:00 to 31:00:00)

GTFS_DAY = 20180307
ENTRANCES_DAY = "2018-01-17"

OutTable = r"D:\TRENMO_JASP\CARRIS\tst_17jan.xlsx"

In [ ]:
# TODO: merge ROUTE ID into one column

OTHER_SCHEMA["STOPS"]["ROUTE"] = OTHER_SCHEMA["STOPS"]["ROUTE"][0]
OTHER_SCHEMA["ENTRANCES"]["ROUTE"] = OTHER_SCHEMA["ENTRANCES"]["ROUTE"][0]

In [ ]:
onGoingTbl = []

In [ ]:
# We have two databases: GTFS AND other with database entrances
# Send all Relevant data of the GTFS db to the OTHER DATABASE
tbls = [GTFS_SCHEMA[k]["TNAME"] for k in GTFS_SCHEMA]
onGoingTbl += tbls

In [ ]:
copy_fromdb_todb(conGTFS_DB, conOTHER_DB, tbls)

In [ ]:
# ID circulations and save result in a new table
service_schema = {
    "TRIPS" : GTFS_SCHEMA["TRIPS"], "CALENDAR" : GTFS_SCHEMA["CALENDAR"],
    "FILTER_DAY" : GTFS_DAY
}

id_circulations = name_circulations(
    conOTHER_DB, GTFS_SCHEMA["STOP_TIMES"], OTHER_SCHEMA["STOPS"],
    "circ_id", serviceSchema=service_schema, tripIdColName="fid"
)

In [ ]:
id_circulations = "circ_id"
onGoingTbl.append(id_circulations)

In [ ]:
# Sanitize DEPARTURE_TIME FIELD
update_table(conOTHER_DB, GTFS_SCHEMA["STOP_TIMES"]["TNAME"], {
    GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"] : (
        "to_char(TO_TIMESTAMP('{day}' || ' ' || {depTime}, "
        "'YYYY-MM-DD HH24:MI:SS'), 'YYYY-MM-DD HH24:MI:SS')"
    ).format(
        day     = ENTRANCES_DAY,
        depTime = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"]
    )
})

In [ ]:
# In stop_times, add meta about the route (add route id)

q = (
    "SELECT {sT}.{tripId}, {sT}.{stopId}, {sT}.{stopSq}, "
    "{sT}.{depTime}, {idCirc}.{idRoute} FROM {sT} "
    "LEFT JOIN {idCirc} "
    "ON {sT}.{tripId} = {idCirc}.fid "
    "INNER JOIN ("
        "SELECT {tripsTbl}.{tripsTripId}, {calenTbl}.{calenDate} "
        "FROM {tripsTbl} INNER JOIN {calenTbl} ON "
        "{tripsTbl}.{tripsServId} = {calenTbl}.{calenServId}"
    ") AS trip_service "
    "ON {sT}.{tripId} = trip_service.{tripsTripId} "
    "WHERE trip_service.{calenDate} = {day}"
).format(
    sT          = GTFS_SCHEMA["STOP_TIMES"]["TNAME"],
    tripId      = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    stopId      = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
    stopSq      = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"],
    depTime     = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
    idCirc      = id_circulations,
    idRoute     = OTHER_SCHEMA["STOPS"]["ROUTE"],
    tripsTbl    = GTFS_SCHEMA["TRIPS"]["TNAME"],
    tripsTripId = GTFS_SCHEMA["TRIPS"]["TRIP"],
    tripsServId = GTFS_SCHEMA["TRIPS"]["SERVICE"],
    calenTbl    = GTFS_SCHEMA["CALENDAR"]["TNAME"],
    calenServId = GTFS_SCHEMA["CALENDAR"]["SERVICE"],
    calenDate   = GTFS_SCHEMA["CALENDAR"]["DATE"],
    day         = GTFS_DAY
)

create_table_by_query(conOTHER_DB, "stop_times_fid", q)

In [ ]:
update_table(
    conOTHER_DB, "stop_times_fid",
    {OTHER_SCHEMA["STOPS"]["ROUTE"] : "\'notfound\'"},
    {OTHER_SCHEMA["STOPS"]["ROUTE"] : "NULL"        },
    logic_operator='AND'
)

In [ ]:
GTFS_SCHEMA["STOP_TIMES"]["TNAME"] = "stop_times_fid"
onGoingTbl.append("stop_times_fid")

In [ ]:
# Sanitize time column in entrances table

q = (
    "SELECT {valRou}, {car}, {stop}, {order}, {card}, "
    "to_char(TO_TIMESTAMP({dayF} || ' ' || {tempo}, 'YYYY-MM-DD HH24:MI:SS'), 'YYYY-MM-DD HH24:MI:SS') AS {tempo} "
    "FROM {tbl}"
).format(
    tbl    = OTHER_SCHEMA["ENTRANCES"]["TNAME"],
    valRou = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    car    = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    stop   = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo  = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    dayF   = OTHER_SCHEMA["ENTRANCES"]["DAY"],
    order  = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    card   = OTHER_SCHEMA["ENTRANCES"]["CLIENT"]
)

new_entrances_table = "{}_san".format(OTHER_SCHEMA["ENTRANCES"]["TNAME"])
create_table_by_query(conOTHER_DB, new_entrances_table, q)

In [ ]:
OTHER_SCHEMA["ENTRANCES"]["TNAME"] = "{}_san".format(OTHER_SCHEMA["ENTRANCES"]["TNAME"])
onGoingTbl.append(OTHER_SCHEMA["ENTRANCES"]["TNAME"])

In [ ]:
# Get a table with the potencial stop_time for one validation
# For a entrance with IDROUTE 26B|0|ASC will be created a row
# for each stop_time of the IDROUTE 26B|0|ASC

Q = (
    "SELECT {valTbl}.{valRou}, {valTbl}.{car}, {valTbl}.{stop}, {valTbl}.{seq}, "
    "{valTbl}.{tempo}, {valTbl}.{card}, {stopTimeTbl}.{tripId}, "
    "{stopTimeTbl}.{stopId}, {stopTimeTbl}.{depTime} "
    "FROM {valTbl} INNER JOIN {stopTimeTbl} "
    "ON {valTbl}.{valRou} = {stopTimeTbl}.{stopsRou} AND "
    "{valTbl}.{stop} = {stopTimeTbl}.{stopId} AND "
    "{valTbl}.{seq} = {stopTimeTbl}.{stopTSeq}"
).format(
    valTbl      = OTHER_SCHEMA["ENTRANCES"]["TNAME"],
    valRou      = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    car         = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    stop        = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo       = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    stopTimeTbl = GTFS_SCHEMA["STOP_TIMES"]["TNAME"],
    tripId      = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    stopId      = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
    depTime     = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
    stopsRou    = OTHER_SCHEMA["STOPS"]["ROUTE"],
    seq         = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    card        = OTHER_SCHEMA["ENTRANCES"]["CLIENT"],
    stopTSeq    = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"]
)

create_table_by_query(conOTHER_DB, "val_stoptime_all_possibilities", Q)

In [ ]:
onGoingTbl.append("val_stoptime_all_possibilities")

In [ ]:
# From the table val_stoptime_all_possibilities
# Get the correct circulation for each entrance

timeDifference = (
    "(SELECT *, "
    "("
        "(EXTRACT(day FROM "
            "(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
        ") * 24) + "
        "EXTRACT(hour FROM "
            "(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
        ") + (EXTRACT(minute FROM "
            "(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
        ") / 60.0) + (EXTRACT(second FROM "
            "(TO_TIMESTAMP({timeVal}, 'YYYY-MM-DD HH24:MI:SS') - TO_TIMESTAMP({timeDep}, 'YYYY-MM-DD HH24:MI:SS'))"
        ") / 3600.0)"
    ") AS tinterval "
    "FROM val_stoptime_all_possibilities) AS tmdf"
).format(
    timeVal=OTHER_SCHEMA["ENTRANCES"]["TIME"],
    timeDep=GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"]
)

q = (
    "SELECT * FROM ("
        "SELECT *, "
        "CASE "
            "WHEN tinterval_abs = MIN(tinterval_abs) OVER (PARTITION BY "
                "{rouId}, {carId}, {stopId}, {card}, {timeVal}) "
            "THEN 1 ELSE 0 "
        "END AS iscirc "
        "FROM ("
            "SELECT *, ABS(tinterval) AS tinterval_abs "
            "FROM {tmDifT} "
            "WHERE tinterval >= -0.1667 AND tinterval < 0.55"
        ") AS valcirctmp"
    ") AS foo WHERE iscirc = 1"
).format(
    rouId   = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    carId   = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    stopId  = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    timeVal = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    card    = OTHER_SCHEMA["ENTRANCES"]["CLIENT"],
    tmDifT  = timeDifference
)

create_table_by_query(conOTHER_DB, "val_circ", q)

In [ ]:
onGoingTbl.append("val_circ")

In [ ]:
"""
Delete cases when one entrance has more than
one stop_time with a equal Minimum difference between
the validation time
"""

q = (
    "SELECT val_circ.* "
    "FROM val_circ INNER JOIN ("
        "SELECT {rouId}, {stop}, {tempo}, {card}, COUNT({stop}) AS conta "
        "FROM val_circ "
        "GROUP BY {rouId}, {stop}, {card}, {tempo}"
    ") AS foo "
    "ON val_circ.{rouId} = foo.{rouId} AND val_circ.{stop} = foo.{stop} "
    "AND val_circ.{tempo} = foo.{tempo} AND val_circ.{card} = foo.{card} "
    "WHERE (conta > 1 AND tinterval < tinterval_abs) "
    "OR conta = 1"
).format(
    rouId  = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    stop   = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo  = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    card   = OTHER_SCHEMA["ENTRANCES"]["CLIENT"]
)

create_table_by_query(conOTHER_DB, "val_circ_clean", q)

In [ ]:
onGoingTbl.append("val_circ_clean")

In [ ]:
"""
val_circ_clean has many classification errors - wrong circulation

Execute a query to identify these errors
"""

errorQ = (
    "SELECT terrors.*, id_circ.next_circ, id_circ.previous_circ "
    "FROM ("
        "SELECT main.{valRouId}, main.{valStop}, main.{sequence}, "
        "main.{card}, main.{tempo}, "
        "main.{trip}, 1 AS iserror "
        "FROM val_circ_clean AS main INNER JOIN val_circ_clean AS foo "
        "ON main.{trip} = foo.{trip} "
        "WHERE main.{sequence} > foo.{sequence} AND "
        "TO_TIMESTAMP(main.{tempo}, 'YYYY-MM-DD HH24:MI:SS') < "
        "TO_TIMESTAMP(foo.{tempo}, 'YYYY-MM-DD HH24:MI:SS') "
        "GROUP BY main.{valRouId}, main.{valStop}, main.{sequence}, "
        "main.{card}, main.{tempo}, main.{trip}"
    ") AS terrors INNER JOIN ("
        "SELECT *, "
        "LEAD(fid) OVER (PARTITION BY stops, {idRou} ORDER BY departure) AS next_circ, "
        "LAG(fid) OVER (PARTITION BY stops, {idRou} ORDER BY departure) AS previous_circ "
        "FROM circ_id"
    ") AS id_circ "
    "ON terrors.{trip} = id_circ.fid"
).format(
    valRouId = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    valStop  = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    sequence = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    tempo    = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    trip     = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    idRou    = OTHER_SCHEMA["STOPS"]["ROUTE"],
    card     = OTHER_SCHEMA["ENTRANCES"]["CLIENT"]
)

print errorQ

create_table_by_query(conOTHER_DB, "errors_class", errorQ)

In [ ]:
onGoingTbl.append("errors_class")

In [ ]:
"""
Solve errors - attempt 1
"""

solveQ = (
    "SELECT main.*, errors_class.iserror, "
    "errors_class.next_circ, "
    "errors_class.previous_circ, "
    "CASE "
        "WHEN errors_class.iserror IS NULL THEN main.{tripId} ELSE "
        "errors_class.previous_circ "
    "END AS real_trip "
    "FROM val_circ_clean AS main "
    "LEFT JOIN errors_class "
    "ON main.{rouId} = errors_class.{rouId} "
    "AND main.{stop} = errors_class.{stop} "
    "AND main.{tempo} = errors_class.{tempo} "
    "AND main.{tripId} = errors_class.{tripId} "
    "AND main.{sequence} = errors_class.{sequence} "
    "AND main.{card} = errors_class.{card}"
).format(
    rouId    = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    stop     = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo    = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    tripId   = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    sequence = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    card     = OTHER_SCHEMA["ENTRANCES"]["CLIENT"],
    depTime  = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"]
)

create_table_by_query(conOTHER_DB, "val_circ_errorfree1", solveQ)

In [ ]:
onGoingTbl.append("val_circ_errorfree1")

In [ ]:
errorQ = (
    "SELECT main.{valRouId}, main.{valStop}, main.{sequence}, "
    "main.{card}, main.{tempo}, "
    "main.real_trip, 1 AS iserror "
    "FROM val_circ_errorfree1 AS main INNER JOIN val_circ_errorfree1 AS foo "
    "ON main.real_trip = foo.real_trip "
    "WHERE main.iserror=1 AND ((main.{sequence} > foo.{sequence} AND "
    "TO_TIMESTAMP(main.{tempo}, 'YYYY-MM-DD HH24:MI:SS') < "
    "TO_TIMESTAMP(foo.{tempo}, 'YYYY-MM-DD HH24:MI:SS')) OR "
    "(main.{sequence} < foo.{sequence} AND "
    "TO_TIMESTAMP(main.{tempo}, 'YYYY-MM-DD HH24:MI:SS') > "
    "TO_TIMESTAMP(foo.{tempo}, 'YYYY-MM-DD HH24:MI:SS'))) "
    "GROUP BY main.{valRouId}, main.{valStop}, main.{sequence}, "
    "main.{card}, main.{tempo}, main.real_trip"
).format(
    valRouId = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    valStop  = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    sequence = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    tempo    = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    idRou    = OTHER_SCHEMA["STOPS"]["ROUTE"],
    card     = OTHER_SCHEMA["ENTRANCES"]["CLIENT"]
)

create_table_by_query(conOTHER_DB, "errors_class2", errorQ)

In [ ]:
onGoingTbl.append("errors_class2")

In [ ]:
"""
Solve errors - attempt 2
"""

solveQ = (
    "SELECT main.*, errors_class2.iserror AS iserror2, "
    "CASE "
        "WHEN errors_class2.iserror IS NULL THEN main.real_trip ELSE "
        "main.next_circ "
    "END AS real_trip2 "
    "FROM val_circ_errorfree1 AS main "
    "LEFT JOIN errors_class2 "
    "ON main.{rouId} = errors_class2.{rouId} "
    "AND main.{stop} = errors_class2.{stop} "
    "AND main.{tempo} = errors_class2.{tempo} "
    "AND main.real_trip = errors_class2.real_trip "
    "AND main.{sequence} = errors_class2.{sequence} "
    "AND main.{card} = errors_class2.{card}"
).format(
    rouId    = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    stop     = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo    = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    sequence = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    card     = OTHER_SCHEMA["ENTRANCES"]["CLIENT"],
    depTime  = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"]
)

create_table_by_query(conOTHER_DB, "val_circ_errorfree2", solveQ)

In [ ]:
onGoingTbl.append("val_circ_errorfree2")

In [ ]:
"""
Mesmo depois de tudo isto, ha situacoes em que
se verifica que uma validacao nao pertence nem
a circulacao seguinte, nem a circulacao anterior.
Nestes casos, as validacoes sao apagadas pois nao e possivel determinar
a circulacao.
"""

errorQ = (
    "SELECT main.{valRouId}, main.{valStop}, main.{sequence}, "
    "main.{card}, main.{tempo}, "
    "main.real_trip2, 1 AS iserror3 "
    "FROM val_circ_errorfree2 AS main INNER JOIN val_circ_errorfree2 AS foo "
    "ON main.real_trip2 = foo.real_trip2 "
    "WHERE main.iserror2=1 AND main.{sequence} > foo.{sequence} AND "
    "TO_TIMESTAMP(main.{tempo}, 'YYYY-MM-DD HH24:MI:SS') < "
    "TO_TIMESTAMP(foo.{tempo}, 'YYYY-MM-DD HH24:MI:SS') "
    "GROUP BY main.{valRouId}, main.{valStop}, main.{sequence}, "
    "main.{card}, main.{tempo}, main.real_trip2"
).format(
    valRouId = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    valStop  = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    sequence = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    tempo    = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    idRou    = OTHER_SCHEMA["STOPS"]["ROUTE"],
    card     = OTHER_SCHEMA["ENTRANCES"]["CLIENT"]
)


create_table_by_query(conOTHER_DB, "errors_class3", errorQ)

solveQ = (
    "SELECT main.*, errors_class3.iserror3, "
    "CASE "
        "WHEN errors_class3.iserror3 = 1 THEN 'not_found' "
        "ELSE main.real_trip2 "
    "END AS real_trip3 "
    "FROM val_circ_errorfree2 AS main "
    "LEFT JOIN errors_class3 "
    "ON main.{rouId} = errors_class3.{rouId} "
    "AND main.{stop} = errors_class3.{stop} "
    "AND main.{tempo} = errors_class3.{tempo} "
    "AND main.real_trip2 = errors_class3.real_trip2 "
    "AND main.{sequence} = errors_class3.{sequence} "
    "AND main.{card} = errors_class3.{card}"
).format(
    rouId    = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    stop     = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    tempo    = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    tripId   = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    sequence = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    card     = OTHER_SCHEMA["ENTRANCES"]["CLIENT"],
    depTime  = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"]
)

create_table_by_query(conOTHER_DB, "val_circ_errorfree3", solveQ)

In [ ]:
onGoingTbl.append("errors_class3")
onGoingTbl.append("val_circ_errorfree3")

In [ ]:
if os.path.splitext(OutTable)[1] == '':
    outpg = OutTable
else:
    outpg = os.path.splitext(os.path.basename(OutTable))[0]

In [ ]:
# Sanitize last table
# Produce final table

finalQ = (
    "SELECT main.{route}, main.{car}, main.{card}, "
    "main.{stop}, main.{order}, main.{tempo}, "
    "main.real_trip3 AS {tripId}, "
    "circ_id.departure AS departure_circ, circ_id.depar_last_stop, "
    "foo.{stopTime} AS departure_stop "
    "FROM val_circ_errorfree3 AS main "
    "INNER JOIN circ_id "
    "ON main.real_trip3 = circ_id.fid "
    "INNER JOIN {stopTbl} AS foo "
    "ON main.real_trip3 = foo.{tripId} AND "
    "main.{stop} = foo.{stopId} AND main.{order} = "
    "foo.{sequence}"
).format(
    route    = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    car      = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    card     = OTHER_SCHEMA["ENTRANCES"]["CLIENT"],
    stop     = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    order    = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    tempo    = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    tripId   = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    stopTbl  = GTFS_SCHEMA["STOP_TIMES"]["TNAME"],
    stopTime = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
    stopId   = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
    sequence = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"]
)

create_table_by_query(conOTHER_DB, outpg, finalQ)

In [ ]:
# Sanitize last table
# Produce final table

finalQ = (
    "SELECT main.{route}, main.{car}, main.{card}, "
    "main.{stop}, main.{order}, main.{tempo}, "
    "main.real_trip2 AS {tripId}, "
    "circ_id.departure AS departure_circ, circ_id.depar_last_stop, "
    "foo.{stopTime} AS departure_stop "
    "FROM val_circ_errorfree2 AS main "
    "INNER JOIN circ_id "
    "ON main.real_trip2 = circ_id.fid "
    "INNER JOIN {stopTbl} AS foo "
    "ON main.real_trip2 = foo.{tripId} AND "
    "main.{stop} = foo.{stopId} AND main.{order} = "
    "foo.{sequence}"
).format(
    route    = OTHER_SCHEMA["ENTRANCES"]["ROUTE"],
    car      = OTHER_SCHEMA["ENTRANCES"]["CAR"],
    card     = OTHER_SCHEMA["ENTRANCES"]["CLIENT"],
    stop     = OTHER_SCHEMA["ENTRANCES"]["STOP"],
    order    = OTHER_SCHEMA["ENTRANCES"]["SEQUENCE"],
    tempo    = OTHER_SCHEMA["ENTRANCES"]["TIME"],
    tripId   = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    stopTbl  = GTFS_SCHEMA["STOP_TIMES"]["TNAME"],
    stopTime = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
    stopId   = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
    sequence = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"]
)

create_table_by_query(conOTHER_DB, outpg, finalQ)

In [ ]:
"""
TODO: Use nfrota to identify errors
"""

In [ ]:
print onGoingTbl

In [ ]:
del_tables(conOTHER_DB, onGoingTbl)

In [ ]:
if os.path.splitext(OutTable)[1] == '.xlsx':
    pgsql_to_xls(outpg, OutTable, dic_con=conOTHER_DB)

In [ ]: