In [1]:
valbycirc_table  = r'D:\TRENMO_JASP\CARRIS\validacoes\val_by_circ\v14a20jan_withcircgpby_v4.xlsx'
valbycirc_diacol = "dia"
valbycirc_dia    = "2018-01-20"
valbycirc_trip = "trip_id"

valbycirc_table_not_sanitized = r'D:\TRENMO_JASP\CARRIS\validacoes\val_by_circ\tst_20jan.xlsx'
nentrances = "nvalidacoes"
valbycirc_ns_trip = "trip_id"

conGTFS = {
    "HOST" : "localhost", "PORT" : "5432", "USER" : "postgres",
    "PASSWORD" : "admin", "DATABASE" : "gtfs_carris"
}

GTFS_SCHEMA = {
    "TRIPS" : {
        "TNAME"   : "trips",
        "TRIP"    : "trip_id",
        "SERVICE" : "service_id"
    },
    "TRIPS_META" : {
        "TNAME"       : "trips_with_meta",
        "TRIP"        : "trip_id",
        "FID_ROUTE"   : "fid_percurso",
        "LONG_NAME"   : "long_name",
        "DESCRIPTION" : "trip_headsign"
    },
    "STOP_TIMES" : {
        "TNAME" : "stop_times",
        "TRIP"  : "trip_id",
        "STOP"  : "stop_id",
        "SEQUENCE" : "stop_sequence",
        "DEPARTURE" : "departure_time"
    },
    "CALENDAR" : {
        "TNAME"   : "calendar_dates",
        "SERVICE" : "service_id",
        "DATE"    : "date"
    }
}

#filterRoute = ['736|0|ASC', '736|0|DESC', '736|3|ASC', '736|3|DESC','736|4|ASC', '736|4|DESC']
filterRoute = ""

GTFS_DAY = 20180310

report_table = r'D:\TRENMO_JASP\CARRIS\validacoes\val_by_circ\tst_20jan_report.xlsx'

In [2]:
import os
from senpy.os.info           import get_filename
from senpy.topsql.pnd        import xls_to_pgsql
from senpy.frompsql          import sql_query
from senpy.frompsql.pnd      import sql_to_pandas
from senpy.toxls.pnd         import dataframe_to_xls
from senpy.pgsql.tables.w    import create_table_by_query
from senpy.pgsql.tables._del import del_tables

In [3]:
onGoing = []

In [4]:
# Table with BUS entrances by GTFS Trip to PostgreSQL
# Table without any errors

valbycirc = xls_to_pgsql(
    conGTFS, valbycirc_table,
    get_filename(valbycirc_table)
)

In [5]:
# Table with BUS entrances by GTFS Trip to PostgreSQL
# Table with errors

valbycircErrors = xls_to_pgsql(
    conGTFS, valbycirc_table_not_sanitized,
    get_filename(valbycirc_table_not_sanitized)
)

In [6]:
onGoing.append(valbycirc)
onGoing.append(valbycircErrors)

In [7]:
# Get TRIPS ID's for the given GTFS_DAY

q = (
    "SELECT {t_tripId}, stops, order_, start_time, end_time, "
    "CAST(TO_DATE(CAST(start_time AS text), "
        "'YYYY-MM-DD HH24:MI:SS') AS text) AS circ_day FROM ("
    "SELECT {t_tripId}, array_agg({stopid} ORDER BY {stopSq}) AS stops, "
    "array_agg({stopSq} ORDER BY {stopSq}) AS order_, "
    "MIN({depTime}) AS start_time, MAX({depTime}) AS end_time "
    "FROM ("
        "SELECT {tripsT}.{t_tripId}, {stopTimesT}.{stopid}, "
        "{stopTimesT}.{stopSq}, "
        "TO_TIMESTAMP('{valday} ' || {stopTimesT}.{depTime}, 'YYYY-MM-DD HH24:MI:SS') "
        "AS {depTime} FROM {tripsT} "
        "INNER JOIN {stopTimesT} "
        "ON {tripsT}.{t_tripId} = {stopTimesT}.{st_tripId} "
        "INNER JOIN {calenTbl} AS calen "
        "ON {tripsT}.{servId} = calen.{cal_servId} "
        "WHERE calen.{_date} = {day}"
    ") AS foo GROUP BY {t_tripId}) AS foo2"
).format(
    t_tripId   = GTFS_SCHEMA["TRIPS"]["TRIP"],
    stopid     = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
    stopSq     = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"],
    depTime    = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
    tripsT     = GTFS_SCHEMA["TRIPS"]["TNAME"],
    stopTimesT = GTFS_SCHEMA["STOP_TIMES"]["TNAME"],
    st_tripId  = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
    calenTbl   = GTFS_SCHEMA["CALENDAR"]["TNAME"],
    servId     = GTFS_SCHEMA["TRIPS"]["SERVICE"],
    cal_servId = GTFS_SCHEMA["CALENDAR"]["SERVICE"],
    _date      = GTFS_SCHEMA["CALENDAR"]["DATE"],
    day        = str(GTFS_DAY),
    valday     = valbycirc_dia
)

select_trips = create_table_by_query(conGTFS, "trips_that_day", q)

In [8]:
onGoing.append(select_trips)

In [9]:
# Get TRIPS not in valbycirc_table in the valbycirc_dia

q = (
    "SELECT all_trips.*, {tbl_meta}.{roumeta}, "
    "{tbl_meta}.{lng_name},  {tbl_meta}.{desc} "
    "FROM {tbl_all} AS all_trips "
    "LEFT JOIN {tbl_val} AS cls_trips "
    "ON all_trips.{all_tripid} = cls_trips.{val_tripid} "
    "AND all_trips.circ_day = cls_trips.{day} "
    "INNER JOIN {tbl_meta} ON "
    "all_trips.{all_tripid} = {tbl_meta}.{meta_tripid} "
    "WHERE cls_trips.{val_tripid} IS NULL"
).format(
    tbl_meta   = GTFS_SCHEMA["TRIPS_META"]["TNAME"],
    roumeta    = GTFS_SCHEMA["TRIPS_META"]["FID_ROUTE"],
    lng_name   = GTFS_SCHEMA["TRIPS_META"]["LONG_NAME"],
    tbl_all    = select_trips,
    tbl_val    = valbycirc,
    all_tripid = GTFS_SCHEMA["TRIPS"]["TRIP"],
    val_tripid = valbycirc_trip,
    day        = valbycirc_diacol,
    meta_tripid= GTFS_SCHEMA["TRIPS_META"]["TRIP"],
    desc       = GTFS_SCHEMA["TRIPS_META"]["DESCRIPTION"]
)

tripsNotIn = create_table_by_query(conGTFS, "trips_not_in", q)

In [10]:
onGoing.append(tripsNotIn)

In [11]:
# Export Result

trips_not_in = sql_to_pandas(
    conGTFS, (
        "SELECT notin.*, cls.{} FROM {} AS notin "
        "LEFT JOIN {} AS cls "
        "ON notin.{} = cls.{}"
        "{}"
    ).format(
        nentrances, tripsNotIn, valbycircErrors,
        valbycirc_trip,
        valbycirc_ns_trip,
        "" if not filterRoute else " WHERE {}".format(" OR ".join([
            "notin.{}='{}'".format(
                GTFS_SCHEMA["TRIPS_META"]["FID_ROUTE"], x
            ) for x in filterRoute
        ]))
    )
)

In [12]:
trips_not_in["start_time"] = trips_not_in["start_time"].astype(str)
trips_not_in["end_time"] = trips_not_in["end_time"].astype(str)
trips_not_in["circ_day"] = trips_not_in["circ_day"].astype(str)
trips_not_in["stops"] = trips_not_in["stops"].astype(str)
trips_not_in["order_"] = trips_not_in["order_"].astype(str)

In [13]:
trips_not_in["start_time"] = trips_not_in["start_time"].str[:-6]
trips_not_in["end_time"] = trips_not_in["end_time"].str[:-6]

In [14]:
dataframe_to_xls(trips_not_in, report_table)


Out[14]:
'D:\\TRENMO_JASP\\CARRIS\\validacoes\\val_by_circ\\tst_20jan_report.xlsx'

In [15]:
del_tables(conGTFS, onGoing)

In [ ]:
# TRIPS NOT IN TO Pandas

trips_not_in = sql_to_pandas(
    conGTFS, "SELECT * FROM {}{}".format(
        tripsNotIn,
        "" if not filterRoute else " WHERE {}".format(" OR ".join([
            "{}='{}'".format(
                GTFS_SCHEMA["TRIPS_META"]["FID_ROUTE"], x
            ) for x in filterRoute
        ]))
    )
)

In [ ]:
print trips_not_in

In [ ]:
def count_bus_entrances(row):
    stops__ = list(row.stops)
    
    q = (
        "SELECT COUNT(*) "
        "FROM {valtbl} "
        "WHERE TO_TIMESTAMP({valDay} || ' ' || {valHour}, 'YYYY-MM-DD HH24:MI:SS') "
        "> TO_TIMESTAMP('{startTime}', 'YYYY-MM-DD HH24:MI:SS') AND "
        "TO_TIMESTAMP({valDay} || ' ' || {valHour}, 'YYYY-MM-DD HH24:MI:SS') "
        "< TO_TIMESTAMP('{endTime}', 'YYYY-MM-DD HH24:MI:SS') "
        "AND {routeCol} = '{routeId}' AND ({stops})"
    ).format(
        valtbl    = GTFS_SCHEMA["ENTRANCES"]["TNAME"],
        valDay    = GTFS_SCHEMA["ENTRANCES"]["DAY"],
        valHour   = GTFS_SCHEMA["ENTRANCES"]["TIME"],
        startTime = row.start_time,
        endTime   = row.end_time,
        routeCol  = GTFS_SCHEMA["ENTRANCES"]["FID_ROUTE"],
        routeId   = row[GTFS_SCHEMA["TRIPS_META"]["FID_ROUTE"]],
        stops     = " OR ".join([
            "{}={}".format(
                GTFS_SCHEMA["ENTRANCES"]["STOP"], x
            ) for x in stops__
        ])
    )
    
    nval = sql_query(conGTFS, q)[0][0]
    
    row["nval"] = nval
    
    return row

In [ ]:
trips_not_in = trips_not_in.apply(lambda x: count_bus_entrances(x), axis=1)

In [ ]:
print trips_not_in