In [1]:
valbycirc_table = r'D:\TRENMO_JASP\CARRIS\validacoes\val_by_circ\v14a20jan_withcircgpby_v4.xlsx'
valbycirc_diacol = "dia"
valbycirc_dia = "2018-01-20"
valbycirc_trip = "trip_id"
valbycirc_table_not_sanitized = r'D:\TRENMO_JASP\CARRIS\validacoes\val_by_circ\tst_20jan.xlsx'
nentrances = "nvalidacoes"
valbycirc_ns_trip = "trip_id"
conGTFS = {
"HOST" : "localhost", "PORT" : "5432", "USER" : "postgres",
"PASSWORD" : "admin", "DATABASE" : "gtfs_carris"
}
GTFS_SCHEMA = {
"TRIPS" : {
"TNAME" : "trips",
"TRIP" : "trip_id",
"SERVICE" : "service_id"
},
"TRIPS_META" : {
"TNAME" : "trips_with_meta",
"TRIP" : "trip_id",
"FID_ROUTE" : "fid_percurso",
"LONG_NAME" : "long_name",
"DESCRIPTION" : "trip_headsign"
},
"STOP_TIMES" : {
"TNAME" : "stop_times",
"TRIP" : "trip_id",
"STOP" : "stop_id",
"SEQUENCE" : "stop_sequence",
"DEPARTURE" : "departure_time"
},
"CALENDAR" : {
"TNAME" : "calendar_dates",
"SERVICE" : "service_id",
"DATE" : "date"
}
}
#filterRoute = ['736|0|ASC', '736|0|DESC', '736|3|ASC', '736|3|DESC','736|4|ASC', '736|4|DESC']
filterRoute = ""
GTFS_DAY = 20180310
report_table = r'D:\TRENMO_JASP\CARRIS\validacoes\val_by_circ\tst_20jan_report.xlsx'
In [2]:
import os
from senpy.os.info import get_filename
from senpy.topsql.pnd import xls_to_pgsql
from senpy.frompsql import sql_query
from senpy.frompsql.pnd import sql_to_pandas
from senpy.toxls.pnd import dataframe_to_xls
from senpy.pgsql.tables.w import create_table_by_query
from senpy.pgsql.tables._del import del_tables
In [3]:
onGoing = []
In [4]:
# Table with BUS entrances by GTFS Trip to PostgreSQL
# Table without any errors
valbycirc = xls_to_pgsql(
conGTFS, valbycirc_table,
get_filename(valbycirc_table)
)
In [5]:
# Table with BUS entrances by GTFS Trip to PostgreSQL
# Table with errors
valbycircErrors = xls_to_pgsql(
conGTFS, valbycirc_table_not_sanitized,
get_filename(valbycirc_table_not_sanitized)
)
In [6]:
onGoing.append(valbycirc)
onGoing.append(valbycircErrors)
In [7]:
# Get TRIPS ID's for the given GTFS_DAY
q = (
"SELECT {t_tripId}, stops, order_, start_time, end_time, "
"CAST(TO_DATE(CAST(start_time AS text), "
"'YYYY-MM-DD HH24:MI:SS') AS text) AS circ_day FROM ("
"SELECT {t_tripId}, array_agg({stopid} ORDER BY {stopSq}) AS stops, "
"array_agg({stopSq} ORDER BY {stopSq}) AS order_, "
"MIN({depTime}) AS start_time, MAX({depTime}) AS end_time "
"FROM ("
"SELECT {tripsT}.{t_tripId}, {stopTimesT}.{stopid}, "
"{stopTimesT}.{stopSq}, "
"TO_TIMESTAMP('{valday} ' || {stopTimesT}.{depTime}, 'YYYY-MM-DD HH24:MI:SS') "
"AS {depTime} FROM {tripsT} "
"INNER JOIN {stopTimesT} "
"ON {tripsT}.{t_tripId} = {stopTimesT}.{st_tripId} "
"INNER JOIN {calenTbl} AS calen "
"ON {tripsT}.{servId} = calen.{cal_servId} "
"WHERE calen.{_date} = {day}"
") AS foo GROUP BY {t_tripId}) AS foo2"
).format(
t_tripId = GTFS_SCHEMA["TRIPS"]["TRIP"],
stopid = GTFS_SCHEMA["STOP_TIMES"]["STOP"],
stopSq = GTFS_SCHEMA["STOP_TIMES"]["SEQUENCE"],
depTime = GTFS_SCHEMA["STOP_TIMES"]["DEPARTURE"],
tripsT = GTFS_SCHEMA["TRIPS"]["TNAME"],
stopTimesT = GTFS_SCHEMA["STOP_TIMES"]["TNAME"],
st_tripId = GTFS_SCHEMA["STOP_TIMES"]["TRIP"],
calenTbl = GTFS_SCHEMA["CALENDAR"]["TNAME"],
servId = GTFS_SCHEMA["TRIPS"]["SERVICE"],
cal_servId = GTFS_SCHEMA["CALENDAR"]["SERVICE"],
_date = GTFS_SCHEMA["CALENDAR"]["DATE"],
day = str(GTFS_DAY),
valday = valbycirc_dia
)
select_trips = create_table_by_query(conGTFS, "trips_that_day", q)
In [8]:
onGoing.append(select_trips)
In [9]:
# Get TRIPS not in valbycirc_table in the valbycirc_dia
q = (
"SELECT all_trips.*, {tbl_meta}.{roumeta}, "
"{tbl_meta}.{lng_name}, {tbl_meta}.{desc} "
"FROM {tbl_all} AS all_trips "
"LEFT JOIN {tbl_val} AS cls_trips "
"ON all_trips.{all_tripid} = cls_trips.{val_tripid} "
"AND all_trips.circ_day = cls_trips.{day} "
"INNER JOIN {tbl_meta} ON "
"all_trips.{all_tripid} = {tbl_meta}.{meta_tripid} "
"WHERE cls_trips.{val_tripid} IS NULL"
).format(
tbl_meta = GTFS_SCHEMA["TRIPS_META"]["TNAME"],
roumeta = GTFS_SCHEMA["TRIPS_META"]["FID_ROUTE"],
lng_name = GTFS_SCHEMA["TRIPS_META"]["LONG_NAME"],
tbl_all = select_trips,
tbl_val = valbycirc,
all_tripid = GTFS_SCHEMA["TRIPS"]["TRIP"],
val_tripid = valbycirc_trip,
day = valbycirc_diacol,
meta_tripid= GTFS_SCHEMA["TRIPS_META"]["TRIP"],
desc = GTFS_SCHEMA["TRIPS_META"]["DESCRIPTION"]
)
tripsNotIn = create_table_by_query(conGTFS, "trips_not_in", q)
In [10]:
onGoing.append(tripsNotIn)
In [11]:
# Export Result
trips_not_in = sql_to_pandas(
conGTFS, (
"SELECT notin.*, cls.{} FROM {} AS notin "
"LEFT JOIN {} AS cls "
"ON notin.{} = cls.{}"
"{}"
).format(
nentrances, tripsNotIn, valbycircErrors,
valbycirc_trip,
valbycirc_ns_trip,
"" if not filterRoute else " WHERE {}".format(" OR ".join([
"notin.{}='{}'".format(
GTFS_SCHEMA["TRIPS_META"]["FID_ROUTE"], x
) for x in filterRoute
]))
)
)
In [12]:
trips_not_in["start_time"] = trips_not_in["start_time"].astype(str)
trips_not_in["end_time"] = trips_not_in["end_time"].astype(str)
trips_not_in["circ_day"] = trips_not_in["circ_day"].astype(str)
trips_not_in["stops"] = trips_not_in["stops"].astype(str)
trips_not_in["order_"] = trips_not_in["order_"].astype(str)
In [13]:
trips_not_in["start_time"] = trips_not_in["start_time"].str[:-6]
trips_not_in["end_time"] = trips_not_in["end_time"].str[:-6]
In [14]:
dataframe_to_xls(trips_not_in, report_table)
Out[14]:
In [15]:
del_tables(conGTFS, onGoing)
In [ ]:
# TRIPS NOT IN TO Pandas
trips_not_in = sql_to_pandas(
conGTFS, "SELECT * FROM {}{}".format(
tripsNotIn,
"" if not filterRoute else " WHERE {}".format(" OR ".join([
"{}='{}'".format(
GTFS_SCHEMA["TRIPS_META"]["FID_ROUTE"], x
) for x in filterRoute
]))
)
)
In [ ]:
print trips_not_in
In [ ]:
def count_bus_entrances(row):
stops__ = list(row.stops)
q = (
"SELECT COUNT(*) "
"FROM {valtbl} "
"WHERE TO_TIMESTAMP({valDay} || ' ' || {valHour}, 'YYYY-MM-DD HH24:MI:SS') "
"> TO_TIMESTAMP('{startTime}', 'YYYY-MM-DD HH24:MI:SS') AND "
"TO_TIMESTAMP({valDay} || ' ' || {valHour}, 'YYYY-MM-DD HH24:MI:SS') "
"< TO_TIMESTAMP('{endTime}', 'YYYY-MM-DD HH24:MI:SS') "
"AND {routeCol} = '{routeId}' AND ({stops})"
).format(
valtbl = GTFS_SCHEMA["ENTRANCES"]["TNAME"],
valDay = GTFS_SCHEMA["ENTRANCES"]["DAY"],
valHour = GTFS_SCHEMA["ENTRANCES"]["TIME"],
startTime = row.start_time,
endTime = row.end_time,
routeCol = GTFS_SCHEMA["ENTRANCES"]["FID_ROUTE"],
routeId = row[GTFS_SCHEMA["TRIPS_META"]["FID_ROUTE"]],
stops = " OR ".join([
"{}={}".format(
GTFS_SCHEMA["ENTRANCES"]["STOP"], x
) for x in stops__
])
)
nval = sql_query(conGTFS, q)[0][0]
row["nval"] = nval
return row
In [ ]:
trips_not_in = trips_not_in.apply(lambda x: count_bus_entrances(x), axis=1)
In [ ]:
print trips_not_in