In [1]:
import json
import numpy as np
import pandas as pd
import models as m
import matplotlib.pyplot  as plt

from datetime import datetime as dt

pd.options.display.float_format = '{:20,.2f}'.format

In [2]:
# Leer bicycle data a un dataframe
df = pd.read_json('data/bicycle_data.json').transpose().dropna()
# Crear columna para duración promedio del viaje
df['trip_length'] = df.apply(lambda row: (row.total_time / row.total_trips) / 60, axis=1)
df = df.sort_values(by='trip_length', ascending=False)

In [9]:
# Leer los viajes de las bicicletas que tienen números muy altos
trips = m.s.query(m.Trip).filter(
    m.Trip.bicycle_id==4667,
    m.Trip.departure_time >= dt(year=2016, month=5, day=13),
    m.Trip.departure_time <= dt(year=2017, month=5, day=26))
df2 = pd.read_sql(trips.statement, trips.session.bind).sort_values(by='departure_time')
# Crear columna de duracion de cada viaje
df2['trip_length'] = df2.apply(lambda row: (row['arrival_time'] - row['departure_time']).total_seconds(), axis=1)
df2.round(4)
df2.sort_values(by='departure_time', ascending=False)


Out[9]:
id gender age bicycle_id departure_station departure_time arrival_station arrival_time trip_length
9 41128127 M 61 4667 204 2016-05-14 20:02:35.000 224 2017-03-24 15:48:27.000 27,114,352.00
8 33071646 M 51 4667 213 2016-05-14 17:20:30.247 204 2016-05-14 17:34:57.157 866.91
7 33069610 M 44 4667 237 2016-05-14 15:20:00.507 213 2016-05-14 15:27:47.000 466.49
6 33061591 M 25 4667 116 2016-05-14 07:05:58.997 52 2016-05-14 07:15:15.000 556.00
5 33056977 M 30 4667 129 2016-05-13 19:46:02.117 116 2016-05-13 19:58:35.600 753.48
4 33056091 M 53 4667 63 2016-05-13 19:24:02.993 129 2016-05-13 19:33:26.000 563.01
3 33049300 M 28 4667 78 2016-05-13 16:44:42.643 63 2016-05-13 16:49:00.000 257.36
2 33047505 M 33 4667 79 2016-05-13 15:55:05.003 78 2016-05-13 15:58:08.917 183.91
1 33046656 M 39 4667 300 2016-05-13 15:34:22.947 79 2016-05-13 15:45:43.000 680.05
0 33045554 M 30 4667 337 2016-05-13 15:09:57.230 300 2016-05-13 15:15:03.000 305.77

In [3]:
# Encontrar los viajes que duran más de día y medio en las primeras 400 columnas
ids_2 = []
j = 0
for index, row in df.iterrows():
#     long_trips = False
    trips = m.s.query(m.Trip).filter_by(bicycle_id=int(index))
    df_temp = pd.read_sql(trips.statement, trips.session.bind)
    # Crear columna de duracion de cada viaje
    df_temp['trip_length'] = df_temp.apply(lambda row: (row['arrival_time'] - row['departure_time']).total_seconds(), axis=1)
    df_temp = df_temp.sort_values(by='trip_length', ascending=False)
    for i, r in df_temp.iterrows():
        if r['trip_length'] > 129600:
#             ids_2.append((r['id'], r['bicycle_id'], r['trip_length']))
            ids_2.append(r)
#             long_trips =True
        else:
            break
    if j >= 400:
        break
    j += 1
#     if not long_trips:
#         break
# ids_2

In [14]:
# Graficar la distribucion de los minutos de viaje
plt.hist([r.trip_length/60 for r in ids_2],
         bins=np.logspace(np.log10(10000),np.log10(10000000.0),50))
plt.gca().set_xscale("log")
len(m.s.query(m.Trip).filter(
    m.Trip.bicycle_id==int(viajes_anorm.iloc[0].bicycle_id),
    m.Trip.departure_time > viajes_anorm.iloc[0].departure_time,
    m.Trip.departure_time < viajes_anorm.iloc[0].arrival_time).all())plt.show()



In [18]:
def total_interm_trips(row):
    return len(m.s.query(m.Trip).filter(
        m.Trip.bicycle_id==int(row.bicycle_id),
        m.Trip.departure_time > row.departure_time,
        m.Trip.departure_time < row.arrival_time).all())

trips = pd.DataFrame(ids_2)
trips['viajes_entre_fechas'] = trips.apply(total_interm_trips, axis=1)
viajes_repar = trips[(trips.arrival_station > 452) | (trips.departure_station > 452)]
viajes_anorm = trips[(trips.arrival_station <= 452) & (trips.departure_station <= 452)]
viajes_anorm = viajes_anorm.sort_values(by='id', ascending=False)

In [20]:
writer = pd.ExcelWriter('data/output.xlsx')
trips.to_excel(writer,'Viajes mayores a 1.5 días')
viajes_repar.to_excel(writer, 'A estaciones de reparación')
viajes_anorm.to_excel(writer, 'Viajes anormales')
writer.save()

In [19]:
viajes_anorm.sort_values(by='trip_length', ascending=False)


Out[19]:
id gender age bicycle_id departure_station departure_time arrival_station arrival_time trip_length viajes_entre_fechas
9467 31312696 M 29 395 36 2011-10-14 18:34:55.000 63 2016-03-06 02:27:40.000 138,613,965.00 6956
7374 31312697 M 35 867 66 2011-10-14 18:41:48.913 63 2016-03-06 02:33:54.000 138,613,925.09 5088
8682 31312695 M 65 865 84 2011-10-14 18:27:07.607 63 2016-03-06 02:14:49.000 138,613,661.39 6449
1312 41128127 M 61 4667 204 2016-05-14 20:02:35.000 224 2017-03-24 15:48:27.000 27,114,352.00 0
4281 27088107 F 31 2888 36 2014-12-12 08:47:04.560 273 2015-09-16 14:51:14.000 24,041,049.44 0
1379 10374996 M 31 2162 84 2012-12-12 09:01:25.773 236 2013-09-13 16:08:17.000 23,785,611.23 969
4932 7843418 M 48 1138 36 2012-09-16 17:58:21.077 96 2013-04-09 18:59:46.887 17,715,685.81 717
3942 9516998 M 32 74 182 2013-01-27 08:39:42.453 182 2013-08-01 09:13:40.000 16,072,437.55 0
5667 25933366 F 27 2722 194 2015-02-07 17:03:05.357 123 2015-08-03 22:10:11.803 15,311,226.45 857
894 11183151 M 28 1963 175 2013-05-01 09:09:54.500 177 2013-10-22 01:04:48.000 15,004,493.50 0
1748 15329003 F 32 2544 39 2013-11-20 20:12:05.250 164 2014-04-28 21:00:16.000 13,740,490.75 0
2485 4380090 M 26 113 32 2011-12-19 14:45:10.000 54 2012-05-20 15:44:45.000 13,222,775.00 0
755 1975527 M 31 349 86 2011-01-04 00:01:37.553 85 2011-05-27 19:14:45.850 12,424,388.30 0
229 698865 M 50 573 22 2010-07-06 21:02:19.000 86 2010-11-19 08:06:54.247 11,703,875.25 0
1062 2196109 M 24 757 77 2011-01-28 17:40:13.297 18 2011-06-06 16:01:08.477 11,139,655.18 0
3697 21041922 F 31 2232 71 2014-09-17 21:13:42.857 35 2015-01-23 12:27:07.583 11,027,604.73 0
2247 3575940 M 26 1173 40 2011-09-05 08:25:24.053 85 2012-01-09 17:16:47.000 10,918,282.95 0
862 7843420 M 43 3390 46 2012-12-20 20:03:19.000 25 2013-04-25 10:09:09.000 10,850,750.00 635
3737 5176191 M 30 74 57 2012-05-20 15:30:16.103 59 2012-09-13 20:02:54.630 10,038,758.53 0
7821 40822484 M 18 3018 41 2016-11-17 18:20:17.000 257 2017-03-13 10:10:02.000 9,992,985.00 0
5385 28134425 M 35 2542 249 2015-07-15 19:20:23.030 230 2015-10-24 15:28:54.000 8,712,510.97 0
51 126559 M 28 528 85 2010-02-02 18:04:38.357 26 2010-05-11 01:05:08.853 8,406,030.50 0
40 126560 M 26 366 45 2010-02-07 16:50:56.057 25 2010-05-12 06:09:36.817 8,083,120.76 0
36 126562 M 28 356 45 2010-02-07 17:38:34.837 25 2010-05-12 06:08:56.597 8,080,221.76 0
39 126561 F 28 190 45 2010-02-07 17:08:03.173 23 2010-05-11 01:07:46.903 7,977,583.73 0
1537 34789604 F 32 6586 264 2016-04-25 15:15:08.040 210 2016-07-20 12:37:48.000 7,420,959.96 0
855 1737606 M 48 124 85 2011-02-08 16:14:03.000 86 2011-04-23 01:53:02.197 6,341,939.20 0
2489 3383722 M 40 762 58 2011-10-14 19:04:54.627 68 2011-12-22 20:09:13.000 5,965,458.37 0
1084 1737605 M 39 161 36 2011-01-29 18:50:27.347 43 2011-04-08 17:56:31.900 5,958,364.55 0
3498 4577180 M 39 1129 58 2012-04-17 10:30:00.207 58 2012-06-24 09:38:26.000 5,872,105.79 0
... ... ... ... ... ... ... ... ... ... ...
33 5806706 M 22 3094 218 2012-11-23 19:00:38.697 233 2012-11-25 13:02:35.000 151,316.30 0
1662 1975898 M 23 588 13 2011-05-16 14:38:39.977 70 2011-05-18 08:18:23.367 149,983.39 0
7919 39799435 M 32 3396 134 2017-02-04 22:51:06.000 142 2017-02-06 16:14:06.000 148,980.00 0
892 41361292 M 36 9200 389 2017-03-31 18:15:54.000 417 2017-04-02 10:52:37.000 146,203.00 0
128 126804 M 50 221 59 2010-05-25 00:21:08.813 80 2010-05-26 16:54:19.390 145,990.58 0
2312 35536840 M 32 6223 47 2016-08-19 22:06:15.000 116 2016-08-21 14:12:45.000 144,390.00 0
1620 1976052 M 26 190 85 2011-05-22 22:32:50.680 85 2011-05-24 14:33:40.270 144,049.59 0
159 249998 F 21 918 36 2010-07-14 12:41:58.120 59 2010-07-16 04:20:48.130 142,730.01 0
4635 5806333 F 35 866 209 2012-11-10 18:01:29.907 85 2012-11-12 09:35:29.297 142,439.39 0
870 1035044 M 30 1189 36 2011-01-26 19:22:32.747 47 2011-01-28 10:52:37.543 142,204.80 0
2616 3203970 M 27 987 87 2011-10-31 00:06:57.213 26 2011-11-01 15:29:49.130 141,771.92 0
91 42877159 F 32 9728 36 2017-05-28 17:30:51.000 154 2017-05-30 08:49:51.000 141,540.00 0
399 5806512 F 25 2049 137 2012-11-16 11:56:42.120 135 2012-11-18 03:03:01.357 140,779.24 0
697 34869030 F 22 6656 7 2016-07-22 19:30:28.083 256 2016-07-24 10:32:35.000 140,526.92 0
1761 30411634 M 27 4285 21 2016-01-26 20:20:21.070 26 2016-01-28 11:15:02.070 140,081.00 0
17 23158048 M 36 6565 327 2015-04-17 18:10:13.267 310 2015-04-19 09:02:12.000 139,918.73 0
175 250091 F 33 366 4 2010-07-19 22:11:19.000 3 2010-07-21 13:02:33.113 139,874.11 0
235 249941 M 23 1007 42 2010-07-11 20:40:50.877 42 2010-07-13 11:31:00.753 139,809.88 0
4075 5176654 M 31 187 22 2012-09-13 21:12:29.323 246 2012-09-15 11:45:24.000 138,774.68 0
8607 42387486 M 27 3415 261 2017-05-12 08:56:24.000 120 2017-05-13 23:28:22.000 138,718.00 0
229 250086 F 35 797 41 2010-07-19 18:52:34.000 43 2010-07-21 08:55:28.047 136,974.05 0
2329 2597039 F 30 432 75 2011-08-17 20:28:05.410 83 2011-08-19 10:21:46.587 136,421.18 0
2991 3762480 F 26 279 78 2012-02-19 10:49:49.463 78 2012-02-21 00:42:56.837 136,387.37 0
158 191525 M 27 779 11 2010-06-24 09:48:37.857 14 2010-06-25 23:34:40.650 135,962.79 0
655 7351539 M 21 2688 28 2013-03-23 15:02:00.380 242 2013-03-25 04:18:48.000 134,207.62 0
2434 2596914 M 42 722 57 2011-08-10 08:17:43.000 28 2011-08-11 21:11:36.837 132,833.84 0
1594 1737873 M 30 526 27 2011-04-11 21:44:47.707 85 2011-04-13 10:29:16.297 132,268.59 0
60 22082174 M 27 4431 80 2015-03-06 16:34:24.453 317 2015-03-08 05:12:48.280 131,903.83 0
897 8919648 M 51 3263 13 2013-06-14 20:34:07.740 196 2013-06-16 08:40:41.013 129,993.27 0
837 41189538 F 29 7199 318 2017-03-25 14:44:28.000 426 2017-03-27 02:50:00.000 129,932.00 0

217 rows × 10 columns


In [ ]: