In [63]:
import numpy as np 
import pandas as pd

# plots
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

trip= pd.read_csv('trip.csv')
#wed= pd.read_csv('weather.csv')

In [15]:
trip.sample(2)
#trip.dtypes


Out[15]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
178891 384502 1347 7/29/2014 8:50 Mountain View Caltrain Station 28 7/29/2014 9:13 San Antonio Shopping Center 31 146 Subscriber 94133
11479 20196 902 9/13/2013 11:59 South Van Ness at Market 66 9/13/2013 12:14 Commercial at Montgomery 45 202 Subscriber 94115

In [4]:
#Cantidad de viajes por bike
trip = trip['duration'].sum()
trip


Out[4]:
742280971

In [53]:
# Number of duration per bike -  consultar*
#trip.groupby('duration').sum()['bike_id'].plot(figsize=(14,4));
trip.groupby('bike_id').sum()['duration'].plot(figsize=(14,4));
#trip3 = trip[trip['bike_id'] == 34]
#trip3



In [2]:
# Top five bikes durations - falta ver como lo puedo graficar. Grafico anterior*
trip.sort_values('duration',ascending=False).head(5)


Out[2]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
573566 568474 17270400 12/6/2014 21:59 South Van Ness at Market 66 6/24/2015 20:18 2nd at Folsom 62 535 Customer 95531
382718 825850 2137000 6/28/2015 21:50 Market at Sansome 77 7/23/2015 15:27 Yerba Buena Center of the Arts (3rd @ Howard) 68 466 Customer 97213
440339 750192 1852590 5/2/2015 6:17 San Antonio Shopping Center 31 5/23/2015 16:53 Castro Street and El Camino Real 32 680 Subscriber 94024
371066 841176 1133540 7/10/2015 10:35 University and Emerson 35 7/23/2015 13:27 University and Emerson 35 262 Customer 94306
80510 111309 722236 11/30/2013 13:29 University and Emerson 35 12/8/2013 22:06 University and Emerson 35 247 Customer 94301

In [5]:
# Top five bikes less durations 
#trip.sort_index('duration',ascending=False).head(5)


#Are trip getting longer or shorter?
# susbcripciones length by time, are subscriptions getting longer or shorter ? 
trip.groupby('subscription_type').mean().loc[:,'duration'].plot(linewidth=2,figsize=(12,8));



In [16]:
trip.loc[trip.duration>722236,["id","bike_id", "duration"]].sort_values(by="duration",ascending=False).head(10)


Out[16]:
id bike_id duration
573566 568474 535 17270400
382718 825850 466 2137000
440339 750192 680 1852590
371066 841176 262 1133540

In [17]:
trip.loc[trip.duration<722236,["id","bike_id", "duration"]].sort_values(by="duration",ascending=False).head(10)


Out[17]:
id bike_id duration
606063 522337 692 720454
223016 323594 633 716480
195379 361321 251 715339
421839 774999 230 688899
524521 635260 132 655939
287337 237942 369 644771
93400 129504 653 619322
443792 745640 196 611240
524518 635263 9 602338
20535 32121 168 597517

In [9]:
# Histogram of ratings*
trip['duration'].hist(figsize=(16,8));



In [10]:
# Histogram of durations
#trip = trip['duration'].value_counts()
trip.hist(figsize=(12,8),bins=20,);



In [33]:
# Histogram of bikes
trip = trip['bike_id'].value_counts()
trip = trip[trip > 5000]
#trip.plot(kind='bar',rot=90,figsize=(12,8));

In [ ]:
#stat= pd.read_csv('status.csv', low_memory=False)
#stat.head(3)
stat= pd.read_csv('status.csv', iterator=True, chunksize=500)  # gives TextFileReader, which is iterable with chunks of 1000 rows.
df = pd.concat(stat, ignore_index=True)  # df is DataFrame. If errors, do `list(tp)` instead of `tp`
df.sample(3)

In [30]:
#CON SIOMA
#rated_movies = pd.merge(movies,ratings, left_on='movieId', right_on='movieId', how='inner')
#rated_movies.sample(2)

#Cantidad de viajes por dia con el mismo startdate
#test = trip.groupby(['start_date'])['id'].count()
#test.head()

tw = pd.merge(trip,wed, left_on='start_date', right_on='date', how='outer')
tw.head()
#tw.sample(2)


Out[30]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type ... mean_visibility_miles min_visibility_miles max_wind_Speed_mph mean_wind_speed_mph max_gust_speed_mph precipitation_inches cloud_cover events wind_dir_degrees zip_code_y
0 4576.0 63.0 8/29/2013 14:13 South Van Ness at Market 66.0 8/29/2013 14:14 South Van Ness at Market 66.0 520.0 Subscriber ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 4578.0 4521.0 8/29/2013 14:13 San Francisco City Hall 58.0 8/29/2013 15:29 Townsend at 7th 65.0 623.0 Customer ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 4577.0 4524.0 8/29/2013 14:13 San Francisco City Hall 58.0 8/29/2013 15:28 Townsend at 7th 65.0 632.0 Customer ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 4607.0 70.0 8/29/2013 14:42 San Jose City Hall 10.0 8/29/2013 14:43 San Jose City Hall 10.0 661.0 Subscriber ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 4130.0 71.0 8/29/2013 10:16 Mountain View City Hall 27.0 8/29/2013 10:17 Mountain View City Hall 27.0 48.0 Subscriber ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 35 columns


In [77]:
# Top five bikes start_date - falta ver como lo puedo graficar. Grafico anterior*
#ver como puedo hacer los de menor salida 
trip.sort_values('start_date',ascending=False).head(5)


Out[77]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
661658 444343 483 9/9/2014 9:59 Yerba Buena Center of the Arts (3rd @ Howard) 68 9/9/2014 10:07 Market at 10th 67 593 Subscriber 94107
661659 444342 228 9/9/2014 9:59 Townsend at 7th 65 9/9/2014 10:02 San Francisco Caltrain (Townsend at 4th) 70 411 Subscriber 94107
661660 444341 630 9/9/2014 9:55 Harry Bridges Plaza (Ferry Building) 50 9/9/2014 10:05 2nd at South Park 64 394 Subscriber 94706
661661 444340 513 9/9/2014 9:54 Harry Bridges Plaza (Ferry Building) 50 9/9/2014 10:02 Market at 4th 76 590 Customer 94133
661664 444337 561 9/9/2014 9:53 Harry Bridges Plaza (Ferry Building) 50 9/9/2014 10:02 2nd at Townsend 61 393 Subscriber 94960

In [5]:
# Number of trip by station_id
#trip['start_station_name'].value_counts()[0:19].plot(kind='bar',figsize=(14,4));
#TopOstations = trip['start_station_name'].value_counts()[0:29].index.tolist()
#Dataframe[column].value_counts().index.tolist()
#['apple', 'sausage', 'banana', 'cheese']
trip.sort_index(by='duration', ascending=[False])


/home/dai/anaconda2/lib/python2.7/site-packages/ipykernel/__main__.py:6: FutureWarning: by argument to sort_index is deprecated, pls use .sort_values(by=...)
Out[5]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
573566 568474 17270400 12/6/2014 21:59 South Van Ness at Market 66 6/24/2015 20:18 2nd at Folsom 62 535 Customer 95531
382718 825850 2137000 6/28/2015 21:50 Market at Sansome 77 7/23/2015 15:27 Yerba Buena Center of the Arts (3rd @ Howard) 68 466 Customer 97213
440339 750192 1852590 5/2/2015 6:17 San Antonio Shopping Center 31 5/23/2015 16:53 Castro Street and El Camino Real 32 680 Subscriber 94024
371066 841176 1133540 7/10/2015 10:35 University and Emerson 35 7/23/2015 13:27 University and Emerson 35 262 Customer 94306
80510 111309 722236 11/30/2013 13:29 University and Emerson 35 12/8/2013 22:06 University and Emerson 35 247 Customer 94301
606063 522337 720454 10/30/2014 8:29 Redwood City Caltrain Station 22 11/7/2014 15:36 Stanford in Redwood City 25 692 Customer 94010
223016 323594 716480 6/13/2014 16:57 Harry Bridges Plaza (Ferry Building) 50 6/21/2014 23:59 Civic Center BART (7th at Market) 72 633 Subscriber 94131
195379 361321 715339 7/13/2014 5:50 Arena Green / SAP Center 14 7/21/2014 12:32 Adobe on Almaden 5 251 Customer nil
421839 774999 688899 5/20/2015 15:27 Palo Alto Caltrain Station 34 5/28/2015 14:49 California Ave Caltrain Station 36 230 Customer nil
524521 635260 655939 2/8/2015 3:05 San Jose Civic Center 3 2/15/2015 17:17 SJSU 4th at San Carlos 12 132 Customer 89451
287337 237942 644771 4/6/2014 3:37 South Van Ness at Market 66 4/13/2014 14:44 Clay at Battery 41 369 Customer 94014
93400 129504 619322 12/18/2013 9:16 San Jose Diridon Caltrain Station 2 12/25/2013 13:18 SJSU 4th at San Carlos 12 653 Subscriber 94041
443792 745640 611240 4/29/2015 9:41 University and Emerson 35 5/6/2015 11:28 San Antonio Shopping Center 31 196 Customer 81
524518 635263 602338 2/8/2015 3:09 San Jose Civic Center 3 2/15/2015 2:28 San Jose Civic Center 3 9 Customer 89451
20535 32121 597517 9/23/2013 18:24 California Ave Caltrain Station 36 9/30/2013 16:23 Palo Alto Caltrain Station 34 168 Customer 95051
418295 779645 594550 5/24/2015 13:33 San Pedro Square 6 5/31/2015 10:42 San Pedro Square 6 39 Customer 95118
119830 166010 586356 1/25/2014 20:00 San Antonio Caltrain Station 29 2/1/2014 14:53 San Antonio Caltrain Station 29 693 Customer 94303
152689 421286 560792 8/23/2014 9:48 Rengstorff Avenue / California Street 33 8/29/2014 21:35 Mountain View Caltrain Station 28 90 Customer 94040
393058 812544 552697 6/18/2015 10:45 Mountain View Caltrain Station 28 6/24/2015 20:17 Palo Alto Caltrain Station 34 70 Customer 90024
505391 661326 531240 2/27/2015 15:45 San Jose Diridon Caltrain Station 2 3/5/2015 19:19 SJSU 4th at San Carlos 12 662 Customer 95112
252081 284683 517856 5/14/2014 21:14 Castro Street and El Camino Real 32 5/20/2014 21:05 San Antonio Shopping Center 31 670 Customer 94040
517402 644642 505483 2/14/2015 17:30 San Pedro Square 6 2/20/2015 13:54 San Pedro Square 6 22 Customer 95136
379557 830135 502617 7/1/2015 14:12 Redwood City Medical Center 26 7/7/2015 9:49 Redwood City Caltrain Station 22 15 Customer nil
640982 473196 489435 9/28/2014 18:21 Japantown 9 10/4/2014 10:19 Japantown 9 685 Customer 95116
155325 417776 464952 8/21/2014 6:16 Market at 4th 76 8/26/2014 15:26 Davis at Jackson 42 466 Customer 94102
619315 504411 457110 10/17/2014 18:49 Market at 10th 67 10/23/2014 1:47 Market at 10th 67 415 Customer 94582
43549 62246 429384 10/17/2013 16:06 Davis at Jackson 42 10/22/2013 15:23 Powell Street BART 39 465 Customer 43113
287336 237943 411846 4/6/2014 3:38 South Van Ness at Market 66 4/10/2014 22:02 Civic Center BART (7th at Market) 72 634 Customer 94014
574968 566582 410157 12/4/2014 21:55 Harry Bridges Plaza (Ferry Building) 50 12/9/2014 15:51 2nd at Folsom 62 572 Customer nil
531427 626177 405271 1/30/2015 21:25 Japantown 9 2/4/2015 14:00 Paseo de San Antonio 7 117 Customer 95112
... ... ... ... ... ... ... ... ... ... ... ...
2887 8576 60 9/2/2013 9:40 Harry Bridges Plaza (Ferry Building) 50 9/2/2013 9:41 Harry Bridges Plaza (Ferry Building) 50 354 Subscriber 94102
616421 508274 60 10/21/2014 11:57 San Francisco Caltrain 2 (330 Townsend) 69 10/21/2014 11:58 San Francisco Caltrain 2 (330 Townsend) 69 578 Subscriber 94107
88852 122928 60 12/12/2013 9:18 Powell at Post (Union Square) 71 12/12/2013 9:19 Powell at Post (Union Square) 71 614 Subscriber 94103
406816 794867 60 6/4/2015 19:13 Steuart at Market 74 6/4/2015 19:14 Steuart at Market 74 461 Subscriber 94103
92925 128828 60 12/17/2013 17:28 Steuart at Market 74 12/17/2013 17:29 Steuart at Market 74 409 Subscriber 94105
511423 652693 60 2/21/2015 12:45 Powell Street BART 39 2/21/2015 12:46 Powell Street BART 39 532 Customer 92111
510302 654272 60 2/23/2015 14:12 Beale at Market 56 2/23/2015 14:13 Beale at Market 56 672 Subscriber 94402
98183 136099 60 12/26/2013 15:55 2nd at South Park 64 12/26/2013 15:56 2nd at South Park 64 423 Subscriber 94107
509279 655741 60 2/24/2015 9:18 2nd at Townsend 61 2/24/2015 9:19 2nd at Townsend 61 498 Subscriber 94403
252114 284638 60 5/14/2014 20:11 Market at 10th 67 5/14/2014 20:12 Market at 10th 67 372 Subscriber 94107
456799 728496 60 4/16/2015 13:39 Embarcadero at Sansome 60 4/16/2015 13:40 Embarcadero at Sansome 60 496 Subscriber 94521
213972 335844 60 6/23/2014 17:31 Powell Street BART 39 6/23/2014 17:32 Powell Street BART 39 407 Subscriber 94102
556034 592699 60 1/6/2015 11:38 South Van Ness at Market 66 1/6/2015 11:39 Market at 10th 67 621 Subscriber 94123
531011 626726 60 2/1/2015 8:02 San Francisco Caltrain 2 (330 Townsend) 69 2/1/2015 8:03 San Francisco Caltrain 2 (330 Townsend) 69 452 Subscriber 94107
55127 77650 60 10/30/2013 18:22 Harry Bridges Plaza (Ferry Building) 50 10/30/2013 18:23 Harry Bridges Plaza (Ferry Building) 50 416 Subscriber 94110
181744 380584 60 7/25/2014 17:30 Castro Street and El Camino Real 32 7/25/2014 17:31 Castro Street and El Camino Real 32 155 Subscriber 94133
115725 160479 60 1/21/2014 13:01 2nd at Townsend 61 1/21/2014 13:02 2nd at Townsend 61 320 Subscriber 94115
547555 603851 60 1/14/2015 12:37 Golden Gate at Polk 59 1/14/2015 12:38 Golden Gate at Polk 59 548 Subscriber 94109
345157 874889 60 8/4/2015 9:57 San Francisco Caltrain 2 (330 Townsend) 69 8/4/2015 9:58 San Francisco Caltrain 2 (330 Townsend) 69 374 Subscriber 94107
666342 438041 60 9/4/2014 10:53 Civic Center BART (7th at Market) 72 9/4/2014 10:54 Civic Center BART (7th at Market) 72 291 Subscriber 94117
40000 57581 60 10/14/2013 14:47 Clay at Battery 41 10/14/2013 14:48 Clay at Battery 41 368 Subscriber 94158
597389 534343 60 11/7/2014 9:45 Temporary Transbay Terminal (Howard at Beale) 55 11/7/2014 9:46 Temporary Transbay Terminal (Howard at Beale) 55 300 Subscriber 94107
542394 611456 60 1/20/2015 17:46 Embarcadero at Folsom 51 1/20/2015 17:47 Embarcadero at Folsom 51 471 Subscriber 95032
194364 362788 60 7/14/2014 14:09 Powell Street BART 39 7/14/2014 14:10 Powell Street BART 39 372 Subscriber 94102
633879 483333 60 10/4/2014 19:21 Yerba Buena Center of the Arts (3rd @ Howard) 68 10/4/2014 19:22 Yerba Buena Center of the Arts (3rd @ Howard) 68 560 Customer nil
10457 18792 60 9/12/2013 10:09 Civic Center BART (7th at Market) 72 9/12/2013 10:10 Civic Center BART (7th at Market) 72 632 Subscriber 94103
326026 900176 60 8/21/2015 13:24 Temporary Transbay Terminal (Howard at Beale) 55 8/21/2015 13:25 Temporary Transbay Terminal (Howard at Beale) 55 522 Subscriber 94109
450401 736948 60 4/22/2015 17:47 Embarcadero at Folsom 51 4/22/2015 17:48 Embarcadero at Folsom 51 585 Subscriber 94103
72159 100171 60 11/18/2013 15:01 Temporary Transbay Terminal (Howard at Beale) 55 11/18/2013 15:02 Temporary Transbay Terminal (Howard at Beale) 55 469 Subscriber 94158
2925 8651 60 9/2/2013 10:50 San Francisco Caltrain 2 (330 Townsend) 69 9/2/2013 10:51 San Francisco Caltrain 2 (330 Townsend) 69 544 Subscriber 94107

669959 rows × 11 columns


In [15]:
#Cargo los datos nuevamente pero parseando las duraciones
trip2 = pd.read_csv('trip.csv', parse_dates=['duration'])
#Observacion de los tipos de datos
trip2.tail(3)


Out[15]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
669956 432949 538 9/1/2014 0:05 South Van Ness at Market 66 9/1/2014 0:14 5th at Howard 57 466 Customer 32
669957 432948 568 9/1/2014 0:05 South Van Ness at Market 66 9/1/2014 0:15 5th at Howard 57 461 Customer 32
669958 432947 569 9/1/2014 0:05 South Van Ness at Market 66 9/1/2014 0:15 5th at Howard 57 318 Customer 32

In [20]:
#podemos los primeros valores 
trip.head(3)


Out[20]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
0 4576 63 8/29/2013 14:13 South Van Ness at Market 66 8/29/2013 14:14 South Van Ness at Market 66 520 Subscriber 94127
1 4607 70 8/29/2013 14:42 San Jose City Hall 10 8/29/2013 14:43 San Jose City Hall 10 661 Subscriber 95138
2 4130 71 8/29/2013 10:16 Mountain View City Hall 27 8/29/2013 10:17 Mountain View City Hall 27 48 Subscriber 97214

In [22]:
#y los ultimos
trip.tail(5)


Out[22]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
669954 432951 619 9/1/2014 4:21 Powell Street BART 39 9/1/2014 4:32 Townsend at 7th 65 335 Subscriber 94118
669955 432950 6712 9/1/2014 3:16 Harry Bridges Plaza (Ferry Building) 50 9/1/2014 5:08 San Francisco Caltrain (Townsend at 4th) 70 259 Customer 44100
669956 432949 538 9/1/2014 0:05 South Van Ness at Market 66 9/1/2014 0:14 5th at Howard 57 466 Customer 32
669957 432948 568 9/1/2014 0:05 South Van Ness at Market 66 9/1/2014 0:15 5th at Howard 57 461 Customer 32
669958 432947 569 9/1/2014 0:05 South Van Ness at Market 66 9/1/2014 0:15 5th at Howard 57 318 Customer 32

In [23]:
trip['start_station_name'][:3] #realizamos una proyeccion


Out[23]:
0    63
1    70
2    71
Name: duration, dtype: int64

In [78]:
duration_station = trip['duration'].value_counts()
%matplotlib notebook
# top 20
duration_station[:5].plot('bar');



In [79]:
fig = plt.figure(figsize=(10,4));

trip['subscription_type'].value_counts().plot('bar');



In [75]:
#Que estacion de Origen tiene las mayor cantidad¶
trip['start_station_name'].value_counts()[:10].plot('bar');



In [69]:
trip['start_station_name'].value_counts()[:20].plot('bar');



In [73]:
trip['start_station_name'].value_counts()[10:].plot('bar');



In [ ]:


In [ ]: