notebook.community

Edit and run



In [18]:

    
import numpy as np 
import pandas as pd

# plots
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

trip= pd.read_csv('trip.csv')
#wed= pd.read_csv('weather.csv')



In [ ]:

    
trip.sample(2)
#trip.dtypes



In [ ]:

    
#Cantidad de viajes por bike
trip = trip['duration'].sum()
trip



In [ ]:

    
# Number of duration per bike -  consultar*
#trip.groupby('duration').count()['bike_id'].plot(figsize=(14,4));
trip.groupby('bike_id').count()['duration'].plot(figsize=(14,4));



In [ ]:

    
# Top five bikes durations - falta ver como lo puedo graficar. Grafico anterior*
trip.sort_values('duration',ascending=False).head(5)



In [ ]:

    
# Top five bikes less durations 
#trip.sort_index('duration',ascending=False).head(5)


#Are trip getting longer or shorter?
# susbcripciones length by time, are subscriptions getting longer or shorter ? 
trip.groupby('subscription_type').mean().loc[:,'duration'].plot(linewidth=2,figsize=(12,8));



In [ ]:

    
trip.loc[trip.duration>722236,["id","bike_id", "duration"]].sort_values(by="duration",ascending=False).head(10)



In [ ]:

    
trip.loc[trip.duration<722236,["id","bike_id", "duration"]].sort_values(by="duration",ascending=False).head(10)



In [ ]:

    
# Histogram of ratings*
trip['duration'].hist(figsize=(16,8));



In [ ]:

    
# Histogram of durations
trip = trip['duration'].value_counts()
trip.hist(figsize=(12,8),bins=20,);



In [ ]:

    
# Histogram of bikes
trip = trip['bike_id'].value_counts()
trip = trip[trip > 5000]
#trip.plot(kind='bar',rot=90,figsize=(12,8));



In [ ]:

    
#stat= pd.read_csv('status.csv', low_memory=False)
#stat.head(3)
stat= pd.read_csv('status.csv', iterator=True, chunksize=500)  # gives TextFileReader, which is iterable with chunks of 1000 rows.
df = pd.concat(stat, ignore_index=True)  # df is DataFrame. If errors, do `list(tp)` instead of `tp`
df.sample(3)



In [ ]:

    
#CON SIOMA
#rated_movies = pd.merge(movies,ratings, left_on='movieId', right_on='movieId', how='inner')
#rated_movies.sample(2)

#Cantidad de viajes por dia con el mismo startdate
#test = trip.groupby(['start_date'])['id'].count()
#test.head()

tw = pd.merge(trip,wed, left_on='start_date', right_on='date', how='outer')
tw.head()
#tw.sample(2)



In [ ]:

    
# Top five bikes start_date - falta ver como lo puedo graficar. Grafico anterior*
#ver como puedo hacer los de menor salida 
trip.sort_values('start_date',ascending=False).head(5)



In [ ]:

    
# Number of trip by station_id
#trip['start_station_name'].value_counts()[0:19].plot(kind='bar',figsize=(14,4));
#TopOstations = trip['start_station_name'].value_counts()[0:29].index.tolist()
#Dataframe[column].value_counts().index.tolist()
#['apple', 'sausage', 'banana', 'cheese']
trip.sort_index(by='duration', ascending=[False])



In [ ]:



In [ ]:

    
#df.pivot(index='date', columns='variable', values='value')
tp1 = trip[trip['start_station_name'].isin(TopOperators)].groupby(['Operator','cluster']).size().reset_index()
ap1.columns=['Operator','cluster','size']
ap1 = ap1.pivot(index='cluster', columns='Operator',values='size')
ap1.fillna(0,inplace=True)
fig, ax = plt.subplots(figsize=(14,16))         # Sample figsize in inches
sns.heatmap(ap1, annot=False, linewidths=.5, ax=ax);



In [ ]:

    
trip.groupby('bike_id')['duration'].plot(figsize=(14,4));



In [19]:

    
#Visualizacion de la cantidad de viajes segun cada bikeID
#trip2 = trip.groupby('bike_id').count()['duration']
#trip2
trip.groupby('bike_id').count()['duration'].plot(figsize=(14,4));



In [ ]:

    
trip2



In [ ]:

    
trip3 = trip[trip['bike_id'] == 34]
trip3



In [20]:

    
#Visualizacion de la duracion de viajes en segundos segun bikeID
trip.groupby('bike_id').sum()['duration'].plot(figsize=(14,4));



In [21]:

    
#Cambiamos la duracion de segundos a minutos
trip['duration'] = trip['duration'].apply(lambda x: x/60)
trip.head(10)









    Out[21]:






  
    
      
      id
      duration
      start_date
      start_station_name
      start_station_id
      end_date
      end_station_name
      end_station_id
      bike_id
      subscription_type
      zip_code
    
  
  
    
      0
      4576
      1
      8/29/2013 14:13
      South Van Ness at Market
      66
      8/29/2013 14:14
      South Van Ness at Market
      66
      520
      Subscriber
      94127
    
    
      1
      4607
      1
      8/29/2013 14:42
      San Jose City Hall
      10
      8/29/2013 14:43
      San Jose City Hall
      10
      661
      Subscriber
      95138
    
    
      2
      4130
      1
      8/29/2013 10:16
      Mountain View City Hall
      27
      8/29/2013 10:17
      Mountain View City Hall
      27
      48
      Subscriber
      97214
    
    
      3
      4251
      1
      8/29/2013 11:29
      San Jose City Hall
      10
      8/29/2013 11:30
      San Jose City Hall
      10
      26
      Subscriber
      95060
    
    
      4
      4299
      1
      8/29/2013 12:02
      South Van Ness at Market
      66
      8/29/2013 12:04
      Market at 10th
      67
      319
      Subscriber
      94103
    
    
      5
      4927
      1
      8/29/2013 18:54
      Golden Gate at Polk
      59
      8/29/2013 18:56
      Golden Gate at Polk
      59
      527
      Subscriber
      94109
    
    
      6
      4500
      1
      8/29/2013 13:25
      Santa Clara at Almaden
      4
      8/29/2013 13:27
      Adobe on Almaden
      5
      679
      Subscriber
      95112
    
    
      7
      4563
      1
      8/29/2013 14:02
      San Salvador at 1st
      8
      8/29/2013 14:04
      San Salvador at 1st
      8
      687
      Subscriber
      95112
    
    
      8
      4760
      1
      8/29/2013 17:01
      South Van Ness at Market
      66
      8/29/2013 17:03
      South Van Ness at Market
      66
      553
      Subscriber
      94103
    
    
      9
      4258
      1
      8/29/2013 11:33
      San Jose City Hall
      10
      8/29/2013 11:35
      MLK Library
      11
      107
      Subscriber
      95060



In [23]:

    
#Visualizacion de la duracion de viajes en segundos segun bikeID
trip.groupby('bike_id').sum()['duration'].plot(figsize=(14,4));



In [24]:

    
#Cambiamos la duracion de segundos a horas
trip['duration'] = trip['duration'].apply(lambda x: x/3600)
#Visualizacion de la duracion de viajes en segundos segun bikeID
trip.groupby('bike_id').sum()['duration'].plot(figsize=(14,4));



In [27]:

    
conteoDeViajesSegunStartStation = trip['start_station_name'].value_counts()
conteoDeViajesSegunStartStation









    Out[27]:





San Francisco Caltrain (Townsend at 4th)         49092
San Francisco Caltrain 2 (330 Townsend)          33742
Harry Bridges Plaza (Ferry Building)             32934
Embarcadero at Sansome                           27713
Temporary Transbay Terminal (Howard at Beale)    26089
2nd at Townsend                                  25837
Steuart at Market                                24838
Market at Sansome                                24172
Townsend at 7th                                  23724
Market at 10th                                   20272
Market at 4th                                    20165
2nd at South Park                                18496
Powell Street BART                               18378
Grant Avenue at Columbus Avenue                  16306
2nd at Folsom                                    15940
Beale at Market                                  15709
Embarcadero at Bryant                            14811
Civic Center BART (7th at Market)                14102
Embarcadero at Folsom                            14054
5th at Howard                                    13526
Howard at 2nd                                    13163
South Van Ness at Market                         12880
Powell at Post (Union Square)                    12496
Mechanics Plaza (Market at Battery)              12224
Spear at Folsom                                  11949
Commercial at Montgomery                         11888
Yerba Buena Center of the Arts (3rd @ Howard)    11251
Embarcadero at Vallejo                           10592
Broadway St at Battery St                        10310
Clay at Battery                                   9994
                                                 ...  
Castro Street and El Camino Real                  2035
MLK Library                                       2034
San Antonio Caltrain Station                      1990
Japantown                                         1910
San Antonio Shopping Center                       1818
Ryland Park                                       1747
San Salvador at 1st                               1692
St James Park                                     1680
Evelyn Park and Ride                              1667
San Jose Civic Center                             1594
Redwood City Caltrain Station                     1554
University and Emerson                            1512
Arena Green / SAP Center                          1496
SJSU - San Salvador at 9th                        1473
Washington at Kearney                             1472
Cowper at University                              1374
Adobe on Almaden                                  1257
SJSU 4th at San Carlos                            1170
Rengstorff Avenue / California Street             1129
California Ave Caltrain Station                   1026
Santa Clara County Civic Center                    840
Park at Olive                                      750
Stanford in Redwood City                           436
Mezes Park                                         341
Redwood City Medical Center                        311
San Mateo County Center                            287
Franklin at Maple                                  224
Redwood City Public Library                        213
Broadway at Main                                    67
San Jose Government Center                          23
Name: start_station_name, dtype: int64



In [28]:

    
#Visualizacion de cantidad de viajes segun start_station_name
trip.groupby('start_station_name').count()['duration'].plot(figsize=(14,4));



In [29]:

    
#Visualizacion de cantidad de viajes segun start_station_id
trip.groupby('start_station_id').count()['duration'].plot(figsize=(14,4));



In [31]:

    
#Visualizacion de cantidad de viajes segun start_station_name
trip.groupby('start_station_name').count()['duration'].plot('bar',figsize=(14,4));



In [32]:

    
#Visualizacion de cantidad de viajes segun end_station_name
trip.groupby('end_station_name').count()['duration'].plot('bar',figsize=(14,4));



In [ ]:

	id	duration	start_date	start_station_name	start_station_id	end_date	end_station_name	end_station_id	bike_id	subscription_type	zip_code
0	4576	1	8/29/2013 14:13	South Van Ness at Market	66	8/29/2013 14:14	South Van Ness at Market	66	520	Subscriber	94127
1	4607	1	8/29/2013 14:42	San Jose City Hall	10	8/29/2013 14:43	San Jose City Hall	10	661	Subscriber	95138
2	4130	1	8/29/2013 10:16	Mountain View City Hall	27	8/29/2013 10:17	Mountain View City Hall	27	48	Subscriber	97214
3	4251	1	8/29/2013 11:29	San Jose City Hall	10	8/29/2013 11:30	San Jose City Hall	10	26	Subscriber	95060
4	4299	1	8/29/2013 12:02	South Van Ness at Market	66	8/29/2013 12:04	Market at 10th	67	319	Subscriber	94103
5	4927	1	8/29/2013 18:54	Golden Gate at Polk	59	8/29/2013 18:56	Golden Gate at Polk	59	527	Subscriber	94109
6	4500	1	8/29/2013 13:25	Santa Clara at Almaden	4	8/29/2013 13:27	Adobe on Almaden	5	679	Subscriber	95112
7	4563	1	8/29/2013 14:02	San Salvador at 1st	8	8/29/2013 14:04	San Salvador at 1st	8	687	Subscriber	95112
8	4760	1	8/29/2013 17:01	South Van Ness at Market	66	8/29/2013 17:03	South Van Ness at Market	66	553	Subscriber	94103
9	4258	1	8/29/2013 11:33	San Jose City Hall	10	8/29/2013 11:35	MLK Library	11	107	Subscriber	95060