In [19]:
# magic function para hacer que los graficos de matplotlib se renderizen en el notebook.
%matplotlib inline

import datetime as datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.style.use('default') # Make the graphs a bit prettier
plt.rcParams['figure.figsize'] = (15, 5)

In [ ]:
#Cargo los datos de los viajes pero sin el casteo a de fecha a Date
trip = pd.read_csv('trip.csv', low_memory=False)

In [ ]:
#Observacion de los tipos de los datos
trip.dtypes

In [20]:
#Cargo los datos nuevamente pero parseando las fechas a DataTime
trip2 = pd.read_csv('trip.csv', parse_dates=['start_date','end_date'])

In [21]:
#Observacion de los tipos de datos
trip2.head(10)


Out[21]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
0 4576 63 2013-08-29 14:13:00 South Van Ness at Market 66 2013-08-29 14:14:00 South Van Ness at Market 66 520 Subscriber 94127
1 4607 70 2013-08-29 14:42:00 San Jose City Hall 10 2013-08-29 14:43:00 San Jose City Hall 10 661 Subscriber 95138
2 4130 71 2013-08-29 10:16:00 Mountain View City Hall 27 2013-08-29 10:17:00 Mountain View City Hall 27 48 Subscriber 97214
3 4251 77 2013-08-29 11:29:00 San Jose City Hall 10 2013-08-29 11:30:00 San Jose City Hall 10 26 Subscriber 95060
4 4299 83 2013-08-29 12:02:00 South Van Ness at Market 66 2013-08-29 12:04:00 Market at 10th 67 319 Subscriber 94103
5 4927 103 2013-08-29 18:54:00 Golden Gate at Polk 59 2013-08-29 18:56:00 Golden Gate at Polk 59 527 Subscriber 94109
6 4500 109 2013-08-29 13:25:00 Santa Clara at Almaden 4 2013-08-29 13:27:00 Adobe on Almaden 5 679 Subscriber 95112
7 4563 111 2013-08-29 14:02:00 San Salvador at 1st 8 2013-08-29 14:04:00 San Salvador at 1st 8 687 Subscriber 95112
8 4760 113 2013-08-29 17:01:00 South Van Ness at Market 66 2013-08-29 17:03:00 South Van Ness at Market 66 553 Subscriber 94103
9 4258 114 2013-08-29 11:33:00 San Jose City Hall 10 2013-08-29 11:35:00 MLK Library 11 107 Subscriber 95060

In [4]:
#A cada dato de la columna de comienzo del viaje (fecha) le aplico una funcion para saber en que dia de la semana fueron
#realizados los viajes y se calculan la cantidad por mes
#dayofweek nos da los dias ordenados desde 0(lunes) hasta 6(domingo)
trip2['start_date'].apply(lambda x: x.dayofweek).value_counts()


Out[4]:
1    122259
2    120201
3    119089
0    115873
4    109361
5     44785
6     38391
Name: start_date, dtype: int64

In [5]:
#Realizo un plot de barras para visualizar lo calculado en el paso anterior
plt = trip2['start_date'].apply(lambda x: x.dayofweek).value_counts().plot('bar')
plt.set_xlabel('Dias de la semana')
plt.set_ylabel('Cantidad')
plt.set_title('Cantidad de viajes por dia de la semana')
plt.set_xticklabels(['Martes','Miercoles','Jueves','Lunes','Viernes','Sabado','Domingo'], fontdict=None, minor=False)
#plt.ylabel('Cantidad de viajes ')
#plt.xlabel('dias de la semana')
#plt.show()


Out[5]:
[<matplotlib.text.Text at 0xac8f2f0>,
 <matplotlib.text.Text at 0xac8fb30>,
 <matplotlib.text.Text at 0xb0e10f0>,
 <matplotlib.text.Text at 0xb0e1470>,
 <matplotlib.text.Text at 0xb0e17f0>,
 <matplotlib.text.Text at 0xb0e1b70>,
 <matplotlib.text.Text at 0xb0e1ef0>]

In [ ]:
plt.set_ylabel('start_date')

In [ ]:
contadorPorDia = trip2['start_date'].apply(lambda x: x.dayofweek).value_counts()
contadorPorDia

In [ ]:
tripAgrupadoPorDias = trip2[['start_date']]
tripAgrupadoPorDias

In [ ]:
tripAgrupadoPorDias['diaDeLaSemana'] =  tripAgrupadoPorDias['start_date'].apply(lambda x: x.dayofweek)

In [ ]:
agrupacion = tripAgrupadoPorDias.groupby('diaDeLaSemana').count
agrupacion

In [6]:
#Realizo un plot en el cual observamos la cantidad de viajes segun el mes del año
plt = trip2['start_date'].apply(lambda x: x.month).value_counts().plot('bar')
plt.set_xlabel('meses')
plt.set_ylabel('Cantidad de viajes')
plt.set_title('Cantidad de viajes por dia de la semana')


Out[6]:
<matplotlib.text.Text at 0xa8a27d0>

In [25]:
agrupacion = trip2[['start_date']]
agrupacion['cantidad'] = 1
agrupacion = agrupacion.groupby('start_date').aggregate(sum)
agrupacion
agrupacion.plot(rot=90,xticks=range(2013,2015),linewidth=2,figsize=(12,8),label='Viajes Realizados');

#Primero agrupamos por start_date
#agrupacion = trip2.groupby('start_date').aggregate(sum)
#agrupacion

#plt.figure(
#trip2.groupby('start_date').sum().loc[:,'start_date'].plot(rot=90,xticks=range(2013,2015),linewidth=2,figsize=(12,8),label='Viajes Realizados');
#plt.legend();


C:\ProgramData\Anaconda2\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
Out[25]:
cantidad
start_date
2013-08-29 09:08:00 1
2013-08-29 09:24:00 3
2013-08-29 09:25:00 1
2013-08-29 09:31:00 1
2013-08-29 09:35:00 1
2013-08-29 09:36:00 1
2013-08-29 09:38:00 1
2013-08-29 09:41:00 1
2013-08-29 09:42:00 1
2013-08-29 09:57:00 1
2013-08-29 10:11:00 1
2013-08-29 10:13:00 1
2013-08-29 10:14:00 2
2013-08-29 10:16:00 1
2013-08-29 10:17:00 2
2013-08-29 10:19:00 2
2013-08-29 10:20:00 1
2013-08-29 10:42:00 1
2013-08-29 10:47:00 1
2013-08-29 10:49:00 1
2013-08-29 10:56:00 1
2013-08-29 11:03:00 1
2013-08-29 11:04:00 2
2013-08-29 11:05:00 1
2013-08-29 11:06:00 1
2013-08-29 11:09:00 1
2013-08-29 11:13:00 1
2013-08-29 11:14:00 1
2013-08-29 11:15:00 1
2013-08-29 11:17:00 2
... ...
2015-08-31 20:33:00 1
2015-08-31 20:34:00 1
2015-08-31 20:36:00 1
2015-08-31 20:37:00 1
2015-08-31 20:39:00 1
2015-08-31 20:44:00 1
2015-08-31 20:46:00 1
2015-08-31 20:53:00 2
2015-08-31 20:58:00 1
2015-08-31 21:01:00 1
2015-08-31 21:06:00 1
2015-08-31 21:07:00 1
2015-08-31 21:11:00 1
2015-08-31 21:16:00 1
2015-08-31 21:17:00 1
2015-08-31 21:19:00 1
2015-08-31 21:25:00 1
2015-08-31 21:31:00 1
2015-08-31 21:39:00 1
2015-08-31 21:44:00 1
2015-08-31 21:49:00 1
2015-08-31 21:57:00 1
2015-08-31 22:12:00 1
2015-08-31 22:16:00 1
2015-08-31 23:07:00 2
2015-08-31 23:09:00 1
2015-08-31 23:10:00 1
2015-08-31 23:11:00 1
2015-08-31 23:13:00 1
2015-08-31 23:26:00 1

361559 rows × 1 columns


In [34]:
#Intento fallido de poner los horas y minutos en 0 para despues agrupar en base a dia mes y año
trip2_con_casteo_por_dia = trip2[trip2['start_date'].apply(lambda x: x.replace(hour=0,minute=0))]
agrupacion2 = pd.DataFrame({'Count' : trip2.groupby(['start_date'])['id'].count()}).reset_index()
agrupacion2
#plt.figure.Figure()
#agrupacion.plot(rot=90,xticks=range(2013,2015),linewidth=2,figsize=(12,8),label='Viajes Realizados');
#plt.legend();


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-34-8862dfc18af8> in <module>()
----> 1 trip2_con_casteo_por_dia = trip2[trip2['start_date'].apply(lambda x: x.replace(hour=0,minute=0))]
      2 agrupacion2 = pd.DataFrame({'Count' : trip2.groupby(['start_date'])['id'].count()}).reset_index()
      3 agrupacion2
      4 #plt.figure.Figure()
      5 #agrupacion.plot(rot=90,xticks=range(2013,2015),linewidth=2,figsize=(12,8),label='Viajes Realizados');

C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
   2051         if isinstance(key, (Series, np.ndarray, Index, list)):
   2052             # either boolean or fancy integer index
-> 2053             return self._getitem_array(key)
   2054         elif isinstance(key, DataFrame):
   2055             return self._getitem_frame(key)

C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\frame.pyc in _getitem_array(self, key)
   2095             return self.take(indexer, axis=0, convert=False)
   2096         else:
-> 2097             indexer = self.ix._convert_to_indexer(key, axis=1)
   2098             return self.take(indexer, axis=1, convert=True)
   2099 

C:\ProgramData\Anaconda2\lib\site-packages\pandas\core\indexing.pyc in _convert_to_indexer(self, obj, axis, is_setter)
   1228                 mask = check == -1
   1229                 if mask.any():
-> 1230                     raise KeyError('%s not in index' % objarr[mask])
   1231 
   1232                 return _values_from_object(indexer)

KeyError: "['2013-08-29T00:00:00.000000000' '2013-08-29T00:00:00.000000000'\n '2013-08-29T00:00:00.000000000' ..., '2014-09-01T00:00:00.000000000'\n '2014-09-01T00:00:00.000000000' '2014-09-01T00:00:00.000000000'] not in index"

In [41]:
trip_con_fecha_en_columnas= trip2 [['start_date']]
trip_con_fecha_en_columnas['dia'] = trip_con_fecha_en_columnas['start_date'].apply(lambda x: x.day) 
trip_con_fecha_en_columnas['mes'] = trip_con_fecha_en_columnas['start_date'].apply(lambda x: x.month)
trip_con_fecha_en_columnas['anio'] = trip_con_fecha_en_columnas['start_date'].apply(lambda x: x.year)
#trip_con_fecha_en_columnas['hora'] = trip_con_fecha_en_columnas['start_date'].apply(lambda x: x.hour)
#trip_con_fecha_en_columnas['minutos'] = trip_con_fecha_en_columnas['start_date'].apply(lambda x: x.minute)
trip_con_fecha_en_columnas


C:\ProgramData\Anaconda2\lib\site-packages\ipykernel\__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
C:\ProgramData\Anaconda2\lib\site-packages\ipykernel\__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
C:\ProgramData\Anaconda2\lib\site-packages\ipykernel\__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[41]:
start_date dia mes anio
0 2013-08-29 14:13:00 29 8 2013
1 2013-08-29 14:42:00 29 8 2013
2 2013-08-29 10:16:00 29 8 2013
3 2013-08-29 11:29:00 29 8 2013
4 2013-08-29 12:02:00 29 8 2013
5 2013-08-29 18:54:00 29 8 2013
6 2013-08-29 13:25:00 29 8 2013
7 2013-08-29 14:02:00 29 8 2013
8 2013-08-29 17:01:00 29 8 2013
9 2013-08-29 11:33:00 29 8 2013
10 2013-08-29 13:52:00 29 8 2013
11 2013-08-29 13:23:00 29 8 2013
12 2013-08-29 19:32:00 29 8 2013
13 2013-08-29 13:57:00 29 8 2013
14 2013-08-29 12:31:00 29 8 2013
15 2013-08-29 16:57:00 29 8 2013
16 2013-08-29 11:25:00 29 8 2013
17 2013-08-29 12:11:00 29 8 2013
18 2013-08-29 22:21:00 29 8 2013
19 2013-08-29 22:06:00 29 8 2013
20 2013-08-29 19:42:00 29 8 2013
21 2013-08-29 12:45:00 29 8 2013
22 2013-08-29 11:40:00 29 8 2013
23 2013-08-29 22:12:00 29 8 2013
24 2013-08-29 10:56:00 29 8 2013
25 2013-08-29 13:53:00 29 8 2013
26 2013-08-29 13:43:00 29 8 2013
27 2013-08-29 13:31:00 29 8 2013
28 2013-08-29 21:43:00 29 8 2013
29 2013-08-29 18:45:00 29 8 2013
... ... ... ... ...
669929 2014-09-01 09:30:00 1 9 2014
669930 2014-09-01 09:29:00 1 9 2014
669931 2014-09-01 09:29:00 1 9 2014
669932 2014-09-01 09:18:00 1 9 2014
669933 2014-09-01 09:16:00 1 9 2014
669934 2014-09-01 08:58:00 1 9 2014
669935 2014-09-01 08:55:00 1 9 2014
669936 2014-09-01 08:55:00 1 9 2014
669937 2014-09-01 08:49:00 1 9 2014
669938 2014-09-01 08:41:00 1 9 2014
669939 2014-09-01 08:31:00 1 9 2014
669940 2014-09-01 08:31:00 1 9 2014
669941 2014-09-01 08:30:00 1 9 2014
669942 2014-09-01 08:26:00 1 9 2014
669943 2014-09-01 08:24:00 1 9 2014
669944 2014-09-01 08:16:00 1 9 2014
669945 2014-09-01 08:12:00 1 9 2014
669946 2014-09-01 08:11:00 1 9 2014
669947 2014-09-01 07:37:00 1 9 2014
669948 2014-09-01 07:37:00 1 9 2014
669949 2014-09-01 07:32:00 1 9 2014
669950 2014-09-01 07:03:00 1 9 2014
669951 2014-09-01 06:58:00 1 9 2014
669952 2014-09-01 05:54:00 1 9 2014
669953 2014-09-01 04:59:00 1 9 2014
669954 2014-09-01 04:21:00 1 9 2014
669955 2014-09-01 03:16:00 1 9 2014
669956 2014-09-01 00:05:00 1 9 2014
669957 2014-09-01 00:05:00 1 9 2014
669958 2014-09-01 00:05:00 1 9 2014

669959 rows × 4 columns


In [44]:
#agrupamos por dia mes y anio
trip3 =trip_con_fecha_en_columnas.groupby(['anio','mes','dia']).count()
trip3


Out[44]:
start_date
anio mes dia
2013 8 29 748
30 714
31 640
9 1 706
2 661
3 597
4 606
5 677
6 814
7 796
8 704
9 769
10 892
11 880
12 942
13 981
14 702
15 624
16 914
17 1073
18 1110
19 1044
20 1102
21 388
22 584
23 1050
24 1075
25 1264
26 1082
27 1101
... ... ... ...
2015 8 2 320
3 1283
4 1360
5 1341
6 1299
7 1197
8 432
9 361
10 1284
11 1397
12 1400
13 1401
14 1182
15 451
16 393
17 1418
18 1407
19 1353
20 1353
21 1215
22 421
23 311
24 1309
25 1421
26 1465
27 1443
28 1220
29 342
30 331
31 1369

733 rows × 1 columns


In [3]:
#Podemos ver en base al dataframe obtenido anteriormente que la fechas de los viajes que tenemos son entre Agosto de 2013 y agosto de 2015
trip_con_horayminutos_en_columnas= trip2 [['start_date']]
trip_con_horayminutos_en_columnas['hora'] = trip_con_horayminutos_en_columnas['start_date'].apply(lambda x: x.hour) 
#trip_con_horayminutos_en_columnas['minutos'] = trip_con_fecha_en_columnas['start_date'].apply(lambda x: x.minute)
trip_con_horayminutos_en_columnas


C:\ProgramData\Anaconda2\lib\site-packages\ipykernel\__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
Out[3]:
start_date hora
0 2013-08-29 14:13:00 14
1 2013-08-29 14:42:00 14
2 2013-08-29 10:16:00 10
3 2013-08-29 11:29:00 11
4 2013-08-29 12:02:00 12
5 2013-08-29 18:54:00 18
6 2013-08-29 13:25:00 13
7 2013-08-29 14:02:00 14
8 2013-08-29 17:01:00 17
9 2013-08-29 11:33:00 11
10 2013-08-29 13:52:00 13
11 2013-08-29 13:23:00 13
12 2013-08-29 19:32:00 19
13 2013-08-29 13:57:00 13
14 2013-08-29 12:31:00 12
15 2013-08-29 16:57:00 16
16 2013-08-29 11:25:00 11
17 2013-08-29 12:11:00 12
18 2013-08-29 22:21:00 22
19 2013-08-29 22:06:00 22
20 2013-08-29 19:42:00 19
21 2013-08-29 12:45:00 12
22 2013-08-29 11:40:00 11
23 2013-08-29 22:12:00 22
24 2013-08-29 10:56:00 10
25 2013-08-29 13:53:00 13
26 2013-08-29 13:43:00 13
27 2013-08-29 13:31:00 13
28 2013-08-29 21:43:00 21
29 2013-08-29 18:45:00 18
... ... ...
669929 2014-09-01 09:30:00 9
669930 2014-09-01 09:29:00 9
669931 2014-09-01 09:29:00 9
669932 2014-09-01 09:18:00 9
669933 2014-09-01 09:16:00 9
669934 2014-09-01 08:58:00 8
669935 2014-09-01 08:55:00 8
669936 2014-09-01 08:55:00 8
669937 2014-09-01 08:49:00 8
669938 2014-09-01 08:41:00 8
669939 2014-09-01 08:31:00 8
669940 2014-09-01 08:31:00 8
669941 2014-09-01 08:30:00 8
669942 2014-09-01 08:26:00 8
669943 2014-09-01 08:24:00 8
669944 2014-09-01 08:16:00 8
669945 2014-09-01 08:12:00 8
669946 2014-09-01 08:11:00 8
669947 2014-09-01 07:37:00 7
669948 2014-09-01 07:37:00 7
669949 2014-09-01 07:32:00 7
669950 2014-09-01 07:03:00 7
669951 2014-09-01 06:58:00 6
669952 2014-09-01 05:54:00 5
669953 2014-09-01 04:59:00 4
669954 2014-09-01 04:21:00 4
669955 2014-09-01 03:16:00 3
669956 2014-09-01 00:05:00 0
669957 2014-09-01 00:05:00 0
669958 2014-09-01 00:05:00 0

669959 rows × 2 columns


In [55]:
trip4 =trip_con_horayminutos_en_columnas.groupby('hora').count()
trip4


Out[55]:
start_date
hora
0 2171
1 1189
2 692
3 342
4 1022
5 3449
6 14312
7 43939
8 85864
9 62897
10 30106
11 29141
12 34384
13 31740
14 27156
15 33223
16 59099
17 82705
18 57652
19 29188
20 16527
21 11277
22 7434
23 4450

In [59]:
trip4.index.count_values().plot('area')


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-59-ece1d1812fa8> in <module>()
----> 1 trip4.index.count_values().plot('area')

AttributeError: 'Int64Index' object has no attribute 'count_values'

In [5]:
#Este grafico esta bien, nos muestra la cantidad de viajes segun el dia hora minuto, habria que realizar uno que sea de dia a dia
import seaborn as sns
%matplotlib inline
trip2.groupby('start_date').count()['duration'].plot(figsize=(14,4));



In [7]:
trip5 = pd.read_csv('trip.csv')
trip5['DiaMesAnio'] = trip5['start_date'].str.extract(r'(\d+/\d+/\d+)',expand=False)
trip5.head(5)


Out[7]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code DiaMesAnio
0 4576 63 8/29/2013 14:13 South Van Ness at Market 66 8/29/2013 14:14 South Van Ness at Market 66 520 Subscriber 94127 8/29/2013
1 4607 70 8/29/2013 14:42 San Jose City Hall 10 8/29/2013 14:43 San Jose City Hall 10 661 Subscriber 95138 8/29/2013
2 4130 71 8/29/2013 10:16 Mountain View City Hall 27 8/29/2013 10:17 Mountain View City Hall 27 48 Subscriber 97214 8/29/2013
3 4251 77 8/29/2013 11:29 San Jose City Hall 10 8/29/2013 11:30 San Jose City Hall 10 26 Subscriber 95060 8/29/2013
4 4299 83 8/29/2013 12:02 South Van Ness at Market 66 8/29/2013 12:04 Market at 10th 67 319 Subscriber 94103 8/29/2013

In [10]:
trip5.groupby('DiaMesAnio').count()['id'].plot(figsize=(14,4));



In [15]:
#Visualizacion de cantidad de viajes segun la hora del viaje
trip_con_horayminutos_en_columnas.groupby('hora').count().plot(figsize=(14,4));



In [7]:
#Visualizacion de cantidad de viajes segun la hora del viaje
trip_con_horayminutos_en_columnas.groupby('hora').count()['start_date'].plot('bar');



In [22]:
#Cambiamos la duracion a minutos
trip2['duration'] = trip2['duration'].apply(lambda x: x/60)
trip2.head(5)
#Realizamos un plot para ver la cantidad de viajes segun los minutos
#trip2.groupby('duration').count()['id'].plot(figsize=(14,4));


Out[22]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
0 4576 1 2013-08-29 14:13:00 South Van Ness at Market 66 2013-08-29 14:14:00 South Van Ness at Market 66 520 Subscriber 94127
1 4607 1 2013-08-29 14:42:00 San Jose City Hall 10 2013-08-29 14:43:00 San Jose City Hall 10 661 Subscriber 95138
2 4130 1 2013-08-29 10:16:00 Mountain View City Hall 27 2013-08-29 10:17:00 Mountain View City Hall 27 48 Subscriber 97214
3 4251 1 2013-08-29 11:29:00 San Jose City Hall 10 2013-08-29 11:30:00 San Jose City Hall 10 26 Subscriber 95060
4 4299 1 2013-08-29 12:02:00 South Van Ness at Market 66 2013-08-29 12:04:00 Market at 10th 67 319 Subscriber 94103

In [27]:
trip2.groupby('duration').count()['id'].plot('bar')


Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7cd9370>

In [34]:
#Cantidad de viajes segun la duracion (en minutos). Visualizacion de la cantidad de viajes segun la duracion del viaje 
trip2['duration'].value_counts()[:20].plot('bar')


Out[34]:
<matplotlib.axes._subplots.AxesSubplot at 0x145f31d0>

In [37]:
maximo_valor = trip2['duration'].max()
minimo_valor = trip2['duration'].min()
maximo_valor
#minimo_valor


Out[37]:
287840

In [36]:
#Cantidad de viajes segun la duracion (en minutos). Visualizacion de la cantidad de viajes segun la duracion del viaje 
#En si este grafico mucho no sirve porque nos muestra viajes unicos con ese minuto particular
trip2['duration'].value_counts().tail(20).plot('bar')


Out[36]:
<matplotlib.axes._subplots.AxesSubplot at 0x1497edb0>

In [38]:
station = pd.read_csv('station.csv', low_memory=False)
station.rename(columns={'id': 'start_station_id'}, inplace=True)
station.head(10)


Out[38]:
start_station_id name lat long dock_count city installation_date
0 2 San Jose Diridon Caltrain Station 37.329732 -121.901782 27 San Jose 8/6/2013
1 3 San Jose Civic Center 37.330698 -121.888979 15 San Jose 8/5/2013
2 4 Santa Clara at Almaden 37.333988 -121.894902 11 San Jose 8/6/2013
3 5 Adobe on Almaden 37.331415 -121.893200 19 San Jose 8/5/2013
4 6 San Pedro Square 37.336721 -121.894074 15 San Jose 8/7/2013
5 7 Paseo de San Antonio 37.333798 -121.886943 15 San Jose 8/7/2013
6 8 San Salvador at 1st 37.330165 -121.885831 15 San Jose 8/5/2013
7 9 Japantown 37.348742 -121.894715 15 San Jose 8/5/2013
8 10 San Jose City Hall 37.337391 -121.886995 15 San Jose 8/6/2013
9 11 MLK Library 37.335885 -121.885660 19 San Jose 8/6/2013

In [39]:
arch_unidos = pd.merge(trip2, station, on='start_station_id', how='inner')
arch_unidos.head(10)


Out[39]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code name lat long dock_count city installation_date
0 4576 1 2013-08-29 14:13:00 South Van Ness at Market 66 2013-08-29 14:14:00 South Van Ness at Market 66 520 Subscriber 94127 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
1 4299 1 2013-08-29 12:02:00 South Van Ness at Market 66 2013-08-29 12:04:00 Market at 10th 67 319 Subscriber 94103 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
2 4760 1 2013-08-29 17:01:00 South Van Ness at Market 66 2013-08-29 17:03:00 South Van Ness at Market 66 553 Subscriber 94103 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
3 5070 2 2013-08-29 21:43:00 South Van Ness at Market 66 2013-08-29 21:46:00 South Van Ness at Market 66 598 Subscriber 94115 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
4 4765 3 2013-08-29 17:05:00 South Van Ness at Market 66 2013-08-29 17:08:00 Market at 10th 67 553 Subscriber 94103 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
5 4560 3 2013-08-29 13:58:00 South Van Ness at Market 66 2013-08-29 14:02:00 San Francisco City Hall 58 438 Subscriber 94124 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
6 4559 4 2013-08-29 13:58:00 South Van Ness at Market 66 2013-08-29 14:02:00 San Francisco City Hall 58 554 Subscriber 94115 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
7 4584 4 2013-08-29 14:17:00 South Van Ness at Market 66 2013-08-29 14:21:00 South Van Ness at Market 66 587 Subscriber 94612 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
8 5075 5 2013-08-29 21:47:00 South Van Ness at Market 66 2013-08-29 21:52:00 Civic Center BART (7th at Market) 72 598 Subscriber 94115 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013
9 4981 6 2013-08-29 19:41:00 South Van Ness at Market 66 2013-08-29 19:47:00 Market at 10th 67 632 Subscriber 94110 South Van Ness at Market 37.774814 -122.418954 19 San Francisco 8/23/2013

In [40]:
#Visulizacion de la cantidad de viajes segun la ciudad
arch_unidos['city'].value_counts().plot('bar')


Out[40]:
<matplotlib.axes._subplots.AxesSubplot at 0x15a19590>

In [41]:
#Visulizacion de la cantidad de viajes segun la estacion
arch_unidos['start_station_name'].value_counts().tail(20).plot('bar')


Out[41]:
<matplotlib.axes._subplots.AxesSubplot at 0x15ab6710>

In [ ]: