In [18]:
import numpy as np 
import pandas as pd

# plots
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

trip= pd.read_csv('trip.csv')
#wed= pd.read_csv('weather.csv')

In [ ]:
trip.sample(2)
#trip.dtypes

In [ ]:
#Cantidad de viajes por bike
trip = trip['duration'].sum()
trip

In [ ]:
# Number of duration per bike -  consultar*
#trip.groupby('duration').count()['bike_id'].plot(figsize=(14,4));
trip.groupby('bike_id').count()['duration'].plot(figsize=(14,4));

In [ ]:
# Top five bikes durations - falta ver como lo puedo graficar. Grafico anterior*
trip.sort_values('duration',ascending=False).head(5)

In [ ]:
# Top five bikes less durations 
#trip.sort_index('duration',ascending=False).head(5)


#Are trip getting longer or shorter?
# susbcripciones length by time, are subscriptions getting longer or shorter ? 
trip.groupby('subscription_type').mean().loc[:,'duration'].plot(linewidth=2,figsize=(12,8));

In [ ]:
trip.loc[trip.duration>722236,["id","bike_id", "duration"]].sort_values(by="duration",ascending=False).head(10)

In [ ]:
trip.loc[trip.duration<722236,["id","bike_id", "duration"]].sort_values(by="duration",ascending=False).head(10)

In [ ]:
# Histogram of ratings*
trip['duration'].hist(figsize=(16,8));

In [ ]:
# Histogram of durations
trip = trip['duration'].value_counts()
trip.hist(figsize=(12,8),bins=20,);

In [ ]:
# Histogram of bikes
trip = trip['bike_id'].value_counts()
trip = trip[trip > 5000]
#trip.plot(kind='bar',rot=90,figsize=(12,8));

In [ ]:
#stat= pd.read_csv('status.csv', low_memory=False)
#stat.head(3)
stat= pd.read_csv('status.csv', iterator=True, chunksize=500)  # gives TextFileReader, which is iterable with chunks of 1000 rows.
df = pd.concat(stat, ignore_index=True)  # df is DataFrame. If errors, do `list(tp)` instead of `tp`
df.sample(3)

In [ ]:
#CON SIOMA
#rated_movies = pd.merge(movies,ratings, left_on='movieId', right_on='movieId', how='inner')
#rated_movies.sample(2)

#Cantidad de viajes por dia con el mismo startdate
#test = trip.groupby(['start_date'])['id'].count()
#test.head()

tw = pd.merge(trip,wed, left_on='start_date', right_on='date', how='outer')
tw.head()
#tw.sample(2)

In [ ]:
# Top five bikes start_date - falta ver como lo puedo graficar. Grafico anterior*
#ver como puedo hacer los de menor salida 
trip.sort_values('start_date',ascending=False).head(5)

In [ ]:
# Number of trip by station_id
#trip['start_station_name'].value_counts()[0:19].plot(kind='bar',figsize=(14,4));
#TopOstations = trip['start_station_name'].value_counts()[0:29].index.tolist()
#Dataframe[column].value_counts().index.tolist()
#['apple', 'sausage', 'banana', 'cheese']
trip.sort_index(by='duration', ascending=[False])

In [ ]:


In [ ]:
#df.pivot(index='date', columns='variable', values='value')
tp1 = trip[trip['start_station_name'].isin(TopOperators)].groupby(['Operator','cluster']).size().reset_index()
ap1.columns=['Operator','cluster','size']
ap1 = ap1.pivot(index='cluster', columns='Operator',values='size')
ap1.fillna(0,inplace=True)
fig, ax = plt.subplots(figsize=(14,16))         # Sample figsize in inches
sns.heatmap(ap1, annot=False, linewidths=.5, ax=ax);

In [ ]:
trip.groupby('bike_id')['duration'].plot(figsize=(14,4));

In [19]:
#Visualizacion de la cantidad de viajes segun cada bikeID
#trip2 = trip.groupby('bike_id').count()['duration']
#trip2
trip.groupby('bike_id').count()['duration'].plot(figsize=(14,4));



In [ ]:
trip2

In [ ]:
trip3 = trip[trip['bike_id'] == 34]
trip3

In [20]:
#Visualizacion de la duracion de viajes en segundos segun bikeID
trip.groupby('bike_id').sum()['duration'].plot(figsize=(14,4));



In [21]:
#Cambiamos la duracion de segundos a minutos
trip['duration'] = trip['duration'].apply(lambda x: x/60)
trip.head(10)


Out[21]:
id duration start_date start_station_name start_station_id end_date end_station_name end_station_id bike_id subscription_type zip_code
0 4576 1 8/29/2013 14:13 South Van Ness at Market 66 8/29/2013 14:14 South Van Ness at Market 66 520 Subscriber 94127
1 4607 1 8/29/2013 14:42 San Jose City Hall 10 8/29/2013 14:43 San Jose City Hall 10 661 Subscriber 95138
2 4130 1 8/29/2013 10:16 Mountain View City Hall 27 8/29/2013 10:17 Mountain View City Hall 27 48 Subscriber 97214
3 4251 1 8/29/2013 11:29 San Jose City Hall 10 8/29/2013 11:30 San Jose City Hall 10 26 Subscriber 95060
4 4299 1 8/29/2013 12:02 South Van Ness at Market 66 8/29/2013 12:04 Market at 10th 67 319 Subscriber 94103
5 4927 1 8/29/2013 18:54 Golden Gate at Polk 59 8/29/2013 18:56 Golden Gate at Polk 59 527 Subscriber 94109
6 4500 1 8/29/2013 13:25 Santa Clara at Almaden 4 8/29/2013 13:27 Adobe on Almaden 5 679 Subscriber 95112
7 4563 1 8/29/2013 14:02 San Salvador at 1st 8 8/29/2013 14:04 San Salvador at 1st 8 687 Subscriber 95112
8 4760 1 8/29/2013 17:01 South Van Ness at Market 66 8/29/2013 17:03 South Van Ness at Market 66 553 Subscriber 94103
9 4258 1 8/29/2013 11:33 San Jose City Hall 10 8/29/2013 11:35 MLK Library 11 107 Subscriber 95060

In [23]:
#Visualizacion de la duracion de viajes en segundos segun bikeID
trip.groupby('bike_id').sum()['duration'].plot(figsize=(14,4));



In [24]:
#Cambiamos la duracion de segundos a horas
trip['duration'] = trip['duration'].apply(lambda x: x/3600)
#Visualizacion de la duracion de viajes en segundos segun bikeID
trip.groupby('bike_id').sum()['duration'].plot(figsize=(14,4));



In [27]:
conteoDeViajesSegunStartStation = trip['start_station_name'].value_counts()
conteoDeViajesSegunStartStation


Out[27]:
San Francisco Caltrain (Townsend at 4th)         49092
San Francisco Caltrain 2 (330 Townsend)          33742
Harry Bridges Plaza (Ferry Building)             32934
Embarcadero at Sansome                           27713
Temporary Transbay Terminal (Howard at Beale)    26089
2nd at Townsend                                  25837
Steuart at Market                                24838
Market at Sansome                                24172
Townsend at 7th                                  23724
Market at 10th                                   20272
Market at 4th                                    20165
2nd at South Park                                18496
Powell Street BART                               18378
Grant Avenue at Columbus Avenue                  16306
2nd at Folsom                                    15940
Beale at Market                                  15709
Embarcadero at Bryant                            14811
Civic Center BART (7th at Market)                14102
Embarcadero at Folsom                            14054
5th at Howard                                    13526
Howard at 2nd                                    13163
South Van Ness at Market                         12880
Powell at Post (Union Square)                    12496
Mechanics Plaza (Market at Battery)              12224
Spear at Folsom                                  11949
Commercial at Montgomery                         11888
Yerba Buena Center of the Arts (3rd @ Howard)    11251
Embarcadero at Vallejo                           10592
Broadway St at Battery St                        10310
Clay at Battery                                   9994
                                                 ...  
Castro Street and El Camino Real                  2035
MLK Library                                       2034
San Antonio Caltrain Station                      1990
Japantown                                         1910
San Antonio Shopping Center                       1818
Ryland Park                                       1747
San Salvador at 1st                               1692
St James Park                                     1680
Evelyn Park and Ride                              1667
San Jose Civic Center                             1594
Redwood City Caltrain Station                     1554
University and Emerson                            1512
Arena Green / SAP Center                          1496
SJSU - San Salvador at 9th                        1473
Washington at Kearney                             1472
Cowper at University                              1374
Adobe on Almaden                                  1257
SJSU 4th at San Carlos                            1170
Rengstorff Avenue / California Street             1129
California Ave Caltrain Station                   1026
Santa Clara County Civic Center                    840
Park at Olive                                      750
Stanford in Redwood City                           436
Mezes Park                                         341
Redwood City Medical Center                        311
San Mateo County Center                            287
Franklin at Maple                                  224
Redwood City Public Library                        213
Broadway at Main                                    67
San Jose Government Center                          23
Name: start_station_name, dtype: int64

In [28]:
#Visualizacion de cantidad de viajes segun start_station_name
trip.groupby('start_station_name').count()['duration'].plot(figsize=(14,4));



In [29]:
#Visualizacion de cantidad de viajes segun start_station_id
trip.groupby('start_station_id').count()['duration'].plot(figsize=(14,4));



In [31]:
#Visualizacion de cantidad de viajes segun start_station_name
trip.groupby('start_station_name').count()['duration'].plot('bar',figsize=(14,4));



In [32]:
#Visualizacion de cantidad de viajes segun end_station_name
trip.groupby('end_station_name').count()['duration'].plot('bar',figsize=(14,4));



In [ ]: