In [3]:
import pandas as pd
from datetime import datetime
from ggplot import *
# %matplotlib inline

def plot_weather_data(turnstile_weather):
    '''
    You are passed in a dataframe called turnstile_weather. 
    Use turnstile_weather along with ggplot to make a data visualization
    focused on the MTA and weather data we used in assignment #3.  
    You should feel free to implement something that we discussed in class 
    (e.g., scatterplots, line plots, or histograms) or attempt to implement
    something more advanced if you'd like.  

    Here are some suggestions for things to investigate and illustrate:
     * Ridership by time of day or day of week
     * How ridership varies based on Subway station
     * Which stations have more exits or entries at different times of day

    If you'd like to learn more about ggplot and its capabilities, take
    a look at the documentation at:
    https://pypi.python.org/pypi/ggplot/
     
    You can check out:
    https://www.dropbox.com/s/meyki2wl9xfa7yk/turnstile_data_master_with_weather.csv
     
    To see all the columns and data points included in the turnstile_weather 
    dataframe. 
     
    However, due to the limitation of our Amazon EC2 server, we are giving you about 1/3
    of the actual data in the turnstile_weather dataframe
    '''    
    turnstile_weather = pd.read_csv(turnstile_weather)
    daysn = []

    def get_day(date):
        return datetime.strftime(datetime.strptime(date,'%Y-%m-%d').date(),'%a')

    for the_date in turnstile_weather['DATEn']:
        daysn.append(get_day(the_date))

    turnstile_weather['Dayn'] = daysn

    grouped = turnstile_weather.groupby('Dayn',as_index=False).sum()
    plot = ggplot(grouped, aes(x='Dayn',y='ENTRIESn_hourly')) + \
           geom_bar(aes(weight='ENTRIESn_hourly'), fill='blue')

    return plot

In [2]:
plot_weather_data('C:/Vindico/Projects/Data/Course/Python/Udacity/Introduction to Data Science/Lesson 4/turnstile_data_master_with_weather.csv')


A variable was mapped to y.
    stat_bin sets the y value to the count of cases in each group.
    The mapping to y was ignored.
    If you want y to represent values in the data, use stat="bar".
C:\Anaconda\lib\site-packages\pandas\util\decorators.py:81: FutureWarning: the 'rows' keyword is deprecated, use 'index' instead
  warnings.warn(msg, FutureWarning)
Out[2]:
<ggplot: (4109067)>

In [4]:
def plot_weather_data(turnstile_weather):
    '''
    You are passed in a dataframe called turnstile_weather. 
    Use turnstile_weather along with ggplot to make a data visualization
    focused on the MTA and weather data we used in assignment #3.  
    You should feel free to implement something that we discussed in class 
    (e.g., scatterplots, line plots, or histograms) or attempt to implement
    something more advanced if you'd like.  

    Here are some suggestions for things to investigate and illustrate:
     * Ridership by time of day or day of week
     * How ridership varies based on Subway station
     * Which stations have more exits or entries at different times of day

    If you'd like to learn more about ggplot and its capabilities, take
    a look at the documentation at:
    https://pypi.python.org/pypi/ggplot/
     
    You can check out:
    https://www.dropbox.com/s/meyki2wl9xfa7yk/turnstile_data_master_with_weather.csv
     
    To see all the columns and data points included in the turnstile_weather 
    dataframe. 
     
    However, due to the limitation of our Amazon EC2 server, we are giving you about 1/3
    of the actual data in the turnstile_weather dataframe
    '''    
    turnstile_weather = pd.read_csv(turnstile_weather)
    turnstile_weather.DATEn = pd.to_datetime(turnstile_weather.DATEn) 
    grouped = turnstile_weather.groupby('DATEn', as_index=False).sum()
    grouped.index.name = "DATEn"
    plot  = ggplot(grouped, aes(x = 'DATEn', y = 'EXITSn_hourly'))
    plot = plot + geom_line()
    return plot

In [5]:
plot_weather_data('C:/Vindico/Projects/Data/Course/Python/Udacity/Introduction to Data Science/Lesson 4/turnstile_data_master_with_weather.csv')


Out[5]:
<ggplot: (22284253)>