In [1]:
from jupyterworkflow.data import get_fremont_data
import pandas as pd


def test_fremont_data():
    data = get_fremont_data()
    assert all(data.columns == ["West", "East", "Total"])
    assert isinstance(data.index, pd.DatetimeIndex)

mkdir jupyterworkflow/tests subl jupyterworkflow/tests/test_data.py upper function copied to test_data.py

git status

python -m pytest jupyterworkflow

1 failed in 10.69 seconds.

Too slow. We should refactor and check the output is consistent with our new function.

subl Makefile

test: python -m pytest jupyterworkflow


In [2]:
test_fremont_data()

In [3]:
data = pd.read_csv("Fremont.csv", index_col="Date")

try:
    data.index = pd.to_datetime(data.index, format="%m/%d/%Y %H:%M:%S %p")
except TypeError:
    data.index = pd.to_datetime(data.index)

In [4]:
data.index


Out[4]:
DatetimeIndex(['2012-10-03 12:00:00', '2012-10-03 01:00:00',
               '2012-10-03 02:00:00', '2012-10-03 03:00:00',
               '2012-10-03 04:00:00', '2012-10-03 05:00:00',
               '2012-10-03 06:00:00', '2012-10-03 07:00:00',
               '2012-10-03 08:00:00', '2012-10-03 09:00:00',
               ...
               '2017-07-31 02:00:00', '2017-07-31 03:00:00',
               '2017-07-31 04:00:00', '2017-07-31 05:00:00',
               '2017-07-31 06:00:00', '2017-07-31 07:00:00',
               '2017-07-31 08:00:00', '2017-07-31 09:00:00',
               '2017-07-31 10:00:00', '2017-07-31 11:00:00'],
              dtype='datetime64[ns]', name='Date', length=42312, freq=None)

import os from urllib.request import urlretrieve

import pandas as pd

FREMONT_URL = "https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD" /# create a function to only dowload this data if we need to download it, first run..

def get_fremont_data(filename="Fremont.csv", url=FREMONT_URL, force_download=False): """Download and cache the fremont data

Parameters
----------
filename :string (optional)
    loation to save the data
url: string (optional)
    web location of the data
force_download: bool (optional)
    if True, force redownload of data

Returns
-------
data: pandas.DataFrame
    The fremont bridge data
"""
if force_download or not os.path.exists(filename):
    urlretrieve(url, filename)
data = pd.read_csv("Fremont.csv", index_col="Date")
try:
    data.index = pd.to_datetime(data.index, format="%m/%d/%Y %H:%M:%S %p")
except TypeError:
    data.index = pd.to_datetime(data.index)

data.columns = ["West", "East"]
data["Total"] = data["West"] + data["East"]
return data



NEW Data,py

let's run a unit test to see

make test


In [ ]: