In [1]:
from jupyterworkflow.data import get_fremont_data
import pandas as pd
def test_fremont_data():
    """Sanity-check the frame returned by ``get_fremont_data``.

    Verifies that the columns are exactly ``West``, ``East`` and ``Total``
    (in that order) and that the index is a ``pandas.DatetimeIndex``.

    Raises
    ------
    AssertionError
        If either check fails.
    """
    data = get_fremont_data()
    # Compare as a plain list: an element-wise ``Index == list`` comparison
    # raises ValueError when the lengths differ, which would hide the real
    # problem behind an error instead of a clean assertion failure.
    assert list(data.columns) == ["West", "East", "Total"]
    assert isinstance(data.index, pd.DatetimeIndex)
mkdir jupyterworkflow/tests
subl jupyterworkflow/tests/test_data.py
The function above is copied into test_data.py.
git status
python -m pytest jupyterworkflow
1 failed in 10.69 seconds.
Too slow. We should refactor and check the output is consistent with our new function.
subl Makefile
test:
	python -m pytest jupyterworkflow
In [2]:
# Run the checks directly in the notebook: the call produces no output on
# success and raises AssertionError if one of the assertions fails.
test_fremont_data()
In [3]:
# Load the cached CSV and convert its "Date" index from strings to datetimes.
data = pd.read_csv("Fremont.csv", index_col="Date")
try:
    # An explicit format is much faster than letting pandas infer it.
    # %I (12-hour clock) must be paired with %p: with %H the AM/PM marker is
    # ignored and every afternoon timestamp would be parsed as a morning one.
    data.index = pd.to_datetime(data.index, format="%m/%d/%Y %I:%M:%S %p")
except (TypeError, ValueError):
    # A format mismatch surfaces as ValueError; fall back to the slower
    # inferred parsing instead of failing.
    data.index = pd.to_datetime(data.index)
In [4]:
# Display the index to inspect the result of the datetime conversion above.
data.index
Out[4]:
import os
from urllib.request import urlretrieve

import pandas as pd
# Remote location of the Fremont CSV. The function below only downloads it
# when needed (e.g. on the first run, when no local copy exists yet).
FREMONT_URL = "https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD"


def get_fremont_data(filename="Fremont.csv", url=FREMONT_URL, force_download=False):
    """Download and cache the Fremont data.

    Parameters
    ----------
    filename : string (optional)
        Location to save the data; also the file that is read back.
    url : string (optional)
        Web location of the data.
    force_download : bool (optional)
        If True, force a re-download of the data even when ``filename``
        already exists.

    Returns
    -------
    data : pandas.DataFrame
        The Fremont bridge data, indexed by timestamp, with the columns
        ``West``, ``East`` and ``Total`` (``West + East``).
    """
    if force_download or not os.path.exists(filename):
        urlretrieve(url, filename)

    # Read the file that was just cached (or already present), not a
    # hard-coded name, so that a custom ``filename`` is honoured.
    data = pd.read_csv(filename, index_col="Date")

    try:
        # An explicit format is much faster than letting pandas infer it.
        # %I (12-hour clock) must be paired with %p: with %H the AM/PM marker
        # is ignored and afternoon timestamps would be parsed as morning ones.
        data.index = pd.to_datetime(data.index, format="%m/%d/%Y %I:%M:%S %p")
    except (TypeError, ValueError):
        # A format mismatch surfaces as ValueError; fall back to the slower
        # inferred parsing instead of failing.
        data.index = pd.to_datetime(data.index)

    # Assumes the CSV has exactly two data columns, west then east -- TODO
    # confirm against the current dataset; a different column count makes
    # this assignment raise ValueError.
    data.columns = ["West", "East"]
    data["Total"] = data["West"] + data["East"]
    return data
New data.py
Let's run the unit test to see whether it passes.
make test
In [ ]: