In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
# The bare "seaborn" style name was deprecated in matplotlib 3.6 and later removed;
# prefer the renamed "seaborn-v0_8" style and fall back to the old name on older versions.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")
In [2]:
from jupyterworkflow.data import get_fremont_data
In [3]:
# Load the bridge counts via the package helper, then preview the first rows
# as the cell's rich output.
data = get_fremont_data()
data.head()
Out[3]:
In [4]:
# Sum the counts into weekly bins and plot every column.
weekly_totals = data.resample("W").sum()
weekly_totals.plot()
Out[4]:
In [5]:
# Average each column by time of day (all dates pooled together) and plot it.
time_of_day = data.index.time
mean_by_time = data.groupby(time_of_day).mean()
mean_by_time.plot()
Out[5]:
In [6]:
# Reshape "Total" into a table with one row per time of day and one column per date.
pivoted = data.pivot_table(
    values="Total",
    index=data.index.time,
    columns=data.index.date,
)
# Preview the top-left 5x5 corner only; the full table has one column per date.
pivoted.iloc[0:5, 0:5]
Out[6]:
In [7]:
# One line per column of `pivoted` (i.e. per date). The very low alpha keeps each
# line almost transparent so that overlapping lines build up visible bands;
# the legend is disabled because it would need one entry per date.
pivoted.plot(legend=False, alpha=0.01)
Out[7]:
SECOND PART: Make a Python package so that we and other people can reuse this code for analysis.
Go to the project directory, then:
- `mkdir jupyterworkflow` — create a directory
- `touch jupyterworkflow/__init__.py` — initialize a Python package
- create a `data.py` file in this directory with the following contents:
import os
from urllib.request import urlretrieve

import pandas as pd

FREMONT_URL = "https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD"


# Only download the data when it is actually needed (first run, or when forced).
def get_fremont_data(filename="Fremont.csv", url=FREMONT_URL, force_download=False):
    """Download and cache the Fremont data.

    The CSV is fetched from ``url`` only when ``filename`` does not exist yet
    or when ``force_download`` is True; otherwise the local copy is reused.

    Parameters
    ----------
    filename : string (optional)
        location to save the data (and to read it back from)
    url : string (optional)
        web location of the data
    force_download : bool (optional)
        if True, force redownload of data

    Returns
    -------
    data : pandas.DataFrame
        The Fremont bridge data, indexed by the parsed "Date" column, with
        columns "West", "East" and their sum "Total".
    """
    if force_download or not os.path.exists(filename):
        urlretrieve(url, filename)
    # Read back from `filename` rather than a hard-coded "Fremont.csv", so a
    # custom cache location is actually used.
    data = pd.read_csv(filename, index_col="Date", parse_dates=True)
    # The two data columns are renamed positionally; the file is expected to
    # hold exactly two count columns besides "Date".
    data.columns = ["West", "East"]
    data["Total"] = data["West"] + data["East"]
    return data
In [8]:
#get_fremont_data?
This is a good time to test the tools and check that they do what we want them to do: unit tests.
In [ ]: