In [1]:
%matplotlib notebook

In [2]:
import pandas
import datetime
import numpy as np
from matplotlib import pyplot as plt

In [3]:
def plot_data(title, data):
    """Scatter-plot records on a log-scaled y axis in a new figure.

    Parameters
    ----------
    title : str
        Text used as the figure title.
    data : iterable of 3-item sequences
        Each record is read by index: item 0 is the x position, item 1 the
        y position, and item 2 the number mapped to a point colour through
        the 'jet' colormap.

    Notes
    -----
    Point opacity is ``5000 / len(data)`` so that dense plots fade out,
    capped at 1.0 because matplotlib only accepts alpha values in [0, 1].
    An empty ``data`` produces an empty, titled figure instead of raising
    ZeroDivisionError. The view limits of the axes are printed after
    plotting.
    """
    # Materialise once so generators and other one-shot iterables work too.
    points = list(data)

    # Without the cap, fewer than 5000 points would yield alpha > 1.
    alpha = min(1.0, 5000.0 / len(points)) if points else 1.0

    plt.figure()
    plt.scatter(
        [p[0] for p in points],
        [p[1] for p in points],
        s=4,
        c=[p[2] for p in points],
        alpha=alpha, cmap='jet')
    ax = plt.gca()
    ax.set_yscale('log')
    print(ax.viewLim)
    plt.title(title)

In [4]:
# Load the records for the first plot. The next cell reads the 'day',
# 'month', 'year', 'permit_value' and 'type' columns from this frame.
d = pandas.read_csv('missouri-normal.csv')

In [5]:
# Collect one (date, permit value, new-construction flag) tuple per row,
# in the shape plot_data() indexes as items 0, 1 and 2.
values = []

rows = zip(d['day'], d['month'], d['year'], d['permit_value'], d['type'])
for day, month, year, value, typ in rows:
    # A falsy permit value (e.g. 0) is replaced by NaN rather than dropped;
    # presumably so it stays off the log-scaled axis -- TODO confirm.
    value = value if value else np.nan
    try:
        dt = datetime.date(year, month, day)
    except ValueError:
        # day/month/year do not form a valid calendar date: skip the row.
        continue
    # 1.0 for 'New Construction' rows, 0.0 for every other type.
    values.append((dt, value, float(typ == 'New Construction')))

In [6]:
# Plot the (date, permit value, new-construction flag) tuples built in the
# previous cell; plot_data() also prints the resulting view limits.
plot_data("St. Louis", values)


Bbox(x0=718459.6729032258, y0=0.9972222222222222, x1=733744.327096774, y1=290043289.35361546)

In [7]:
# Load the Seattle records. The next cell reads the 'Issue Date', 'Value'
# and 'target' columns. Note that this rebinds `d`, replacing the frame
# loaded earlier in the notebook.
d = pandas.read_csv('seattle.csv')

In [8]:
# Collect one (issue datetime, value, target flag) tuple per row, in the
# shape plot_data() indexes as items 0, 1 and 2. This rebinds `values`.
values = []
for (date, value, target) in zip(
        d['Issue Date'], d['Value'], d['target']):
    if not value:
        # Rows with a falsy value (e.g. 0) are dropped entirely.
        continue
    try:
        # fromtimestamp() treats `date` as a POSIX timestamp and returns a
        # naive datetime in the machine's local timezone.
        dt = datetime.datetime.fromtimestamp(date)
    except (ValueError, OverflowError, OSError):
        # Besides ValueError, fromtimestamp() is documented to raise
        # OverflowError / OSError for timestamps the platform cannot
        # represent; skip such rows as well instead of aborting the cell.
        continue
    # NOTE(review): a NaN `target` is truthy and therefore maps to 1 --
    # confirm that missing targets should be counted as positives.
    t = 1 if target else 0
    values.append((dt, value, t))

In [9]:
# Plot the (issue datetime, value, target flag) tuples built in the
# previous cell; plot_data() also prints the resulting view limits.
plot_data("Seattle", values)


Bbox(x0=734041.3229032257, y0=0.9972222222222222, x1=736067.6770967741, y1=742225880.9807426)

In [ ]: