In [1]:
%matplotlib notebook
In [2]:
import pandas
import datetime
import numpy as np
from matplotlib import pyplot as plt
In [3]:
def plot_data(title, data):
    """Scatter-plot ``data`` on a new figure with a log-scaled y axis.

    Parameters
    ----------
    title : str
        Title placed on the axes.
    data : sized sequence of 3-item sequences
        Each item supplies, in order, the x coordinate, the y coordinate and
        the value that is mapped through the 'jet' colormap.

    Returns
    -------
    None
        The figure is left as the current pyplot figure.

    Notes
    -----
    Marker opacity is ``5000 / len(data)`` so that larger datasets are drawn
    more transparently. The value is capped at 1.0 because matplotlib expects
    alpha within [0, 1] (newer releases raise ``ValueError`` otherwise), and
    empty input uses alpha 1.0 instead of dividing by zero.
    """
    xs = [row[0] for row in data]
    ys = [row[1] for row in data]
    colors = [row[2] for row in data]

    # Fewer than 5000 points would give alpha > 1; no points would divide by 0.
    n_points = len(data)
    alpha = min(1.0, 5000.0 / n_points) if n_points else 1.0

    plt.figure()
    plt.scatter(xs, ys, s=4, c=colors, alpha=alpha, cmap='jet')

    axes = plt.gca()
    axes.set_yscale('log')
    # Echo the resulting view limits in the cell output.
    print(axes.viewLim)
    plt.title(title)
In [4]:
# Load the permit records that the following cells plot under the title
# "St. Louis"; the path is relative to the notebook's working directory.
d = pandas.read_csv('missouri-normal.csv')
In [5]:
# Collect one (date, permit value, type flag) tuple per row of `d`, in the
# element order that plot_data() reads: x, y, colour value.
values = []
for day, month, year, value, typ in zip(
        d['day'], d['month'], d['year'], d['permit_value'], d['type']):
    # A falsy permit value (e.g. 0) is stored as NaN rather than dropped.
    value = value if value else np.nan
    try:
        dt = datetime.date(year, month, day)
    except ValueError:
        # day/month/year do not form a valid calendar date: skip the row.
        continue
    # 1 marks 'New Construction' permits, 0 everything else.
    t = 1 if typ == 'New Construction' else 0
    values.append((dt, value, float(t)))
In [6]:
# Draw the scatter plot for the tuples collected from 'missouri-normal.csv'.
plot_data("St. Louis", values)
In [7]:
# Load the Seattle records. This rebinds `d`, so the frame read from
# 'missouri-normal.csv' is no longer reachable under that name.
d = pandas.read_csv('seattle.csv')
In [8]:
# Collect one (timestamp, value, target flag) tuple per row of `d`, in the
# element order that plot_data() reads: x, y, colour value.
# This rebinds `values`, replacing the list built for the previous plot.
values = []
for (date, value, target) in zip(
        d['Issue Date'], d['Value'], d['target']):
    # Rows with a falsy value (e.g. 0) are left out entirely.
    if not value:
        continue
    try:
        # fromtimestamp() needs a numeric POSIX timestamp, so 'Issue Date' is
        # presumably stored as epoch seconds -- TODO confirm against the CSV.
        # NOTE(review): the result is in the machine's local timezone, so the
        # x positions can shift between machines; confirm that is acceptable.
        dt = datetime.datetime.fromtimestamp(date)
    except (ValueError, OverflowError, OSError):
        # Besides ValueError, fromtimestamp() is documented to raise
        # OverflowError / OSError for timestamps outside the platform's
        # supported range. Skip such rows instead of aborting the cell.
        continue
    # Normalise the truthiness of `target` to a 0/1 colour value.
    t = 1 if target else 0
    values.append((dt, value, t))
In [9]:
# Draw the scatter plot for the tuples collected from 'seattle.csv'.
plot_data("Seattle", values)
In [ ]: