In [1]:
%%sql -d standard
SELECT
  timestamp,
  borough,
  latitude,
  longitude
FROM `bigquery-public-data.new_york.nypd_mv_collisions`
ORDER BY timestamp DESC
LIMIT 15
Out[1]:
In [2]:
%%sql --module nyc_collisions
SELECT
  IF(borough = 'MANHATTAN', 1, 0) AS is_mt,
  latitude,
  longitude
FROM `bigquery-public-data.new_york.nypd_mv_collisions`
WHERE
  LENGTH(borough) > 0
  AND latitude IS NOT NULL
  AND latitude != 0.0
  AND longitude IS NOT NULL
  AND longitude != 0.0
  AND borough != 'BRONX'
ORDER BY RAND()
LIMIT 10000
In [3]:
import datalab.bigquery as bq
import numpy as np

# Run the `nyc_collisions` SQL module and pull the whole result into a NumPy array.
# `.values` is used instead of DataFrame.as_matrix(): as_matrix() was deprecated in
# pandas 0.23 and removed in pandas 1.0, while `.values` returns the same array on
# every pandas version.
nyc_cols = bq.Query(nyc_collisions).to_dataframe(dialect='standard').values

# Column layout follows the SELECT list of the module: is_mt, latitude, longitude.
is_mt = nyc_cols[:, 0].astype(np.int32)        # 1 = Manhattan, 0 = any other borough
latlng = nyc_cols[:, 1:3].astype(np.float32)   # (latitude, longitude) pairs

print("Is Manhattan: " + str(is_mt))
print("\nLat/Lng: \n\n" + str(latlng))
print("\nLoaded " + str(is_mt.size) + " rows.")
In [4]:
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Standardize both coordinate columns with scikit-learn's StandardScaler.
latlng_std = StandardScaler().fit_transform(latlng)

# Column 0 is latitude, column 1 is longitude; both are reused by later cells.
lat, lng = latlng_std[:, 0], latlng_std[:, 1]

# Scatter the sample with longitude on x and latitude on y:
# Manhattan points in blue first, then all other points in yellow.
in_manhattan = is_mt == 1
elsewhere = is_mt == 0
plt.scatter(lng[in_manhattan], lat[in_manhattan], c='b')
plt.scatter(lng[elsewhere], lat[elsewhere], c='y')
plt.show()
In [5]:
# Split by position: rows 0-7999 are used for training, rows 8000-9999 for testing.
latlng_train, latlng_test = latlng_std[:8000], latlng_std[8000:10000]
is_mt_train, is_mt_test = is_mt[:8000], is_mt[8000:10000]
In [6]:
import tensorflow as tf

# Only log errors, which suppresses TensorFlow's warning messages.
tf.logging.set_verbosity(tf.logging.ERROR)

# One unnamed real-valued feature column of dimension 2 (latitude, longitude).
latlng_column = tf.contrib.layers.real_valued_column("", dimension=2)
feature_columns = [latlng_column]

# Binary classifier with four hidden layers of 20 units each.
hidden_layer_sizes = [20, 20, 20, 20]
dnnc = tf.contrib.learn.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=hidden_layer_sizes,
    n_classes=2,
)
dnnc
Out[6]:
In [7]:
def plot_predicted_map(classifier):
    """Scatter-plot every sampled point coloured by its predicted class.

    Predictions are made for the module-level `latlng_std` array and drawn
    using the module-level `lng` (x axis) and `lat` (y axis) arrays.
    Points predicted as class 1 (Manhattan) are blue; class 0 are yellow.

    Args:
        classifier: an object whose `predict(x, as_iterable=False)` returns
            an array of predicted class labels, one per row of `latlng_std`.
    """
    predicted = classifier.predict(latlng_std, as_iterable=False)
    # Draw class 1 first, then class 0, so yellow is painted on top of blue.
    for label, colour in ((1, 'b'), (0, 'y')):
        selected = predicted == label
        plt.scatter(lng[selected], lat[selected], c=colour)
    plt.show()
def print_accuracy(classifier):
    """Evaluate `classifier` on the test split and print its accuracy.

    Uses the module-level `latlng_test` / `is_mt_test` arrays and prints the
    "accuracy" entry of the evaluation result as a percentage with two
    decimal places.

    Args:
        classifier: an object whose `evaluate(x=..., y=...)` returns a
            mapping containing an "accuracy" value.
    """
    metrics = classifier.evaluate(x=latlng_test, y=is_mt_test)
    print('Accuracy: {:.2%}'.format(metrics["accuracy"]))
# Fit for a single step only, then show the predicted map and the test
# accuracy of the barely-trained network as a baseline.
initial_steps = 1
dnnc.fit(x=latlng_train, y=is_mt_train, steps=initial_steps)
plot_predicted_map(dnnc)
print_accuracy(dnnc)
In [8]:
steps = 20   # training steps per round
rounds = 5   # number of fit/plot rounds

# Train in short rounds and redraw the predicted map after each one.
for i in range(1, rounds + 1):
    dnnc.fit(x=latlng_train, y=is_mt_train, steps=steps)
    plot_predicted_map(dnnc)
    # Steps run by this loop so far; the single step fitted in the previous
    # cell is not included in this count.
    steps_so_far = i * steps
    print('Steps: ' + str(steps_so_far))

print('\nTraining Finished.')
print_accuracy(dnnc)
In [ ]: