In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Apply the seaborn look. Matplotlib 3.6 renamed its bundled seaborn styles to
# 'seaborn-v0_8*' and 3.8 removed the old names, so use whichever one the
# installed version actually provides.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
In [3]:
%%time
# Note: this cell exists strictly to show how slow it is to load the full
# Chicago crimes dataset from CSV with pandas.
# See the notebooks/all-chicago-crime-charts.ipynb notebook for loading the
# same data with dask from a snappy-compressed parquet file.

# path to the raw CSV export
csv_data_file = '../raw_data/Crimes_-_2001_to_present.csv'
print('Loading crime data from: {}'.format(csv_data_file))
print('...')

# Load the crimes CSV into a pandas dataframe.
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0;
# `on_bad_lines='warn'` is its replacement and keeps the same effect:
# malformed rows are skipped and reported instead of aborting the load.
crimes = pd.read_csv(csv_data_file, on_bad_lines='warn')
print('Crime data loaded into memory.')
In [ ]: