In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('seaborn')

In [3]:
%%time

# Note: this is strictly for comparison of how slow it is 
# to load all Chicago crimes data with pandas
# see notebooks/all-chicago-crime-charts.ipynb notebook 
# for loading same data with dask in a compressed snappy parquet format

# set csv data file path
csv_data_file = '../raw_data/Crimes_-_2001_to_present.csv'
print('Loading crime data from: {}'.format(csv_data_file))
print('...')

# load crimes csv data into pandas dataframe
crimes = pd.read_csv(csv_data_file, error_bad_lines=False)

print('Crime data loaded into memory.')


Loading crime data from: ../raw_data/Crimes_-_2001_to_present.csv
Crime data loaded into memory.
Wall time: 2min 13s

In [ ]: