In [1]:
import pandas as pd
import numpy as np

NYC


In [97]:
ny = pd.read_csv('sketch_161113_ActivityMatrix/data/NY_crimes_2010_2016.csv', 
                 usecols=["CMPLNT_FR_TM", "BORO_NM"])
print ny.shape
ny.head()

In [100]:
ny['hour'] = pd.to_datetime(ny['CMPLNT_FR_TM'], format='%H:%M:%S').dt.hour
ny.drop('CMPLNT_FR_TM', axis=1, inplace=True)
ny.head()


Out[100]:
BORO_NM hour
0 BROOKLYN 1.0
1 BRONX 23.0
2 MANHATTAN 23.0
3 BROOKLYN 23.0
4 MANHATTAN 23.0

In [108]:
ny_crime_table = ny.pivot_table(index=['BORO_NM'],
                        columns=['hour'], aggfunc=len)
ny_crime_table


Out[108]:
hour 0.0 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 ... 14.0 15.0 16.0 17.0 18.0 19.0 20.0 21.0 22.0 23.0
BORO_NM
BRONX 29424 21699 16854 13768 12018 9165 9472 13362 20428 21989 ... 34455 39599 37528 37272 38682 38619 38459 35753 33761 30247
BROOKLYN 46299 30918 24434 20067 17203 12874 13572 18831 30700 32913 ... 47016 52981 51086 50842 52331 51419 52380 48484 45430 41536
MANHATTAN 31042 25505 21933 19048 15902 9290 8280 11762 20903 25766 ... 40489 45016 44235 44030 42970 39539 36320 31014 28726 28178
QUEENS 27901 20368 17168 15267 15036 10126 9886 14050 20907 22300 ... 29928 33939 32318 32128 33394 32313 32332 30165 28886 27126
STATEN ISLAND 6778 4658 3608 2844 2245 1633 1788 3021 4978 5056 ... 7752 8223 7577 7550 7851 7723 8128 7086 7162 6278

5 rows × 24 columns


In [ ]:
ny_table.to_csv('data/ny_count_BORO_HOUR')

CHICAGO


In [103]:
import pickle
chicago_district = pickle.load(open('sketch_161113_ActivityMatrix/data/chicago_district.p','rb'))

chi = pd.read_csv('sketch_161113_ActivityMatrix/data/Chicago_Crimes_2010_2016.csv',
                 usecols=["Date", 'District' ]).dropna() 
chi.shape
chi.head()


Out[103]:
Date District
0 05/03/2016 09:00:00 PM 11.0
1 05/03/2016 09:35:00 PM 8.0
2 05/03/2016 02:45:00 PM 9.0
3 05/03/2016 06:42:00 PM 8.0
4 05/03/2016 03:00:00 PM 3.0

In [104]:
chi['hour'] = pd.to_datetime(chi['Date'], format = '%m/%d/%Y %I:%M:%S %p').dt.hour
chi.drop('Date', axis=1, inplace=True)

In [106]:
chi_crime_table = chi.pivot_table(index=['District'],
                        columns=['hour'], aggfunc=len)
chi_crime_table.head()


Out[106]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
District
1.0 2666.0 1449.0 1304.0 1044.0 831.0 844.0 1171.0 1768.0 2892.0 4229.0 ... 5871.0 5614.0 5592.0 5712.0 5308.0 4801.0 3817.0 3137.0 2622.0 1979.0
2.0 4037.0 2193.0 1727.0 1398.0 1176.0 1066.0 1490.0 2372.0 3598.0 4639.0 ... 4838.0 5396.0 4698.0 4474.0 4949.0 5194.0 5057.0 4700.0 4334.0 3462.0
3.0 4999.0 3174.0 2438.0 2163.0 1603.0 1383.0 1738.0 2637.0 3888.0 5252.0 ... 5464.0 6151.0 5933.0 5676.0 5972.0 6399.0 6343.0 5915.0 5535.0 4695.0
4.0 6481.0 3959.0 3170.0 2537.0 2075.0 1812.0 2048.0 2936.0 4515.0 6077.0 ... 6330.0 6712.0 6430.0 6210.0 6467.0 6664.0 6960.0 6819.0 6599.0 5437.0
5.0 4520.0 2685.0 2128.0 1838.0 1405.0 1241.0 1522.0 2328.0 3497.0 4446.0 ... 4873.0 5137.0 4835.0 4734.0 5245.0 5507.0 5443.0 5395.0 5188.0 4129.0

5 rows × 24 columns


In [107]:
chi_crime_table = chi_crime_table.ix[chicago_district]
chi_crime_table.index = chicago_district.values()
chi_crime_table.head()


Out[107]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
Central 2666.0 1449.0 1304.0 1044.0 831.0 844.0 1171.0 1768.0 2892.0 4229.0 ... 5871.0 5614.0 5592.0 5712.0 5308.0 4801.0 3817.0 3137.0 2622.0 1979.0
Wentworth 4037.0 2193.0 1727.0 1398.0 1176.0 1066.0 1490.0 2372.0 3598.0 4639.0 ... 4838.0 5396.0 4698.0 4474.0 4949.0 5194.0 5057.0 4700.0 4334.0 3462.0
Grand Crossing 4999.0 3174.0 2438.0 2163.0 1603.0 1383.0 1738.0 2637.0 3888.0 5252.0 ... 5464.0 6151.0 5933.0 5676.0 5972.0 6399.0 6343.0 5915.0 5535.0 4695.0
South Chicago 6481.0 3959.0 3170.0 2537.0 2075.0 1812.0 2048.0 2936.0 4515.0 6077.0 ... 6330.0 6712.0 6430.0 6210.0 6467.0 6664.0 6960.0 6819.0 6599.0 5437.0
Calumet 4520.0 2685.0 2128.0 1838.0 1405.0 1241.0 1522.0 2328.0 3497.0 4446.0 ... 4873.0 5137.0 4835.0 4734.0 5245.0 5507.0 5443.0 5395.0 5188.0 4129.0

5 rows × 24 columns

BALTIMORE


In [3]:
ba_crime_data = pd.read_csv("sketch_161113_ActivityMatrix/data/BA_crime.csv", 
                        usecols = ['CrimeTime', 'District']).dropna()
print ba_crime_data.shape
ba_crime_data.head()


(285749, 2)

In [90]:
ba_crime = ba_crime_data.dropna()
ba_crime.District = ba_crime.District.str.lower()
ba_crime.groupby(['District']).agg('count')


Out[90]:
CrimeTime
District
central 33784
eastern 24168
gay street 1
northeastern 44832
northern 32005
northestern 280
northwestern 28690
southeastern 39245
southern 33031
southestern 205
southwestern 26242
western 23266

In [91]:
ba_crime.loc[ba_crime['District'] == 'northestern', 'District'] = 'northeastern'
ba_crime.loc[ba_crime['District'] == 'southestern', 'District'] = 'southeastern'
ba_crime = ba_crime[ba_crime.District != 'gay street']
ba_crime.groupby(['District']).agg('count')


Out[91]:
CrimeTime
District
central 33784
eastern 24168
northeastern 45112
northern 32005
northwestern 28690
southeastern 39450
southern 33031
southwestern 26242
western 23266

In [92]:
ba_crime['hour'] = ba_crime['CrimeTime'].str[:2].astype(int)
ba_crime.drop('CrimeTime', axis=1, inplace=True)
ba_crime.head()


Out[92]:
District hour
0 central 2
1 southeastern 2
2 northeastern 3
3 northeastern 3
4 central 3

In [112]:
ba_crime_table = ba_crime.pivot_table(index=['District'],
                        columns=['hour'], aggfunc=len)
ba_crime_table = ba_crime_table.drop(24, 1)
ba_crime_table


Out[112]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
District
central 1299.0 1457.0 1122.0 636.0 462.0 389.0 484.0 793.0 1187.0 1204.0 ... 1756.0 1961.0 2021.0 2041.0 2102.0 1840.0 1793.0 1692.0 1643.0 1509.0
eastern 1077.0 1058.0 648.0 521.0 412.0 313.0 418.0 660.0 927.0 931.0 ... 1209.0 1475.0 1378.0 1329.0 1385.0 1281.0 1178.0 1194.0 1160.0 1186.0
northeastern 2011.0 1816.0 1250.0 986.0 701.0 630.0 824.0 1302.0 1726.0 1620.0 ... 2052.0 2515.0 2482.0 2533.0 2625.0 2370.0 2532.0 2690.0 2518.0 2287.0
northern 1362.0 1322.0 864.0 604.0 539.0 419.0 537.0 832.0 1202.0 1209.0 ... 1518.0 1814.0 1804.0 1921.0 1972.0 1808.0 1737.0 1713.0 1668.0 1536.0
northwestern 1146.0 974.0 692.0 532.0 414.0 393.0 429.0 812.0 1116.0 1085.0 ... 1349.0 1702.0 1589.0 1680.0 1756.0 1580.0 1702.0 1649.0 1519.0 1410.0
southeastern 1724.0 1793.0 1478.0 870.0 586.0 549.0 616.0 1020.0 1368.0 1262.0 ... 1788.0 2093.0 2204.0 2439.0 2595.0 2259.0 2262.0 2209.0 2155.0 1891.0
southern 1563.0 1661.0 1213.0 806.0 535.0 478.0 543.0 834.0 1062.0 1063.0 ... 1487.0 1773.0 1875.0 1942.0 1996.0 1751.0 1815.0 1839.0 1844.0 1732.0
southwestern 1204.0 1009.0 702.0 507.0 378.0 375.0 488.0 780.0 969.0 987.0 ... 1196.0 1542.0 1493.0 1523.0 1619.0 1429.0 1484.0 1469.0 1386.0 1312.0
western 1112.0 963.0 608.0 498.0 347.0 342.0 325.0 581.0 774.0 882.0 ... 1124.0 1461.0 1279.0 1254.0 1240.0 1230.0 1289.0 1370.0 1266.0 1200.0

9 rows × 24 columns

Concat and normalize


In [113]:
nychiba = pd.concat([chi_crime_table, ny_crime_table, ba_crime_table])
nychiba.index = nychiba.index.str.lower()
nychiba


Out[113]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
central 2666.0 1449.0 1304.0 1044.0 831.0 844.0 1171.0 1768.0 2892.0 4229.0 ... 5871.0 5614.0 5592.0 5712.0 5308.0 4801.0 3817.0 3137.0 2622.0 1979.0
wentworth 4037.0 2193.0 1727.0 1398.0 1176.0 1066.0 1490.0 2372.0 3598.0 4639.0 ... 4838.0 5396.0 4698.0 4474.0 4949.0 5194.0 5057.0 4700.0 4334.0 3462.0
grand crossing 4999.0 3174.0 2438.0 2163.0 1603.0 1383.0 1738.0 2637.0 3888.0 5252.0 ... 5464.0 6151.0 5933.0 5676.0 5972.0 6399.0 6343.0 5915.0 5535.0 4695.0
south chicago 6481.0 3959.0 3170.0 2537.0 2075.0 1812.0 2048.0 2936.0 4515.0 6077.0 ... 6330.0 6712.0 6430.0 6210.0 6467.0 6664.0 6960.0 6819.0 6599.0 5437.0
calumet 4520.0 2685.0 2128.0 1838.0 1405.0 1241.0 1522.0 2328.0 3497.0 4446.0 ... 4873.0 5137.0 4835.0 4734.0 5245.0 5507.0 5443.0 5395.0 5188.0 4129.0
gresham 6448.0 3867.0 3154.0 2606.0 2081.0 1833.0 2135.0 2941.0 4590.0 6032.0 ... 6307.0 6928.0 6529.0 6259.0 6603.0 6858.0 6738.0 6540.0 6702.0 5645.0
englewood 6118.0 3767.0 3130.0 2575.0 1942.0 1641.0 1830.0 2600.0 4244.0 5693.0 ... 6534.0 7006.0 6443.0 6249.0 6969.0 7205.0 6945.0 6705.0 6740.0 5529.0
chicago lawn 7534.0 4016.0 3530.0 2888.0 2389.0 2292.0 2823.0 3717.0 5107.0 6600.0 ... 7493.0 7892.0 7625.0 7469.0 7996.0 8245.0 8046.0 7825.0 7920.0 6082.0
deering 5657.0 3346.0 2845.0 2225.0 1668.0 1529.0 1843.0 2538.0 3487.0 4368.0 ... 5236.0 5688.0 5510.0 5237.0 5990.0 6195.0 6098.0 5801.0 6140.0 4822.0
ogden 4680.0 2997.0 2416.0 1930.0 1586.0 1338.0 1522.0 2116.0 3276.0 4175.0 ... 4514.0 5051.0 4692.0 4749.0 5352.0 5749.0 5744.0 5435.0 5054.0 4291.0
harrison 6015.0 3839.0 3182.0 2650.0 1935.0 1620.0 2816.0 3157.0 4812.0 6172.0 ... 6807.0 7528.0 7546.0 7132.0 8118.0 9819.0 8763.0 7713.0 7360.0 5728.0
near west 5489.0 3434.0 3077.0 2378.0 1707.0 1442.0 1726.0 2685.0 3886.0 4833.0 ... 4880.0 5128.0 4771.0 4935.0 5237.0 5399.0 5584.0 5311.0 5423.0 4516.0
shakespeare 4792.0 3167.0 2742.0 2254.0 1621.0 1167.0 1287.0 1796.0 2804.0 3462.0 ... 3568.0 3644.0 3634.0 3936.0 4231.0 4338.0 4446.0 4278.0 4622.0 3847.0
austin 4618.0 2929.0 2316.0 1898.0 1361.0 1095.0 1324.0 1939.0 2788.0 4165.0 ... 4715.0 5095.0 4666.0 4596.0 5375.0 5867.0 5235.0 5124.0 5320.0 4470.0
jefferson park 4126.0 2034.0 1770.0 1433.0 1162.0 1191.0 1919.0 2078.0 2843.0 3512.0 ... 3368.0 3638.0 3451.0 3498.0 4046.0 3840.0 3772.0 3851.0 3939.0 3008.0
albany park 3397.0 1925.0 1888.0 1486.0 1139.0 912.0 1011.0 1535.0 2145.0 2783.0 ... 3019.0 3399.0 3069.0 3106.0 3340.0 3285.0 3353.0 3509.0 3565.0 2819.0
near north 4905.0 3753.0 3759.0 3118.0 1906.0 1098.0 1127.0 1609.0 2381.0 3555.0 ... 4936.0 5074.0 4932.0 5421.0 5625.0 5493.0 4706.0 4012.0 3852.0 3501.0
town hall 5867.0 3835.0 3571.0 2814.0 1950.0 1321.0 1378.0 2119.0 3122.0 3932.0 ... 4446.0 4893.0 4669.0 5011.0 5341.0 5299.0 5222.0 4930.0 4976.0 4184.0
lincoln 1925.0 960.0 806.0 691.0 522.0 431.0 574.0 861.0 1411.0 1666.0 ... 1767.0 1929.0 1685.0 1854.0 1915.0 1813.0 1910.0 1867.0 1890.0 1572.0
morgan park 3900.0 1934.0 1611.0 1303.0 915.0 840.0 1007.0 1781.0 2717.0 3541.0 ... 3831.0 4313.0 3454.0 3414.0 3605.0 3758.0 3802.0 3841.0 3689.0 2912.0
rogers park 3556.0 1805.0 1436.0 1241.0 928.0 747.0 985.0 1399.0 2034.0 2695.0 ... 2959.0 3379.0 3042.0 3157.0 3422.0 3398.0 3590.0 3574.0 3662.0 2753.0
grand central 6266.0 4011.0 3258.0 2784.0 2019.0 1803.0 2143.0 2855.0 4186.0 5820.0 ... 6396.0 6647.0 6073.0 6055.0 6665.0 7137.0 6987.0 6989.0 7181.0 5730.0
bronx 29424.0 21699.0 16854.0 13768.0 12018.0 9165.0 9472.0 13362.0 20428.0 21989.0 ... 34455.0 39599.0 37528.0 37272.0 38682.0 38619.0 38459.0 35753.0 33761.0 30247.0
brooklyn 46299.0 30918.0 24434.0 20067.0 17203.0 12874.0 13572.0 18831.0 30700.0 32913.0 ... 47016.0 52981.0 51086.0 50842.0 52331.0 51419.0 52380.0 48484.0 45430.0 41536.0
manhattan 31042.0 25505.0 21933.0 19048.0 15902.0 9290.0 8280.0 11762.0 20903.0 25766.0 ... 40489.0 45016.0 44235.0 44030.0 42970.0 39539.0 36320.0 31014.0 28726.0 28178.0
queens 27901.0 20368.0 17168.0 15267.0 15036.0 10126.0 9886.0 14050.0 20907.0 22300.0 ... 29928.0 33939.0 32318.0 32128.0 33394.0 32313.0 32332.0 30165.0 28886.0 27126.0
staten island 6778.0 4658.0 3608.0 2844.0 2245.0 1633.0 1788.0 3021.0 4978.0 5056.0 ... 7752.0 8223.0 7577.0 7550.0 7851.0 7723.0 8128.0 7086.0 7162.0 6278.0
central 1299.0 1457.0 1122.0 636.0 462.0 389.0 484.0 793.0 1187.0 1204.0 ... 1756.0 1961.0 2021.0 2041.0 2102.0 1840.0 1793.0 1692.0 1643.0 1509.0
eastern 1077.0 1058.0 648.0 521.0 412.0 313.0 418.0 660.0 927.0 931.0 ... 1209.0 1475.0 1378.0 1329.0 1385.0 1281.0 1178.0 1194.0 1160.0 1186.0
northeastern 2011.0 1816.0 1250.0 986.0 701.0 630.0 824.0 1302.0 1726.0 1620.0 ... 2052.0 2515.0 2482.0 2533.0 2625.0 2370.0 2532.0 2690.0 2518.0 2287.0
northern 1362.0 1322.0 864.0 604.0 539.0 419.0 537.0 832.0 1202.0 1209.0 ... 1518.0 1814.0 1804.0 1921.0 1972.0 1808.0 1737.0 1713.0 1668.0 1536.0
northwestern 1146.0 974.0 692.0 532.0 414.0 393.0 429.0 812.0 1116.0 1085.0 ... 1349.0 1702.0 1589.0 1680.0 1756.0 1580.0 1702.0 1649.0 1519.0 1410.0
southeastern 1724.0 1793.0 1478.0 870.0 586.0 549.0 616.0 1020.0 1368.0 1262.0 ... 1788.0 2093.0 2204.0 2439.0 2595.0 2259.0 2262.0 2209.0 2155.0 1891.0
southern 1563.0 1661.0 1213.0 806.0 535.0 478.0 543.0 834.0 1062.0 1063.0 ... 1487.0 1773.0 1875.0 1942.0 1996.0 1751.0 1815.0 1839.0 1844.0 1732.0
southwestern 1204.0 1009.0 702.0 507.0 378.0 375.0 488.0 780.0 969.0 987.0 ... 1196.0 1542.0 1493.0 1523.0 1619.0 1429.0 1484.0 1469.0 1386.0 1312.0
western 1112.0 963.0 608.0 498.0 347.0 342.0 325.0 581.0 774.0 882.0 ... 1124.0 1461.0 1279.0 1254.0 1240.0 1230.0 1289.0 1370.0 1266.0 1200.0

36 rows × 24 columns


In [114]:
nychiba_norm = nychiba.sub(nychiba.mean(axis=1), axis=0)
nychiba_norm = nychiba_norm.div(nychiba_norm.std(axis=1), axis=0)
nychiba_norm = (nychiba_norm*1000).astype(int)
nychiba_norm.to_csv('sketch_161113_ActivityMatrix/data/NY_CHI_BA_perhour_norm.csv')

GUN ONLY

NYC


In [4]:
ny_data = pd.read_csv('sketch_161113_ActivityMatrix/data/NY_crimes_2010_2016.csv', 
                 usecols=["CMPLNT_FR_TM", "BORO_NM",'PD_DESC'])

In [116]:
ny_data = ny_data.dropna()

In [117]:
ny_gun = ny_data[ny_data.PD_DESC.str.contains('WEAPONS')]
ny_gun['hour']= pd.to_datetime(ny_gun.CMPLNT_FR_TM, format='%H:%M:%S').dt.hour
ny_gun.drop('CMPLNT_FR_TM', axis=1, inplace=True)
ny_gun.drop('PD_DESC', axis=1, inplace=True)
ny_gun.head()


/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[117]:
BORO_NM hour
2 MANHATTAN 23
4 MANHATTAN 23
14 MANHATTAN 22
49 BRONX 21
57 BROOKLYN 21

In [118]:
ny_gun_table = ny_gun.pivot_table(index=['BORO_NM'],
                        columns=['hour'], aggfunc=len)
ny_gun_table


Out[118]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
BORO_NM
BRONX 1418 1186 802 523 289 185 310 159 240 301 ... 883 1104 1183 1249 1465 1695 1720 1784 1770 1697
BROOKLYN 1343 1075 677 454 303 153 527 233 253 300 ... 945 1011 1019 984 1094 1402 1533 1569 1637 1452
MANHATTAN 878 839 589 397 313 197 225 121 159 241 ... 657 700 784 737 692 750 805 843 846 876
QUEENS 909 718 505 398 317 217 345 192 237 261 ... 835 793 816 811 751 787 767 816 970 946
STATEN ISLAND 205 145 74 65 36 37 65 20 32 48 ... 115 121 130 103 111 171 170 174 221 246

5 rows × 24 columns

CHICAGO


In [7]:
chi_data = pd.read_csv('sketch_161113_ActivityMatrix/data/Chicago_Crimes_2010_2016.csv'
                 ,usecols=["Date", 'District', 'Description']).dropna()

In [8]:
chi_data.to_csv('sketch_161113_ActivityMatrix/data/Chicago_Crimes_clean.csv')

In [120]:
chi_data.head()


Out[120]:
Date Description District
0 05/03/2016 09:00:00 PM POSS: CANNABIS 30GMS OR LESS 11.0
1 05/03/2016 09:35:00 PM OVER $500 8.0
2 05/03/2016 02:45:00 PM SIMPLE 9.0
3 05/03/2016 06:42:00 PM DOMESTIC BATTERY SIMPLE 8.0
4 05/03/2016 03:00:00 PM STRONGARM - NO WEAPON 3.0

In [121]:
chi_gun = chi_data[chi_data.Description.str.contains('GUN|GUNS|WEAPON|WEAPONS')]
chi_gun['hour']= pd.to_datetime(chi_gun.Date, format = '%m/%d/%Y %I:%M:%S %p').dt.hour
chi_gun.drop('Description', axis=1, inplace=True)
chi_gun.drop('Date', axis=1, inplace=True)
chi_gun.head()


/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[121]:
District hour
4 3.0 15
21 15.0 22
26 11.0 23
28 9.0 22
30 7.0 22

In [122]:
chi_gun_table = chi_gun.pivot_table(index=['District'],
                        columns=['hour'], aggfunc=len)
chi_gun_table = chi_gun_table.ix[chicago_district]
chi_gun_table.index = chicago_district.values()
chi_gun_table.head()


Out[122]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
Central 147.0 147.0 119.0 90.0 76.0 54.0 44.0 32.0 59.0 73.0 ... 137.0 142.0 143.0 157.0 173.0 143.0 165.0 181.0 158.0 138.0
Wentworth 297.0 239.0 190.0 143.0 135.0 102.0 90.0 111.0 168.0 165.0 ... 338.0 452.0 433.0 388.0 482.0 474.0 504.0 520.0 389.0 369.0
Grand Crossing 472.0 403.0 328.0 247.0 175.0 147.0 147.0 180.0 215.0 239.0 ... 486.0 544.0 569.0 569.0 633.0 718.0 670.0 692.0 694.0 582.0
South Chicago 504.0 392.0 305.0 249.0 182.0 136.0 148.0 162.0 219.0 253.0 ... 512.0 554.0 567.0 596.0 614.0 691.0 724.0 720.0 673.0 566.0
Calumet 439.0 311.0 246.0 187.0 123.0 105.0 93.0 145.0 219.0 252.0 ... 395.0 467.0 475.0 456.0 552.0 590.0 597.0 622.0 638.0 553.0

5 rows × 24 columns

BALTIMORE


In [124]:
ba_gun_data = pd.read_csv("sketch_161113_ActivityMatrix/data/BA_firearm.csv", 
                        usecols = ['CrimeTime', 'District']).dropna()
print ba_gun_data.shape
ba_gun_data.head()


(21180, 2)
Out[124]:
CrimeTime District
0 02:56:00 SOUTHEASTERN
1 00:30:00 WESTERN
2 00:30:00 WESTERN
3 13:05:00 CENTRAL
4 14:00:00 EASTERN

In [130]:
ba_gun = ba_gun_data.dropna()
ba_gun.District = ba_gun.District.str.lower()
ba_gun.loc[ba_gun['District'] == 'northestern', 'District'] = 'northeastern'
ba_gun.loc[ba_gun['District'] == 'southestern', 'District'] = 'southeastern'
ba_gun.groupby(['District']).agg('count')


Out[130]:
CrimeTime
District
central 1873
eastern 2207
northeastern 3300
northern 2256
northwestern 2634
southeastern 2130
southern 2129
southwestern 2358
western 2293

In [131]:
ba_gun['hour'] = ba_gun['CrimeTime'].str[:2].astype(int)
ba_gun.drop('CrimeTime', axis=1, inplace=True)
ba_gun.head()


Out[131]:
District hour
0 southeastern 2
1 western 0
2 western 0
3 central 13
4 eastern 14

In [132]:
ba_gun_table = ba_gun.pivot_table(index=['District'],
                        columns=['hour'], aggfunc=len)
ba_gun_table = ba_gun_table.drop(24, 1)
ba_gun_table


Out[132]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
District
central 128.0 154.0 120.0 68.0 53.0 35.0 28.0 11.0 21.0 16.0 ... 65.0 54.0 63.0 83.0 89.0 118.0 121.0 177.0 147.0 137.0
eastern 148.0 127.0 95.0 64.0 65.0 23.0 25.0 23.0 23.0 30.0 ... 80.0 130.0 110.0 92.0 116.0 149.0 154.0 163.0 154.0 173.0
northeastern 213.0 200.0 149.0 120.0 64.0 52.0 36.0 35.0 50.0 33.0 ... 96.0 142.0 153.0 128.0 150.0 192.0 224.0 322.0 323.0 315.0
northern 156.0 187.0 142.0 72.0 55.0 49.0 30.0 22.0 12.0 24.0 ... 67.0 104.0 77.0 102.0 97.0 144.0 142.0 190.0 192.0 194.0
northwestern 204.0 173.0 117.0 98.0 56.0 65.0 42.0 17.0 47.0 36.0 ... 70.0 117.0 113.0 112.0 124.0 139.0 190.0 212.0 219.0 224.0
southeastern 153.0 149.0 125.0 80.0 45.0 38.0 29.0 13.0 37.0 27.0 ... 79.0 55.0 79.0 80.0 99.0 116.0 159.0 192.0 205.0 214.0
southern 147.0 170.0 94.0 71.0 49.0 55.0 26.0 23.0 22.0 22.0 ... 67.0 58.0 87.0 77.0 104.0 114.0 143.0 163.0 221.0 202.0
southwestern 154.0 131.0 94.0 60.0 44.0 43.0 44.0 25.0 31.0 35.0 ... 82.0 114.0 82.0 103.0 139.0 130.0 192.0 163.0 201.0 222.0
western 149.0 112.0 79.0 53.0 45.0 57.0 21.0 25.0 31.0 54.0 ... 110.0 137.0 86.0 102.0 117.0 141.0 149.0 186.0 203.0 178.0

9 rows × 24 columns

MERGE


In [134]:
nychiba_gun = pd.concat([chi_gun_table, ny_gun_table, ba_gun_table])
nychiba_gun.index = nychiba_gun.index.str.lower()
nychiba_gun


Out[134]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
central 147.0 147.0 119.0 90.0 76.0 54.0 44.0 32.0 59.0 73.0 ... 137.0 142.0 143.0 157.0 173.0 143.0 165.0 181.0 158.0 138.0
wentworth 297.0 239.0 190.0 143.0 135.0 102.0 90.0 111.0 168.0 165.0 ... 338.0 452.0 433.0 388.0 482.0 474.0 504.0 520.0 389.0 369.0
grand crossing 472.0 403.0 328.0 247.0 175.0 147.0 147.0 180.0 215.0 239.0 ... 486.0 544.0 569.0 569.0 633.0 718.0 670.0 692.0 694.0 582.0
south chicago 504.0 392.0 305.0 249.0 182.0 136.0 148.0 162.0 219.0 253.0 ... 512.0 554.0 567.0 596.0 614.0 691.0 724.0 720.0 673.0 566.0
calumet 439.0 311.0 246.0 187.0 123.0 105.0 93.0 145.0 219.0 252.0 ... 395.0 467.0 475.0 456.0 552.0 590.0 597.0 622.0 638.0 553.0
gresham 558.0 517.0 405.0 314.0 220.0 163.0 173.0 209.0 282.0 303.0 ... 476.0 612.0 657.0 640.0 640.0 694.0 708.0 720.0 745.0 696.0
englewood 662.0 548.0 454.0 376.0 262.0 183.0 186.0 180.0 262.0 331.0 ... 536.0 642.0 657.0 665.0 740.0 778.0 758.0 797.0 865.0 773.0
chicago lawn 386.0 334.0 304.0 236.0 177.0 152.0 119.0 141.0 249.0 270.0 ... 547.0 589.0 603.0 660.0 641.0 642.0 682.0 659.0 602.0 503.0
deering 413.0 373.0 289.0 258.0 192.0 160.0 142.0 105.0 148.0 177.0 ... 366.0 458.0 426.0 415.0 523.0 509.0 514.0 501.0 520.0 508.0
ogden 398.0 361.0 340.0 249.0 201.0 142.0 142.0 151.0 180.0 216.0 ... 350.0 421.0 388.0 415.0 512.0 535.0 586.0 586.0 549.0 484.0
harrison 585.0 514.0 431.0 385.0 274.0 218.0 202.0 241.0 282.0 357.0 ... 514.0 557.0 601.0 625.0 699.0 742.0 727.0 678.0 728.0 647.0
near west 315.0 347.0 322.0 281.0 174.0 129.0 102.0 102.0 124.0 136.0 ... 198.0 243.0 238.0 255.0 231.0 309.0 335.0 408.0 373.0 366.0
shakespeare 309.0 285.0 314.0 271.0 204.0 119.0 83.0 54.0 67.0 78.0 ... 143.0 157.0 158.0 180.0 198.0 228.0 235.0 245.0 297.0 271.0
austin 437.0 352.0 289.0 246.0 155.0 99.0 137.0 153.0 173.0 204.0 ... 274.0 380.0 346.0 376.0 406.0 533.0 478.0 521.0 535.0 463.0
jefferson park 117.0 118.0 96.0 82.0 152.0 257.0 247.0 230.0 210.0 190.0 ... 242.0 216.0 224.0 200.0 201.0 202.0 164.0 140.0 148.0 128.0
albany park 180.0 165.0 201.0 125.0 94.0 80.0 42.0 37.0 65.0 60.0 ... 140.0 171.0 163.0 146.0 165.0 166.0 172.0 197.0 209.0 204.0
near north 183.0 228.0 292.0 269.0 169.0 96.0 53.0 33.0 37.0 51.0 ... 85.0 108.0 135.0 120.0 127.0 154.0 158.0 158.0 150.0 151.0
town hall 264.0 351.0 397.0 360.0 249.0 130.0 66.0 53.0 48.0 53.0 ... 137.0 127.0 146.0 131.0 152.0 166.0 201.0 215.0 221.0 261.0
lincoln 91.0 83.0 70.0 59.0 45.0 28.0 31.0 31.0 41.0 31.0 ... 73.0 73.0 74.0 76.0 91.0 79.0 87.0 90.0 102.0 99.0
morgan park 256.0 177.0 135.0 104.0 72.0 47.0 47.0 81.0 102.0 124.0 ... 257.0 317.0 274.0 291.0 298.0 369.0 336.0 388.0 311.0 295.0
rogers park 208.0 158.0 185.0 160.0 99.0 76.0 60.0 51.0 42.0 67.0 ... 144.0 179.0 168.0 161.0 170.0 218.0 247.0 275.0 230.0 193.0
grand central 428.0 379.0 320.0 298.0 220.0 170.0 124.0 143.0 178.0 205.0 ... 398.0 471.0 528.0 492.0 545.0 520.0 554.0 617.0 518.0 475.0
bronx 1418.0 1186.0 802.0 523.0 289.0 185.0 310.0 159.0 240.0 301.0 ... 883.0 1104.0 1183.0 1249.0 1465.0 1695.0 1720.0 1784.0 1770.0 1697.0
brooklyn 1343.0 1075.0 677.0 454.0 303.0 153.0 527.0 233.0 253.0 300.0 ... 945.0 1011.0 1019.0 984.0 1094.0 1402.0 1533.0 1569.0 1637.0 1452.0
manhattan 878.0 839.0 589.0 397.0 313.0 197.0 225.0 121.0 159.0 241.0 ... 657.0 700.0 784.0 737.0 692.0 750.0 805.0 843.0 846.0 876.0
queens 909.0 718.0 505.0 398.0 317.0 217.0 345.0 192.0 237.0 261.0 ... 835.0 793.0 816.0 811.0 751.0 787.0 767.0 816.0 970.0 946.0
staten island 205.0 145.0 74.0 65.0 36.0 37.0 65.0 20.0 32.0 48.0 ... 115.0 121.0 130.0 103.0 111.0 171.0 170.0 174.0 221.0 246.0
central 128.0 154.0 120.0 68.0 53.0 35.0 28.0 11.0 21.0 16.0 ... 65.0 54.0 63.0 83.0 89.0 118.0 121.0 177.0 147.0 137.0
eastern 148.0 127.0 95.0 64.0 65.0 23.0 25.0 23.0 23.0 30.0 ... 80.0 130.0 110.0 92.0 116.0 149.0 154.0 163.0 154.0 173.0
northeastern 213.0 200.0 149.0 120.0 64.0 52.0 36.0 35.0 50.0 33.0 ... 96.0 142.0 153.0 128.0 150.0 192.0 224.0 322.0 323.0 315.0
northern 156.0 187.0 142.0 72.0 55.0 49.0 30.0 22.0 12.0 24.0 ... 67.0 104.0 77.0 102.0 97.0 144.0 142.0 190.0 192.0 194.0
northwestern 204.0 173.0 117.0 98.0 56.0 65.0 42.0 17.0 47.0 36.0 ... 70.0 117.0 113.0 112.0 124.0 139.0 190.0 212.0 219.0 224.0
southeastern 153.0 149.0 125.0 80.0 45.0 38.0 29.0 13.0 37.0 27.0 ... 79.0 55.0 79.0 80.0 99.0 116.0 159.0 192.0 205.0 214.0
southern 147.0 170.0 94.0 71.0 49.0 55.0 26.0 23.0 22.0 22.0 ... 67.0 58.0 87.0 77.0 104.0 114.0 143.0 163.0 221.0 202.0
southwestern 154.0 131.0 94.0 60.0 44.0 43.0 44.0 25.0 31.0 35.0 ... 82.0 114.0 82.0 103.0 139.0 130.0 192.0 163.0 201.0 222.0
western 149.0 112.0 79.0 53.0 45.0 57.0 21.0 25.0 31.0 54.0 ... 110.0 137.0 86.0 102.0 117.0 141.0 149.0 186.0 203.0 178.0

36 rows × 24 columns


In [135]:
nychiba_gun_norm = nychiba_gun.sub(nychi_gun.mean(axis=1), axis=0)
nychiba_gun_norm = nychiba_gun_norm.div(nychiba_gun_norm.std(axis=1), axis=0)
nychiba_gun_norm = (nychiba_gun_norm*1000).astype(int)
nychiba_gun_norm.to_csv('sketch_161113_ActivityMatrix/data/NY_CHI_BA_perhour_gun_norm.csv')
nychiba_gun_norm.head()


Out[135]:
hour 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23
central 721 721 75 -594 -917 -1425 -1655 -1932 -1309 -986 ... 490 605 628 951 1321 628 1136 1505 975 513
wentworth 20 -395 -747 -1085 -1142 -1379 -1465 -1314 -905 -927 ... 315 1134 997 674 1349 1292 1507 1622 681 538
grand crossing 230 -133 -529 -957 -1337 -1485 -1485 -1311 -1126 -999 ... 304 611 743 743 1081 1530 1276 1392 1403 811
south chicago 378 -199 -648 -937 -1283 -1521 -1459 -1386 -1092 -917 ... 419 636 703 853 946 1343 1513 1493 1250 698
calumet 395 -343 -718 -1059 -1429 -1532 -1602 -1302 -874 -684 ... 141 556 603 493 1047 1266 1307 1451 1544 1053

5 rows × 24 columns


In [ ]: