In [1]:
import pandas as pd
import numpy as np
In [97]:
ny = pd.read_csv('sketch_161113_ActivityMatrix/data/NY_crimes_2010_2016.csv',
usecols=["CMPLNT_FR_TM", "BORO_NM"])
print ny.shape
ny.head()
In [100]:
ny['hour'] = pd.to_datetime(ny['CMPLNT_FR_TM'], format='%H:%M:%S').dt.hour
ny.drop('CMPLNT_FR_TM', axis=1, inplace=True)
ny.head()
Out[100]:
BORO_NM
hour
0
BROOKLYN
1.0
1
BRONX
23.0
2
MANHATTAN
23.0
3
BROOKLYN
23.0
4
MANHATTAN
23.0
In [108]:
ny_crime_table = ny.pivot_table(index=['BORO_NM'],
columns=['hour'], aggfunc=len)
ny_crime_table
Out[108]:
hour
0.0
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
...
14.0
15.0
16.0
17.0
18.0
19.0
20.0
21.0
22.0
23.0
BORO_NM
BRONX
29424
21699
16854
13768
12018
9165
9472
13362
20428
21989
...
34455
39599
37528
37272
38682
38619
38459
35753
33761
30247
BROOKLYN
46299
30918
24434
20067
17203
12874
13572
18831
30700
32913
...
47016
52981
51086
50842
52331
51419
52380
48484
45430
41536
MANHATTAN
31042
25505
21933
19048
15902
9290
8280
11762
20903
25766
...
40489
45016
44235
44030
42970
39539
36320
31014
28726
28178
QUEENS
27901
20368
17168
15267
15036
10126
9886
14050
20907
22300
...
29928
33939
32318
32128
33394
32313
32332
30165
28886
27126
STATEN ISLAND
6778
4658
3608
2844
2245
1633
1788
3021
4978
5056
...
7752
8223
7577
7550
7851
7723
8128
7086
7162
6278
5 rows × 24 columns
In [ ]:
ny_table.to_csv('data/ny_count_BORO_HOUR')
In [103]:
import pickle
chicago_district = pickle.load(open('sketch_161113_ActivityMatrix/data/chicago_district.p','rb'))
chi = pd.read_csv('sketch_161113_ActivityMatrix/data/Chicago_Crimes_2010_2016.csv',
usecols=["Date", 'District' ]).dropna()
chi.shape
chi.head()
Out[103]:
Date
District
0
05/03/2016 09:00:00 PM
11.0
1
05/03/2016 09:35:00 PM
8.0
2
05/03/2016 02:45:00 PM
9.0
3
05/03/2016 06:42:00 PM
8.0
4
05/03/2016 03:00:00 PM
3.0
In [104]:
chi['hour'] = pd.to_datetime(chi['Date'], format = '%m/%d/%Y %I:%M:%S %p').dt.hour
chi.drop('Date', axis=1, inplace=True)
In [106]:
chi_crime_table = chi.pivot_table(index=['District'],
columns=['hour'], aggfunc=len)
chi_crime_table.head()
Out[106]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
District
1.0
2666.0
1449.0
1304.0
1044.0
831.0
844.0
1171.0
1768.0
2892.0
4229.0
...
5871.0
5614.0
5592.0
5712.0
5308.0
4801.0
3817.0
3137.0
2622.0
1979.0
2.0
4037.0
2193.0
1727.0
1398.0
1176.0
1066.0
1490.0
2372.0
3598.0
4639.0
...
4838.0
5396.0
4698.0
4474.0
4949.0
5194.0
5057.0
4700.0
4334.0
3462.0
3.0
4999.0
3174.0
2438.0
2163.0
1603.0
1383.0
1738.0
2637.0
3888.0
5252.0
...
5464.0
6151.0
5933.0
5676.0
5972.0
6399.0
6343.0
5915.0
5535.0
4695.0
4.0
6481.0
3959.0
3170.0
2537.0
2075.0
1812.0
2048.0
2936.0
4515.0
6077.0
...
6330.0
6712.0
6430.0
6210.0
6467.0
6664.0
6960.0
6819.0
6599.0
5437.0
5.0
4520.0
2685.0
2128.0
1838.0
1405.0
1241.0
1522.0
2328.0
3497.0
4446.0
...
4873.0
5137.0
4835.0
4734.0
5245.0
5507.0
5443.0
5395.0
5188.0
4129.0
5 rows × 24 columns
In [107]:
chi_crime_table = chi_crime_table.ix[chicago_district]
chi_crime_table.index = chicago_district.values()
chi_crime_table.head()
Out[107]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
Central
2666.0
1449.0
1304.0
1044.0
831.0
844.0
1171.0
1768.0
2892.0
4229.0
...
5871.0
5614.0
5592.0
5712.0
5308.0
4801.0
3817.0
3137.0
2622.0
1979.0
Wentworth
4037.0
2193.0
1727.0
1398.0
1176.0
1066.0
1490.0
2372.0
3598.0
4639.0
...
4838.0
5396.0
4698.0
4474.0
4949.0
5194.0
5057.0
4700.0
4334.0
3462.0
Grand Crossing
4999.0
3174.0
2438.0
2163.0
1603.0
1383.0
1738.0
2637.0
3888.0
5252.0
...
5464.0
6151.0
5933.0
5676.0
5972.0
6399.0
6343.0
5915.0
5535.0
4695.0
South Chicago
6481.0
3959.0
3170.0
2537.0
2075.0
1812.0
2048.0
2936.0
4515.0
6077.0
...
6330.0
6712.0
6430.0
6210.0
6467.0
6664.0
6960.0
6819.0
6599.0
5437.0
Calumet
4520.0
2685.0
2128.0
1838.0
1405.0
1241.0
1522.0
2328.0
3497.0
4446.0
...
4873.0
5137.0
4835.0
4734.0
5245.0
5507.0
5443.0
5395.0
5188.0
4129.0
5 rows × 24 columns
In [3]:
ba_crime_data = pd.read_csv("sketch_161113_ActivityMatrix/data/BA_crime.csv",
usecols = ['CrimeTime', 'District']).dropna()
print ba_crime_data.shape
ba_crime_data.head()
(285749, 2)
In [90]:
ba_crime = ba_crime_data.dropna()
ba_crime.District = ba_crime.District.str.lower()
ba_crime.groupby(['District']).agg('count')
Out[90]:
CrimeTime
District
central
33784
eastern
24168
gay street
1
northeastern
44832
northern
32005
northestern
280
northwestern
28690
southeastern
39245
southern
33031
southestern
205
southwestern
26242
western
23266
In [91]:
ba_crime.loc[ba_crime['District'] == 'northestern', 'District'] = 'northeastern'
ba_crime.loc[ba_crime['District'] == 'southestern', 'District'] = 'southeastern'
ba_crime = ba_crime[ba_crime.District != 'gay street']
ba_crime.groupby(['District']).agg('count')
Out[91]:
CrimeTime
District
central
33784
eastern
24168
northeastern
45112
northern
32005
northwestern
28690
southeastern
39450
southern
33031
southwestern
26242
western
23266
In [92]:
ba_crime['hour'] = ba_crime['CrimeTime'].str[:2].astype(int)
ba_crime.drop('CrimeTime', axis=1, inplace=True)
ba_crime.head()
Out[92]:
District
hour
0
central
2
1
southeastern
2
2
northeastern
3
3
northeastern
3
4
central
3
In [112]:
ba_crime_table = ba_crime.pivot_table(index=['District'],
columns=['hour'], aggfunc=len)
ba_crime_table = ba_crime_table.drop(24, 1)
ba_crime_table
Out[112]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
District
central
1299.0
1457.0
1122.0
636.0
462.0
389.0
484.0
793.0
1187.0
1204.0
...
1756.0
1961.0
2021.0
2041.0
2102.0
1840.0
1793.0
1692.0
1643.0
1509.0
eastern
1077.0
1058.0
648.0
521.0
412.0
313.0
418.0
660.0
927.0
931.0
...
1209.0
1475.0
1378.0
1329.0
1385.0
1281.0
1178.0
1194.0
1160.0
1186.0
northeastern
2011.0
1816.0
1250.0
986.0
701.0
630.0
824.0
1302.0
1726.0
1620.0
...
2052.0
2515.0
2482.0
2533.0
2625.0
2370.0
2532.0
2690.0
2518.0
2287.0
northern
1362.0
1322.0
864.0
604.0
539.0
419.0
537.0
832.0
1202.0
1209.0
...
1518.0
1814.0
1804.0
1921.0
1972.0
1808.0
1737.0
1713.0
1668.0
1536.0
northwestern
1146.0
974.0
692.0
532.0
414.0
393.0
429.0
812.0
1116.0
1085.0
...
1349.0
1702.0
1589.0
1680.0
1756.0
1580.0
1702.0
1649.0
1519.0
1410.0
southeastern
1724.0
1793.0
1478.0
870.0
586.0
549.0
616.0
1020.0
1368.0
1262.0
...
1788.0
2093.0
2204.0
2439.0
2595.0
2259.0
2262.0
2209.0
2155.0
1891.0
southern
1563.0
1661.0
1213.0
806.0
535.0
478.0
543.0
834.0
1062.0
1063.0
...
1487.0
1773.0
1875.0
1942.0
1996.0
1751.0
1815.0
1839.0
1844.0
1732.0
southwestern
1204.0
1009.0
702.0
507.0
378.0
375.0
488.0
780.0
969.0
987.0
...
1196.0
1542.0
1493.0
1523.0
1619.0
1429.0
1484.0
1469.0
1386.0
1312.0
western
1112.0
963.0
608.0
498.0
347.0
342.0
325.0
581.0
774.0
882.0
...
1124.0
1461.0
1279.0
1254.0
1240.0
1230.0
1289.0
1370.0
1266.0
1200.0
9 rows × 24 columns
In [113]:
nychiba = pd.concat([chi_crime_table, ny_crime_table, ba_crime_table])
nychiba.index = nychiba.index.str.lower()
nychiba
Out[113]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
central
2666.0
1449.0
1304.0
1044.0
831.0
844.0
1171.0
1768.0
2892.0
4229.0
...
5871.0
5614.0
5592.0
5712.0
5308.0
4801.0
3817.0
3137.0
2622.0
1979.0
wentworth
4037.0
2193.0
1727.0
1398.0
1176.0
1066.0
1490.0
2372.0
3598.0
4639.0
...
4838.0
5396.0
4698.0
4474.0
4949.0
5194.0
5057.0
4700.0
4334.0
3462.0
grand crossing
4999.0
3174.0
2438.0
2163.0
1603.0
1383.0
1738.0
2637.0
3888.0
5252.0
...
5464.0
6151.0
5933.0
5676.0
5972.0
6399.0
6343.0
5915.0
5535.0
4695.0
south chicago
6481.0
3959.0
3170.0
2537.0
2075.0
1812.0
2048.0
2936.0
4515.0
6077.0
...
6330.0
6712.0
6430.0
6210.0
6467.0
6664.0
6960.0
6819.0
6599.0
5437.0
calumet
4520.0
2685.0
2128.0
1838.0
1405.0
1241.0
1522.0
2328.0
3497.0
4446.0
...
4873.0
5137.0
4835.0
4734.0
5245.0
5507.0
5443.0
5395.0
5188.0
4129.0
gresham
6448.0
3867.0
3154.0
2606.0
2081.0
1833.0
2135.0
2941.0
4590.0
6032.0
...
6307.0
6928.0
6529.0
6259.0
6603.0
6858.0
6738.0
6540.0
6702.0
5645.0
englewood
6118.0
3767.0
3130.0
2575.0
1942.0
1641.0
1830.0
2600.0
4244.0
5693.0
...
6534.0
7006.0
6443.0
6249.0
6969.0
7205.0
6945.0
6705.0
6740.0
5529.0
chicago lawn
7534.0
4016.0
3530.0
2888.0
2389.0
2292.0
2823.0
3717.0
5107.0
6600.0
...
7493.0
7892.0
7625.0
7469.0
7996.0
8245.0
8046.0
7825.0
7920.0
6082.0
deering
5657.0
3346.0
2845.0
2225.0
1668.0
1529.0
1843.0
2538.0
3487.0
4368.0
...
5236.0
5688.0
5510.0
5237.0
5990.0
6195.0
6098.0
5801.0
6140.0
4822.0
ogden
4680.0
2997.0
2416.0
1930.0
1586.0
1338.0
1522.0
2116.0
3276.0
4175.0
...
4514.0
5051.0
4692.0
4749.0
5352.0
5749.0
5744.0
5435.0
5054.0
4291.0
harrison
6015.0
3839.0
3182.0
2650.0
1935.0
1620.0
2816.0
3157.0
4812.0
6172.0
...
6807.0
7528.0
7546.0
7132.0
8118.0
9819.0
8763.0
7713.0
7360.0
5728.0
near west
5489.0
3434.0
3077.0
2378.0
1707.0
1442.0
1726.0
2685.0
3886.0
4833.0
...
4880.0
5128.0
4771.0
4935.0
5237.0
5399.0
5584.0
5311.0
5423.0
4516.0
shakespeare
4792.0
3167.0
2742.0
2254.0
1621.0
1167.0
1287.0
1796.0
2804.0
3462.0
...
3568.0
3644.0
3634.0
3936.0
4231.0
4338.0
4446.0
4278.0
4622.0
3847.0
austin
4618.0
2929.0
2316.0
1898.0
1361.0
1095.0
1324.0
1939.0
2788.0
4165.0
...
4715.0
5095.0
4666.0
4596.0
5375.0
5867.0
5235.0
5124.0
5320.0
4470.0
jefferson park
4126.0
2034.0
1770.0
1433.0
1162.0
1191.0
1919.0
2078.0
2843.0
3512.0
...
3368.0
3638.0
3451.0
3498.0
4046.0
3840.0
3772.0
3851.0
3939.0
3008.0
albany park
3397.0
1925.0
1888.0
1486.0
1139.0
912.0
1011.0
1535.0
2145.0
2783.0
...
3019.0
3399.0
3069.0
3106.0
3340.0
3285.0
3353.0
3509.0
3565.0
2819.0
near north
4905.0
3753.0
3759.0
3118.0
1906.0
1098.0
1127.0
1609.0
2381.0
3555.0
...
4936.0
5074.0
4932.0
5421.0
5625.0
5493.0
4706.0
4012.0
3852.0
3501.0
town hall
5867.0
3835.0
3571.0
2814.0
1950.0
1321.0
1378.0
2119.0
3122.0
3932.0
...
4446.0
4893.0
4669.0
5011.0
5341.0
5299.0
5222.0
4930.0
4976.0
4184.0
lincoln
1925.0
960.0
806.0
691.0
522.0
431.0
574.0
861.0
1411.0
1666.0
...
1767.0
1929.0
1685.0
1854.0
1915.0
1813.0
1910.0
1867.0
1890.0
1572.0
morgan park
3900.0
1934.0
1611.0
1303.0
915.0
840.0
1007.0
1781.0
2717.0
3541.0
...
3831.0
4313.0
3454.0
3414.0
3605.0
3758.0
3802.0
3841.0
3689.0
2912.0
rogers park
3556.0
1805.0
1436.0
1241.0
928.0
747.0
985.0
1399.0
2034.0
2695.0
...
2959.0
3379.0
3042.0
3157.0
3422.0
3398.0
3590.0
3574.0
3662.0
2753.0
grand central
6266.0
4011.0
3258.0
2784.0
2019.0
1803.0
2143.0
2855.0
4186.0
5820.0
...
6396.0
6647.0
6073.0
6055.0
6665.0
7137.0
6987.0
6989.0
7181.0
5730.0
bronx
29424.0
21699.0
16854.0
13768.0
12018.0
9165.0
9472.0
13362.0
20428.0
21989.0
...
34455.0
39599.0
37528.0
37272.0
38682.0
38619.0
38459.0
35753.0
33761.0
30247.0
brooklyn
46299.0
30918.0
24434.0
20067.0
17203.0
12874.0
13572.0
18831.0
30700.0
32913.0
...
47016.0
52981.0
51086.0
50842.0
52331.0
51419.0
52380.0
48484.0
45430.0
41536.0
manhattan
31042.0
25505.0
21933.0
19048.0
15902.0
9290.0
8280.0
11762.0
20903.0
25766.0
...
40489.0
45016.0
44235.0
44030.0
42970.0
39539.0
36320.0
31014.0
28726.0
28178.0
queens
27901.0
20368.0
17168.0
15267.0
15036.0
10126.0
9886.0
14050.0
20907.0
22300.0
...
29928.0
33939.0
32318.0
32128.0
33394.0
32313.0
32332.0
30165.0
28886.0
27126.0
staten island
6778.0
4658.0
3608.0
2844.0
2245.0
1633.0
1788.0
3021.0
4978.0
5056.0
...
7752.0
8223.0
7577.0
7550.0
7851.0
7723.0
8128.0
7086.0
7162.0
6278.0
central
1299.0
1457.0
1122.0
636.0
462.0
389.0
484.0
793.0
1187.0
1204.0
...
1756.0
1961.0
2021.0
2041.0
2102.0
1840.0
1793.0
1692.0
1643.0
1509.0
eastern
1077.0
1058.0
648.0
521.0
412.0
313.0
418.0
660.0
927.0
931.0
...
1209.0
1475.0
1378.0
1329.0
1385.0
1281.0
1178.0
1194.0
1160.0
1186.0
northeastern
2011.0
1816.0
1250.0
986.0
701.0
630.0
824.0
1302.0
1726.0
1620.0
...
2052.0
2515.0
2482.0
2533.0
2625.0
2370.0
2532.0
2690.0
2518.0
2287.0
northern
1362.0
1322.0
864.0
604.0
539.0
419.0
537.0
832.0
1202.0
1209.0
...
1518.0
1814.0
1804.0
1921.0
1972.0
1808.0
1737.0
1713.0
1668.0
1536.0
northwestern
1146.0
974.0
692.0
532.0
414.0
393.0
429.0
812.0
1116.0
1085.0
...
1349.0
1702.0
1589.0
1680.0
1756.0
1580.0
1702.0
1649.0
1519.0
1410.0
southeastern
1724.0
1793.0
1478.0
870.0
586.0
549.0
616.0
1020.0
1368.0
1262.0
...
1788.0
2093.0
2204.0
2439.0
2595.0
2259.0
2262.0
2209.0
2155.0
1891.0
southern
1563.0
1661.0
1213.0
806.0
535.0
478.0
543.0
834.0
1062.0
1063.0
...
1487.0
1773.0
1875.0
1942.0
1996.0
1751.0
1815.0
1839.0
1844.0
1732.0
southwestern
1204.0
1009.0
702.0
507.0
378.0
375.0
488.0
780.0
969.0
987.0
...
1196.0
1542.0
1493.0
1523.0
1619.0
1429.0
1484.0
1469.0
1386.0
1312.0
western
1112.0
963.0
608.0
498.0
347.0
342.0
325.0
581.0
774.0
882.0
...
1124.0
1461.0
1279.0
1254.0
1240.0
1230.0
1289.0
1370.0
1266.0
1200.0
36 rows × 24 columns
In [114]:
nychiba_norm = nychiba.sub(nychiba.mean(axis=1), axis=0)
nychiba_norm = nychiba_norm.div(nychiba_norm.std(axis=1), axis=0)
nychiba_norm = (nychiba_norm*1000).astype(int)
nychiba_norm.to_csv('sketch_161113_ActivityMatrix/data/NY_CHI_BA_perhour_norm.csv')
In [4]:
ny_data = pd.read_csv('sketch_161113_ActivityMatrix/data/NY_crimes_2010_2016.csv',
usecols=["CMPLNT_FR_TM", "BORO_NM",'PD_DESC'])
In [116]:
ny_data = ny_data.dropna()
In [117]:
ny_gun = ny_data[ny_data.PD_DESC.str.contains('WEAPONS')]
ny_gun['hour']= pd.to_datetime(ny_gun.CMPLNT_FR_TM, format='%H:%M:%S').dt.hour
ny_gun.drop('CMPLNT_FR_TM', axis=1, inplace=True)
ny_gun.drop('PD_DESC', axis=1, inplace=True)
ny_gun.head()
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
from ipykernel import kernelapp as app
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[117]:
BORO_NM
hour
2
MANHATTAN
23
4
MANHATTAN
23
14
MANHATTAN
22
49
BRONX
21
57
BROOKLYN
21
In [118]:
ny_gun_table = ny_gun.pivot_table(index=['BORO_NM'],
columns=['hour'], aggfunc=len)
ny_gun_table
Out[118]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
BORO_NM
BRONX
1418
1186
802
523
289
185
310
159
240
301
...
883
1104
1183
1249
1465
1695
1720
1784
1770
1697
BROOKLYN
1343
1075
677
454
303
153
527
233
253
300
...
945
1011
1019
984
1094
1402
1533
1569
1637
1452
MANHATTAN
878
839
589
397
313
197
225
121
159
241
...
657
700
784
737
692
750
805
843
846
876
QUEENS
909
718
505
398
317
217
345
192
237
261
...
835
793
816
811
751
787
767
816
970
946
STATEN ISLAND
205
145
74
65
36
37
65
20
32
48
...
115
121
130
103
111
171
170
174
221
246
5 rows × 24 columns
In [7]:
chi_data = pd.read_csv('sketch_161113_ActivityMatrix/data/Chicago_Crimes_2010_2016.csv'
,usecols=["Date", 'District', 'Description']).dropna()
In [8]:
chi_data.to_csv('sketch_161113_ActivityMatrix/data/Chicago_Crimes_clean.csv')
In [120]:
chi_data.head()
Out[120]:
Date
Description
District
0
05/03/2016 09:00:00 PM
POSS: CANNABIS 30GMS OR LESS
11.0
1
05/03/2016 09:35:00 PM
OVER $500
8.0
2
05/03/2016 02:45:00 PM
SIMPLE
9.0
3
05/03/2016 06:42:00 PM
DOMESTIC BATTERY SIMPLE
8.0
4
05/03/2016 03:00:00 PM
STRONGARM - NO WEAPON
3.0
In [121]:
chi_gun = chi_data[chi_data.Description.str.contains('GUN|GUNS|WEAPON|WEAPONS')]
chi_gun['hour']= pd.to_datetime(chi_gun.Date, format = '%m/%d/%Y %I:%M:%S %p').dt.hour
chi_gun.drop('Description', axis=1, inplace=True)
chi_gun.drop('Date', axis=1, inplace=True)
chi_gun.head()
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
from ipykernel import kernelapp as app
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
app.launch_new_instance()
/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
Out[121]:
District
hour
4
3.0
15
21
15.0
22
26
11.0
23
28
9.0
22
30
7.0
22
In [122]:
chi_gun_table = chi_gun.pivot_table(index=['District'],
columns=['hour'], aggfunc=len)
chi_gun_table = chi_gun_table.ix[chicago_district]
chi_gun_table.index = chicago_district.values()
chi_gun_table.head()
Out[122]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
Central
147.0
147.0
119.0
90.0
76.0
54.0
44.0
32.0
59.0
73.0
...
137.0
142.0
143.0
157.0
173.0
143.0
165.0
181.0
158.0
138.0
Wentworth
297.0
239.0
190.0
143.0
135.0
102.0
90.0
111.0
168.0
165.0
...
338.0
452.0
433.0
388.0
482.0
474.0
504.0
520.0
389.0
369.0
Grand Crossing
472.0
403.0
328.0
247.0
175.0
147.0
147.0
180.0
215.0
239.0
...
486.0
544.0
569.0
569.0
633.0
718.0
670.0
692.0
694.0
582.0
South Chicago
504.0
392.0
305.0
249.0
182.0
136.0
148.0
162.0
219.0
253.0
...
512.0
554.0
567.0
596.0
614.0
691.0
724.0
720.0
673.0
566.0
Calumet
439.0
311.0
246.0
187.0
123.0
105.0
93.0
145.0
219.0
252.0
...
395.0
467.0
475.0
456.0
552.0
590.0
597.0
622.0
638.0
553.0
5 rows × 24 columns
In [124]:
ba_gun_data = pd.read_csv("sketch_161113_ActivityMatrix/data/BA_firearm.csv",
usecols = ['CrimeTime', 'District']).dropna()
print ba_gun_data.shape
ba_gun_data.head()
(21180, 2)
Out[124]:
CrimeTime
District
0
02:56:00
SOUTHEASTERN
1
00:30:00
WESTERN
2
00:30:00
WESTERN
3
13:05:00
CENTRAL
4
14:00:00
EASTERN
In [130]:
ba_gun = ba_gun_data.dropna()
ba_gun.District = ba_gun.District.str.lower()
ba_gun.loc[ba_gun['District'] == 'northestern', 'District'] = 'northeastern'
ba_gun.loc[ba_gun['District'] == 'southestern', 'District'] = 'southeastern'
ba_gun.groupby(['District']).agg('count')
Out[130]:
CrimeTime
District
central
1873
eastern
2207
northeastern
3300
northern
2256
northwestern
2634
southeastern
2130
southern
2129
southwestern
2358
western
2293
In [131]:
ba_gun['hour'] = ba_gun['CrimeTime'].str[:2].astype(int)
ba_gun.drop('CrimeTime', axis=1, inplace=True)
ba_gun.head()
Out[131]:
District
hour
0
southeastern
2
1
western
0
2
western
0
3
central
13
4
eastern
14
In [132]:
ba_gun_table = ba_gun.pivot_table(index=['District'],
columns=['hour'], aggfunc=len)
ba_gun_table = ba_gun_table.drop(24, 1)
ba_gun_table
Out[132]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
District
central
128.0
154.0
120.0
68.0
53.0
35.0
28.0
11.0
21.0
16.0
...
65.0
54.0
63.0
83.0
89.0
118.0
121.0
177.0
147.0
137.0
eastern
148.0
127.0
95.0
64.0
65.0
23.0
25.0
23.0
23.0
30.0
...
80.0
130.0
110.0
92.0
116.0
149.0
154.0
163.0
154.0
173.0
northeastern
213.0
200.0
149.0
120.0
64.0
52.0
36.0
35.0
50.0
33.0
...
96.0
142.0
153.0
128.0
150.0
192.0
224.0
322.0
323.0
315.0
northern
156.0
187.0
142.0
72.0
55.0
49.0
30.0
22.0
12.0
24.0
...
67.0
104.0
77.0
102.0
97.0
144.0
142.0
190.0
192.0
194.0
northwestern
204.0
173.0
117.0
98.0
56.0
65.0
42.0
17.0
47.0
36.0
...
70.0
117.0
113.0
112.0
124.0
139.0
190.0
212.0
219.0
224.0
southeastern
153.0
149.0
125.0
80.0
45.0
38.0
29.0
13.0
37.0
27.0
...
79.0
55.0
79.0
80.0
99.0
116.0
159.0
192.0
205.0
214.0
southern
147.0
170.0
94.0
71.0
49.0
55.0
26.0
23.0
22.0
22.0
...
67.0
58.0
87.0
77.0
104.0
114.0
143.0
163.0
221.0
202.0
southwestern
154.0
131.0
94.0
60.0
44.0
43.0
44.0
25.0
31.0
35.0
...
82.0
114.0
82.0
103.0
139.0
130.0
192.0
163.0
201.0
222.0
western
149.0
112.0
79.0
53.0
45.0
57.0
21.0
25.0
31.0
54.0
...
110.0
137.0
86.0
102.0
117.0
141.0
149.0
186.0
203.0
178.0
9 rows × 24 columns
In [134]:
nychiba_gun = pd.concat([chi_gun_table, ny_gun_table, ba_gun_table])
nychiba_gun.index = nychiba_gun.index.str.lower()
nychiba_gun
Out[134]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
central
147.0
147.0
119.0
90.0
76.0
54.0
44.0
32.0
59.0
73.0
...
137.0
142.0
143.0
157.0
173.0
143.0
165.0
181.0
158.0
138.0
wentworth
297.0
239.0
190.0
143.0
135.0
102.0
90.0
111.0
168.0
165.0
...
338.0
452.0
433.0
388.0
482.0
474.0
504.0
520.0
389.0
369.0
grand crossing
472.0
403.0
328.0
247.0
175.0
147.0
147.0
180.0
215.0
239.0
...
486.0
544.0
569.0
569.0
633.0
718.0
670.0
692.0
694.0
582.0
south chicago
504.0
392.0
305.0
249.0
182.0
136.0
148.0
162.0
219.0
253.0
...
512.0
554.0
567.0
596.0
614.0
691.0
724.0
720.0
673.0
566.0
calumet
439.0
311.0
246.0
187.0
123.0
105.0
93.0
145.0
219.0
252.0
...
395.0
467.0
475.0
456.0
552.0
590.0
597.0
622.0
638.0
553.0
gresham
558.0
517.0
405.0
314.0
220.0
163.0
173.0
209.0
282.0
303.0
...
476.0
612.0
657.0
640.0
640.0
694.0
708.0
720.0
745.0
696.0
englewood
662.0
548.0
454.0
376.0
262.0
183.0
186.0
180.0
262.0
331.0
...
536.0
642.0
657.0
665.0
740.0
778.0
758.0
797.0
865.0
773.0
chicago lawn
386.0
334.0
304.0
236.0
177.0
152.0
119.0
141.0
249.0
270.0
...
547.0
589.0
603.0
660.0
641.0
642.0
682.0
659.0
602.0
503.0
deering
413.0
373.0
289.0
258.0
192.0
160.0
142.0
105.0
148.0
177.0
...
366.0
458.0
426.0
415.0
523.0
509.0
514.0
501.0
520.0
508.0
ogden
398.0
361.0
340.0
249.0
201.0
142.0
142.0
151.0
180.0
216.0
...
350.0
421.0
388.0
415.0
512.0
535.0
586.0
586.0
549.0
484.0
harrison
585.0
514.0
431.0
385.0
274.0
218.0
202.0
241.0
282.0
357.0
...
514.0
557.0
601.0
625.0
699.0
742.0
727.0
678.0
728.0
647.0
near west
315.0
347.0
322.0
281.0
174.0
129.0
102.0
102.0
124.0
136.0
...
198.0
243.0
238.0
255.0
231.0
309.0
335.0
408.0
373.0
366.0
shakespeare
309.0
285.0
314.0
271.0
204.0
119.0
83.0
54.0
67.0
78.0
...
143.0
157.0
158.0
180.0
198.0
228.0
235.0
245.0
297.0
271.0
austin
437.0
352.0
289.0
246.0
155.0
99.0
137.0
153.0
173.0
204.0
...
274.0
380.0
346.0
376.0
406.0
533.0
478.0
521.0
535.0
463.0
jefferson park
117.0
118.0
96.0
82.0
152.0
257.0
247.0
230.0
210.0
190.0
...
242.0
216.0
224.0
200.0
201.0
202.0
164.0
140.0
148.0
128.0
albany park
180.0
165.0
201.0
125.0
94.0
80.0
42.0
37.0
65.0
60.0
...
140.0
171.0
163.0
146.0
165.0
166.0
172.0
197.0
209.0
204.0
near north
183.0
228.0
292.0
269.0
169.0
96.0
53.0
33.0
37.0
51.0
...
85.0
108.0
135.0
120.0
127.0
154.0
158.0
158.0
150.0
151.0
town hall
264.0
351.0
397.0
360.0
249.0
130.0
66.0
53.0
48.0
53.0
...
137.0
127.0
146.0
131.0
152.0
166.0
201.0
215.0
221.0
261.0
lincoln
91.0
83.0
70.0
59.0
45.0
28.0
31.0
31.0
41.0
31.0
...
73.0
73.0
74.0
76.0
91.0
79.0
87.0
90.0
102.0
99.0
morgan park
256.0
177.0
135.0
104.0
72.0
47.0
47.0
81.0
102.0
124.0
...
257.0
317.0
274.0
291.0
298.0
369.0
336.0
388.0
311.0
295.0
rogers park
208.0
158.0
185.0
160.0
99.0
76.0
60.0
51.0
42.0
67.0
...
144.0
179.0
168.0
161.0
170.0
218.0
247.0
275.0
230.0
193.0
grand central
428.0
379.0
320.0
298.0
220.0
170.0
124.0
143.0
178.0
205.0
...
398.0
471.0
528.0
492.0
545.0
520.0
554.0
617.0
518.0
475.0
bronx
1418.0
1186.0
802.0
523.0
289.0
185.0
310.0
159.0
240.0
301.0
...
883.0
1104.0
1183.0
1249.0
1465.0
1695.0
1720.0
1784.0
1770.0
1697.0
brooklyn
1343.0
1075.0
677.0
454.0
303.0
153.0
527.0
233.0
253.0
300.0
...
945.0
1011.0
1019.0
984.0
1094.0
1402.0
1533.0
1569.0
1637.0
1452.0
manhattan
878.0
839.0
589.0
397.0
313.0
197.0
225.0
121.0
159.0
241.0
...
657.0
700.0
784.0
737.0
692.0
750.0
805.0
843.0
846.0
876.0
queens
909.0
718.0
505.0
398.0
317.0
217.0
345.0
192.0
237.0
261.0
...
835.0
793.0
816.0
811.0
751.0
787.0
767.0
816.0
970.0
946.0
staten island
205.0
145.0
74.0
65.0
36.0
37.0
65.0
20.0
32.0
48.0
...
115.0
121.0
130.0
103.0
111.0
171.0
170.0
174.0
221.0
246.0
central
128.0
154.0
120.0
68.0
53.0
35.0
28.0
11.0
21.0
16.0
...
65.0
54.0
63.0
83.0
89.0
118.0
121.0
177.0
147.0
137.0
eastern
148.0
127.0
95.0
64.0
65.0
23.0
25.0
23.0
23.0
30.0
...
80.0
130.0
110.0
92.0
116.0
149.0
154.0
163.0
154.0
173.0
northeastern
213.0
200.0
149.0
120.0
64.0
52.0
36.0
35.0
50.0
33.0
...
96.0
142.0
153.0
128.0
150.0
192.0
224.0
322.0
323.0
315.0
northern
156.0
187.0
142.0
72.0
55.0
49.0
30.0
22.0
12.0
24.0
...
67.0
104.0
77.0
102.0
97.0
144.0
142.0
190.0
192.0
194.0
northwestern
204.0
173.0
117.0
98.0
56.0
65.0
42.0
17.0
47.0
36.0
...
70.0
117.0
113.0
112.0
124.0
139.0
190.0
212.0
219.0
224.0
southeastern
153.0
149.0
125.0
80.0
45.0
38.0
29.0
13.0
37.0
27.0
...
79.0
55.0
79.0
80.0
99.0
116.0
159.0
192.0
205.0
214.0
southern
147.0
170.0
94.0
71.0
49.0
55.0
26.0
23.0
22.0
22.0
...
67.0
58.0
87.0
77.0
104.0
114.0
143.0
163.0
221.0
202.0
southwestern
154.0
131.0
94.0
60.0
44.0
43.0
44.0
25.0
31.0
35.0
...
82.0
114.0
82.0
103.0
139.0
130.0
192.0
163.0
201.0
222.0
western
149.0
112.0
79.0
53.0
45.0
57.0
21.0
25.0
31.0
54.0
...
110.0
137.0
86.0
102.0
117.0
141.0
149.0
186.0
203.0
178.0
36 rows × 24 columns
In [135]:
nychiba_gun_norm = nychiba_gun.sub(nychi_gun.mean(axis=1), axis=0)
nychiba_gun_norm = nychiba_gun_norm.div(nychiba_gun_norm.std(axis=1), axis=0)
nychiba_gun_norm = (nychiba_gun_norm*1000).astype(int)
nychiba_gun_norm.to_csv('sketch_161113_ActivityMatrix/data/NY_CHI_BA_perhour_gun_norm.csv')
nychiba_gun_norm.head()
Out[135]:
hour
0
1
2
3
4
5
6
7
8
9
...
14
15
16
17
18
19
20
21
22
23
central
721
721
75
-594
-917
-1425
-1655
-1932
-1309
-986
...
490
605
628
951
1321
628
1136
1505
975
513
wentworth
20
-395
-747
-1085
-1142
-1379
-1465
-1314
-905
-927
...
315
1134
997
674
1349
1292
1507
1622
681
538
grand crossing
230
-133
-529
-957
-1337
-1485
-1485
-1311
-1126
-999
...
304
611
743
743
1081
1530
1276
1392
1403
811
south chicago
378
-199
-648
-937
-1283
-1521
-1459
-1386
-1092
-917
...
419
636
703
853
946
1343
1513
1493
1250
698
calumet
395
-343
-718
-1059
-1429
-1532
-1602
-1302
-874
-684
...
141
556
603
493
1047
1266
1307
1451
1544
1053
5 rows × 24 columns
In [ ]: