In [103]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import *
In [2]:
cols = ['Week Number', 'RequestType', 'Created Year', 'CD', 'Longitude', 'Latitude']
df = pd.read_csv('311_parsed.csv', usecols = cols)
In [50]:
c = df.groupby(['Created Year','Week Number','RequestType']).size().unstack()
c['Total'] = c.sum(axis=1)
c
Out[50]:
RequestType
Bulky Items
Dead Animal Removal
Electronic Waste
Feedback
Graffiti Removal
Homeless Encampment
Illegal Dumping Pickup
Metal/Household Appliances
Multiple Streetlight Issue
Other
Report Water Waste
Single Streetlight Issue
Total
Created Year
Week Number
2015
32
4192.0
274.0
330.0
31.0
NaN
43.0
362.0
377.0
15.0
78.0
NaN
81.0
5783.0
33
8313.0
499.0
672.0
146.0
2.0
56.0
596.0
710.0
58.0
36.0
NaN
159.0
11247.0
34
8111.0
490.0
704.0
164.0
1.0
73.0
661.0
738.0
71.0
13.0
3.0
214.0
11243.0
35
7685.0
473.0
641.0
25.0
6.0
67.0
609.0
767.0
68.0
9.0
2.0
210.0
10562.0
36
8494.0
421.0
606.0
1.0
NaN
69.0
767.0
763.0
79.0
10.0
2.0
175.0
11387.0
37
8143.0
471.0
603.0
1.0
NaN
52.0
704.0
758.0
36.0
21.0
2.0
148.0
10939.0
38
9037.0
439.0
755.0
1.0
2.0
64.0
1388.0
843.0
56.0
10.0
3.0
245.0
12843.0
39
11289.0
491.0
963.0
2.0
NaN
60.0
1521.0
897.0
72.0
10.0
11.0
224.0
15540.0
40
9901.0
457.0
801.0
5.0
1.0
70.0
1516.0
941.0
86.0
4.0
6.0
250.0
14038.0
41
8230.0
436.0
654.0
NaN
NaN
67.0
896.0
827.0
73.0
12.0
9.0
242.0
11446.0
42
8349.0
449.0
597.0
3.0
NaN
68.0
946.0
837.0
54.0
16.0
9.0
193.0
11521.0
43
8006.0
440.0
645.0
2.0
NaN
78.0
946.0
929.0
89.0
5.0
7.0
296.0
11443.0
44
8028.0
470.0
746.0
4.0
NaN
78.0
968.0
1000.0
52.0
11.0
10.0
256.0
11623.0
45
7410.0
426.0
726.0
6.0
NaN
68.0
984.0
853.0
81.0
4.0
4.0
341.0
10903.0
46
6835.0
427.0
724.0
5.0
NaN
69.0
912.0
866.0
95.0
10.0
8.0
289.0
10240.0
47
6913.0
416.0
813.0
7.0
1.0
96.0
971.0
939.0
73.0
7.0
6.0
385.0
10627.0
48
5428.0
351.0
671.0
4.0
NaN
58.0
726.0
812.0
71.0
2.0
5.0
145.0
8273.0
49
7556.0
443.0
987.0
4.0
13.0
78.0
1050.0
1106.0
102.0
4.0
7.0
269.0
11619.0
50
7274.0
444.0
1039.0
7.0
NaN
62.0
922.0
995.0
112.0
6.0
5.0
358.0
11224.0
51
6506.0
366.0
791.0
1.0
2.0
57.0
843.0
919.0
77.0
5.0
5.0
355.0
9927.0
52
4727.0
352.0
586.0
NaN
NaN
48.0
657.0
688.0
37.0
2.0
1.0
222.0
7320.0
53
4873.0
268.0
632.0
1.0
NaN
42.0
633.0
737.0
47.0
2.0
3.0
130.0
7368.0
2016
1
6691.0
431.0
843.0
7.0
1849.0
112.0
1118.0
965.0
123.0
292.0
2.0
242.0
12675.0
2
6989.0
474.0
848.0
9.0
2876.0
102.0
1081.0
977.0
119.0
318.0
7.0
253.0
14053.0
3
6734.0
490.0
787.0
6.0
2913.0
119.0
924.0
918.0
97.0
334.0
14.0
319.0
13655.0
4
6908.0
446.0
784.0
10.0
2676.0
110.0
878.0
927.0
97.0
401.0
14.0
260.0
13511.0
5
6713.0
406.0
788.0
2.0
2584.0
83.0
851.0
889.0
79.0
333.0
14.0
305.0
13047.0
6
6540.0
474.0
667.0
8.0
2705.0
101.0
964.0
913.0
73.0
310.0
25.0
235.0
13015.0
7
6405.0
379.0
746.0
8.0
2394.0
108.0
804.0
931.0
55.0
301.0
37.0
262.0
12430.0
8
7990.0
387.0
902.0
7.0
2931.0
126.0
1018.0
1168.0
88.0
324.0
26.0
323.0
15290.0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
2018
1
9285.0
450.0
770.0
4.0
2115.0
584.0
1953.0
1453.0
80.0
403.0
35.0
189.0
17321.0
2
9459.0
480.0
816.0
12.0
1971.0
574.0
1869.0
1436.0
130.0
402.0
21.0
168.0
17338.0
3
11021.0
513.0
793.0
5.0
2442.0
678.0
2015.0
1537.0
170.0
363.0
18.0
177.0
19732.0
4
10029.0
469.0
877.0
8.0
2324.0
669.0
1878.0
1505.0
158.0
297.0
17.0
215.0
18446.0
5
10090.0
456.0
787.0
4.0
2567.0
602.0
1785.0
1532.0
97.0
316.0
39.0
165.0
18440.0
6
10748.0
409.0
891.0
9.0
2334.0
631.0
1804.0
1547.0
109.0
325.0
31.0
184.0
19022.0
7
7835.0
442.0
678.0
16.0
2418.0
443.0
1551.0
1377.0
91.0
304.0
31.0
221.0
15407.0
8
7712.0
273.0
679.0
16.0
2082.0
430.0
1533.0
1232.0
84.0
253.0
23.0
213.0
14530.0
9
9460.0
381.0
754.0
7.0
6403.0
615.0
1476.0
1425.0
92.0
276.0
33.0
212.0
21134.0
10
9811.0
355.0
738.0
4.0
8409.0
753.0
1574.0
1513.0
94.0
273.0
22.0
212.0
23758.0
11
9317.0
408.0
702.0
14.0
7186.0
599.0
1720.0
1392.0
87.0
270.0
28.0
213.0
21936.0
12
8957.0
396.0
655.0
8.0
6877.0
543.0
1437.0
1309.0
78.0
315.0
18.0
203.0
20796.0
13
10330.0
460.0
807.0
8.0
7708.0
616.0
1537.0
1635.0
110.0
270.0
23.0
207.0
23711.0
14
10679.0
522.0
812.0
6.0
7931.0
781.0
1854.0
1635.0
85.0
354.0
32.0
192.0
24883.0
15
10278.0
538.0
772.0
7.0
8850.0
648.0
1672.0
1628.0
97.0
402.0
18.0
244.0
25154.0
16
9930.0
538.0
785.0
4.0
6165.0
596.0
1522.0
1552.0
86.0
314.0
28.0
200.0
21720.0
17
10759.0
508.0
818.0
4.0
5259.0
697.0
1539.0
1703.0
75.0
299.0
17.0
128.0
21806.0
18
10922.0
453.0
763.0
3.0
5946.0
777.0
1753.0
1656.0
53.0
287.0
30.0
131.0
22774.0
19
10566.0
538.0
795.0
8.0
6323.0
701.0
1701.0
1669.0
68.0
311.0
29.0
139.0
22848.0
20
10662.0
524.0
792.0
14.0
5960.0
781.0
1779.0
1638.0
53.0
304.0
31.0
142.0
22680.0
21
10217.0
585.0
746.0
11.0
6024.0
764.0
1800.0
1583.0
49.0
338.0
20.0
117.0
22254.0
22
10779.0
531.0
742.0
11.0
5107.0
728.0
1800.0
1655.0
52.0
314.0
21.0
109.0
21849.0
23
11749.0
559.0
823.0
11.0
5684.0
864.0
1848.0
1890.0
59.0
311.0
23.0
154.0
23975.0
24
11547.0
613.0
834.0
45.0
5847.0
988.0
2034.0
1767.0
34.0
373.0
34.0
131.0
24247.0
25
11549.0
568.0
838.0
55.0
6174.0
953.0
1953.0
1885.0
45.0
371.0
33.0
131.0
24555.0
26
12337.0
597.0
815.0
4.0
5975.0
828.0
1979.0
1944.0
76.0
362.0
27.0
138.0
25082.0
27
11282.0
590.0
734.0
6.0
4489.0
625.0
1745.0
1706.0
88.0
383.0
29.0
118.0
21795.0
28
12395.0
653.0
884.0
7.0
6240.0
845.0
2181.0
1852.0
81.0
317.0
39.0
133.0
25627.0
29
13363.0
615.0
919.0
10.0
5541.0
790.0
2379.0
1953.0
79.0
294.0
37.0
128.0
26108.0
30
750.0
26.0
44.0
NaN
375.0
38.0
114.0
89.0
7.0
13.0
5.0
14.0
1475.0
157 rows × 13 columns
In [25]:
cols = ['Bulky Items', 'Dead Animal Removal', 'Electronic Waste', 'Feedback', 'Graffiti Removal', 'Homeless Encampment', 'Illegal Dumping Pickup', 'Metal/Household Appliances', 'Multiple Streetlight Issue', 'Other', 'Report Water Waste', 'Single Streetlight Issue']
In [105]:
#Boxplot of all data to visualize any outliers
plt.rcParams.update({'font.size': 10})
c.plot.box(return_type='dict', widths = 0.7, figsize=(10,15))
plt.xticks(rotation = 'vertical')
plt.show()
In [ ]:
#Max Outliers for individual weekly request grouped by year:
for x in cols:
max_specific_outliers = c[c[x] > c[x].mean() + 2 * c[x].std()]
max_specific_outliers = max_specific_outliers[[x,'Total']]
In [34]:
#Min Outliers for individual weekly request types grouped by year:
for x in cols:
min_specific_outliers = c[c[x] < c[x].mean() - 2 * c[x].std()]
min_specific_outliers = min_specific_outliers[[x,'Total']]
In [107]:
#Max Outliers for total of weekly requests grouped by year
max_total_outliers = c[c['Total'] > c['Total'].mean() + 2 * c['Total'].std()]
max_total_outliers = max_total_outliers[['Total']]
max_total_outliers
Out[107]:
RequestType
Total
Created Year
Week Number
2018
15
25154.0
26
25082.0
28
25627.0
29
26108.0
In [38]:
#Min Outliers for total of weekly requests grouped by year
min_total_outliers = c[c['Total'] < c['Total'].mean() - 2 * c['Total'].std()]
min_total_outliers = min_total_outliers[['Total']]
min_total_outliers
Out[38]:
RequestType
Total
Created Year
Week Number
2015
32
5783.0
2016
53
1005.0
2017
43
2699.0
47
44.0
48
5170.0
2018
30
1475.0
In [44]:
c['Diff'] = c['Total'].diff()
c['Diff'] = c['Diff'].abs()
In [45]:
c
Out[45]:
RequestType
Bulky Items
Dead Animal Removal
Electronic Waste
Feedback
Graffiti Removal
Homeless Encampment
Illegal Dumping Pickup
Metal/Household Appliances
Multiple Streetlight Issue
Other
Report Water Waste
Single Streetlight Issue
Total
Diff
Created Year
Week Number
2015
32
4192.0
274.0
330.0
31.0
NaN
43.0
362.0
377.0
15.0
78.0
NaN
81.0
5783.0
NaN
33
8313.0
499.0
672.0
146.0
2.0
56.0
596.0
710.0
58.0
36.0
NaN
159.0
11247.0
5464.0
34
8111.0
490.0
704.0
164.0
1.0
73.0
661.0
738.0
71.0
13.0
3.0
214.0
11243.0
4.0
35
7685.0
473.0
641.0
25.0
6.0
67.0
609.0
767.0
68.0
9.0
2.0
210.0
10562.0
681.0
36
8494.0
421.0
606.0
1.0
NaN
69.0
767.0
763.0
79.0
10.0
2.0
175.0
11387.0
825.0
37
8143.0
471.0
603.0
1.0
NaN
52.0
704.0
758.0
36.0
21.0
2.0
148.0
10939.0
448.0
38
9037.0
439.0
755.0
1.0
2.0
64.0
1388.0
843.0
56.0
10.0
3.0
245.0
12843.0
1904.0
39
11289.0
491.0
963.0
2.0
NaN
60.0
1521.0
897.0
72.0
10.0
11.0
224.0
15540.0
2697.0
40
9901.0
457.0
801.0
5.0
1.0
70.0
1516.0
941.0
86.0
4.0
6.0
250.0
14038.0
1502.0
41
8230.0
436.0
654.0
NaN
NaN
67.0
896.0
827.0
73.0
12.0
9.0
242.0
11446.0
2592.0
42
8349.0
449.0
597.0
3.0
NaN
68.0
946.0
837.0
54.0
16.0
9.0
193.0
11521.0
75.0
43
8006.0
440.0
645.0
2.0
NaN
78.0
946.0
929.0
89.0
5.0
7.0
296.0
11443.0
78.0
44
8028.0
470.0
746.0
4.0
NaN
78.0
968.0
1000.0
52.0
11.0
10.0
256.0
11623.0
180.0
45
7410.0
426.0
726.0
6.0
NaN
68.0
984.0
853.0
81.0
4.0
4.0
341.0
10903.0
720.0
46
6835.0
427.0
724.0
5.0
NaN
69.0
912.0
866.0
95.0
10.0
8.0
289.0
10240.0
663.0
47
6913.0
416.0
813.0
7.0
1.0
96.0
971.0
939.0
73.0
7.0
6.0
385.0
10627.0
387.0
48
5428.0
351.0
671.0
4.0
NaN
58.0
726.0
812.0
71.0
2.0
5.0
145.0
8273.0
2354.0
49
7556.0
443.0
987.0
4.0
13.0
78.0
1050.0
1106.0
102.0
4.0
7.0
269.0
11619.0
3346.0
50
7274.0
444.0
1039.0
7.0
NaN
62.0
922.0
995.0
112.0
6.0
5.0
358.0
11224.0
395.0
51
6506.0
366.0
791.0
1.0
2.0
57.0
843.0
919.0
77.0
5.0
5.0
355.0
9927.0
1297.0
52
4727.0
352.0
586.0
NaN
NaN
48.0
657.0
688.0
37.0
2.0
1.0
222.0
7320.0
2607.0
53
4873.0
268.0
632.0
1.0
NaN
42.0
633.0
737.0
47.0
2.0
3.0
130.0
7368.0
48.0
2016
1
6691.0
431.0
843.0
7.0
1849.0
112.0
1118.0
965.0
123.0
292.0
2.0
242.0
12675.0
5307.0
2
6989.0
474.0
848.0
9.0
2876.0
102.0
1081.0
977.0
119.0
318.0
7.0
253.0
14053.0
1378.0
3
6734.0
490.0
787.0
6.0
2913.0
119.0
924.0
918.0
97.0
334.0
14.0
319.0
13655.0
398.0
4
6908.0
446.0
784.0
10.0
2676.0
110.0
878.0
927.0
97.0
401.0
14.0
260.0
13511.0
144.0
5
6713.0
406.0
788.0
2.0
2584.0
83.0
851.0
889.0
79.0
333.0
14.0
305.0
13047.0
464.0
6
6540.0
474.0
667.0
8.0
2705.0
101.0
964.0
913.0
73.0
310.0
25.0
235.0
13015.0
32.0
7
6405.0
379.0
746.0
8.0
2394.0
108.0
804.0
931.0
55.0
301.0
37.0
262.0
12430.0
585.0
8
7990.0
387.0
902.0
7.0
2931.0
126.0
1018.0
1168.0
88.0
324.0
26.0
323.0
15290.0
2860.0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
2018
1
9285.0
450.0
770.0
4.0
2115.0
584.0
1953.0
1453.0
80.0
403.0
35.0
189.0
17321.0
4320.0
2
9459.0
480.0
816.0
12.0
1971.0
574.0
1869.0
1436.0
130.0
402.0
21.0
168.0
17338.0
17.0
3
11021.0
513.0
793.0
5.0
2442.0
678.0
2015.0
1537.0
170.0
363.0
18.0
177.0
19732.0
2394.0
4
10029.0
469.0
877.0
8.0
2324.0
669.0
1878.0
1505.0
158.0
297.0
17.0
215.0
18446.0
1286.0
5
10090.0
456.0
787.0
4.0
2567.0
602.0
1785.0
1532.0
97.0
316.0
39.0
165.0
18440.0
6.0
6
10748.0
409.0
891.0
9.0
2334.0
631.0
1804.0
1547.0
109.0
325.0
31.0
184.0
19022.0
582.0
7
7835.0
442.0
678.0
16.0
2418.0
443.0
1551.0
1377.0
91.0
304.0
31.0
221.0
15407.0
3615.0
8
7712.0
273.0
679.0
16.0
2082.0
430.0
1533.0
1232.0
84.0
253.0
23.0
213.0
14530.0
877.0
9
9460.0
381.0
754.0
7.0
6403.0
615.0
1476.0
1425.0
92.0
276.0
33.0
212.0
21134.0
6604.0
10
9811.0
355.0
738.0
4.0
8409.0
753.0
1574.0
1513.0
94.0
273.0
22.0
212.0
23758.0
2624.0
11
9317.0
408.0
702.0
14.0
7186.0
599.0
1720.0
1392.0
87.0
270.0
28.0
213.0
21936.0
1822.0
12
8957.0
396.0
655.0
8.0
6877.0
543.0
1437.0
1309.0
78.0
315.0
18.0
203.0
20796.0
1140.0
13
10330.0
460.0
807.0
8.0
7708.0
616.0
1537.0
1635.0
110.0
270.0
23.0
207.0
23711.0
2915.0
14
10679.0
522.0
812.0
6.0
7931.0
781.0
1854.0
1635.0
85.0
354.0
32.0
192.0
24883.0
1172.0
15
10278.0
538.0
772.0
7.0
8850.0
648.0
1672.0
1628.0
97.0
402.0
18.0
244.0
25154.0
271.0
16
9930.0
538.0
785.0
4.0
6165.0
596.0
1522.0
1552.0
86.0
314.0
28.0
200.0
21720.0
3434.0
17
10759.0
508.0
818.0
4.0
5259.0
697.0
1539.0
1703.0
75.0
299.0
17.0
128.0
21806.0
86.0
18
10922.0
453.0
763.0
3.0
5946.0
777.0
1753.0
1656.0
53.0
287.0
30.0
131.0
22774.0
968.0
19
10566.0
538.0
795.0
8.0
6323.0
701.0
1701.0
1669.0
68.0
311.0
29.0
139.0
22848.0
74.0
20
10662.0
524.0
792.0
14.0
5960.0
781.0
1779.0
1638.0
53.0
304.0
31.0
142.0
22680.0
168.0
21
10217.0
585.0
746.0
11.0
6024.0
764.0
1800.0
1583.0
49.0
338.0
20.0
117.0
22254.0
426.0
22
10779.0
531.0
742.0
11.0
5107.0
728.0
1800.0
1655.0
52.0
314.0
21.0
109.0
21849.0
405.0
23
11749.0
559.0
823.0
11.0
5684.0
864.0
1848.0
1890.0
59.0
311.0
23.0
154.0
23975.0
2126.0
24
11547.0
613.0
834.0
45.0
5847.0
988.0
2034.0
1767.0
34.0
373.0
34.0
131.0
24247.0
272.0
25
11549.0
568.0
838.0
55.0
6174.0
953.0
1953.0
1885.0
45.0
371.0
33.0
131.0
24555.0
308.0
26
12337.0
597.0
815.0
4.0
5975.0
828.0
1979.0
1944.0
76.0
362.0
27.0
138.0
25082.0
527.0
27
11282.0
590.0
734.0
6.0
4489.0
625.0
1745.0
1706.0
88.0
383.0
29.0
118.0
21795.0
3287.0
28
12395.0
653.0
884.0
7.0
6240.0
845.0
2181.0
1852.0
81.0
317.0
39.0
133.0
25627.0
3832.0
29
13363.0
615.0
919.0
10.0
5541.0
790.0
2379.0
1953.0
79.0
294.0
37.0
128.0
26108.0
481.0
30
750.0
26.0
44.0
NaN
375.0
38.0
114.0
89.0
7.0
13.0
5.0
14.0
1475.0
24633.0
157 rows × 14 columns
In [46]:
#Max Outliers for change in total number of weekly requests grouped by year
max_change_outliers = c[c['Diff'] > c['Diff'].mean() + 2 * c['Diff'].std()]
max_change_outliers = max_change_outliers[['Diff']]
max_change_outliers
Out[46]:
RequestType
Diff
Created Year
Week Number
2016
53
11553.0
2017
1
13847.0
43
12770.0
44
10203.0
47
10146.0
49
11170.0
2018
30
24633.0
In [47]:
#Min Outliers for change in total number of weekly requests grouped by year
min_change_outliers = c[c['Diff'] < c['Diff'].mean() - 2 * c['Diff'].std()]
min_change_outliers = min_change_outliers[['Diff']]
min_change_outliers
Out[47]:
RequestType
Diff
Created Year
Week Number
In [ ]:
Content source: datala/311-analysis
Similar notebooks: