In [103]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import *

In [2]:
cols = ['Week Number', 'RequestType', 'Created Year', 'CD', 'Longitude', 'Latitude']
df = pd.read_csv('311_parsed.csv', usecols = cols)

In [50]:
c = df.groupby(['Created Year','Week Number','RequestType']).size().unstack()
c['Total'] = c.sum(axis=1)
c


Out[50]:
RequestType Bulky Items Dead Animal Removal Electronic Waste Feedback Graffiti Removal Homeless Encampment Illegal Dumping Pickup Metal/Household Appliances Multiple Streetlight Issue Other Report Water Waste Single Streetlight Issue Total
Created Year Week Number
2015 32 4192.0 274.0 330.0 31.0 NaN 43.0 362.0 377.0 15.0 78.0 NaN 81.0 5783.0
33 8313.0 499.0 672.0 146.0 2.0 56.0 596.0 710.0 58.0 36.0 NaN 159.0 11247.0
34 8111.0 490.0 704.0 164.0 1.0 73.0 661.0 738.0 71.0 13.0 3.0 214.0 11243.0
35 7685.0 473.0 641.0 25.0 6.0 67.0 609.0 767.0 68.0 9.0 2.0 210.0 10562.0
36 8494.0 421.0 606.0 1.0 NaN 69.0 767.0 763.0 79.0 10.0 2.0 175.0 11387.0
37 8143.0 471.0 603.0 1.0 NaN 52.0 704.0 758.0 36.0 21.0 2.0 148.0 10939.0
38 9037.0 439.0 755.0 1.0 2.0 64.0 1388.0 843.0 56.0 10.0 3.0 245.0 12843.0
39 11289.0 491.0 963.0 2.0 NaN 60.0 1521.0 897.0 72.0 10.0 11.0 224.0 15540.0
40 9901.0 457.0 801.0 5.0 1.0 70.0 1516.0 941.0 86.0 4.0 6.0 250.0 14038.0
41 8230.0 436.0 654.0 NaN NaN 67.0 896.0 827.0 73.0 12.0 9.0 242.0 11446.0
42 8349.0 449.0 597.0 3.0 NaN 68.0 946.0 837.0 54.0 16.0 9.0 193.0 11521.0
43 8006.0 440.0 645.0 2.0 NaN 78.0 946.0 929.0 89.0 5.0 7.0 296.0 11443.0
44 8028.0 470.0 746.0 4.0 NaN 78.0 968.0 1000.0 52.0 11.0 10.0 256.0 11623.0
45 7410.0 426.0 726.0 6.0 NaN 68.0 984.0 853.0 81.0 4.0 4.0 341.0 10903.0
46 6835.0 427.0 724.0 5.0 NaN 69.0 912.0 866.0 95.0 10.0 8.0 289.0 10240.0
47 6913.0 416.0 813.0 7.0 1.0 96.0 971.0 939.0 73.0 7.0 6.0 385.0 10627.0
48 5428.0 351.0 671.0 4.0 NaN 58.0 726.0 812.0 71.0 2.0 5.0 145.0 8273.0
49 7556.0 443.0 987.0 4.0 13.0 78.0 1050.0 1106.0 102.0 4.0 7.0 269.0 11619.0
50 7274.0 444.0 1039.0 7.0 NaN 62.0 922.0 995.0 112.0 6.0 5.0 358.0 11224.0
51 6506.0 366.0 791.0 1.0 2.0 57.0 843.0 919.0 77.0 5.0 5.0 355.0 9927.0
52 4727.0 352.0 586.0 NaN NaN 48.0 657.0 688.0 37.0 2.0 1.0 222.0 7320.0
53 4873.0 268.0 632.0 1.0 NaN 42.0 633.0 737.0 47.0 2.0 3.0 130.0 7368.0
2016 1 6691.0 431.0 843.0 7.0 1849.0 112.0 1118.0 965.0 123.0 292.0 2.0 242.0 12675.0
2 6989.0 474.0 848.0 9.0 2876.0 102.0 1081.0 977.0 119.0 318.0 7.0 253.0 14053.0
3 6734.0 490.0 787.0 6.0 2913.0 119.0 924.0 918.0 97.0 334.0 14.0 319.0 13655.0
4 6908.0 446.0 784.0 10.0 2676.0 110.0 878.0 927.0 97.0 401.0 14.0 260.0 13511.0
5 6713.0 406.0 788.0 2.0 2584.0 83.0 851.0 889.0 79.0 333.0 14.0 305.0 13047.0
6 6540.0 474.0 667.0 8.0 2705.0 101.0 964.0 913.0 73.0 310.0 25.0 235.0 13015.0
7 6405.0 379.0 746.0 8.0 2394.0 108.0 804.0 931.0 55.0 301.0 37.0 262.0 12430.0
8 7990.0 387.0 902.0 7.0 2931.0 126.0 1018.0 1168.0 88.0 324.0 26.0 323.0 15290.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2018 1 9285.0 450.0 770.0 4.0 2115.0 584.0 1953.0 1453.0 80.0 403.0 35.0 189.0 17321.0
2 9459.0 480.0 816.0 12.0 1971.0 574.0 1869.0 1436.0 130.0 402.0 21.0 168.0 17338.0
3 11021.0 513.0 793.0 5.0 2442.0 678.0 2015.0 1537.0 170.0 363.0 18.0 177.0 19732.0
4 10029.0 469.0 877.0 8.0 2324.0 669.0 1878.0 1505.0 158.0 297.0 17.0 215.0 18446.0
5 10090.0 456.0 787.0 4.0 2567.0 602.0 1785.0 1532.0 97.0 316.0 39.0 165.0 18440.0
6 10748.0 409.0 891.0 9.0 2334.0 631.0 1804.0 1547.0 109.0 325.0 31.0 184.0 19022.0
7 7835.0 442.0 678.0 16.0 2418.0 443.0 1551.0 1377.0 91.0 304.0 31.0 221.0 15407.0
8 7712.0 273.0 679.0 16.0 2082.0 430.0 1533.0 1232.0 84.0 253.0 23.0 213.0 14530.0
9 9460.0 381.0 754.0 7.0 6403.0 615.0 1476.0 1425.0 92.0 276.0 33.0 212.0 21134.0
10 9811.0 355.0 738.0 4.0 8409.0 753.0 1574.0 1513.0 94.0 273.0 22.0 212.0 23758.0
11 9317.0 408.0 702.0 14.0 7186.0 599.0 1720.0 1392.0 87.0 270.0 28.0 213.0 21936.0
12 8957.0 396.0 655.0 8.0 6877.0 543.0 1437.0 1309.0 78.0 315.0 18.0 203.0 20796.0
13 10330.0 460.0 807.0 8.0 7708.0 616.0 1537.0 1635.0 110.0 270.0 23.0 207.0 23711.0
14 10679.0 522.0 812.0 6.0 7931.0 781.0 1854.0 1635.0 85.0 354.0 32.0 192.0 24883.0
15 10278.0 538.0 772.0 7.0 8850.0 648.0 1672.0 1628.0 97.0 402.0 18.0 244.0 25154.0
16 9930.0 538.0 785.0 4.0 6165.0 596.0 1522.0 1552.0 86.0 314.0 28.0 200.0 21720.0
17 10759.0 508.0 818.0 4.0 5259.0 697.0 1539.0 1703.0 75.0 299.0 17.0 128.0 21806.0
18 10922.0 453.0 763.0 3.0 5946.0 777.0 1753.0 1656.0 53.0 287.0 30.0 131.0 22774.0
19 10566.0 538.0 795.0 8.0 6323.0 701.0 1701.0 1669.0 68.0 311.0 29.0 139.0 22848.0
20 10662.0 524.0 792.0 14.0 5960.0 781.0 1779.0 1638.0 53.0 304.0 31.0 142.0 22680.0
21 10217.0 585.0 746.0 11.0 6024.0 764.0 1800.0 1583.0 49.0 338.0 20.0 117.0 22254.0
22 10779.0 531.0 742.0 11.0 5107.0 728.0 1800.0 1655.0 52.0 314.0 21.0 109.0 21849.0
23 11749.0 559.0 823.0 11.0 5684.0 864.0 1848.0 1890.0 59.0 311.0 23.0 154.0 23975.0
24 11547.0 613.0 834.0 45.0 5847.0 988.0 2034.0 1767.0 34.0 373.0 34.0 131.0 24247.0
25 11549.0 568.0 838.0 55.0 6174.0 953.0 1953.0 1885.0 45.0 371.0 33.0 131.0 24555.0
26 12337.0 597.0 815.0 4.0 5975.0 828.0 1979.0 1944.0 76.0 362.0 27.0 138.0 25082.0
27 11282.0 590.0 734.0 6.0 4489.0 625.0 1745.0 1706.0 88.0 383.0 29.0 118.0 21795.0
28 12395.0 653.0 884.0 7.0 6240.0 845.0 2181.0 1852.0 81.0 317.0 39.0 133.0 25627.0
29 13363.0 615.0 919.0 10.0 5541.0 790.0 2379.0 1953.0 79.0 294.0 37.0 128.0 26108.0
30 750.0 26.0 44.0 NaN 375.0 38.0 114.0 89.0 7.0 13.0 5.0 14.0 1475.0

157 rows × 13 columns


In [25]:
cols = ['Bulky Items', 'Dead Animal Removal', 'Electronic Waste', 'Feedback', 'Graffiti Removal', 'Homeless Encampment', 'Illegal Dumping Pickup', 'Metal/Household Appliances', 'Multiple Streetlight Issue', 'Other', 'Report Water Waste', 'Single Streetlight Issue']

In [105]:
#Boxplot of all data to visualize any outliers
plt.rcParams.update({'font.size': 10})
c.plot.box(return_type='dict', widths = 0.7, figsize=(10,15))
plt.xticks(rotation = 'vertical')
plt.show()



In [ ]:
#Max Outliers for individual weekly request grouped by year:
for x in cols:
    max_specific_outliers = c[c[x] > c[x].mean() + 2 * c[x].std()]
    max_specific_outliers = max_specific_outliers[[x,'Total']]

In [34]:
#Min Outliers for individual weekly request types grouped by year:
for x in cols:
    min_specific_outliers = c[c[x] < c[x].mean() - 2 * c[x].std()]
    min_specific_outliers = min_specific_outliers[[x,'Total']]

In [107]:
#Max Outliers for total of weekly requests grouped by year
max_total_outliers = c[c['Total'] > c['Total'].mean() + 2 * c['Total'].std()]
max_total_outliers = max_total_outliers[['Total']]
max_total_outliers


Out[107]:
RequestType Total
Created Year Week Number
2018 15 25154.0
26 25082.0
28 25627.0
29 26108.0

In [38]:
#Min Outliers for total of weekly requests grouped by year
min_total_outliers = c[c['Total'] < c['Total'].mean() - 2 * c['Total'].std()]
min_total_outliers = min_total_outliers[['Total']]
min_total_outliers


Out[38]:
RequestType Total
Created Year Week Number
2015 32 5783.0
2016 53 1005.0
2017 43 2699.0
47 44.0
48 5170.0
2018 30 1475.0

In [44]:
c['Diff'] = c['Total'].diff()
c['Diff'] = c['Diff'].abs()

In [45]:
c


Out[45]:
RequestType Bulky Items Dead Animal Removal Electronic Waste Feedback Graffiti Removal Homeless Encampment Illegal Dumping Pickup Metal/Household Appliances Multiple Streetlight Issue Other Report Water Waste Single Streetlight Issue Total Diff
Created Year Week Number
2015 32 4192.0 274.0 330.0 31.0 NaN 43.0 362.0 377.0 15.0 78.0 NaN 81.0 5783.0 NaN
33 8313.0 499.0 672.0 146.0 2.0 56.0 596.0 710.0 58.0 36.0 NaN 159.0 11247.0 5464.0
34 8111.0 490.0 704.0 164.0 1.0 73.0 661.0 738.0 71.0 13.0 3.0 214.0 11243.0 4.0
35 7685.0 473.0 641.0 25.0 6.0 67.0 609.0 767.0 68.0 9.0 2.0 210.0 10562.0 681.0
36 8494.0 421.0 606.0 1.0 NaN 69.0 767.0 763.0 79.0 10.0 2.0 175.0 11387.0 825.0
37 8143.0 471.0 603.0 1.0 NaN 52.0 704.0 758.0 36.0 21.0 2.0 148.0 10939.0 448.0
38 9037.0 439.0 755.0 1.0 2.0 64.0 1388.0 843.0 56.0 10.0 3.0 245.0 12843.0 1904.0
39 11289.0 491.0 963.0 2.0 NaN 60.0 1521.0 897.0 72.0 10.0 11.0 224.0 15540.0 2697.0
40 9901.0 457.0 801.0 5.0 1.0 70.0 1516.0 941.0 86.0 4.0 6.0 250.0 14038.0 1502.0
41 8230.0 436.0 654.0 NaN NaN 67.0 896.0 827.0 73.0 12.0 9.0 242.0 11446.0 2592.0
42 8349.0 449.0 597.0 3.0 NaN 68.0 946.0 837.0 54.0 16.0 9.0 193.0 11521.0 75.0
43 8006.0 440.0 645.0 2.0 NaN 78.0 946.0 929.0 89.0 5.0 7.0 296.0 11443.0 78.0
44 8028.0 470.0 746.0 4.0 NaN 78.0 968.0 1000.0 52.0 11.0 10.0 256.0 11623.0 180.0
45 7410.0 426.0 726.0 6.0 NaN 68.0 984.0 853.0 81.0 4.0 4.0 341.0 10903.0 720.0
46 6835.0 427.0 724.0 5.0 NaN 69.0 912.0 866.0 95.0 10.0 8.0 289.0 10240.0 663.0
47 6913.0 416.0 813.0 7.0 1.0 96.0 971.0 939.0 73.0 7.0 6.0 385.0 10627.0 387.0
48 5428.0 351.0 671.0 4.0 NaN 58.0 726.0 812.0 71.0 2.0 5.0 145.0 8273.0 2354.0
49 7556.0 443.0 987.0 4.0 13.0 78.0 1050.0 1106.0 102.0 4.0 7.0 269.0 11619.0 3346.0
50 7274.0 444.0 1039.0 7.0 NaN 62.0 922.0 995.0 112.0 6.0 5.0 358.0 11224.0 395.0
51 6506.0 366.0 791.0 1.0 2.0 57.0 843.0 919.0 77.0 5.0 5.0 355.0 9927.0 1297.0
52 4727.0 352.0 586.0 NaN NaN 48.0 657.0 688.0 37.0 2.0 1.0 222.0 7320.0 2607.0
53 4873.0 268.0 632.0 1.0 NaN 42.0 633.0 737.0 47.0 2.0 3.0 130.0 7368.0 48.0
2016 1 6691.0 431.0 843.0 7.0 1849.0 112.0 1118.0 965.0 123.0 292.0 2.0 242.0 12675.0 5307.0
2 6989.0 474.0 848.0 9.0 2876.0 102.0 1081.0 977.0 119.0 318.0 7.0 253.0 14053.0 1378.0
3 6734.0 490.0 787.0 6.0 2913.0 119.0 924.0 918.0 97.0 334.0 14.0 319.0 13655.0 398.0
4 6908.0 446.0 784.0 10.0 2676.0 110.0 878.0 927.0 97.0 401.0 14.0 260.0 13511.0 144.0
5 6713.0 406.0 788.0 2.0 2584.0 83.0 851.0 889.0 79.0 333.0 14.0 305.0 13047.0 464.0
6 6540.0 474.0 667.0 8.0 2705.0 101.0 964.0 913.0 73.0 310.0 25.0 235.0 13015.0 32.0
7 6405.0 379.0 746.0 8.0 2394.0 108.0 804.0 931.0 55.0 301.0 37.0 262.0 12430.0 585.0
8 7990.0 387.0 902.0 7.0 2931.0 126.0 1018.0 1168.0 88.0 324.0 26.0 323.0 15290.0 2860.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2018 1 9285.0 450.0 770.0 4.0 2115.0 584.0 1953.0 1453.0 80.0 403.0 35.0 189.0 17321.0 4320.0
2 9459.0 480.0 816.0 12.0 1971.0 574.0 1869.0 1436.0 130.0 402.0 21.0 168.0 17338.0 17.0
3 11021.0 513.0 793.0 5.0 2442.0 678.0 2015.0 1537.0 170.0 363.0 18.0 177.0 19732.0 2394.0
4 10029.0 469.0 877.0 8.0 2324.0 669.0 1878.0 1505.0 158.0 297.0 17.0 215.0 18446.0 1286.0
5 10090.0 456.0 787.0 4.0 2567.0 602.0 1785.0 1532.0 97.0 316.0 39.0 165.0 18440.0 6.0
6 10748.0 409.0 891.0 9.0 2334.0 631.0 1804.0 1547.0 109.0 325.0 31.0 184.0 19022.0 582.0
7 7835.0 442.0 678.0 16.0 2418.0 443.0 1551.0 1377.0 91.0 304.0 31.0 221.0 15407.0 3615.0
8 7712.0 273.0 679.0 16.0 2082.0 430.0 1533.0 1232.0 84.0 253.0 23.0 213.0 14530.0 877.0
9 9460.0 381.0 754.0 7.0 6403.0 615.0 1476.0 1425.0 92.0 276.0 33.0 212.0 21134.0 6604.0
10 9811.0 355.0 738.0 4.0 8409.0 753.0 1574.0 1513.0 94.0 273.0 22.0 212.0 23758.0 2624.0
11 9317.0 408.0 702.0 14.0 7186.0 599.0 1720.0 1392.0 87.0 270.0 28.0 213.0 21936.0 1822.0
12 8957.0 396.0 655.0 8.0 6877.0 543.0 1437.0 1309.0 78.0 315.0 18.0 203.0 20796.0 1140.0
13 10330.0 460.0 807.0 8.0 7708.0 616.0 1537.0 1635.0 110.0 270.0 23.0 207.0 23711.0 2915.0
14 10679.0 522.0 812.0 6.0 7931.0 781.0 1854.0 1635.0 85.0 354.0 32.0 192.0 24883.0 1172.0
15 10278.0 538.0 772.0 7.0 8850.0 648.0 1672.0 1628.0 97.0 402.0 18.0 244.0 25154.0 271.0
16 9930.0 538.0 785.0 4.0 6165.0 596.0 1522.0 1552.0 86.0 314.0 28.0 200.0 21720.0 3434.0
17 10759.0 508.0 818.0 4.0 5259.0 697.0 1539.0 1703.0 75.0 299.0 17.0 128.0 21806.0 86.0
18 10922.0 453.0 763.0 3.0 5946.0 777.0 1753.0 1656.0 53.0 287.0 30.0 131.0 22774.0 968.0
19 10566.0 538.0 795.0 8.0 6323.0 701.0 1701.0 1669.0 68.0 311.0 29.0 139.0 22848.0 74.0
20 10662.0 524.0 792.0 14.0 5960.0 781.0 1779.0 1638.0 53.0 304.0 31.0 142.0 22680.0 168.0
21 10217.0 585.0 746.0 11.0 6024.0 764.0 1800.0 1583.0 49.0 338.0 20.0 117.0 22254.0 426.0
22 10779.0 531.0 742.0 11.0 5107.0 728.0 1800.0 1655.0 52.0 314.0 21.0 109.0 21849.0 405.0
23 11749.0 559.0 823.0 11.0 5684.0 864.0 1848.0 1890.0 59.0 311.0 23.0 154.0 23975.0 2126.0
24 11547.0 613.0 834.0 45.0 5847.0 988.0 2034.0 1767.0 34.0 373.0 34.0 131.0 24247.0 272.0
25 11549.0 568.0 838.0 55.0 6174.0 953.0 1953.0 1885.0 45.0 371.0 33.0 131.0 24555.0 308.0
26 12337.0 597.0 815.0 4.0 5975.0 828.0 1979.0 1944.0 76.0 362.0 27.0 138.0 25082.0 527.0
27 11282.0 590.0 734.0 6.0 4489.0 625.0 1745.0 1706.0 88.0 383.0 29.0 118.0 21795.0 3287.0
28 12395.0 653.0 884.0 7.0 6240.0 845.0 2181.0 1852.0 81.0 317.0 39.0 133.0 25627.0 3832.0
29 13363.0 615.0 919.0 10.0 5541.0 790.0 2379.0 1953.0 79.0 294.0 37.0 128.0 26108.0 481.0
30 750.0 26.0 44.0 NaN 375.0 38.0 114.0 89.0 7.0 13.0 5.0 14.0 1475.0 24633.0

157 rows × 14 columns


In [46]:
#Max Outliers for change in total number of weekly requests grouped by year
max_change_outliers = c[c['Diff'] > c['Diff'].mean() + 2 * c['Diff'].std()]
max_change_outliers = max_change_outliers[['Diff']]
max_change_outliers


Out[46]:
RequestType Diff
Created Year Week Number
2016 53 11553.0
2017 1 13847.0
43 12770.0
44 10203.0
47 10146.0
49 11170.0
2018 30 24633.0

In [47]:
#Min Outliers for change in total number of weekly requests grouped by year
min_change_outliers = c[c['Diff'] < c['Diff'].mean() - 2 * c['Diff'].std()]
min_change_outliers = min_change_outliers[['Diff']]
min_change_outliers


Out[47]:
RequestType Diff
Created Year Week Number

In [ ]: