In [20]:
from __future__ import division
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import json

In [2]:
df = pd.read_csv('_data/city_fixit_raw.csv', usecols=range(0,5))

In [3]:
df.head()


Out[3]:
ProblemType NumRequests Year AvgDaysToClose %ReportedBySeeClickFix
0 IllegalDumping 3000 2009 1 0
1 BuildingMaintenance 500 2009 5 0
2 Electrical 1000 2009 1 0
3 StreetSweeping 100 2009 50 0
4 RoadRepair 400 2009 20 0

In [4]:
df.ProblemType = pd.Categorical(df.ProblemType)

In [5]:
df.Year = df.Year.apply(int)

In [6]:
df.Year = pd.Categorical(df.Year)

In [7]:
df.Year


Out[7]:
0     2009
1     2009
2     2009
3     2009
4     2009
5     2009
6     2009
7     2009
8     2009
9     2009
10    2010
11    2010
12    2010
13    2010
14    2010
15    2010
16    2010
17    2010
18    2010
19    2010
20    2011
21    2011
22    2011
23    2011
24    2011
25    2011
26    2011
27    2011
28    2011
29    2011
30    2012
31    2012
32    2012
33    2012
34    2012
35    2012
36    2012
37    2012
38    2012
39    2012
Name: Year, dtype: category
Categories (4, int64): [2009 < 2010 < 2011 < 2012]

In [55]:
df.dtypes


Out[55]:
ProblemType               category
NumRequests                  int64
Year                      category
AvgDaysToClose               int64
%ReportedBySeeClickFix       int64
ReqBuckets                category
dtype: object

In [56]:
df.ProblemType


Out[56]:
0          IllegalDumping
1     BuildingMaintenance
2              Electrical
3          StreetSweeping
4              RoadRepair
5       VegetationControl
6                Graffiti
7                  Survey
8                Drainage
9            StreetLights
10         IllegalDumping
11    BuildingMaintenance
12             Electrical
13         StreetSweeping
14             RoadRepair
15      VegetationControl
16               Graffiti
17                 Survey
18               Drainage
19           StreetLights
20         IllegalDumping
21    BuildingMaintenance
22             Electrical
23         StreetSweeping
24             RoadRepair
25      VegetationControl
26               Graffiti
27                 Survey
28               Drainage
29           StreetLights
30         IllegalDumping
31    BuildingMaintenance
32             Electrical
33         StreetSweeping
34             RoadRepair
35      VegetationControl
36               Graffiti
37                 Survey
38               Drainage
39           StreetLights
Name: ProblemType, dtype: category
Categories (10, object): [BuildingMaintenance < Drainage < Electrical < Graffiti ... StreetLights < StreetSweeping < Survey < VegetationControl]

In [57]:
df.describe()


Out[57]:
NumRequests AvgDaysToClose %ReportedBySeeClickFix
count 40.000000 40.000000 40.000000
mean 1331.250000 11.500000 13.300000
std 1879.637428 14.338401 13.232051
min 50.000000 1.000000 0.000000
25% 50.000000 1.000000 1.500000
50% 450.000000 6.000000 10.000000
75% 1425.000000 15.000000 20.000000
max 7200.000000 50.000000 55.000000

In [58]:
df.groupby('Year').sum()


Out[58]:
NumRequests AvgDaysToClose %ReportedBySeeClickFix
Year
2009 5300 115 0
2010 12550 115 92
2011 16800 115 175
2012 18600 115 265

In [59]:
df.groupby('ProblemType').agg(np.sum).plot(kind='barh',y='NumRequests')


Out[59]:
<matplotlib.axes.AxesSubplot at 0x10a02b310>

In [60]:
df.set_index('ProblemType').plot(kind='barh', figsize=(10,8), y='NumRequests')


Out[60]:
<matplotlib.axes.AxesSubplot at 0x109d36bd0>

In [61]:
groups = df.groupby('Year')

In [62]:
def zscore(x):
    return np.sum(x - np.mean(x))/np.std(x)

In [65]:
# df.groupby(['Year', 'ProblemType']).agg(np.mean).plot(kind='barh', figsize=(10,8), y='NumRequests')

In [67]:
# df.groupby(['ProblemType']).cumsum()

In [68]:
df['ReqBuckets'] = pd.cut(df.NumRequests, 4, labels=['XS','SM','LG','XL'])

In [69]:
df.groupby('ProblemType')


Out[69]:
<pandas.core.groupby.DataFrameGroupBy object at 0x10a023250>

In [70]:
for name, group in df.set_index('ProblemType').groupby('ReqBuckets'):
    print group.plot(x='Year',y='NumRequests', kind='bar')


Axes(0.125,0.125;0.775x0.775)
Axes(0.125,0.125;0.775x0.775)
Axes(0.125,0.125;0.775x0.775)
Axes(0.125,0.125;0.775x0.775)

In [76]:
df.groupby('ProblemType').plot(y='NumRequests',x='Year')


Out[76]:
ProblemType
BuildingMaintenance    Axes(0.125,0.125;0.775x0.775)
Drainage               Axes(0.125,0.125;0.775x0.775)
Electrical             Axes(0.125,0.125;0.775x0.775)
Graffiti               Axes(0.125,0.125;0.775x0.775)
IllegalDumping         Axes(0.125,0.125;0.775x0.775)
RoadRepair             Axes(0.125,0.125;0.775x0.775)
StreetLights           Axes(0.125,0.125;0.775x0.775)
StreetSweeping         Axes(0.125,0.125;0.775x0.775)
Survey                 Axes(0.125,0.125;0.775x0.775)
VegetationControl      Axes(0.125,0.125;0.775x0.775)
dtype: object

In [14]:
df[['ProblemType','AvgDaysToClose']].groupby('ProblemType').mean()

In [15]:
avg_days = df[['ProblemType','AvgDaysToClose']].groupby('ProblemType').mean()

In [31]:
final_data = []
for k, v in avg_days.to_dict()['AvgDaysToClose'].items():
    final_data.append({
    "name":k,
    "data":v
    })
final_data = sorted(final_data, cmp=lambda x, y: cmp(x['name'], y['name']) )
print final_data


[{'data': 5, 'name': 'BuildingMaintenance'}, {'data': 1, 'name': 'Drainage'}, {'data': 1, 'name': 'Electrical'}, {'data': 5, 'name': 'Graffiti'}, {'data': 1, 'name': 'IllegalDumping'}, {'data': 20, 'name': 'RoadRepair'}, {'data': 10, 'name': 'StreetLights'}, {'data': 50, 'name': 'StreetSweeping'}, {'data': 15, 'name': 'Survey'}, {'data': 7, 'name': 'VegetationControl'}]

In [32]:
with open("_data/avgdays.json", 'wb') as f:
    json.dump(final_data, f)

In [46]:
df.drop('SeeClickFixCount', axis=1, inplace=True)

In [48]:
df.to_json()


Out[48]:
'{"ProblemType":{"0":"IllegalDumping","1":"BuildingMaintenance","2":"Electrical","3":"StreetSweeping","4":"RoadRepair","5":"VegetationControl","6":"Graffiti","7":"Survey","8":"Drainage","9":"StreetLights","10":"IllegalDumping","11":"BuildingMaintenance","12":"Electrical","13":"StreetSweeping","14":"RoadRepair","15":"VegetationControl","16":"Graffiti","17":"Survey","18":"Drainage","19":"StreetLights","20":"IllegalDumping","21":"BuildingMaintenance","22":"Electrical","23":"StreetSweeping","24":"RoadRepair","25":"VegetationControl","26":"Graffiti","27":"Survey","28":"Drainage","29":"StreetLights","30":"IllegalDumping","31":"BuildingMaintenance","32":"Electrical","33":"StreetSweeping","34":"RoadRepair","35":"VegetationControl","36":"Graffiti","37":"Survey","38":"Drainage","39":"StreetLights"},"NumRequests":{"0":3000,"1":500,"2":1000,"3":100,"4":400,"5":50,"6":100,"7":50,"8":50,"9":50,"10":5000,"11":2400,"12":3000,"13":400,"14":500,"15":50,"16":950,"17":50,"18":150,"19":50,"20":6500,"21":4000,"22":3200,"23":1000,"24":750,"25":50,"26":900,"27":50,"28":300,"29":50,"30":7200,"31":4500,"32":3300,"33":1100,"34":900,"35":50,"36":1000,"37":50,"38":400,"39":100},"Year":{"0":2009,"1":2009,"2":2009,"3":2009,"4":2009,"5":2009,"6":2009,"7":2009,"8":2009,"9":2009,"10":2010,"11":2010,"12":2010,"13":2010,"14":2010,"15":2010,"16":2010,"17":2010,"18":2010,"19":2010,"20":2011,"21":2011,"22":2011,"23":2011,"24":2011,"25":2011,"26":2011,"27":2011,"28":2011,"29":2011,"30":2012,"31":2012,"32":2012,"33":2012,"34":2012,"35":2012,"36":2012,"37":2012,"38":2012,"39":2012},"AvgDaysToClose":{"0":1,"1":5,"2":1,"3":50,"4":20,"5":7,"6":5,"7":15,"8":1,"9":10,"10":1,"11":5,"12":1,"13":50,"14":20,"15":7,"16":5,"17":15,"18":1,"19":10,"20":1,"21":5,"22":1,"23":50,"24":20,"25":7,"26":5,"27":15,"28":1,"29":10,"30":1,"31":5,"32":1,"33":50,"34":20,"35":7,"36":5,"37":15,"38":1,"39":10},"%ReportedBySeeClickFix":{"0":0,"1":0,"2":0,"3":0,"4":0,"5":0,"6":0,"7":0,"8":0,"9":0,"10":10,"11":2,"12":5,"13":15,"14":5,"15":10,"16":5,"17":5,"18":15,"19":20,"20":20,"21":10,"22":10,"23":20,"24":10,"25":10,"26":40,"27":10,"28":20,"29":25,"30":45,"31":15,"32":20,"33":30,"34":20,"35":15,"36":55,"37":10,"38":25,"39":30}}'

In [52]:
for k, g in df.groupby('Year'):
    print g


           ProblemType  NumRequests  Year  AvgDaysToClose  \
0       IllegalDumping         3000  2009               1   
1  BuildingMaintenance          500  2009               5   
2           Electrical         1000  2009               1   
3       StreetSweeping          100  2009              50   
4           RoadRepair          400  2009              20   
5    VegetationControl           50  2009               7   
6             Graffiti          100  2009               5   
7               Survey           50  2009              15   
8             Drainage           50  2009               1   
9         StreetLights           50  2009              10   

   %ReportedBySeeClickFix  
0                       0  
1                       0  
2                       0  
3                       0  
4                       0  
5                       0  
6                       0  
7                       0  
8                       0  
9                       0  
            ProblemType  NumRequests  Year  AvgDaysToClose  \
10       IllegalDumping         5000  2010               1   
11  BuildingMaintenance         2400  2010               5   
12           Electrical         3000  2010               1   
13       StreetSweeping          400  2010              50   
14           RoadRepair          500  2010              20   
15    VegetationControl           50  2010               7   
16             Graffiti          950  2010               5   
17               Survey           50  2010              15   
18             Drainage          150  2010               1   
19         StreetLights           50  2010              10   

    %ReportedBySeeClickFix  
10                      10  
11                       2  
12                       5  
13                      15  
14                       5  
15                      10  
16                       5  
17                       5  
18                      15  
19                      20  
            ProblemType  NumRequests  Year  AvgDaysToClose  \
20       IllegalDumping         6500  2011               1   
21  BuildingMaintenance         4000  2011               5   
22           Electrical         3200  2011               1   
23       StreetSweeping         1000  2011              50   
24           RoadRepair          750  2011              20   
25    VegetationControl           50  2011               7   
26             Graffiti          900  2011               5   
27               Survey           50  2011              15   
28             Drainage          300  2011               1   
29         StreetLights           50  2011              10   

    %ReportedBySeeClickFix  
20                      20  
21                      10  
22                      10  
23                      20  
24                      10  
25                      10  
26                      40  
27                      10  
28                      20  
29                      25  
            ProblemType  NumRequests  Year  AvgDaysToClose  \
30       IllegalDumping         7200  2012               1   
31  BuildingMaintenance         4500  2012               5   
32           Electrical         3300  2012               1   
33       StreetSweeping         1100  2012              50   
34           RoadRepair          900  2012              20   
35    VegetationControl           50  2012               7   
36             Graffiti         1000  2012               5   
37               Survey           50  2012              15   
38             Drainage          400  2012               1   
39         StreetLights          100  2012              10   

    %ReportedBySeeClickFix  
30                      45  
31                      15  
32                      20  
33                      30  
34                      20  
35                      15  
36                      55  
37                      10  
38                      25  
39                      30  

In [ ]: