In [1]:
# Show what is in the working directory before loading anything.
# Explicit shell escape: a bare `ls` depends on IPython automagic, which is
# only resolved for single-line cells and is shadowed by any variable named `ls`.
!ls


sampleSubmission.csv  starter.ipynb  test.csv  train.csv

In [2]:
import pandas as pd


Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 16 days
Vendor:  Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 16 days

In [3]:
# Read the training CSV from the working directory into a DataFrame.
TRAIN_CSV = 'train.csv'
train_frame = pd.read_csv(TRAIN_CSV)

In [4]:
# Display the column labels of the loaded training frame.
train_frame.columns


Out[4]:
Index([u'Dates', u'Category', u'Descript', u'DayOfWeek', u'PdDistrict', u'Resolution', u'Address', u'X', u'Y'], dtype='object')

In [61]:
# Map each crime category to the distinct `Descript` values recorded under it.
# Only the `Descript` column is selected before iterating, so each group is a
# Series rather than a full sub-frame, and the dict is built in one expression
# instead of merging a throwaway one-entry dict per category.
d = {
    category: descriptions.unique()
    for category, descriptions in train_frame.groupby(by="Category")['Descript']
}

In [ ]:


In [63]:
# List the category names that were collected as keys of `d`.
d.keys()


Out[63]:
['RECOVERED VEHICLE',
 'SUICIDE',
 'FRAUD',
 'WEAPON LAWS',
 'VANDALISM',
 'ARSON',
 'OTHER OFFENSES',
 'WARRANTS',
 'LOITERING',
 'DRUG/NARCOTIC',
 'EMBEZZLEMENT',
 'SEX OFFENSES NON FORCIBLE',
 'KIDNAPPING',
 'DRIVING UNDER THE INFLUENCE',
 'LARCENY/THEFT',
 'ROBBERY',
 'MISSING PERSON',
 'BURGLARY',
 'RUNAWAY',
 'STOLEN PROPERTY',
 'PORNOGRAPHY/OBSCENE MAT',
 'SUSPICIOUS OCC',
 'DISORDERLY CONDUCT',
 'LIQUOR LAWS',
 'FAMILY OFFENSES',
 'TRESPASS',
 'TREA',
 'SECONDARY CODES',
 'VEHICLE THEFT',
 'BAD CHECKS',
 'SEX OFFENSES FORCIBLE',
 'FORGERY/COUNTERFEITING',
 'ASSAULT',
 'BRIBERY',
 'NON-CRIMINAL',
 'GAMBLING',
 'EXTORTION',
 'PROSTITUTION',
 'DRUNKENNESS']

In [66]:
# Look up the distinct descriptions stored for the KIDNAPPING category.
d['KIDNAPPING']


Out[66]:
array(['FALSE IMPRISONMENT', 'KIDNAPPING DURING ROBBERY', 'CHILD STEALING',
       'KIDNAPPING, ADULT VICTIM', 'ATTEMPTED KIDNAPPING, JUVENILE VICTIM',
       'KIDNAPPING, JUVENILE VICTIM', 'KIDNAPPING DURING CARJACKING',
       'ATTEMPTED KIDNAPPING, ADULT VICTIM', 'KIDNAPPER, POSING AS'], dtype=object)

In [25]:
# Summarise the Category column: row count, number of distinct values, and
# the most frequent value together with how often it occurs.
category_column = train_frame['Category']
category_column.describe()


Out[25]:
count            878049
unique               39
top       LARCENY/THEFT
freq             174900
Name: Category, dtype: object

In [ ]:


In [29]:
# Per-category summary statistics, grouping the frame by its Category Series.
# NOTE(review): the saved output reports max X = -120.5 and max Y = 90.0 for
# several categories (e.g. ASSAULT, VANDALISM, WARRANTS); these look like
# placeholder coordinates rather than real locations - confirm before using X/Y.
by_category = train_frame.groupby(by=train_frame['Category'])
by_category.describe()


Out[29]:
X Y
Category
ARSON count 1513.000000 1513.000000
mean -122.419799 37.757478
std 0.029780 0.027173
min -122.510037 37.708154
25% -122.433892 37.732303
50% -122.414544 37.761090
75% -122.399129 37.780978
max -122.364937 37.819923
ASSAULT count 76876.000000 76876.000000
mean -122.421062 37.766595
std 0.028574 0.377655
min -122.513642 37.707922
25% -122.430759 37.743555
50% -122.415722 37.772541
75% -122.406670 37.783672
max -120.500000 90.000000
BAD CHECKS count 406.000000 406.000000
mean -122.423752 37.769944
std 0.024956 0.023670
min -122.506213 37.708816
25% -122.437571 37.752700
50% -122.416197 37.778263
75% -122.405176 37.787689
max -122.365565 37.809671
BRIBERY count 289.000000 289.000000
mean -122.418650 37.754059
std 0.025421 0.024816
min -122.505928 37.709030
25% -122.430701 37.731740
50% -122.416078 37.754626
... ... ... ...
VANDALISM std 0.030115 0.248353
min -122.513642 37.707922
25% -122.439631 37.742772
50% -122.419483 37.769955
75% -122.406691 37.783310
max -120.500000 90.000000
VEHICLE THEFT count 53781.000000 53781.000000
mean -122.429060 37.768329
std 0.037893 0.676246
min -122.513642 37.707920
25% -122.443399 37.737311
50% -122.423602 37.762628
75% -122.409530 37.780849
max -120.500000 90.000000
WARRANTS count 42214.000000 42214.000000
mean -122.417581 37.778991
std 0.033304 0.719289
min -122.513642 37.707922
25% -122.421305 37.760957
50% -122.414056 37.776231
75% -122.407474 37.783730
max -120.500000 90.000000
WEAPON LAWS count 8555.000000 8555.000000
mean -122.418983 37.758734
std 0.025308 0.026170
min -122.510226 37.707922
25% -122.427918 37.734491
50% -122.414810 37.764057
75% -122.405284 37.781605
max -122.365565 37.819923

312 rows × 2 columns


In [ ]:


In [9]:
# `categories` was not defined by any earlier cell in this notebook, so build
# it here instead of relying on leftover kernel state.
# NOTE(review): presumably this was the frame grouped by Category - confirm.
categories = train_frame.groupby('Category')

# A DataFrameGroupBy does not expose `.columns` (accessing it raises
# AttributeError). `.obj` is the DataFrame the GroupBy wraps, so read the
# column labels from there.
categories.obj.columns

In [ ]: