In [1]:
ls
sampleSubmission.csv starter.ipynb test.csv train.csv
In [2]:
import pandas as pd
Vendor: Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 16 days
Vendor: Continuum Analytics, Inc.
Package: mkl
Message: trial mode expires in 16 days
In [3]:
# Read the training CSV from the working directory into a DataFrame.
train_frame = pd.read_csv(filepath_or_buffer='train.csv')
In [4]:
# Show the column labels of the loaded training frame.
train_frame.columns
Out[4]:
Index([u'Dates', u'Category', u'Descript', u'DayOfWeek', u'PdDistrict', u'Resolution', u'Address', u'X', u'Y'], dtype='object')
In [61]:
# Map every crime category to the array of distinct `Descript` values
# that occur under it (one groupby pass over the training frame).
d = {
    category: table['Descript'].unique()
    for category, table in train_frame.groupby(by="Category")
}
In [ ]:
In [63]:
# List the category names used as keys of `d`.
# list(d) yields a plain list on both Python 2 and Python 3, whereas
# d.keys() is a view object on Python 3 and would not display as a list.
list(d)
Out[63]:
['RECOVERED VEHICLE',
'SUICIDE',
'FRAUD',
'WEAPON LAWS',
'VANDALISM',
'ARSON',
'OTHER OFFENSES',
'WARRANTS',
'LOITERING',
'DRUG/NARCOTIC',
'EMBEZZLEMENT',
'SEX OFFENSES NON FORCIBLE',
'KIDNAPPING',
'DRIVING UNDER THE INFLUENCE',
'LARCENY/THEFT',
'ROBBERY',
'MISSING PERSON',
'BURGLARY',
'RUNAWAY',
'STOLEN PROPERTY',
'PORNOGRAPHY/OBSCENE MAT',
'SUSPICIOUS OCC',
'DISORDERLY CONDUCT',
'LIQUOR LAWS',
'FAMILY OFFENSES',
'TRESPASS',
'TREA',
'SECONDARY CODES',
'VEHICLE THEFT',
'BAD CHECKS',
'SEX OFFENSES FORCIBLE',
'FORGERY/COUNTERFEITING',
'ASSAULT',
'BRIBERY',
'NON-CRIMINAL',
'GAMBLING',
'EXTORTION',
'PROSTITUTION',
'DRUNKENNESS']
In [66]:
# Look up the distinct descriptions collected for the 'KIDNAPPING' category.
d['KIDNAPPING']
Out[66]:
array(['FALSE IMPRISONMENT', 'KIDNAPPING DURING ROBBERY', 'CHILD STEALING',
'KIDNAPPING, ADULT VICTIM', 'ATTEMPTED KIDNAPPING, JUVENILE VICTIM',
'KIDNAPPING, JUVENILE VICTIM', 'KIDNAPPING DURING CARJACKING',
'ATTEMPTED KIDNAPPING, ADULT VICTIM', 'KIDNAPPER, POSING AS'], dtype=object)
In [25]:
# Summarise the Category column: row count, number of distinct labels,
# and the most frequent label with its frequency.
train_frame['Category'].describe()
Out[25]:
count 878049
unique 39
top LARCENY/THEFT
freq 174900
Name: Category, dtype: object
In [ ]:
In [29]:
# Per-category summary statistics of the numeric columns (X and Y in the
# recorded output).
# NOTE(review): the recorded output shows max X = -120.5 and max Y = 90.0 for
# several categories, far from the other quantiles -- presumably placeholder
# coordinates; confirm before using X/Y as features.
train_frame.groupby('Category').describe()
Out[29]:
X
Y
Category
ARSON
count
1513.000000
1513.000000
mean
-122.419799
37.757478
std
0.029780
0.027173
min
-122.510037
37.708154
25%
-122.433892
37.732303
50%
-122.414544
37.761090
75%
-122.399129
37.780978
max
-122.364937
37.819923
ASSAULT
count
76876.000000
76876.000000
mean
-122.421062
37.766595
std
0.028574
0.377655
min
-122.513642
37.707922
25%
-122.430759
37.743555
50%
-122.415722
37.772541
75%
-122.406670
37.783672
max
-120.500000
90.000000
BAD CHECKS
count
406.000000
406.000000
mean
-122.423752
37.769944
std
0.024956
0.023670
min
-122.506213
37.708816
25%
-122.437571
37.752700
50%
-122.416197
37.778263
75%
-122.405176
37.787689
max
-122.365565
37.809671
BRIBERY
count
289.000000
289.000000
mean
-122.418650
37.754059
std
0.025421
0.024816
min
-122.505928
37.709030
25%
-122.430701
37.731740
50%
-122.416078
37.754626
...
...
...
...
VANDALISM
std
0.030115
0.248353
min
-122.513642
37.707922
25%
-122.439631
37.742772
50%
-122.419483
37.769955
75%
-122.406691
37.783310
max
-120.500000
90.000000
VEHICLE THEFT
count
53781.000000
53781.000000
mean
-122.429060
37.768329
std
0.037893
0.676246
min
-122.513642
37.707920
25%
-122.443399
37.737311
50%
-122.423602
37.762628
75%
-122.409530
37.780849
max
-120.500000
90.000000
WARRANTS
count
42214.000000
42214.000000
mean
-122.417581
37.778991
std
0.033304
0.719289
min
-122.513642
37.707922
25%
-122.421305
37.760957
50%
-122.414056
37.776231
75%
-122.407474
37.783730
max
-120.500000
90.000000
WEAPON LAWS
count
8555.000000
8555.000000
mean
-122.418983
37.758734
std
0.025308
0.026170
min
-122.510226
37.707922
25%
-122.427918
37.734491
50%
-122.414810
37.764057
75%
-122.405284
37.781605
max
-122.365565
37.819923
312 rows × 2 columns
In [ ]:
In [9]:
categories.columns
# NOTE(review): the recorded traceback shows this raises AttributeError because
# `categories` is a DataFrameGroupBy, which does not expose `.columns`.
# `categories` is not assigned in any visible cell (presumably something like
# train_frame.groupby('Category') -- confirm). For the column labels, use
# train_frame.columns instead.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-9-8de2132cadb8> in <module>()
----> 1 categories.columns
/home/will/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in __getattr__(self, attr)
506 return self[attr]
507 if hasattr(self.obj, attr):
--> 508 return self._make_wrapper(attr)
509
510 raise AttributeError("%r object has no attribute %r" %
/home/will/anaconda/lib/python2.7/site-packages/pandas/core/groupby.pyc in _make_wrapper(self, name)
521 "using the 'apply' method".format(kind, name,
522 type(self).__name__))
--> 523 raise AttributeError(msg)
524
525 # need to setup the selection
AttributeError: Cannot access attribute 'columns' of 'DataFrameGroupBy' objects, try using the 'apply' method
In [ ]:
Content source: wgm2111/wgm-kaggle
Similar notebooks: