In [2]:
import pandas as pd
In [156]:
df = pd.read_csv('./DOHMH_New_York_City_Restaurant_Inspection_Results.csv', usecols=['CAMIS','DBA','ZIPCODE','CUISINE DESCRIPTION', 'GRADE', 'GRADE DATE'])
df.drop_duplicates(subset=['CAMIS'], inplace=True)
df.sort_values(['CUISINE DESCRIPTION', 'GRADE'], inplace=True)
df['GRADE'].fillna('No info', inplace = True)
df['ZIPCODE'].fillna(0, inplace = True)
df['ZIPCODE'] = df['ZIPCODE'].astype('str')[:-2]
df['GRADE DATE'] = pd.to_datetime(df['GRADE DATE'])
df.set_index(['CUISINE DESCRIPTION', 'ZIPCODE'], inplace=True)
df
Out[156]:
CAMIS
DBA
GRADE
GRADE DATE
CUISINE DESCRIPTION
ZIPCODE
Afghan
11418.0
50038407
BAKHTER HALAL KABABS & GYRO KING
A
2015-08-12
11367.0
50010806
ARIA KABAB /TARIQ AFGHAN KABAB
A
2014-08-28
11105.0
50007432
BALKH SHISH KABAB HOUSE
A
2015-04-14
10019.0
40616799
AFGHAN KEBAB HOUSE #1
A
2017-07-21
11106.0
50058456
SAMI'S KABAB HOUSE
A
2017-01-26
11420.0
50056030
SAHARA GRILL
A
2016-12-22
10021.0
50034676
AFGHAN KEBAB HOUSE
A
2015-08-11
11367.0
41661199
MAIN BAKHTAR HALAL KABAB
A
2015-02-04
11355.0
50037974
CHOPAN KABAB HOUSE
B
2016-06-23
11229.0
50060023
GYRO BBQ
Not Yet Graded
NaT
10003.0
40589545
KHYBER PASS
No info
NaT
10019.0
40868400
ARIANA KEBAB HOUSE
No info
NaT
11103.0
50056690
ARIA KABAB
No info
NaT
11230.0
41670224
BAHAR MASALA
No info
NaT
11004.0
41535706
AFGHAN KEBOB HOUSE
No info
NaT
11365.0
50060525
BAKHTAR KEBAB
No info
NaT
African
10456.0
41674679
WEST AFRICAN RESTAURANT
A
2015-07-23
11216.0
41713405
CAFE RUE DIX
A
2014-06-27
10456.0
41022267
JALLOH FAMILY RESTAURANT
A
2015-09-29
11203.0
41718241
VCHRIS AFRICAN CUISINE
A
2016-12-22
10467.0
50034610
MAMA G AFRICAN KITCHEN
A
2016-06-16
10035.0
41240624
NEW IVOIRE
A
2016-09-02
11216.0
41595491
JOLOFF
A
2015-10-28
11216.0
50046505
LE BAOBAB GOUYGI II RESTAURANT
A
2016-05-16
10128.0
41573992
KAIA WINE BAR
A
2015-04-15
11435.0
40798504
AFRICANA RESTAURANT
A
2016-03-30
10458.0
50057973
PAPAYE DINER
A
2016-12-27
11233.0
50056417
FINGERLINKING RESTURANT
A
2017-03-07
10453.0
50016481
GALAGALA NY RESTAURANT
A
2016-05-18
10456.0
41612730
PAPA YE RESTAURANT
A
2014-08-20
...
...
...
...
...
...
Vietnamese/Cambodian/Malaysia
11355.0
41145163
PHO BANG RESTAURANT
No info
NaT
10314.0
50034118
PHO MAC VIETNAMESE FOOD
No info
NaT
10011.0
41483297
CO BA RESTAURANT
No info
NaT
10002.0
50045255
PHO GRAND
No info
NaT
11373.0
41338955
PHO BANG RESTAURANT
No info
NaT
11211.0
50004839
PHO
No info
NaT
11101.0
50037637
BIA RESTAURANT & BAR
No info
NaT
11373.0
40578058
PHO BAC VIETNAMESE SEAFOOD CUISINE
No info
NaT
10012.0
41538633
SAIGON SHACK
No info
NaT
10011.0
40903063
O-MAI
No info
NaT
11237.0
50017636
LUCY'S VIETNAMESE KITCHEN
No info
NaT
10306.0
50015327
PHO RAINBOW
No info
NaT
11232.0
41695857
PHO VIET
No info
NaT
10019.0
41551369
CHA PA'S VIETNAMESE EATERY
No info
NaT
10005.0
41613672
A LA SAIGON
No info
NaT
11218.0
50015475
HUNGER PANG
No info
NaT
11238.0
50037988
BANH MI PLACE
No info
NaT
11373.0
50059021
SUMMER
No info
NaT
10013.0
40751226
NHA-TRANG CENTRE VIETNAM RESTAURANT
No info
NaT
10013.0
40700664
PHO BANG RESTAURANT
No info
NaT
10036.0
50044184
AN'NAM
No info
NaT
10010.0
50061322
BONMi VIETNAMESE RESTAURANT
No info
NaT
11232.0
40959591
BA XUYEN
No info
NaT
10010.0
41627431
LUU'S BAGUETTE
No info
NaT
11231.0
41494322
NIGHTINGALE9
No info
NaT
11201.0
50001823
HANCO'S
No info
NaT
11237.0
50000024
FALANSAI
No info
NaT
11217.0
50018819
BRICOLAGE
No info
NaT
NaN
50059688
NEW HOME VIETNAMESE SANDWICH & BUBBLE TEA
No info
NaT
NaN
50064461
GREAT 8 RESTAURANT
No info
NaT
26458 rows × 4 columns
In [173]:
cuisines = set([name for name in df.index.get_level_values(0)])
print(cuisines)
{'Soul Food', 'Café/Coffee/Tea', 'Bangladeshi', 'Vegetarian', 'Italian', 'Portuguese', 'Creole', 'Afghan', 'Creole/Cajun', 'Pakistani', 'Bakery', 'German', 'Greek', 'Indian', 'Fruits/Vegetables', 'Indonesian', 'Moroccan', 'Nuts/Confectionary', 'Filipino', 'Polish', 'Polynesian', 'Barbecue', 'Chilean', 'Californian', 'Hotdogs', 'Vietnamese/Cambodian/Malaysia', 'Tex-Mex', 'Latin (Cuban, Dominican, Puerto Rican, South & Central American)', 'Mediterranean', 'Mexican', 'Turkish', 'Steak', 'Chinese', 'Not Listed/Not Applicable', 'Pancakes/Waffles', 'Other', 'Australian', 'Cajun', 'Egyptian', 'Iranian', 'Bagels/Pretzels', 'Middle Eastern', 'Pizza/Italian', 'American', 'Soups & Sandwiches', 'African', 'Peruvian', 'Asian', 'Chinese/Cuban', 'Eastern European', 'Pizza', 'Seafood', 'Thai', 'Armenian', 'Ice Cream, Gelato, Yogurt, Ices', 'Donuts', 'Brazilian', 'Delicatessen', 'Hawaiian', 'Japanese', 'Sandwiches', 'Salads', 'Continental', 'Sandwiches/Salads/Mixed Buffet', 'Southwestern', 'Hamburgers', 'French', 'Russian', 'Ethiopian', 'Czech', 'Spanish', 'Juice, Smoothies, Fruit Salads', 'Korean', 'Bottled beverages, including water, sodas, juices, etc.', 'Irish', 'Caribbean', 'Hotdogs/Pretzels', 'Jewish/Kosher', 'Scandinavian', 'Chinese/Japanese', 'Chicken', 'Tapas', 'English', 'Soups'}
In [225]:
for item df.loc[('Afghan')]
if item['GRADE'] == 'A':
print(item)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-225-b1b7106b2652> in <module>()
----> 1 for item['GRADE'] in df.loc[('Afghan')]:
2 print(type(item))
3 print(item)
4 #if item.loc['GRADE'] == 'A':
5
TypeError: 'str' object does not support item assignment
In [174]:
df.loc[df['CUISINE DESCRIPTION'] == 'Afghan', 'GRADE']
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2441 try:
-> 2442 return self._engine.get_loc(key)
2443 except KeyError:
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()
KeyError: 'CUISINE DESCRIPTION'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
<ipython-input-174-67bbccb2cc38> in <module>()
----> 1 df.loc[df['CUISINE DESCRIPTION'] == 'Afghan', 'GRADE']
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
1962 return self._getitem_multilevel(key)
1963 else:
-> 1964 return self._getitem_column(key)
1965
1966 def _getitem_column(self, key):
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
1969 # get column
1970 if self.columns.is_unique:
-> 1971 return self._get_item_cache(key)
1972
1973 # duplicate columns & possible reduce dimensionality
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
1643 res = cache.get(item)
1644 if res is None:
-> 1645 values = self._data.get(item)
1646 res = self._box_item_values(item, values)
1647 cache[item] = res
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
3588
3589 if not isnull(item):
-> 3590 loc = self.items.get_loc(item)
3591 else:
3592 indexer = np.arange(len(self.items))[isnull(self.items)]
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
2442 return self._engine.get_loc(key)
2443 except KeyError:
-> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key))
2445
2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()
pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()
pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()
KeyError: 'CUISINE DESCRIPTION'
In [90]:
df[df['CUISINE DESCRIPTION']=='Afghan'][df['GRADE']=='A'].set_index('DBA').stack()
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
"""Entry point for launching an IPython kernel.
Out[90]:
DBA
BAKHTER HALAL KABABS & GYRO KING CAMIS 50038407
ZIPCODE 10301
CUISINE DESCRIPTION Afghan
GRADE A
GRADE DATE 2015-08-12 00:00:00
ARIA KABAB /TARIQ AFGHAN KABAB CAMIS 50010806
ZIPCODE 10301
CUISINE DESCRIPTION Afghan
GRADE A
GRADE DATE 2014-08-28 00:00:00
BALKH SHISH KABAB HOUSE CAMIS 50007432
ZIPCODE 10301
CUISINE DESCRIPTION Afghan
GRADE A
GRADE DATE 2015-04-14 00:00:00
AFGHAN KEBAB HOUSE #1 CAMIS 40616799
ZIPCODE 10301
CUISINE DESCRIPTION Afghan
GRADE A
GRADE DATE 2017-07-21 00:00:00
SAMI'S KABAB HOUSE CAMIS 50058456
ZIPCODE 10301
CUISINE DESCRIPTION Afghan
GRADE A
GRADE DATE 2017-01-26 00:00:00
SAHARA GRILL CAMIS 50056030
ZIPCODE 10301
CUISINE DESCRIPTION Afghan
GRADE A
GRADE DATE 2016-12-22 00:00:00
AFGHAN KEBAB HOUSE CAMIS 50034676
ZIPCODE 10301
CUISINE DESCRIPTION Afghan
GRADE A
GRADE DATE 2015-08-11 00:00:00
MAIN BAKHTAR HALAL KABAB CAMIS 41661199
ZIPCODE 10301
CUISINE DESCRIPTION Afghan
GRADE A
GRADE DATE 2015-02-04 00:00:00
dtype: object
In [8]:
afghan = df['CUISINE DESCRIPTION'] == 'Afghan'
A = df['GRADE']=='A'
not_A = df['GRADE'] != 'A'
zip10002 = df['ZIPCODE'] == 10002
In [9]:
len(df[afghan & A])
Out[9]:
8
In [19]:
len(df[A])
Out[19]:
12962
In [20]:
df[zip10002 & not_A].set_index('CUISINE DESCRIPTION')
Out[20]:
CAMIS
DBA
ZIPCODE
GRADE
GRADE DATE
CUISINE DESCRIPTION
American
41171181
169 BAR
10002.0
B
2015-06-15
American
41151907
THE BACK ROOM
10002.0
B
2014-05-09
American
41155953
FAT BABY
10002.0
B
2016-06-17
American
41626630
DL
10002.0
B
2014-07-01
American
41286149
REMEDY DINER
10002.0
B
2017-06-26
American
50066279
BLACK TAP LES
10002.0
Not Yet Graded
NaT
American
50063880
BUTCHER BAR
10002.0
Not Yet Graded
NaT
American
50057052
THE FLOWER SHOP
10002.0
Not Yet Graded
NaT
American
50065564
THE PUBLIC NEW YORK
10002.0
Not Yet Graded
NaT
American
50063114
PUBLIC ROOF TOP
10002.0
Not Yet Graded
NaT
American
50066928
JIA NYC
10002.0
Not Yet Graded
NaT
American
41500642
BOB BAR
10002.0
No info
NaT
American
50050247
METROGRAPH
10002.0
No info
NaT
American
40966076
WELCOME TO THE JOHNSON'S
10002.0
No info
NaT
American
50001835
ANTLER BEER & WINE DISPENSARY
10002.0
No info
NaT
American
41569773
LOCAL 138
10002.0
No info
NaT
American
40972897
SCHILLER'S LIQUOR BAR
10002.0
No info
NaT
American
41114235
BOSS TWEED'S SALOON
10002.0
No info
NaT
American
41259890
SPITZER'S CORNER
10002.0
No info
NaT
American
41609204
LA GAMELLE
10002.0
No info
NaT
American
40929097
PIANOS
10002.0
No info
NaT
American
41198518
HOME SWEET HOME
10002.0
No info
NaT
American
41156699
WOOLWORTH TOWER KITCHEN
10002.0
No info
NaT
American
50036745
WILDAIR
10002.0
No info
NaT
American
50000116
HILL AND DALE
10002.0
No info
NaT
American
50051704
LITTLE CANAL
10002.0
No info
NaT
American
41084540
OPEN HOUSE
10002.0
No info
NaT
American
41702676
SUBJECT
10002.0
No info
NaT
American
40939686
KATRA
10002.0
No info
NaT
American
41616896
SHOPSINS GENERAL STORE (STORE #16)
10002.0
No info
NaT
...
...
...
...
...
...
Pizza
50013447
DOMINO'S
10002.0
No info
NaT
Pizza
40692120
NEW ROMA PIZZA
10002.0
No info
NaT
Pizza
40622601
A-1 PIZZA SHOP
10002.0
No info
NaT
Pizza
50001877
RIZZO FINE PIZZA
10002.0
No info
NaT
Pizza
50045890
La Margarita
10002.0
No info
NaT
Pizza
50000732
LOWER EAST SIDE PIZZA
10002.0
No info
NaT
Pizza
50058843
PITT PIZZERIA
10002.0
No info
NaT
Pizza
50011259
CLINTON SQUARE PIZZA
10002.0
No info
NaT
Pizza
50061410
PIZZA SHACK
10002.0
No info
NaT
Pizza
50057279
STAR 99C PIZZA
10002.0
No info
NaT
Pizza/Italian
50003214
WILLIAMSBURG PIZZA
10002.0
No info
NaT
Pizza/Italian
41408378
NONNA L.E.S. PIZZERIA
10002.0
No info
NaT
Sandwiches
41538017
SUBWAY
10002.0
B
2014-08-18
Sandwiches
41491151
TINY'S GIANT SANDWICH SHOP
10002.0
B
2015-09-24
Sandwiches
41447825
CHEEKY SANDWICHES
10002.0
No info
NaT
Sandwiches/Salads/Mixed Buffet
50002464
SUBWAY
10002.0
No info
NaT
Sandwiches/Salads/Mixed Buffet
50019273
SUBWAY
10002.0
No info
NaT
Seafood
50005517
BLACK CRESCENT
10002.0
No info
NaT
Seafood
50006310
CLAW DADDY'S
10002.0
No info
NaT
Soups
50001565
PEASANT STOCK
10002.0
No info
NaT
Spanish
41643863
CATA
10002.0
No info
NaT
Spanish
50013433
BALVANERA
10002.0
No info
NaT
Spanish
50017962
CHARRUA
10002.0
No info
NaT
Tapas
41626037
NO FUN
10002.0
No info
NaT
Tex-Mex
50002230
TAQUITORIA
10002.0
No info
NaT
Thai
41696519
ONE MORE THAI
10002.0
B
2016-09-21
Thai
41288951
HI THAI
10002.0
No info
NaT
Thai
40957398
KUMA INN
10002.0
No info
NaT
Vegetarian
50057793
ORCHARD GROCER
10002.0
No info
NaT
Vietnamese/Cambodian/Malaysia
50045255
PHO GRAND
10002.0
No info
NaT
279 rows × 5 columns
In [ ]:
In [16]:
gradelist = df['GRADE'].unique()
gradelist
Out[16]:
array(['A', 'B', 'Not Yet Graded', 'No info', 'C', 'Z', 'P'], dtype=object)
In [ ]:
In [ ]:
Content source: dgoldman916/nyu-python
Similar notebooks: