In [2]:
import pandas as pd

In [156]:
df = pd.read_csv('./DOHMH_New_York_City_Restaurant_Inspection_Results.csv', usecols=['CAMIS','DBA','ZIPCODE','CUISINE DESCRIPTION', 'GRADE', 'GRADE DATE'])
df.drop_duplicates(subset=['CAMIS'], inplace=True)
df.sort_values(['CUISINE DESCRIPTION', 'GRADE'], inplace=True)
df['GRADE'].fillna('No info', inplace = True)
df['ZIPCODE'].fillna(0, inplace = True)
df['ZIPCODE'] = df['ZIPCODE'].astype('str')[:-2]
df['GRADE DATE'] = pd.to_datetime(df['GRADE DATE'])
df.set_index(['CUISINE DESCRIPTION', 'ZIPCODE'], inplace=True)
df


Out[156]:
CAMIS DBA GRADE GRADE DATE
CUISINE DESCRIPTION ZIPCODE
Afghan 11418.0 50038407 BAKHTER HALAL KABABS & GYRO KING A 2015-08-12
11367.0 50010806 ARIA KABAB /TARIQ AFGHAN KABAB A 2014-08-28
11105.0 50007432 BALKH SHISH KABAB HOUSE A 2015-04-14
10019.0 40616799 AFGHAN KEBAB HOUSE #1 A 2017-07-21
11106.0 50058456 SAMI'S KABAB HOUSE A 2017-01-26
11420.0 50056030 SAHARA GRILL A 2016-12-22
10021.0 50034676 AFGHAN KEBAB HOUSE A 2015-08-11
11367.0 41661199 MAIN BAKHTAR HALAL KABAB A 2015-02-04
11355.0 50037974 CHOPAN KABAB HOUSE B 2016-06-23
11229.0 50060023 GYRO BBQ Not Yet Graded NaT
10003.0 40589545 KHYBER PASS No info NaT
10019.0 40868400 ARIANA KEBAB HOUSE No info NaT
11103.0 50056690 ARIA KABAB No info NaT
11230.0 41670224 BAHAR MASALA No info NaT
11004.0 41535706 AFGHAN KEBOB HOUSE No info NaT
11365.0 50060525 BAKHTAR KEBAB No info NaT
African 10456.0 41674679 WEST AFRICAN RESTAURANT A 2015-07-23
11216.0 41713405 CAFE RUE DIX A 2014-06-27
10456.0 41022267 JALLOH FAMILY RESTAURANT A 2015-09-29
11203.0 41718241 VCHRIS AFRICAN CUISINE A 2016-12-22
10467.0 50034610 MAMA G AFRICAN KITCHEN A 2016-06-16
10035.0 41240624 NEW IVOIRE A 2016-09-02
11216.0 41595491 JOLOFF A 2015-10-28
11216.0 50046505 LE BAOBAB GOUYGI II RESTAURANT A 2016-05-16
10128.0 41573992 KAIA WINE BAR A 2015-04-15
11435.0 40798504 AFRICANA RESTAURANT A 2016-03-30
10458.0 50057973 PAPAYE DINER A 2016-12-27
11233.0 50056417 FINGERLINKING RESTURANT A 2017-03-07
10453.0 50016481 GALAGALA NY RESTAURANT A 2016-05-18
10456.0 41612730 PAPA YE RESTAURANT A 2014-08-20
... ... ... ... ... ...
Vietnamese/Cambodian/Malaysia 11355.0 41145163 PHO BANG RESTAURANT No info NaT
10314.0 50034118 PHO MAC VIETNAMESE FOOD No info NaT
10011.0 41483297 CO BA RESTAURANT No info NaT
10002.0 50045255 PHO GRAND No info NaT
11373.0 41338955 PHO BANG RESTAURANT No info NaT
11211.0 50004839 PHO No info NaT
11101.0 50037637 BIA RESTAURANT & BAR No info NaT
11373.0 40578058 PHO BAC VIETNAMESE SEAFOOD CUISINE No info NaT
10012.0 41538633 SAIGON SHACK No info NaT
10011.0 40903063 O-MAI No info NaT
11237.0 50017636 LUCY'S VIETNAMESE KITCHEN No info NaT
10306.0 50015327 PHO RAINBOW No info NaT
11232.0 41695857 PHO VIET No info NaT
10019.0 41551369 CHA PA'S VIETNAMESE EATERY No info NaT
10005.0 41613672 A LA SAIGON No info NaT
11218.0 50015475 HUNGER PANG No info NaT
11238.0 50037988 BANH MI PLACE No info NaT
11373.0 50059021 SUMMER No info NaT
10013.0 40751226 NHA-TRANG CENTRE VIETNAM RESTAURANT No info NaT
10013.0 40700664 PHO BANG RESTAURANT No info NaT
10036.0 50044184 AN'NAM No info NaT
10010.0 50061322 BONMi VIETNAMESE RESTAURANT No info NaT
11232.0 40959591 BA XUYEN No info NaT
10010.0 41627431 LUU'S BAGUETTE No info NaT
11231.0 41494322 NIGHTINGALE9 No info NaT
11201.0 50001823 HANCO'S No info NaT
11237.0 50000024 FALANSAI No info NaT
11217.0 50018819 BRICOLAGE No info NaT
NaN 50059688 NEW HOME VIETNAMESE SANDWICH & BUBBLE TEA No info NaT
NaN 50064461 GREAT 8 RESTAURANT No info NaT

26458 rows × 4 columns


In [173]:
cuisines = set([name for name in df.index.get_level_values(0)])
print(cuisines)


{'Soul Food', 'Café/Coffee/Tea', 'Bangladeshi', 'Vegetarian', 'Italian', 'Portuguese', 'Creole', 'Afghan', 'Creole/Cajun', 'Pakistani', 'Bakery', 'German', 'Greek', 'Indian', 'Fruits/Vegetables', 'Indonesian', 'Moroccan', 'Nuts/Confectionary', 'Filipino', 'Polish', 'Polynesian', 'Barbecue', 'Chilean', 'Californian', 'Hotdogs', 'Vietnamese/Cambodian/Malaysia', 'Tex-Mex', 'Latin (Cuban, Dominican, Puerto Rican, South & Central American)', 'Mediterranean', 'Mexican', 'Turkish', 'Steak', 'Chinese', 'Not Listed/Not Applicable', 'Pancakes/Waffles', 'Other', 'Australian', 'Cajun', 'Egyptian', 'Iranian', 'Bagels/Pretzels', 'Middle Eastern', 'Pizza/Italian', 'American', 'Soups & Sandwiches', 'African', 'Peruvian', 'Asian', 'Chinese/Cuban', 'Eastern European', 'Pizza', 'Seafood', 'Thai', 'Armenian', 'Ice Cream, Gelato, Yogurt, Ices', 'Donuts', 'Brazilian', 'Delicatessen', 'Hawaiian', 'Japanese', 'Sandwiches', 'Salads', 'Continental', 'Sandwiches/Salads/Mixed Buffet', 'Southwestern', 'Hamburgers', 'French', 'Russian', 'Ethiopian', 'Czech', 'Spanish', 'Juice, Smoothies, Fruit Salads', 'Korean', 'Bottled beverages, including water, sodas, juices, etc.', 'Irish', 'Caribbean', 'Hotdogs/Pretzels', 'Jewish/Kosher', 'Scandinavian', 'Chinese/Japanese', 'Chicken', 'Tapas', 'English', 'Soups'}

In [225]:
for item df.loc[('Afghan')]
    if item['GRADE'] == 'A':
        print(item)


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-225-b1b7106b2652> in <module>()
----> 1 for item['GRADE'] in df.loc[('Afghan')]:
      2     print(type(item))
      3     print(item)
      4     #if item.loc['GRADE'] == 'A':
      5 

TypeError: 'str' object does not support item assignment

In [174]:
df.loc[df['CUISINE DESCRIPTION'] == 'Afghan', 'GRADE']


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2441             try:
-> 2442                 return self._engine.get_loc(key)
   2443             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()

KeyError: 'CUISINE DESCRIPTION'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-174-67bbccb2cc38> in <module>()
----> 1 df.loc[df['CUISINE DESCRIPTION'] == 'Afghan', 'GRADE']

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   1962             return self._getitem_multilevel(key)
   1963         else:
-> 1964             return self._getitem_column(key)
   1965 
   1966     def _getitem_column(self, key):

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   1969         # get column
   1970         if self.columns.is_unique:
-> 1971             return self._get_item_cache(key)
   1972 
   1973         # duplicate columns & possible reduce dimensionality

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   1643         res = cache.get(item)
   1644         if res is None:
-> 1645             values = self._data.get(item)
   1646             res = self._box_item_values(item, values)
   1647             cache[item] = res

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath)
   3588 
   3589             if not isnull(item):
-> 3590                 loc = self.items.get_loc(item)
   3591             else:
   3592                 indexer = np.arange(len(self.items))[isnull(self.items)]

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2442                 return self._engine.get_loc(key)
   2443             except KeyError:
-> 2444                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2445 
   2446         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5280)()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas/_libs/index.c:5126)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20523)()

pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas/_libs/hashtable.c:20477)()

KeyError: 'CUISINE DESCRIPTION'

In [90]:
df[df['CUISINE DESCRIPTION']=='Afghan'][df['GRADE']=='A'].set_index('DBA').stack()


/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/ipykernel_launcher.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  """Entry point for launching an IPython kernel.
Out[90]:
DBA                                                  
BAKHTER HALAL KABABS & GYRO KING  CAMIS                             50038407
                                  ZIPCODE                              10301
                                  CUISINE DESCRIPTION                 Afghan
                                  GRADE                                    A
                                  GRADE DATE             2015-08-12 00:00:00
ARIA KABAB /TARIQ AFGHAN KABAB    CAMIS                             50010806
                                  ZIPCODE                              10301
                                  CUISINE DESCRIPTION                 Afghan
                                  GRADE                                    A
                                  GRADE DATE             2014-08-28 00:00:00
BALKH SHISH KABAB HOUSE           CAMIS                             50007432
                                  ZIPCODE                              10301
                                  CUISINE DESCRIPTION                 Afghan
                                  GRADE                                    A
                                  GRADE DATE             2015-04-14 00:00:00
AFGHAN KEBAB HOUSE #1             CAMIS                             40616799
                                  ZIPCODE                              10301
                                  CUISINE DESCRIPTION                 Afghan
                                  GRADE                                    A
                                  GRADE DATE             2017-07-21 00:00:00
SAMI'S KABAB HOUSE                CAMIS                             50058456
                                  ZIPCODE                              10301
                                  CUISINE DESCRIPTION                 Afghan
                                  GRADE                                    A
                                  GRADE DATE             2017-01-26 00:00:00
SAHARA GRILL                      CAMIS                             50056030
                                  ZIPCODE                              10301
                                  CUISINE DESCRIPTION                 Afghan
                                  GRADE                                    A
                                  GRADE DATE             2016-12-22 00:00:00
AFGHAN KEBAB HOUSE                CAMIS                             50034676
                                  ZIPCODE                              10301
                                  CUISINE DESCRIPTION                 Afghan
                                  GRADE                                    A
                                  GRADE DATE             2015-08-11 00:00:00
MAIN BAKHTAR HALAL KABAB          CAMIS                             41661199
                                  ZIPCODE                              10301
                                  CUISINE DESCRIPTION                 Afghan
                                  GRADE                                    A
                                  GRADE DATE             2015-02-04 00:00:00
dtype: object

In [8]:
afghan = df['CUISINE DESCRIPTION'] == 'Afghan'
A = df['GRADE']=='A'
not_A = df['GRADE'] != 'A'
zip10002 = df['ZIPCODE'] == 10002

In [9]:
len(df[afghan & A])


Out[9]:
8

In [19]:
len(df[A])


Out[19]:
12962

In [20]:
df[zip10002 & not_A].set_index('CUISINE DESCRIPTION')


Out[20]:
CAMIS DBA ZIPCODE GRADE GRADE DATE
CUISINE DESCRIPTION
American 41171181 169 BAR 10002.0 B 2015-06-15
American 41151907 THE BACK ROOM 10002.0 B 2014-05-09
American 41155953 FAT BABY 10002.0 B 2016-06-17
American 41626630 DL 10002.0 B 2014-07-01
American 41286149 REMEDY DINER 10002.0 B 2017-06-26
American 50066279 BLACK TAP LES 10002.0 Not Yet Graded NaT
American 50063880 BUTCHER BAR 10002.0 Not Yet Graded NaT
American 50057052 THE FLOWER SHOP 10002.0 Not Yet Graded NaT
American 50065564 THE PUBLIC NEW YORK 10002.0 Not Yet Graded NaT
American 50063114 PUBLIC ROOF TOP 10002.0 Not Yet Graded NaT
American 50066928 JIA NYC 10002.0 Not Yet Graded NaT
American 41500642 BOB BAR 10002.0 No info NaT
American 50050247 METROGRAPH 10002.0 No info NaT
American 40966076 WELCOME TO THE JOHNSON'S 10002.0 No info NaT
American 50001835 ANTLER BEER & WINE DISPENSARY 10002.0 No info NaT
American 41569773 LOCAL 138 10002.0 No info NaT
American 40972897 SCHILLER'S LIQUOR BAR 10002.0 No info NaT
American 41114235 BOSS TWEED'S SALOON 10002.0 No info NaT
American 41259890 SPITZER'S CORNER 10002.0 No info NaT
American 41609204 LA GAMELLE 10002.0 No info NaT
American 40929097 PIANOS 10002.0 No info NaT
American 41198518 HOME SWEET HOME 10002.0 No info NaT
American 41156699 WOOLWORTH TOWER KITCHEN 10002.0 No info NaT
American 50036745 WILDAIR 10002.0 No info NaT
American 50000116 HILL AND DALE 10002.0 No info NaT
American 50051704 LITTLE CANAL 10002.0 No info NaT
American 41084540 OPEN HOUSE 10002.0 No info NaT
American 41702676 SUBJECT 10002.0 No info NaT
American 40939686 KATRA 10002.0 No info NaT
American 41616896 SHOPSINS GENERAL STORE (STORE #16) 10002.0 No info NaT
... ... ... ... ... ...
Pizza 50013447 DOMINO'S 10002.0 No info NaT
Pizza 40692120 NEW ROMA PIZZA 10002.0 No info NaT
Pizza 40622601 A-1 PIZZA SHOP 10002.0 No info NaT
Pizza 50001877 RIZZO FINE PIZZA 10002.0 No info NaT
Pizza 50045890 La Margarita 10002.0 No info NaT
Pizza 50000732 LOWER EAST SIDE PIZZA 10002.0 No info NaT
Pizza 50058843 PITT PIZZERIA 10002.0 No info NaT
Pizza 50011259 CLINTON SQUARE PIZZA 10002.0 No info NaT
Pizza 50061410 PIZZA SHACK 10002.0 No info NaT
Pizza 50057279 STAR 99C PIZZA 10002.0 No info NaT
Pizza/Italian 50003214 WILLIAMSBURG PIZZA 10002.0 No info NaT
Pizza/Italian 41408378 NONNA L.E.S. PIZZERIA 10002.0 No info NaT
Sandwiches 41538017 SUBWAY 10002.0 B 2014-08-18
Sandwiches 41491151 TINY'S GIANT SANDWICH SHOP 10002.0 B 2015-09-24
Sandwiches 41447825 CHEEKY SANDWICHES 10002.0 No info NaT
Sandwiches/Salads/Mixed Buffet 50002464 SUBWAY 10002.0 No info NaT
Sandwiches/Salads/Mixed Buffet 50019273 SUBWAY 10002.0 No info NaT
Seafood 50005517 BLACK CRESCENT 10002.0 No info NaT
Seafood 50006310 CLAW DADDY'S 10002.0 No info NaT
Soups 50001565 PEASANT STOCK 10002.0 No info NaT
Spanish 41643863 CATA 10002.0 No info NaT
Spanish 50013433 BALVANERA 10002.0 No info NaT
Spanish 50017962 CHARRUA 10002.0 No info NaT
Tapas 41626037 NO FUN 10002.0 No info NaT
Tex-Mex 50002230 TAQUITORIA 10002.0 No info NaT
Thai 41696519 ONE MORE THAI 10002.0 B 2016-09-21
Thai 41288951 HI THAI 10002.0 No info NaT
Thai 40957398 KUMA INN 10002.0 No info NaT
Vegetarian 50057793 ORCHARD GROCER 10002.0 No info NaT
Vietnamese/Cambodian/Malaysia 50045255 PHO GRAND 10002.0 No info NaT

279 rows × 5 columns


In [ ]:


In [16]:
gradelist = df['GRADE'].unique()
gradelist


Out[16]:
array(['A', 'B', 'Not Yet Graded', 'No info', 'C', 'Z', 'P'], dtype=object)

In [ ]:


In [ ]: