In [1]:
import pandas as pd
# Load the business listings table from a local pickle file. The rest of this
# notebook reads its `business_id` and `categories` columns.
# NOTE(review): unpickling can execute arbitrary code -- only load files from a
# trusted source.
business = pd.read_pickle('../input/yelp_academic_dataset_business.pickle')

# Quick look at the first row (left disabled by the author):
#print business.head(1)

def GetBusinessIDs(business_data, category):
    '''
    Search the listings for business ids matching a given category.

    Parameters
    ----------
    business_data : table exposing `business_id` and `categories` columns
        The two columns are read in parallel, row by row.
    category : str
        Category to look for. A row matches when `category in cats` is true
        for that row's `categories` value.

    Returns
    -------
    list
        Business ids of the matching rows, in row order.

    Notes
    -----
    Progress is written to stdout every 10000 rows on one line that is
    refreshed with a carriage return; the line is completed and terminated
    once the scan finishes.
    '''
    import sys  # local import: only needed for the progress line

    ids = []
    n_businesses = len(business_data.business_id)
    rows = zip(business_data.business_id, business_data.categories)
    for i_id, (ID, cats) in enumerate(rows):
        # Rows without category data (None, or the float NaN pandas uses for
        # missing values) cannot match, and `in` would raise TypeError on them.
        has_categories = cats is not None and not isinstance(cats, float)
        if has_categories and category in cats:
            ids.append(ID)

        if (i_id % 10000) == 0:
            # sys.stdout.write behaves the same on Python 2 and Python 3,
            # unlike a print statement with a trailing comma.
            sys.stdout.write('\r Processed %i of %i' % (i_id, n_businesses))
            sys.stdout.flush()

    # Report completion and end the line so later output starts on a fresh one.
    sys.stdout.write('\r Processed %i of %i\n' % (n_businesses, n_businesses))
    sys.stdout.flush()

    return ids
    
    
# Every distinct category name appearing in the listings. Not used further in
# this cell; useful for browsing when choosing entries for bar_categories.
unique_categories = set(cat for cats in business.categories for cat in cats)
# for cat in unique_categories: 
#     print cat    

# Categories whose businesses are collected below.
# NOTE(review): the author questioned whether "Restaurants" (and "Food") should
# be part of this list -- confirm before relying on the result.
bar_categories = ["Lounges", "Hookah Bars", "Music Venues", "Beer Garden", "Pub Food", 
                  "Nightlife", "Dive Bars", "Cocktail Bars", "Distilleries", "Tapas Bars", "Piano Bars",
                  "Jazz & Blues", "Irish", "Irish Pub", "Beer Bar", "Bars", "Gay Bars", "Champagne Bars",
                  "Wine Bars", "Brasseries", "Gastropubs", "Breweries", "Restaurants", "Food"]  # EXCLUDING RESTAURANT?

# Accumulate straight into a set: a business that matches several categories
# is kept once, and the name never changes type between list and set.
business_ids = set()
n_categories = len(bar_categories)
for i, cat in enumerate(bar_categories):
    # i + 1 so the counter runs 1..N instead of stopping at "N-1 of N".
    print('category %i of %i' % (i + 1, n_categories))
    business_ids.update(GetBusinessIDs(business, cat))


category 0 of 24
 Processed 60000 of 61184 category 1 of 24
 Processed 60000 of 61184 category 2 of 24
 Processed 60000 of 61184 category 3 of 24
 Processed 60000 of 61184 category 4 of 24
 Processed 60000 of 61184 category 5 of 24
 Processed 60000 of 61184 category 6 of 24
 Processed 60000 of 61184 category 7 of 24
 Processed 60000 of 61184 category 8 of 24
 Processed 60000 of 61184 category 9 of 24
 Processed 60000 of 61184 category 10 of 24
 Processed 60000 of 61184 category 11 of 24
 Processed 60000 of 61184 category 12 of 24
 Processed 60000 of 61184 category 13 of 24
 Processed 60000 of 61184 category 14 of 24
 Processed 60000 of 61184 category 15 of 24
 Processed 60000 of 61184 category 16 of 24
 Processed 60000 of 61184 category 17 of 24
 Processed 60000 of 61184 category 18 of 24
 Processed 60000 of 61184 category 19 of 24
 Processed 60000 of 61184 category 20 of 24
 Processed 60000 of 61184 category 21 of 24
 Processed 60000 of 61184 category 22 of 24
 Processed 60000 of 61184 category 23 of 24
 Processed 60000 of 61184

In [2]:
import pickle
# Save the collected set of business ids to disk as a pickle.
# Alternative output path left disabled by the author:
#with open('../output/bar_ids.pickle', 'wb') as f:
with open('../output/bar_restaurant_ids.pickle', 'wb') as ids_file:
    pickle.dump(business_ids, ids_file)

In [3]:
# NOTE(review): this reads '../output/bar_ids.pickle', not the
# '../output/bar_restaurant_ids.pickle' file written by the previous cell --
# confirm that this is the intended file.
# Pickle data is bytes, so open in binary mode; `with` closes the handle.
# Only unpickle files from a trusted source.
with open('../output/bar_ids.pickle', 'rb') as f:
    bar_ids = pickle.load(f)
print(len(bar_ids))


4655

In [ ]:


In [ ]:


In [ ]: