In [60]:
import json #for reading oauth info and save the results
import io #for credential read
from yelp.client import Client
from yelp.oauth1_authenticator import Oauth1Authenticator
from pprint import pprint #to better understand the result format
# Load the OAuth1 credentials from a local JSON file; the keys of that JSON
# object are forwarded verbatim as keyword arguments to Oauth1Authenticator.
with io.open('yelp_oauth.json') as cred_file:
    creds = json.load(cred_file)

# Only the file read needs the open handle; build the authenticated client
# once the credentials are in memory.
auth = Oauth1Authenticator(**creds)
client = Client(auth)
Just to show the content of the authenticator:
# Illustrative only: these are the four keyword arguments the credentials
# file must supply. Replace the placeholder strings with your own values.
auth = Oauth1Authenticator(
    consumer_key='your consumer key',
    consumer_secret='your consumer secret',
    token='your token',
    token_secret='your token secret'
)
client = Client(auth)
In [114]:
# Zip codes to search, kept as one comma-separated string (split over several
# source lines purely for readability; the pieces concatenate into one value).
zipstr = (
    '02108, 02109, 02110, 02111, 02113, 02114, 02115, 02116, '
    '02118, 02119, 02120, 02121, 02122, 02124, 02125, 02126, '
    '02127, 02128, 02129, 02130, 02131, 02132, 02134, 02135, '
    '02136, 02151, 02152, 02163, 02199, 02203, 02210, 02215, '
    '02467'
)
# One list entry per zip code, in the order written above.
zips = [code for code in zipstr.split(', ')]
Then set up parameters in the search:
For the details of search parameters, go to Search API
In [123]:
# Keyword arguments shared by every search request in this notebook.
#   lang : language of the results
#   sort : 0=Best matched (default), 1=Distance, 2=Highest Rated
# Not set here:
#   limit  : results per request, can be 1 to 20
#   offset : used later, in the collection loop, to page through results
params = dict(lang='en', sort=0)
In [126]:
# Run one city-wide search using the shared parameters; the response is
# inspected in the following cells.
search_location = 'Boston'
response = client.search(search_location, **params)
Then see how many restaurants we get in the search:
In [127]:
# Report the total reported by the search response. The parenthesised,
# single-argument form behaves the same as the Python 2 print statement and
# is also valid as the Python 3 print function.
print('The numbers of restaurants in Boston on Yelp: {}'.format(response.total))
Yelp lists 22476 restaurants in Boston, but the API returns only 20 results per request and at most 1000 results in total for any one search. That's why we search by zip code: narrowing the scope of each search lets us collect all the results for Boston.
Let's take a look at the data.
The responses we get are objects, so we convert them to a readable format here with pprint and vars():
To see all the response values of a business and their definitions, go to Search API-Business
In [128]:
# Show the first search hit as plain nested dicts so pprint can display it.
# vars() returns the object's live __dict__, so each level is copied with
# dict() and not written into directly: assigning into it replaced the
# response object's own attributes with dicts, which made this cell raise
# TypeError when run a second time.
first_business = response.businesses[0]
b = dict(vars(first_business))
location = first_business.location
if location is not None:
    location_dict = dict(vars(location))
    coordinate = location_dict.get('coordinate')
    if coordinate is not None:
        # Guard in case a listing carries no coordinate; vars(None) raises.
        location_dict['coordinate'] = dict(vars(coordinate))
    b['location'] = location_dict
pprint(b)
In [110]:
# Accumulator for every business record gathered by the collection loop.
results = list()
In [ ]:
DEFAULT_PAGE_SIZE = 20          # results per request when 'limit' is not set
MAX_RESULTS_PER_SEARCH = 1000   # cap on results for a single search criterion
TEST_RUN = True                 # True: fetch only the first page per zip code.
                                # Set to False to start the full collection.


def _to_plain_dict(biz):
    """Return a copy of a business object as nested plain dicts.

    The business's attributes are copied into a new dict; its ``location``
    attribute and that location's ``coordinate`` attribute are converted the
    same way when they are present. The response objects themselves are left
    untouched (vars() hands back the live ``__dict__``, so each level is
    copied before anything is assigned into it).
    """
    record = dict(vars(biz))
    location = record.get('location')
    if location is not None:
        location_dict = dict(vars(location))
        coordinate = location_dict.get('coordinate')
        if coordinate is not None:
            # Guard in case a listing carries no coordinate; vars(None) raises.
            location_dict['coordinate'] = dict(vars(coordinate))
        record['location'] = location_dict
    return record


page_size = params.get('limit', DEFAULT_PAGE_SIZE)
max_pages = MAX_RESULTS_PER_SEARCH // page_size
pages_per_zip = 1 if TEST_RUN else max_pages

for zipcode in zips:
    for page in range(pages_per_zip):
        # 'offset' is the number of results to skip, so the first page starts
        # at 0. Build a per-request copy so the shared params dict is not
        # modified by this cell.
        page_params = dict(params, offset=page * page_size)
        response = client.search(zipcode, **page_params)
        bizs = response.businesses or []
        results.extend(_to_plain_dict(biz) for biz in bizs)
        if len(bizs) < page_size:
            # A short page means there are no further results for this zip.
            break
In [113]:
# Serialise before opening the file, so a failure in json.dumps cannot leave
# a truncated output file behind.
results_json = json.dumps(results, indent=4, skipkeys=True, sort_keys=True)

# json.dumps produces text, so the file is opened in text mode; writing that
# text to a file opened as 'wb' raises TypeError on Python 3.
with open('my_boston_restaurants_yelp.json', 'w') as f:
    f.write(results_json)
It's done! Let's take some snacks and play with the data!