In [3]:
from bs4 import BeautifulSoup
import requests
import re
import json
import sys
import os

# Make the scraper helper module importable by adding its directory to the module
# search path.
# NOTE(review): this is a machine-specific absolute path -- adjust it when running
# the notebook anywhere else.
sys.path.append('/Users/robertsonwang/Desktop/Python/Yelp/Yelp_scrapper')

# The scraping cell calls sf.scrap_reviews(...), so the helper module must be imported
# here; with the import commented out a fresh kernel fails on that cell with NameError.
import scrapping_functions as sf
#reload(sf)  # uncomment while iterating on scrapping_functions to pick up edits

# Site root handed to sf.scrap_reviews together with each business link.
base_url = 'https://www.yelp.com'

In [4]:
# Display the kernel's current working directory; the relative file names opened by
# other cells in this notebook are resolved against it.
os.getcwd()


Out[4]:
'/Users/robertsonwang/Desktop/Python/Yelp_class/yelp-classification'

In [ ]:
# NOTE(review): an empty string is not a valid JSON document, so json.loads('') raises
# ValueError instead of producing a value. This cell has no execution count and looks
# like a leftover scratch cell -- confirm whether it can be removed.
test = json.loads('')

In [2]:
# Read the business links, one per line. The with-block guarantees the file handle
# is closed once the contents have been read.
with open("cleanbiz_links_2.txt", "r") as link_file:
    raw_lines = link_file.read().split('\n')

# Strip surrounding whitespace and drop blank lines (a trailing newline in the file
# would otherwise produce an empty-string "link"), de-duplicate, and sort so that the
# list order is the same on every run.
link_list = sorted(set(line.strip() for line in raw_lines if line.strip()))

In [20]:
# Spot-check a single entry of link_list (nothing visible here gives index 33 any
# special meaning).
link_list[33]


Out[20]:
'/biz/the-codmother-washington?osq=Restaurants'

In [7]:
# Scrape every business link and collect the results in json_dict, keyed by link.
json_dict = {}
for link in link_list:
    reviews = sf.scrap_reviews(link, base_url)
    if not isinstance(reviews, dict):
        # NOTE(review): a non-dict return is presumably an error/status value from the
        # scraper -- confirm against scrapping_functions. Show it and stop scraping;
        # everything collected so far stays in json_dict.
        print(reviews)
        break
    json_dict[link] = reviews

In [2]:
# Serialize the scraped reviews (json_dict) and write them to dc_reviews_2.json in the
# working directory, replacing any existing file of that name.
with open('dc_reviews_2.json', 'w') as outfile:
    outfile.write(json.dumps(json_dict))

Load in Review Data


In [5]:
# Load the previously saved review data. The with-block closes the file handle as soon
# as parsing finishes (the bare open() call left it open).
# NOTE(review): machine-specific absolute path -- adjust when running elsewhere.
with open("/Users/robertsonwang/Desktop/Python/Yelp/dc_reviews.json") as review_file:
    review_json = json.load(review_file)

In [9]:
# Display the stored record for a single business, looked up by its link path.
review_json['/biz/panas-washington']


Out[9]:
{u'@context': u'http://schema.org/',
 u'@type': u'Restaurant',
 u'address': {u'addressCountry': u'US',
  u'addressLocality': u'Washington, DC',
  u'addressRegion': None,
  u'postalCode': u'20036',
  u'streetAddress': u'2029 P St NW'},
 u'aggregateRating': {u'@type': u'AggregateRating',
  u'ratingValue': 4.10933940774487,
  u'reviewCount': 439},
 u'image': u'https://s3-media4.fl.yelpcdn.com/bphoto/AAwxypGa753fHGLs_RFCVw/ls.jpg',
 u'name': u'Panas',
 u'priceRange': u'Under $10',
 u'review': [{u'author': u'Artem M.',
   u'datePublished': u'2017-03-01',
   u'description': u'Amazing place with fresh cooked empanadas!\nVegetarians should be impressed, they have decent selection there!\nDelicious food for a reasonable price!',
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Hank M.',
   u'datePublished': u'2017-02-28',
   u'description': u'Not bad. Not stunning.\nI grew up on empanadas in South America - these ones are nowhere near as good.\nBut, when you need an empanada fix, these just might have to do.\nThey do sell them frozen in the event you want to home prepare or impress your friends.',
   u'reviewRating': {u'ratingValue': 3}},
  {u'author': u'Blessing I.',
   u'datePublished': u'2017-01-23',
   u'description': u"The empanadas were amazing! It's a cute, small shop so it's not sufficient for a big crowd or group. But its location in Dupont is spectacular.\n\nMy friend and I were able to each get 4 empanadas for a great price (which is hard to find in DC). Definitely would recommend!",
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Brian L.',
   u'datePublished': u'2017-01-08',
   u'description': u'Their empanadas are super fresh! I ordered 12 with another friend and I thought I needed to order more. But it made me very full. The insides were really full of meat and rich flavor.\nI also ordered the mango juice which is perfect in size and goes well with the empanadas.\nI ordered Beef(CA), 2 Chicken Steak(CS), Ham and Cheese(JQ) 2Chicken Pesto(CP) 2Chicken Mushrooms(CM), Pepperoni Empanada(PE), Spinach Empanada(PY), Brie Cheese Empanada(BA), Four Cheeses and Onions(FC)\nI personally like Aji(spicy) and Chimi(mild) sauce',
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Amanda J.',
   u'datePublished': u'2016-12-19',
   u'description': u"My visit to Panas is all thanks to fate.  I actually tried to eat here the one time I visited the District before moving here, as I went to Urbana and was super unimpressed.  I remember walking up to Panas about ten minutes after they closed and being crushed.  I then came to the Icy Code opening, smelled the empanadas and thought I should make the trip back but didn't pay much attention to where it was.  Suddenly, I got off at the Dupont Metro one day and was roaming around trying to find some lunch.  As I walked up to Panas, both of these memories hit me, and I realized I had ended up back here.  So my friend and I finally stopped in, and I will definitely be back.\n\nWhen you walk into Panas, you walk up to the counter and order by letters of the things you want.  This comes in handy when you get the little pockets of joy because they print the letters on the dough so you know which one you are about to bite into!\n\nWe ordered the Empanadas Combo x4 ($9), which was supposed to come with chips and two drips, aka sauces.  The owner was super nice, though, and gave us an extra empanada and drip for free.  We tried the Chipotle Steak (CS), Chicken Pesto (CP), BBQ Sauce Chicken (CB), Pepperoni Empanada (PE), and were given a free Ham and Cheese (JQ).  The Chipotle Steak and BBQ Chicken were hands down the best empanadas, and these are the two must have ones.  The others were good but not special.  We also enjoyed the Salsa Verde, Aji, and Pimenton drips.  The Salsa Verde was mild but very good, and I loved the Pimenton the most with our empanadas.  The Aji tasted more mustard-y to me, which went with the ham and cheese but wasn't my top choice.  Once we finished stuffing ourselves with these pockets of joy, we ate the interestingly spiced chips, which were just OK, and we polished off a Mango Juice ($2.50).  
It was very mango-y and delicious, and the price was right!\n\nThe shop was very warm on a cold day, and there are probably ten or so small tables to choose from.  The man who was taking care of us seemed to be the owner, and he was just the nicest.  Conversational, helpful, and generous.  It was just one of the only places that I have found in DC that delivered in all ways, which was really nice at a reasonable price.\n\nAll in all, fate may have kept bringing me to Panas, but their wonderful food and service will keep bringing me back.",
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Krys G.',
   u'datePublished': u'2016-12-02',
   u'description': u"Best empanadas right here! They make them to order and they are so flavorful and by themselves but come with different sauces for dipping. They have savory and sweet empanadas to choose from. We got the savory ones. They were out of the beef when we were there on a Sunday for lunch. You can choose from cheese, pork and chicken. They do offer vegetarian ones too. The fruit drinks are made fresh to order and are very sweet and tasty. Very reasonably priced. It's a small place with not too many tables to sit at. Would highly recommend this place.",
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Calvin H.',
   u'datePublished': u'2016-10-23',
   u'description': u"I've been to Panas a few times now and haven't been disappointed yet. I like that they have a wide range of empanada fillings to choose from. (I do wish it were easier to figure out which one is which though. I know you have the letters stamped on the edge, but how am I supposed to remember what PY and CA mean?) \n\nI particularly like the spinach with goat cheese and raisins (PY). The aji (yellow) sauce is my favorite. Their plantain chips are thinly sliced and not greasy--far and away better than the stuff in the bag at Trader Joe's. \n\nIf you're more than one person, order a canoa! You can get all of your empanadas, sauces, and chips served on an impressive-looking long wooden platter.",
   u'reviewRating': {u'ratingValue': 4}},
  {u'author': u'Ron C.',
   u'datePublished': u'2017-01-09',
   u'description': u'Delicious Empanadas and great service.  I used to come here for lunch when I was a little hung over from the night before and these things do the trick.',
   u'reviewRating': {u'ratingValue': 3}},
  {u'author': u'Sou M.',
   u'datePublished': u'2016-09-21',
   u'description': u"I had bookmarked this place for a long time but today was the first time I had the chance to try it out. After all, it lived up to my expectations. \n\nThey have Very tasty empanadas with a great variety, both for vegetarians and meat lovers. You have the option to choose 3 ($7.5) or 4 ($9) with two sauces + plantain chips or go for big platter (6 or 8) to share, which comes with 4 sauces. I got beef with olives, chicken pesto, chicken mushroom, and eggplant. They were so damn hot so i burned my tongue. But when they got colder, they tasted better. \n\nThe location is casual but cute, especially the outdoor sitting. \n\nAlthough the empanadas are very tasty and fresh, i'm afraid they are half size the ones we are used to see. For me, with a small appetite, 3 or 4 is more than enough, but for a guy, I dont think 3-4 would make for a meal, so definitely you need a side. In that case you'll end up paying $15 for empanadas. I think they can be a good option if you want to share a fun snack with a friend or you want to oder for your parties.\n\nI think they need to knock down the prices or make the empanadas 1.5 times bigger.",
   u'reviewRating': {u'ratingValue': 4}},
  {u'author': u'Jun Li Z.',
   u'datePublished': u'2016-10-11',
   u'description': u"Has a variety of options you can choose from. Love that they serve it to you after you order them. The flavors are so good. The juices are also good to try, such as the mango one. Horchata was also really yum. \n\nIf you get the 8 empanadas, it's enough to share for 2ppl. Not bad for the price.",
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Alyssa B.',
   u'datePublished': u'2016-09-14',
   u'description': u'I love and adore Panas!  It\'s one of my absolute favorite counter service places in DC.  I can\'t believe I haven\'t reviewed it before!  I come here as often as I can, and I\'ve made it a life goal to try every single empanada here at least once (they have over a dozen), but it\'s sort of hard because I love the ones I\'ve already had and keep getting them.  That\'s a good problem to have though right?  \n\nThe empanadas are delicious, baked, warm pockets of goodness.  I\'ve tried almost every variety at this point and my favorites are the Carne (traditional ground beef), the Pepperoni (it\'s like a much better version of a hot pocket), and the Spinach (formerly known as the Popeye).  The two I don\'t really like here are the corn and the chicken pesto, but that\'s more of a personal preference thing.  The empanadas are the perfect size-- not too big, not too small-- and GREAT for practicing portion size.  I usually have 1 for a snack or 3 for a meal, but it depends on your appetite.  They feel kind of decadent but they\'re small and baked, so they\'re reasonsably healthy for what they are (notice that I did say REASONABLY) as long as you don\'t have a million in one seating (tempting).  \n\nOverall, the food is a really solid 4.  The reason for the extra star is the amazing, above-and-beyond, service with a smile I always receive when I\'m here.  They are just so SO nice here!  All the guys here (I\'ve only ever had male cashiers) are super helpful and happy to give recommendations if it\'s your first time.  One time, I got a take out order on a really hot day and I was standing there drinking from a water bottle I brought.  One of the guys there saw me and was like "you should refill your water bottle at the drink station before you go back outside".  It was a small suggestion, but it was super considerate and the kind of thing that makes a business standout.  I\'m always happy when I leave here because they\'re just so darn friendly.  
\n\nTo me, going to Panas is like happiness, rainbows, and baked pastries all rolled into a cute little store right off of the Circle. :)\n\nTip: When I\'m in MD and can\'t go to Panas (sad), I get their empanadas at Balducci\'s or Harris Teeter.  They stock a decent selection of their empanadas in local grocery stores all around the DMV.  They come in packs of 3 and they\'re super convenient for snacks or meals.  I even eat them for breakfast!',
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Katie B.',
   u'datePublished': u'2017-02-18',
   u'description': u'Amazing selection of baked empanadas. Friendly family owned place. Meat, veggie, cheese, and sweet empanadas. Fried plantain chips were good too.',
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Kristi C.',
   u'datePublished': u'2016-09-13',
   u'description': u'The best little empanadas I\'ve had in DC! We had a large order at work today for a birthday lunch for a member of our team, and they were a huge hit! I tried all 4 "drips" or sauces, and my favorite was mixing them together for incredible flavor. For empanadas I tried the beef and the chipotle steak, both of which were de-licious! It\'s cool that they stamp the two letter code on the pastry of each one, so that people could figure out which was which. The plantain were also great. \n\nMy colleagues said there was an issue with picking up the order, but it was resolved without too much hassle. \n\nI\'m looking forward to coming up to the store sometime and trying more flavors!',
   u'reviewRating': {u'ratingValue': 4}},
  {u'author': u'Nicholas W.',
   u'datePublished': u'2016-07-19',
   u'description': u"Very good, very cheap food. Huge variety of empanadas, and they're *reasonably* healthy too since they're baked instead of fried. Everything will be under $10 and very filling. The sauces could be a little spicier, but honestly that's a relatively minor complaint. The empanadas are good enough on their own without sauce. \n\nI'm a big fan of the spinach and feta empanada - definitely not authentic but hey it's yummy.",
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'John H.',
   u'datePublished': u'2017-02-09',
   u'description': u'Panas is a fun little place with a wide variety of tiny empanadas. They have at least 12 fillings, but only a couple could be considered even vaguely authentic to any country that serves empanadas. I would have liked to see some more classic flavors, and I would have preferred a corn flour empanada to wheat flour. I had 4 empanadas and chips, but left a little hungry.',
   u'reviewRating': {u'ratingValue': 3}},
  {u'author': u'DineoutGal A.',
   u'datePublished': u'2016-07-09',
   u'description': u"Yum!!! Great casual place to grab a bite! There are so many flavours of baked empanadas to choose from - chicken, beef, pork and lots of veggie options too. Just order by the initial letters from the menu for the fillings you want. \n\nwe opted for the two person share option which is 6 empanadas of any flavours with an ounce of plantain chips and four dipping sauces from spicy to mild (the spicy wasn't really spicy for me but all very good flavours). Very ingenious to have the dough show the initials of the filling so you know what you're eating. We tried many of the protein and veggie ones and they were all delicious. Can't go wrong with any fillings!\n\nThe place is brightly decorated with very friendly service. It's just steps away from Dupont Circle and a couple of blocks from the Phillip Collection.",
   u'reviewRating': {u'ratingValue': 4}},
  {u'author': u'Pascale A.',
   u'datePublished': u'2016-10-23',
   u'description': u"If you like empanadas... this is the place for you! You're a long way from the typical ground beef empanadas. They have savory. They have sweet. Bottom line you've got options! You can pick from beef, pork, chicken, seafood, cheese and vegetarian. They've got combos 3, 4, family pack comes with plantain chips and your choice of sauce - mild, medium, hot and oh la la.\nThey're not big; an average person can eat 3 or 4 no problem. The sweet ones cost more and aren't included in the combos. \nPlus they have passion fruit juice.",
   u'reviewRating': {u'ratingValue': 4}},
  {u'author': u'Marina H.',
   u'datePublished': u'2016-05-02',
   u'description': u'Great empanadas @ Panas! Absolutely live the Brie one! (French taste!) My empanadas to go place on a Monday night. \nTried all of the empanadas! (Literally!) and they are all very delicious. \nStaff is very friendly ;-)',
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Yasmin H.',
   u'datePublished': u'2017-01-16',
   u'description': u"Loved this place so much and can't wait to come back. My sister and I ordered 6 empanadas. I got the Brie, mushroom and artichoke and the steak chipotle and the beef and olives. All 3 were amazing! It came with?4 sauces and plantain chips! The empanadas were baked; light and fresh! Will bring other friends.",
   u'reviewRating': {u'ratingValue': 5}},
  {u'author': u'Indir S.',
   u'datePublished': u'2016-09-29',
   u'description': u'I have been very skeptical to visit again, a couple of times that I visited their now closed Bethesda store they were out of something or my selections never were fulfilling. I came to the DuPont location and I honestly can say that I love it, especially their Cholados that bring memories from of my childhood in Colombia.',
   u'reviewRating': {u'ratingValue': 4}}],
 u'servesCuisine': u'Latin American',
 u'telephone': u'+12022232964'}

In [13]:
# Merge the two review collections into a new dict: start from a shallow copy of
# review_json, then add json_dict, whose entries win when a key appears in both.
# copy + update works for any hashable keys, whereas dict(a, **b) requires every key
# of b to be a string.
agg_dict = dict(review_json)
agg_dict.update(json_dict)

In [19]:
# Serialize the merged review data (agg_dict) and write it to dc_reviews_agg.json in
# the working directory, replacing any existing file of that name.
with open('dc_reviews_agg.json', 'w') as outfile:
    outfile.write(json.dumps(agg_dict))

Testing / scratch work: the cells below this point are experiments, not part of the main workflow


In [ ]:
# Scratch cell: pull the first href out of every business listing block and append it
# to link_list.
test_link = '/biz/the-codmother-washington?osq=Restaurants'

# NOTE(review): `articles` must already hold a parsed BeautifulSoup object when this
# cell runs (it is not created in the visible cells), and because the name is rebound
# to the result list the cell cannot simply be re-run -- confirm the intended source.
articles = articles.find_all("div", class_="biz-listing-large")
for article in articles:
    # Search the string form of all href-bearing tags inside this listing.
    match = re.search(r'href=[\'"]?([^\'" >]+)', str(article.find_all(href = True)))

    if match:
        # group(1) is the captured URL itself. Slicing six characters off group(0)
        # only works when a quote follows `href=`, which the pattern treats as optional.
        link = match.group(1)
        # Append only on a successful match; appending unconditionally would re-add the
        # previous listing's link (or raise NameError on the first listing).
        link_list.append(link)