In [20]:
import pandas as pd
import matplotlib.pyplot as plt
import boto3
import requests
from bs4 import BeautifulSoup
from collections import defaultdict
import re
import time
%matplotlib inline

In [2]:
# Load the toy listing (one row per toy, see the preview below) from S3.
df_toys = pd.read_csv("s3://dogfaces/reviews/toys.csv")

In [3]:
# Preview the first rows to check which columns are available.
df_toys.head()


Out[3]:
cat_id num_reviews picture_link price toy_id toy_link toy_name
0 1 800 https://img.chewy.com/is/catalog/62758_MAIN._A... $1.19 47728 https://www.chewy.com/kong-airdog-squeakair-ba... kong-airdog-squeakair-ball-dog-toy
1 1 127 https://img.chewy.com/is/catalog/80753._AC_SS1... $2.99 108582 https://www.chewy.com/mammoth-monkey-fist-bar-... mammoth-monkey-fist-bar-dog-toy
2 1 292 https://img.chewy.com/is/catalog/62850._AC_SS1... $3.39 47880 https://www.chewy.com/kong-squeakair-birthday-... kong-squeakair-birthday-balls-dog
3 1 1233 https://img.chewy.com/is/catalog/77643._AC_SS1... $9.39 105502 https://www.chewy.com/kong-jumbler-ball-dog-to... kong-jumbler-ball-dog-toy-color
4 1 538 https://img.chewy.com/is/catalog/53235_MAIN._A... $6.85 38371 https://www.chewy.com/chuckit-ultra-rubber-bal... chuckit-ultra-rubber-ball-medium-2

In [4]:
# Build the review-listing URL for the toy in row 0 and fetch one page of it.
# A single .loc lookup replaces the chained df[col][0] indexing.
test_url = df_toys.loc[0, 'toy_link']
# Query string requesting page 6 of all-star reviews sorted by newest.
review_query = 'reviewSort=NEWEST&reviewFilter=ALL_STARS&pageNumber=6'
review_url = test_url.replace('/dp/', '/product-reviews/') + '?' + review_query
# requests has no default timeout, so without one a stalled connection would
# hang the cell indefinitely; raise_for_status() surfaces HTTP errors here
# instead of letting the following cells parse an error page.
r = requests.get(review_url, timeout=10)
r.raise_for_status()

In [5]:
# Show the URL that was requested; print(...) with a single argument behaves
# the same on Python 2 and Python 3, unlike the print statement.
print(review_url)


https://www.chewy.com/kong-airdog-squeakair-ball-dog-toy/product-reviews/47728?reviewSort=NEWEST&reviewFilter=ALL_STARS&pageNumber=6

In [6]:
# Review-listing URL for the same toy with pageNumber=1 (the request above
# used pageNumber=6); the cell only echoes the literal.
'https://www.chewy.com/kong-airdog-squeakair-ball-dog-toy/product-reviews/47728?reviewSort=NEWEST&reviewFilter=ALL_STARS&pageNumber=1'


Out[6]:
'https://www.chewy.com/kong-airdog-squeakair-ball-dog-toy/product-reviews/47728?reviewSort=NEWEST&reviewFilter=ALL_STARS&pageNumber=1'

In [7]:
# Parse the raw response body with the lxml parser.
soup = BeautifulSoup(r.content, features='lxml')

In [8]:
# Dump the attributes of every "li.js-content" element on the page.
# After the loop `sec` stays bound to the last matched element, which the
# following cells read from.
for sec in soup.select("li.js-content"):
    # Function-call form of print works on both Python 2 and Python 3.
    print(sec.attrs)


{'data-content-id': '184122095', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '184036857', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '183977882', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '183907922', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '183744357', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '183743685', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '183685596', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '183483074', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '183352427', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}
{'data-content-id': '183331394', 'itemscope': '', 'class': ['js-content'], 'itemprop': 'review', 'itemtype': 'http://schema.org/Review'}

In [9]:
# Presumably the planned columns of the review table (TODO confirm):
#|reviewid|toyid|toy_name | user_name| stars| review_text| time| image|image_id| useful|
# NOTE: `sec` is the loop variable left over from the for-loop in the previous
# cell, i.e. the last "li.js-content" element on the page.
review_id = sec['data-content-id']

In [10]:
# The rating is parsed from the srcset of the <source> element inside the
# stars span; the pattern expects a file name of the form "rating-<value>.svg".
rating_raw = sec.select_one("span.ugc-list__list__stars").select_one("source")['srcset']
# Raw string so that \S and \. reach the regex engine unchanged (in a plain
# string literal they are invalid escape sequences on Python 3).
rating_token = re.findall(r'rating-(\S*)\.svg', rating_raw)[0]
# Keep only the part before the first underscore
# (presumably "<whole>_<fraction>", e.g. "3_0" — TODO confirm).
rating = int(rating_token.split('_')[0])
print(rating)


3

In [11]:
# Text of the first <span itemprop="author"> inside the review element.
user_name = sec.find_all("span", {"itemprop": "author"})[0].get_text()
# Function-call form of print works on both Python 2 and Python 3.
print(user_name)


Rhonda

In [13]:
# Text of the first <span itemprop="datePublished"> inside the review element
# (kept as the displayed string, not parsed into a date object).
date = sec.find_all("span", {"itemprop": "datePublished"})[0].get_text()
# Function-call form of print works on both Python 2 and Python 3.
print(date)


Jul 23, 2017

In [177]:
# Review body: text of the "span.ugc-list__review__display" element.
review_text = sec.select_one("span.ugc-list__review__display").get_text()
# Function-call form of print works on both Python 2 and Python 3.
print(review_text)


I'm not sure there's much to say. A tennis ball is a tennis ball. Harvey loves them!

In [198]:
# Use the second-to-last review on the page to try out photo extraction.
test_review_img = soup.select("li.js-content")[-2]
pic = test_review_img.select_one('a.js-open-modal.js-swap')
# Reset the results first so a review without a photo leaves them as None
# rather than undefined (NameError in later cells) or stale from an earlier run.
pic_link = pic_id = pic_name = None
if pic is not None:
    pic_link = pic["data-image"]
    pic_items = pic_link.split("/")
    # The last three path segments are <folder>/<number>/<file>: the id joins
    # the first two, the name joins all three, using "_" as separator.
    pic_id = "_".join(pic_items[-3:-1])
    pic_name = "_".join(pic_items[-3:])

In [192]:
# Show the raw "data-image" URL of the photo link in the sample review.
test_review_img.select_one("a.js-open-modal.js-swap")["data-image"]


Out[192]:
'http://chewy.ugc.bazaarvoice.com/0090-en_us/1837760/photo.jpg'

In [199]:
# Check the file name derived from the photo URL.
pic_name


Out[199]:
'0090-en_us_1837760_photo.jpg'

test scripts - 1


In [54]:
from list_reviews import *

In [41]:
# Take the row at position 100 as sample input for get_review_content below.
test_row = df_toys.iloc[100]

In [42]:
# Time a single call to get_review_content on the sample row.
# NOTE(review): get_review_content is not defined in this notebook —
# presumably it comes from `from list_reviews import *`. It is unpacked
# into two results here.
start_time = time.time()
reviews, pics = get_review_content(test_row)
end_time = time.time()

In [43]:
# Elapsed wall-clock time of the call above, in seconds.
end_time - start_time


Out[43]:
2.501729965209961

In [44]:
# Number of items in `reviews`.
len(reviews)


Out[44]:
55

In [45]:
# Tabulate the scraped reviews and peek at the first rows.
df_temp = pd.DataFrame(reviews)
df_temp.head()


Out[45]:
rating review_content review_id review_time toy_id toy_name user_name
0 5 This had been my dog's favorite toy! She can c... 185593363 Sep 1, 2017 49703 chuckit-ultra-duo-tug-medium Biconclavicator02
1 5 The Chuckit! Ultra Duo Tug dog toy is one of t... 185391236 Aug 25, 2017 49703 chuckit-ultra-duo-tug-medium BWiedow
2 5 Our guy is an aggressive chewer, a regular bal... 185274730 Aug 23, 2017 49703 chuckit-ultra-duo-tug-medium Boykinpaws
3 5 My goldendoodle has so much fun with this toy!... 185205962 Aug 21, 2017 49703 chuckit-ultra-duo-tug-medium doodlemom
4 5 This would be great if your dogs actually play... 184725370 Aug 12, 2017 49703 chuckit-ultra-duo-tug-medium toneemo

In [46]:
# Tabulate the scraped picture records and show up to the last ten rows.
df_pic_temp = pd.DataFrame(pics)
df_pic_temp.tail(10)


Out[46]:
pic_id pic_name pic_url review_id star_rating toy_id toy_name
0 0090-en_us_2158051 0090-en_us_2158051_photo.jpg http://chewy.ugc.bazaarvoice.com/0090-en_us/21... 184633626 5 49703 chuckit-ultra-duo-tug-medium
1 0090-en_us_934055 0090-en_us_934055_photo.jpg http://chewy.ugc.bazaarvoice.com/0090-en_us/93... 176238149 1 49703 chuckit-ultra-duo-tug-medium
2 0090-en_us_737565 0090-en_us_737565_photo.jpg https://chewy.ugc.bazaarvoice.com/0090-en_us/7... 154662590 5 49703 chuckit-ultra-duo-tug-medium

In [51]:
# Print the first picture URL in full (the table view truncates it);
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(df_pic_temp['pic_url'][0])


http://chewy.ugc.bazaarvoice.com/0090-en_us/2158051/photo.jpg

In [38]:
# (rows, columns) of the picture table.
# NOTE(review): the recorded output belongs to an earlier execution (In [38])
# than the cell that builds df_pic_temp above (In [46]), so the two may not
# describe the same data — re-run in order to confirm.
df_pic_temp.shape


Out[38]:
(57, 7)

In [52]:
# Current Unix timestamp (seconds since the epoch).
time.time()


Out[52]:
1504805900.113418

test scripts - 2


In [72]:
# End-to-end trial of the scraping helpers.
# NOTE(review): get_df and fetch_and_store_reviews are not defined in this
# notebook — presumably they come from `from list_reviews import *`; confirm
# there what the argument 5 selects.
save_code = "test11"  # tag handed to fetch_and_store_reviews; it shows up in the logged file names
df_test = get_df(5)
fetch_and_store_reviews(df_test, save_code)


https://www.chewy.com/mammoth-cottonblend-5-knot-dog-rope/product-reviews/108574?reviewSort=NEWEST&reviewFilter=ALL_STARS&pageNumber=6
save reviews till record reviews-test11-1.csv
save reviews till record reviews-test11-2.csv
save reviews till record reviews-test11-3.csv
save reviews till record reviews-test11-4.csv
save pictures till record pictures-test11-4.csv

In [60]:
# Read back two of the stored review files to inspect them.
# NOTE(review): these keys have no "-" after "reviews", whereas the log printed
# by fetch_and_store_reviews shows "reviews-test11-1.csv"; this cell was
# executed (In [60]) before that logged run (In [72]), so the naming may have
# changed in between — verify the keys before re-running.
df_temp_1 = pd.read_csv("s3://dogfaces/reviews/reviewstest11-1.csv")
df_temp_2 = pd.read_csv("s3://dogfaces/reviews/reviewstest11-2.csv")

In [61]:
# Last rows of the first stored review file.
df_temp_1.tail()


Out[61]:
rating review_content review_id review_time toy_id toy_name user_name
795 4 Just a tennis ball, but with the best sounding... 47666158 Dec 16, 2013 47728 kong-airdog-squeakair-ball-dog-toy sanderli22
796 5 I have a 2 yr old Sable German Shepherd who ju... 31371648 Nov 26, 2013 47728 kong-airdog-squeakair-ball-dog-toy Jerry82
797 5 My labradoodle loves playing fetch but fetch i... 27588417 Oct 21, 2013 47728 kong-airdog-squeakair-ball-dog-toy amyb3
798 5 I have a 136lb monster English Mastiff that to... 27454034 Oct 15, 2013 47728 kong-airdog-squeakair-ball-dog-toy Fiona1
799 5 This is ingenious;if you have a dog that loves... 22831354 Aug 14, 2013 47728 kong-airdog-squeakair-ball-dog-toy katbat

In [67]:
# First rows of the second stored review file.
df_temp_2.head()


Out[67]:
rating review_content review_id review_time toy_id toy_name user_name
0 5 My 9wk gsd loves to play tug. This is a very d... 185768998 Sep 6, 2017 108582 mammoth-monkey-fist-bar-dog-toy Heatherlg
1 4 My little guy just loves this toy & as small a... 185768070 Sep 6, 2017 108582 mammoth-monkey-fist-bar-dog-toy Homestager123
2 1 The mammoth Monkey Fist Dog Toy was a mammoth ... 185710474 Sep 4, 2017 108582 mammoth-monkey-fist-bar-dog-toy Miles
3 2 I bought this rope thinking my 2 year old terr... 185706783 Sep 4, 2017 108582 mammoth-monkey-fist-bar-dog-toy AL13
4 1 I usually buy another rope that's sold on this... 185588816 Sep 1, 2017 108582 mammoth-monkey-fist-bar-dog-toy Gggg

In [65]:
# (rows, columns) of the second stored review file.
df_temp_2.shape


Out[65]:
(129, 7)

In [69]:
# Read back a stored picture file.
# NOTE(review): same naming caveat as for the review files — the logged name
# is "pictures-test11-4.csv" (with a "-"); verify the key before re-running.
df_temp_3 = pd.read_csv("s3://dogfaces/reviews/picturestest11-1.csv")

In [70]:
# First rows of the stored picture file.
df_temp_3.head()


Out[70]:
pic_id pic_name pic_url review_id star_rating toy_id toy_name
0 0090-en_us_1900789 0090-en_us_1900789_photo.jpg http://chewy.ugc.bazaarvoice.com/0090-en_us/19... 183743685 5 47728 kong-airdog-squeakair-ball-dog-toy
1 0090-en_us_1837760 0090-en_us_1837760_photo.jpg http://chewy.ugc.bazaarvoice.com/0090-en_us/18... 183352427 5 47728 kong-airdog-squeakair-ball-dog-toy
2 0090-en_us_1784514 0090-en_us_1784514_photo.jpg http://chewy.ugc.bazaarvoice.com/0090-en_us/17... 183257510 3 47728 kong-airdog-squeakair-ball-dog-toy
3 0090-en_us_1640237 0090-en_us_1640237_photo.jpg http://chewy.ugc.bazaarvoice.com/0090-en_us/16... 182874927 5 47728 kong-airdog-squeakair-ball-dog-toy
4 0090-en_us_1640174 0090-en_us_1640174_photo.jpg http://chewy.ugc.bazaarvoice.com/0090-en_us/16... 182871999 5 47728 kong-airdog-squeakair-ball-dog-toy

In [78]:
# Load a stored review file whose key carries a timestamp-like number and a
# "cat5-final" tag, then show its (rows, columns).
df_test_5 = pd.read_csv("s3://dogfaces/reviews/reviews-1504809136_cat5-final-.csv")
df_test_5.shape


Out[78]:
(1577, 7)

In [79]:
# First rows of the loaded review file.
df_test_5.head()


Out[79]:
rating review_content review_id review_time toy_id toy_name user_name
0 5 My Fur Baby loves his Rope pull toy. I brings ... 185682550 Sep 4, 2017 108574 mammoth-cottonblend-5-knot-dog-rope Gunnerfavorite
1 5 We love this toy. I got the largest size for m... 185591533 Sep 1, 2017 108574 mammoth-cottonblend-5-knot-dog-rope Lindsay
2 4 this was a good product but not for very stron... 185522967 Aug 29, 2017 108574 mammoth-cottonblend-5-knot-dog-rope bcodpas3
3 5 The dogs aren't big chewers so the rope is hol... 185487336 Aug 28, 2017 108574 mammoth-cottonblend-5-knot-dog-rope SickPup
4 5 My Aussies love this rope. It's long enough th... 185484167 Aug 28, 2017 108574 mammoth-cottonblend-5-knot-dog-rope AussieMom

In [75]:
# Total of the num_reviews column of df_test — presumably the number of
# reviews the listing pages advertise, to compare with the scraped row count.
# Series.sum() is vectorised (the builtin sum iterated the NumPy array element
# by element) and skips missing values instead of returning NaN.
df_test['num_reviews'].sum()


Out[75]:
1576

In [ ]: