In [1]:
import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup # For HTML parsing
import requests
import re # Regular expressions
from time import sleep # To prevent overwhelming the server between connections
from collections import Counter # Keep track of our term counts
#from nltk.corpus import stopwords # Filter out stopwords, such as 'the', 'or', 'and'
import nltk
import pandas as pd # For converting results to a dataframe and bar chart plots
%matplotlib inline
In [2]:
import csv
import datetime
import time
In [3]:
import sqlalchemy
from sqlalchemy import create_engine
In [4]:
%load_ext watermark
In [9]:
%watermark
In [10]:
def getVacuumTypeUrl(vacuumType,pageNum=1):
    vcleaners={"central":11333709011,"canister":510108,"handheld":510114,"robotic":3743561,"stick":510112,"upright":510110,"wetdry":553022}
    url_type_base="https://www.amazon.com/home-garden-kitchen-furniture-bedding/b/ref=sr_pg_"+str(pageNum)+"?ie=UTF8&node="
    url=url_type_base+str(vacuumType)+"&page="+str(pageNum)
    print(url)
    return url
In [11]:
vcleaners={"central":11333709011,"canister":510108,"handheld":510114,"robotic":3743561,"stick":510112,"upright":510110,"wetdry":553022}
for key in vcleaners:
    print(key,vcleaners[key])
    getVacuumTypeUrl(vcleaners[key])
In [12]:
def getFinalPageNum(url,maxretrytime=20):
    passed=False
    cnt=0
    while(passed==False):
        cnt+=1
        print("iteration from getFinalPageNum=",cnt)
        if(cnt>maxretrytime):
            raise Exception("Error from getFinalPageNum(url)! Tried too many times but we are still blocked by Amazon.")
        try:
            with requests.Session() as session:
                session.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"}
                r=session.get(url)
                if (r.status_code==200):
                    soup=BeautifulSoup(r.content,"lxml")
                    if("Robot Check" in soup.text):
                        print("we are blocked!")
                    else:
                        tagsFinalPageNum=soup.select("span[class='pagnDisabled']")
                        finalPageNum=str(tagsFinalPageNum[0].text)
                        passed=True
                else:
                    print("Connection failed. Reconnecting...")
        except:
            print("Error from getFinalPageNum(url)! Probably due to connection time out")
    return finalPageNum
In [13]:
def InferFinalPageNum(vacuumType,pageNum=1,times=10):
    url=getVacuumTypeUrl(vacuumType,pageNum)
    list_finalpageNum=[]
    for j in range(times):
        finalpageNum=getFinalPageNum(url)
        list_finalpageNum.append(finalpageNum)
    FinalpageNum=min(list_finalpageNum)
    return FinalpageNum
In [14]:
FinalPageNum=InferFinalPageNum(510114,pageNum=1)
print('FinalPageNum=',FinalPageNum)
In [15]:
def urlsGenerator(typenode,FinalPageNum):
    # Note: 'typenode' and 'FinalPageNum' are both strings
    URLs=[]
    pageIdx=1
    while(pageIdx<=int(FinalPageNum)):
        url_Type="https://www.amazon.com/home-garden-kitchen-furniture-bedding/b/ref=sr_pg_"+str(pageIdx)+"?ie=UTF8&node="
        url=url_Type+str(typenode)+"&page="+str(pageIdx)
        #print(url)
        URLs.append(url)
        pageIdx+=1
    return URLs
For the moment, let us choose the vacuum type "handheld":
In [16]:
URLs=urlsGenerator(510114,FinalPageNum)
len(URLs)
for url in URLs:
    print(url)
In [ ]:
def soupGenerator(URLs,maxretrytime=20):
    soups=[]
    urlindex=0
    for URL in URLs:
        urlindex+=1
        print("urlindex=",urlindex)
        passed=False
        cnt=0
        while(passed==False):
            cnt+=1
            print("iteration=",cnt)
            if(cnt>maxretrytime):
                raise Exception("Error from soupGenerator(url,maxretrytime=20)! Tried too many times but we are still blocked by Amazon.")
            try:
                with requests.Session() as session:
                    session.headers = {'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0"}
                    r=session.get(URL)
                    if (r.status_code==200):
                        soup=BeautifulSoup(r.content,"lxml")
                        if("Robot Check" in soup.text):
                            print("we are blocked!")
                        else:
                            print("we are not blocked!")
                            soups.append(soup)
                            passed=True
                    else:
                        print("Connection failed. Reconnecting...")
            except:
                print("Error from soupGenerator(URLs,maxretrytime=20)! Probably due to connection time out")
    return soups
In [19]:
soups=soupGenerator(URLs,maxretrytime=20)
How many soups have we created?
In [20]:
print(len(soups))
In [181]:
example='''
<span class="abc">
<div>
<a href="http://123xyz.com"></a>
hello_div01
</div>
</span>
<span class="def">
<a href="http://www.go.123xyz"></a>
<div>hello_div02</div>
</span>
'''
In [182]:
mysoup=BeautifulSoup(example,"lxml")
In [183]:
print(mysoup.prettify())
In [187]:
mysoup.select(".abc a")
Out[187]:
In [288]:
mysoup.select(".abc > a")
Out[288]:
The symbol > indicates that we want to look for a tags that are direct children of the tag whose class is "abc".
If we use ".abc a" instead, we find all a-tag descendants (at any depth) of the tag whose class is "abc".
In [289]:
mysoup.select(".abc > div")
Out[289]:
In [290]:
mysoup.select("a[href^='http']")
Out[290]:
In [291]:
mysoup.select("a[href$='http']")
Out[291]:
In [185]:
mysoup.select(".abc a")[0]["href"]
Out[185]:
For more details on CSS attribute selectors, see https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors
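The $= operator used above matches attribute values that end with a given string. As one extra sketch on the same toy soup (an illustrative addition, not a cell from the original session), the *= operator matches values that merely contain a substring:
# '*=' selects a tags whose href contains the substring '123xyz';
# in this toy example it matches both links.
mysoup.select("a[href*='123xyz']")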
In [581]:
sp=soups[70].select('li[id^="result_"]')[0]
print(sp)
for s in sp:
    try:
        print(s.span)
    except:
        print("error")
Print the link of the first page:
In [21]:
URLs=urlsGenerator(510114,FinalPageNum)
len(URLs)
print(URLs[0])
#for url in URLs:
# print(url)
We found that the Product URL of the first item can be extracted via:
In [22]:
soups[0].select('li[id^="result_"]')[0].select("a[class='a-link-normal s-access-detail-page a-text-normal']")[0]
Out[22]:
where we have used the fact that each item has one unique id.
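As a quick sanity check of that assumption (an illustrative cell, not from the original run), we can collect the result ids on the first page and confirm that none of them repeats:
# every search result sits in an li whose id starts with "result_";
# equal counts below mean each id appears exactly once on the page
ids = [li["id"] for li in soups[0].select('li[id^="result_"]')]
print(len(ids), len(set(ids)))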
In [265]:
csrev_tag=soups[0].select('li[id^="result_"]')[0].select("a[href$='customerReviews']")[0]
print(csrev_tag)
This means we are able to obtain the total number of customer reviews (10,106) as well as the review link of the selected item.
The above link will then be replaced by the following one,
which shows 50 customer reviews per page (instead of the default 10 reviews per page).
Now, let's look for more information, e.g. the price of the selected product. The tag we have just found sits near the end of a larger tag that contains all the information about a specific item, so to retrieve more of that item's details we will gradually work our way from the end of that container back towards the front.
In [266]:
csrev_tag.parent
Out[266]:
In [317]:
csrev_tag.parent.previous_sibling.previous_sibling
Out[317]:
In [326]:
pricetag=csrev_tag.parent.previous_sibling.previous_sibling
price=pricetag.select(".sx-price-whole")[0].text
fraction_price=pricetag.select(".sx-price-fractional")[0].text
print(price,fraction_price)
print(int(price)+0.01*int(fraction_price))
So we are able to obtain the price of the selected item.
In [333]:
pricetag.parent
Out[333]:
In [335]:
pricetag.previous_sibling.parent.select(".a-size-small")[2].text
Out[335]:
In [55]:
for j in range(30):
    try:
        #selected=soups[2].select('li[id^="result_"]')[j].select_one("span[class='a-declarative']")
        selected=soups[2].select('li[id^="result_"]')[j].select_one("i[class='a-icon a-icon-popover']").previous_sibling
        print(len(selected),selected.string.split(" ")[0])
    except:
        print("index= ",j,", 0 stars (no reviews yet)")
In [614]:
print(soups[10].select('li[id^="result_"]')[0].find_all("a")[2]["href"]) # 5 stars (although only 2 reviews)
In [615]:
print(soups[12].select('li[id^="result_"]')[0].find_all("a")[2]["href"]) # 0 stars (no customer reviews yet)
In [658]:
def items_info_extractor(soups):
    item_links=[]
    item_num_of_reviews=[]
    item_prices=[]
    item_names=[]
    item_ids=[]
    item_brands=[]
    item_avestars=[]
    for soup in soups:
        items=soup.select('li[id^="result_"]')
        for item in items:
            link_item=item.select("a[href$='customerReviews']")
            # ignore items which contain 0 customer reviews; those items are irrelevant to us
            if (link_item !=[]):
                price_tag=link_item[0].parent.previous_sibling.previous_sibling
                price_main_tag=price_tag.select(".sx-price-whole")
                price_fraction_tag=price_tag.select(".sx-price-fractional")
                link=link_item[0]["href"]
                # Ignore items which don't have normal price tags.
                # Those are items which are not sold by Amazon directly.
                # Also, remove items which are ads (3 ads are shown on each page).
                if((price_main_tag !=[]) & (price_fraction_tag !=[]) & (link.endswith("spons#customerReviews") == False)):
                    # extract the item's name and ID from the obtained link
                    item_name=link.split("/")[3]
                    item_id=link.split("/")[5]
                    # replace the obtained link by the link that leads to the customer reviews
                    base_url="https://www.amazon.com/"
                    link=base_url+item_name+"/product-reviews/"+item_id+"/ref=cm_cr_getr_d_paging_btm_" \
                        +str(1)+"?ie=UTF8&pageNumber="+str(1)+"&reviewerType=all_reviews&pageSize=1000"
                    # obtain the price of the selected single item
                    price_main=price_main_tag[0].text
                    price_fraction=price_fraction_tag[0].text
                    item_price=int(price_main)+0.01*int(price_fraction)
                    # obtain the brand of the selected single item
                    item_brand=price_tag.parent.select(".a-size-small")[1].text
                    if(item_brand=="by "):
                        item_brand=price_tag.parent.select(".a-size-small")[2].text
                    # obtain the number of reviews of the selected single item
                    item_num_of_review=int(re.sub(",","",link_item[0].text))
                    # obtain the averaged number of stars
                    starSelect=item.select_one("span[class='a-declarative']")
                    if((starSelect is None) or (starSelect.span is None)): # there are no reviews yet (hence, we see no stars at all)
                        item_avestar=0
                    else:
                        item_avestar=starSelect.span.string.split(" ")[0] # there are some reviews, so we can extract the averaged number of stars
                    # store the obtained variables into lists
                    item_links.append(link)
                    item_num_of_reviews.append(item_num_of_review)
                    item_prices.append(item_price)
                    item_names.append(item_name)
                    item_ids.append(item_id)
                    item_brands.append(item_brand)
                    item_avestars.append(item_avestar)
    return item_brands,item_ids,item_names,item_prices,item_num_of_reviews,item_links,item_avestars
In [659]:
item_brands,item_ids,item_names,item_prices,item_num_of_reviews,item_links,item_avestars=items_info_extractor(soups)
In [385]:
print(len(item_ids))
print(len(set(item_ids)))
In [386]:
print(len(item_names))
print(len(set(item_names)))
In [387]:
print(len(item_links))
print(len(set(item_links)))
The above results indicate that there are items that have the same product name but different links.
Cool. Let's find those products.
In [391]:
import collections
item_names_repeated=[]
for key in collections.Counter(item_names):
    if collections.Counter(item_names)[key]>1:
        print(key,collections.Counter(item_names)[key])
        item_names_repeated.append(key)
#print [item for item, count in collections.Counter(a).items() if count > 1]
In [392]:
print(item_names_repeated)
In [419]:
items_repeated=[]
for name,link,price,numrev in zip(item_names,item_links,item_prices,item_num_of_reviews):
    if name in item_names_repeated:
        #print(name,link,"\n")
        items_repeated.append((name,link,price,numrev))
Sort the list with sorted() (a "key" has to be given):
In [420]:
items_repeated=sorted(items_repeated, key=lambda x: x[0])
In [424]:
print("item name, item link, item price, total # of reviews of that item","\n")
for idx,(name,link,price,numrev) in enumerate(items_repeated):
    if((idx+1)%2==0):
        print(name,link,price,numrev,"\n")
    else:
        print(name,link,price,numrev)
What we found:
Reference: http://pbpython.com/pandas-list-dict.html
In [661]:
for id in item_ids:
    if("B006LXOJC0" in id):
        print(id)
In [664]:
df=pd.DataFrame.from_items([("pindex",item_ids),("type","handheld"),("pname",item_names),("brand",item_brands),("price",item_prices),("rurl",item_links),("totalRev",item_num_of_reviews),("avgStars",item_avestars)])
In [671]:
df.loc[:,["rurl","avgStars","totalRev"]]
Out[671]:
In [466]:
from sqlalchemy import create_engine,Table,Column,Integer,String,MetaData,ForeignKey,Date
import pymysql
engine=create_engine("mysql+pymysql://semantic:GbwSq1RzFb@104.199.201.206:13606/Tests?charset=utf8",echo=False, encoding='utf-8')
conn = engine.connect()
df.to_sql(name='amzProd', con=conn, if_exists = 'append', index=False)
conn.close()
In [469]:
df.to_csv("ProdInfo_handheld_26012017.csv", encoding="utf-8")
And load it:
In [474]:
pd.DataFrame.from_csv("ProdInfo_handheld_26012017.csv", encoding="utf-8")
Out[474]:
In [496]:
from sqlalchemy import create_engine,Table,Column,Integer,String,MetaData,ForeignKey,Date
import pymysql
import datetime
I found that the same pindex can appear more than once within a single dataframe. This leads to an error when we upload the data to MariaDB, since the primary key ought to be unique.
In [531]:
pd.set_option('max_colwidth', 800)
for idx,df in enumerate(dfs):
    print(idx,df.loc[df['pindex'] == 'B00SWGVICS'])
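A minimal sketch of one way to handle this before uploading (an illustrative addition, assuming we simply keep the first occurrence of each pindex in a given dataframe df):
# drop rows that share the same pindex so the primary-key constraint in MariaDB is not violated
df_unique = df.drop_duplicates(subset="pindex", keep="first")
print(len(df), len(df_unique))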
In [42]:
import os
from IPython.display import display
In [32]:
cwd=os.getcwd()
In [33]:
print(cwd)
Now it's time to get to know the pandas DataFrame better. I'd like to figure out how two dataframes can be merged horizontally.
In [58]:
test_col = pd.DataFrame.from_items([("test_column1",np.arange(10))])
test_col2 = pd.DataFrame.from_items([("test_column2",5+np.arange(10))])
display(test_col,test_col2)
In [59]:
result = pd.concat([test_col, test_col2], axis=1)
In [60]:
display(result)
In [7]:
date="2017-02-01"
prodTypes=["central","canister","handheld","robotic","stick","upright","wetdry"]
# put all the dataframes into a list
dfs=[pd.DataFrame.from_csv("data/ProdInfo_%s_%s.csv"%(prodType,date), encoding="utf-8") for prodType in prodTypes]
for idx,df in enumerate(dfs):
    cID=[j%7 for j in range(df.shape[0])]
    colCID=pd.DataFrame.from_items([( "cID",cID )])
    dfs[idx]=pd.concat([df, colCID], axis=1)
# concatenate dataframes
df=pd.concat(dfs).drop_duplicates("rurl")
df.to_csv("ProdInfo_all_%s.csv"%(date), encoding="utf-8")
In [5]:
date="2017-02-01"
date="2017-02-06"
prodTypes=["central","canister","handheld","robotic","stick","upright","wetdry"]
# put all the dataframes into a list
dfs=[pd.DataFrame.from_csv("data/ProdInfo_%s_%s.csv"%(prodType,date), encoding="utf-8") for prodType in prodTypes]
for idx,df in enumerate(dfs):
    cID=[j%7 for j in range(df.shape[0])]
    colCID=pd.DataFrame.from_items([( "cID",cID )])
    dfs[idx]=pd.concat([df, colCID], axis=1)
# concatenate dataframes
df=pd.concat(dfs).drop_duplicates("rurl")
# prepare the connection and connect to the DB
engine=create_engine("mysql+pymysql://semantic:GbwSq1RzFb@104.199.201.206:13606/Tests?charset=utf8",echo=False, encoding='utf-8')
conn = engine.connect()
# remove duplicates and upload the concatenated dataframe to the SQL DataBase
df.to_sql(name='amzProd', con=conn, if_exists = 'append', index=False)
# close the connection
conn.close()
In [111]:
len(df.iloc[974]["brand"])
Out[111]:
In [540]:
df.iloc[463]["pname"]
Out[540]:
In [543]:
!echo "Handheld-Vacuum-Cleaner-Abask-Vacuum-Cleaner-7-2V-60W-Ni-CD2200MA-3-5KPA-Suction-Portable-1-Accessories-Rechargeable-Cordless-Cleaner"| wc
The length of this string is larger than 100 characters. Therefore, I have to alter our schema, since the product-name column was set to a length of 100 by default.
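A minimal sketch of the schema change (assuming the product name lives in the pname column of the amzProd table used earlier; the new length of 255 is a guess, not a value taken from the original schema):
from sqlalchemy import create_engine

# widen the product-name column so that long names like the one above still fit
engine = create_engine("mysql+pymysql://semantic:GbwSq1RzFb@104.199.201.206:13606/Tests?charset=utf8", echo=False)
with engine.connect() as conn:
    conn.execute("ALTER TABLE amzProd MODIFY pname VARCHAR(255)")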