In [4]:
import requests
import re
import datetime
import random
from time import sleep
import bs4
from bs4 import BeautifulSoup
import pandas as pd
from pandas import Series, DataFrame
import sqlalchemy
from sqlalchemy import create_engine,Table,Column,Integer,String,MetaData,ForeignKey,Date
from IPython.display import display

In [5]:
import sqlalchemy
from sqlalchemy import create_engine

In [6]:
pd.set_option('max_colwidth', 800)

In [7]:
%load_ext watermark


The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark

In [8]:
%watermark


2017-01-31T19:51:35+08:00

CPython 3.5.2
IPython 5.1.0

compiler   : GCC 4.2.1 Compatible Apple LLVM 7.3.0 (clang-703.0.31)
system     : Darwin
release    : 15.6.0
machine    : x86_64
processor  : i386
CPU cores  : 4
interpreter: 64bit

I. 資料庫內共有幾個吸塵器商品?


In [129]:
engine=create_engine("mysql+pymysql://semantic:GbwSq1RzFa@104.199.201.206:13606/Tests?charset=utf8",echo=False, encoding='utf-8')
conn = engine.connect()

sql_command = "select count(*) FROM Tests.amzProd"
resultSet = pd.read_sql_query(sql=sql_command, con=conn, coerce_float=False)
display(resultSet)

conn.close()


count(*)
0 2325

II. 每個吸塵器種類各有幾個商品評論數


In [15]:
engine=create_engine("mysql+pymysql://semantic:GbwSq1RzFa@104.199.201.206:13606/Tests?charset=utf8",echo=False, encoding='utf-8')
conn = engine.connect()

sql_command = "SELECT type,sum(totalRev) as totalRev FROM Tests.amzProd group by type order by totalRev desc"
resultSet = pd.read_sql_query(sql=sql_command, con=conn, coerce_float=False)
display(resultSet)
conn.close()


type totalRev
0 upright 102463
1 handheld 81635
2 stick 59634
3 robotic 32266
4 canister 23337
5 wetdry 17423
6 central 317

III. 總評論數(所有吸塵器總類)


In [108]:
resultSet["totalRev"].sum()


Out[108]:
Decimal('317075')

IV. 各商品種類,商品最貴前5名


In [21]:
prodTypes=["central","canister","handheld","robotic","stick","upright","wetdry"]

for prodType in prodTypes:

    engine=create_engine("mysql+pymysql://semantic:GbwSq1RzFa@104.199.201.206:13606/Tests?charset=utf8",echo=False, encoding='utf-8')
    conn = engine.connect()
    sql_command = "SELECT type,pname,brand,price,totalRev,avgStars FROM Tests.amzProd where type='"+ prodType +"' order by price desc limit 5"
    resultSet = pd.read_sql_query(sql=sql_command, con=conn, coerce_float=False)
    display(resultSet)
    conn.close()


type pname brand price totalRev avgStars
0 central Pet-Attachments-E4-Black-Type ZVac 1879.95 12 3.8
1 central VacuMaid-S2400-Cyclonic-Power-Unit VacuMaid 799.95 1 5.0
2 central Allegro-MU4500-Champion-000-Square-Central-Electric Allegro 724.97 16 4.6
3 central Allegro-MUA75-Zenith-Square-Central Allegro 599.97 1 5.0
4 central Prolux-CV12000-Central-Electric-Warranty Prolux 549.99 20 4.2
type pname brand price totalRev avgStars
0 canister EUKSC6093-Sanitaire-Wide-Area-Vacuum Electrolux 1705.36 1 1.0
1 canister DYSON-360-RB01NB-Vacuum-Cleaner 360 EYE™ Robot 1399.40 7 1.6
2 canister Miele-Complete-C3-Brilliant-Canister Miele 1399.00 53 4.0
3 canister Nilfisk-GM-80-Vacuum-HEPA-Residential-01790032 Nilfisk-Advance 1349.00 5 0.0
4 canister Miele-S8990-Canister-Vacuum-Cleaner Miele 1332.61 41 3.5
type pname brand price totalRev avgStars
0 handheld Pullman-Holt-86asb5d4c-Dry-Hepa-Gallon Pullman-Holt 864.00 1 1.0
1 handheld Dyson-V8-Absolute-Cord-Free-Vacuum Dyson 593.97 458 4.6
2 handheld ProTeam-107310-Commercial-Backpack-vacuum ProTeam 396.75 1 5.0
3 handheld SANITAIRE-GIDDS2-2464763-Backpack-Vacuum-Cleaner Sanitaire 389.98 6 5.0
4 handheld MetroVac-Anti-Static-Electronic-Cleaning-Control-120V MetroVac 369.99 1 5.0
type pname brand price totalRev avgStars
0 robotic Samsung-VR20H9030UB-Premium-Vacuum-Cleaner Samsung 1389.90 1 2.0
1 robotic iRobot-980-Estate-Bundle-Batteries iRobot 1099.97 1 4.0
2 robotic Dyson-360-Eye-Robot-Vacuum Dyson 999.94 42 3.0
3 robotic 360-Robot-robotic-vacuum-cleaner 360 EYE™ Robot 995.00 7 1.6
4 robotic Samsung-SR2AK9350U-POWERbot-Turbo-Vacuum Samsung 944.99 55 3.6
type pname brand price totalRev avgStars
0 stick Euroflex-Purple-HO58-RFB Euroflex 1017.98 2 4.0
1 stick AIRBELT-90692AM-Arctic-Canister-Cleaner Sebo 859.00 2 3.0
2 stick Dyson-V8-Animal-Cordless-Vacuum Dyson 649.95 6 4.4
3 stick Dyson-DC62MC-Sigital-Slim Dyson 644.95 4 5.0
4 stick Dyson-V6-Absolute-Cord-free-Vacuum Dyson 427.25 882 4.4
type pname brand price totalRev avgStars
0 upright Basic-Coatings-Dirt-Dragon-E8900012 Basic Coatings 2650.00 2 2.5
1 upright Vortex-Mini-Dustless-Contractor-Vacuum Vortex 2197.00 1 5.0
2 upright Electrolux-Sanitaire-EURSC6085A-Commercial-Extractor Electrolux 1756.99 1 1.0
3 upright SEBO-9559AM-Automatic-Upright-Vacuum Sebo 920.22 76 4.1
4 upright 9587AM-Automatic-Upright-Vacuum-Yellow Sebo 879.00 21 4.3
type pname brand price totalRev avgStars
0 wetdry Mytee-1003DX-Carpet-Extractor Mytee 2690.00 1 0.0
1 wetdry Clear-Vue-Cyclones-CV1800LH-Collection Clear Vue Cyclones 1825.00 1 5.0
2 wetdry Mytee-HP-100-Grand-Prix Mytee 1500.00 6 4.0
3 wetdry Nortech-N301DCP-1-5-Inch-Attachment-30-Gallon Nortech 1385.00 1 3.0
4 wetdry Oneida-Air-Systems-XCKM010000-Cyclonic Oneida Air Systems 1268.41 1 5.0

V. 各商品種類, 商品平均星星最多前5名(評論數>500)


In [20]:
prodTypes=["central","canister","handheld","robotic","stick","upright","wetdry"]

for prodType in prodTypes:

    engine=create_engine("mysql+pymysql://semantic:GbwSq1RzFa@104.199.201.206:13606/Tests?charset=utf8",echo=False, encoding='utf-8')
    conn = engine.connect()
    sql_command = "SELECT type,pname,brand,price,totalRev,avgStars FROM Tests.amzProd where type='"+ prodType +"' and totalRev>500 order by avgStars desc limit 5"
    resultSet = pd.read_sql_query(sql=sql_command, con=conn, coerce_float=False)
    display(resultSet)
    conn.close()


type pname brand price totalRev avgStars
type pname brand price totalRev avgStars
0 canister Miele-S2121-Olympus-Canister-Cleaner Miele 329.00 979 4.4
1 canister BISSELL-Bagged-Canister-Vacuum-4122 Bissell 49.99 1770 4.3
2 canister Eureka-Mighty-Canister-Vacuum-3670G Eureka 60.36 2999 4.1
3 canister Sanitaire-SC3683B-Commercial-Canister-Vacuum Sanitaire 113.81 682 4.1
4 canister Doctor-Portable-Spot-Cleaner-Machine Rug Doctor 125.42 786 4.1
type pname brand price totalRev avgStars
0 handheld Eye-Vac-Professional-Touchless-Stationary-Vacuum CrowleyJones 172.26 717 4.7
1 handheld Dyson-V6-Animal-Cord-free-Vacuum Dyson 349.00 878 4.5
2 handheld Decker-BDH2000PL-Lithium-Vacuum-20-volt BLACK+DECKER 68.99 5325 4.4
3 handheld Dyson-V6-Trigger-Cordless-Handheld Dyson 191.98 553 4.4
4 handheld Hoover-CH30000-PortaPower-Lightweight-Commercial Hoover Commercial 115.38 592 4.4
type pname brand price totalRev avgStars
0 robotic iRobot-Roomba-Robotic-Vacuum-Cleaner iRobot 299.99 3902 4.4
1 robotic iRobot-Roomba-Robotic-Vacuum-Cleaner iRobot 899.00 732 4.4
2 robotic ILIFE-Robot-Vacuum-Cleaner-Titanium ILIFE 199.99 1138 4.4
3 robotic iRobot-Roomba-Robotic-Vacuum-Cleaner iRobot 542.85 1819 4.4
4 robotic Roomba-500-Side-Brush-3-Pack iRobot 9.99 524 4.3
type pname brand price totalRev avgStars
0 stick Shark-Rocket-Ultra-Light-Upright-HV301 SharkNinja 149.95 646 4.5
1 stick Shark-Rocket-Ultra-Light-Upright-HV303 SharkNinja 184.26 1958 4.5
2 stick Dyson-V6-Absolute-Cord-free-Vacuum Dyson 427.25 882 4.4
3 stick Dyson-DC59-Motorhead-Cordless-Vacuum Dyson 299.97 624 4.3
4 stick BISSELL-PowerEdge-Corded-Vacuum-81L2A Bissell 58.99 5812 4.3
type pname brand price totalRev avgStars
0 upright Metro-ED500-DataVac-500-Watt-Electric Metro Vacuum 61.99 2885 4.7
1 upright Rocket-TruePet-Ultra-Light-Upright-HV322 SharkNinja 203.98 830 4.6
2 upright Shark-Rotator-Professional-Lift-Away-NV501 SharkNinja 199.99 2548 4.6
3 upright Rotator-Lift-Away-Professional-Upright-NV502 SharkNinja 249.99 728 4.5
4 upright SharkNinja-NV42-Shark-Navigator-Deluxe SharkNinja 110.26 1550 4.5
type pname brand price totalRev avgStars
0 wetdry Armor-Gallon-Utility-Vacuum-AA255 ArmorAll 38.48 2294 4.4
1 wetdry DEWALT-DCV581H-20-Volt-Cordless-Wet-Dry DEWALT 111.34 625 4.4
2 wetdry Vacmaster-Gallon-Portable-Vacuum-VP205 Vacmaster 29.99 531 4.1
3 wetdry PowerSmith-PAVC101-Amp-Ash-Vacuum PowerSmith 64.38 1676 4.1
4 wetdry ArmorAll-Wet-Dry-Vacuum-Cleaner ArmorAll 22.00 747 3.6

VI. 主要有哪些牌子?他們獲得的平均星星數有多少? (以下只選出在Amazon上商品數量多於50個的品牌)


In [60]:
engine=create_engine("mysql+pymysql://semantic:GbwSq1RzFa@104.199.201.206:13606/Tests?charset=utf8",echo=False, encoding='utf-8')
conn = engine.connect()
sql_command = "SELECT brand,count(pname),avg(totalRev),avg(avgStars),avg(price) FROM Tests.amzProd \
group by brand having count(pname)> 50 order by avg(avgStars) desc"
resSet = pd.read_sql_query(sql=sql_command, con=conn, coerce_float=False)
display(resSet)
conn.close()


brand count(pname) avg(totalRev) avg(avgStars) avg(price)
0 iRobot 74 203.8919 4.30541 200.187431
1 Shark 51 29.8235 4.26863 149.171962
2 Dyson 74 178.1892 4.18108 261.800676
3 Miele 58 74.9828 4.16724 449.271723
4 WORKSHOP Wet/Dry Vacs 59 29.1864 3.98305 50.901695
5 Hoover 70 792.0571 3.90143 131.095428
6 Shop-Vac 73 66.2329 3.87123 134.073973
7 Bissell 104 396.9231 3.83846 119.205577
8 Eureka 51 310.3922 3.64314 85.329805
9 BLACK+DECKER 82 517.5366 3.53415 65.671951
10 Dirt Devil 83 366.1446 3.51687 51.012410

VII. 以上品牌存在多少商品位於各商品種類?


In [57]:
prodTypes=["central","canister","handheld","robotic","stick","upright","wetdry"]

for brand in resSet["brand"]:
    engine=create_engine("mysql+pymysql://semantic:GbwSq1RzFa@104.199.201.206:13606/Tests?charset=utf8",echo=False, encoding='utf-8')
    conn = engine.connect()
    sql_command = "SELECT count(type),type,brand FROM Tests.amzProd \
    where brand='"+brand+"' group by type"
    resultSet = pd.read_sql_query(sql=sql_command, con=conn, coerce_float=False)
    display(resultSet)
    conn.close()


count(type) type brand
0 74 robotic iRobot
count(type) type brand
0 2 canister Shark
1 9 handheld Shark
2 1 robotic Shark
3 3 stick Shark
4 36 upright Shark
count(type) type brand
0 6 canister Dyson
1 17 handheld Dyson
2 1 robotic Dyson
3 8 stick Dyson
4 42 upright Dyson
count(type) type brand
0 41 canister Miele
1 2 robotic Miele
2 1 stick Miele
3 14 upright Miele
count(type) type brand
0 3 upright WORKSHOP Wet/Dry Vacs
1 56 wetdry WORKSHOP Wet/Dry Vacs
count(type) type brand
0 10 canister Hoover
1 5 handheld Hoover
2 3 robotic Hoover
3 9 stick Hoover
4 42 upright Hoover
5 1 wetdry Hoover
count(type) type brand
0 1 canister Shop-Vac
1 8 upright Shop-Vac
2 64 wetdry Shop-Vac
count(type) type brand
0 19 canister Bissell
1 11 handheld Bissell
2 1 robotic Bissell
3 20 stick Bissell
4 49 upright Bissell
5 4 wetdry Bissell
count(type) type brand
0 11 canister Eureka
1 6 handheld Eureka
2 34 upright Eureka
count(type) type brand
0 1 canister BLACK+DECKER
1 74 handheld BLACK+DECKER
2 1 robotic BLACK+DECKER
3 4 stick BLACK+DECKER
4 1 upright BLACK+DECKER
5 1 wetdry BLACK+DECKER
count(type) type brand
0 7 canister Dirt Devil
1 36 handheld Dirt Devil
2 17 stick Dirt Devil
3 22 upright Dirt Devil
4 1 wetdry Dirt Devil