In [1]:
import requests, json, time, lxml.html as lh, pandas as pd
from IPython.core.display import HTML
# Millisecond Unix timestamp; embedded in the AJAX search payload further down.
timestamp = int(round(time.time() * 1000))
# Search keyword: sent as-is in the initial GET, entity-encoded in the AJAX payload.
k = '米森 蔓越莓麥片'
# Round-trip the keyword through lxml so that non-ASCII characters come back as
# numeric character references (&#NNNNN;). lxml wraps bare text in a <p> element.
html = lh.fromstring(k)
unicodeK = lh.tostring(html).decode("utf-8")
# Remove the wrapping <p>...</p> tags as literal prefix/suffix.
# str.lstrip/rstrip take a *set of characters*, so the previous
# lstrip('<p>') / rstrip('</p>') would also eat leading/trailing 'p', '<', '>'
# or '/' characters of the keyword itself (e.g. 'soup' became 'sou').
if unicodeK.startswith('<p>'):
    unicodeK = unicodeK[len('<p>'):]
if unicodeK.endswith('</p>'):
    unicodeK = unicodeK[:-len('</p>')]
In [2]:
# Use one session for the whole flow so cookies set by the first page load persist.
s = requests.Session()
# Send a desktop-browser user agent instead of the requests default.
# (The earlier `headers = requests.utils.default_headers()` was dead code: it was
# overwritten by this assignment before ever being read.)
headers = {'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'}
s.headers.update(headers)
# Query string for the search results page.
payload = {
    'keyword': k,
    'searchType': '2',
    # Was 'curPage=1', which requests encoded as the bogus parameter
    # name "curPage%3D1" instead of sending curPage=1.
    'curPage': '1',
}
r = s.get('https://www.momoshop.com.tw/search/searchShop.jsp', params=payload)
In [3]:
# Fail fast: raise if the search page request came back with an HTTP error status.
r.raise_for_status()
In [4]:
# Snapshot the session's cookie jar as a plain dict; it is passed explicitly
# to the AJAX POST later in the notebook.
cookies = s.cookies.get_dict()
In [5]:
# Extra request headers for the follow-up AJAX call. Referer is the final URL
# of the search page fetched above. The shared `headers` dict is extended in
# place and then pushed onto the session.
headers.update({
    'host': 'www.momoshop.com.tw',
    'origin': 'https://www.momoshop.com.tw',
    'DNT': '1',
    'X-Requested-With': 'XMLHttpRequest',
    'Referer': r.url,
    'content-type': 'application/x-www-form-urlencoded',
})
s.headers.update(headers)
In [6]:
# Search request object for the AJAX endpoint. Field names and values are kept
# exactly as the notebook originally sent them; only the keyword (entity-encoded)
# and the timestamp vary between runs.
search_request = {
    "flag": 2018,
    "data": {
        "searchValue": unicodeK,
        "cateCode": "",
        "cateLevel": "-1",
        "cp": "N",
        "NAM": "N",
        "normal": "N",
        "first": "N",
        "superstore": "N",
        "curPage": "1",
        "priceS": "0",
        "priceE": "9999999",
        "searchType": "2",
        "reduceKeyword": "",
        "isFuzzy": "0",
        "specialGoodsType": "",
        "rtnCateDatainfo": {
            "cateCode": "",
            "cateLv": "-1",
            "curPage": "1",
            "historyDoPush": "true",
            "timestamp": timestamp,
        },
    },
}
# The request object travels as a single form field named "data".
# Serialise it with json.dumps: str(dict) produces a Python repr with
# single-quoted strings, which is not valid JSON and only works if the
# server's parser happens to be lenient.
data = {"data": json.dumps(search_request)}
In [7]:
# POST the search request to the AJAX endpoint. The `t` query parameter used to
# be a hard-coded millisecond timestamp from a past run (1507641784805); send the
# timestamp computed at the top of the notebook instead.
# NOTE(review): `t` looks like a cache-busting value -- confirm against the site.
res1 = s.post(
    "https://www.momoshop.com.tw/ajax/ajaxTool.jsp",
    params={"t": timestamp},
    data=data,
    cookies=cookies,
)
# Surface HTTP errors here rather than as a confusing JSON decode failure later.
res1.raise_for_status()
In [8]:
# Decode the AJAX response body, then drill down to the list of product records.
search_response = json.loads(res1.text.strip())
prods = search_response['rtnData']['searchResult']['rtnSearchData']['goodsInfoList']
In [9]:
def pickFields(p):
    """Reduce one raw product record to the fields shown in the result table.

    Args:
        p: Mapping for a single product; must contain the keys
            'goodsName' and 'SALE_PRICE'.

    Returns:
        A new dict with 'name' (from 'goodsName') and 'price'
        (from 'SALE_PRICE'); all other keys of ``p`` are dropped.

    Raises:
        KeyError: If either required key is missing from ``p``.
    """
    return {
        'name': p['goodsName'],
        'price': p['SALE_PRICE'],
    }
In [10]:
# Trim every raw product record down to name/price.
# NOTE(review): this rebinds `prods`, so re-running this cell without re-running
# the parsing cell raises KeyError (the trimmed dicts no longer have the raw keys).
prods = [pickFields(record) for record in prods]
In [11]:
# Build a table from the trimmed product records; columns come from the dict keys.
df = pd.DataFrame(prods)
# Bare last expression so the notebook renders the frame as the cell output.
df
Out[11]: