In [ ]:
# coding: utf-8

In [25]:
#起始標準起手式~
import requests as rq
from bs4 import BeautifulSoup as bs
from collections import OrderedDict as od
import json
import csv
import traceback as tb
import re

In [3]:
HOST = 'http://www.dodocook.com/recipe/'

In [20]:
#def dodocook_crawler(開始文章ID,結束文章ID)
def dodocook_crawler(no_start_page,no_stop_page):
    HOST = 'http://www.dodocook.com/recipe/'
    #在pass這裡接上function() 網址產生器
    #pass
    url_builder(HOST,no_start_page,no_stop_page)

In [21]:
#網址產生器
#url_builder(網站URL, 開始文章ID, 結束文章ID)
def url_builder(HOST,no_start_page,no_stop_page):
    for i in range(no_start_page, no_stop_page):
        url = HOST + "{}/".format(i)        
        print("[INFO] {}".format(url))
        #在pass這裡接上function()  res_and_soup(url)
        #pass
        res_and_soup(url)

In [26]:
#就字面意思
def res_and_soup(url):
    try:
        res = rq.get(url)
        soup = bs(res.text, 'lxml')
        print("[INFO] success")
        #在pass這裡接上爬取網頁內容的function()
        #pass
        dodocook_contents(soup)
    except:
        print("[Error] Error while getting contents")
        tb.print_exc()

In [27]:
def dodocook_contents(soup):
    #title
    mydict={}
    for tag1 in soup.select(".band > h1"):
        title=tag1.text
    mydict["title"]=title
        
    #foodname
    foodName=[]
    for tag2 in soup.select(".body > .list > .name"):
        tag2_dict={}
        n=soup.select(".body > .list > .name").index(tag2)
        tag2_dict[tag2.text]=soup.select(".ingredient > .body > .list > .count")[n].text
        foodName.append(tag2_dict)
    mydict["foodName"]=foodName
    
    #step
    step1=[]
    step_list1=soup.select('.steps > .CSrtB > .Sno') 
    step_go1=soup.select('.steps > .CSrtB > .SBpma')

#     step2=[] 
#     step_list2=soup.select('.steps > .CSrtA > .SrtAk > .Sno') 
#     for tag3 in step_list2:
#         print(tag3.text)
#     step_go2=soup.select('.steps > .CSrtA > .SrtAk > .Spma > p')
#     for tag4 in step_go2:
#         print(tag4.text)
        
    
    for tag3 in step_list1:
        tag3_dict={}
        n1=step_list1.index(tag3)
        tag3_dict[tag3.text]=step_go1[n1].text
        step1.append(tag3_dict)
    mydict["step"]=step1
    print(mydict)

In [28]:
dodocook_crawler(40000,40010)


[INFO] http://www.dodocook.com/recipe/40000/
[INFO] success
{'step': [], 'title': '♡宛の美好”食“光 ✿ 咖哩肉片燴鮮菇', 'foodName': [{'洋蔥': '1/4個'}, {'小里肌': '150g'}, {'雪白菇': '2包'}, {'紅蘿蔔': '1小段'}, {'清湯': '1杯'}, {'鹽': '少許'}, {'水': '1大匙'}, {'太白粉': '1茶匙'}, {'鹽': '少許'}, {'糖': '適量'}, {'咖哩粉': '1大匙'}, {'太白粉水': '適量勾芡'}]}
[INFO] http://www.dodocook.com/recipe/40001/
[INFO] success
{'step': [{'01': '食材如下~'}, {'02': '鍋子燒熱加橄欖油,先下洋蔥炒香。'}, {'03': '加入蕃茄繼續拌炒。'}, {'04': '將米倒入鍋中,翻炒一下。 \n\n加入2杯高湯,加入鹽、白胡椒,中火煮約5分鐘。 \n\n在加剩下的1杯高湯 (可將番紅花泡入這杯高湯中一起倒入)。'}, {'05': '中小火,蓋上鍋蓋,煮約15至20分(請翻蓋檢視狀況)。\n\n\n米飯快收乾時,將海鮮、紅黃椒擺上,蓋鍋蓋,繼續悶煮約5分鐘。\n灑上巴西里,檸檬數片~~ 即可上桌。'}], 'title': '西班牙海鮮燉飯', 'foodName': [{'義大利米': '2杯'}, {'雞高湯或清水': '3杯'}, {'橄欖油或沙拉油': '3匙'}, {'洋蔥': '半顆'}, {'番茄(大顆的)': '半顆'}, {'檸檬': '1顆'}, {'透抽': '一支'}, {'草蝦': '六支'}, {'文蛤': '數顆'}, {'白胡椒': '少許'}, {'鹽(高湯有鹹則減少用量)': '少許'}, {'番紅花': '一大撮'}, {'新鮮巴西里': '少許'}, {'紅黃椒': '半顆'}]}
[INFO] http://www.dodocook.com/recipe/40002/
[INFO] success
[Error] Error while getting contents
[INFO] http://www.dodocook.com/recipe/40003/
Traceback (most recent call last):
  File "<ipython-input-26-681adb971120>", line 9, in res_and_soup
    dodocook_contents(soup)
  File "<ipython-input-27-430b819a750a>", line 6, in dodocook_contents
    mydict["title"]=title
UnboundLocalError: local variable 'title' referenced before assignment
[INFO] success
{'step': [], 'title': '葡萄燕麥甜餅', 'foodName': [{'無鹽奶油100g': ''}, {'細砂糖70g': ''}, {'雞蛋1顆': ''}, {'燕麥片50g': ''}, {'葡萄乾+其他堅果類50g': ''}, {'低筋麵粉160g': ''}, {'小蘇打粉1/4t': ''}]}
[INFO] http://www.dodocook.com/recipe/40004/
[INFO] success
{'step': [], 'title': '蠔油燜竹筍', 'foodName': [{'蠔油': '1.5大匙'}, {'綠竹筍': '1支'}, {'乾香菇': '數小朵'}, {'枸杞': '少許'}, {'芹菜葉(或香菜葉)': '少許'}]}
[INFO] http://www.dodocook.com/recipe/40005/
[INFO] success
{'step': [], 'title': '腐乳脆花腩', 'foodName': [{'松阪豬': '300g'}, {'市售麻油腐乳': '1塊'}, {'【李錦記舊庄特級蠔油】': '1大匙(約15ml)'}, {'味霖': '2大匙(約30ml)'}, {'薑片': '適量'}, {'木薯粉(地瓜粉)': '1.5大匙'}, {'白胡椒': '少許'}]}
[INFO] http://www.dodocook.com/recipe/40006/
[INFO] success
{'step': [], 'title': '稻荷壽司(豆皮壽司)', 'foodName': [{'炸三角豆腐包約20個': ''}, {'[壽司皮]': ''}, {'清水300g': ''}, {'二砂糖70g': ''}, {'醬油45g': ''}, {'[醋飯]': ''}, {'熱白飯400g': ''}, {'白醋40cc': ''}, {'細砂糖2大匙': ''}]}
[INFO] http://www.dodocook.com/recipe/40007/
[INFO] success
{'step': [], 'title': 'Smoothie ~ 菠蘿 + 青瓜', 'foodName': [{'菠蘿': '70g'}, {'青瓜': '1/3個'}, {'水': '100ml'}, {'檸檬汁': '1小匙'}, {'蜜糖': '1小匙'}]}
[INFO] http://www.dodocook.com/recipe/40008/
[INFO] success
{'step': [], 'title': '人妻的廚房--咖哩魯肉', 'foodName': [{'咖哩粉': '1大匙'}, {'洋蔥(小)': '1顆'}, {'油蔥酥、薑': '適量'}, {'糖': '1小匙'}, {'醬油': '1大匙'}, {'豬絞肉': '1斤'}, {'水': '適量'}]}
[INFO] http://www.dodocook.com/recipe/40009/
[INFO] success
{'step': [], 'title': '酸菜炒大腸', 'foodName': [{'薑絲': '1把'}, {'蒜末': '2大匙'}, {'酸菜': '1把'}, {'大腸': '1條'}, {'辣椒片': '少許'}, {'蒜苗片': '少許(配色用可免)'}, {'雞粉': '2小匙'}, {'醋精': '1大匙'}, {'米酒': '2大匙'}]}

In [ ]: