notebook.community

Edit and run



In [56]:

    
import os
import requests
from bs4 import BeautifulSoup
from subprocess import Popen
from datetime import datetime



In [57]:

    
# Load the page index stored in now_index.txt: only the first line of the
# file is used, and it is parsed as an integer after trimming whitespace.
with open('now_index.txt', 'r') as index_file:
    first_line = index_file.readlines()[0]
now_index = int(first_line.strip())



In [58]:

    
def check_is(url):
    """Report whether the index page at ``url`` has no disabled paging link.

    Fetches ``url``, finds the first ``div`` with class ``btn-group-paging``
    and collects the ``a`` elements with class ``disabled`` inside it.

    Args:
        url: Address of the index page to fetch.

    Returns:
        True when the paging ``div`` exists and contains no disabled link.
        False when at least one link is disabled, or when the page has no
        paging ``div`` at all (that case used to raise AttributeError).
    """
    resp = requests.get(url)
    soup = BeautifulSoup(resp.text, 'html.parser')
    paging = soup.find("div", "btn-group-paging")
    if paging is None:
        # find() yields None when nothing matches; presumably this is an
        # error page or a layout change, so report the page as not usable.
        return False
    # An empty result list means no paging link is disabled.
    return not paging.find_all("a", "disabled")



In [59]:

    
now_index_list = []
# First check: probe the page right after the stored index.
triggle = check_is('https://www.ptt.cc/bbs/movie/index'+str(now_index+1)+'.html')
# Timestamp taken before crawling; it prefixes every log line written below.
datetimestr = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Walk forward from the stored index, collecting every page that passes
# check_is. The loop is skipped entirely when the first check failed, and it
# stops at the first page that does not pass.
while triggle:
    url = 'https://www.ptt.cc/bbs/movie/index'+str(now_index)+'.html'
    triggle = check_is(url)
    if triggle:
        now_index_list.append(now_index)
        now_index += 1

if now_index_list:
    # The first collected page is the stored index itself, so the crawl range
    # starts one page after it and ends at the last page that passed.
    icmd = ' -i '+str(now_index_list[0]+1)+' '+str(now_index_list[-1])
    cmd = 'python crawler.py -b movie'+icmd
    status = os.system(cmd)
    if status == 0:
        with open('log.txt', 'a') as f:
            f.write(datetimestr+'   '+cmd+'\n')
        # Persist the last crawled page only after the crawler succeeded.
        with open('now_index.txt', 'w') as f:
            f.write(str(now_index_list[-1]))
    else:
        # Leave now_index.txt untouched so the same range is retried on the
        # next run instead of being skipped for good.
        with open('log.txt', 'a') as f:
            f.write(datetimestr+'   '+cmd+'   failed with status '+str(status)+'\n')
else:
    # Nothing was collected (the first check failed, or no page passed inside
    # the loop): record that the crawler was not started.
    with open('log.txt', 'a') as f:
        f.write(datetimestr+'   '+'no run script'+'\n')