In [56]:
import os
import requests
from bs4 import BeautifulSoup
from subprocess import Popen
from datetime import datetime
In [57]:
# Load the checkpoint: the first line of now_index.txt holds the page index
# saved by the previous run.
with open('now_index.txt', 'r') as checkpoint_file:
    checkpoint_lines = checkpoint_file.readlines()

# Parse only after the file is closed; surrounding whitespace/newline is stripped first.
now_index = int(checkpoint_lines[0].strip())
In [58]:
def check_is(url):
    """Report whether the page at ``url`` has a paging bar with no disabled link.

    The page is fetched and parsed, then the first ``div`` with CSS class
    ``btn-group-paging`` is searched for ``a`` elements with CSS class
    ``disabled``.

    Args:
        url: Address of the index page to inspect.

    Returns:
        True when the paging div exists and contains no disabled anchor.
        False when a disabled anchor is present, when the paging div is
        missing, or when the server answers with an error status.

    Note:
        Presumably a disabled paging link marks the newest (still growing)
        page of the board -- confirm against the site's markup.
    """
    resp = requests.get(url)
    # An error response (e.g. an index past the newest page) carries no paging
    # bar; treat it as "not ready" instead of parsing the error page.
    if not resp.ok:
        return False

    soup = BeautifulSoup(resp.text, 'html.parser')
    paging = soup.find("div", "btn-group-paging")
    # find() returns None when the div is absent; calling find_all on it
    # would raise AttributeError.
    if paging is None:
        return False

    # An empty result set means no paging link is disabled.
    return not paging.find_all("a", "disabled")
In [59]:
now_index_list = []
# First check: only start walking when the page right after the checkpoint passes.
triggle = check_is('https://www.ptt.cc/bbs/movie/index'+str(now_index+1)+'.html')
# Timestamp stamped on every log.txt line written by this run.
datetimestr = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Walk forward from the checkpoint with a separate cursor so that `now_index`
# keeps mirroring now_index.txt; advancing `now_index` itself would make a
# re-run of this cell start one page too far and skip a page.
cursor = now_index
while triggle:
    url = 'https://www.ptt.cc/bbs/movie/index'+str(cursor)+'.html'
    triggle = check_is(url)
    if triggle:
        now_index_list.append(cursor)
        cursor += 1

# now_index_list[0] is the checkpoint page itself, so at least two entries are
# needed for a non-empty range of new pages.
if len(now_index_list) > 1:
    first_new = now_index_list[0] + 1
    last_complete = now_index_list[-1]
    # Presumably `-i START END` is the page range crawler.py fetches -- verify
    # against crawler.py.
    icmd = ' -i '+str(first_new)+' '+str(last_complete)
    cmd = 'python crawler.py -b movie'+icmd
    status = os.system(cmd)
    with open('log.txt', 'a') as f:
        f.write(datetimestr+' '+cmd+'\n')
        if status != 0:
            f.write(datetimestr+' '+'crawler failed, status '+str(status)+'\n')
    # Advance the checkpoint only after a successful crawl so that a failed
    # run is retried next time instead of silently losing pages.
    if status == 0:
        with open('now_index.txt', 'w') as f:
            f.write(str(last_complete))
        now_index = last_complete
else:
    with open('log.txt', 'a') as f:
        f.write(datetimestr+' '+'no run script'+'\n')