In [1]:
import glob
import os
import subprocess
from os import system

from BeautifulSoup import BeautifulSoup

# Prefix that is put in front of every link extracted from the loaded page.
pre = "https://docs.engineering.redhat.com"

# Read the whole input file into a single string.
with open('/home/weizhou/Projects/scripts/input/service/cr', 'r') as page_file:
    content = page_file.read()

In [2]:
# Parse the loaded page, empty the download cache, then fetch every linked
# document into the cache directory with wget.
CACHE_DIR = 'input/cache'

soup = BeautifulSoup(content)

# Start from an empty cache. Like the former `rm -f input/cache/*`, this removes
# only the non-hidden files (and symlinks) directly inside the directory and
# leaves sub-directories alone. The directory is created when missing so that
# the downloads below cannot end up in the current working directory.
if not os.path.isdir(CACHE_DIR):
    os.makedirs(CACHE_DIR)
for stale_path in glob.glob(os.path.join(CACHE_DIR, '*')):
    if os.path.isfile(stale_path) or os.path.islink(stale_path):
        os.remove(stale_path)

failed_urls = []
for item in soup.findAll('a'):
    href = item.get('href')
    if not href:
        # Anchors without an href (e.g. <a name="...">) have nothing to fetch.
        continue
    url = pre + href
    # The href comes from scraped markup, so it is passed as a single argv
    # entry without a shell: characters such as '&', ';' or spaces can neither
    # split the command nor inject another one.
    status = subprocess.call(['wget', url, '--no-check-certificate'],
                             cwd=CACHE_DIR)
    if status != 0:
        failed_urls.append(url)

# Surface downloads that wget reported as failed instead of ignoring them.
if failed_urls:
    print('wget failed for %d URL(s):\n%s'
          % (len(failed_urls), '\n'.join(failed_urls)))