# In [ ]:
import mechanize
import cookielib
import re
from bs4 import BeautifulSoup
# Build the single mechanize browser (`br`) that the rest of the script uses
# for every request, and attach an LWP-format cookie jar (`cj`) to it.
cj = cookielib.LWPCookieJar()
br = mechanize.Browser()
br.set_cookiejar(cj)

# Toggle the browser's optional handlers in one pass, in the same order as
# listed here. robots.txt handling is the only one switched off.
for configure, enabled in (
    (br.set_handle_equiv, True),
    (br.set_handle_gzip, True),
    (br.set_handle_redirect, True),
    (br.set_handle_referer, True),
    (br.set_handle_robots, False),
):
    configure(enabled)

# Install the refresh processor with max_time=1.
# NOTE(review): presumably this limits followed refreshes to delays of at
# most one second -- confirm against the mechanize documentation.
br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

# Identify as a desktop Firefox build instead of the library's default agent.
br.addheaders = [('User-agent', 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1')]
# Sign in through the openedu.ru single-sign-on page so that the cookie jar
# attached to `br` holds whatever session cookies the site sets.
# The credential values below are empty placeholders: put a real username and
# password into the pairs before running.
r = br.open('https://sso.openedu.ru/login/')
html = r.read()

# The first form on the page (nr=0) is used as the login form.
br.select_form(nr=0)
for field_name, field_value in (('username', ''), ('password', '')):
    br.form[field_name] = field_value
br.submit()
# Walk every courseware page linked from the course's chapter navigation,
# collect the distinct .mp4 URLs found in the page sources, and print them
# one per line in the order they were first seen.
r = br.open('https://courses.openedu.ru/courses/course-v1:mipt+GAMETH+fall_2015/courseware/')
html = r.read()
soup = BeautifulSoup(html, 'html.parser')

# Anchors inside the list nested under each ".chapter" element; each href is
# treated as a site-relative path to one courseware page.
chapters = soup.select(".chapter ul li a")

# The dot before "mp4" is escaped so that only a literal ".mp4" suffix
# matches; unescaped, it accepted any character there (e.g. "..._mp4").
# Compiled once here because the same pattern is applied to every page.
mp4_url_re = re.compile(r'http://[^\s<>"]+\.mp4')

links = []    # unique URLs, in first-seen order
seen = set()  # same URLs, for constant-time duplicate checks
for i in chapters:
    r = br.open('https://courses.openedu.ru' + i["href"])
    html = r.read()
    files = mp4_url_re.findall(str(html))
    for link in files:
        if link not in seen:
            seen.add(link)
            links.append(link)

# A parenthesised single argument prints identically under the Python 2
# print statement and is also valid Python 3 syntax.
for video_url in links:
    print(video_url)