In [70]:
import asyncio
import os
import re
from contextlib import closing

import aiohttp
import requests
from bs4 import BeautifulSoup

In [66]:
resp = requests.get('http://goalkicker.com/')
soup = BeautifulSoup(resp.content, 'html.parser')

In [67]:
urls = []

for link in soup.find_all('a'):
    urls.append('http://goalkicker.com/{}'.format(link.get('href')))

In [75]:
re_onclick = re.compile(r"^location.href='(?P<name>\w+\.\w+)'$")

pdfs = {}

for url in urls:
    resp = requests.get(url)
    soup = BeautifulSoup(resp.content, 'html.parser')

    btn_dl = soup.find('button', attrs={'class': 'download'})

    pdf_filename = re_onclick.match(btn_dl.get('onclick')).groups()[0]
    pdfs[pdf_filename] = '{0}/{1}'.format(url, pdf_filename)

In [74]:
@asyncio.coroutine
def download(url, filename, session, semaphore, chunk_size=1<<15):
    with (yield from semaphore):
        print('Downloading {}'.format(filename))
    
        resp = yield from session.get(url)
    
        with closing(resp), open(os.path.join('pdf/', filename), 'wb') as fobj:
            while True:
                chunk = yield from resp.content.read(chunk_size)
            
                if not chunk:
                    break
            
                fobj.write(chunk)
    
            print('Done {}'.format(filename))
        
        return filename, (response.status, tuple(response.headers.items()))

with closing(asyncio.get_event_loop()) as loop, \
        closing(aiohttp.ClientSession()) as session:

    semaphore = asyncio.Semaphore(4)
    download_tasks = (download(url, filename, session, semaphore) for filename, url in pdfs.items())
    result = loop.run_until_complete(asyncio.gather(*download_tasks))


Creating a client session outside of coroutine
client_session: <aiohttp.client.ClientSession object at 0x104cb3668>
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-74-15df25b9b003> in <module>()
     23     semaphore = asyncio.Semaphore(4)
     24     download_tasks = (download(url, filename, session, semaphore) for filename, url in pdfs.items())
---> 25     result = loop.run_until_complete(asyncio.gather(*download_tasks))

/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/tasks.py in gather(loop, return_exceptions, *coros_or_futures)
    594     for arg in set(coros_or_futures):
    595         if not futures.isfuture(arg):
--> 596             fut = ensure_future(arg, loop=loop)
    597             if loop is None:
    598                 loop = fut._loop

/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/tasks.py in ensure_future(coro_or_future, loop)
    517         if loop is None:
    518             loop = events.get_event_loop()
--> 519         task = loop.create_task(coro_or_future)
    520         if task._source_traceback:
    521             del task._source_traceback[-1]

/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py in create_task(self, coro)
    280         Return a task object.
    281         """
--> 282         self._check_closed()
    283         if self._task_factory is None:
    284             task = tasks.Task(coro, loop=self)

/usr/local/Cellar/python3/3.6.3/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py in _check_closed(self)
    355     def _check_closed(self):
    356         if self._closed:
--> 357             raise RuntimeError('Event loop is closed')
    358 
    359     def _asyncgen_finalizer_hook(self, agen):

RuntimeError: Event loop is closed

In [ ]: