In [4]:
from bs4 import BeautifulSoup as bs
import requests
In [5]:
# WHO (apps.who.int) "ebola-summary-latest" situation-report view, lang=en.
url = (
    'http://apps.who.int/gho/data/'
    'view.ebola-sitrep.ebola-summary-latest?lang=en'
)
In [6]:
# Download the page at `url` and parse it into a BeautifulSoup tree.
# A timeout keeps the cell from hanging forever on an unresponsive server.
r = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently scraping an error page.
r.raise_for_status()
data = r.text
# Name the parser explicitly: when none is given, bs4 picks whichever parser
# happens to be installed, so the resulting tree can differ between machines.
soup = bs(data, 'html.parser')
In [26]:
# Collect every <ul class="list_dash"> on the page and keep the first one.
# Raises IndexError if the page contains no such list.
matching_lists = soup.find_all('ul', attrs={'class': 'list_dash'})
ul = matching_lists[0]
In [30]:
# All <li> elements nested inside the selected list.
lis = ul.find_all('li')
In [31]:
# Sanity check: how many <li> entries were found.
len(lis)
Out[31]:
In [32]:
# Peek at the first entry to see its markup.
lis[0]
Out[32]:
In [49]:
from datetime import datetime

# Prefix that is prepended to each scraped href when the CSV rows are written.
url_prefix = 'http://apps.who.int/gho/data/'
# strptime pattern for the link dates: day, full month name, four-digit year
# (e.g. "08 October 2014").
d_format = '%d %B %Y'
In [50]:
# Write one "<ISO date>,<url_prefix + href>" row per scraped link to files.csv.
link_prefix = 'Data published on '
with open('files.csv', 'w') as f:
    for li in lis:
        # Remove the caption as a whole prefix. str.lstrip() takes a *set of
        # characters*, not a prefix, so the previous call only worked while
        # the date began with a character outside that set.
        d = li.a.text.strip()
        if d.startswith(link_prefix):
            d = d[len(link_prefix):].strip()
        # strptime raises ValueError if the remaining text is not a date.
        date = datetime.strptime(d, d_format).date().isoformat()
        # Use a separate name so the page-level `url` that the download cell
        # reads is not overwritten with the last link's href.
        href = li.a.attrs['href']
        # A single string is written with write(); writelines() expects an
        # iterable of lines.
        f.write('{},{}{}\n'.format(date, url_prefix, href))
In [38]:
# Inspect a single list item.
# NOTE(review): `li` is whatever the kernel last bound to that name; on a
# fresh top-to-bottom run it is the final item from the CSV-writing loop.
li
Out[38]:
In [39]:
# NOTE(review): no output was recorded for this cell. bs4 resolves `tag.url`
# as a search for a child <url> tag, not as the href attribute, so this
# presumably evaluated to None -- confirm. The href is read via `.attrs`.
li.a.url
In [40]:
# Show the first <a> tag inside the list item.
li.a
Out[40]:
In [44]:
# The link target lives in the tag's attribute dict under 'href'.
li.a.attrs['href']
Out[44]:
In [ ]: