In [ ]:
# URL of the unglue.it OPDS feed that the cells below fetch and page through
# (presumably the feed for the "GITenberg" keyword, judging by the path).
opds_gitenberg_url = "https://unglue.it/api/opds/kw.GITenberg/"
In [ ]:
# Download the feed and parse the response body into an lxml document.
# NOTE: etree, StringIO and requests are imported in a later cell, so that
# cell has to be executed before this one.
doc = etree.parse(StringIO(requests.get(opds_gitenberg_url).content))
In [ ]:
# Show the Atom-namespaced <entry> elements matched in the parsed feed.
doc.findall("{http://www.w3.org/2005/Atom}entry")
In [ ]:
from StringIO import StringIO
from lxml import etree
import requests
# Atom XML namespace URI; used to build "{namespace}tag" qualified names
# for the element lookups below.
ATOM_NS = "http://www.w3.org/2005/Atom"
def elements_for_feed(url, starting_page=0):
    """Yield every Atom ``entry`` element from a paginated feed.

    Pages are requested one at a time by adding a ``page`` query
    parameter to *url*, beginning at *starting_page* and counting up.
    Iteration stops at the first page that contains no entries.

    Args:
        url: Base URL of the feed; it may already carry a query string.
        starting_page: Number of the first page to request.

    Yields:
        Each Atom-namespaced ``entry`` element that is a direct child of
        a page's root element, page by page.
    """
    entry_tag = "{{{}}}entry".format(ATOM_NS)
    page = starting_page
    while True:
        # Let requests build the query string so a URL that already has
        # parameters gets "&page=N" instead of a second "?".
        response = requests.get(url, params={"page": page})
        # Parse the raw bytes directly: lxml honours the encoding declared
        # in the XML, and no text-only StringIO wrapper is needed.
        root = etree.fromstring(response.content)
        entries = root.findall(entry_tag)
        if not entries:
            # An empty page marks the end of the feed.
            return
        for entry in entries:
            yield entry
        page += 1
In [ ]:
# Walk every entry of the feed and print its running index with its title.
# The qualified tag name is loop-invariant, so build it once up front.
title_tag = "{{{}}}{}".format(ATOM_NS, 'title')
for (i, entry) in enumerate(elements_for_feed(opds_gitenberg_url)):
    # Look the title up once and reuse it for the output.
    title = entry.find(title_tag).text
    print (i, title)
In [ ]: