Goals

  • list the books that are tagged with the GITenberg keyword in the unglue.it OPDS feed

In [ ]:
# Base URL of the OPDS feed for the "GITenberg" keyword; page requests are built on top of it.
opds_gitenberg_url = "https://unglue.it/api/opds/kw.GITenberg/"

In [ ]:
# These names are needed by this cell, which runs before the notebook's main
# import cell; importing them here keeps a fresh-kernel "Run All" from failing
# with NameError.
from StringIO import StringIO
from lxml import etree

import requests

# Fetch the feed (no page parameter) and parse the response body as XML.
doc = etree.parse(StringIO(requests.get(opds_gitenberg_url).content))

In [ ]:
# Exploratory check: list the Atom <entry> elements directly under the feed root.
doc.findall("{http://www.w3.org/2005/Atom}entry")

In [ ]:
from StringIO import StringIO
from lxml import etree

import requests

# XML namespace URI used to build fully qualified "{namespace}tag" names for Atom elements.
ATOM_NS = "http://www.w3.org/2005/Atom"

def elements_for_feed(url, starting_page=0):
    """Yield every Atom ``entry`` element of a paginated feed, page by page.

    Pages are requested from ``url`` with a ``page`` query parameter that
    counts up from ``starting_page``.  Iteration stops at the first page
    whose root element has no ``entry`` children.

    Args:
        url: Base URL of the feed.
        starting_page: Number of the first page to request (default 0).

    Yields:
        Each ``{ATOM_NS}entry`` element found, in the order the pages
        return them.
    """
    entry_tag = "{{{}}}entry".format(ATOM_NS)
    page = starting_page

    while True:
        # Let requests build the query string so that a ``url`` which already
        # carries query parameters still gets a well-formed ``page`` argument.
        response = requests.get(url, params={"page": page})
        # Parse the raw bytes directly; no file-like wrapper is required.
        feed_root = etree.fromstring(response.content)
        entries = feed_root.findall(entry_tag)
        if not entries:
            # An empty page is treated as the end of the feed.
            # NOTE(review): the HTTP status is not checked here -- confirm
            # whether error responses should be raised instead.
            return
        for entry in entries:
            yield entry
        page += 1

In [ ]:
# Print a running index next to the title of every entry in the feed.
title_tag = "{{{}}}{}".format(ATOM_NS, 'title')  # loop-invariant, built once
for (i, entry) in enumerate(elements_for_feed(opds_gitenberg_url)):
    # Look the title up once and reuse it instead of repeating the search.
    title = entry.find(title_tag).text
    print (i, title)

In [ ]: