In [5]:
import pickle
import requests
from bs4 import BeautifulSoup

In [59]:
def getPages():
    """Crawl the Path of Exile wiki's Special:AllPages index and pickle a
    {page title: relative URL} mapping to 'pages.pickle'.

    Follows the "Next page" pagination link on each index page until the
    last page is reached. No return value; side effects are the HTTP
    requests and the pickle file written to the working directory.
    """
    BASE_URL = 'http://www.pathofexile.gamepedia.com'
    pages = dict()

    def getSet(url):
        """Scrape one AllPages index page into `pages`.

        Returns the relative URL of the next index page, or None when
        there is no further page (or the expected markup is missing).
        """
        r = requests.get(BASE_URL + url)
        # Fail loudly on HTTP errors instead of silently parsing an error page.
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')
        allPages = soup.find("div", { "class" : "mw-allpages-body"})
        if allPages is None:
            # Layout changed or empty index — stop the crawl gracefully.
            return None
        for link in allPages.findAll("li"):
            aTag = link.find("a")
            # Some <li> entries may lack an anchor or href; skip them.
            if aTag is not None and aTag.has_attr("href"):
                pages[str(aTag.string)] = aTag["href"]
        nav = soup.find("div", { "class" : "mw-allpages-nav"})
        if nav is None:
            return None
        for link in nav.findAll("a"):
            print("Found Page")
            # link.string is None for anchors with nested markup; the original
            # `"Next page" in link.string` raised TypeError in that case.
            if link.string and "Next page" in link.string:
                return link["href"]
        return None

    url = "/Special:AllPages"
    while url:
        url = getSet(url)
    with open('pages.pickle', 'wb') as handle:
        pickle.dump(pages, handle)

In [60]:
getPages()


Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page
Found Page