In [1]:
import os
import pathlib
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [8]:
# Create BeautifulSoup object from website
def load_soup(url):
    """Fetch ``url`` and parse the response body with BeautifulSoup.

    Args:
        url: address of the page to download.

    Returns:
        A BeautifulSoup object built from the response content using the
        'html.parser' backend.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    page = requests.get(url)
    # Fail here rather than parsing an error page and tripping over missing
    # tables further down the pipeline.
    page.raise_for_status()
    return BeautifulSoup(page.content, 'html.parser')

# site galleries contain tables for individual tilesets
def get_tileset_tables(soup, page):
    """Yield the tileset tables of a gallery page.

    Args:
        soup: parsed gallery page.
        page: gallery kind, either 'two-corner' or 'block'; selects which
            extractor is applied to ``soup``.

    Yields:
        Each table returned by the extractor for ``page``.

    Raises:
        ValueError: if ``page`` is not a recognised gallery kind. Because
            this is a generator, the error surfaces on first iteration.
    """
    if page == 'two-corner':
        tables = get_two_corner_tileset_tables(soup)
    elif page == 'block':
        tables = get_block_tileset_tables(soup)
    else:
        # Without this branch ``tables`` would be unbound below.
        raise ValueError(
            "unknown gallery page {!r}; expected 'two-corner' or 'block'".format(page))

    yield from tables
    
def get_two_corner_tileset_tables(soup):
    """Return the tileset tables of the two-corner gallery page.

    Takes the fourth ``table`` element of ``soup`` (index 3), descends to the
    first ``table`` inside it, and returns every ``table`` found below that.
    """
    page_tables = soup.find_all('table')
    wrapper = page_tables[3].find('table')
    return wrapper.find_all('table')

def get_block_tileset_tables(soup):
    """Return the tileset tables of the block gallery page.

    Takes the fifth ``table`` element of ``soup`` (index 4), descends to the
    first ``table`` inside it, and returns every ``table`` found below that.
    """
    page_tables = soup.find_all('table')
    wrapper = page_tables[4].find('table')
    return wrapper.find_all('table')

In [30]:
def get_set_name(table):
    """Return the tileset name taken from the table's ``caption``.

    Surrounding whitespace is stripped from the caption text first, then
    every '*' character is removed.
    """
    caption_text = table.find('caption').text
    trimmed = caption_text.strip()
    return trimmed.replace('*', '')

def get_block_rows(table):
    """Yield every ``tr`` of ``table`` except the first one.

    The first row is skipped (presumably a header row -- confirm against the
    page markup).
    """
    yield from table.find_all('tr')[1:]
        
def get_block_cells(row):
    """Return all ``td`` elements found in ``row``."""
    return row.find_all('td')

def get_image_addresses(table):
    """Yield the address of every ``img`` in ``table``.

    Each image's ``src`` attribute is passed through ``fixRelativeUrl``
    before being yielded.
    """
    for image in table.find_all('img'):
        yield fixRelativeUrl(image.attrs['src'])
    
def fixRelativeUrl(url, base='http://cr31.co.uk/stagecast/wang/'):
    """Resolve an image ``src`` into an absolute URL.

    Args:
        url: address as written in the page, e.g. '../art/block/bloc/0.gif'.
        base: location the address is relative to; defaults to the directory
            holding the gallery pages.

    Returns:
        The absolute URL. Addresses that are already absolute are returned
        unchanged.
    """
    # Proper relative-reference resolution: only leading '../' segments move
    # up a level, so a '..' elsewhere in the address (or several of them) no
    # longer gets the site prefix spliced into the middle of the URL.
    return urljoin(base, url)

In [22]:
def get_formatted_tilename(url, set_dir):
    """Build the local file path for a tile image.

    The last path segment of ``url`` is expected to start with an integer
    followed by a '.'; that integer is zero-padded to at least three digits
    and placed under ``set_dir`` with a '.gif' extension.
    """
    basename = url.rsplit('/', 1)[-1]
    tile_index = int(basename.partition('.')[0])
    # pad to three digits so files sort in numeric order
    padded = format(tile_index, '0>3d')
    return set_dir + '/' + padded + '.gif'

def download_tile(url, set_dir, verbose=False):
    """Download one tile image and store it under ``set_dir``.

    Args:
        url: absolute address of the image.
        set_dir: directory the image is written into; the file name is
            derived from ``url`` by ``get_formatted_tilename``.
        verbose: forwarded to ``save_image``, which prints the saved path
            when true.

    A response with a status other than 200 is reported on stdout and
    nothing is written.
    """
    response = requests.get(url)
    if response.status_code == 200:
        # get_formatted_tilename needs the target directory as well as the URL.
        filename = get_formatted_tilename(url, set_dir)
        save_image(response.content, filename, verbose)
    else:
        print ('Bad response code:', response.status_code, 'for', url)
        
def save_image(content, filename, verbose=False):
    """Write ``content`` (bytes) to ``filename`` in binary mode.

    When ``verbose`` is true, the path is printed after the write.
    """
    with open(filename, mode='wb') as out:
        out.write(content)
        if not verbose:
            return
        print ('Saved', filename)

In [23]:
# Gallery pages to scrape, keyed by the page kind understood by
# get_tileset_tables.
targets = {
    'two-corner': 'http://cr31.co.uk/stagecast/wang/tiles_c.html',
    'block': 'http://cr31.co.uk/stagecast/wang/block_g.html',
}

In [24]:
# Show each gallery kind next to the page it is scraped from.
for page_kind, page_url in targets.items():
    print (page_kind, page_url)


two-corner http://cr31.co.uk/stagecast/wang/tiles_c.html
block http://cr31.co.uk/stagecast/wang/block_g.html

In [34]:
# Preview the block gallery: print each tileset name, then one line per image
# giving the row label (text of the row's first cell) and the image's raw src.
soup = load_soup('http://cr31.co.uk/stagecast/wang/block_g.html')
for tileset in get_block_tileset_tables(soup):
    print (get_set_name(tileset))
    for row in get_block_rows(tileset):
        row_cells = get_block_cells(row)
        label = row_cells[0].text.strip()
        # every cell after the label is expected to hold an <img>
        for image_cell in row_cells[1:]:
            src = image_cell.find('img').attrs['src']
            print (label, src)


Truchet Tiles
Tru1(2) ../art/truch/tru1/0.gif
Tru2(2) ../art/truch/tru2/0.gif
Tru3(2) ../art/truch/tru3/0.gif
Tru4(2) ../art/truch/tru4/0.gif
Tru5(2) ../art/truch/tru5/0.gif
Squeel(2) ../art/truch/tru6/0.gif
Tru7(4) ../art/truch/tru7/0.gif
Tru8(4) ../art/truch/tru8/0.gif
Block2 Tiles
Bloc ../art/block/bloc/0.gif
Bloc ../art/block/bloc/1.gif
Bowtie ../art/block/bowtie/0.gif
Bowtie ../art/block/bowtie/1.gif
Diag ../art/block/diag/0.gif
Diag ../art/block/diag/1.gif
Pool ../art/block/pool/0.gif
Pool ../art/block/pool/1.gif
Spiral ../art/block/spiral/0.gif
Spiral ../art/block/spiral/1.gif
Square ../art/block/square/0.gif
Square ../art/block/square/1.gif
Steps ../art/block/steps/0.gif
Steps ../art/block/steps/1.gif
1edgeA ../art/block/1edge2a/0.gif
1edgeA ../art/block/1edge2a/1.gif
1cornA ../art/block/1corn2a/0.gif
1cornA ../art/block/1corn2a/1.gif
1cornB ../art/block/1corn2b/0.gif
1cornB ../art/block/1corn2b/1.gif
Block4 Tiles
Arrow ../art/block/arrow/0.gif
Arrow ../art/block/arrow/1.gif
Arrow ../art/block/arrow/2.gif
Arrow ../art/block/arrow/3.gif
Box ../art/block/box/0.gif
Box ../art/block/box/1.gif
Box ../art/block/box/2.gif
Box ../art/block/box/3.gif
Braid ../art/block/braid/0.gif
Braid ../art/block/braid/1.gif
Braid ../art/block/braid/2.gif
Braid ../art/block/braid/3.gif
Bubble ../art/block/bubble/0.gif
Bubble ../art/block/bubble/1.gif
Bubble ../art/block/bubble/2.gif
Bubble ../art/block/bubble/3.gif
Dublin ../art/block/dublin/0.gif
Dublin ../art/block/dublin/1.gif
Dublin ../art/block/dublin/2.gif
Dublin ../art/block/dublin/3.gif
Fence ../art/block/fence/0.gif
Fence ../art/block/fence/1.gif
Fence ../art/block/fence/2.gif
Fence ../art/block/fence/3.gif
Floor1 ../art/block/floor1/0.gif
Floor1 ../art/block/floor1/1.gif
Floor1 ../art/block/floor1/2.gif
Floor1 ../art/block/floor1/3.gif
Floor2 ../art/block/floor2/0.gif
Floor2 ../art/block/floor2/1.gif
Floor2 ../art/block/floor2/2.gif
Floor2 ../art/block/floor2/3.gif
Folder ../art/block/fold/0.gif
Folder ../art/block/fold/1.gif
Folder ../art/block/fold/2.gif
Folder ../art/block/fold/3.gif
Madrid ../art/block/madrid/0.gif
Madrid ../art/block/madrid/1.gif
Madrid ../art/block/madrid/2.gif
Madrid ../art/block/madrid/3.gif
Oslo ../art/block/oslo/0.gif
Oslo ../art/block/oslo/1.gif
Oslo ../art/block/oslo/2.gif
Oslo ../art/block/oslo/3.gif
Panel ../art/block/panel/0.gif
Panel ../art/block/panel/1.gif
Panel ../art/block/panel/2.gif
Panel ../art/block/panel/3.gif
Rome ../art/block/rome/0.gif
Rome ../art/block/rome/1.gif
Rome ../art/block/rome/2.gif
Rome ../art/block/rome/3.gif
Wall ../art/block/wall/0.gif
Wall ../art/block/wall/1.gif
Wall ../art/block/wall/2.gif
Wall ../art/block/wall/3.gif
Wedge ../art/block/wedge/0.gif
Wedge ../art/block/wedge/1.gif
Wedge ../art/block/wedge/2.gif
Wedge ../art/block/wedge/3.gif
1edgeA ../art/block/1edge4a/0.gif
1edgeA ../art/block/1edge4a/1.gif
1edgeA ../art/block/1edge4a/2.gif
1edgeA ../art/block/1edge4a/3.gif
1edgeB ../art/block/1edge4b/0.gif
1edgeB ../art/block/1edge4b/1.gif
1edgeB ../art/block/1edge4b/2.gif
1edgeB ../art/block/1edge4b/3.gif

In [ ]: