In [1]:
from bs4 import BeautifulSoup
from urllib.request import urlopen
html = urlopen("http://static.decontextualize.com/cats.html").read()
document = BeautifulSoup(html, "html.parser")

In [2]:
cafe_list = list()
cafe_table = document.find('table', {'class': 'cafe-list'})
tbody = cafe_table.find('tbody')
for tr_tag in tbody.find_all('tr'):
    name = tr_tag.find('td', {'class': 'name'})
    text_name = name.text
    zip = tr_tag.find('td', {'class': 'zip'})
    text_zip = zip.text
    cat_dict = {'name': text_name, 'zip': text_zip}
    cafe_list.append(cat_dict)
cafe_list


Out[2]:
[{'name': 'Hang In There', 'zip': '11237'},
 {'name': 'Independent Claws', 'zip': '11201'},
 {'name': 'Paws and Play', 'zip': '11215'},
 {'name': 'Tall Tails', 'zip': '11222'},
 {'name': 'Cats Meow', 'zip': '11231'}]

In [3]:
cat_list = list()
cat_table = document.find('table', {'class': 'cat-list'})
tbody = cat_table.find('tbody')
for tr_tag in tbody.find_all('tr'):
    cat_dict = {}
    bday = tr_tag.find('td', {'class': 'birthdate'})
    bday_t = bday.string
    color = tr_tag.find('td', {'class': 'color'})
    color_t = color.string
    loc = []
    location = tr_tag.find('td', {'class': 'locations'})
    locations = location.string.split(', ')
    loc.append(locations)
    name = tr_tag.find('td', {'class': 'name'})
    name_t = name.string
    pattern = tr_tag.find('td', {'class': 'pattern'})
    pattern_t = pattern.string
    weight = tr_tag.find('td', {'class': 'weight'})
    weight_t = weight.string
    cat_dict.update({'birthdate': bday_t, 'color': color_t, 'locations': loc, 'name': name_t, 'pattern': pattern_t, 'weight': weight_t})
    cat_list.append(cat_dict)
cat_list


Out[3]:
[{'birthdate': '2015-05-20',
  'color': 'black',
  'locations': [['Paws and Play', 'Independent Claws*']],
  'name': 'Sylvester',
  'pattern': 'colorpoint',
  'weight': '10.46'},
 {'birthdate': '2000-01-03',
  'color': 'cinnamon',
  'locations': [['Independent Claws*']],
  'name': 'Jasper',
  'pattern': 'solid',
  'weight': '8.06'},
 {'birthdate': '2006-02-27',
  'color': 'brown',
  'locations': [['Independent Claws*']],
  'name': 'Luna',
  'pattern': 'tortoiseshell',
  'weight': '10.88'},
 {'birthdate': '2015-08-13',
  'color': 'white',
  'locations': [['Tall Tails*', 'Hang In There']],
  'name': 'Georges',
  'pattern': 'tabby',
  'weight': '9.40'},
 {'birthdate': '2003-09-13',
  'color': 'red',
  'locations': [['Paws and Play*']],
  'name': 'Millie',
  'pattern': 'bicolor',
  'weight': '9.27'},
 {'birthdate': '2009-07-30',
  'color': 'cream',
  'locations': [['Hang In There*']],
  'name': 'Lisa',
  'pattern': 'colorpoint',
  'weight': '8.84'},
 {'birthdate': '2011-12-15',
  'color': 'cream',
  'locations': [['Hang In There*', 'Cats Meow', 'Tall Tails']],
  'name': 'Oscar',
  'pattern': 'solid',
  'weight': '8.44'},
 {'birthdate': '2015-12-30',
  'color': 'lilac',
  'locations': [['Paws and Play*', 'Cats Meow']],
  'name': 'Scaredy',
  'pattern': 'tabby',
  'weight': '8.83'},
 {'birthdate': '2013-10-16',
  'color': 'blue',
  'locations': [['Independent Claws', 'Paws and Play*']],
  'name': 'Charlotte',
  'pattern': 'tabby',
  'weight': '9.54'},
 {'birthdate': '2011-02-07',
  'color': 'white',
  'locations': [['Independent Claws*', 'Hang In There']],
  'name': 'Whiskers',
  'pattern': 'colorpoint',
  'weight': '9.47'},
 {'birthdate': '2007-05-28',
  'color': 'lilac',
  'locations': [['Independent Claws', 'Cats Meow*', 'Paws and Play']],
  'name': 'Patches',
  'pattern': 'colorpoint',
  'weight': '10.12'},
 {'birthdate': '1998-12-01',
  'color': 'black',
  'locations': [['Independent Claws', 'Paws and Play*']],
  'name': 'Charly',
  'pattern': 'bicolor',
  'weight': '9.20'},
 {'birthdate': '2013-04-09',
  'color': 'black',
  'locations': [['Hang In There', 'Tall Tails*']],
  'name': 'Mimi',
  'pattern': 'solid',
  'weight': '9.19'},
 {'birthdate': '2000-06-28',
  'color': 'black',
  'locations': [['Hang In There*']],
  'name': 'Samantha',
  'pattern': 'solid',
  'weight': '7.49'},
 {'birthdate': '2014-08-16',
  'color': 'fawn',
  'locations': [['Independent Claws*', 'Paws and Play']],
  'name': 'Bella',
  'pattern': 'solid',
  'weight': '10.47'},
 {'birthdate': '2009-01-28',
  'color': 'cinnamon',
  'locations': [['Tall Tails*']],
  'name': 'Angel',
  'pattern': 'tortoiseshell',
  'weight': '9.72'},
 {'birthdate': '2015-12-16',
  'color': 'cinnamon',
  'locations': [['Paws and Play*']],
  'name': 'Garfield',
  'pattern': 'calico',
  'weight': '7.18'},
 {'birthdate': '1997-06-12',
  'color': 'red',
  'locations': [['Paws and Play*', 'Tall Tails']],
  'name': 'Oliver',
  'pattern': 'bicolor',
  'weight': '9.44'},
 {'birthdate': '2010-12-22',
  'color': 'red',
  'locations': [['Hang In There', 'Independent Claws*']],
  'name': 'Moritz',
  'pattern': 'tortoiseshell',
  'weight': '10.10'},
 {'birthdate': '2009-01-10',
  'color': 'cream',
  'locations': [['Cats Meow', 'Independent Claws*', 'Tall Tails']],
  'name': 'Stevens',
  'pattern': 'bicolor',
  'weight': '8.12'},
 {'birthdate': '2010-11-30',
  'color': 'cinnamon',
  'locations': [['Independent Claws*']],
  'name': 'Sassy',
  'pattern': 'solid',
  'weight': '8.67'},
 {'birthdate': '2003-04-14',
  'color': 'red',
  'locations': [['Hang In There*', 'Tall Tails']],
  'name': 'Lily',
  'pattern': 'bicolor',
  'weight': '10.01'},
 {'birthdate': '2014-05-15',
  'color': 'blue',
  'locations': [['Paws and Play*', 'Independent Claws', 'Tall Tails']],
  'name': "O'Malley",
  'pattern': 'tortoiseshell',
  'weight': '8.86'},
 {'birthdate': '2009-01-18',
  'color': 'cinnamon',
  'locations': [['Tall Tails*']],
  'name': 'Maru',
  'pattern': 'bicolor',
  'weight': '10.09'},
 {'birthdate': '2004-03-04',
  'color': 'lilac',
  'locations': [['Tall Tails', 'Hang In There', 'Cats Meow*']],
  'name': 'Oreo',
  'pattern': 'bicolor',
  'weight': '10.76'},
 {'birthdate': '2005-05-17',
  'color': 'brown',
  'locations': [['Cats Meow', 'Paws and Play', 'Tall Tails*']],
  'name': 'Molly',
  'pattern': 'colorpoint',
  'weight': '9.58'},
 {'birthdate': '2007-06-25',
  'color': 'brown',
  'locations': [['Hang In There', 'Independent Claws', 'Cats Meow*']],
  'name': 'Minette',
  'pattern': 'calico',
  'weight': '8.84'},
 {'birthdate': '2011-02-20',
  'color': 'red',
  'locations': [['Paws and Play*']],
  'name': 'Smudge',
  'pattern': 'colorpoint',
  'weight': '12.00'},
 {'birthdate': '2007-12-02',
  'color': 'black',
  'locations': [['Cats Meow', 'Hang In There', 'Independent Claws*']],
  'name': 'Jack',
  'pattern': 'colorpoint',
  'weight': '9.92'},
 {'birthdate': '2009-05-06',
  'color': 'red',
  'locations': [['Paws and Play*']],
  'name': 'Lolcat',
  'pattern': 'bicolor',
  'weight': '8.88'},
 {'birthdate': '1998-07-18',
  'color': 'cinnamon',
  'locations': [['Independent Claws*']],
  'name': 'Diana',
  'pattern': 'colorpoint',
  'weight': '7.81'},
 {'birthdate': '1997-11-29',
  'color': 'brown',
  'locations': [['Tall Tails*']],
  'name': 'Minka',
  'pattern': 'tabby',
  'weight': '9.37'},
 {'birthdate': '1997-03-09',
  'color': 'lilac',
  'locations': [['Independent Claws', 'Cats Meow', 'Hang In There*']],
  'name': 'Carlos',
  'pattern': 'tabby',
  'weight': '9.92'},
 {'birthdate': '1998-03-24',
  'color': 'cinnamon',
  'locations': [['Tall Tails', 'Independent Claws', 'Hang In There*']],
  'name': 'Roquefort',
  'pattern': 'colorpoint',
  'weight': '9.73'},
 {'birthdate': '2005-12-25',
  'color': 'brown',
  'locations': [['Independent Claws', 'Paws and Play*']],
  'name': 'Chanel',
  'pattern': 'tabby',
  'weight': '9.04'},
 {'birthdate': '2003-12-07',
  'color': 'black',
  'locations': [['Hang In There*', 'Independent Claws']],
  'name': 'Jiji',
  'pattern': 'tabby',
  'weight': '10.35'},
 {'birthdate': '1997-01-04',
  'color': 'fawn',
  'locations': [['Paws and Play*', 'Cats Meow']],
  'name': 'Poppy',
  'pattern': 'calico',
  'weight': '9.89'},
 {'birthdate': '2010-10-30',
  'color': 'cinnamon',
  'locations': [['Tall Tails*']],
  'name': 'Berlioz',
  'pattern': 'colorpoint',
  'weight': '11.37'},
 {'birthdate': '2010-02-26',
  'color': 'fawn',
  'locations': [['Hang In There', 'Tall Tails', 'Independent Claws*']],
  'name': 'Max',
  'pattern': 'colorpoint',
  'weight': '8.72'},
 {'birthdate': '1999-01-09',
  'color': 'white',
  'locations': [['Cats Meow*', 'Independent Claws', 'Tall Tails']],
  'name': 'Lafayette',
  'pattern': 'tortoiseshell',
  'weight': '9.30'}]

In [4]:
import pg8000
conn = pg8000.connect(user='postgres', password='password', database="test")

In [5]:
cursor = conn.cursor()
cursor.execute("""
CREATE TABLE cafe (
  id serial,
  name varchar(40),
  zip varchar(5)
)
""")

cursor.execute("""
CREATE TABLE cat (
   id serial,
   name varchar(60),
   birthdate varchar(40),
   weight numeric,
   color varchar(40),
   pattern varchar(40)   
)
""")

cursor.execute("""
CREATE TABLE cat_cafe (
   cat_id int,
   cafe_id int,
   active boolean
)
""")
conn.commit()

In [6]:
cafe_name_id_map = {}
for item in cafe_list:
    cursor.execute("INSERT INTO cafe (name, zip) VALUES (%s, %s) RETURNING id",
                  [str(item['name']), str(item['zip'])])
    rowid = cursor.fetchone()[0]
    cafe_name_id_map[str(item['name'])] = rowid
conn.commit()

In [7]:
cafe_name_id_map


Out[7]:
{'Cats Meow': 5,
 'Hang In There': 1,
 'Independent Claws': 2,
 'Paws and Play': 3,
 'Tall Tails': 4}

In [8]:
cat_name_id_map = {}
for cat in cat_list:
    cursor.execute("INSERT INTO cat(name, birthdate, weight, color, pattern) VALUES (%s, %s, %s, %s, %s) RETURNING id", [str(cat['name']), str(cat['birthdate']), str(cat['weight']), str(cat['color']), str(cat['pattern'])])
    catrowid = cursor.fetchone()[0]
    cat_name_id_map[str(cat['name'])] = catrowid
conn.commit()

In [27]:
conn.rollback()

In [9]:
cat_name_id_map


Out[9]:
{'Angel': 16,
 'Bella': 15,
 'Berlioz': 38,
 'Carlos': 33,
 'Chanel': 35,
 'Charlotte': 9,
 'Charly': 12,
 'Diana': 31,
 'Garfield': 17,
 'Georges': 4,
 'Jack': 29,
 'Jasper': 2,
 'Jiji': 36,
 'Lafayette': 40,
 'Lily': 22,
 'Lisa': 6,
 'Lolcat': 30,
 'Luna': 3,
 'Maru': 24,
 'Max': 39,
 'Millie': 5,
 'Mimi': 13,
 'Minette': 27,
 'Minka': 32,
 'Molly': 26,
 'Moritz': 19,
 "O'Malley": 23,
 'Oliver': 18,
 'Oreo': 25,
 'Oscar': 7,
 'Patches': 11,
 'Poppy': 37,
 'Roquefort': 34,
 'Samantha': 14,
 'Sassy': 21,
 'Scaredy': 8,
 'Smudge': 28,
 'Stevens': 20,
 'Sylvester': 1,
 'Whiskers': 10}

In [30]:
for cat in cat_list:
    cat_id = cat_name_id_map[cat['name']]
    for loc in cat['locations']:
        #print(loc)
        for i in loc:
            if "*" in i:
                #print(i)
                active = True
                cafe_id = cafe_name_id_map[i[:-1]]
                cursor.execute("INSERT INTO cat_cafe(cafe_id, cat_id, active) VALUES (%s, %s, %s)", [cafe_id, cat_id, active])
            else:
                active = False 
                cafe_id = cafe_name_id_map[i]
                cursor.execute("INSERT INTO cat_cafe(cafe_id, cat_id, active) VALUES (%s, %s, %s)", [cafe_id, cat_id, active])
conn.commit()

In [ ]: