In [3]:
from bs4 import BeautifulSoup
import string
import requests
import pandas as pd
import re
import pickle

from fake_useragent import UserAgent
ua = UserAgent()

In [4]:
def remove_punctuation(x):
    x = str(x)
    return x.translate(str.maketrans({a:None for a in string.punctuation}))

In [5]:
def get_soup(url, timeout=5):
    headers  = {'User-Agent':ua.random}
    try:
        response = requests.get(url,headers=headers)
    except:
        print("FAILED "+ url)
        return 0
    attempts = 0
    while(not response.ok):
            print((url+' failed with code: '+str(response.status_code)))
            if attempts > timeout:
                print(url+' failed with code: '+str(response.status_code))
                return BeautifulSoup('')
            response = requests.get(url)
            attempts += 1
    page = response.text
    soup = BeautifulSoup(page)
    return soup

In [ ]:
# Get all urls for each beer style

url = 'http://www.beeradvocate.com/beer/style/'
soup = get_soup(url)

beer_styles = {}
for style in soup.find('table').find_all('a'):
    beer_styles[style.get_text()] = style['href']

In [ ]:
ba_url = 'http://www.beeradvocate.com'
style_suffix = '?sort=revsD&start=0'
soup = get_soup(ba_url+style_url+style_suffix)

In [ ]:
int(re.findall(r'(?<=\(out of )\d*',soup.find('tr').get_text())[0])

In [ ]:


In [ ]:
beer_styles.items()[0]

In [ ]:
# Pulls the name and url to all beers with more than 25 'hads'

ba_url = 'http://www.beeradvocate.com'
style_suffix = '?sort=revsD&start='
columns = ['name','url']
temp = {}
for style in beer_styles.items():
    print(url)
    url = ba_url+style[1]+style_suffix
    soup = get_soup(url)
    num_beers = int(re.findall(r'(?<=\(out of )\d*',soup.find('tr').get_text())[0])
    print num_beers
    min_beer = False
    for i in range(num_beers//50):
        if min_beer:
                break
        url = ba_url+style[1]+style_suffix+str(i*50)
        soup = get_soup(url)
        for row in soup.find_all('tr')[3:-1]:
            cells = row.find_all('td')
            if int(cells[4].get_text().replace(',','')) < 25:
                min_beer = True
                break
            temp[cells[0].find('a').get_text()] = cells[0].find('a')['href']

In [7]:
# Pulls the name and url to all beers with more than 25 'hads'

ba_url = 'http://www.beeradvocate.com'
style_suffix = '?sort=revsD&start='
columns = ['name','url']
temp = {}

url = ba_url+'/beer/style/149/'+style_suffix
soup = get_soup(url)
num_beers = int(re.findall(r'(?<=\(out of )\d*',soup.find('tr').get_text())[0])
print num_beers
min_beer = False
for i in range(num_beers//50):
    if min_beer:
            break
    url = ba_url+'/beer/style/149/'+style_suffix+str(i*50)
    soup = get_soup(url)
    for row in soup.find_all('tr')[3:-1]:
        cells = row.find_all('td')
        if int(cells[4].get_text().replace(',','')) < 25:
            min_beer = True
            break
        temp[cells[0].find('a').get_text()] = cells[0].find('a')['href']
        
temp


538
Out[7]:
{u'1554 Black Lager': '/beer/profile/192/111828/',
 u'3 Best Friends': '/beer/profile/219/116077/',
 u'Alhambra Negra': '/beer/profile/9262/22584/',
 u'Asahi Dry Black': '/beer/profile/716/89545/',
 u'B.B. Dark Bohemia Beer - 1795 Original Czech Dark Lager': '/beer/profile/303/37361/',
 u'Baltika #4 Original (Dark)': '/beer/profile/401/2235/',
 u'Beerlao Dark': '/beer/profile/2970/27607/',
 u'Bernard Cerne': '/beer/profile/2055/21521/',
 u'Big Rock Honey Brown Lager': '/beer/profile/391/11175/',
 u'Black Lager (\u010cern\xe9 Pivo)': '/beer/profile/22723/76132/',
 u'Black Licorice Lager': '/beer/profile/9629/37147/',
 u'Bohemia': '/beer/profile/301/7078/',
 u'Bohemia Regent Lager Dark': '/beer/profile/7366/14349/',
 u'Bony Fingers': '/beer/profile/763/20446/',
 u'Brick Waterloo Dark Lager': '/beer/profile/416/5196/',
 u'Budweiser Budvar Czech Dark Lager': '/beer/profile/304/35967/',
 u'California Black Beer': '/beer/profile/8818/40370/',
 u'Celestial Meridian Cascadian Dark Lager': '/beer/profile/29619/118880/',
 u'Dark 266': '/beer/profile/3912/27684/',
 u'Death & Taxes Black Beer': '/beer/profile/763/2306/',
 u'Efes Dark': '/beer/profile/569/12362/',
 u'El Steinber Dark Lager': '/beer/profile/193/83646/',
 u'Elevator Dark Horse Lager': '/beer/profile/1464/40147/',
 u'Faxe Amber': '/beer/profile/783/11961/',
 u'Fischer Tradition Amber': '/beer/profile/197/710/',
 u'Fix Dark': '/beer/profile/3963/85171/',
 u'Gigi': '/beer/profile/29250/115954/',
 u'Gran\xe1t - BrouCzech Dark': '/beer/profile/21333/58010/',
 u'Guinness Black Lager': '/beer/profile/209/57285/',
 u'Heineken Dark Lager': '/beer/profile/81/1167/',
 u'Heineken Oud Bruin': '/beer/profile/81/4087/',
 u'Hot Rocks Lager': '/beer/profile/1337/48508/',
 u'John Michael Dark Lyric Lagrrr!': '/beer/profile/30452/98186/',
 u'Kelso Nut Brown Lager': '/beer/profile/8768/33357/',
 u'Kilikia Dark': '/beer/profile/671/2061/',
 u'LTD Series - 06': '/beer/profile/5316/86044/',
 u'Layla Dirty Blonde Lager': '/beer/profile/1939/12687/',
 u"Leinenkugel's Creamy Dark": '/beer/profile/710/2940/',
 u'Lev Black Lion': '/beer/profile/168/9395/',
 u"McSorley's Dark Lager": '/beer/profile/447/42663/',
 u'Moa Noir': '/beer/profile/15922/36001/',
 u'Mythos Red': '/beer/profile/1084/45087/',
 u'Nightfall Lager': '/beer/profile/32426/97943/',
 u'Obolon Oksamytove (Deep Velvet)': '/beer/profile/601/35035/',
 u'Oldgott': '/beer/profile/12215/25835/',
 u'Palone': '/beer/profile/568/25468/',
 u'Pietra': '/beer/profile/2977/7195/',
 u'Pils Noir': '/beer/profile/29196/86001/',
 u'Podkovan Dark': '/beer/profile/11206/28049/',
 u'Praga Dark Lager': '/beer/profile/303/82289/',
 u'Primator Dark Lager': '/beer/profile/707/14874/',
 u'Sagres Cerveja Preta (Dark)': '/beer/profile/301/6187/',
 u'San Miguel Dark Lager': '/beer/profile/355/7102/',
 u'Saranac Chocolate Amber Lager': '/beer/profile/99/6796/',
 u'Schwarzer Kristall': '/beer/profile/5687/45326/',
 u'Silva Strong Dark Beer': '/beer/profile/1705/4808/',
 u'St. Pauli Girl Special Dark': '/beer/profile/224/698/',
 u'Staropramen Granat Beer': '/beer/profile/437/14167/',
 u'Staropramen \u010cern\xfd': '/beer/profile/437/8689/',
 u'Super Bock Stout': '/beer/profile/439/37864/',
 u'Telenn Du': '/beer/profile/2520/6245/',
 u'Tomislav Pivo': '/beer/profile/1720/42822/',
 u'Tooheys Red': '/beer/profile/839/4001/',
 u"Trafalgar Paddy's Irish Red Lager": '/beer/profile/765/8590/',
 u'U Fleku Dark Lager': '/beer/profile/2805/6614/',
 u'U Rousse': '/beer/profile/22/2054/',
 u'Wolverine Dark': '/beer/profile/24808/72163/',
 u'Yebisu Black Beer': '/beer/profile/284/11490/',
 u'\x8e\u017datec Dark Lager': '/beer/profile/4106/57920/'}

In [ ]:
pickle.dump(temp,open('beer_list.pkl','wb'))

In [ ]:
beer_urls = pickle.load(open('beer_list.pkl','rb'))

In [ ]:
columns = ['name','url']
beers = pd.DataFrame(columns=columns)

beers['name'] = beer_urls.keys()
beers['url']  = beer_urls.values()
pd.to_pickle(beers,'beers.pkl')

In [8]:
beers = pd.read_pickle('beers.pkl')

In [9]:
beers[beers.name == 'Death & Taxes Black Beer']


Out[9]:
name url
2299 Death & Taxes Black Beer /beer/profile/763/2306/

In [18]:
def get_beer_soup(url):
    ba_url = 'http://www.beeradvocate.com'
    url = ba_url+url
    soup = get_soup(url)
    return soup

In [20]:
import sys
sys.setrecursionlimit(100000000)

In [31]:
for i in tqdm(range(0,beers.shape[0],230)):
    temp = beers.iloc[i:i+230-1,:]
    temp['soup'] = temp.url.map(get_beer_soup)
    temp.to_pickle('beer_soup_'+str(i+230-1)+'.pkl')


  0%|          | 0/89 [00:00<?, ?it/s]
http://www.beeradvocate.com/beer/profile/26676/104914/ failed with code: 403
http://www.beeradvocate.com/beer/profile/2055/13583/ failed with code: 403
http://www.beeradvocate.com/beer/profile/27021/124612/ failed with code: 403
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
  1%|          | 1/89 [02:04<3:02:00, 124.10s/it]
http://www.beeradvocate.com/beer/profile/16773/104438/ failed with code: 403
http://www.beeradvocate.com/beer/profile/395/35378/ failed with code: 403
  2%|▏         | 2/89 [04:09<3:00:19, 124.36s/it]
http://www.beeradvocate.com/beer/profile/113/17284/ failed with code: 403
http://www.beeradvocate.com/beer/profile/24764/99037/ failed with code: 403
  3%|▎         | 3/89 [06:18<3:00:29, 125.93s/it]
http://www.beeradvocate.com/beer/profile/34318/156937/ failed with code: 403
http://www.beeradvocate.com/beer/profile/23/43060/ failed with code: 403
http://www.beeradvocate.com/beer/profile/22655/101507/ failed with code: 403
http://www.beeradvocate.com/beer/profile/173/45936/ failed with code: 403
  4%|▍         | 4/89 [08:20<2:56:29, 124.58s/it]
http://www.beeradvocate.com/beer/profile/1636/4603/ failed with code: 403
  6%|▌         | 5/89 [10:23<2:54:07, 124.38s/it]
http://www.beeradvocate.com/beer/profile/26/86165/ failed with code: 403
http://www.beeradvocate.com/beer/profile/19897/51045/ failed with code: 403
  7%|▋         | 6/89 [12:27<2:51:31, 124.00s/it]
http://www.beeradvocate.com/beer/profile/28383/81870/ failed with code: 403
http://www.beeradvocate.com/beer/profile/1623/4584/ failed with code: 403
http://www.beeradvocate.com/beer/profile/21154/57481/ failed with code: 403
http://www.beeradvocate.com/beer/profile/433/1313/ failed with code: 500
  8%|▊         | 7/89 [14:26<2:47:23, 122.48s/it]
http://www.beeradvocate.com/beer/profile/703/6135/ failed with code: 403
  9%|▉         | 8/89 [16:24<2:43:53, 121.40s/it]
http://www.beeradvocate.com/beer/profile/396/189097/ failed with code: 403
http://www.beeradvocate.com/beer/profile/24247/81721/ failed with code: 403
http://www.beeradvocate.com/beer/profile/32908/114626/ failed with code: 403
http://www.beeradvocate.com/beer/profile/789/35291/ failed with code: 403
http://www.beeradvocate.com/beer/profile/28178/203498/ failed with code: 403
 10%|█         | 9/89 [18:23<2:40:46, 120.58s/it]
http://www.beeradvocate.com/beer/profile/16866/200131/ failed with code: 403
http://www.beeradvocate.com/beer/profile/31644/180672/ failed with code: 403
 11%|█         | 10/89 [20:25<2:39:10, 120.89s/it]
http://www.beeradvocate.com/beer/profile/889/23531/ failed with code: 403
 13%|█▎        | 12/89 [24:37<2:38:54, 123.82s/it]
http://www.beeradvocate.com/beer/profile/1457/5136/ failed with code: 403
http://www.beeradvocate.com/beer/profile/684/26207/ failed with code: 403
http://www.beeradvocate.com/beer/profile/22524/61859/ failed with code: 403
 15%|█▍        | 13/89 [26:38<2:35:35, 122.84s/it]
http://www.beeradvocate.com/beer/profile/14879/118321/ failed with code: 403
http://www.beeradvocate.com/beer/profile/28255/111924/ failed with code: 403
 16%|█▌        | 14/89 [28:46<2:35:33, 124.44s/it]
http://www.beeradvocate.com/beer/profile/158/88572/ failed with code: 403
http://www.beeradvocate.com/beer/profile/907/3592/ failed with code: 403
http://www.beeradvocate.com/beer/profile/19241/52046/ failed with code: 403
http://www.beeradvocate.com/beer/profile/28019/84491/ failed with code: 403
 17%|█▋        | 15/89 [30:54<2:34:41, 125.42s/it]
http://www.beeradvocate.com/beer/profile/139/103831/ failed with code: 403
http://www.beeradvocate.com/beer/profile/28726/90099/ failed with code: 403
 18%|█▊        | 16/89 [32:51<2:29:42, 123.05s/it]
http://www.beeradvocate.com/beer/profile/9139/43579/ failed with code: 403
http://www.beeradvocate.com/beer/profile/16333/211556/ failed with code: 403
http://www.beeradvocate.com/beer/profile/30542/118621/ failed with code: 403
 20%|██        | 18/89 [36:58<2:25:56, 123.33s/it]
http://www.beeradvocate.com/beer/profile/1037/6565/ failed with code: 500
http://www.beeradvocate.com/beer/profile/26/184050/ failed with code: 403
http://www.beeradvocate.com/beer/profile/596/26613/ failed with code: 403
 21%|██▏       | 19/89 [39:21<2:30:46, 129.24s/it]
http://www.beeradvocate.com/beer/profile/23315/71064/ failed with code: 403
http://www.beeradvocate.com/beer/profile/10607/77299/ failed with code: 403
 22%|██▏       | 20/89 [41:31<2:28:59, 129.55s/it]
http://www.beeradvocate.com/beer/profile/24940/132380/ failed with code: 403
http://www.beeradvocate.com/beer/profile/1935/7920/ failed with code: 403
http://www.beeradvocate.com/beer/profile/700/83742/ failed with code: 403
 26%|██▌       | 23/89 [47:41<2:17:58, 125.43s/it]
http://www.beeradvocate.com/beer/profile/263/29145/ failed with code: 403
http://www.beeradvocate.com/beer/profile/2346/7052/ failed with code: 403
http://www.beeradvocate.com/beer/profile/25710/177881/ failed with code: 403
 27%|██▋       | 24/89 [49:58<2:19:38, 128.90s/it]
http://www.beeradvocate.com/beer/profile/26565/82744/ failed with code: 403
http://www.beeradvocate.com/beer/profile/5316/7200/ failed with code: 403
 30%|███       | 27/89 [56:35<2:15:35, 131.22s/it]
http://www.beeradvocate.com/beer/profile/16393/146756/ failed with code: 403
http://www.beeradvocate.com/beer/profile/20430/81744/ failed with code: 403
 31%|███▏      | 28/89 [58:52<2:15:08, 132.93s/it]
http://www.beeradvocate.com/beer/profile/436/19809/ failed with code: 403
http://www.beeradvocate.com/beer/profile/4051/40865/ failed with code: 403
http://www.beeradvocate.com/beer/profile/13884/63180/ failed with code: 403
http://www.beeradvocate.com/beer/profile/2372/113505/ failed with code: 403
 33%|███▎      | 29/89 [1:00:59<2:11:01, 131.03s/it]
http://www.beeradvocate.com/beer/profile/30/1830/ failed with code: 403
 34%|███▎      | 30/89 [1:03:10<2:09:02, 131.23s/it]
http://www.beeradvocate.com/beer/profile/40579/218624/ failed with code: 403
http://www.beeradvocate.com/beer/profile/71/84562/ failed with code: 403
http://www.beeradvocate.com/beer/profile/1675/8052/ failed with code: 403
http://www.beeradvocate.com/beer/profile/147/109789/ failed with code: 403
 35%|███▍      | 31/89 [1:05:14<2:04:41, 128.99s/it]
http://www.beeradvocate.com/beer/profile/15237/62070/ failed with code: 403
 36%|███▌      | 32/89 [1:07:22<2:02:19, 128.77s/it]
http://www.beeradvocate.com/beer/profile/17981/180992/ failed with code: 403
 37%|███▋      | 33/89 [1:09:28<1:59:12, 127.72s/it]
http://www.beeradvocate.com/beer/profile/31840/96859/ failed with code: 403
http://www.beeradvocate.com/beer/profile/515/18895/ failed with code: 403
http://www.beeradvocate.com/beer/profile/26932/93416/ failed with code: 403
 38%|███▊      | 34/89 [1:11:44<1:59:32, 130.41s/it]
http://www.beeradvocate.com/beer/profile/33064/148605/ failed with code: 403
 40%|████      | 36/89 [1:15:48<1:51:15, 125.95s/it]
http://www.beeradvocate.com/beer/profile/29656/144719/ failed with code: 403
http://www.beeradvocate.com/beer/profile/43/467/ failed with code: 403
http://www.beeradvocate.com/beer/profile/21872/55347/ failed with code: 403
 42%|████▏     | 37/89 [1:17:56<1:49:40, 126.54s/it]
http://www.beeradvocate.com/beer/profile/24168/72411/ failed with code: 403
http://www.beeradvocate.com/beer/profile/267/66436/ failed with code: 403
http://www.beeradvocate.com/beer/profile/583/39397/ failed with code: 403
http://www.beeradvocate.com/beer/profile/18823/166022/ failed with code: 403
 43%|████▎     | 38/89 [1:20:00<1:46:54, 125.78s/it]
http://www.beeradvocate.com/beer/profile/2772/8725/ failed with code: 403
http://www.beeradvocate.com/beer/profile/26520/79898/ failed with code: 403
 44%|████▍     | 39/89 [1:22:00<1:43:24, 124.09s/it]
http://www.beeradvocate.com/beer/profile/825/3587/ failed with code: 500
 45%|████▍     | 40/89 [1:24:04<1:41:16, 124.01s/it]
http://www.beeradvocate.com/beer/profile/22564/79840/ failed with code: 403
http://www.beeradvocate.com/beer/profile/36798/210557/ failed with code: 403
 46%|████▌     | 41/89 [1:26:10<1:39:40, 124.58s/it]
http://www.beeradvocate.com/beer/profile/31272/96578/ failed with code: 403
 47%|████▋     | 42/89 [1:28:18<1:38:29, 125.74s/it]
http://www.beeradvocate.com/beer/profile/1628/34956/ failed with code: 403
http://www.beeradvocate.com/beer/profile/566/13070/ failed with code: 403
 48%|████▊     | 43/89 [1:30:18<1:35:04, 124.02s/it]
http://www.beeradvocate.com/beer/profile/33160/112140/ failed with code: 403
http://www.beeradvocate.com/beer/profile/105/68248/ failed with code: 403
http://www.beeradvocate.com/beer/profile/2432/27792/ failed with code: 403
http://www.beeradvocate.com/beer/profile/347/30518/ failed with code: 403
 49%|████▉     | 44/89 [1:32:22<1:33:02, 124.05s/it]
http://www.beeradvocate.com/beer/profile/1530/10277/ failed with code: 403
http://www.beeradvocate.com/beer/profile/31678/189327/ failed with code: 403
 52%|█████▏    | 46/89 [1:36:42<1:30:52, 126.81s/it]
http://www.beeradvocate.com/beer/profile/13371/118229/ failed with code: 403
http://www.beeradvocate.com/beer/profile/604/21197/ failed with code: 403
http://www.beeradvocate.com/beer/profile/28383/95633/ failed with code: 403
 53%|█████▎    | 47/89 [1:39:06<1:32:24, 132.01s/it]
http://www.beeradvocate.com/beer/profile/37961/161406/ failed with code: 403
http://www.beeradvocate.com/beer/profile/215/1811/ failed with code: 403
 54%|█████▍    | 48/89 [1:41:14<1:29:25, 130.86s/it]
http://www.beeradvocate.com/beer/profile/12516/42983/ failed with code: 403
http://www.beeradvocate.com/beer/profile/598/65060/ failed with code: 403
 55%|█████▌    | 49/89 [1:43:51<1:32:30, 138.77s/it]
http://www.beeradvocate.com/beer/profile/697/96243/ failed with code: 403
http://www.beeradvocate.com/beer/profile/9629/37483/ failed with code: 403
http://www.beeradvocate.com/beer/profile/31478/96766/ failed with code: 403
 56%|█████▌    | 50/89 [1:46:03<1:28:46, 136.57s/it]
http://www.beeradvocate.com/beer/profile/696/125502/ failed with code: 403
 57%|█████▋    | 51/89 [1:48:25<1:27:36, 138.33s/it]
http://www.beeradvocate.com/beer/profile/32406/106852/ failed with code: 403
http://www.beeradvocate.com/beer/profile/30807/93480/ failed with code: 403
http://www.beeradvocate.com/beer/profile/27947/86596/ failed with code: 403
 58%|█████▊    | 52/89 [1:50:42<1:25:02, 137.90s/it]
http://www.beeradvocate.com/beer/profile/48/19363/ failed with code: 403
http://www.beeradvocate.com/beer/profile/158/76886/ failed with code: 403
http://www.beeradvocate.com/beer/profile/31805/102492/ failed with code: 403
 60%|█████▉    | 53/89 [1:52:51<1:21:06, 135.18s/it]
http://www.beeradvocate.com/beer/profile/11031/42445/ failed with code: 403
http://www.beeradvocate.com/beer/profile/16206/48375/ failed with code: 403
http://www.beeradvocate.com/beer/profile/33607/154162/ failed with code: 403
 63%|██████▎   | 56/89 [1:59:16<1:11:55, 130.78s/it]
http://www.beeradvocate.com/beer/profile/1141/42958/ failed with code: 403
http://www.beeradvocate.com/beer/profile/664/2731/ failed with code: 403
 64%|██████▍   | 57/89 [2:01:26<1:09:35, 130.48s/it]
http://www.beeradvocate.com/beer/profile/5266/17026/ failed with code: 403
http://www.beeradvocate.com/beer/profile/34149/115118/ failed with code: 403
http://www.beeradvocate.com/beer/profile/30815/99003/ failed with code: 403
 65%|██████▌   | 58/89 [2:03:51<1:09:39, 134.83s/it]
http://www.beeradvocate.com/beer/profile/17033/42695/ failed with code: 403
http://www.beeradvocate.com/beer/profile/12881/61689/ failed with code: 500
 66%|██████▋   | 59/89 [2:06:02<1:06:48, 133.63s/it]
http://www.beeradvocate.com/beer/profile/24018/184060/ failed with code: 403
 67%|██████▋   | 60/89 [2:08:08<1:03:29, 131.35s/it]
http://www.beeradvocate.com/beer/profile/28556/89820/ failed with code: 403
http://www.beeradvocate.com/beer/profile/29434/105721/ failed with code: 403
http://www.beeradvocate.com/beer/profile/10485/178506/ failed with code: 403
http://www.beeradvocate.com/beer/profile/27917/150504/ failed with code: 403
 69%|██████▊   | 61/89 [2:10:14<1:00:29, 129.63s/it]
http://www.beeradvocate.com/beer/profile/23459/64493/ failed with code: 403
 70%|██████▉   | 62/89 [2:12:27<58:50, 130.77s/it]  
http://www.beeradvocate.com/beer/profile/3/60284/ failed with code: 403
http://www.beeradvocate.com/beer/profile/294/815/ failed with code: 403
http://www.beeradvocate.com/beer/profile/22072/179142/ failed with code: 403
http://www.beeradvocate.com/beer/profile/24428/73210/ failed with code: 403
 72%|███████▏  | 64/89 [2:16:49<54:27, 130.69s/it]
http://www.beeradvocate.com/beer/profile/23222/76304/ failed with code: 403
http://www.beeradvocate.com/beer/profile/75/88536/ failed with code: 403
http://www.beeradvocate.com/beer/profile/864/112280/ failed with code: 403
 73%|███████▎  | 65/89 [2:19:00<52:25, 131.05s/it]
http://www.beeradvocate.com/beer/profile/21154/54966/ failed with code: 500
http://www.beeradvocate.com/beer/profile/2391/55504/ failed with code: 403
http://www.beeradvocate.com/beer/profile/28178/112878/ failed with code: 403
http://www.beeradvocate.com/beer/profile/25818/145776/ failed with code: 403
 74%|███████▍  | 66/89 [2:21:06<49:38, 129.49s/it]
http://www.beeradvocate.com/beer/profile/1805/13701/ failed with code: 403
http://www.beeradvocate.com/beer/profile/4/88815/ failed with code: 500
 75%|███████▌  | 67/89 [2:23:17<47:37, 129.88s/it]
http://www.beeradvocate.com/beer/profile/22/1693/ failed with code: 403
 78%|███████▊  | 69/89 [2:27:45<43:59, 131.98s/it]
http://www.beeradvocate.com/beer/profile/33437/109428/ failed with code: 403
 79%|███████▊  | 70/89 [2:29:56<41:44, 131.82s/it]
http://www.beeradvocate.com/beer/profile/195/617/ failed with code: 403
http://www.beeradvocate.com/beer/profile/30949/101709/ failed with code: 403
 80%|███████▉  | 71/89 [2:32:11<39:48, 132.71s/it]
http://www.beeradvocate.com/beer/profile/48/3490/ failed with code: 403
http://www.beeradvocate.com/beer/profile/16354/39238/ failed with code: 403
 81%|████████  | 72/89 [2:34:22<37:28, 132.26s/it]
http://www.beeradvocate.com/beer/profile/775/14027/ failed with code: 403
http://www.beeradvocate.com/beer/profile/16352/149857/ failed with code: 403
http://www.beeradvocate.com/beer/profile/13014/212431/ failed with code: 403
 82%|████████▏ | 73/89 [2:36:42<35:53, 134.57s/it]
http://www.beeradvocate.com/beer/profile/27025/101083/ failed with code: 500
http://www.beeradvocate.com/beer/profile/32893/212780/ failed with code: 403
 84%|████████▍ | 75/89 [2:41:03<30:52, 132.32s/it]
http://www.beeradvocate.com/beer/profile/32171/105040/ failed with code: 403
http://www.beeradvocate.com/beer/profile/24018/128242/ failed with code: 500
 85%|████████▌ | 76/89 [2:43:22<29:05, 134.29s/it]
http://www.beeradvocate.com/beer/profile/33018/148547/ failed with code: 403
http://www.beeradvocate.com/beer/profile/25775/82169/ failed with code: 403
http://www.beeradvocate.com/beer/profile/33901/118920/ failed with code: 403
 87%|████████▋ | 77/89 [2:45:34<26:44, 133.70s/it]
http://www.beeradvocate.com/beer/profile/402/45914/ failed with code: 403
 89%|████████▉ | 79/89 [2:49:55<22:03, 132.34s/it]
http://www.beeradvocate.com/beer/profile/17223/55942/ failed with code: 403
http://www.beeradvocate.com/beer/profile/23326/69298/ failed with code: 403
http://www.beeradvocate.com/beer/profile/138/8081/ failed with code: 403
http://www.beeradvocate.com/beer/profile/14967/118297/ failed with code: 403
 90%|████████▉ | 80/89 [2:52:13<20:05, 133.98s/it]
http://www.beeradvocate.com/beer/profile/2743/86841/ failed with code: 403
http://www.beeradvocate.com/beer/profile/640/1738/ failed with code: 403
http://www.beeradvocate.com/beer/profile/506/16406/ failed with code: 403
 91%|█████████ | 81/89 [2:54:21<17:38, 132.31s/it]
http://www.beeradvocate.com/beer/profile/60/1598/ failed with code: 403
 92%|█████████▏| 82/89 [2:56:34<15:26, 132.34s/it]
http://www.beeradvocate.com/beer/profile/19378/98544/ failed with code: 403
 94%|█████████▍| 84/89 [3:00:49<10:49, 129.81s/it]
http://www.beeradvocate.com/beer/profile/583/1994/ failed with code: 403
 96%|█████████▌| 85/89 [3:02:52<08:31, 127.77s/it]
http://www.beeradvocate.com/beer/profile/1195/3349/ failed with code: 500
http://www.beeradvocate.com/beer/profile/28/197461/ failed with code: 403
 97%|█████████▋| 86/89 [3:04:55<06:19, 126.40s/it]
http://www.beeradvocate.com/beer/profile/30746/90736/ failed with code: 500
http://www.beeradvocate.com/beer/profile/24299/92753/ failed with code: 403
 99%|█████████▉| 88/89 [3:09:07<02:05, 125.93s/it]
http://www.beeradvocate.com/beer/profile/17980/64545/ failed with code: 403
http://www.beeradvocate.com/beer/profile/25959/70761/ failed with code: 403
100%|██████████| 89/89 [3:11:12<00:00, 125.82s/it]

In [ ]:
temp = beers.iloc[229:beers.shape[0]:230]
temp['soup'] = temp.url.map(get_beer_soup)

In [21]:
temp.head()


Out[21]:
name url soup
229 Franconia Amber /beer/profile/17033/43115/ <!DOCTYPE html> <html class="Public NoJs Logge...
459 Big American Stout /beer/profile/26824/113395/ <!DOCTYPE html> <html class="Public NoJs Logge...
689 Love's Armor /beer/profile/28019/90275/ <!DOCTYPE html> <html class="Public NoJs Logge...
919 Deadeye Jack Porter /beer/profile/19126/47303/ <!DOCTYPE html> <html class="Public NoJs Logge...
1149 Peach Lambic /beer/profile/1170/45334/ <!DOCTYPE html> <html class="Public NoJs Logge...

In [22]:
temp.to_pickle('beer_soup_missing.pkl')

In [14]:
from tqdm import tqdm
for i in tqdm(range(229,beers.shape[0],230)):
    print(i)


100%|██████████| 89/89 [00:00<00:00, 46279.82it/s]
229
459
689
919
1149
1379
1609
1839
2069
2299
2529
2759
2989
3219
3449
3679
3909
4139
4369
4599
4829
5059
5289
5519
5749
5979
6209
6439
6669
6899
7129
7359
7589
7819
8049
8279
8509
8739
8969
9199
9429
9659
9889
10119
10349
10579
10809
11039
11269
11499
11729
11959
12189
12419
12649
12879
13109
13339
13569
13799
14029
14259
14489
14719
14949
15179
15409
15639
15869
16099
16329
16559
16789
17019
17249
17479
17709
17939
18169
18399
18629
18859
19089
19319
19549
19779
20009
20239
20469


In [ ]:


In [ ]:


In [ ]:
beer_reviews = {}
ba_url = 'http://www.beeradvocate.com'

for beer in beers.items():
    soup = get_soup(ba_url+beer[1])
    print(beer[0])
    beer_reviews[beer[0]] = get_beer_reviews(soup)