In [1]:
%pylab inline
import datetime
import os, sys, glob
import json
import pickle
import time

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
In [2]:
# Load credential data.
# `untappd_credentials` is a local module expected to provide CLIENT_ID and
# CLIENT_SECRET (the two names the API client below is built from). They are
# imported explicitly rather than with `import *` so their origin is visible
# and no other names are pulled into the notebook namespace.
from untappd_credentials import CLIENT_ID, CLIENT_SECRET

# Untappd account whose check-ins are analysed in the rest of the notebook.
USER_NAME = 'ovarol'

# To run without the credentials module, define the pair here instead:
#CLIENT_ID = "XXXXXXXXXXXXXX"
#CLIENT_SECRET = "XXXXXXXXXXXXXX"
# Do not print the secret: cell output is stored together with the notebook.
In [3]:
# Client wrapper from https://github.com/marshall91/pythonUntappd
import untappd_api as pythonUntappd

# One client object is shared by every request made in this notebook.
api = pythonUntappd.api(CLIENT_ID, CLIENT_SECRET)

# Only the 'response' part of the reply is kept as the user's profile data.
profileReply = api.user_info(USER_NAME)
userData = profileReply['response']
# Uncomment to inspect the profile:
#print(json.dumps(userData, indent=4))
In [4]:
# Download every distinct beer of USER_NAME page by page, then store the
# profile and the beer list together as one JSON file.
uniqueBeers = list()
# Fetch the profile again here so it is written out alongside the beer list.
userData = api.user_info(USER_NAME)['response']

# `count` is the number of beers received so far and doubles as the offset
# of the next page to request.
count = 0
while True:
    page = api.user_distinct_beers(USER_NAME, offset=count)['response']['beers']
    count += page['count']
    uniqueBeers.extend(page['items'])
    print('Unique beers collected: {}'.format(len(uniqueBeers)))
    if page['count'] == 0:
        # A page reporting zero beers means there is nothing left to fetch.
        break
    # Pause between requests.
    time.sleep(1)

dataPath = 'docs/data/{}_untappd_data.json'.format(USER_NAME)
# Create the target folder if needed so the download is not lost to a
# missing-directory error at the very last step.
os.makedirs(os.path.dirname(dataPath), exist_ok=True)
with open(dataPath, 'w') as fl:
    json.dump({'user_data': userData, 'beer_data': uniqueBeers}, fl)
In [5]:
# Pretty-print the last collected record to see what one entry looks like.
# (Pass `uniqueBeers` itself to dump the whole list instead.)
lastRecord = uniqueBeers[-1]
print(json.dumps(lastRecord, sort_keys=True, indent=4))
In [5]:
# Check style counts
#   styleCounts      : full style string -> number of distinct beers
#   styleCountSimple : leading part of the style string (the text before the
#                      first '-' or '/') -> number of distinct beers
styleCounts = dict()
styleCountSimple = dict()
for b in uniqueBeers:
    style = b['beer']['beer_style']
    styleCounts[style] = styleCounts.get(style, 0) + 1
    # Strip the padding a spaced separator leaves behind, so that a style
    # written e.g. 'IPA - American' is grouped under 'IPA' and not 'IPA '
    # (which would be a different key from a bare 'IPA').
    sstyle = style.split('-')[0].split('/')[0].strip()
    styleCountSimple[sstyle] = styleCountSimple.get(sstyle, 0) + 1

# Print the simplified styles, most frequent first.
for s in sorted(styleCountSimple, key=styleCountSimple.get, reverse=True):
    print(s, styleCountSimple[s])
In [6]:
# Most preferred brewery: tally the distinct beers per brewery name and
# print the breweries from the most to the least frequent.
breweryCount = dict()
for entry in uniqueBeers:
    brewery = entry['brewery']['brewery_name']
    breweryCount[brewery] = breweryCount.get(brewery, 0) + 1

ranking = sorted(breweryCount.items(), key=lambda pair: pair[1], reverse=True)
for brewery, total in ranking:
    print(brewery, total)
In [7]:
# The 30 beers with the highest value of the record-level 'rating_score',
# best first.
byScore = sorted(uniqueBeers, key=lambda rec: rec['rating_score'], reverse=True)
for rec in byScore[:30]:
    print(rec['brewery']['brewery_name'], rec['beer']['beer_name'], rec['rating_score'])
In [8]:
# Distribution of IBU and ABV over all collected beers: print the two means
# and draw one histogram per quantity, side by side.
ibuList = [rec['beer']['beer_ibu'] for rec in uniqueBeers]
abvList = [rec['beer']['beer_abv'] for rec in uniqueBeers]
print('Avg. IBU: {}'.format(np.mean(ibuList)))
print('Avg. ABV: {}'.format(np.mean(abvList)))

fig, axarr = plt.subplots(1, 2, figsize=(7,3))
panels = (
    (ibuList, 'IBU (International Bitterness Units)'),
    (abvList, 'ABV (Alcohol By Volume)'),
)
# Both panels share the same styling; only the data and the label differ.
for ax, (values, label) in zip(axarr, panels):
    ax.hist(values, bins=20, alpha=0.5, linewidth=0)
    ax.set_xlabel(label, fontsize=12)
    ax.tick_params(axis='both', which='major', labelsize=12)
plt.tight_layout()
In [9]:
# Beer ABV vs IBU
# beerMeasures holds one row per beer: column 0 = ABV, column 1 = IBU.
beerMeasures = np.array([(rec['beer']['beer_abv'], rec['beer']['beer_ibu'])
                         for rec in uniqueBeers])
print(beerMeasures.shape)

fig, ax = plt.subplots(figsize=(4,4))
ax.scatter(beerMeasures[:,0], beerMeasures[:,1], alpha=0.5)
# Pin the lower bound of both axes at zero; the upper bounds stay automatic.
ax.set_xlim(xmin=0)
ax.set_ylim(ymin=0)
ax.set_xlabel('ABV (Alcohol By Volume)', fontsize=12)
ax.set_ylabel('IBU (International Bitterness Units)', fontsize=12)
ax.tick_params(axis='both', which='major', labelsize=14)
In [10]:
# Scatter plot of avg. ratings vs. my ratings
# Each row is (beer-level rating, record-level rating); beers whose
# beer-level rating is 0 are left out.
scatterData = np.array([(rec['beer']['rating_score'], rec['rating_score'])
                        for rec in uniqueBeers
                        if rec['beer']['rating_score'] != 0])

fig, ax = plt.subplots(figsize=(4,4))
# Dashed diagonal: points on it have identical values on both axes.
ax.plot([0,5], [0,5], 'r--', linewidth=2)
ax.scatter(scatterData[:,0], scatterData[:,1], alpha=0.5)

# Use the same range (taken over both columns) on the two axes, with a
# small margin on either side.
minV, maxV = np.min(scatterData), np.max(scatterData)
print(minV, maxV)
ax.set_xlim((minV-0.2, maxV+0.2))
ax.set_ylim((minV-0.2, maxV+0.2))
ax.set_xlabel('Avg. rating', fontsize=14)
ax.set_ylabel('My rating', fontsize=14)
ax.tick_params(axis='both', which='major', labelsize=12)
In [11]:
# What kind of beers I like more than avg. people
# Columns of scatterData:
#   0 = record-level rating minus beer-level rating
#   1 = ABV
#   2 = IBU
# Beers whose beer-level rating is 0 are skipped.
scatterData = np.array([
    (rec['rating_score'] - rec['beer']['rating_score'],
     rec['beer']['beer_abv'],
     rec['beer']['beer_ibu'])
    for rec in uniqueBeers
    if rec['beer']['rating_score'] != 0
])
print(scatterData.shape)

ratingDiff = scatterData[:,0]
fig, axarr = plt.subplots(1, 2, figsize=(7,3), sharey=True)

# One entry per panel: axes, x values, x label, (lower, upper) x limits.
panels = (
    (axarr[0], scatterData[:,2], 'IBU (International Bitterness Units)',
     (-5, 105)),
    (axarr[1], scatterData[:,1], 'ABV (Alcohol By Volume)',
     (-1, max(scatterData[:,1])+1)),
)
for ax, xs, label, (lower, upper) in panels:
    ax.scatter(xs, ratingDiff, alpha=0.5)
    # Horizontal reference at zero difference.
    ax.axhline(y=0, c='k', linewidth=2, linestyle='--')
    # Degree-1 least-squares fit of the rating difference against x.
    fit = np.polyfit(xs, ratingDiff, 1)
    fit_fn = np.poly1d(fit)
    ax.plot(xs, fit_fn(xs), 'r--', linewidth=2)
    ax.set_xlim(xmin=lower, xmax=upper)
    ax.set_xlabel(label, fontsize=12)
    ax.tick_params(axis='both', which='major', labelsize=12)

# The y axis is shared, so only the left panel carries its label.
axarr[0].set_ylabel('Rating difference', fontsize=12)
plt.tight_layout()
In [12]:
# Cumulative beer discoveries
# Plot the running number of distinct beers against the time each beer was
# first checked in.
dTimes = list()
for b in uniqueBeers:
    # Keep the first five whitespace-separated fields (weekday, day, month,
    # year, time) and drop whatever follows. Unlike splitting on ' -', this
    # also copes with a positive UTC offset such as '+0000' or with no
    # offset at all. The offset is discarded, so times are naive wall-clock
    # values.
    stamp = ' '.join(b['first_created_at'].split()[:5])
    dTimes.append(datetime.datetime.strptime(stamp, '%a, %d %b %Y %H:%M:%S'))
dTimes.sort()

fig = plt.figure(figsize=(7,3))
# Count from 1: after the n-th discovery the running total is n, not n-1.
plt.plot(dTimes, range(1, len(dTimes) + 1), linewidth=2)
plt.ylabel('Unique beer count', fontsize=14)
plt.xticks(rotation=30)
plt.tick_params(axis='both', which='major', labelsize=12)
In [ ]: