In [1]:
import csv
import urllib2
from urllib import urlencode
import os, os.path
from bs4 import BeautifulSoup
from dateutil.parser import parse
import re
import numpy as np
import sys
os.chdir('/home/will/Dropbox/BMES375/Summer13/')
sys.path.append('/home/will/PySeqUtils/')

In [2]:
from PlottingTools import make_heatmap_df

In [3]:
url_maps = {}
with open('cody_urls.csv') as handle:
    for row in csv.DictReader(handle):
        url = row['Cody Url']
        player_id = url.rsplit('/', 1)[-1].split('-')[0]
        url_maps[row['BannerWebID']] = player_id

In [4]:
cody_problems = []
with open('cody_problems.csv') as handle:
    for row in csv.DictReader(handle):
        cody_problems.append((row['CodyProblem'], parse(row['DueDate'])))

In [5]:
def get_web_data(user_num, problem_num):
    term = 'term='
    term += 'player_id%3A'+str(user_num)
    term += '+problem_id%3A'+str(problem_num)
    url = 'http://www.mathworks.com/matlabcentral/cody/solutions'
    return urllib2.urlopen(url + '?' + term).read()


def get_sols(soup):
    return soup.find_all('div', attrs = {'class':'solution-metric'})

def get_solution_size(solution):
    size = solution.span.text
    if size == 'Incorrect':
        return np.inf
    else:
        return int(size)
    
def get_grade(user, problem):
    soup = BeautifulSoup(get_web_data(user, problem))
    sols = get_sols(soup)
    for sol in sols:
        yield get_solution_size(sol)
    else:
        yield None

In [6]:
from itertools import product
from concurrent.futures import ProcessPoolExecutor
from itertools import imap

def linker(tup):
    (bw_id, user_num), (problem, due_date) = tup
    tmp = []
    for size in get_grade(user_num, problem):
        tmp.append((bw_id, 'P-'+problem, size))
    return tmp

with ProcessPoolExecutor(max_workers = 10) as ex:
    tmp_data = []
    checks = product(url_maps.items(), cody_problems)
    for res in ex.map(linker, checks):
        tmp_data += res

In [7]:
import pandas as pd

grade_df = pd.DataFrame(tmp_data, columns = ['BW_id', 'Prob', 'Size'])

In [8]:
pdata = pd.pivot_table(grade_df, rows = ['BW_id', 'Prob'], 
                        values = 'Size', aggfunc = [np.min, len]).dropna()
pdata.head()


Out[8]:
amin len
BW_id Prob
aah67 P-19 inf 3
P-2 13.000000 2
P-3 12.000000 2
P-6 17.000000 5
P-7 inf 7

In [18]:
best_scores  = pd.pivot_table(pdata.reset_index(), rows = 'BW_id', 
                                  cols = 'Prob', values = 'amin')
fig = make_heatmap_df(best_scores.T, figsize = (10,10))
plt.colorbar().set_label('Best Solution Nodesize')



In [27]:
num_tries  = pd.pivot_table(pdata.reset_index(), rows = 'BW_id', 
                                  cols = 'Prob', values = 'len', 
                                  aggfunc = np.sum)
fig = make_heatmap_df(num_tries.applymap(np.log10).T, figsize = (10,10))
cb = plt.colorbar()
tickpos = [5, 10, 15, 20, 30, 40, 50, 60, 100]
cb.set_label('#Tries')
cb.set_ticks([np.log10(x) for x in tickpos])
cb.set_ticklabels(map(str, tickpos))



In [ ]: