In [1]:
import json
import datetime


folder = '/dfs/scratch2/fcipollone/stackoverflow/guesslang_and_ast/outfiles'

lines = []

guess_and_parse = {}
guess_not_parse = {}
parse_not_guess = {}
total = {}
dates = []
for file_num in range(793):
    filename = folder + '/file' + str(file_num) + '.txt'
    count = 0
    first_day = datetime.datetime(2008,1,1)
    cur_diff = None
    for line in open(filename):
        count += 1
        line_obj = json.loads(line)
        cd = line_obj['CreationDate']
        d = datetime.datetime(year=int(cd[:4]),month=int(cd[5:7]), day=int(cd[8:10]))
        if cur_diff is not None:
            if d-first_day < cur_diff:
                print(d-first_day-cur_diff)
        cur_diff = d-first_day
        dates.append(d-first_day)
        for code_block in line_obj['CodeBlocks']:
            l = int(round(len(code_block['code']),-1))
            gl = code_block['Guesslang']
            par = code_block['Parsable']
            if par == "True" or gl.strip().lower() == "python":
                if l not in total:
                    total[l] = 0
                total[l] += 1
            if par == "True" and gl.strip().lower() == "python":
                if l not in guess_and_parse:
                    guess_and_parse[l] = 0
                guess_and_parse[l] += 1
                
            elif par == "True" and not gl.strip().lower() == "python":
                if l not in parse_not_guess:
                    parse_not_guess[l] = 0
                parse_not_guess[l] += 1
                
            elif not par == "True" and gl.strip().lower() == "python":
                if l not in guess_not_parse:
                    guess_not_parse[l] = 0
                guess_not_parse[l] += 1
                
    lines.append(count)


-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-7 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-40 days, 0:00:00
-27 days, 0:00:00
-3 days, 0:00:00
-7 days, 0:00:00
-7 days, 0:00:00
-39 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-7 days, 0:00:00
-325 days, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-141 days, 0:00:00
-51 days, 0:00:00
-1 day, 0:00:00
-121 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-17 days, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-32 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-315 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-4 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-214 days, 0:00:00
-460 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-110 days, 0:00:00
-1 day, 0:00:00
-7 days, 0:00:00
-5 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-11 days, 0:00:00
-2 days, 0:00:00
-38 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-32 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-12 days, 0:00:00
-29 days, 0:00:00
-103 days, 0:00:00
-1 day, 0:00:00
-110 days, 0:00:00
-73 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-748 days, 0:00:00
-553 days, 0:00:00
-147 days, 0:00:00
-695 days, 0:00:00
-90 days, 0:00:00
-1 day, 0:00:00
-494 days, 0:00:00
-23 days, 0:00:00
-231 days, 0:00:00
-116 days, 0:00:00
-638 days, 0:00:00
-1 day, 0:00:00
-88 days, 0:00:00
-1 day, 0:00:00
-732 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-394 days, 0:00:00
-718 days, 0:00:00
-7 days, 0:00:00
-587 days, 0:00:00
-452 days, 0:00:00
-302 days, 0:00:00
-411 days, 0:00:00
-131 days, 0:00:00
-1 day, 0:00:00
-35 days, 0:00:00
-16 days, 0:00:00
-1 day, 0:00:00
-562 days, 0:00:00
-164 days, 0:00:00
-1 day, 0:00:00
-9 days, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-28 days, 0:00:00
-1 day, 0:00:00
-16 days, 0:00:00
-1 day, 0:00:00
-627 days, 0:00:00
-81 days, 0:00:00
-1 day, 0:00:00
-16 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-87 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-38 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-51 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-7 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-9 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-12 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-18 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-4 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-8 days, 0:00:00
-1 day, 0:00:00
-16 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-7 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-16 days, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-35 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-13 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-12 days, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-20 days, 0:00:00
-18 days, 0:00:00
-18 days, 0:00:00
-17 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-20 days, 0:00:00
-1 day, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-4 days, 0:00:00
-32 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-43 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-15 days, 0:00:00
-15 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-31 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-32 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-11 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-12 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-8 days, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-13 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-6 days, 0:00:00
-4 days, 0:00:00
-10 days, 0:00:00
-3 days, 0:00:00
-2 days, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-12 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-4 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-2 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-7 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-14 days, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-8 days, 0:00:00
-41 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-48 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-17 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-8 days, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-9 days, 0:00:00
-8 days, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-8 days, 0:00:00
-1 day, 0:00:00
-14 days, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-9 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-8 days, 0:00:00
-13 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-21 days, 0:00:00
-7 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-28 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-17 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-16 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-25 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-14 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-31 days, 0:00:00
-4 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-4 days, 0:00:00
-3 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-7 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-43 days, 0:00:00
-2 days, 0:00:00
-41 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-25 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-20 days, 0:00:00
-1 day, 0:00:00
-12 days, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-15 days, 0:00:00
-1 day, 0:00:00
-23 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-6 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-7 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-18 days, 0:00:00
-3 days, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-16 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-9 days, 0:00:00
-1 day, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-11 days, 0:00:00
-2 days, 0:00:00
-15 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-10 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-29 days, 0:00:00

In [2]:
import numpy as np
np.sum(lines)


Out[2]:
4110319

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(lines)
plt.xlabel('File Number')
plt.ylabel('Number of Posts')
plt.title('Number of posts remaining out of original 50000 per file')


Out[3]:
Text(0.5,1,'Number of posts remaining out of original 50000 per file')

In [4]:
x = [i for i in range(len(dates))]

y = [d.days for d in dates]

In [5]:
plt.rcParams['figure.figsize'] = [20,5]
plt.scatter(x, y, s=.2)
plt.xlabel('Post Number')
plt.ylabel('Days since 2008')


Out[5]:
Text(0,0.5,'Days since 2008')

In [6]:
import numpy as np
print("Guesslang=Python  and Parse=True  for:", np.sum([guess_and_parse[key] for key in guess_and_parse.keys()]))
print("Guesslang=Python  and Parse=False for:", np.sum([guess_not_parse[key] for key in guess_not_parse.keys()]))
print("Guesslang!=Python and Parse=True  for:", np.sum([parse_not_guess[key] for key in parse_not_guess.keys()]))


Guesslang=Python  and Parse=True  for: 1695196
Guesslang=Python  and Parse=False for: 1471445
Guesslang!=Python and Parse=True  for: 2234022

In [7]:
def get_bar(d):
    l = sorted([(key, d[key]) for key in d.keys()])
    x = [el[0] for el in l if el[0] < 4000]
    y = [el[1] for el in l if el[0] < 4000]
    return x, y

def get_scaled_bar(d, t):
    l = []
    for key in t.keys():
        if key in d:
            l.append((key, d[key]/t[key]))
        else:
            l.append((key, 0))
    l = sorted(l)
    x = [el[0] for el in l if el[0] < 4000]
    y = [el[1] for el in l if el[0] < 4000]
    return x, y

plt.rcParams['figure.figsize'] = [20,5]
fig, axes = plt.subplots(1, 2)
x, y = get_scaled_bar(guess_and_parse, total)
axes[0].bar(x, y, width=100)
axes[0].set_xlabel('Length of code block in characters')
axes[0].set_ylabel('Number of code blocks at this level')
axes[0].set_title('Percentage of blocks where Guesslang=Python and Parse=True (by code len)')
x, y = get_bar(guess_and_parse)
axes[1].bar(x, y, width=100)
axes[1].set_xlabel('Length of code block in characters')
axes[1].set_ylabel('Number of code blocks at this level')
axes[1].set_title('Number of blocks where Guesslang=Python and Parse=True (by code len)')


Out[7]:
Text(0.5,1,'Number of blocks where Guesslang=Python and Parse=True (by code len)')

In [8]:
fig, axes = plt.subplots(1, 2)
x, y = get_scaled_bar(guess_not_parse, total)
axes[0].bar(x, y, width=100)
axes[0].set_xlabel('Length of code block in characters')
axes[0].set_ylabel('Number of code blocks at this level')
axes[0].set_title('Percentage of blocks where Guesslang=Python and Parse=False (by code len)')
x, y = get_bar(guess_not_parse)
axes[1].bar(x, y, width=100)
axes[1].set_xlabel('Length of code block in characters')
axes[1].set_ylabel('Number of code blocks at this level')
axes[1].set_title('Number of blocks where Guesslang=Python and Parse=False (by code len)')


Out[8]:
Text(0.5,1,'Number of blocks where Guesslang=Python and Parse=False (by code len)')

In [9]:
fig, axes = plt.subplots(1, 2)
x, y = get_scaled_bar(parse_not_guess, total)
axes[0].bar(x, y, width=100)
axes[0].set_xlabel('Length of code block in characters')
axes[0].set_ylabel('Number of code blocks at this level')
axes[0].set_title('Percentage of blocks where Guesslang != Python and Parse=True (by code len)')
x, y = get_bar(parse_not_guess)
axes[1].bar(x, y, width=100)
axes[1].set_xlabel('Length of code block in characters')
axes[1].set_ylabel('Number of code blocks at this level')
axes[1].set_title('Number of blocks where Guesslang != Python and Parse=True (by code len)')


Out[9]:
Text(0.5,1,'Number of blocks where Guesslang != Python and Parse=True (by code len)')

In [10]:
x1, y1 = get_scaled_bar(guess_and_parse,total)
plt.bar(x1, y1, width=100)
x2, y2 = get_scaled_bar(guess_not_parse,total)
plt.bar(x2, y2, width=100, bottom=y1)
x3, y3 = get_scaled_bar(parse_not_guess,total)
plt.bar(x3, y3, width=100, bottom=([y1[i]+y2[i] for i in range(len(y1))]))


Out[10]:
<BarContainer object of 396 artists>

In [ ]:


In [ ]: