In [1]:
%matplotlib inline

In [101]:
from __future__ import print_function
from matplotlib import pyplot
import pandas
import numpy

In [3]:
import os
# Step up one directory so that relative paths used by later cells
# (`src`, `slides/`, `paper/figures/`) resolve from the parent directory.
# NOTE(review): not idempotent -- re-running this cell climbs one more level.
os.chdir(os.pardir)

In [4]:
# Hex colors for the plots. The plotting helpers in this notebook fill with
# pink, blue and green (in that order); orange is defined but not used by them.
pink, blue = "#f92672", "#6ec9dc"
orange, green = "#fd9738", "#a6e24d"

In [5]:
from __future__ import print_function

import csv
from src import main, utils
import sys

projects = main.load_projects()

# project -> {key: (first, third)} for each kind of ranking, where
# (first, key, third) are the 3-tuples produced by main.get_frms.
snapshot_frms = {}
changeset_frms = {}
historical_frms = {}


def _index_frms(goldsets, ranks):
    """Re-key the 3-tuples from ``main.get_frms`` by their second element.

    Each ``(first, key, third)`` tuple becomes ``key -> (first, third)``.
    Presumably ``key`` is a goldset/query id and ``first`` a rank -- TODO
    confirm against ``src.main``.
    """
    return {key: (first, third)
            for first, key, third in main.get_frms(goldsets, ranks)}


for project in projects:
    goldsets = main.load_goldsets(project)
    release_ranks = main.read_ranks(project, "release")
    commit_ranks = main.read_ranks(project, "changeset")
    try:
        temporal_ranks = main.read_ranks(project, "temporal")
    except IOError:
        # No "temporal" ranks could be read for this project: fall back to
        # an empty list so the project still gets a historical entry.
        temporal_ranks = []

    snapshot_frms[project] = _index_frms(goldsets, release_ranks)
    changeset_frms[project] = _index_frms(goldsets, commit_ranks)
    historical_frms[project] = _index_frms(goldsets, temporal_ranks)

In [6]:
# Pooled values across projects, split by project.level ('method' vs. the rest).
# *_all_* : every project of that level.
# *_6_*   : only projects that have at least one historical value.
snapshot_all_method, changeset_all_method = [], []
snapshot_all_class, changeset_all_class = [], []

snapshot_6_method, changeset_6_method, historical_6_method = [], [], []
snapshot_6_class, changeset_6_class, historical_6_class = [], [], []

for project in projects:
    # First element of every stored (first, third) tuple.
    snapshot_norm = [entry[0] for entry in snapshot_frms[project].values()]
    changeset_norm = [entry[0] for entry in changeset_frms[project].values()]
    historical_norm = [entry[0] for entry in historical_frms[project].values()]

    # Choose the destination lists once, then fill them uniformly.
    if project.level == 'method':
        all_snapshot, all_changeset = snapshot_all_method, changeset_all_method
        sub_snapshot, sub_changeset, sub_historical = (
            snapshot_6_method, changeset_6_method, historical_6_method)
    else:
        all_snapshot, all_changeset = snapshot_all_class, changeset_all_class
        sub_snapshot, sub_changeset, sub_historical = (
            snapshot_6_class, changeset_6_class, historical_6_class)

    all_snapshot.extend(snapshot_norm)
    all_changeset.extend(changeset_norm)

    if historical_norm:
        sub_snapshot.extend(snapshot_norm)
        sub_changeset.extend(changeset_norm)
        sub_historical.extend(historical_norm)

In [7]:
# project -> list holding the first element of each stored (first, third) tuple.
snapshot_norms = {}
changeset_norms = {}
historical_norms = {}

for project in projects:
    snapshot_norms[project] = [entry[0] for entry in snapshot_frms[project].values()]
    changeset_norms[project] = [entry[0] for entry in changeset_frms[project].values()]
    historical_norms[project] = [entry[0] for entry in historical_frms[project].values()]

In [99]:
def draw_save_boxplot(filename, title, data, labels):
    """Draw a notched box plot of ``data`` and save it as a PDF.

    The plot is drawn on the current pyplot figure, with the y-axis inverted,
    and written to ``slides/box-<filename>.pdf`` relative to the working
    directory.

    Parameters
    ----------
    filename : str
        Stem inserted into the output file name.
    title : str
        Title placed on the plot.
    data : sequence of sequences of numbers
        One series per box; handed to ``pyplot.boxplot`` unchanged.
    labels : sequence of str
        One tick label per series.

    The current figure is closed afterwards even if plotting or saving fails,
    so a failed call cannot leave a half-drawn figure behind for the next one
    to draw on.
    """
    try:
        box = pyplot.boxplot(data, labels=labels, notch=True, patch_artist=True)
        pyplot.gca().invert_yaxis()

        # zip() stops at the shorter sequence: only the first three boxes
        # receive a palette color.
        for patch, color in zip(box['boxes'], [pink, blue, green]):
            patch.set_facecolor(color)

        # Every other box-plot element is drawn in black.
        for part in ('whiskers', 'means', 'medians', 'caps', 'fliers'):
            for artist in box[part]:
                artist.set_color("#000000")

        pyplot.title(title)
        pyplot.savefig("slides/box-" + filename + ".pdf")
    finally:
        pyplot.close()
    
def draw_save_violin(filename, title, data, labels):
    """Draw a violin plot of ``data`` and save it as a PDF.

    The plot is drawn on the current pyplot figure, with the y-axis inverted
    and means shown (no medians, no extrema), and written to
    ``slides/violin-<filename>.pdf`` relative to the working directory.

    Parameters
    ----------
    filename : str
        Stem inserted into the output file name.
    title : str
        Title placed on the plot.
    data : sequence of sequences of numbers
        One series per violin; handed to ``pyplot.violinplot`` unchanged.
    labels : sequence of str
        One tick label per series.

    The current figure is closed afterwards even if plotting or saving fails,
    so a failed call cannot leave a half-drawn figure behind for the next one
    to draw on.
    """
    try:
        violin = pyplot.violinplot(data, showmeans=True, showmedians=False, showextrema=False)
        ax = pyplot.gca()
        ax.invert_yaxis()

        # One tick per violin at x = 1..len(data), with a unit margin each side.
        ax.set_xlim(0, len(data) + 1)
        ax.set_xticks(list(range(1, len(data) + 1)))
        ax.set_xticklabels(labels)

        # zip() stops at the shorter sequence: only the first three violins
        # receive a palette color.
        for body, color in zip(violin['bodies'], [pink, blue, green]):
            body.set_color(color)
            body.set_alpha(1.0)

        # Restyle whichever summary lines violinplot actually returned.
        for key in ('cmeans', 'cmedians'):
            if key in violin:
                summary_line = violin[key]
                summary_line.set_color("#000000")
                summary_line.set_linewidth(2)
                summary_line.set_linestyle('dotted')

        pyplot.title(title)
        pyplot.savefig("slides/violin-" + filename + ".pdf")
    finally:
        pyplot.close()

In [100]:
# Render every figure twice: once as box plots, once as violin plots.
# The name `draw_save` remains bound to the last renderer after the loop; it
# is left unchanged so any code relying on that keeps working.
for draw_save in [draw_save_boxplot, draw_save_violin]:
    for project in projects:
        name_parts = [project.printable_name, project.version, project.level]
        plot_title = " ".join(
            [project.printable_name, project.version, project.level + '-level'])

        draw_save(
            filename="-".join(name_parts),
            title=plot_title,
            data=[snapshot_norms[project], changeset_norms[project]],
            labels=["Snapshot", "Changesets"]
            )

        # A project without historical data has an empty list here. Skip its
        # three-way figure: there is nothing to draw for the third series (and
        # a violin cannot be computed from an empty dataset). This matches the
        # guard used when pooling the "historical" subsets.
        if historical_norms[project]:
            draw_save(
                filename="-".join(name_parts + ["historical"]),
                title=plot_title,
                data=[snapshot_norms[project], changeset_norms[project], historical_norms[project]],
                labels=["Snapshot", "Changesets", "Historical"]
                )

    draw_save(
        filename="all-full-class",
        title="Overall class-level",
        data=[snapshot_6_class, changeset_6_class, historical_6_class],
        labels=["Snapshot", "Changesets", "Historical"]
        )

    draw_save(
        filename="all-full-method",
        title="Overall method-level",
        data=[snapshot_6_method, changeset_6_method, historical_6_method],
        labels=["Snapshot", "Changesets", "Historical"]
        )

In [143]:
# Illustrative figure: 20 integers drawn uniformly from 1..50 (inclusive).
# Uses draw_save_violin directly so the cell does not depend on whichever
# renderer a previously-run loop last bound to `draw_save`.
# A locally seeded generator keeps the figure reproducible without touching
# numpy's global random state; randint's upper bound is exclusive, hence 51.
example_rng = numpy.random.RandomState(0)
draw_save_violin(
    filename="example",
    title=" ",
    data=[ example_rng.randint(1, 51, 20) ],
    labels=[" "]
    )

In [136]:
# Illustrative "good" distribution: 90 values equal to 1 plus 10 integers
# drawn uniformly from 10..50 (inclusive).
# Uses draw_save_violin directly so the cell does not depend on whichever
# renderer a previously-run loop last bound to `draw_save`.
# A locally seeded generator keeps the figure reproducible without touching
# numpy's global random state; randint's upper bound is exclusive, hence 51.
good_rng = numpy.random.RandomState(0)
draw_save_violin(
    filename="good-example",
    title=" ",
    data=[ numpy.concatenate((numpy.ones(90), good_rng.randint(10, 51, 10))) ],
    labels=[" "]
    )

In [132]:
# Illustrative "bad" distribution: 90 integers drawn uniformly from 40..50
# (inclusive) plus 10 values equal to 1.
# Uses draw_save_violin directly so the cell does not depend on whichever
# renderer a previously-run loop last bound to `draw_save`.
# A locally seeded generator keeps the figure reproducible without touching
# numpy's global random state; randint's upper bound is exclusive, hence 51.
bad_rng = numpy.random.RandomState(0)
draw_save_violin(
    filename="bad-example",
    title=" ",
    data=[ numpy.concatenate((bad_rng.randint(40, 51, 90), numpy.ones(10))) ],
    labels=[" "]
    )

In [ ]:
# For each level, compare the snapshot and changeset values key by key and
# report how many keys fall into each bucket of absolute difference.
# Buckets are cumulative: "ones" is within "same", within "in10", within
# "in50"; "other" holds the keys whose difference is 50 or more.
for lvl in ['class', 'method']:
    print()
    print(lvl)
    ones = list()
    same = list()
    in10 = list()
    in50 = list()
    other = list()
    skips = list()
    total = 0

    for project in projects:
        if project.level != lvl:
            continue

        for gid, changeset_entry in changeset_frms[project].items():
            # Only keys present in both result sets can be compared.
            if gid not in snapshot_frms[project]:
                skips.append(gid)
                continue

            total += 1
            changeset_frm = changeset_entry[0]
            snapshot_frm = snapshot_frms[project][gid][0]
            a = abs(snapshot_frm - changeset_frm)
            if snapshot_frm == 1 and changeset_frm == 1:
                ones.append(gid)
            if a == 0:
                same.append(gid)
            if a < 10:
                in10.append(gid)
            if a < 50:
                in50.append(gid)
            else:
                other.append(gid)

    # With nothing compared the ratios are undefined: report nan instead of
    # raising ZeroDivisionError and aborting the remaining levels.
    denom = float(total) if total else float('nan')

    print("ones:", len(ones), len(ones) / denom)
    print("same:", len(same), len(same) / denom)
    print("in10:", len(in10), len(in10) / denom)
    print("in50:", len(in50), len(in50) / denom)
    print("other:", len(other), len(other) / denom)
    print("total:", total)
    print("skipped:", len(skips))

In [ ]:
# For each level, compare the historical, snapshot and changeset values key
# by key and report how many keys fall into each bucket of spread
# (largest minus smallest of the three values).
# Buckets are cumulative: "ones" is within "same", within "in10", within
# "in50"; "other" holds the keys whose spread is 50 or more.
for lvl in ['class', 'method']:
    print()
    print(lvl)
    ones = list()
    same = list()
    in10 = list()
    in50 = list()
    other = list()
    skips = list()
    total = 0

    for project in projects:
        if project.level != lvl:
            continue

        for gid, historical_entry in historical_frms[project].items():
            # Only keys present in all three result sets can be compared.
            if gid not in snapshot_frms[project] or gid not in changeset_frms[project]:
                skips.append(gid)
                continue

            total += 1
            historical_frm = historical_entry[0]
            snapshot_frm = snapshot_frms[project][gid][0]
            changeset_frm = changeset_frms[project][gid][0]
            values = (historical_frm, snapshot_frm, changeset_frm)
            # max - min is never negative, so no abs() is needed.
            a = max(values) - min(values)
            if historical_frm == 1 and snapshot_frm == 1 and changeset_frm == 1:
                ones.append(gid)
            if a == 0:
                same.append(gid)
            if a < 10:
                in10.append(gid)
            if a < 50:
                in50.append(gid)
            else:
                other.append(gid)

    # With nothing compared the ratios are undefined: report nan instead of
    # raising ZeroDivisionError and aborting the remaining levels.
    denom = float(total) if total else float('nan')

    print("ones:", len(ones), len(ones) / denom)
    print("same:", len(same), len(same) / denom)
    print("in10:", len(in10), len(in10) / denom)
    print("in50:", len(in50), len(in50) / denom)
    print("other:", len(other), len(other) / denom)
    print("total:", total)
    print("skipped:", len(skips))

In [ ]:
# Box plots saved for the paper. Each figure is 4 units tall and its width
# grows with the number of series shown.
width_per_box = 3.11 / 2

# (output path, tick labels, series). No titles are set on these figures.
paper_figures = [
    # RQ1 overall method-level
    ('paper/figures/rq1-overall-method.pdf',
     ["Snapshot", "Changesets"],
     [snapshot_all_method, changeset_all_method]),
    # RQ1 overall class-level
    ('paper/figures/rq1-overall-class.pdf',
     ["Snapshot", "Changesets"],
     [snapshot_all_class, changeset_all_class]),
    # RQ2 overall method-level
    ('paper/figures/rq2-overall-method.pdf',
     ["Snapshot", "Changesets", "Historical"],
     [snapshot_6_method, changeset_6_method, historical_6_method]),
    # RQ2 overall class-level
    ('paper/figures/rq2-overall-class.pdf',
     ["Snapshot", "Changesets", "Historical"],
     [snapshot_6_class, changeset_6_class, historical_6_class]),
]

for pdf_path, tick_labels, series in paper_figures:
    # pyplot.boxplot draws on the figure that was just created.
    fig = pyplot.figure(figsize=(width_per_box * len(series), 4))
    pyplot.boxplot(series, labels=tick_labels, widths=0.2)
    fig.tight_layout()
    fig.savefig(pdf_path)

In [ ]:


In [ ]: