In [25]:
%pylab
from __future__ import print_function

%matplotlib inline


Using matplotlib backend: agg
Populating the interactive namespace from numpy and matplotlib

In [26]:
from __future__ import print_function

import csv
from src import main, utils
import sys

# Load every configured project, then keep only the ones whose
# "<name> <version> <level>" label is listed in `interests`.
projects = main.load_projects()
interests = ['argouml v0.26.2 method', 'mucommander v0.8.5 method']

p = []
for project in projects:
    if ' '.join((project.name, project.version, project.level)) in interests:
        p.append(project)

In [27]:
# Work on the first project that survived the `interests` filter.
project = p[0]
print(project)

# Inputs are loaded in the same order as before; the loaders live in
# src.main and may depend on each other's side effects.
repos = main.load_repos(project)
goldsets = main.load_goldsets(project)

queries = main.create_queries(project)
snapshot = main.create_release_corpus(project, repos)
changesets = main.create_corpus(
    project, repos, main.ChangesetCorpus, use_level=False
)

# Only the first element of each create_lda_model result is used here.
snapshot_lda, _ = main.create_lda_model(
    project, snapshot, None, "Release", use_level=True
)
changeset_lda, _ = main.create_lda_model(
    project, changesets, None, "Changeset", use_level=False
)

snapshot_ranks = main.read_ranks(project, "release")
changeset_ranks = main.read_ranks(project, "changeset")

# get_frms yields triples; re-key each one by its middle element so an
# entry can be looked up as frms[key] -> (first, last).
snapshot_frms = {
    key: (first, last)
    for first, key, last in main.get_frms(goldsets, snapshot_ranks)
}
changeset_frms = {
    key: (first, last)
    for first, key, last in main.get_frms(goldsets, changeset_ranks)
}


Project(name='argouml', printable_name='ArgoUML', version='v0.26.2', level='method', ref='refs/tags/VERSION_0_26_2', alpha=None, eta=None, passes=5, iterations=1000, num_topics=500, src_url='http://argouml-downloads.tigris.org/nonav/argouml-0.26.2/ArgoUML-0.26.2-src.zip', data_path='data/argouml/', full_path='data/argouml/v0.26.2/', src_path='data/argouml/v0.26.2/src')

In [28]:
def _sorted_topics(model, corpus, ids):
    """Map each selected document to its topics, heaviest topic first.

    Calls ``main.get_topics(model, corpus, by_ids=ids, full=False)`` and, for
    every ``(metadata, topics)`` pair it yields, stores ``topics`` sorted by
    descending second element under the key ``metadata[0]``.

    Parameters
    ----------
    model : LDA model handed straight to ``main.get_topics``.
    corpus : corpus handed straight to ``main.get_topics``.
    ids : iterable of document ids to select (copied into a fresh list for
        every call so the callee never shares a list between calls).

    Returns
    -------
    dict
        ``{metadata[0]: [(topic_id, weight), ...]}``; if the same key is
        yielded twice the last occurrence wins, as with plain assignment.
    """
    return dict(
        (metadata[0], sorted(topics, key=lambda pair: pair[1], reverse=True))
        for metadata, topics in main.get_topics(
            model, corpus, by_ids=list(ids), full=False
        )
    )


# Ids of the two issue queries inspected throughout this notebook.
issue_ids = ("5258", "5088")

# Topics of the two issue queries under each model.
q = _sorted_topics(snapshot_lda, queries, issue_ids)
qc = _sorted_topics(changeset_lda, queries, issue_ids)

# Topics of the snapshot document named in each *_frms entry (element [1]),
# inferred with the snapshot model (q*) and the changeset model (qc*).
q5258 = _sorted_topics(snapshot_lda, snapshot, [snapshot_frms[5258][1]])
qc5258 = _sorted_topics(changeset_lda, snapshot, [changeset_frms[5258][1]])

q5088 = _sorted_topics(snapshot_lda, snapshot, [snapshot_frms[5088][1]])
qc5088 = _sorted_topics(changeset_lda, snapshot, [changeset_frms[5088][1]])

In [29]:
def printer(q, model, threshold=0.05):
    """Print every query's topics and the prominent words of each topic.

    For each ``qid -> topics`` entry of ``q`` a header line with the number
    of topics is printed, then one line per ``(topic_id, weight)`` pair,
    then one line per topic word whose probability is at least ``threshold``.

    Parameters
    ----------
    q : dict
        Maps a query/document id to a list of ``(topic_id, weight)`` pairs.
    model : object
        Anything exposing ``show_topic(topic_id)`` that returns 2-item pairs.
        ``(probability, word)`` is the order this notebook's outputs show;
        ``(word, probability)`` is accepted too and normalised before the
        comparison, so the printed line is always "probability word".
    threshold : float, optional
        Minimum word probability to print. Defaults to 0.05, the value that
        used to be hard-coded.

    Returns
    -------
    None
    """
    import numbers  # local import keeps this cell self-contained

    for qid, topics in q.items():
        print(qid, "num topics:", len(topics))
        for t in topics:
            print("    ", t[0], t[1])
            for prob, term in model.show_topic(t[0]):
                # Swap when the pair arrives as (word, probability).
                if not isinstance(prob, numbers.Number):
                    prob, term = term, prob
                if prob >= threshold:
                    print("        {0} {1}".format(prob, term))

Ranks


In [30]:
# Show, issue by issue, the snapshot entry followed by the changeset entry.
for issue_id in (5258, 5088):
    for frms in (snapshot_frms, changeset_frms):
        print(frms[issue_id])


(1, 'org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName()')
(8138, 'org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName()')
(124, 'org.argouml.model.mdr.XmiWriterMDRImpl.write()')
(1, 'org.argouml.persistence.TestProfileConfigurationFilePersister.testWritePreviouslyLoadedProfile()')

In [31]:
# Dump the method-level goldset file for issue 5258 (shell escape; the path
# is hard-coded to the argouml v0.26.2 data directory).
!cat data/argouml/v0.26.2/goldsets/method/5258.txt




In [32]:
# Dump the method-level goldset file for issue 5088 (shell escape; the path
# is hard-coded to the argouml v0.26.2 data directory).
!cat data/argouml/v0.26.2/goldsets/method/5088.txt





Query words (preprocessed)


In [33]:
# Iterate the query corpus as (document, metadata) pairs.
# NOTE(review): this flag stays switched on after the cell runs — confirm
# that re-running earlier cells which read `queries` is unaffected.
queries.metadata = True
wanted_ids = ["5258", "5088"]
qs = [entry for entry in queries if entry[1][0] in wanted_ids]

for query, metadata in qs:
    # Most frequent entries first; each document entry is (word_id, freq).
    doc = sorted(query, key=lambda item: item[1], reverse=True)
    words = [(freq, queries.id2word[wid]) for wid, freq in doc]
    print(metadata[0], "num words:", len(words))
    for freq, term in words:
        print("        {1} ({0})".format(freq, term))


5088 num words: 49
        profiles (4)
        xmi (3)
        user (3)
        profile (3)
        write (3)
        save (2)
        files (2)
        defined (2)
        loaded (2)
        impl (2)
        mdr (2)
        writer (2)
        models (2)
        able (1)
        available (1)
        implemented (1)
        file (1)
        model (1)
        issue (1)
        release (1)
        aren (1)
        using (1)
        isn (1)
        zargo (1)
        creating (1)
        removed (1)
        configuration (1)
        removing (1)
        empty (1)
        won (1)
        seams (1)
        persister (1)
        usage (1)
        simply (1)
        written (1)
        depend (1)
        due (1)
        configured (1)
        deeper (1)
        directories (1)
        experimentally (1)
        flag (1)
        functionality (1)
        persist (1)
        prevents (1)
        tackle (1)
        unassigned (1)
        wasn (1)
        writing (1)
5258 num words: 6
        name (2)
        classifier (2)
        perspective (2)
        collaboration (2)
        rules (2)
        explorer (1)

Query topics


In [34]:
# Query topics under each model, separated by one blank line.
sections = [("Snapshot", q, snapshot_lda), ("Changeset", qc, changeset_lda)]
for position, (title, topics_by_id, lda) in enumerate(sections):
    if position:
        print()
    print(title)
    printer(topics_by_id, lda)


Snapshot
5258 num topics: 3
     194 0.583235451342
        0.178309175492 rule
        0.121059499556 perspective
        0.0762293638816 name
        0.0690858822929 rules
        0.0503684867632 explorer
     226 0.185504920139
        0.1856541011 machine
        0.11777883397 object
        0.102723364576 handle
        0.0773127578072 collaboration
        0.0647654447602 expr
        0.0556342824257 represented
        0.0545723769322 set
     464 0.148426295186
        0.388703354048 classifier
        0.0789009067262 object
5088 num topics: 18
     281 0.192892482112
        0.0969630906947 profiles
        0.0777460707084 profile
     283 0.134882719488
        0.0805840860895 writer
        0.0625096636339 component
        0.0515641337096 file
        0.0505557374166 xmi
     38 0.111432315008
        0.0837400317472 file
        0.052783578714 chooser
     119 0.0964613712182
        0.156409544424 file
     103 0.0691276760039
        0.16809440942 file
     111 0.0685104236137
        0.125885156498 step
        0.0697529957911 model
     481 0.0518654065039
        0.0674266557557 parse
        0.0570079740695 token
        0.0544558797888 writer
     139 0.0324502945153
        0.0955942380078 models
        0.0650571100652 model
     141 0.0322229933664
        0.0543910707269 xmi
     108 0.0314122256905
        0.0699067796998 model
        0.0612544940263 remove
     86 0.026662365181
        0.105853196686 reference
        0.0878734448854 string
        0.0645647436518 url
        0.0550209968393 name
     447 0.0240687789787
        0.15333403279 notation
     354 0.0225346023494
        0.0878789060677 project
     382 0.0210871915307
        0.139606144686 exception
        0.106314762066 implemented
        0.0974270629621 object
        0.0599530729317 handle
     214 0.0186729032508
        0.230609056123 data
        0.111611181192 usage
        0.0713278894831 type
     434 0.0181542447857
        0.219480177176 expression
        0.10163616281 body
        0.0751584535406 uml
        0.0602296707336 object
     6 0.0165884049916
        0.0637265671456 paint
     213 0.0154252143155
        0.176449741077 signal
        0.0613632526749 code
        0.0503169723679 set

Changeset
5258 num topics: 2
     250 0.480454833658
        0.14981641216 role
        0.0626290395827 classifier
     365 0.436545166285
        0.124552431151 list
        0.0938468895104 perspective
        0.0755053463939 explorer
5088 num topics: 19
     446 0.223695242894
        0.153329589266 profile
        0.143068021978 model
     426 0.12399276247
        0.0949520396923 model
        0.0696471492872 xmi
     472 0.102106097725
        0.0910955504975 profile
        0.0607702650632 model
     60 0.0894294001188
        0.26619347497 writer
        0.0603899936415 xmi
        0.0574402204277 version
     275 0.0573757019035
     442 0.0547833013152
        0.163713250373 issue
        0.0792524029923 missing
     413 0.0432923461318
     461 0.037831138752
        0.176006718296 align
        0.0922787388659 fig
     387 0.0363919063144
        0.123594389938 key
        0.115037429216 configuration
     364 0.0319705003843
        0.0794981898048 localizer
        0.0578702004864 save
        0.0561289380578 project
     459 0.03153287682
        0.170216589157 cur
        0.0720421850808 directories
        0.0697521781028 files
        0.0655348165511 seen
        0.0623599481029 file
     141 0.0280556318168
        0.122469551075 project
        0.0978741348229 file
        0.0726405979342 persister
        0.0669375074458 load
        0.0575545253566 url
     310 0.0280084956012
     96 0.0253951376442
        0.0565651169082 string
     94 0.017713858847
     340 0.0153925911799
        0.1388582756 release
        0.0937788467227 linus
        0.0749201810244 tolke
        0.0541570626906 responsible
     26 0.0145906663083
        0.128221160772 performance
     173 0.0128230167936
        0.0754531405559 downloads
        0.0554828998678 argo
        0.0547616867927 uml
     176 0.0116772979938
        0.105611764512 extended

Issue 5258 top method topics


In [35]:
# Issue 5258 method topics under each model, separated by one blank line.
sections = [("Snapshot", q5258, snapshot_lda), ("Changeset", qc5258, changeset_lda)]
for position, (title, topics_by_id, lda) in enumerate(sections):
    if position:
        print()
    print(title)
    printer(topics_by_id, lda)


Snapshot
org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName() num topics: 3
     194 0.816857912065
        0.178309175492 rule
        0.121059499556 perspective
        0.0762293638816 name
        0.0690858822929 rules
        0.0503684867632 explorer
     202 0.067793539522
        0.107613146742 elem
        0.0827978683518 perspective
        0.0708453920312 add
        0.0578630351812 delete
     464 0.0568779601772
        0.388703354048 classifier
        0.0789009067262 object

Changeset
org.argouml.ui.explorer.rules.GoClassifierToInstance.getRuleName() num topics: 2
     432 0.757164655495
        0.0893536710693 jar
        0.0678621781371 argouml
        0.0517429904434 org
     332 0.184247109211
        0.0834646177049 rule
        0.0673931412026 perspective

Issue 5088 top method topics


In [36]:
# Issue 5088 method topics under each model; every section is preceded by a
# blank line, including the first one.
sections = [("Snapshot", q5088, snapshot_lda), ("Changeset", qc5088, changeset_lda)]
for title, topics_by_id, lda in sections:
    print()
    print(title)
    printer(topics_by_id, lda)


Snapshot
org.argouml.model.mdr.XmiWriterMDRImpl.write() num topics: 6
     283 0.560131002273
        0.0805840860895 writer
        0.0625096636339 component
        0.0515641337096 file
        0.0505557374166 xmi
     82 0.155389811181
        0.112620395345 xmi
        0.100644430345 input
        0.0743053978246 exception
        0.0681277303619 stream
        0.0663342788556 reader
        0.0647220093709 file
     41 0.117416586732
        0.116364685128 exception
        0.113558278325 object
        0.0930610719178 invalid
        0.0875901359546 handle
        0.0700790042915 element
     135 0.0973695698302
     327 0.0250058497936
        0.118631551517 handle
        0.0784944389706 object
        0.0687597203891 command
        0.0598388243215 set
        0.0578894470115 model
        0.0522935577955 token
        0.0511620081769 execute
     462 0.0236659035944
        0.0555031990487 type

Changeset
org.argouml.persistence.TestProfileConfigurationFilePersister.testWritePreviouslyLoadedProfile() num topics: 10
     446 0.469148817839
        0.153329589266 profile
        0.143068021978 model
     462 0.151367576645
        0.234758074375 file
     426 0.116783321691
        0.0949520396923 model
        0.0696471492872 xmi
     60 0.0845542343172
        0.26619347497 writer
        0.0603899936415 xmi
        0.0574402204277 version
     417 0.0530253486527
        0.115612676069 resource
        0.0804154038388 icon
        0.0749234907511 loader
        0.052997765041 string
     447 0.0348505504431
        0.249974406751 uml
        0.105597470943 novosoft
     387 0.0251125129828
        0.123594389938 key
        0.115037429216 configuration
     497 0.0214877824164
        0.250963633269 name
        0.068723223098 xsd
     364 0.0179882222237
        0.0794981898048 localizer
        0.0578702004864 save
        0.0561289380578 project
     472 0.0120901889967
        0.0910955504975 profile
        0.0607702650632 model

In [ ]: