notebook.community

Edit and run



In [18]:

    
%pylab
from __future__ import print_function

%matplotlib inline









    



Using matplotlib backend: agg
Populating the interactive namespace from numpy and matplotlib



In [19]:

    
from __future__ import print_function

import csv
from src import main, utils
import sys

# Load every configured project, then keep only those whose
# "<name> <version> <level>" label is one we want to study.
projects = main.load_projects()
interests = ['argouml v0.26.2 method', 'mucommander v0.8.5 method']

p = []
for project in projects:
    label = ' '.join((project.name, project.version, project.level))
    if label in interests:
        p.append(project)



In [20]:

    
# Pick the second project kept in `p`; the recorded output of this cell
# shows it to be muCommander v0.8.5 at method level.
project = p[1]
print(project)

# Project inputs.
repos = main.load_repos(project)
goldsets = main.load_goldsets(project)
queries = main.create_queries(project)

# The two corpora being compared.
snapshot = main.create_release_corpus(project, repos)
changesets = main.create_corpus(project, repos, main.ChangesetCorpus, use_level=False)

# One LDA model per corpus; the second return value is not used here.
snapshot_lda, _ = main.create_lda_model(project, snapshot, None, "Release", use_level=True)
changeset_lda, _ = main.create_lda_model(project, changesets, None, "Changeset", use_level=False)

snapshot_ranks = main.read_ranks(project, "release")
changeset_ranks = main.read_ranks(project, "changeset")

# Re-key every (x, y, z) triple from get_frms by its middle element, so a
# lookup such as snapshot_frms[37] gives back the (x, z) pair.
snapshot_frms = {y: (x, z) for x, y, z in main.get_frms(goldsets, snapshot_ranks)}
changeset_frms = {y: (x, z) for x, y, z in main.get_frms(goldsets, changeset_ranks)}









    



Project(name='mucommander', printable_name='muCommander', version='v0.8.5', level='method', ref='refs/tags/release_0_8_5', alpha=None, eta=None, passes=5, iterations=1000, num_topics=500, src_url='http://trac.mucommander.com/changeset/3794/tags/release_0_8_5?old_path=%2F&format=zip', data_path='data/mucommander/', full_path='data/mucommander/v0.8.5/', src_path='data/mucommander/v0.8.5/src')



In [21]:

    
def _sorted_topics(model, corpus, ids):
    """Map each selected document to its topics, strongest first.

    Calls ``main.get_topics(model, corpus, by_ids=ids, full=False)`` and, for
    every ``(doc, topics)`` pair it yields, stores ``topics`` sorted by
    descending ``x[1]`` under the key ``doc[0]``.

    NOTE(review): ``doc[0]`` looks like the document identifier (an issue id
    or a method signature, judging by the recorded output) -- confirm against
    ``main.get_topics``.

    Returns:
        dict: ``doc[0]`` -> list of topic entries sorted by ``entry[1]``,
        largest first.
    """
    ranked = dict()
    for doc, topics in main.get_topics(model, corpus, by_ids=ids, full=False):
        ranked[doc[0]] = sorted(topics, key=lambda x: x[1], reverse=True)
    return ranked


# Topics of the two issue queries under each model.
q = _sorted_topics(snapshot_lda, queries, ["37", "142"])
qc = _sorted_topics(changeset_lda, queries, ["37", "142"])

# Topics of the method named in each model's FRM entry for issue 37.
# Both models are queried against the snapshot corpus, as in the original cell.
q37 = _sorted_topics(snapshot_lda, snapshot, [snapshot_frms[37][1]])
qc37 = _sorted_topics(changeset_lda, snapshot, [changeset_frms[37][1]])

# Same for issue 142.
q142 = _sorted_topics(snapshot_lda, snapshot, [snapshot_frms[142][1]])
qc142 = _sorted_topics(changeset_lda, snapshot, [changeset_frms[142][1]])



In [22]:

    
def printer(q, model, threshold=0.05):
    """Print every document's topics together with each topic's heavy words.

    For each ``(doc_id, topics)`` item of ``q`` a header line with the number
    of topics is printed, then one line per topic showing ``t[0]`` and
    ``t[1]``, then the entries of ``model.show_topic(t[0])`` whose first
    element is at least ``threshold``.

    Args:
        q: dict mapping a document id to a list of indexable topic entries.
        model: object exposing ``show_topic(topic_id)``. The filter compares
            element 0 of each returned entry with ``threshold``, so entries
            are assumed to be ``(probability, word)`` pairs, which matches
            the output recorded in this notebook -- confirm if the topic
            model library is upgraded.
        threshold: minimum value of an entry's first element for it to be
            printed. Defaults to 0.05, the cutoff previously hard-coded.
    """
    for qid, topics in q.items():
        print(qid, "num topics:", len(topics))
        for t in topics:
            print("    ", t[0], t[1])
            for word in model.show_topic(t[0]):
                if word[0] >= threshold:
                    print("        {0} {1}".format(*word))

Ranks



In [23]:

    
# For each issue, show the snapshot entry followed by the changeset entry.
for issue_id in (37, 142):
    print(snapshot_frms[issue_id])
    print(changeset_frms[issue_id])









    



(1, 'com.mucommander.ui.main.menu.MainMenuBar.MainMenuBar(MainFrame)')
(303, 'com.mucommander.ui.action.impl.MinimizeWindowAction.performAction()')
(536, 'com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run()')
(1, 'com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run()')



In [28]:

    
# Dump the method-level goldset file for issue 37 (shell escape; the path is relative to the working directory).
!cat data/mucommander/v0.8.5/goldsets/method/37.txt









    



com.mucommander.ui.action.impl.MaximizeWindowAction.MaximizeWindowAction(MainFrame,Hashtable)
com.mucommander.ui.action.impl.MaximizeWindowAction.performAction()
com.mucommander.ui.action.impl.MinimizeWindowAction.MinimizeWindowAction(MainFrame,Hashtable)
com.mucommander.ui.action.impl.MinimizeWindowAction.performAction()
com.mucommander.ui.main.menu.MainMenuBar.MainMenuBar(MainFrame)



In [29]:

    
# Dump the method-level goldset file for issue 142 (shell escape; the path is relative to the working directory).
!cat data/mucommander/v0.8.5/goldsets/method/142.txt









    



com.mucommander.ui.main.DrivePopupButton.getPopupMenu()
com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run()

Query words (preprocessed)



In [24]:

    
# NOTE(review): iterating `queries` must yield (doc, metadata) pairs for the
# selection below to work; presumably that requires `queries.metadata = True`
# to have been set somewhere -- confirm.
qs = [pair for pair in queries if pair[1][0] in ["37", "142"]]

for query, metadata in qs:
    # Order the (word id, frequency) pairs by frequency, highest first.
    by_freq = sorted(query, key=lambda x: x[1], reverse=True)
    words = [(freq, queries.id2word[wid]) for wid, freq in by_freq]
    print(metadata[0], "num words:", len(words))
    for entry in words:
        print("        {1} ({0})".format(*entry))

Query topics



In [25]:

    
# Report the query topics for both models, with a blank line between sections.
sections = [("Snapshot", q, snapshot_lda), ("Changeset", qc, changeset_lda)]
for position, (heading, ranked, model) in enumerate(sections):
    if position:
        print()
    print(heading)
    printer(ranked, model)









    



Snapshot
142 num topics: 13
     486 0.200052424627
        0.225407162454 java
        0.0667966365939 version
     129 0.152348084849
        0.0836325698865 drive
     197 0.0949708839446
        0.335195301674 component
        0.151223520535 popup
        0.0779508804518 components
        0.0601404620095 toolbar
        0.0592926573171 visible
     433 0.0872638102855
        0.0705262831764 folder
        0.0665606935383 file
     365 0.0868366789345
        0.182387091563 names
        0.0747587839647 flush
        0.0729320850512 col
        0.0527956478951 tbl
     254 0.0799372199553
        0.146340723821 file
        0.0801011442289 icons
        0.0597020707911 system
        0.0544446927419 code
     75 0.0684727683493
        0.213794985131 main
        0.210647551907 frame
        0.143999368226 action
        0.143530673908 properties
        0.0719823945338 object
        0.0715686215803 string
        0.070676652891 hashtable
     397 0.0486127656029
        0.24105348728 button
     472 0.0467388000592
        0.11642986966 com
        0.11617010148 mucommander
        0.100808034392 file
        0.0705926721623 action
        0.0540481217409 impl
     168 0.0329568419545
        0.108815228728 lock
        0.0809228134008 locked
        0.0610304981422 exception
        0.0561012886858 returns
     315 0.0315678826248
        0.268987406066 thread
        0.0687648436667 interrupted
        0.0573349765927 flatten
     385 0.0247943755324
        0.120548772933 archive
        0.0825208321572 language
     133 0.0216913657193
        0.146466639523 run
        0.106935582251 root
        0.0881405899222 file
        0.0584709143142 set
37 num topics: 8
     280 0.28018723305
        0.223045774075 menu
        0.0922777930903 item
        0.0766523325734 action
        0.0556715337321 mnemonic
        0.0534673653631 add
     21 0.155949417791
        0.0887366858441 mac
        0.0883327991072 href
        0.084118802622 comp
        0.0704069827643 data
        0.0661999626631 string
        0.0655555700563 component
        0.0590099396551 length
     442 0.141483490874
        0.0575465278475 file
        0.0504697109713 args
     192 0.11032643544
        0.0989449484066 command
        0.0858638338666 file
        0.0571865437272 image
        0.0538139100016 zoom
     483 0.0943409094178
        0.418102915379 window
     31 0.0639773027298
        0.299975998009 close
        0.0924149427924 exception
        0.0655813913721 window
        0.0563983718872 recall
     345 0.0516126355569
        0.192667805245 standard
        0.0863086947733 code
        0.0502528207036 vmsf
     492 0.0474559084731
        0.405491679193 action
        0.0963417533379 descriptor
        0.0667780136219 register
        0.0570310172299 factory

Changeset
142 num topics: 6
     392 0.554182832766
        0.127821431271 popup
        0.0687645755657 drive
        0.0681926443664 menu
        0.0653349157637 volumes
        0.056665808695 extended
        0.0506841836654 name
     498 0.232092916911
        0.254498373515 java
        0.138406186078 awt
        0.123127139731 javax
        0.113667460008 swing
        0.076517022349 event
        0.0573646151563 util
     230 0.0657026580669
        0.182948589521 button
        0.071220824774 icon
     265 0.063169742044
        0.113010920159 exception
        0.104664089314 token
        0.0732569416298 read
        0.0593919983333 file
     342 0.0306960529912
        0.13280234228 java
        0.0949459257292 jar
        0.0651130180385 jcifs
        0.0585877136027 stubs
        0.0559186799659 osx
        0.0535961400055 compiled
        0.0531566388022 cifs
        0.0526944762231 eawt
     400 0.0282187029634
        0.242370612448 locale
        0.163081303964 string
        0.055583693534 equals
37 num topics: 9
     384 0.239666994697
        0.0718084188726 dialog
        0.0548479182608 rename
        0.0537649172634 batch
        0.0511609860499 action
     197 0.158004862457
        0.308495661939 menu
        0.181199801946 item
        0.0880851347601 add
        0.0603725913346 bookmarks
     69 0.146764836686
        0.173719160635 version
        0.0896130822056 java
        0.0841533575052 family
        0.0789837995046 mac
        0.0636027708019 windows
        0.0598492326121 starts
     489 0.10379659121
        0.189649351607 image
        0.119280188387 zoom
        0.0699836118552 table
        0.0691270331437 factor
     76 0.0870177726898
        0.150192555606 window
        0.135369924693 focus
        0.11055842372 frame
        0.109025490271 main
        0.0561341944788 request
     89 0.0576161412711
        0.078923839402 application
     106 0.0539485935636
        0.0883001239405 file
        0.0731894320248 code
     138 0.0523271327189
        0.127466604678 added
        0.097290962247 edit
     13 0.0491728641793
        0.28015154053 output
        0.0985202039166 sets
        0.0940799075007 standard
        0.0833517455525 debug
        0.0726743964374 messages

Issue 37 top method topics



In [26]:

    
# Report the issue 37 method topics for both models, with a blank line between sections.
sections = [("Snapshot", q37, snapshot_lda), ("Changeset", qc37, changeset_lda)]
for position, (heading, ranked, model) in enumerate(sections):
    if position:
        print()
    print(heading)
    printer(ranked, model)









    



Snapshot
com.mucommander.ui.main.menu.MainMenuBar.MainMenuBar(MainFrame) num topics: 2
     280 0.895544528469
        0.223045774075 menu
        0.0922777930903 item
        0.0766523325734 action
        0.0556715337321 mnemonic
        0.0534673653631 add
     492 0.104051366312
        0.405491679193 action
        0.0963417533379 descriptor
        0.0667780136219 register
        0.0570310172299 factory

Changeset
com.mucommander.ui.action.impl.MinimizeWindowAction.performAction() num topics: 3
     76 0.410880046182
        0.150192555606 window
        0.135369924693 focus
        0.11055842372 frame
        0.109025490271 main
        0.0561341944788 request
     472 0.240844841356
        0.137835824374 events
        0.137234417418 mode
        0.121380838756 thread
        0.0711481876135 action
        0.062647338085 code
        0.0575339382588 separate
     392 0.237830668017
        0.127821431271 popup
        0.0687645755657 drive
        0.0681926443664 menu
        0.0653349157637 volumes
        0.056665808695 extended
        0.0506841836654 name

Issue 142 top method topics



In [27]:

    
# Report the issue 142 method topics for both models; each section is
# preceded by a blank line.
for heading, ranked, model in [("Snapshot", q142, snapshot_lda),
                               ("Changeset", qc142, changeset_lda)]:
    print()
    print(heading)
    printer(ranked, model)









    



Snapshot
com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run() num topics: 12
     111 0.359103937365
        0.392817788721 icon
        0.143936260431 file
     175 0.115558589428
        0.647603479703 name
        0.12470674585 string
     254 0.101636680774
        0.146340723821 file
        0.0801011442289 icons
        0.0597020707911 system
        0.0544446927419 code
     235 0.0851824939718
     362 0.0807915321745
        0.130791849747 focus
        0.0797450868169 request
        0.067886389463 component
        0.0567156286773 swing
     479 0.0692591462939
        0.194917588524 volumes
        0.0928604624774 volume
        0.0650423475007 file
     280 0.0600650649487
        0.223045774075 menu
        0.0922777930903 item
        0.0766523325734 action
        0.0556715337321 mnemonic
        0.0534673653631 add
     129 0.0521915888343
        0.0836325698865 drive
     330 0.0169814388655
        0.16194433017 items
        0.0698979458364 completer
        0.0619325162772 field
     244 0.0143981536032
        0.0577365073415 disk
     365 0.0142993914126
        0.182387091563 names
        0.0747587839647 flush
        0.0729320850512 col
        0.0527956478951 tbl
     270 0.0106937990844
        0.112277294282 file
        0.100782996539 simple
        0.0992534136413 attributes
        0.0882915046154 attrs
        0.0610759905351 equals

Changeset
com.mucommander.ui.main.DrivePopupButton.RefreshDriveNamesAndIcons.run() num topics: 3
     392 0.910431074559
        0.127821431271 popup
        0.0687645755657 drive
        0.0681926443664 menu
        0.0653349157637 volumes
        0.056665808695 extended
        0.0506841836654 name
     7 0.0640272778848
        0.402932576847 icon
        0.0678853110107 set
        0.055033889188 manager
        0.0501018197733 file
     358 0.0144276249768
        0.116535560733 file
        0.0976123504672 editor
        0.0932814869926 frame
        0.0578793202131 viewer



In [ ]: