In [18]:
%pylab
from __future__ import print_function
%matplotlib inline
In [19]:
from __future__ import print_function
import csv
from src import main, utils
import sys
# Load every known project, then keep only those whose
# "<name> <version> <level>" label appears in `interests`.
projects = main.load_projects()
interests = ['argouml v0.26.2 method', 'mucommander v0.8.5 method']

p = []
for project in projects:
    if ' '.join((project.name, project.version, project.level)) in interests:
        p.append(project)
In [20]:
# Work on the second of the projects selected into `p`, and echo which one it is.
# NOTE(review): presumably this is mucommander (the goldset files dumped further
# down live under data/mucommander), but the position depends on the order
# returned by main.load_projects() -- confirm.
project = p[1]
print(project)

# Per-project inputs.
repos = main.load_repos(project)
goldsets = main.load_goldsets(project)
queries = main.create_queries(project)

# Two corpora built from the same repositories: the release corpus and the
# changeset corpus.
snapshot = main.create_release_corpus(project, repos)
changesets = main.create_corpus(
    project, repos, main.ChangesetCorpus, use_level=False
)

# One LDA model per corpus; the second value returned by create_lda_model is
# discarded into `_`.
snapshot_lda, _ = main.create_lda_model(
    project, snapshot, None, "Release", use_level=True
)
changeset_lda, _ = main.create_lda_model(
    project, changesets, None, "Changeset", use_level=False
)

# Previously stored ranks for each model.
snapshot_ranks = main.read_ranks(project, "release")
changeset_ranks = main.read_ranks(project, "changeset")

# main.get_frms yields triples; re-key each one by its middle element and keep
# the outer two as the value.
snapshot_frms = {
    key: (first, last)
    for first, key, last in main.get_frms(goldsets, snapshot_ranks)
}
changeset_frms = {
    key: (first, last)
    for first, key, last in main.get_frms(goldsets, changeset_ranks)
}
In [21]:
def _sorted_topics(model, corpus, ids):
    """Collect main.get_topics results for the given ids into a dict.

    Every item yielded by
    ``main.get_topics(model, corpus, by_ids=ids, full=False)`` is unpacked as
    ``(doc, topics)``.

    Parameters
    ----------
    model : model object handed straight to ``main.get_topics``.
    corpus : corpus object handed straight to ``main.get_topics``.
    ids : list of ids forwarded as ``by_ids``.

    Returns
    -------
    dict mapping ``doc[0]`` to ``topics`` sorted by the second element of each
    entry, largest first. If two yielded items share the same ``doc[0]``, the
    later one wins.
    """
    return dict(
        (doc[0], sorted(topics, key=lambda entry: entry[1], reverse=True))
        for doc, topics in main.get_topics(model, corpus, by_ids=ids, full=False)
    )


# Topics of queries "37" and "142" under each model.
q = _sorted_topics(snapshot_lda, queries, ["37", "142"])
qc = _sorted_topics(changeset_lda, queries, ["37", "142"])

# Topics of the snapshot document referenced by the frms entry for 37.
# Both models are applied to the *snapshot* corpus here.
q37 = _sorted_topics(snapshot_lda, snapshot, [snapshot_frms[37][1]])
qc37 = _sorted_topics(changeset_lda, snapshot, [changeset_frms[37][1]])

# Same lookup for the frms entry for 142.
q142 = _sorted_topics(snapshot_lda, snapshot, [snapshot_frms[142][1]])
qc142 = _sorted_topics(changeset_lda, snapshot, [changeset_frms[142][1]])
In [22]:
def printer(q, model, min_prob=0.05):
    """Print each entry of `q` with its topics and each topic's heaviest words.

    Parameters
    ----------
    q : dict
        Maps an id to a sequence of entries; for each entry ``t``, ``t[0]`` is
        passed to ``model.show_topic`` and ``t[0]``/``t[1]`` are printed.
    model : object
        Anything exposing ``show_topic(topic_id)`` that returns an iterable of
        2-tuples pairing a word with a number.
    min_prob : float, optional
        Only words whose number is at least this value are printed.
        Defaults to 0.05, the cutoff that used to be hard-coded.

    Notes
    -----
    Older gensim releases return ``(probability, word)`` from ``show_topic``
    while newer ones return ``(word, probability)``. The numeric element is
    detected so the threshold works with either order; the tuple is printed
    in whatever order the model supplied it.
    """
    # Local import so the cell stays self-contained on a fresh kernel.
    from numbers import Number

    for qid, topics in q.items():
        print(qid, "num topics:", len(topics))
        for t in topics:
            print(" ", t[0], t[1])
            for word in model.show_topic(t[0]):
                # Pick whichever side of the pair is the number.
                prob = word[0] if isinstance(word[0], Number) else word[1]
                if prob >= min_prob:
                    print(" {0} {1}".format(*word))
In [23]:
# Show the frms entries for 37 and 142 under both models, one per line:
# snapshot then changeset for each id.
print(
    snapshot_frms[37],
    changeset_frms[37],
    snapshot_frms[142],
    changeset_frms[142],
    sep="\n"
)
In [28]:
# Dump the method-level goldset file 37.txt of mucommander v0.8.5 via the shell.
!cat data/mucommander/v0.8.5/goldsets/method/37.txt
In [29]:
# Dump the method-level goldset file 142.txt of mucommander v0.8.5 via the shell.
!cat data/mucommander/v0.8.5/goldsets/method/142.txt
In [24]:
# List the words of queries "37" and "142", most frequent first.
# NOTE(review): iterating `queries` must yield (document, metadata) pairs for
# this cell to work; the original cell carried a commented-out
# `queries.metadata = True`, so that flag presumably has to be enabled
# upstream -- confirm.
qs = list(entry for entry in queries if entry[1][0] in ["37", "142"])

for query, metadata in qs:
    # Order the (word id, frequency) pairs by descending frequency.
    doc = list(query)
    doc.sort(key=lambda pair: pair[1], reverse=True)

    # Swap to (frequency, word) and resolve ids through the corpus dictionary.
    words = [(freq, queries.id2word[wid]) for wid, freq in doc]

    print(metadata[0], "num words:", len(words))
    for word in words:
        print(" {1} ({0})".format(word[0], word[1]))
In [25]:
# Topics of the two queries: snapshot model first, then changeset model,
# separated by a blank line.
print("Snapshot")
printer(q, snapshot_lda)
print("\nChangeset")
printer(qc, changeset_lda)
In [26]:
# Topics of the document looked up for 37: snapshot model first, then
# changeset model, separated by a blank line.
print("Snapshot")
printer(q37, snapshot_lda)
print("\nChangeset")
printer(qc37, changeset_lda)
In [27]:
# Topics of the document looked up for 142: a leading blank line, then the
# snapshot model, a blank line, and the changeset model.
print("\nSnapshot")
printer(q142, snapshot_lda)
print("\nChangeset")
printer(qc142, changeset_lda)
In [ ]: