In [1]:
# Necessary imports
import os
import time
from nbminer.notebook_miner import NotebookMiner
from nbminer.cells.cells import Cell
from nbminer.features.features import Features
from nbminer.stats.summary import Summary
from nbminer.stats.multiple_summary import MultipleSummary
from nbminer.encoders.ast_graph.ast_graph import *
In [2]:
import glob2

# Recursively collect the path of every .ipynb file below the homework
# repos directory. The pattern is relative, so the result depends on the
# kernel's current working directory.
filenames = glob2.glob(
    'home/piccardi/HomeworkBot2/repos/**/*.ipynb',
    recursive=True,
)
In [3]:
# Wrap every discovered notebook path in a NotebookMiner, preserving the
# order of `filenames`.
notebook_objs = list(map(NotebookMiner, filenames))
In [4]:
# Display the `filename` attribute of the first NotebookMiner object.
# Raises IndexError if the glob above matched no notebooks.
notebook_objs[0].filename
Out[4]:
In [5]:
# Export every mined notebook into the hw_corpus/ directory. The output
# name is the notebook's path with its first four '/'-separated components
# dropped and the remaining components joined with underscores.
for nbob in notebook_objs:
    path_parts = nbob.filename.split('/')
    filename = '_'.join(path_parts[4:])
    nbob.write_to_file(os.path.join('hw_corpus', filename))
In [6]:
# Gather the .ipynb files that sit directly inside each sub-directory of
# testbed/Final (one level deep only), then wrap each in a NotebookMiner.
people = os.listdir('testbed/Final')
notebooks = []
for entry in people:
    person_dir = os.path.join('testbed/Final', entry)
    # Plain files at the top level of testbed/Final are ignored.
    if not os.path.isdir(person_dir):
        continue
    notebooks += [
        os.path.join(person_dir, name)
        for name in os.listdir(person_dir)
        if name.endswith('.ipynb')
    ]
notebook_objs_testbed = list(map(NotebookMiner, notebooks))
In [7]:
# Build a two-group Features collection and run it through the
# preprocessing / encoding / similarity pipeline.
#
# NOTE(review): `Features` is already imported in the first cell, and
# `ASTGraphReducer` / `AstorError` are not used by any active line in this
# cell (ASTGraphReducer appears only in the commented-out line below).
from nbminer.pipeline.pipeline import Pipeline
from nbminer.features.features import Features
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.encoders.ast_graph.ast_graph import ASTGraphReducer
from nbminer.preprocess.feature_encoding import FeatureEncoding
from nbminer.encoders.cluster.kmeans_encoder import KmeansEncoder
from nbminer.results.reconstruction_error.astor_error import AstorError
from nbminer.results.similarity.jaccard_similarity import NotebookJaccardSimilarity
# The globbed homework notebooks are registered under 'group_1' and the
# testbed notebooks under 'group_2' (presumably group labels; 'group_1' is
# the name queried from `njs` in a later cell).
a = Features(notebook_objs, 'group_1')
a.add_notebooks(notebook_objs_testbed, 'group_2')
# Instantiate the individual pipeline stages.
gastf = GetASTFeatures()
rbn = ResampleByNode()
gi = GetImports()
fe = FeatureEncoding()
# NOTE(review): `value` is not defined anywhere in the visible notebook, so
# on a fresh kernel (Restart & Run All) this line raises NameError. The
# intended cluster count must be assigned explicitly before this cell.
ke = KmeansEncoder(n_clusters = value)
# Disabled alternative encoder. NOTE(review): a later cell still reads
# `agr.templates.box_lookup`, which fails while this line stays commented out.
#agr = ASTGraphReducer(a, threshold=20, split_call=False)
njs = NotebookJaccardSimilarity()
# Stages are applied in the order listed here.
pipe = Pipeline([gastf, rbn, gi, fe, ke, njs])
# `a` is rebound to whatever the pipeline's transform returns; later cells
# read both `a` and `njs`.
a = pipe.transform(a)
In [ ]:
In [ ]:
# Display the result of the similarity stage's average Jaccard query.
# NOTE(review): the meaning of the literal 8 (an index? a count?) is not
# visible in this notebook; confirm against NotebookJaccardSimilarity and
# consider naming it as a constant.
njs.average_jaccard_similarity(8)
In [ ]:
import numpy as np

# Fetch the two score collections returned for 'group_1' and report the
# mean and standard deviation of each.
intra, inter = njs.group_average_jaccard_similarity('group_1')

intra_scores = np.array(intra)
print('Mean within group: ', np.mean(intra_scores))
print('STD within group: ', np.std(intra_scores))

inter_scores = np.array(inter)
print('Mean outside group: ', np.mean(inter_scores))
print('STD outside group: ', np.std(inter_scores))
In [ ]:
In [ ]:
# Print the 'import_name' feature of notebook 0, then display the list the
# similarity stage holds for that same notebook index.
first_notebook = a.get_notebook(0)
print(first_notebook.get_feature('import_name'))
njs.get_list_from_notebook(0)
In [ ]:
# Count the entries yielded by iterating agr.templates.box_lookup.
# NOTE(review): `agr` is only created by a commented-out line in the
# pipeline cell, so this raises NameError unless that line is restored.
sum(1 for key in agr.templates.box_lookup)
In [ ]: