In [1]:
# Necessary imports 
import os
import time
from nbminer.notebook_miner import NotebookMiner
from nbminer.cells.cells import Cell
from nbminer.features.features import Features
from nbminer.stats.summary import Summary
from nbminer.stats.multiple_summary import MultipleSummary
from nbminer.encoders.ast_graph.ast_graph import *

In [2]:
import glob2

# Recursively match every .ipynb file below the homework repos directory.
# The pattern has no leading slash, so it is resolved relative to the
# kernel's current working directory.
notebook_pattern = 'home/piccardi/HomeworkBot2/repos/**/*.ipynb'
filenames = glob2.glob(notebook_pattern, recursive=True)

In [3]:
# Wrap every matched notebook path in a NotebookMiner, keeping the glob order.
notebook_objs = list(map(NotebookMiner, filenames))

In [4]:
# Sanity check: display the path stored on the first collected notebook.
notebook_objs[0].filename


Out[4]:
'home/piccardi/HomeworkBot2/repos/timozattol/06_poster_graphs.ipynb'

In [5]:
# Copy every homework notebook into a flat corpus directory, naming each file
# after its repo-relative path with the path separators replaced by '_'.
corpus_dir = 'hw_corpus'
# Create the target directory up front in case write_to_file does not create
# missing parent directories itself; exist_ok keeps the cell re-runnable.
os.makedirs(corpus_dir, exist_ok=True)
for nbob in notebook_objs:
    # Drop the first four path components ('home/piccardi/HomeworkBot2/repos'
    # for the glob pattern used to collect the notebooks) and join the rest.
    filename = '_'.join(nbob.filename.split('/')[4:])
    nbob.write_to_file(os.path.join(corpus_dir, filename))

In [6]:
# Collect the notebooks that sit directly inside each per-person directory of
# the testbed (one level deep, no recursion) and wrap each in a NotebookMiner.
testbed_root = 'testbed/Final'
# os.listdir returns entries in arbitrary order; sorting keeps the notebook
# indices stable across runs and machines.
people = sorted(os.listdir(testbed_root))
notebooks = []
for person in people:
    person_dir = os.path.join(testbed_root, person)
    if not os.path.isdir(person_dir):
        # Ignore stray files that live next to the per-person directories.
        continue
    notebooks.extend(
        os.path.join(person_dir, entry)
        for entry in sorted(os.listdir(person_dir))
        if entry.endswith('.ipynb')
    )
notebook_objs_testbed = [NotebookMiner(path) for path in notebooks]

In [7]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.features.features import Features
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.encoders.ast_graph.ast_graph import ASTGraphReducer
from nbminer.preprocess.feature_encoding import FeatureEncoding
# kmeans_encoder imports scikit-learn at module import time, so sklearn must
# be installed in the kernel's environment for this cell to run.
from nbminer.encoders.cluster.kmeans_encoder import KmeansEncoder
from nbminer.results.reconstruction_error.astor_error import AstorError
from nbminer.results.similarity.jaccard_similarity import NotebookJaccardSimilarity

# Cluster count handed to KmeansEncoder. The cell previously passed the name
# `value`, which is not defined anywhere in this notebook and raises NameError
# on a fresh kernel. 8 mirrors scikit-learn's KMeans default and is only a
# placeholder -- TODO confirm the intended setting.
N_CLUSTERS = 8

# Register the two notebook collections under separate group labels.
a = Features(notebook_objs, 'group_1')
a.add_notebooks(notebook_objs_testbed, 'group_2')

# Pipeline stages; njs keeps its own name because later cells query it.
gastf = GetASTFeatures()
rbn = ResampleByNode()
gi = GetImports()
fe = FeatureEncoding()
ke = KmeansEncoder(n_clusters=N_CLUSTERS)
# NOTE(review): a later cell reads agr.templates.box_lookup, but agr is only
# created by the disabled line below -- confirm which encoder is intended.
#agr = ASTGraphReducer(a, threshold=20, split_call=False)
njs = NotebookJaccardSimilarity()
pipe = Pipeline([gastf, rbn, gi, fe, ke, njs])
# Rebinds `a` to whatever the pipeline returns; later cells read from it.
a = pipe.transform(a)


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-7-7b863bc81eca> in <module>()
      6 from nbminer.encoders.ast_graph.ast_graph import ASTGraphReducer
      7 from nbminer.preprocess.feature_encoding import FeatureEncoding
----> 8 from nbminer.encoders.cluster.kmeans_encoder import KmeansEncoder
      9 from nbminer.results.reconstruction_error.astor_error import AstorError
     10 from nbminer.results.similarity.jaccard_similarity import NotebookJaccardSimilarity

/dfs/scratch2/fcipollone/nbminer/encoders/cluster/kmeans_encoder.py in <module>()
----> 1 from sklearn.cluster import KMeans
      2 from nbminer.encoders.helper.templates import SimpleTemplates
      3 import astor
      4 
      5 

ImportError: No module named 'sklearn'

In [ ]:


In [ ]:
# Display what the similarity stage returns from average_jaccard_similarity
# for the argument 8.
# NOTE(review): what the positional 8 denotes is not visible in this notebook
# -- confirm against NotebookJaccardSimilarity.
njs.average_jaccard_similarity(8)

In [ ]:
import numpy as np

# Fetch the two similarity collections for 'group_1' and report the mean and
# standard deviation of each. Judging by the names, intra holds within-group
# values and inter holds cross-group values (TODO confirm).
intra, inter = njs.group_average_jaccard_similarity('group_1')
intra_scores = np.array(intra)
inter_scores = np.array(inter)
print('Mean within group: ', np.mean(intra_scores))
print('STD within group: ', np.std(intra_scores))
print('Mean outside group: ', np.mean(inter_scores))
print('STD outside group: ', np.std(inter_scores))

In [ ]:


In [ ]:
# Print the 'import_name' feature stored on notebook 0, then display what the
# similarity stage returns from get_list_from_notebook for the same index.
first_notebook = a.get_notebook(0)
print(first_notebook.get_feature('import_name'))
njs.get_list_from_notebook(0)

In [ ]:
# Count the entries yielded by iterating agr.templates.box_lookup.
# NOTE(review): agr is only created by a commented-out line in the pipeline
# cell, so this raises NameError on a fresh kernel -- confirm before relying on it.
len(list(agr.templates.box_lookup))

In [ ]: