In [1]:
# Necessary imports 
import os
import time
from nbminer.notebook_miner import NotebookMiner
from nbminer.cells.cells import Cell
from nbminer.features.features import Features
from nbminer.stats.summary import Summary
from nbminer.stats.multiple_summary import MultipleSummary
from nbminer.encoders.ast_graph.ast_graph import *

In [2]:
# Collect every ``*.ipynb`` file found one level below TESTBED_DIR (one
# sub-directory per person) and wrap each path in a NotebookMiner.
#
# os.listdir() returns entries in arbitrary order, so both listings are sorted
# to keep the order of `notebooks` / `notebook_objs` stable across machines
# and re-runs.
TESTBED_DIR = '../testbed/Final'

people = sorted(os.listdir(TESTBED_DIR))
notebooks = []
for person in people:
    person_dir = os.path.join(TESTBED_DIR, person)
    # Plain files sitting directly in TESTBED_DIR are not submissions; skip them.
    if not os.path.isdir(person_dir):
        continue
    notebooks.extend(
        os.path.join(person_dir, filename)
        for filename in sorted(os.listdir(person_dir))
        if filename.endswith('.ipynb')
    )

# One NotebookMiner per discovered notebook path, in the same order as `notebooks`.
notebook_objs = [NotebookMiner(path) for path in notebooks]

In [3]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.results.reconstruction_error.astor_error import AstorError
# Sweep the ASTGraphReducer threshold with split_call=True.
# Each threshold gets a fresh Features object and freshly constructed pipeline
# stages; the value returned by AstorError.save_summary is printed per setting.
split_call_summary_path = '../results/reconstruction_error/results/bottom_up_split_call'
split_call_thresholds = [2, 5, 10, 20, 30, 50, 10000]

for threshold in split_call_thresholds:
    print('Calculating for value: ', threshold)
    features = Features(notebook_objs)
    # Stages are built in the order they run; the reducer is given the
    # untransformed Features object at construction time.
    stages = [
        GetASTFeatures(),
        ResampleByNode(),
        ASTGraphReducer(features, threshold=threshold, split_call=True),
        AstorError(),
    ]
    error_stage = stages[-1]
    features = Pipeline(stages).transform(features)
    print(error_stage.save_summary(split_call_summary_path))


Calculating for value:  2
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x15117ec278>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1511c32668>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x1511c32be0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1511c3f6a0>
(30.997285358013087, 35.83381953525802, 0.8525299265667438, 1272)
Calculating for value:  5
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1530f8e080>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x153c3f7908>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x153c1bf4a8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x153c657320>
(23.792586641283147, 42.41821748315059, 0.7391610501961573, 633)
Calculating for value:  10
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1541cb4908>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x151d96db00>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x152fdcc0f0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x152fdcc5f8>
(17.51599310291307, 47.91751332863897, 0.6337893572075244, 309)
Calculating for value:  20
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152a67d128>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x153033cac8>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x15230475c0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15230474e0>
(12.846796022620401, 52.03284377829192, 0.5289206317271904, 143)
Calculating for value:  30
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x151f9977b8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x153542fa90>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x1536090a58>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151ad9efd0>
(11.148776577530255, 53.457398652047075, 0.47887536465144354, 93)
Calculating for value:  50
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1517dc83c8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x151d5c70b8>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x151d5c7a20>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151a602ac8>
(9.51655193432608, 54.87461020018107, 0.424756060758475, 59)
Calculating for value:  10000
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x151fe59828>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x151fd5af60>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x15172fef28>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1541ad7080>
(0.0, 60.32411226234785, 0.0, 1)

In [4]:
# Same threshold sweep as the split-call run, but with split_call=False and a
# separate summary path. Relies on Pipeline, GetASTFeatures, ResampleByNode and
# AstorError having been imported by an earlier cell.
general_summary_path = '../results/reconstruction_error/results/bottom_up_general'
general_thresholds = [2, 5, 10, 20, 30, 50, 10000]

for threshold in general_thresholds:
    print('Calculating for value: ', threshold)
    features = Features(notebook_objs)
    # Fresh stage instances per threshold, constructed in execution order.
    stages = [
        GetASTFeatures(),
        ResampleByNode(),
        ASTGraphReducer(features, threshold=threshold, split_call=False),
        AstorError(),
    ]
    error_stage = stages[-1]
    features = Pipeline(stages).transform(features)
    print(error_stage.save_summary(general_summary_path))


Calculating for value:  2
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x15117e1c50>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1539d47080>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x152e634208>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x152ab4ccf8>
(28.673621643303324, 39.456996278040435, 0.8997585755960165, 899)
Calculating for value:  5
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1528a874a8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x153531cba8>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x15245a3f98>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x153f306550>
(23.033699811491253, 44.89895382758274, 0.8272306608993059, 485)
Calculating for value:  10
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x15263ef630>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1533fa0a58>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x151c12a278>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151c12ad68>
(18.20717752691658, 49.11387184387888, 0.7526405794185695, 255)
Calculating for value:  20
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152231a320>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1530a6ce48>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x153c0e5898>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151e9784a8>
(14.408670645973372, 52.58646011467659, 0.6774469369278745, 132)
Calculating for value:  30
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x151efd7550>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x153b1a6f60>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x152eb9e470>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151f2ab2e8>
(12.918756031484381, 53.4727391610502, 0.6370083492606378, 93)
Calculating for value:  50
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x15386f5550>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x151cfe09e8>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x152b1914a8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x152b9d5a20>
(11.419386025672779, 54.651393219997985, 0.5903329644904939, 63)
Calculating for value:  10000
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x153ac767f0>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1529beab70>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x1544abe128>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x153d560f28>
(0.0, 60.32411226234785, 0.0, 1)

In [ ]:


In [5]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.results.reconstruction_error.astor_error import AstorError
from nbminer.preprocess.feature_encoding import FeatureEncoding
from nbminer.encoders.cluster.kmeans_encoder import KmeansEncoder
# Sweep the number of k-means clusters, from many clusters down to a single one.
# Every setting rebuilds the Features object and all pipeline stages, then
# prints whatever AstorError.save_summary returns for that run.
kmeans_summary_path = '../results/reconstruction_error/results/kmeans'
cluster_counts = [1000, 700, 500, 200, 100, 10, 1]

for n_clusters in cluster_counts:
    print('Calculating for value: ', n_clusters)
    features = Features(notebook_objs)
    # Stage order: AST features -> resample -> imports -> encoding -> k-means
    # -> reconstruction error.
    stages = [
        GetASTFeatures(),
        ResampleByNode(),
        GetImports(),
        FeatureEncoding(),
        KmeansEncoder(n_clusters=n_clusters),
        AstorError(),
    ]
    error_stage = stages[-1]
    features = Pipeline(stages).transform(features)
    print(error_stage.save_summary(kmeans_summary_path))


Calculating for value:  1000
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x15410e2ac8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1513ea6828>
<nbminer.preprocess.get_imports.GetImports object at 0x15418ac588>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x15418972e8>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x1516eae7b8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1516eae940>
(32.054761881468316, 41.07076752841766, 1.0, 1000)
Calculating for value:  700
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152e0dfa58>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x15273205c0>
<nbminer.preprocess.get_imports.GetImports object at 0x1527320550>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x154102e978>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x154102ea90>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x152baaf908>
(30.61505446003707, 43.45936022532944, 1.0, 700)
Calculating for value:  500
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152d1e0e80>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1532a6f3c8>
<nbminer.preprocess.get_imports.GetImports object at 0x153879dcc0>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x153879db00>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x1526827ba8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15268273c8>
(29.155493577203792, 46.580173020822855, 1.0, 500)
Calculating for value:  200
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1538da2ac8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1538ad9390>
<nbminer.preprocess.get_imports.GetImports object at 0x153df49630>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x153bd5ad30>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x152b3b29e8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x152b3b2ac8>
(25.510456498220258, 53.3112362941354, 1.0, 200)
Calculating for value:  100
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1542681c50>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x15240ac860>
<nbminer.preprocess.get_imports.GetImports object at 0x1542465668>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x1542465710>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x15374c21d0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15374c2470>
(23.098107295158137, 56.41711095463233, 1.0, 100)
Calculating for value:  10
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x15177977b8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1529021e80>
<nbminer.preprocess.get_imports.GetImports object at 0x1525896780>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x15258969e8>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x15258967f0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x152588eb70>
(17.60817789920184, 65.98043456392718, 1.0, 10)
Calculating for value:  1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x154334a470>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x152ce7ba20>
<nbminer.preprocess.get_imports.GetImports object at 0x1535e060f0>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x1535e060b8>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x15293a7cf8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15293a7048>
(14.751064689460042, 70.3468966904738, 1.0, 1)

In [6]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.results.reconstruction_error.astor_error import AstorError
from nbminer.preprocess.feature_encoding import FeatureEncoding
from nbminer.encoders.cluster.hierarchical_encoder import HierarchicalEncoder
# Sweep the first positional argument of HierarchicalEncoder while keeping the
# second fixed at 5.
# NOTE(review): the meaning of both positional arguments is not visible in this
# notebook -- confirm against HierarchicalEncoder's signature.
hierarchical_summary_path = '../results/reconstruction_error/results/hierarchical'
encoder_settings = [.1, .25, .5, 1, 5]

for setting in encoder_settings:
    print('Calculating for value: ', setting)
    features = Features(notebook_objs)
    # Fresh stage instances per setting, constructed in execution order.
    stages = [
        GetASTFeatures(),
        ResampleByNode(),
        GetImports(),
        FeatureEncoding(),
        HierarchicalEncoder(setting, 5),
        AstorError(),
    ]
    error_stage = stages[-1]
    features = Pipeline(stages).transform(features)
    print(error_stage.save_summary(hierarchical_summary_path))


Calculating for value:  0.1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152fbbc198>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x153cd08cf8>
<nbminer.preprocess.get_imports.GetImports object at 0x153cd080b8>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x15179f6ba8>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x15179f6d30>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15179f6a20>
(24.884892805615902, 55.49356201589377, 1.0, 326)
Calculating for value:  0.25
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1537b567f0>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x151fe84b70>
<nbminer.preprocess.get_imports.GetImports object at 0x1540603400>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x151c3e30f0>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x151c3e3080>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151c3e3240>
(24.929692012255575, 54.96091942460517, 1.0, 326)
Calculating for value:  0.5
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x151ed5b208>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1513beaba8>
<nbminer.preprocess.get_imports.GetImports object at 0x1513be35f8>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x1513be3710>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x153a3434a8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x153a343320>
(24.721859731083853, 53.25691580323911, 1.0, 326)
Calculating for value:  1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x151af6f198>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x154060b5f8>
<nbminer.preprocess.get_imports.GetImports object at 0x154060bfd0>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x15405fa710>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x15405fa9b0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x153caf1eb8>
(19.0022791955399, 61.79262649632834, 1.0, 111)
Calculating for value:  5
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152d17e3c8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x15203dd2b0>
<nbminer.preprocess.get_imports.GetImports object at 0x151dcf5a90>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x15203eff28>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x15203c1240>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15203c1048>
(14.950740202045028, 69.2649632833719, 1.0, 2)

In [ ]:


In [ ]:


In [ ]: