In [1]:
# Necessary imports 
import os
import time
from nbminer.notebook_miner import NotebookMiner
from nbminer.cells.cells import Cell
from nbminer.features.features import Features
from nbminer.stats.summary import Summary
from nbminer.stats.multiple_summary import MultipleSummary
from nbminer.encoders.ast_graph.ast_graph import *

In [3]:
# Build the corpus: one path per entry of ../hw_corpus, then one NotebookMiner
# per path. os.listdir() returns names in arbitrary order, so the listing is
# sorted to keep the corpus order identical across runs and machines.
notebooks = [os.path.join('../hw_corpus', fname) for fname in sorted(os.listdir('../hw_corpus'))]
notebook_objs = [NotebookMiner(file) for file in notebooks]

In [4]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.results.reconstruction_error.astor_error import AstorError
# Sweep the ASTGraphReducer threshold with split_call=True. Every pass builds a
# fresh Features object and a fresh pipeline, runs it, and prints whatever
# AstorError.save_summary returns for the (shared) results path.
split_call_summary_path = '../results/reconstruction_error/results/homework_bottom_up_split_call'
for threshold in (2, 5, 10, 20, 30, 50, 10000):
    print('Calculating for value: ', threshold)
    features = Features(notebook_objs)
    ast_features = GetASTFeatures()
    resampler = ResampleByNode()
    reducer = ASTGraphReducer(features, threshold=threshold, split_call=True)
    error_stage = AstorError()
    stages = [ast_features, resampler, reducer, error_stage]
    pipeline = Pipeline(stages)
    features = pipeline.transform(features)
    summary = error_stage.save_summary(split_call_summary_path)
    print(summary)


Calculating for value:  2
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1518a069e8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x151d8f8da0>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x151d8e1748>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151d8e1438>
(32.66008151259884, 56.59984186077159, 0.8201166276809541, 1790)
Calculating for value:  5
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152ac8dc18>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1543253ef0>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x1543253e80>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1543253f28>
(22.51618893625605, 66.03100187790334, 0.7181497710275755, 857)
Calculating for value:  10
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x154bf09588>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x154e92c748>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x154e92c1d0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x154e947c50>
(16.443636072958345, 71.31301683523868, 0.6220142984219023, 403)
Calculating for value:  20
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x155a626b38>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x155dcbf780>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x155dcbf8d0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x155dcbf5f8>
(12.437342573395313, 74.79626396072875, 0.5399795736829968, 207)
Calculating for value:  30
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152bca0908>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1556990940>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x152089ef98>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15565a5b38>
(11.122467087021043, 75.61087866108787, 0.5053866174677956, 153)
Calculating for value:  50
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1570efb438>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x15646980b8>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x15457be400>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15225adf60>
(8.480802505868722, 77.47270451026257, 0.4321813329819128, 80)
Calculating for value:  10000
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x154704c208>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1544905c88>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x154d4d6a90>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x154d4d6dd8>
(0.0, 81.72984548479558, 0.0, 1)

In [5]:
# Same threshold sweep as the split_call run, but with split_call=False. Every
# pass builds a fresh Features object and pipeline, then prints whatever
# AstorError.save_summary returns for the (shared) results path.
general_summary_path = '../results/reconstruction_error/results/homework_bottom_up_general'
for threshold in (2, 5, 10, 20, 30, 50, 10000):
    print('Calculating for value: ', threshold)
    features = Features(notebook_objs)
    ast_features = GetASTFeatures()
    resampler = ResampleByNode()
    reducer = ASTGraphReducer(features, threshold=threshold, split_call=False)
    error_stage = AstorError()
    stages = [ast_features, resampler, reducer, error_stage]
    pipeline = Pipeline(stages)
    features = pipeline.transform(features)
    summary = error_stage.save_summary(general_summary_path)
    print(summary)


Calculating for value:  2
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1518a0f668>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x152747b518>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x15356715c0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x155ecc0ac8>
(30.25748576184726, 60.640826277468456, 0.8718742793134122, 1306)
Calculating for value:  5
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x155b732d68>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x152c1b2588>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x152c1b24e0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x155cf2a588>
(21.602460298007912, 68.58481863407242, 0.8016999967054328, 659)
Calculating for value:  10
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1536b06080>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x15358fa278>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x15358fafd0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1557157eb8>
(17.27129080469465, 72.63667512272264, 0.7382136856323922, 348)
Calculating for value:  20
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x157694fa90>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x15331c3470>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x15331c3cc0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x154b4035c0>
(14.203275281009498, 75.25256152604355, 0.6807893783151583, 205)
Calculating for value:  30
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x153caab8d0>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x156f3630b8>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x156f363908>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x156f363588>
(12.855680712899211, 76.15260435541792, 0.64520805192238, 144)
Calculating for value:  50
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1520bb7ef0>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1561cf0c18>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x1561cf09b0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x156b7ec748>
(10.639320597045618, 77.53059005699602, 0.5915066056073535, 90)
Calculating for value:  10000
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x153405d390>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x15610cb128>
<nbminer.encoders.ast_graph.ast_graph.ASTGraphReducer object at 0x152b7d07f0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15609be6a0>
(0.0, 81.72984548479558, 0.0, 1)

In [6]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.results.reconstruction_error.astor_error import AstorError
from nbminer.preprocess.feature_encoding import FeatureEncoding
from nbminer.encoders.cluster.kmeans_encoder import KmeansEncoder
# Sweep the number of k-means clusters from large to small. Every pass builds a
# fresh Features object and pipeline (AST features -> resample -> imports ->
# feature encoding -> k-means -> AstorError), then prints whatever
# AstorError.save_summary returns for the (shared) results path.
# NOTE(review): no random seed is passed to KmeansEncoder here, so results may
# vary between runs - confirm whether the encoder seeds itself.
kmeans_summary_path = '../results/reconstruction_error/results/homework_kmeans'
for cluster_count in (1000, 700, 500, 200, 100, 10, 1):
    print('Calculating for value: ', cluster_count)
    features = Features(notebook_objs)
    ast_features = GetASTFeatures()
    resampler = ResampleByNode()
    import_stage = GetImports()
    encoding_stage = FeatureEncoding()
    kmeans_stage = KmeansEncoder(n_clusters=cluster_count)
    error_stage = AstorError()
    stages = [ast_features, resampler, import_stage, encoding_stage, kmeans_stage, error_stage]
    pipeline = Pipeline(stages)
    features = pipeline.transform(features)
    summary = error_stage.save_summary(kmeans_summary_path)
    print(summary)


Calculating for value:  1000
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x15798c0240>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1543a1b978>
<nbminer.preprocess.get_imports.GetImports object at 0x152daabe10>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x152daabd68>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x152daab780>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x152daabcf8>
(41.92009995989897, 60.710901723058676, 1.0, 1000)
Calculating for value:  700
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1544af3ef0>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x155e2df898>
<nbminer.preprocess.get_imports.GetImports object at 0x154c510550>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x1557a66828>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x1557a66da0>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1557a66eb8>
(39.623364241870405, 65.84788982967088, 1.0, 700)
Calculating for value:  500
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x153b18c9b0>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x151e392b70>
<nbminer.preprocess.get_imports.GetImports object at 0x151e395a20>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x151e395898>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x151e395c18>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151e395b70>
(37.15617514940666, 70.342667940566, 1.0, 500)
Calculating for value:  200
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x156d239f28>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x156981c9b0>
<nbminer.preprocess.get_imports.GetImports object at 0x156981c6a0>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x156981cac8>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x156981cb38>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x156981cda0>
(31.67031006288614, 79.02839916976905, 1.0, 200)
Calculating for value:  100
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152dc37860>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1543290ba8>
<nbminer.preprocess.get_imports.GetImports object at 0x1543290cf8>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x15432b1b70>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x15432b1d30>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x15432b1860>
(28.478194569340186, 86.06454057259579, 1.0, 100)
Calculating for value:  10
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152f5c1198>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1537cf95f8>
<nbminer.preprocess.get_imports.GetImports object at 0x1537cf96d8>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x1537cf98d0>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x1537cf9438>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1537cf9748>
(21.573622354220063, 95.845254175864, 1.0, 10)
Calculating for value:  1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1531135b38>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x156de00e10>
<nbminer.preprocess.get_imports.GetImports object at 0x156de00f98>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x156de009b0>
<nbminer.encoders.cluster.kmeans_encoder.KmeansEncoder object at 0x156de0b828>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x156de1cc88>
(16.92007856524114, 108.80917866438243, 1.0, 1)

In [7]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.results.reconstruction_error.astor_error import AstorError
from nbminer.preprocess.feature_encoding import FeatureEncoding
from nbminer.encoders.cluster.hierarchical_encoder import HierarchicalEncoder
# Sweep the first argument of HierarchicalEncoder. Every pass builds a fresh
# Features object and pipeline (AST features -> resample -> imports ->
# feature encoding -> hierarchical encoder -> AstorError), then prints whatever
# AstorError.save_summary returns for the (shared) results path.
# NOTE(review): the second positional argument (5) is passed through unchanged;
# its meaning is not visible from this notebook - confirm against the encoder.
hierarchical_summary_path = '../results/reconstruction_error/results/homework_hierarchical'
for encoder_param in (.1, .25, .5, 1, 5):
    print('Calculating for value: ', encoder_param)
    features = Features(notebook_objs)
    ast_features = GetASTFeatures()
    resampler = ResampleByNode()
    import_stage = GetImports()
    encoding_stage = FeatureEncoding()
    hierarchical_stage = HierarchicalEncoder(encoder_param, 5)
    error_stage = AstorError()
    stages = [ast_features, resampler, import_stage, encoding_stage, hierarchical_stage, error_stage]
    pipeline = Pipeline(stages)
    features = pipeline.transform(features)
    summary = error_stage.save_summary(hierarchical_summary_path)
    print(summary)


Calculating for value:  0.1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1521217668>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x155ab7eda0>
<nbminer.preprocess.get_imports.GetImports object at 0x155ab7ecc0>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x155ab7e898>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x155ab7e9e8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x155ab7e0b8>
(28.13862855472205, 88.20380193061641, 1.0, 507)
Calculating for value:  0.25
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x152110acc0>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x153ea03908>
<nbminer.preprocess.get_imports.GetImports object at 0x153ea03128>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x153ea03dd8>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x153ea0a828>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x153ea0aa90>
(28.225481015717595, 86.99400388758937, 1.0, 507)
Calculating for value:  0.5
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x153e5517b8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x151f1e24a8>
<nbminer.preprocess.get_imports.GetImports object at 0x151f1e2358>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x151f1cf588>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x151f1cf898>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x151f1cf208>
(28.112882050377593, 88.06289328896649, 1.0, 507)
Calculating for value:  1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x156092a6a0>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1568b17a20>
<nbminer.preprocess.get_imports.GetImports object at 0x1568b0ee10>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x1568b0eba8>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x1568b0edd8>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1568b0ef60>
(22.905364686028012, 93.69370408196883, 1.0, 174)
Calculating for value:  5
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x1525b159e8>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x1542f8a2e8>
<nbminer.preprocess.get_imports.GetImports object at 0x1542f8af98>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x1542f8ac50>
<nbminer.encoders.cluster.hierarchical_encoder.HierarchicalEncoder object at 0x1542f8a588>
<nbminer.results.reconstruction_error.astor_error.AstorError object at 0x1542f82630>
(18.81994811547603, 101.97618027872039, 1.0, 2)

In [ ]: