In [1]:
# Necessary imports 
import os
import time
from nbminer.notebook_miner import NotebookMiner
from nbminer.cells.cells import Cell
from nbminer.features.features import Features
from nbminer.stats.summary import Summary
from nbminer.stats.multiple_summary import MultipleSummary
from nbminer.encoders.ast_graph.ast_graph import *
# Root folder that is scanned for per-person sub-directories. Defined once so
# the directory listing and the path joins below cannot drift apart.
FINAL_DIR = '../testbed/Final'

# os.listdir() returns entries in arbitrary order; sort so the notebook order
# (and any later index-based lookup into the results) is stable across runs.
people = sorted(os.listdir(FINAL_DIR))

# Collect the path of every .ipynb file found directly inside each
# sub-directory of FINAL_DIR; plain files at the top level are skipped.
notebooks = []
for person in people:
    person_dir = os.path.join(FINAL_DIR, person)
    if os.path.isdir(person_dir):
        notebooks.extend(
            os.path.join(person_dir, filename)
            for filename in sorted(os.listdir(person_dir))
            if filename.endswith('.ipynb')
        )

# One NotebookMiner per discovered notebook path.
notebook_objs = [NotebookMiner(path) for path in notebooks]

In [2]:
from nbminer.pipeline.pipeline import Pipeline
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.results.reconstruction_error.astor_error import AstorError
from nbminer.preprocess.feature_encoding import FeatureEncoding
from nbminer.encoders.cluster.k_nearest import KNearestNeighbors
# Wrap the NotebookMiner objects in a Features container; this is the value
# handed to the pipeline below.
a = Features(notebook_objs)

# Instantiate every stage up front, in the order they are passed to Pipeline.
gastf = GetASTFeatures()
rbn = ResampleByNode()
gi = GetImports()
fe = FeatureEncoding()
knn = KNearestNeighbors()

# Assemble the stages and run them; `a` is rebound to the transform result,
# which is what the following cell inspects.
stages = [gastf, rbn, gi, fe, knn]
pipe = Pipeline(stages)
a = pipe.transform(a)


<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x10ffadf98>
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x10ffad588>
<nbminer.preprocess.get_imports.GetImports object at 0x1a1b065be0>
<nbminer.preprocess.feature_encoding.FeatureEncoding object at 0x1a1b075be0>
<nbminer.encoders.cluster.k_nearest.KNearestNeighbors object at 0x1a1b075f28>

In [5]:
# Position of the segment to inspect within a.get_list_segments().
num = 1
segment = a.get_list_segments()[num]

# Print the two features of interest, followed by an empty separator line.
for feature_name in ('astor', 'short_name_string'):
    print(segment.get_feature(feature_name))
print('')

# Print every entry of the segment's 'neighbors' feature on its own line.
for el in segment.get_feature('neighbors'):
    print(el)


import numpy as np

[]

var['hour'] = [var.hour for var in var.created_at]

import matplotlib.pyplot as plt

var.shape


In [ ]: