In [1]:
# Necessary imports
import os
import time
from nbminer.notebook_miner import NotebookMiner
from nbminer.cells.cells import Cell
from nbminer.features.ast_features import ASTFeatures
from nbminer.stats.summary import Summary
from nbminer.stats.multiple_summary import MultipleSummary
In [2]:
# Load every notebook found one directory level below the testbed root.
TESTBED_DIR = '../testbed/Final'

# os.listdir returns entries in arbitrary order. Sort both listings so that
# positional lookups later in this notebook (notebook 0, cell 17) resolve to
# the same files on every run and on every machine.
people = sorted(os.listdir(TESTBED_DIR))
notebooks = []
for person in people:
    person_dir = os.path.join(TESTBED_DIR, person)
    # Skip stray files sitting next to the per-person directories.
    if os.path.isdir(person_dir):
        notebooks.extend(
            os.path.join(person_dir, filename)
            for filename in sorted(os.listdir(person_dir))
            if filename.endswith('.ipynb')
        )

# Wrap each notebook path in a miner object and hand them all to the AST
# feature extractor.
notebook_objs = [NotebookMiner(path) for path in notebooks]
a = ASTFeatures(notebook_objs)
In [3]:
# Take the notebook at index 0 of the ASTFeatures object as a worked example.
examp_nb = a.get_notebook(0)
In [4]:
# Show the cell count reported by the example notebook.
print (examp_nb.get_number_cells())
In [5]:
# Derive a new notebook object from the example.
# NOTE(review): presumably get_new_notebook() re-segments the cells — confirm against nbminer.
new_segmentation = examp_nb.get_new_notebook()
In [6]:
# Show the cell count of the derived notebook, for comparison with the example's count.
print (new_segmentation.get_number_cells())
In [7]:
# Replace each notebook held by the feature object, slot by slot and in
# place, with the result of its own get_new_notebook() call.
for idx in range(len(a.nb_features)):
    a.nb_features[idx] = a.nb_features[idx].get_new_notebook()
In [8]:
# Count every cell across all notebooks and report any cell whose AST body
# does not contain exactly one top-level statement.
total_segments = 0
for notebook in a.nb_features:
    for segment in notebook.get_all_cells():
        total_segments += 1
        body = segment.get_feature('ast').body
        if len(body) == 1:
            continue
        print("Failed")
In [9]:
# Show how many cells were counted across all notebooks.
print (total_segments)
In [10]:
# Record the AST node class of the first top-level statement of every cell,
# walking notebooks and their cells in order.
all_types = [
    type(cell.get_feature('ast').body[0])
    for nb in a.nb_features
    for cell in nb.get_all_cells()
]
In [11]:
# Tally how often each AST node class occurs in all_types.
counting_dict = {}
for node_type in all_types:
    counting_dict[node_type] = counting_dict.get(node_type, 0) + 1
In [14]:
# Inspect the AST of a single cell (index 17) of `new_segmentation`.
import ast
cells = new_segmentation.get_all_cells()
# NOTE(review): this rebinds `a`, which earlier cells use for the ASTFeatures
# object. Re-running those cells after this one will see the AST instead.
a = cells[17].get_feature('ast')
# For each top-level statement in the body, print its direct child nodes and
# a full dump of the statement.
# NOTE(review): the nesting level of the final print cannot be recovered from
# this flattened export — confirm against the original notebook.
for el in a.body:
for node in ast.iter_child_nodes(el):
print(node)
print (ast.dump(el))
In [19]:
import networkx
In [20]:
# networkx.Graph has no `DirectedTree` attribute, so the original call raised
# AttributeError and halted a top-to-bottom run. A DiGraph is the directed
# container this notebook uses to hold AST trees.
ast_tree = networkx.DiGraph()
In [32]:
from collections import deque

# Build a directed graph of the AST under the first statement held in `a`:
# one graph node per AST node, one edge from each parent to each direct child.
root = a.body[0]
dgraph = networkx.DiGraph()

# The deque is used as a LIFO stack, giving a depth-first walk.
nodes = deque([root])
dgraph.add_node(root)
while nodes:
    parent = nodes.pop()
    for child in ast.iter_child_nodes(parent):
        dgraph.add_node(child)
        dgraph.add_edge(parent, child)
        nodes.append(child)
In [36]:
# Dump the node and edge collections of `dgraph` for a quick visual check.
print (dgraph.nodes())
print (dgraph.edges())
In [43]:
def return_graph(node):
dgraph = networkx.DiGraph()
nodes = deque()
nodes.append(node.body[0])
dgraph.add_node(node.body[0])
while len(nodes) != 0:
cur_node = nodes.pop()
for node in ast.iter_child_nodes(cur_node):
dgraph.add_node(node)
dgraph.add_edge(cur_node,node)
nodes.append(node)
return dgraph
In [59]:
# One AST graph per cell, in cell order.
graphs = []
for c in cells:
    graphs.append(return_graph(c.get_feature('ast')))

# The matching root (first top-level statement) for each of those graphs.
roots = []
for c in cells:
    roots.append(c.get_feature('ast').body[0])
In [45]:
# Number of graphs built (one per entry in `cells`).
len(graphs)
Out[45]:
In [67]:
# For each graph, take the largest shortest-path length from its root to any
# node reachable from that root.
max_values = [
    max(networkx.shortest_path_length(graph, roots[idx]).values())
    for idx, graph in enumerate(graphs)
]
In [69]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.hist(max_values)
Out[69]:
In [ ]: