In [1]:
# Necessary imports 
import os
import time
from nbminer.notebook_miner import NotebookMiner
from nbminer.cells.cells import Cell
from nbminer.features.ast_features import ASTFeatures
from nbminer.stats.summary import Summary
from nbminer.stats.multiple_summary import MultipleSummary
from nbminer.features.featurize.ast_graph.ast_graph import *
people = os.listdir('../testbed/Final')
notebooks = []
for person in people:
    person = os.path.join('../testbed/Final', person)
    if os.path.isdir(person):
        direc = os.listdir(person)
        notebooks.extend([os.path.join(person, filename) for filename in direc if filename.endswith('.ipynb')])
notebook_objs = [NotebookMiner(file) for file in notebooks]
a = ASTFeatures(notebook_objs)
for i, nb in enumerate(a.nb_features):
    a.nb_features[i] = nb.get_new_notebook()

In [2]:
graphs = []
for nb in a.nb_features:
    for cell in nb.get_all_cells():
        graphs.append(cell.get_feature('graph'))
agr = ASTGraphReducer(graphs)

In [3]:
cur_count = 0
new_count = 1
print (agr.count_nodes())
while cur_count != new_count:
    cur_count = new_count
    new_count = (agr.count_nodes())
    agr.build_relations()
print (new_count)


289657
37853

In [4]:
cells = []
for nb in a.nb_features:
    cells.extend([cell for cell in nb.get_all_cells()])

In [5]:
agr.get_trace('black_box1')


Out[5]:
"<class '_ast.Import'> (black_box1)\n\t <class '_ast.alias'>"

In [6]:
groups = []
cur_code = ''
cur_group = []
for cell in cells:
    if cell.get_feature('original_code') == cur_code:
        cur_group.append(cell)
    else:
        if len(cur_group) > 0:
            groups.append(cur_group)
        cur_group = []
    cur_code = cell.get_feature('original_code')

In [8]:
group = 4
print ('*'*50)
print ('Black Boxes')
for cell in groups[group]:
    print (cell.get_feature('graph').get_nodes())
print ('*'*50)
print ('Code')
print (groups[group][0].get_feature('original_code'))
print ('*'*50)
print ('Black Box meaning')
for cell in groups[group]:
    n = (cell.get_feature('graph').get_nodes())
    if len(n) == 1 and n[0][:5] == 'black':
        print (agr.get_trace(n[0]))


**************************************************
Black Boxes
['black_box1103']
["<class '_ast.For'>", "<class '_ast.Assign'>", "<class '_ast.Call'>", "<class '_ast.List'>", "<class '_ast.Load'>", 'black_box2345', 'black_box74', 'black_box4', 'black_box4', 'black_box807']
['black_box1103']
**************************************************
Code

# coding: utf-8

# In[ ]:

df_epfl['user'] #is a dictionnary we want the user's id

#let's build a df with only the tweeter user's id

#df_epfl['user'].iloc[0]['id']
epfl_user = pd.DataFrame()

for i in range(0, df_epfl.shape[0]):
    epfl_user = epfl_user.append([df_epfl['user'].iloc[i]['id']])
    
#I got tricked and there is only one user as said on exam

unique = epfl_user.drop_duplicates()


**************************************************
Black Box meaning
<class '_ast.Assign'> (black_box1103)
	 <class '_ast.Call'> (black_box224)
		 <class '_ast.Attribute'> (black_box74)
			 <class '_ast.Load'>
			 <class '_ast.Name'> (black_box3)
				 <class '_ast.Load'>
	 <class '_ast.Name'> (black_box4)
		 <class '_ast.Store'>
<class '_ast.Assign'> (black_box1103)
	 <class '_ast.Call'> (black_box224)
		 <class '_ast.Attribute'> (black_box74)
			 <class '_ast.Load'>
			 <class '_ast.Name'> (black_box3)
				 <class '_ast.Load'>
	 <class '_ast.Name'> (black_box4)
		 <class '_ast.Store'>

In [ ]: