Baseline prediction for homework type

The baseline prediction method we use for predicting which homework the notebook came from uses the popular plagiarism detector JPlag.

We feed each noteboook through our pipeline to eliminate variable names, string declarations, comments, and import names


In [1]:
# First step is to load a balanced dataset of homeworks
import sys
home_directory = '/dfs/scratch2/fcipollone'
sys.path.append(home_directory)

import numpy as np
from nbminer.notebook_miner import NotebookMiner

hw_filenames = np.load('../homework_names_jplag_combined_per_student.npy')
min_val = min([len(temp) for temp in hw_filenames])
print(min_val)
hw_notebooks = [[NotebookMiner(filename) for filename in temp[:min_val]] for temp in hw_filenames]


59

In [3]:
# Now we do the transformation, storing the results into the variable hw_code
from nbminer.pipeline.pipeline import Pipeline
from nbminer.features.features import Features
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
import tqdm

hw_code = []
for corp in tqdm.tqdm(hw_notebooks):
    temp = []
    for nb in corp:
        a = Features([nb])
        gastf = GetASTFeatures()
        gi = GetImports()
        pipe = Pipeline([gastf, gi])
        a = pipe.transform(a)
        code = a.get_notebook(0).get_all_asts()
        lines = code.split('\n')
        lines = [line for line in lines if line != '']
        temp.append('\n\n'.join(lines))
    hw_code.append(temp)


  0%|          | 0/6 [00:00<?, ?it/s]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245acddd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a90080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8dc88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a564a8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a71588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a71e48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a60c18>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a89fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a90358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a73a58>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aad780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7ac18>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab5c50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab5c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a54048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a54390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a60400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a603c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa6908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa3978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a66710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a662b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab9c50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a60860>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b01c88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245b01550>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a66518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a662e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a78cc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9f438>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a646a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a64320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8c048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab8f98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab8f60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a25048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a842b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab8240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab8e48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ac1898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a594e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a75128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a84160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a75eb8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a75ba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a842b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a98dd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a63da0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245b019e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a66198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a66a58>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a75588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a75e80>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a75518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a75c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a65d30>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a65c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a7d358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abfda0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a71f98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a683c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a61e48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a61fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a62a20>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a62e10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a59198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a797f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9d630>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9db70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab82b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab8978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a89710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a89470>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a63a90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a63940>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa0358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa0668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abe8d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa07b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa0ba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe8d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7dba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8b6d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8b630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aaba58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aab860>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa2940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa23c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abe358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab6208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a5ebe0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a54048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a54320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9d9b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9d978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa0588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aab208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aab2e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aab400>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a600f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a60358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aab2e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aab080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a546a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a540f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6a4e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab3a90>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa30b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a612e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9df28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c1d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a5b860>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c400>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6ac50>
1
 17%|█▋        | 1/6 [00:07<00:38,  7.64s/it]
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6ae10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245acde48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a90438>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a66438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a66208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b01588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a90a20>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a62160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab32e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a90780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab57b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a71a58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a71b70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6a160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6a5c0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8b630>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8b0b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a842b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a84390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a519e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a51c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a51eb8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a80128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a623c8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a53be0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a680f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a404a8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a40fd0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a40828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a40da0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9b5f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a68208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a5b6a0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8c4e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a5e8d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ac0668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ac0e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ac0748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab9358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ac0cf8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a0e6a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a0e630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a5e908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a5e160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a63898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a62e10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a5f780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a5f710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8ceb8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a60b00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a54ef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7eba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa3518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa37f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8fcf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8f5f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ac07b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6aef0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6a780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6a208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa7ac8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa7588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe9e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa7080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abefd0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a75630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9e518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9e940>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a12940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a128d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9a4a8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9a390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9acc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9aef0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a12710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a12630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aafe48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a62d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a71400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a713c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9d898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9d828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9d198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9da58>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6eb70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6ecf8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a989e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a982b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a63940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a636d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8c0f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a1a1d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a4af28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a90588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a90710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a718d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a71128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a378d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a37470>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6e630>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6ea90>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a37f28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a37c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8cdd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9b080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a66198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a660f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8cef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a46f60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a80a58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a80b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a61d30>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6aac8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a1bda0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a33e10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8c898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c7b8>
1
 33%|███▎      | 2/6 [00:15<00:31,  7.95s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa9e80>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6eba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a538d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a53908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a530b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9f160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6a400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6af60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a785f8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a78320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a66390>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab17f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a563c8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a90f28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a90b70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a73dd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a53710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a80630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a683c8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a595c0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a185c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a18668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a89f98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa8940>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a73278>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a730b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b01a58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab9198>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aac908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a53c50>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a2f668>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa0eb8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a2b898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aaf630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aec160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245b01978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a593c8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a622e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aec160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9f5c0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa80f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a184a8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a622b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a4a6a0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62459db908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a2fe80>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa9748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a53198>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b01208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a58b70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a2f240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a4a4e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa0748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a80da0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62459db128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f62459db748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aaf358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245acd828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a615c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7a588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab9f60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab95f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a63ac8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a66e48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a56fd0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a564e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a54048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a54908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245acd780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245acdc88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6af98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6a710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a598d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7aa20>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a186d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9a978>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9aa90>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9bbe0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a58198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a58320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa96d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a18048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a593c8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6a278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a78710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a84cc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a7a588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a809e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aec160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245acd9b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a540f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a54550>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a89208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a899e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a76160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7ccc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a18be0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a68fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6e828>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abfb70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aec160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6e128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a7af60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7dd30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a43240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a59668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6e940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6edd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9a5f8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7a048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b01518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245b01f28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a607f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a398d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a39dd8>
1
 50%|█████     | 3/6 [00:20<00:20,  6.79s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa7908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9ed30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6eda0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245acd828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa31d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa35f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a7f860>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7f0f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a894e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7f908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a78b00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a783c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9ecc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aec160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a50d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a43780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9ab38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9a438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9add8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab9828>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab9cc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6eef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6e048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6a0b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6a6d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9a438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9aac8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a7fb38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7f828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab99e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a666d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9b208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a89f98>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a6eb70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7b550>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a784e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a78518>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a55080>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7c240>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a7c400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9ecc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a7cb38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a78048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab97f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab9860>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa9f28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7e8d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a78390>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a6eda0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa39b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9b518>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9b358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9b550>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aec160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab9ef0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a66198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a43d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a53860>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a53908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a66940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a66e48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a532b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7d9e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a58748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a83b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a12e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a607f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aae828>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab8cc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a90588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a90fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abe748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a53f28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a53588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9ccf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a97128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a97cc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a97940>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a59c50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a57a90>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa6e48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7d7f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abdf60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9c080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9e2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aae518>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa6da0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa6cf8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab9d30>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9af98>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9c320>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9cdd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245acdcc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245acddd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa93c8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abef28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245acdcc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245acddd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ac0550>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9bc50>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab9978>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a97f28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a53780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa60f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a90f60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a90748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9e8d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9eeb8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a60b70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a58b00>
1
 67%|██████▋   | 4/6 [00:23<00:11,  5.84s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abdf98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abd6a0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abd438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a59be0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a598d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9d128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9d630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a57160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a7dba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9ca58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab3908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a97748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a39d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a97898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a97780>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa98d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa9ef0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9c5f8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245acdc88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab3828>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9ca58>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9b978>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa9518>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a3e7f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a3e898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a13eb8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a136a0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9deb8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a534e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a18240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa8630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62459f8f60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f62459f83c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a35048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa3c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a35128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa69b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a58cc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a3ee80>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa6908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa3d68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a68dd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa62b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a06ba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a68f98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a3e2e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abef98>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab0ba8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f62459ca240>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a977f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ac1828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a30080>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a28908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ac0438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f62459d4748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a260b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a265f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9e550>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abea58>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a28160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a28c18>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9e550>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa79b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62459d4cf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8f978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa3588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abf908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a10eb8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab0128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a22588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a22160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62459d4b00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a57048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe550>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a220f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9db70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a23b00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab0400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab0320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a89908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a89630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab2b00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab2fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abfb00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8f9b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b01400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abe358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a0ec88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9a208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a60da0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c780>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abf898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abf780>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa8a90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a229b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9a9e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9af60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab0ac8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a60fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab87f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab8860>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a23b70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa0f98>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a979b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab0978>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab0390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a846d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a849e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abee10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abefd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a26e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa6898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a97cf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa8198>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a11d68>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a304e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a84f28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab9f98>
1
 83%|████████▎ | 5/6 [00:27<00:05,  5.56s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a29d68>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a29b70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9a898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ac0828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aec160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a97240>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a97898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a974a8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a835c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a899e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9a0f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a844e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a934a8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abd908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a84b38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abe1d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a109e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa3dd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab8ef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab8b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a93978>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a60550>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a1dcf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a072e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa2e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa6dd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abec50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a288d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe2e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa7908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8b710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a5f390>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a52da0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a110b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a11978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abd1d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a4fb38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a08748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a08128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b016d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa2630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a1df60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abe6d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245b01e80>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa06a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa0710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a858d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a85898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a85400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a851d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8fef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8f2b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab0eb8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aacb38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a84978>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab4390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a046a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abdd30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a53e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a53eb8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a84550>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a84940>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b015c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245b01a20>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245acdc50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9a550>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a20a90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab4cc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab4c88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8f3c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa06d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab89e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aec160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aa48d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a61630>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a61978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245b01390>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab9588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab47f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8b320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abd940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245abdb38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a10cf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a107f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab8320>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aad710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a83f60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab4e10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aab470>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab8d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aab390>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a10588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ac07f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ac03c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a93a90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245ab40b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa7b00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a93eb8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245aa7b70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a97b00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a9db38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a9d4e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f62474c67b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a83710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a8cf60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a8c390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab8fd0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a139e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a627b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a97128>
1
100%|██████████| 6/6 [00:31<00:00,  5.33s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245a3bf28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a621d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245ab8a90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245aab400>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f6245abe940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f6245a840f0>
1


In [8]:
# Print an example to see what the result of the transformation looks like.
print(hw_code[0][0])


import import as import

import import as import

pd.options.mode.chained_assignment = None

var = pd.Series([632, 1638, 569, 115])

var

var.values

var.index

var = pd.Series([632, 1638, 569, 115], index=['string', 'string', 'string',

    'string'])

var

var['string']

var[[var.endswith('string') for var in var.index]]

[var.endswith('string') for var in var.index]

var[0]

var.name = 'string'

var.index.name = 'string'

var

np.log10(var)

var[var > 1000]

var = {'string': 632, 'string': 1638, 'string': 569, 'string': 115}

pd.Series(var)

var = pd.Series(var, index=['string', 'string', 'string', 'string'])

var

var.isnull()

var + var

var = pd.DataFrame({'string': [632, 1638, 569, 115, 433, 1130, 754, 555],

    'string': [1, 1, 1, 1, 2, 2, 2, 2], 'string': ['string', 'string',

    'string', 'string', 'string', 'string', 'string', 'string']})

var

var[['string', 'string', 'string']]

var.columns

var.dtypes

var['string']

var.patient

type(var.phylum)

type(var[['string']])

var.loc[3]

var.head()

var.tail(3)

var.shape

var = pd.DataFrame([{'string': 1, 'string': 'string', 'string': 632}, {

    'string': 1, 'string': 'string', 'string': 1638}, {'string': 1,

    'string': 'string', 'string': 569}, {'string': 1, 'string': 'string',

    'string': 115}, {'string': 2, 'string': 'string', 'string': 433}, {

    'string': 2, 'string': 'string', 'string': 1130}, {'string': 2,

    'string': 'string', 'string': 754}, {'string': 2, 'string': 'string',

    'string': 555}])

var

var = var.value

var

var[5] = 0

var

var = var.value.copy()

var[5] = 1000

var

var.value[[3, 4, 6]] = [14, 21, 5]

var

var['string'] = 2013

var

var.treatment = 1

var

var.treatment

var[var.value > 1000]

var[var.phylum.str.endswith('string') & (var.value > 1000)]

var = pd.Series([0] * 4 + [1] * 2)

var

var['string'] = var

var

var = ['string', 'string', 'string', 'string']

var['string'] = var

var['string'] = ['string'] * len(var)

var

var = var.drop('string', axis=1)

var

var.values

var = pd.DataFrame({'string': [1, 2, 3], 'string': [0.4, -1.0, 4.5]})

var.values

var.index

var.index[0] = 15

var.index = var.index

var

var().system('string')

var = pd.read_csv('string')

var

pd.read_csv('string', header=None).head()

var = pd.read_table('string', sep='string')

var = pd.read_csv('string', index_col=['string', 'string'])

var.head()

pd.read_csv('string', skiprows=[3, 4, 6]).head()

pd.read_csv('string', nrows=4)

pd.read_csv('string', chunksize=14)

var = pd.read_csv('string', chunksize=14)

var = pd.Series({var.Taxon[0]: var.Tissue.mean() for var in var})

var

var().system('string')

pd.read_csv('string').head(20)

pd.isnull(pd.read_csv('string')).head(20)

pd.read_csv('string', na_values=['string', -99999]).head(20)

var = pd.read_excel('string', sheetname='string', header=None)

var.head()

var = pd.read_csv('string', index_col='string')

var.head()

var = var.player + var.year.astype(str)

var = var.copy()

var.index = var

var.head()

var.index.is_unique

pd.Series(var.index).value_counts()

var.loc['string']

var = var.player + var.team + var.year.astype(str)

var = var.copy()

var.index = var

var.head()

var.index.is_unique

var.reindex(var.index[::-1]).head()

var = range(var.index.values.min(), var.index.values.max())

var.reindex(var).head()

var.reindex(var, method='string', columns=['string', 'string']).head()

var.reindex(var, fill_value='string', columns=['string']).head()

var.shape

var.drop([89525, 89526])

var.drop(['string', 'string'], axis=1)

var = var.h

var

var[:3]

var[['string', 'string']]

var['string':'string']

var['string':'string'] = 5

var

var[['string', 'string']]

var[var.ab > 500]

var.query('string')

var = 450

var.query('string')

var.loc['string', ['string', 'string', 'string', 'string']]

var.loc[:'string', ('string')]

var.iloc[:5, 5:8]

var = ['string', 'string']

var = var[var.team.isin(var)]

var

len(var.player.unique())

var().run_line_magic('string', 'string')

import import as import

import import as import

import import as import

sns.set_context('string')

sns.barplot(y=var.team.unique(), x=var.team.value_counts())

var = var.loc[var.year == 2006, 'string']

var.index = var.player[var.year == 2006]

var = var.loc[var.year == 2007, 'string']

var.index = var.player[var.year == 2007]

var

var = var + var

var

var[var.notnull()]

var.add(var, fill_value=0)

var.hr - var.hr.max()

var.loc[89521, 'string']

var = var[['string', 'string', 'string', 'string']]

var = var - var.loc[89521]

var[:10]

var.apply(np.median)

def range_calc(x):

    return var.max() - var.min()

var = lambda x: var.max() - var.min()

var.apply(var)

def slugging(x):

    var = var['string'] - var['string'] - var['string'] - var['string'

        ] + 2 * var['string'] + 3 * var['string'] + 4 * var['string']

    var = var['string'] + 1e-06

    return var / var

var.apply(var, axis=1).round(3)

var.sort_index().head()

var.sort_index(ascending=False).head()

var.sort_index(axis=1).head()

var.hr.sort_values()

var[['string', 'string', 'string']].sort_values(ascending=[False, True], by

    =['string', 'string']).head(10)

var.hr.rank()

pd.Series([100, 100]).rank()

var.hr.rank(method='string')

var.rank(ascending=False).head()

var[['string', 'string', 'string']].rank(ascending=False).head()

def on_base(x):

    """string"""

    var = var['string'] + var['string'] + var['string']

    var = var['string'] + var['string'] + var['string'] + var['string'] + 1e-06

    return var / var

var.apply(var, axis=1).round(3).sort_values(ascending=False)

var = var.set_index(['string', 'string', 'string'])

var.head(10)

var.index[:10]

var.index.is_unique

var.loc[2007, 'string', 'string']

var = pd.read_csv('string', index_col=['string', 'string'])

var.head(10)

var.loc['string']

var = pd.DataFrame(np.arange(12).reshape((4, 3)), index=[['string',

    'string', 'string', 'string'], [1, 2, 1, 2]], columns=[['string',

    'string', 'string'], ['string', 'string', 'string']])

var

var.index.names = ['string', 'string']

var.columns.names = ['string', 'string']

var

var.loc['string', 'string']

var.loc[('string', 2), 'string']

var.swaplevel('string', 'string').head()

var.sortlevel('string', ascending=False).head()

var = pd.Series([np.nan, -3, None, 'string'])

var

var.isnull()

var

var.dropna()

var.isnull()

var[var.notnull()]

var.dropna()

var.dropna(how='string')

var.loc[7, 'string'] = np.nan

var

var.dropna(thresh=5)

var.dropna(axis=1)

var.fillna(0)

var.fillna({'string': 2013, 'string': 2})

var.year.fillna(2013, inplace=True)

var

var.fillna(method='string')

var.sum()

var.mean()

var

var.mean()

var.mean(skipna=False)

var = var[['string', 'string', 'string']].sum(axis=1)

var.sort_values(ascending=False)

var.describe()

var.player.describe()

var.hr.cov(var.X2b)

var.hr.corr(var.X2b)

var.ab.corr(var.h)

var.corr()

var.head()

var.sum(level='string')

var.to_csv('string')

var.to_pickle('string')

pd.read_pickle('string')

import import as import

var = [pd.DataFrame(columns=['string', 'string', 'string']) for var in range(3)

    ]

var = 'string'

var = var[0]

for var in os.listdir(var):

    var = pd.read_csv(var + var, parse_dates=['string'])

    var = var.Date[0]

    var = var[(var.Description == 'string') | (var.Description == 'string')]

    var.index = var.Date

    var = int(var.loc[var, 'string'])

    var = var[var.Description == 'string']

    var.index = var.Date

    var = int(var.loc[var, 'string'])

    var.loc[var] = [var, var, 'string']

var = 'string'

var = var[1]

for var in os.listdir(var):

    var = pd.read_csv(var + var, parse_dates=['string']).fillna(0)

    var = var.Date[0]

    var.index = var.Variable

    var = int(var.loc['string', 'string'])

    var = int(var.loc['string', 'string'])

    var.loc[var] = [var, var, 'string']

var = 'string'

var = var[2]

for var in os.listdir(var):

    var = pd.read_csv(var + var, parse_dates=['string']).fillna(0)

    var = var.date[0]

    var.index = var.variable

    var = int(var.loc['string', 'string'])

    var = int(var.loc['string', 'string'])

    var.loc[var] = [var, var, 'string']

var = []

for var in var:

    var.index.name = 'string'

    var.append(var.set_index(['string', var.index]))

pd.concat(var, axis=0)

Running Jplag

To run jplag, we need to write all of our files to a directory, and then setup the command with the .jar file that needs to be run on the command line


In [5]:
import os
for i in range(len(hw_code)):
    base_name = 'plagiarism/homework_code_cleaned/hw' + str(i) + '_'
    for j, code_body in enumerate(hw_code[i]):
        fname = base_name + 'student_' + str(j) + ".py"
        f = open(fname,'w')
        f.write(code_body)
        f.close

In [6]:
import os
jar_file = 'plagiarism/jplag-2.11.9-SNAPSHOT-jar-with-dependencies.jar'
lang = 'python3'
results = 'plagiarism/results_cleaned'
students = 'plagiarism/homework_code_cleaned'
command = "java -jar " + jar_file + " -l " + lang + " -r " + results + " -s " + students + " -m 200"

In [9]:
print("nohup",command,"> plagiarism/experiment_cleaned.out &")


nohup java -jar plagiarism/jplag-2.11.9-SNAPSHOT-jar-with-dependencies.jar -l python3 -r plagiarism/results_cleaned -s plagiarism/homework_code_cleaned -m 200 > plagiarism/experiment_cleaned.out &

After we run the JPlag command

While JPlag produces a nice report that is human readable, we want the pairwise similarities, which are printed out by JPlag as it runs. By parsing the output file we can get these similarities that we will use for prediction


In [ ]:
output = open('plagiarism/experiment_cleaned.out','r')
lines = [line for line in output if line[:9] == 'Comparing']

In [1]:
output = open('plagiarism/experiment_cleaned.out','r')
lines = [line for line in output if line[:9] == 'Comparing']

In [2]:
len(lines)


Out[2]:
61776

In [3]:
# Create the dictionary of pairwise sims
my_dict = {}
for line in lines:
    hw1 = line.split()[1].split('-')[0].split('.')[0]
    hw2 = line.split()[1].split('-')[1].split('.')[0]
    val = line.split()[2]
    if hw1 not in my_dict:
        my_dict[hw1] = {}
    if hw2 not in my_dict:
        my_dict[hw2] = {}
    my_dict[hw1][hw2] = val
    my_dict[hw2][hw1] = val

Inter and Intra Similarities

The first measure that we can use to determine if something reasonable is happening is to look at, for each homework, the average similarity of two notebooks both pulled from that homework, and the average similarity of a notebook pulled from that homework and any notebook in the corpus not pulled from that homework. These are printed below


In [5]:
import numpy as np
def get_avg_inter_intra_sims(sim_dict, hw):
    cur_hw = 'hw' + str(hw)
    in_vals = []
    out_vals = []
    for key in sim_dict.keys():
        if key[:3] != cur_hw:
            continue
        for key2 in sim_dict[key].keys():
            if key2[:3] != cur_hw:
                out_vals.append(float(sim_dict[key][key2]))
            else:
                in_vals.append(float(sim_dict[key][key2]))
    return in_vals, out_vals


for i in range(6):
    intra_sims, inter_sims = get_avg_inter_intra_sims(my_dict, i)
    print('Mean intra similarity for hw',i,'is',np.mean(intra_sims),'with std',np.std(intra_sims))
    print('Mean inter similarity for hw',i,'is',np.mean(inter_sims),'with std',np.std(inter_sims))
    print('----')


Mean intra similarity for hw 0 is 75.38015597399182 with std 12.299211838184748
Mean inter similarity for hw 0 is 7.701982832481055 with std 6.8603984611060715
----
Mean intra similarity for hw 1 is 67.4203401648159 with std 19.328966601511585
Mean inter similarity for hw 1 is 8.762574596118471 with std 6.376304620919762
----
Mean intra similarity for hw 2 is 4.827964967513611 with std 3.0326538887152887
Mean inter similarity for hw 2 is 5.652557806837908 with std 3.3464660391264553
----
Mean intra similarity for hw 3 is 6.608522854120397 with std 3.672315918181055
Mean inter similarity for hw 3 is 5.291771157227975 with std 3.040508991062362
----
Mean intra similarity for hw 4 is 7.35182069006429 with std 3.770690031727822
Mean inter similarity for hw 4 is 6.214205078156996 with std 3.2846233319870275
----
Mean intra similarity for hw 5 is 7.592112775922565 with std 3.907326584269893
Mean inter similarity for hw 5 is 5.414714503606615 with std 3.0307103943917393
----

In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = 5, 10
def get_all_sims(sim_dict, hw):
    cur_hw = 'hw' + str(hw)
    sims = []
    for key in sim_dict.keys():
        for key2 in sim_dict[key].keys():
            if key[:3] != cur_hw and key2[:3] != cur_hw:
                continue
            sims.append(float(sim_dict[key][key2]))
    return sims
fig, axes = plt.subplots(6)
for i in range(6):
    axes[i].hist(get_all_sims(my_dict,i), bins=50)


Actual Prediction

While the above results are helpful, it is better to use a classifier that uses more information. The setup is as follows:

  1. Split the data into train and test
  2. For each notebook, generate a feature vector that is calculated as the similarity between the notebook and each notebook of the train set
  3. Build a random forest classifier that uses this feature representation, and measure the performance

In [14]:
from sklearn.model_selection import train_test_split
features = [key for key in my_dict]
feature_map = {}
test_features = set()

indices = [i for i in range(len(features))]
#import pdb; pdb.set_trace()
train, test = train_test_split(indices, test_size=.2)
for i in test:
    test_features.add(features[i])
train_features = []
for i in train:
    train_features.append(features[i])
for i, el in enumerate(train_features):
    feature_map[el] = i
    
X = np.zeros((len(train),len(train)))
y = []
X_test = np.zeros((len(test), len(train)))
y_test = []
for i, el in enumerate(train_features):
    for key in my_dict[el]:
        if key not in feature_map:
            continue
        loc = feature_map[key]
        X[i, loc] = my_dict[el][key]
    y.append(int(el[2]))

for i, el in enumerate(test_features):
    for key in my_dict[el]:
        if key not in feature_map:
            continue
        loc = feature_map[key]
        X_test[i, loc] = my_dict[el][key]
    y_test.append(int(el[2]))

In [21]:
import sklearn
from sklearn.ensemble import RandomForestClassifier

clf = sklearn.ensemble.RandomForestClassifier(n_estimators=400, max_depth=4)
clf.fit(X, y)


Out[21]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=4, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=400, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [22]:
clf.predict(X_test)


Out[22]:
array([3, 1, 4, 3, 2, 2, 2, 0, 0, 0, 3, 5, 2, 2, 2, 3, 5, 4, 3, 3, 1, 0,
       1, 2, 4, 3, 4, 1, 4, 0, 2, 3, 4, 0, 1, 5, 5, 3, 2, 4, 3, 0, 2, 2,
       2, 0, 4, 2, 0, 3, 0, 3, 3, 1, 3, 3, 2, 3, 3, 3, 5, 4, 0, 3, 1, 5,
       0, 1, 0, 4, 3])

Results

Below are the results of the prediction. We can see a good deal of predictive power, though there is room for improvement


In [23]:
import numpy as np
np.sum(clf.predict(X_test)==y_test)/len(y_test)


Out[23]:
0.6619718309859155

In [24]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(clf.predict(X_test),y_test)
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(cm, cmap=plt.cm.Blues)
plt.show()



In [25]:
clfi = clf.feature_importances_
sa = []
for i in range(len(clfi)):
    sa.append((clfi[i], train_features[i]))
sra = [el for el in reversed(sorted(sa))]

In [26]:
for i in range(100):
    print(sra[i])


(0.018575009941791808, 'hw1_student_50')
(0.01776631171920686, 'hw1_student_49')
(0.017741581549647292, 'hw1_student_44')
(0.0163665821444652, 'hw1_student_1')
(0.014837947663380473, 'hw1_student_18')
(0.014342546847770015, 'hw1_student_34')
(0.013758786955744522, 'hw5_student_31')
(0.01339456840665277, 'hw1_student_53')
(0.013076254027043552, 'hw0_student_54')
(0.01291142423027971, 'hw0_student_1')
(0.01271874568024469, 'hw0_student_22')
(0.012369943585143801, 'hw1_student_42')
(0.012219621162526959, 'hw5_student_7')
(0.012157640900844702, 'hw1_student_2')
(0.011978364584829931, 'hw1_student_21')
(0.011921257207399888, 'hw1_student_45')
(0.011104685549346188, 'hw1_student_6')
(0.011073027941261145, 'hw1_student_13')
(0.011060043636568363, 'hw1_student_57')
(0.010680765123711937, 'hw1_student_54')
(0.01064863485881129, 'hw0_student_36')
(0.01042696009410907, 'hw1_student_39')
(0.010335577353650956, 'hw4_student_44')
(0.010246655258617227, 'hw0_student_6')
(0.009860168084762559, 'hw0_student_14')
(0.009812971837624845, 'hw1_student_20')
(0.00977056833501722, 'hw1_student_56')
(0.009664697545787233, 'hw5_student_8')
(0.009646410527124944, 'hw0_student_43')
(0.009574508225623475, 'hw1_student_46')
(0.00949860314030383, 'hw1_student_51')
(0.009464936828124455, 'hw1_student_32')
(0.009372480682365103, 'hw1_student_24')
(0.009363586989309669, 'hw1_student_47')
(0.009297551680948762, 'hw0_student_18')
(0.009289159762310319, 'hw1_student_48')
(0.008927592314727267, 'hw0_student_50')
(0.008768999980542721, 'hw0_student_7')
(0.008722662431430049, 'hw2_student_24')
(0.008700507040596812, 'hw0_student_27')
(0.008669355146444157, 'hw1_student_52')
(0.008608909677475912, 'hw0_student_51')
(0.008539574275358296, 'hw0_student_5')
(0.00846331715490769, 'hw1_student_14')
(0.008376668855859862, 'hw1_student_41')
(0.008353356455873344, 'hw1_student_38')
(0.008285664422736053, 'hw1_student_58')
(0.008278027364689909, 'hw1_student_15')
(0.008032768408567013, 'hw1_student_37')
(0.007922848325116699, 'hw4_student_23')
(0.007809672616586609, 'hw1_student_11')
(0.0076467862584139136, 'hw1_student_35')
(0.007520921142238202, 'hw1_student_3')
(0.007487683842638193, 'hw0_student_23')
(0.007419672878850182, 'hw0_student_28')
(0.007290949543584018, 'hw1_student_17')
(0.007188429443265011, 'hw0_student_29')
(0.007029793142306842, 'hw0_student_21')
(0.007014421101725939, 'hw0_student_52')
(0.006946774215463024, 'hw1_student_36')
(0.006883264993055416, 'hw0_student_26')
(0.006805285148558294, 'hw0_student_48')
(0.006661015482617164, 'hw1_student_16')
(0.00663232148725141, 'hw0_student_40')
(0.006626208487917845, 'hw5_student_36')
(0.00656640654883093, 'hw4_student_31')
(0.0065151629521278225, 'hw0_student_25')
(0.006251971905511351, 'hw0_student_4')
(0.00619626971653498, 'hw1_student_0')
(0.006181209524095805, 'hw5_student_52')
(0.006179246318187332, 'hw0_student_47')
(0.0061614556061135705, 'hw5_student_27')
(0.0059660365519098, 'hw1_student_29')
(0.005680309576371567, 'hw1_student_28')
(0.005659832548242958, 'hw4_student_46')
(0.0055269359688129415, 'hw4_student_27')
(0.005516068380016173, 'hw0_student_46')
(0.005367954845066169, 'hw0_student_37')
(0.00528111532715943, 'hw4_student_5')
(0.005195058229760808, 'hw0_student_20')
(0.005135727557190817, 'hw1_student_31')
(0.005123362227443393, 'hw0_student_39')
(0.0050279445971662465, 'hw5_student_42')
(0.004995638217500624, 'hw0_student_17')
(0.004968252421319993, 'hw0_student_56')
(0.004956949350508308, 'hw1_student_5')
(0.0049566497543847565, 'hw1_student_19')
(0.004799989909683989, 'hw4_student_20')
(0.004774594183770655, 'hw0_student_32')
(0.004766556011291109, 'hw1_student_27')
(0.004741875696175097, 'hw1_student_4')
(0.004704552114030805, 'hw4_student_18')
(0.004659834317148503, 'hw0_student_8')
(0.004550910004677373, 'hw0_student_34')
(0.0045042829124088204, 'hw0_student_42')
(0.004481193575946136, 'hw0_student_44')
(0.004439318304897899, 'hw0_student_55')
(0.004410813664553351, 'hw0_student_45')
(0.004333344804759836, 'hw1_student_10')
(0.004286715144320806, 'hw1_student_43')

In [ ]: