In [1]:
import time
import xml.etree.cElementTree
import numpy as np
import ast
from html.parser import HTMLParser

# create a subclass and override the handler methods
class MyHTMLParser(HTMLParser):
    def __init__(self):
        HTMLParser.__init__(self)
        self.code_start = 0
        self.code = []
    
    def reset_vars(self):
        self.code = []
        self.code_start = 0

    def handle_starttag(self, tag, attrs):
        if self.code_start == 1:
            pass
        else:
            if tag == 'code':
                self.code_start = 1
                self.code.append('')

    def handle_endtag(self, tag):
        if tag == 'code':
            self.code_start = 0

    def handle_data(self, data):
        if self.code_start == 1:
            self.code[-1] += data

In [8]:
import tqdm
import datetime
def is_valid_python(code):
    try:
        ast.parse(code)
    except SyntaxError:
        return False
        if 'import ' in code:
            return True
        else:
            return False
    return True

def parse_file(filename):
    # instantiate the parser and fed it some HTML
    parser = MyHTMLParser()
    
    first_day = datetime.datetime(2008,1,1)
    all_code = []
    code_time = []
    total = 0
    cur_diff = None
    for event, elem in xml.etree.ElementTree.iterparse(filename):
        total += 1
        if total == 10000*300:
            break

        for neighbor in elem.iter('row'):
            bod = (neighbor.attrib['Body'])
            if total % 10000 == 0:
                creation_date = neighbor.attrib['CreationDate']
                #print(creation_date)
                #print(total/10000)
            cd = neighbor.attrib['CreationDate']
            d = datetime.datetime(year=int(cd[:4]),month=int(cd[5:7]), day=int(cd[8:10]))
            diff = d-first_day
            if cur_diff is not None:
                if diff < cur_diff:
                    print(d-first_day-cur_diff)
            cur_diff=diff
            parser.feed(bod)
            for el in parser.code:
                if is_valid_python(el):
                    cd = neighbor.attrib['CreationDate']
                    #print(cd)
                    #print(cd[:4],cd[5:7],cd[9:10])
                    d = datetime.datetime(year=int(cd[:4]),month=int(cd[5:7]), day=int(cd[8:10]))
                    diff_time = d-first_day
                    all_code.append((el,diff_time))
            parser.reset_vars()
        elem.clear()


    return all_code

%matplotlib inline
import matplotlib.pyplot as plt
def print_summary(all_code, python_code):
    print('We have looked through',len(all_code),'posts.')
    zero_posts = len([el for el in all_code if len(el) > 0])
    print('We have',zero_posts,'posts with no code')
    print('Our average code blocks per code post is:',np.mean([len(el) for el in all_code if len(el) > 0]))
    print('Number of python code blocks is:',len(python_code),'out of',np.sum([len(el) for el in all_code]),'total blocks')
    larger_than_1 = len([el for el in python_code if len(el.split('\n')) > 1])
    print('Number of python code blocks that are greater than one line of code is:',larger_than_1)
    plt.hist([len(code) for code in python_code if len(code) < 2000],bins=100)

In [6]:
filename = '/dfs/scratch2/fcipollone/stackoverflow/exchange/datascience/Posts.xml'
filename = '/dfs/scratch2/fcipollone/stackoverflow/exchange/iot/Posts.xml'
filename = '/dfs/scratch2/fcipollone/stackoverflow/exchange/cs/Posts.xml'
filename = '/dfs/scratch2/fcipollone/stackoverflow/exchange/opensource/Posts.xml'

In [9]:
filename = '/dfs/scratch2/fcipollone/stackoverflow/Posts.xml'
python_code = parse_file(filename)
print('Summary for Stack Overflow:')
#print_summary(all_code, python_code)


-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-25 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-7 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-73 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-29 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-75 days, 0:00:00
-2 days, 0:00:00
-193 days, 0:00:00
-138 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-13 days, 0:00:00
-153 days, 0:00:00
-7 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-68 days, 0:00:00
-113 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-116 days, 0:00:00
-1 day, 0:00:00
-22 days, 0:00:00
-1 day, 0:00:00
-188 days, 0:00:00
-71 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-146 days, 0:00:00
-1 day, 0:00:00
-98 days, 0:00:00
-219 days, 0:00:00
-33 days, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-143 days, 0:00:00
-1 day, 0:00:00
-177 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-237 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-25 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-88 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-152 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-177 days, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-30 days, 0:00:00
-1 day, 0:00:00
-5 days, 0:00:00
-1 day, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-36 days, 0:00:00
-17 days, 0:00:00
-17 days, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-43 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-1 day, 0:00:00
-40 days, 0:00:00
-30 days, 0:00:00
-40 days, 0:00:00
-28 days, 0:00:00
-12 days, 0:00:00
-39 days, 0:00:00
-1 day, 0:00:00
-40 days, 0:00:00
-28 days, 0:00:00
-4 days, 0:00:00
-40 days, 0:00:00
-7 days, 0:00:00
-37 days, 0:00:00
-40 days, 0:00:00
-40 days, 0:00:00
-24 days, 0:00:00
-10 days, 0:00:00
-40 days, 0:00:00
-5 days, 0:00:00
-5 days, 0:00:00
-33 days, 0:00:00
-36 days, 0:00:00
-40 days, 0:00:00
-4 days, 0:00:00
-40 days, 0:00:00
-4 days, 0:00:00
-37 days, 0:00:00
-40 days, 0:00:00
-2 days, 0:00:00
-28 days, 0:00:00
-4 days, 0:00:00
-39 days, 0:00:00
-6 days, 0:00:00
-24 days, 0:00:00
-6 days, 0:00:00
-32 days, 0:00:00
-26 days, 0:00:00
-38 days, 0:00:00
-3 days, 0:00:00
-4 days, 0:00:00
-5 days, 0:00:00
-20 days, 0:00:00
-40 days, 0:00:00
-37 days, 0:00:00
-6 days, 0:00:00
-18 days, 0:00:00
-30 days, 0:00:00
-40 days, 0:00:00
-27 days, 0:00:00
-12 days, 0:00:00
-39 days, 0:00:00
-5 days, 0:00:00
-3 days, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-26 days, 0:00:00
-26 days, 0:00:00
-6 days, 0:00:00
-27 days, 0:00:00
-38 days, 0:00:00
-3 days, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-29 days, 0:00:00
-33 days, 0:00:00
-25 days, 0:00:00
-28 days, 0:00:00
-1 day, 0:00:00
-39 days, 0:00:00
-7 days, 0:00:00
-4 days, 0:00:00
-40 days, 0:00:00
-26 days, 0:00:00
-37 days, 0:00:00
-5 days, 0:00:00
-30 days, 0:00:00
-5 days, 0:00:00
-5 days, 0:00:00
-40 days, 0:00:00
-28 days, 0:00:00
-3 days, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-40 days, 0:00:00
-19 days, 0:00:00
-19 days, 0:00:00
-5 days, 0:00:00
-5 days, 0:00:00
-6 days, 0:00:00
-6 days, 0:00:00
-30 days, 0:00:00
-25 days, 0:00:00
-40 days, 0:00:00
-6 days, 0:00:00
-30 days, 0:00:00
-17 days, 0:00:00
-36 days, 0:00:00
-1 day, 0:00:00
-40 days, 0:00:00
-32 days, 0:00:00
-40 days, 0:00:00
-11 days, 0:00:00
-5 days, 0:00:00
-5 days, 0:00:00
-6 days, 0:00:00
-4 days, 0:00:00
-6 days, 0:00:00
-4 days, 0:00:00
-2 days, 0:00:00
-4 days, 0:00:00
-33 days, 0:00:00
-3 days, 0:00:00
-6 days, 0:00:00
-1 day, 0:00:00
-6 days, 0:00:00
-5 days, 0:00:00
-25 days, 0:00:00
-5 days, 0:00:00
-5 days, 0:00:00
-6 days, 0:00:00
-27 days, 0:00:00
-6 days, 0:00:00
-5 days, 0:00:00
-5 days, 0:00:00
-30 days, 0:00:00
-4 days, 0:00:00
-4 days, 0:00:00
-31 days, 0:00:00
-6 days, 0:00:00
-5 days, 0:00:00
-41 days, 0:00:00
-41 days, 0:00:00
-7 days, 0:00:00
-5 days, 0:00:00
-6 days, 0:00:00
-4 days, 0:00:00
-30 days, 0:00:00
-40 days, 0:00:00
-8 days, 0:00:00
-15 days, 0:00:00
-25 days, 0:00:00
-7 days, 0:00:00
-18 days, 0:00:00
-31 days, 0:00:00
-35 days, 0:00:00
-26 days, 0:00:00
-7 days, 0:00:00
-25 days, 0:00:00
-32 days, 0:00:00
-4 days, 0:00:00
-22 days, 0:00:00
-32 days, 0:00:00
-7 days, 0:00:00
-4 days, 0:00:00
-34 days, 0:00:00
-40 days, 0:00:00
-32 days, 0:00:00
-3 days, 0:00:00
-6 days, 0:00:00
-39 days, 0:00:00
-5 days, 0:00:00
-4 days, 0:00:00
-28 days, 0:00:00
-1 day, 0:00:00
-3 days, 0:00:00
-8 days, 0:00:00
-42 days, 0:00:00
-8 days, 0:00:00
-31 days, 0:00:00
-28 days, 0:00:00
-26 days, 0:00:00
-19 days, 0:00:00
-6 days, 0:00:00
-10 days, 0:00:00
-7 days, 0:00:00
-24 days, 0:00:00
-31 days, 0:00:00
-3 days, 0:00:00
-6 days, 0:00:00
-23 days, 0:00:00
-7 days, 0:00:00
-24 days, 0:00:00
-7 days, 0:00:00
-7 days, 0:00:00
-6 days, 0:00:00
-27 days, 0:00:00
-15 days, 0:00:00
-7 days, 0:00:00
-7 days, 0:00:00
-8 days, 0:00:00
-22 days, 0:00:00
-28 days, 0:00:00
-34 days, 0:00:00
-6 days, 0:00:00
-7 days, 0:00:00
-6 days, 0:00:00
-7 days, 0:00:00
-3 days, 0:00:00
-7 days, 0:00:00
-34 days, 0:00:00
-8 days, 0:00:00
-7 days, 0:00:00
-3 days, 0:00:00
-4 days, 0:00:00
-6 days, 0:00:00
-8 days, 0:00:00
-32 days, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
-4 days, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-1 day, 0:00:00
-2 days, 0:00:00
Summary for Stack Overflow:

In [8]:
len(python_code)


Out[8]:
1555262

In [ ]:


In [9]:
longer_python_code = [p for p in python_code if len(p[0].split('\n')) > 1]
len(longer_python_code)


Out[9]:
175280

In [10]:
import sys
home_directory = '/dfs/scratch2/fcipollone'
sys.path.append(home_directory)
import numpy as np
from nbminer.notebook_miner import NotebookMiner
from nbminer.notebook_miner import NotebookMinerString
from nbminer.pipeline.pipeline import Pipeline
from nbminer.features.features import Features
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
from nbminer.preprocess.resample_by_node import ResampleByNode
from nbminer.encoders.ast_graph.ast_graph import ASTGraphReducer
from nbminer.preprocess.feature_encoding import FeatureEncoding
from nbminer.encoders.cluster.kmeans_encoder import KmeansEncoder
from nbminer.results.similarity.jaccard_similarity import NotebookJaccardSimilarity
from nbminer.results.prediction.corpus_identifier import CorpusIdentifier
nbs = [NotebookMinerString(longer_python_code[i][0]) for i in range(len(longer_python_code))]
a = Features(nbs)
gastf = GetASTFeatures()
rbn = ResampleByNode()
gi = GetImports()
#agr = ASTGraphReducer(a, threshold=8, split_call=False)
#ci = CorpusIdentifier()
pipe = Pipeline([gastf, rbn, gi])
a = pipe.transform(a)


<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f2298f19a58>
175280
<nbminer.preprocess.resample_by_node.ResampleByNode object at 0x7f2298f199b0>
175280
<nbminer.preprocess.get_imports.GetImports object at 0x7f2298f199e8>
175280

In [16]:
time_range = 10
import_name = 'LinearRegression'
result = {}
aggregate_result = {}
for i in range(len(longer_python_code)):
    import_list = a.get_notebook(i).get_feature('imports')
    if i < 1000:
        print(import_list)
    d = longer_python_code[i][1]
    for key in import_list.keys():
        time_step = int(d.days/time_range)
        if key.strip() == import_name:
            if time_step not in result:
                result[time_step] = 0
            result[time_step] += 1
        if time_step not in aggregate_result:
            aggregate_result[time_step] = 0
        aggregate_result[time_step] += 1


%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
tuples = sorted([(key, result[key]) for key in result.keys()])
x_vals = np.array([el[0] for el in tuples])
y_vals = np.array([el[1] for el in tuples])
print(x_vals, y_vals)
y_vals = y_vals/max(y_vals)
plt.plot(x_vals,y_vals)
tuples_agg = sorted([(key, aggregate_result[key]) for key in aggregate_result.keys()])
print(tuples_agg)
x_vals_agg = np.array([el[0] for el in tuples_agg])
y_vals_agg = np.array([el[1] for el in tuples_agg])
y_vals_agg = y_vals_agg/max(y_vals_agg)
plt.plot(x_vals_agg,y_vals_agg)


{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'cgi': 'cgi', 'cgitb': 'cgitb', 'MySQLdb': 'MySQLdb'}
{'cgitb': 'cgitb', 'cgi': 'cgi', 'MySQLdb': 'MySQLdb'}
{}
{'SomeClass': 'SomeOtherProduct.SomeModule.SomeClass'}
{}
{}
{}
{}
{'msvcrt': 'msvcrt'}
{}
{}
{}
{}
{}
{}
{}
{}
{'FTP': 'ftplib.FTP'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'foo': 'foo'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'sys': 'sys'}
{'sys': 'sys'}
{'sys': 'sys', 'time': 'time'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'new': 'new'}
{}
{}
{}
{}
{}
{}
{}
{}
{'math': 'math'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'re': 're'}
{}
{}
{}
{'sys': 'sys'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'etree': 'lxml.etree', 'StringIO': 'StringIO.StringIO'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'zipfile': 'zipfile', 'os': 'os'}
{'chilkat': 'chilkat'}
{}
{}
{}
{}
{}
{}
{}
{}
{'setup': 'setuptools.setup'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'sys': 'sys', 'getopt': 'getopt'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'urllib2': 'urllib2'}
{'urllib2': 'urllib2'}
{'urllib': 'urllib'}
{'urllib': 'urllib'}
{}
{}
{'urllib': 'urllib'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'operator': 'operator'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'subprocess': 'subprocess'}
{'sys': 'sys'}
{}
{}
{}
{}
{'struct': 'struct'}
{'urljoin': 'urlparse.urljoin', 'BeautifulSoup': 'BeautifulSoup.BeautifulSoup', 'Comment': 'BeautifulSoup.Comment', 're': 're'}
{'zipfile': 'zipfile', 'StringIO': 'StringIO'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'image': 'image'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'*': 'xml.dom.minidom.*'}
{'sys': 'sys', 'Element': 'xml.etree.cElementTree.Element', 'ElementTree': 'xml.etree.cElementTree.ElementTree'}
{'*': 'xml.dom.minidom.*'}
{}
{}
{}
{}
{}
{}
{'setup': 'distutils.core.setup'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'script': 'werkzeug.script'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'win32service': 'win32service', 'win32serviceutil': 'win32serviceutil', 'socket': 'socket', 'win32event': 'win32event', 'servicemanager': 'servicemanager'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'unittest': 'unittest'}
{'parameterized': 'nose_parameterized.parameterized'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'NSBeep': 'AppKit.NSBeep'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'namedtuple': 'collections.namedtuple'}
{}
{}
{}
{}
{}
{}
{'locale': 'locale'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'create_object': 'django.views.generic.create_update.create_object', 'Product': 'my_products_app.models.Product', '*': 'django.conf.urls.defaults.*'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'lxml.html': 'lxml.html'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'odbc': 'odbc', 'dbi': 'dbi'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'Product': 'my_app.models.Product', 'widgets': 'django.contrib.admin.widgets', 'forms': 'django.forms'}
{}
{}
{}
{}
{}
{}
{'mkstemp': 'tempfile.mkstemp', 'remove': 'os.remove', 'move': 'shutil.move', 'fdopen': 'os.fdopen'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'somepackage': 'somepackage'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'glob': 'glob', 'os': 'os'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'os': 'os'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'time': 'time', 'os': 'os'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'inspect': 'inspect'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'imap': 'itertools.imap'}
{}
{'*': 'augustus.kernel.unitable.*'}
{'Project': 'app.controllers.project.Project'}
{'Project': 'app.controllers.Project'}
{}
{'Project': 'project.Project'}
{'Project': 'project.Project'}
{}
{'tkMessageBox': 'tkMessageBox', '*': 'Tkinter.*'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'*': '__future__.*'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'*': 'visual.*'}
{'webapp': 'google.appengine.ext.webapp'}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{}
{'BeautifulSoup': 'bs4.BeautifulSoup'}
{}
{}
{}
{}
{}
{}
{}
[] []
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-16-538863a5d8c2> in <module>()
     26 y_vals = np.array([el[1] for el in tuples])
     27 print(x_vals, y_vals)
---> 28 y_vals = y_vals/max(y_vals)
     29 plt.plot(x_vals,y_vals)
     30 tuples_agg = sorted([(key, aggregate_result[key]) for key in aggregate_result.keys()])

ValueError: max() arg is an empty sequence

In [44]:
for i in range(100):
    funcs = []
    for cell in a.get_notebook(i).get_all_cells():
        funcs.extend(cell.get_feature('full_name_string'))
    print(funcs)


[]
['var.system.System.gc', 'var.system.System.gc']
['var']
['var.UTF8.GetString', 'var.UTF8.GetBytes']
[]
['var', 'var', 'var', 'var', 'var', 'var', 'var']
[]
['var.execute', 'var.fetchall']
['var.execute', 'var.fetchone']
['dict']
['dict']
['hasattr']
['var']
[]
['var', 'var.append', 'var.append', 'list']
['cgitb.enable']
['cgitb.enable']
['var.getTimezoneOffset']
[]
['var', 'var']
[]
[]
[]
['msvcrt.getch']
[]
[]
['var.BindData.Invoke']
['var.AJAXify']
['var.Attributes.Add']
['var.FindControl']
['var', 'var']
['var']
['ftplib.FTP', 'exit', 'open', 'var.delete', 'var.storbinary', 'var.close', 'var.dir', 'var.quit']
['open']
[]
['var', 'var']
['exec', 'exec']
['var.ToString', 'var.ToChar', 'var.Parse', 'var.Substring']
[]
['var.GetRecordset', 'var.GetDataset', 'var.Execute']
['var.alpha', 'var.r', 'var.r', 'var.g', 'var.g', 'var.b', 'var.b']
['var']
['var']
[]
['var.gethostbyname']
['var', 'var', 'var']
['var']
['var']
['getattr', 'var']
['var.getattr', 'getattr']
['var.Extensions.Base64Encode']
['var.ASCII.GetString']
['var.ToInt32']
[]
[]
['var', 'map']
['var.getaddrinfo', 'var']
['map']
[]
['var', 'var']
[]
[]
[]
[]
[]
['var.Utility.RegisterTypeForAjax', 'var']
['var.Get5']
['var.createElement']
[]
['var', 'var']
['var.toString']
['var.toString']
['var.toString']
['var.Join', 'var.ToArray']
['var.Start']
['__import__', 'getattr', 'var']
['MyMethod']
[]
['var.SetParentRow']
[]
[]
['var', 'var', 'var']
['var', 'var', 'var', 'var', 'var']
['var', 'var']
['var.SetParentRow', 'var.Relations']
['var.IsNotNull']
['var.Animals.GetEnumerator']
[]
[]
['var.Create']
['var.getElementById']
['var.getElementById']
['var.getElementById', 'var.getElementById']
['var.stat']
[]
[]
[]
['var.append', 'var.append']
['int', 'input.split', 'range', 'input.join.split', 'range', 'len', 'len', 'var.input.replace', 'input.startswith', 'input.remove', 'var.input.replace', 'join', 'input.remove']
['var.replace', 'var.var.sub.split', 'var.sub']

In [ ]: