In [1]:
%load_ext autoreload
import re
%aimport asm_utils
In [2]:
# Collection handle returned by asm_utils.get_collection(); the cells below
# query it with find()/find_one() and write to it with save()/update().
samples = asm_utils.get_collection()
In [3]:
# Get the ids ("names of files") of every document in mongo whose 'class'
# field is the string '5'; class 5 is only used here for testing.
names = [post['id'] for post in samples.find({'class':'5'})]
# NOTE(review): names[0] raises IndexError when the query matches nothing.
test_file = names[0]
# Bare expression: shows the chosen id as the cell output.
test_file
Out[3]:
In [4]:
# Fetch one document by a hard-coded id so later cells have a fixed test post.
test_id = '0qjuDC7Rhx9rHkLlItAp'
# NOTE(review): presumably find_one returns None when nothing matches, in
# which case the print below would fail — confirm the id exists.
post_test = samples.find_one({'id': test_id})
print(post_test['id'])
In [5]:
# Read asm_instructions.csv into a list with one whitespace-stripped entry
# per line; the file handle is closed when the `with` block exits.
with open('asm_instructions.csv', 'r') as f:
    asm_instr = [line.strip() for line in f]
In [104]:
# Scratch check of the extraction pipeline on the single test document.
# Uses `post_test` (fetched above with find_one) instead of a stray `post`
# variable that does not exist at top level on a fresh kernel.
asm = asm_utils.read_assembly(post_test['id'])
instr_seq = asm_utils.get_inst(asm, asm_instr)
# asm_seq_info lives in asm_utils (same call as in make_asm_info); the
# unqualified name is not defined in this notebook.
test_asm_info = asm_utils.asm_seq_info(instr_seq)
In [51]:
# test_a = [to_utf(a) for a in asm if len(a) > 1]
# words = [line.split() for line in test_a if line != None]
In [6]:
def make_asm_info(post):
    """Attach assembly information to a mongodb post and save it.

    Reads the assembly for ``post['id']`` with ``asm_utils.read_assembly``,
    extracts its instruction sequence with ``asm_utils.get_inst`` (using the
    notebook-level ``asm_instr`` list), stores the result of
    ``asm_utils.asm_seq_info`` under ``post['asm_info']`` and writes the
    post back through ``samples.save``.

    Args:
        post: mongodb document (dict-like) that has an 'id' key; it is
            modified in place.

    Returns:
        None.
    """
    instructions = asm_utils.get_inst(
        asm_utils.read_assembly(post['id']),
        asm_instr,
    )
    # Fields that were tried as separate keys and are currently disabled:
    #   post['asm_instr_seq'] = instr_seq
    #   post['asm_instr_count'] = len(instr_seq)
    #   post['asm_uniq_instr'] = set(instr_seq)
    #   post['asm_uniq_count'] = len(set(instr_seq))
    post['asm_info'] = asm_utils.asm_seq_info(instructions)
    samples.save(post)
In [7]:
In [8]:
# Try the function on the single test document first; this writes the
# 'asm_info' field of post_test back to the collection.
make_asm_info(post_test)
In [13]:
# Unfiltered query over the whole collection; iterated by the loop cell below.
documents = samples.find()
In [15]:
# Compute and save assembly info for every document yielded by `documents`,
# printing each id as progress.
# NOTE(review): if `documents` is a one-shot cursor, re-running this cell
# without re-running the query cell would process nothing — confirm.
for doc in documents:
print('extracting assembly info for %s' % doc['id'])
make_asm_info(doc)
Out[15]:
In [10]:
In [21]:
In [66]:
In [34]:
In [117]:
In [118]:
# Keep only the conditional-jump mnemonics found in `a`.
# 'ne' is not a jump mnemonic; the intended entry is 'jne'.
# NOTE(review): `a` must be an iterable of instruction strings here, but no
# earlier cell defines it in that form — confirm the intended source.
jumps = [word for word in a if word in ['je', 'jne', 'jz', 'jg', 'jge', 'jl', 'jle']]
In [113]:
In [114]:
# lcss
Out[114]:
In [117]:
Out[117]:
In [130]:
def update_collection(collection, _id, key, value):
    """Set one field on the document whose 'id' equals ``_id``.

    Issues ``collection.update({'id': _id}, {"$set": {key: value}})``.

    Args:
        collection: object exposing ``update(filter, change)``, e.g. ``samples``.
        _id: value matched against the document's 'id' field; must not be None.
        key: name of the field to set.
        value: value to store under ``key``.

    Returns:
        None. When ``_id`` is None a message is printed and no update is sent.
    """
    # Example of the raw call this wraps:
    # samples.update({'id': test_file}, {"$set": {'ida_comments': test_comments }})

    # Check the `_id` argument; the builtin `id` function is never None, so
    # testing it let a missing id fall through to the update.
    if _id is None:
        print('you must pass an id')
        return
    print(collection)
    # print('updating %s with %s' % (_id, value['num_instr']))
    # NOTE(review): Collection.update is deprecated in newer PyMongo releases
    # in favour of update_one — confirm against the installed version.
    collection.update({'id': _id},
                      {"$set": {key: value}})
In [12]:
In [8]:
In [131]:
# Smoke test: set 'asm_count' to the placeholder value 'x' on the document
# whose id is test_file.
update_collection(samples, test_file, 'asm_count', 'x')
In [60]:
In [118]:
In [9]:
# Query the collection again for the test document's id so the stored
# asm_info can be inspected in the next cell.
a = samples.find({'id': post_test['id']})
In [12]:
# Display the 'num_instr' entry of asm_info for the first matching document.
a[0]['asm_info']['num_instr']
Out[12]:
In [ ]:
In [ ]: