In [2]:
%pylab --no-import-all inline
%matplotlib inline
In [3]:
from __future__ import division
import pandas as pd
import numpy as np
import csv
In [6]:
def main(markdown_file="../similar_attacks.md",
         preprocessed_file="../data/vcdb_fully_processed.csv",
         output_file="../data/vcdb_similarity_comparison.csv"):
    """Print every first-of-pair incident ID that occurs in the processed CSV.

    Each line of ``markdown_file`` that is at least 10 characters long is
    treated as one pair of IDs taken from fixed character positions
    (``line[2:38]`` and ``line[43:78]``).  For every pair, each data row of
    ``preprocessed_file`` is scanned and the first ID is printed once per
    relevant column (``incident_id``, ``industry.categories``) whose cell
    contains it as a substring.

    Args:
        markdown_file: Path of the text file listing the ID pairs.
        preprocessed_file: Path of the CSV file; its first row is the header.
        output_file: Path of the comparison CSV.  It is created/truncated
            but nothing is written to it yet (see the note in the body).

    Raises:
        IOError/OSError: If one of the files cannot be opened.
    """
    import sys  # local import: only needed to choose the csv file mode

    relevant_attributes = ["incident_id", "industry.categories"]

    # Collect the ID pairs first so the CSV has to be read only once.
    id_pairs = []
    with open(markdown_file, "r") as md_file:
        for line in md_file:
            if len(line) < 10:
                # Too short to hold a pair (blank line, heading, ...).
                continue
            # NOTE(review): the second slice is 35 characters wide while the
            # first is 36.  If both IDs have the same length the end should
            # probably be 79 -- confirm against the markdown file.
            id_pairs.append((line[2:38], line[43:78]))

    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline="" on Python 3.
    if sys.version_info[0] < 3:
        csv_file = open(preprocessed_file, "rb")
    else:
        csv_file = open(preprocessed_file, "r", newline="")
    with csv_file:
        reader = csv.reader(csv_file)
        headers = next(reader, [])  # an empty file simply has no columns
        relevant_indexes = [
            index for index, header in enumerate(headers)
            if header in relevant_attributes
        ]
        # Keep only the cells that can ever be inspected; rows shorter than
        # the header contribute whatever relevant cells they do have.
        relevant_cells = [
            [row[index] for index in relevant_indexes if index < len(row)]
            for row in reader
        ]

    # NOTE(review): the original opened the output file for writing and
    # created a csv.writer, but never wrote a row.  The truncation is kept
    # so the side effect on disk is unchanged.
    # TODO: write the comparison rows here.
    with open(output_file, "wb"):
        pass

    for first_id, _second_id in id_pairs:
        for cells in relevant_cells:
            for cell in cells:
                # The original tested membership in the column *index* (an
                # int), which raises TypeError; the cell value is meant.
                if first_id in cell:
                    print(first_id)
if __name__ == "__main__":
    # Script entry point: run main() and report any failure on stdout
    # instead of letting the traceback propagate.
    try:
        main()
    except Exception as err:
        # Joining with a single space reproduces the separator that the
        # two-item print statement inserted between the message and the error.
        print(' '.join(('Something went wrong ', str(err))))
In [ ]: