In [1]:
from scipy.spatial.distance import cosine
import numpy as np
import re
In [2]:
# Read the input file and tokenize every line into lowercase words.
# `text` ends up as a list with one entry per line of the file; each entry
# is the list of that line's words (maximal runs of the letters a-z).
text = []
with open("sentences_example.txt", 'r') as file_obj:
    for line in file_obj:
        # Splitting on every non-letter character yields empty strings
        # between adjacent separators, so those are dropped.
        # The tokens are stored as a real list (not a lazy `filter` object):
        # later cells iterate over each line more than once and call
        # `.count()` on it, which a one-shot iterator cannot support.
        tokens = [w for w in re.split("[^a-z]", line.lower()) if w != '']
        text.append(tokens)
In [3]:
# Build the vocabulary: every distinct word that occurs in any line.
words_set = {token for tokens in text for token in tokens}

# Give each vocabulary word an integer id (0, 1, 2, ...). The ids follow the
# set's iteration order; they are later used as column positions.
words = {position: token for position, token in enumerate(words_set)}
In [4]:
# Word-count matrix: one row per line of `text`, one column per vocabulary
# id in `words`. Cell (i, j) is how many times word j occurs in line i.
words_array = [
    [tokens.count(words[word_id]) for word_id in words]
    for tokens in text
]
words_matrix = np.array(words_array)
In [5]:
# Cosine distance from the first row (the reference line) to every other row.
# distances[k] therefore belongs to row k + 1 of `words_matrix`.
distances = [
    cosine(words_matrix[0], other_row)
    for other_row in words_matrix[1:]
]
In [6]:
def two_mins(nums):
    """Return the two smallest values in ``nums`` as ``(smallest, runner_up)``.

    The iterable is scanned once. A value equal to the current smallest
    counts as a new smallest, so duplicated minima are reported twice
    (e.g. ``[2, 2, 5]`` gives ``(2, 2)``). Any slot that is never filled
    stays at ``float('inf')``: an empty input gives ``(inf, inf)`` and a
    single-element input gives ``(value, inf)``.
    """
    smallest = float('inf')
    runner_up = float('inf')
    for value in nums:
        if value <= smallest:
            # New (or tied) minimum: the previous one drops to second place.
            runner_up = smallest
            smallest = value
            continue
        if value < runner_up:
            runner_up = value
    return smallest, runner_up
In [7]:
# Pick the two lines closest to the reference line and write their indices
# (space-separated) to the submission file.
if len(distances) < 2:
    raise ValueError("need at least two candidate sentences to pick the two closest")

# Everything is computed before the file is opened, so a failure here does
# not leave an empty submission file behind.
min_1, min_2 = two_mins(distances)
pos_1 = distances.index(min_1)
if min_2 == min_1:
    # Tie: two_mins returned the same value twice. A plain .index() would
    # find the first occurrence again and report the same sentence twice,
    # so the search continues after the first hit.
    pos_2 = distances.index(min_2, pos_1 + 1)
else:
    pos_2 = distances.index(min_2)

# distances[k] belongs to row k + 1 (row 0 is the reference), hence the + 1.
ind_1, ind_2 = pos_1 + 1, pos_2 + 1

with open("submission-1.txt", 'w') as file_obj:
    file_obj.write(str(ind_1) + " " + str(ind_2))