In [1]:
import distance as dist
import numpy as np
import pandas as pd
from collections import deque
import re
In [2]:
# Input file locations, both given as relative paths.
path, apath = (
    "../data/ExampleTest.txt",
    "../data/ExampleTestArray_modified.txt",
)
In [20]:
# For every line of `path` that parses as a number, look up that exact text
# inside the contents of `apath` and report where it occurs. Repeated values
# are matched to successive occurrences: the n-th time a value is read from
# `path`, the search resumes from the n-th occurrence in the array text and
# skips any occurrence that is glued to a neighbouring digit (i.e. that is
# really part of a longer number).

# NOTE(review): `dict` shadows the built-in type. It is never used below and is
# kept only in case a later cell reads it -- rename or delete once confirmed.
dict = {}
appearances = {}  # numeric text -> 1-based rank of the occurrence to examine
names = deque()

with open(path) as f:
    txt = f.readlines()
with open(apath) as f2:
    array_txt_str = f2.read()

# Strip bracketed spans so that numbers inside them cannot produce false hits.
# `.` does not match a newline here, so only brackets that open and close on
# the same line are removed.
array_txt_str = re.sub(r'\[.*?\]', '', array_txt_str)

name = ""
size = 0
for i in txt:
    actual = i.replace("\n", '')
    size += len(actual)
    if len(i.strip()) == 0:
        continue

    # Only lines that parse as a float are searched for; anything else is
    # skipped. The try block covers float() alone so that real errors in the
    # matching logic below are no longer silently swallowed.
    try:
        number = float(actual)
    except ValueError:
        continue
    if number > 10000000:
        continue

    if number == 0.0:
        zeros = actual.count("0")

    # Escape the text: it is a literal to find, not a pattern ("1.5" must not
    # match "125", and "+"/"e+" forms must not break the regex). One scan
    # yields both the start and the end offsets.
    matches = list(re.finditer(re.escape(actual), array_txt_str))
    idx = [m.start() for m in matches]
    idx_end = [m.end() for m in matches]
    #myidx = np.where(min_idx == min(min_idx))

    appearances[actual] = appearances.get(actual, 0) + 1
    #dict[actual] = actual

    found = False
    while appearances[actual] <= len(idx) and not found:
        match_start = idx[appearances[actual] - 1]
        match_end = idx_end[appearances[actual] - 1]
        # Characters immediately around the match. An empty string stands for
        # "start/end of text" and counts as a non-digit neighbour. `match_end`
        # is already the index one past the match, so no further +1 is needed.
        prev = array_txt_str[match_start - 1] if match_start > 0 else ""
        post = array_txt_str[match_end] if match_end < len(array_txt_str) else ""
        if number == 0:
            # Positions of "0" in a window around the match; only consumed by
            # the disabled check below.
            window = array_txt_str[max(match_start - zeros, 0):match_end + zeros]
            zf = [k.start() for k in re.finditer("0", window)]
            #if (len(zf) == zeros):
            #    found = True
        if not prev.isdigit() and not post.isdigit():
            found = True
        else:
            # This occurrence is embedded in a longer number; try the next one.
            appearances[actual] += 1
        print("finding: {} prev: {} found?: {} number: {}".format(actual, prev, found, appearances[actual] ))

    # Report only real hits; when nothing matched there is no valid index.
    if found:
        print("found: {} location per line: {} location as array: {} apps: {} size: {} type of name: {}".format(actual, size,idx[appearances[actual] -1],appearances[actual], len(idx),type(actual)))

print(array_txt_str)
In [3]: