Read data


In [1]:
import distance as dist
import numpy as np
import pandas as pd
from collections import deque
import re

In [2]:
#Variables
path = "../data/ExampleTest.txt"
apath = "../data/ExampleTestArray_modified.txt"

In [20]:
dict = {}
appearances = {}
names = deque()
with open(path) as f:
    txt = f.readlines()
with open(apath) as f2:
    array_txt_str = f2.read()
    
array_txt_str = re.sub(r'\[.*?\]', '',array_txt_str)
name = ""
size = 0
for i in txt:
    actual = i.replace("\n", '')
    size +=len(actual)
    if(len(i.strip()) == 0):
        continue
    try :
        number = float(actual)
        if (number > 10000000):
            continue
        
        #TODO
        #need to remove data between brackets in array_txt_str
        #since some conflicts happen when finding numbers in that text
        if(number == 0.0):
            zeros = len([k.start() for k in re.finditer("0", actual)])
            
        idx = [k.start() for k in re.finditer(actual, array_txt_str)]
        idx_end = [k.end() for k in re.finditer(actual, array_txt_str)]
        #myidx = np.where(min_idx == min(min_idx))
        try:
            appearances[actual] += 1
        except:
            appearances[actual] = 1
        
        #dict[actual] = actual
        found = False
        while appearances[actual] <= len(idx) and found == False:
            prev = array_txt_str[idx[appearances[actual] - 1] - 1]
            post = array_txt_str[idx_end[appearances[actual] - 1] + 1]
            if (number == 0):
                zf  = [k.start() for k in re.finditer("0", array_txt_str[(idx[appearances[actual] - 1] - zeros):(idx_end[appearances[actual] - 1] + zeros)]
                #if (len(zf) == zeros):
                #    found = True
                    
            if (prev.isdigit() == False and post.isdigit() == False):
                found = True
            else:
                if (found == False):
                    appearances[actual] += 1
            print("finding: {} prev: {} found?: {} number: {}".format(actual, prev, found, appearances[actual] ))
        print("found: {} location per line: {} location as array: {} apps: {} size: {} type of name: {}".format(actual, size,idx[appearances[actual] -1],appearances[actual], len(idx),type(actual)))
    except :
        pass
print(array_txt_str)


  File "<ipython-input-20-87a46142eee2>", line 46
    if (prev.isdigit() == False and post.isdigit() == False):
                                                            ^
SyntaxError: invalid syntax

In [3]: