Cambios en los ficheros:
de "espacio" a "_"
de "," a "%"
de ";" a ","
In [1]:
import numpy as np
import os as os
import pandas as pd
In [6]:
# Load the two input tables. skiprows=0 skips nothing, so the first line of
# each file is consumed by pandas as the header row.
datospanda = pd.read_csv('STOCKb-codigos.txt', skiprows=0)
datos = np.array(datospanda)
nombrespanda = pd.read_csv('lista_nombres_correctos.txt', skiprows=0)
nombres = np.array(nombrespanda)

# Row counts used by the matching loop further down.
num_rows = datos.shape[0]
num_nom = nombres.shape[0]

# Output files: one line per input row with the matched name, and a separate
# log of the rows for which no name was found.
root = 'lista_datos_con nombre.txt'
root_error = 'lista_datos_no_encontrados.txt'

# Best-effort removal of stale outputs. Only filesystem errors are ignored
# (a bare ``except`` would also swallow KeyboardInterrupt / SystemExit).
try:
    os.remove(root)
except OSError:
    pass
tabla = open(root, mode='w')

try:
    os.remove(root_error)
except OSError:
    pass
tabla_error = open(root_error, mode='w')
# Characters that are dropped outright before two codes are compared.
_CARACTERES_IGNORADOS = str.maketrans('', '', '"_.-%')


def _normalizar_codigo(valor):
    """Return the comparison key for one code.

    The key is built from ``str(valor)`` as follows:

    1. the characters ``" _ . - %`` are removed;
    2. zeros immediately after the first ``/`` are removed, then every ``/``
       is removed;
    3. leading and trailing zeros are removed;
    4. zeros immediately before the first ``x`` are removed.

    Unlike index-based character tests, the string methods used here are safe
    on values that become empty (e.g. ``"0"`` or ``"-"``) and on values that
    end in ``/``.

    NOTE(review): the nesting of the original statements could not be
    recovered; step 3 is assumed to apply to every value, not only to values
    that contain ``/`` - confirm.
    """
    clave = str(valor).translate(_CARACTERES_IGNORADOS)

    if '/' in clave:
        antes, _, despues = clave.partition('/')
        # Only the zeros right after the *first* slash are dropped; any
        # further slashes are simply deleted.
        clave = antes + despues.lstrip('0').replace('/', '')

    clave = clave.strip('0')

    if 'x' in clave:
        antes, _, despues = clave.partition('x')
        clave = antes.rstrip('0') + 'x' + despues

    return clave


# Comparison keys, index-aligned with the rows of ``nombres`` and ``datos``.
nombres_2 = [_normalizar_codigo(nombre) for nombre in nombres[:, 0]]
datos_2 = [_normalizar_codigo(nombre) for nombre in datos[:, 0]]
# Map every normalised name to the index of its FIRST occurrence, which is
# the entry a front-to-back scan that stops at the first hit would select.
_indice_nombres = {}
for _posicion, _clave in enumerate(nombres_2):
    _indice_nombres.setdefault(_clave, _posicion)

try:
    for row in range(num_rows):
        nombres_row = _indice_nombres.get(datos_2[row])
        if nombres_row is not None:
            # Write the original (un-normalised) correct name.
            tabla.write(nombres[nombres_row, 0] + '\n')
        else:
            # Log the unmatched row and keep the main output line-aligned
            # with the input by writing a placeholder.
            tabla_error.write(str(row) + str(datos[row]) + '\n')
            tabla.write('no_encontrado' + '\n')

        # Progress report at every non-zero multiple of 1000 rows.
        if row and row % 1000 == 0:
            print('row: ', row, '/', num_rows)
    print('FINISHED')
finally:
    # Close both outputs even if a write above fails.
    tabla.close()
    tabla_error.close()
In [3]:
# Scratch value used to try out string slicing in the next cell.
x = 'aaabcdef'
In [5]:
# Scratch check: this slice is x without its last character.
x[:-1]
Out[5]:
In [30]:
# Scratch check: rebinding a name that was read from a tuple element does
# not modify the tuple itself.
a = tuple(range(4))
b = a[1]
b = 4  # rebinds b only; a still holds its four original items
print(a)
In [ ]:
import numpy as np
import os as os
import pandas as pd
# Load the three input tables. skiprows=1 makes pandas skip the first line
# of each file; the following line is then consumed as the header row.
# NOTE(review): the first file name suggests tab-delimited content, but it is
# read with the default comma separator - confirm.
importadorpanda = pd.read_csv('Importadortxtdelimtab.txt', skiprows=1)
importador = np.array(importadorpanda)
contactosclientespanda = pd.read_csv('contactosclientescomas.txt', skiprows=1)
contactosclientes = np.array(contactosclientespanda)
contactosproovpanda = pd.read_csv('Contactosproveedores.txt', skiprows=1)
contactosproov = np.array(contactosproovpanda)
#print(importador.shape, contactosclientes.shape, contactosproov.shape)
#print (importador[0,:])

num_rows = importador.shape[0]

# Output file. Best-effort removal of a stale copy: only filesystem errors
# are ignored (a bare ``except`` would also swallow KeyboardInterrupt).
root = 'tabla_nueva.txt'
try:
    os.remove(root)
except OSError:
    pass

# Client contacts first, supplier contacts appended below them.
contactos = np.vstack([contactosclientes, contactosproov])
substring = '*'
def iguales(a, b):
    """Return True when two names are equal or agree on a long enough prefix.

    The comparison is deliberately loose (presumably so that a company name
    with and without a trailing legal suffix still matches - TODO confirm):

    * exact equality always matches;
    * if the lengths differ, the shorter name must be a prefix of the longer
      one, or both must agree on the first ``len(shorter) - 2`` characters;
    * if the lengths are equal, both must agree on the first ``len - 4``
      characters.

    The rule is symmetric in ``a`` and ``b``. The "a longer than b" case used
    to be guarded by a repeated ``length1 < length2`` test and could never
    run, so such pairs fell through to the looser equal-length rule.

    NOTE: for very short names the prefix length becomes zero or negative
    and ordinary Python slice semantics apply (for example, any two names of
    exactly four characters compare equal).

    Args:
        a: first name (string).
        b: second name (string).

    Returns:
        True if the names are considered the same, False otherwise.

    Raises:
        TypeError: if either argument has no length (e.g. a float NaN).
    """
    length1 = len(a)
    length2 = len(b)

    if a == b:
        return True
    if length1 < length2:
        return a == b[0:length1] or a[0:length1 - 2] == b[0:length1 - 2]
    if length1 > length2:
        return a[0:length2] == b or a[0:length2 - 2] == b[0:length2 - 2]
    return a[0:length2 - 4] == b[0:length2 - 4]
# Debug aid: show the contact row at which matching starts.
print(contactos[133,:])


def _campos(matriz, fila, columnas):
    """Return the requested cells of one row, each followed by a comma."""
    return ''.join(str(matriz[fila, columna]) + ',' for columna in columnas)


# Index of the next unconsumed row of ``contactos``.
# NOTE(review): 133, 153, 3219 and 3221 are hard-coded for one specific data
# set, and the two stop values differ from each other - confirm against the
# input files.
data_row = 133

# ``with`` guarantees the output is flushed and closed even if a row fails.
with open(root, mode='w') as tabla:
    # The first 153 rows are copied as-is, 20 columns each.
    for row in range(0, 153):
        tabla.write(_campos(importador, row, range(0, 20)) + '\n')

    # Remaining rows: 14 columns from ``importador`` followed by 6 contact
    # columns (1..6) when the company matches the current contact row.
    for row in range(153, num_rows):
        company = importador[row, 1]
        data_company = contactos[data_row, 0]
        print(row, company, '--', data_company)
        line = _campos(importador, row, range(0, 14))

        if iguales(company, data_company):
            tabla.write(line + _campos(contactos, data_row, range(1, 7)) + '\n')
            data_row = data_row + 1
            if data_row == 3219:
                # Stops the whole row loop; later rows are not written.
                break
            data_company = contactos[data_row, 0]

            # Further contacts of the same company go on continuation lines
            # whose first 14 fields are left empty.
            while iguales(company, data_company):
                line = 14 * ',' + _campos(contactos, data_row, range(1, 7))
                tabla.write(line + '\n')
                data_row = data_row + 1
                if data_row == 3221:
                    break
                data_company = contactos[data_row, 0]
        else:
            # No contact for this company: pad with six empty fields.
            tabla.write(line + 6 * ',' + '\n')