In [17]:
import re
import pandas as pd
import numpy as np
#Location = r'./canales/media/pictures/nwpat_EErd9Nc.csv'
Location = r'./canales/media/pictures/nwcas_uTqj6eE.csv'
df = pd.read_csv(Location)
dwot = re.compile('\d{2,2}/\d{2,2}/\d{4,4}')
dwt = re.compile('\d{2,2}/\d{2,2}/\d{4,4} \d{1,2}:\d{2,2}:\d{2,2} (AM|PM)')
t_cols = []
def str_or_date(val):
if type(val) == str:
if (dwot.match(val) or dwt.match(val)):
return 'DATETIME'
else:
return ('VARCHAR')
else:
return 'VARCHAR(1200) CHARACTER SET utf8'
for column in df:
dt = type(df[column].iloc[0])
if dt == str:
c_type = df[column].apply(lambda x: str_or_date(x))[0]
if c_type == 'VARCHAR':
c_len = df[column].apply(lambda x: len(str(x))).max()
c_def = ("%s(%i) CHARACTER SET utf8" % (c_type, c_len))
else:
c_def = c_type
t_cols.append((column, c_def))
elif dt == np.int64:
t_cols.append((column, 'INT'))
elif dt == float or np.float64:
t_cols.append((column, 'FLOAT'))
elif dt == np.bool_:
t_cols.append((column, 'BOOL'))
c_cont = 0
t_name = Location.split('/')[len(Location.split('/')) - 1].split('.')[0]
create_table = ("CREATE TABLE %s (\n\r" % t_name)
for c in t_cols:
c_name = c[0].replace(' ', '_')
if c_cont < len(t_cols)-1:
create_table += '\t`'+c_name+'` '+c[1]+',\n\r'
else:
create_table += '\t`'+c_name+'` '+c[1]+'\n\r'
c_cont+=1
create_table +=');'
#print(create_table)
c_cont = 0
insert_to = 'INSERT INTO table ('
for i_value in t_cols:
if c_cont < len(t_cols)-1:
insert_to += '`' + i_value[0].replace(' ','_') + '`,'
else:
insert_to += '`' + i_value[0].replace(' ','_') + '`)'
c_cont+=1
insert_values = '('
#for dv in df.values:
#print(type(dv[0]))
for c_def in t_cols:
val = ''
if c_def[1].find('VARCHAR') == 0:
val = "'" + df[c_def[0]] + "'"
insert_values += val
insert_values += ');'
print(insert_values)
#print(insert_to)
#print(insert_values)
#for i in df.iteritems():
# print(i[0])
# result = []
# if i[1].dtype == np.int64:
# for j in i[1]:
# print(j)
In [115]:
a = '10/29/1944'
a
b = '07/06/2017 8:56:57 AM'
d = re.compile('\d{2,2}/\d{2,2}/\d{4,4}')
print(d.match(a))
dwt = re.compile('\d{2,2}/\d{2,2}/\d{4,4} \d{1,2}:\d{2,2}:\d{2,2} (AM|PM)')
print(dwt.match(b))
None == True
type('') == str
Out[115]:
In [ ]: