In [1]:
import mysql.connector
La idea de este taller es manipular archivos (leerlos, parsearlos y escribirlos) y hacer lo mismo con bases de datos estructuradas.
In [2]:
import pandas as pd
df= pd.read_csv('C:/Users/Alex/Documents/eafit/semestres/X semestre/programacion/taller2.tsv', sep = '\t')
df[:1]
Out[2]:
Qué Entidades (tablas) puede definir?
Cree la base de datos (copie el código SQL que se usó)
In [5]:
CREATE TABLE enfermedad
(
id_enfermedad int PRIMARY KEY,
nombre varchar(255)
);
create table plataforma
(
id_plataforma int primary key,
nombre varchar(255)
);
CREATE TABLE loci
(
id_loci int NOT NULL PRIMARY KEY,
region varchar(255),
chrom varchar(255),
pos int,
genes_reportados int,
gen_mapped varchar(255),
gen_upstream int,
gen_downstream int,
SNP_GENE_IDS int,
UPSTREAM_GENE_DISTANCE int,
DOWNSTREAM_GENE_DISTANCE int,
STRONGEST_SP_RISK varchar(255),
SNPS varchar(255),
MERGED int,
SNP_ID_CURRENT varchar(255),
CONTEXTO varchar(255),
risk_allele varchar(255),
PVAl int,
PVALUE_MLOG int,
PVALUE_txt varchar(255),
BETA int,
novCI varchar(255),
id_plataforma int,
foreign key (id_plataforma) references plataforma(id_plataforma)
);
CREATE TABLE enfermedad_loci
(
id_enfermedad int,
id_loci int,
PRIMARY KEY (id_enfermedad, id_loci),
foreign key (id_enfermedad) references enfermedad(id_enfermedad),
foreign key (id_loci) references loci(id_loci)
);
CREATE TABLE journal
(
id_journal int primary key,
nombre varchar(255)
);
create table publicacion
(
id_publicacion int,
id_pubmed int,
autor varchar (255),
fecha_pub varchar (20),
link varchar (255),
id_journal int,
id_estudio int,
foreign key (id_journal) references journal(id_journal),
foreign key (id_estudio) references estudio(id_estudio)
);
CREATE TABLE estudio
(
nombre varchar(255),
id_estudio int primary key,
id_enfermedad int,
id_publicacion int,
foreign key (id_publicacion) references publicacion(id_publicacion),
foreign key (id_enfermedad) references enfermedad(id_enfermedad),
tamano_muestra int,
replicas int
);
In [6]:
df.head(1)
Out[6]:
In [3]:
hostname = '127.0.0.1'
username = 'alexacl95'
password = 'SUSAna05'
database = 'programacion'
def doQuery( conn ) :
cur = conn.cursor()
cur.execute( "select * from enfermedad" )
for id_nombre, nombre_enf in cur.fetchall() :
print (id_nombre, nombre_enf)
myConnection = mysql.connector.connect( host=hostname, user=username, passwd=password, db=database )
doQuery( myConnection )
myConnection.close()
In [7]:
def get_diseaseId(disease_name):
cur = myConnection.cursor()
cur.execute( """select * from enfermedad where nombre = "%s" """ % (disease_name) )
id_enf = None
for id_, nombre_enf in cur.fetchall() :
id_enf = id_
if not id_enf:
cur.execute("""insert into enfermedad values (NULL, "%s" )""" % (disease_name))
cur.execute("SELECT LAST_INSERT_ID()")
id_enf = cur.fetchall()[0][0]
myConnection.commit()
return id_enf
In [47]:
def get_platId(plat_name):
cur = myConnection.cursor()
cur.execute( """select * from plataforma where nombre = "%s" """ % (plat_name) )
id_plat = None
for id_, nombre_plat in cur.fetchall() :
id_plat = id_
if not id_plat:
print("""insert into plataforma values (NULL, "%s" )""" % (plat_name))
cur.execute("""insert into plataforma values (NULL, "%s" )""" % (plat_name))
cur.execute("SELECT LAST_INSERT_ID()")
id_plat = cur.fetchall()[0][0]
myConnection.commit()
return id_plat
for index, row in df.iterrows():
plat_name = row['PLATFORM [SNPS PASSING QC]']
plat_id = get_platId(plat_name)
In [8]:
def get_lociId(loci_name):
cur = myConnection.cursor()
cur.execute( """select * from loci where nombre = "%s" """ % (disease_name) )
id_loci = None
for id_, nombre_enf in cur.fetchall() :
id_loci = id_
if not id_loci:
print("""insert into enfermedad values (NULL, "%s", )""" % (disease_name))
cur.execute("""insert into enfermedad values (NULL, "%s" )""" % (disease_name))
cur.execute("SELECT LAST_INSERT_ID()")
id_enf = cur.fetchall()[0][0]
myConnection.commit()
return id_enf
In [9]:
hostname = '127.0.0.1'
username = 'alexacl95'
password = 'SUSAna05'
database = 'programacion'
myConnection = mysql.connector.connect( host=hostname, user=username, passwd=password, db=database )
for index, row in df.iterrows():
dis_name = row['DISEASE/TRAIT']
dissease_id = get_diseaseId(dis_name)
print()
myConnection.close()
In [ ]:
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_sql_query.html
In [ ]: