Data import

To connect and get a terminal to the postgres docker, run docker exec -it pbdw2018_hackathon-master_data_postgres_1 /bin/bash

You can then do things like

dropdb mydata -U postgres

or

psql postgres -U postgres


In [ ]:
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
import csv

# Connect to an existing database
try:
    conn = psycopg2.connect("host=postgresdb user=postgres password=postgres")
except:
    print ("Error:  unable to connect to the database")
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)

# Open a cursor to perform database operations
cur = conn.cursor()

# Execute a command to end all connections to the db
try:
    cur.execute("SELECT pg_terminate_backend(pg_stat_activity.pid) FROM pg_stat_activity WHERE pg_stat_activity.datname = 'mydata' AND pid <> pg_backend_pid();")
except:
    print("Error killing database connections, perhaps it does not exist?")
    
# Execute a command to drop the table
try:
    cur.execute("DROP DATABASE mydata")
except:
    print("Error while dropping database, perhaps it does not exist?")

# Execute a command to create a new table
try:
    cur.execute("CREATE DATABASE mydata;")
except:
    print("Error while creating database, does it already exist?")
# Close database connection
conn.close()

In [ ]:
# Connect to an existing database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
# Open a cursor to perform database operations
cur = conn.cursor()
# Execute a command: this creates a new table
try:
    cur.execute("""CREATE TABLE PBDWHackathon2018 (
    ID text PRIMARY KEY,
    PatientID text,
    Age text,
    Clinical_T_Stage text,
    Clinical_N_Stage text,
    Clinical_M_Stage text,
    Overall_Stage text,
    Histology text,
    Gender text,
    SurvivalTime text,
    DeadStatus text
    );
    """)
except:
    print("Error while creating table, does it already exist?")
# Close database connection
conn.close()

In [ ]:
import pandas
# Connect to an existing database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
cur = conn.cursor()

#open CSV file
df = pandas.read_csv('Clinical1.csv')
for index, row in df.iterrows():
    #print(str(index) + " | " + row.Idfu)
    #cur.execute
    cur.execute("""INSERT INTO PBDWHackathon2018 (ID, PatientID, Age, Clinical_T_Stage, Clinical_N_Stage, Clinical_M_Stage, Overall_Stage, Histology, Gender, SurvivalTime, DeadStatus) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
          (index, row.PatientID, row.Age, row["Clinical.T.Stage"], row["Clinical.N.Stage"], row["Clinical.M.Stage"], row["Overall.Ajcc.Stage"], row.Histology, row.Sex, row["Survival.Time.Days"], row["deadstatus.event"]))
conn.commit()

In [ ]:
# Connect to an existing database
conn = psycopg2.connect("host=postgresdb user=postgres dbname=mydata password=postgres")
cur = conn.cursor()

cur.execute("""SELECT *
    FROM PBDWHackathon2018;""")

results = cur.fetchall()
conn.close()

for row in results:
    print(row)

In [ ]: