In [1]:
import os
import sqlite3
import numpy as np
import io
import time
import genericUtils as GUTIL

Insert / read whole numpy arrays


In [2]:
def adapt_array(arr):
    out = io.BytesIO()
    np.save(out, arr)
    out.seek(0)
    return sqlite3.Binary(out.read())

def convert_array(text):
    out = io.BytesIO(text)
    out.seek(0)
    return np.load(out)

# Converts np.array to TEXT when inserting
sqlite3.register_adapter(np.ndarray, adapt_array)

# Converts TEXT to np.array when selecting
sqlite3.register_converter("array", convert_array)

In [8]:
x = np.arange(12).reshape(2,6)
print x


[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]

In [9]:
con = sqlite3.connect(":memory:", detect_types=sqlite3.PARSE_DECLTYPES)
cur = con.cursor()
cur.execute("create table test (arr array)")


Out[9]:
<sqlite3.Cursor at 0x7f9288ead420>

In [10]:
cur.execute("insert into test (arr) values (?)", (x, ))


Out[10]:
<sqlite3.Cursor at 0x7f9288ead420>

In [11]:
cur.execute("select arr from test")
data = cur.fetchone()[0]

print(data)
print type(data)


[[ 0  1  2  3  4  5]
 [ 6  7  8  9 10 11]]
<type 'numpy.ndarray'>

Create a DB/table for storing results


In [12]:
def create_or_open_db(db_file):
    db_is_new = not os.path.exists(db_file)
    con = sqlite3.connect(db_file, detect_types=sqlite3.PARSE_DECLTYPES)
    if db_is_new:
        print 'Creating results schema'
        sql = '''CREATE TABLE IF NOT EXISTS results(
        run_id            TEXT,
        run_step_num      INTEGER,
        theta23           REAL,
        deltam31          REAL,
        metric            REAL,
        minimizer_steps   array,
        PRIMARY KEY (run_id, run_step_num)
        );'''
        with con:
            con.execute(sql)
            
        print 'Creating config schema'
        sql = '''CREATE TABLE IF NOT EXISTS config(
        run_id             TEXT PRIMARY KEY,
        template_settings  TEXT,
        minimizer_settings TEXT,
        grid_settings      TEXT
        );'''
        with con:
            con.execute(sql)
    else:
        print 'Schema exists\n'
    return con

Insert a single row into the results table

Each insert is synchronous

This is safest, but is about 20 times (or more) slower than syncing once after all the inserts are performed (see below).


In [13]:
rm ./test.db

In [14]:
np.random.seed(0)
con = create_or_open_db('./test.db')
sql_insert_data = '''INSERT INTO results VALUES (?,?,?,?,?,?);'''
n_inserts = 100
n_mod = 10
t0 = time.time()
for n in xrange(n_inserts):
    if n % n_mod == 0:
        GUTIL.wstdout('.')
    input_data = (
        'msu_0',
        n,
        1139.389,
        0.723,
        2e-3,
        np.random.rand(100,6)
    )
    try:
        with con:
            con.execute(sql_insert_data, input_data)
    except sqlite3.IntegrityError as e:
        if not 'UNIQUE constraint failed' in e.args[0]:
            raise
        elif n % n_mod == 0:
            GUTIL.wstdout('x')
dt = time.time()-t0
con.close()
GUTIL.wstdout(
    '\n%s total (%s/insert)' %
    (GUTIL.timediffstamp(dt), GUTIL.timediffstamp(dt/float(n_inserts)))
)


Creating results schema
Creating config schema
..........
1.445 sec total (0.014 sec/insert)

In [15]:
e.message


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-15-5e19de34f328> in <module>()
----> 1 e.message

NameError: name 'e' is not defined

In [16]:
ls -hl ./test.db


-rw-r----- 1 justin justin 507K May 19 22:51 ./test.db

In [17]:
rm ./test2.db

In [18]:
np.random.seed(0)
con = create_or_open_db('./test2.db')
sql_insert = '''INSERT INTO results VALUES (?,?,?,?,?,?);'''
t0=time.time()
with con:
    for n in xrange(n_inserts):
        if n % n_mod == 0:
            GUTIL.wstdout('.')
        input_data = (
            'msu_0',
            n,
            1139.389,
            0.723,
            2e-3,
            np.random.rand(100,6)
        )
        try:
            con.execute(sql_insert, input_data)
        except sqlite3.IntegrityError as e:
            if not 'UNIQUE constraint failed' in e.args[0]:
                raise
            elif n % n_mod == 0:
                GUTIL.wstdout('o')
dt = time.time()-t0
con.close()
GUTIL.wstdout(
    '\n%s total (%s/insert)' %
    (GUTIL.timediffstamp(dt), GUTIL.timediffstamp(dt/float(n_inserts)))
)


Creating results schema
Creating config schema
..........
0.090 sec total (0.001 sec/insert)

In [19]:
dt/n_inserts


Out[19]:
0.000902400016784668

In [20]:
ls -hl ./test2.db


-rw-r----- 1 justin justin 507K May 19 22:51 ./test2.db

Read the row back to ensure the data is correct


In [23]:
con = create_or_open_db('./test2.db')
con.row_factory = sqlite3.Row
sql = '''SELECT
    metric, theta23, deltam31, run_id, run_step_num, minimizer_steps
    FROM results'''
cursor = con.execute(sql)
for row in cursor:
    print row.keys()[:-1]
    print [x for x in row][:-1]
    print 'shape of', row.keys()[-1], row['minimizer_steps'].shape
    break


Schema exists

['metric', 'theta23', 'deltam31', 'run_id', 'run_step_num']
[0.002, 1139.389, 0.723, u'msu_0', 0]
shape of minimizer_steps (100, 6)

In [24]:
ls -hl ./test.db


-rw-r----- 1 justin justin 507K May 19 22:51 ./test.db

In [26]:
a = row[-1]