In [50]:
import os
import numpy as np
import itertools
import pathos
from pathos.multiprocessing import ProcessingPool as Pool
import matplotlib.pyplot as plt
%matplotlib inline
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import MetaData, Column, Table, ForeignKey, UniqueConstraint
from sqlalchemy import Integer, String, Float, BLOB
from sqlalchemy.orm import relationship
from sqlalchemy.orm import sessionmaker
import qp
In [8]:
Base = declarative_base()
In [9]:
def setup(where):
global parametrizations_table, metaparameters_table, PDFs_table, parameters_table
global engine, metadata
# if not os.path.isfile(where):
engine = create_engine('sqlite:///'+where)
print('making the tables in '+where)
metadata = MetaData(engine)
Base.metadata.create_all(engine)
# rows are parametrizations keyed on parametrization ID
# columns are type of parametrization, number of parameters
parametrizations_table = Table('parametrizations', metadata,
Column('id', Integer, primary_key=True),
Column('parametrization', String(8), nullable=False),
Column('number_metaparameters', Integer, nullable=False)
)
# rows are metaparameters keyed on metaparameter_set ID
# columns are parametrization ID, metaparameter_set
metaparameters_table = Table('metaparameters', metadata,
Column('id', Integer, primary_key=True),
Column('parametrization_id', None, ForeignKey('parametrizations.id')),
#Column('metaparameter_number', Integer),
Column('metaparameter_set', BLOB, unique=True)
)
# rows are PDFs keyed on PDF ID
# no other columns
PDFs_table = Table('PDFs', metadata,
Column('id', Integer, primary_key=True)
)
# rows are parameters keyed on parameter_set ID
# columns are PDF ID, parametrization ID, parameter_set
parameters_table = Table('parameters', metadata,
Column('id', Integer, primary_key=True),
Column('PDFs_id', None, ForeignKey('PDFs.id')),
Column('metaparameters_id', None, ForeignKey('metaparameters.id')),
#Column('parameter_id', Integer),
Column('parameter_set', BLOB),
UniqueConstraint('PDFs_id', 'metaparameters_id')#, name=''),
)
metadata.create_all()
# else:
# engine = create_engine('sqlite:///'+where)
# print('loading the tables from '+where)
# metadata = MetaData(engine)
# parametrizations_table = Table('parametrizations', metadata, autoload=True)
# metaparameters_table = Table('metaparameters', metadata, autoload=True)
# PDFs_table = Table('PDFs', metadata, autoload=True)
# parameters_table = Table('parameters', metadata, autoload=True)
# metadata.create_all()
In [10]:
where = 'example.db'
setup(where)
In [11]:
parametrizations = ['samples', 'histogram', 'quantiles']
number_parameters = [3, 10, 30, 100]
for p in parametrizations:
for n in number_parameters:
ins = parametrizations_table.insert().values(parametrization=p, number_metaparameters=n)
conn = engine.connect()
result = conn.execute(ins)
In [12]:
s = sqlalchemy.sql.select([parametrizations_table])
result = conn.execute(s).fetchall()
for r in result:
print(r)
In [14]:
# choose one of these:
dataname = 'mg'
# dataname = 'ss'
if dataname == 'mg':
# Melissa Graham's data
datafilename = 'bpz_euclid_test_10_2.probs'
z_low = 0.01
z_high = 3.51
elif dataname == 'ss':
# Sam Schmidt's data
datafilename = 'test_magscat_trainingfile_probs.out'
z_low = 0.005
z_high = 2.11
z = np.arange(z_low, z_high, 0.01, dtype='float')
z_range = z_high - z_low
delta_z = z_range / len(z)
## Warning: reading in the data is slow for Sam Schmidt's dataset!
with open(datafilename, 'rb') as data_file:
lines = (line.split(None) for line in data_file)
lines.next()
pdfs = np.array([[float(line[k]) for k in range(1,len(line))] for line in lines])
# print(sys.getsizeof(pdfs))
In [16]:
s = sqlalchemy.sql.select([parametrizations_table])
result = conn.execute(s).fetchall()
for r in result:
if r.parametrization == 'quantiles':
quantiles = np.linspace(0., 1., r.number_metaparameters).tostring()
ins = metaparameters_table.insert().prefix_with("OR IGNORE").values(parametrization_id=r.id, metaparameter_set=quantiles)
conn = engine.connect()
result = conn.execute(ins)
if r.parametrization == 'histogram':
bindif = 1./r.number_metaparameters
binends = np.arange(z_low, z_high+bindif, bindif).tostring()
ins = metaparameters_table.insert().prefix_with("OR IGNORE").values(parametrization_id=r.id, metaparameter_set=binends)
conn = engine.connect()
result = conn.execute(ins)
if r.parametrization == 'samples':
n = np.array([r.number_metaparameters]).tostring()
ins = metaparameters_table.insert().prefix_with("OR IGNORE").values(parametrization_id=r.id, metaparameter_set=n)
conn = engine.connect()
result = conn.execute(ins)
In [17]:
s = sqlalchemy.sql.select([metaparameters_table])
result = conn.execute(s).fetchall()
for r in result:
print(r)
In [18]:
(n_pdfs, n_start) = np.shape(pdfs)
parametrizations.append('gridded')
number_parameters.append(n_start)
ins = parametrizations_table.insert().values(parametrization=parametrizations[-1], number_metaparameters=number_parameters[-1])
conn = engine.connect()
result = conn.execute(ins)
s = sqlalchemy.sql.select([parametrizations_table])
result = conn.execute(s).fetchall()
for r in result:
print(r)
In [19]:
s = sqlalchemy.sql.select([parametrizations_table]).where(parametrizations_table.c.parametrization=='gridded')
result = conn.execute(s).fetchall()
for r in result:
ins = metaparameters_table.insert().prefix_with("OR IGNORE").values(parametrization_id=r.id, metaparameter_set=z)
conn = engine.connect()
result = conn.execute(ins)
s = sqlalchemy.sql.select([metaparameters_table])
result = conn.execute(s).fetchall()
for r in result:
print(r)
In [20]:
s = sqlalchemy.sql.select([parametrizations_table.c.id, parametrizations_table.c.number_metaparameters]).where(parametrizations_table.c.parametrization=='gridded')
latest = conn.execute(s).fetchall()
print(type(latest))
In [21]:
s1 = sqlalchemy.sql.select([parametrizations_table.c.id]).where(parametrizations_table.c.parametrization=='gridded' and parametrizations_table.c.number_metaparameters==n_start)
metaparam_index = conn.execute(s1).fetchall()[0]
print(metaparam_index)
s2 = sqlalchemy.sql.select([metaparameters_table.c.metaparameter_set]).where(metaparameters_table.c.parametrization_id==metaparam_index[0])
metaparam_bytes = conn.execute(s2).fetchall()
print(metaparam_bytes)
in_metaparams = np.fromstring(metaparam_bytes[0][0])
print(in_metaparams)
In [22]:
ins = PDFs_table.insert()
conn = engine.connect()
result = conn.execute(ins, [{}]*n_pdfs)
s = sqlalchemy.sql.select([PDFs_table.c.id])
result = conn.execute(s).fetchall()
for r in result[:10]:
print(r[0])
In [23]:
s1 = sqlalchemy.sql.select([PDFs_table.c.id])
pdf_indices = np.array(conn.execute(s1).fetchall()[:100])
print(type(pdf_indices), type(pdf_indices[0]), type(pdf_indices[0][0]))
s2 = sqlalchemy.sql.select([metaparameters_table.c.id]).where(metaparameters_table.c.metaparameter_set==in_metaparams.tostring())
metaparam_id = conn.execute(s2).fetchall()[0][0]
print(metaparam_id)
def make_parameter_dicts(i):
new_parameters = {'PDFs_id' : i[0], 'metaparameters_id' : metaparam_id, 'parameter_set' : pdfs[i[0]-1].tostring()}
return new_parameters
pool = Pool(2)
parameter_dicts = pool.map(make_parameter_dicts, pdf_indices)
print(len(parameter_dicts))
# def pdfs_from_file_to_db(i):
# # P = qp.PDF(gridded=(in_metaparams, pdfs[i]), vb=False)
# ins = parameters_table.insert().values(PDFs_id=i[0], metaparameters_id=metaparam_id, parameter_set=pdfs[i].tostring())
# conn = engine.connect()
# result = conn.execute(ins)
# pool = Pool(2)
# pool.map(pdfs_from_file_to_db, pdf_indices :
In [24]:
conn = engine.connect()
conn.execute(parameters_table.insert(), parameter_dicts)
# s = sqlalchemy.sql.select([parameters_table.c.id])
# result = conn.execute(s).fetchall()
# for r in result[:10]:
# print(r[0])
Out[24]:
In [43]:
sxi = sqlalchemy.sql.select([metaparameters_table.c.metaparameter_set]).where(metaparameters_table.c.id==metaparam_id)
xi = np.fromstring(conn.execute(sxi).fetchall()[0][0])
# sy = sqlalchemy.sql.select([parameters_table.c.parameter_set]).where(parameters_table.c.metaparameters_id==metaparam_id)
# y = conn.execute(sy).fetchall()
sp = sqlalchemy.sql.select([parametrizations_table.c.id]).where(parametrizations_table.c.parametrization=='samples')
ps = np.array(conn.execute(sp).fetchall())
# for p in ps:
# print p
sf = sqlalchemy.sql.select([metaparameters_table.c.id]).where(metaparameters_table.c.parametrization_id.in_(ps[:,0]))
fs = np.array(conn.execute(sf).fetchall())[:,0]
# for f in fs:
# print f[0]
print(fs)
In [52]:
def gridded_to_samples((i, f)):
print(i, f)
conn = engine.connect()
sxi = sqlalchemy.sql.select([metaparameters_table.c.metaparameter_set]).where(metaparameters_table.c.id==metaparam_id)
xi = np.fromstring(conn.execute(sxi).fetchall()[0][0])
syi = sqlalchemy.sql.select([parameters_table.c.parameter_set]).where(parameters_table.c.metaparameters_id==metaparam_id and parameters_table.c.PDFs_id==i[0])
yi = np.fromstring(conn.execute(syi).fetchall()[0][0])
G = qp.PDF(gridded=(xi, yi))
sxf = sqlalchemy.sql.select([metaparameters_table.c.metaparameter_set]).where(metaparameters_table.c.id==f)
xf = np.fromstring(conn.execute(sxf).fetchall()[0][0], dtype=int)
S = G.sample(N=xf, using='gridded')
ins = parameters_table.insert().prefix_with("OR IGNORE").values(PDFs_id=i[0], metaparameters_id=f, parameter_set=S.tostring())
result = conn.execute(ins)
return S
In [53]:
conn = engine.connect()
new_pdfs = pool.map(gridded_to_samples, itertools.product(pdf_indices[:100], fs))
In [54]:
s = sqlalchemy.sql.select([parameters_table.c.parameter_set]).where(parameters_table.c.PDFs_id==5)
result = conn.execute(s).fetchall()
for r in result:
print np.fromstring(r[0])
In [ ]:
In [ ]:
In [ ]:
In [20]:
s = sqlalchemy.sql.select([parameters_table.c.id])
parameter_indices = conn.execute(s).fetchall()
for p in parameter_indices:
print(p)
In [ ]:
for p in range(n_pdfs):
ins = parametrizations_table.insert().values(parameters=pdfs[p].tostring(), number_metaparameters=n)
conn = engine.connect()
result = conn.execute(ins)
In [7]:
s = sqlalchemy.sql.select([parametrizations_table]).where(parametrizations_table.c.parametrization=='quantiles')
result = conn.execute(s)
out_place = result.fetchall()
for o in out_place:
quantiles = np.linspace(0., 1., r.number_metaparameters).tostring()
ins = metaparameters_table.insert().values(parametrization_id=o.id, metaparameter_set=quantiles)
conn = engine.connect()
result = conn.execute(ins)
In [8]:
#print(np.fromiter(metaparameters_table.fetchall(), np.ndarray).fromstring())
s = sqlalchemy.sql.select([metaparameters_table])
result = conn.execute(s)
for r in result.fetchall():
print(r)
In [ ]:
In [ ]:
for q in result:
quantiles = np.linspace(re)
ins = metaparameters_table.insert().values(parametrization=p, number_metaparameters=n)
conn = engine.connect()
result = conn.execute(ins)
print(r)
In [ ]:
for r in result:
print(r.id)
In [ ]:
ps = sqlalchemy.sql.select([parametrizations_table])
result = conn.execute(ps)
for p in result:
print p
In [ ]:
In [ ]:
In [ ]:
In [ ]:
import dataset as ds
# help(dataset)
In [ ]:
db = ds.connect('postgresql:///example.db')
In [ ]:
import peewee as pw
help(pw)
from peewee import Model, CharField
In [ ]:
import playhouse as ph
help(ph)
from playhouse.postgres_ext import PostgresqlExtDatabase
In [ ]:
psql_db = PostgresqlExtDatabase('example', user='postgres')
class BaseModel(Model):
class Meta:
database = psql_db
# class Galaxy(BaseModel):
# Galaxy_id = pw.PrimaryKey()
# class PDF(BaseModel):
# PDF_id = pw.ForeignKeyField(Galaxy)
# parametrization_id = pw.ForeignKeyField(Parametrization)
# parameters = Array()
class Parametrization(BaseModel):
P_type = CharField()
parameters = ph.postgres_ext.ArrayField(pw.FloatField)
In [ ]:
psql_db.connect()
In [ ]:
def setup(where):
global parametrizations_table, metaparameters_table, PDFs_table, parameters_table
global engine, metadata
if not os.path.isfile(where):
engine = create_engine('sqlite:///'+where)
print('making the tables in '+where)
metadata = MetaData(engine)
Base.metadata.create_all(engine)
parametrizations_table = Table('parametrizations', metadata,
Column('id', Integer, primary_key=True),
Column('parametrization', String(8))
)
metaparameters_table = Table('metaparameters', metadata,
Column('id', Integer, primary_key=True),
Column('parametrization_id', None, ForeignKey('parametrizations.id')),
Column('metaparameter', ARRAY(Float))
)
PDFs_table = Table('PDFs', metadata,
Column('id', Integer, primary_key=True)
)
parameters_table = Table('parameters', metadata,
Column('id', Integer, primary_key=True),
Column('PDFs_id', None, ForeignKey('PDFs.id')),
Column('metaparameters_id', None, ForeignKey('metaparameters.id')),
Column('parameter', ARRAY(Float))
)
metadata.create_all()
else:
engine = create_engine('sqlite:///'+where)
print('loading the tables from '+where)
metadata = MetaData(engine)
parametrizations_table = Table('parametrizations', metadata, autoload=True)
metaparameters_table = Table('metaparameters', metadata, autoload=True)
PDFs_table = Table('PDFs', metadata, autoload=True)
parameters_table = Table('parameters', metadata, autoload=True)
metadata.create_all()
In [ ]:
In [ ]:
from sqlalchemy.dialects import postgresql
from sqlalchemy import create_engine
from sqlalchemy import MetaData, Column, Table, ForeignKey
from sqlalchemy import Integer, String, ARRAY
engine = create_engine('postgresql:///home/aimalz/Code/qp/docs/desc-0000-qp-photo-z_approximation/research/ensemble.db',
echo=True)
metadata = MetaData(bind=engine)
parametrizations_table = Table('parametrizations', metadata,
Column('id', Integer, primary_key=True),
Column('parametrization', String(8)),
)
metaparameters_table = Table('metaparameters', metadata,
Column('id', Integer, primary_key=True),
Column('parametrization_id', None, ForeignKey('parametrizations.id')),
Column('metaparameters', ARRAY(float), nullable=False)
)
# create tables in database
metadata.create_all()
In [ ]:
In [ ]:
import pandas as pd
import numpy as np
In [ ]:
test = pd.DataFrame()
print(test)
In [ ]:
print(test.columns)
# print(full_df.columns)
In [ ]:
df_columns = ['quantiles', 'histogram', 'samples', 'gridded', 'gmm']
test = pd.DataFrame(columns = df_columns)
print(type(test.index))
In [ ]:
df_columns = ['quantiles', 'histogram', 'samples', 'gridded', 'gmm']
data1 = {'quantiles': np.arange(10), 'histogram': np.arange(10)}
test_df = pd.DataFrame([data1], index = ['first'])
print(test_df.index)
In [ ]:
data2 = {'samples': np.arange(10), 'histogram': np.arange(10)}
another_df = pd.DataFrame([data2], index = ['second'])
combo_df = test_df.append(another_df)
print(combo_df.index)
In [ ]:
galinds = np.arange(0, 1, 0.1)
galnames = [str(ind) for ind in galinds]
print(galnames)
empty_df = pd.DataFrame(None)#, index = galnames)
print(empty_df)
In [ ]:
new_galinds = galinds + 0.5
new_galnames = [str(ind) for ind in new_galinds]
old_df = pd.DataFrame([data1] * 10, index = galnames)
#for galname in galnames:
new_df = pd.DataFrame([data2] * 10, index = new_galnames)
# full_df = pd.concat([old_df, new_df], axis=1)
# print(full_df)
In [ ]:
# data1pp, data2pp = [], []
# for i in range(len(galnames)):
# print(galnames[i])
# td1 = data1
# td1['galname'] = galnames[i]
# data1pp.append(td1)
# td2 = data2
# td2['galname'] = galnames[i]
# data2pp.append(td2)
# full_df = pd.DataFrame(data1pp)
# print(full_df)
# new_df = pd.DataFrame(data2pp)#, index = galnames)
# print(new_df)
#full_df = pd.merge(old_df, new_df, how = 'outer', on = 'index')
full_df = old_df.combine_first(new_df)
print(full_df)
In [ ]:
In [ ]:
The qp.Ensemble class should relate a few structures:
In [ ]:
#help(parametrizations_table)
print(parametrizations_table.metadata)
In [ ]:
In [ ]:
metadata = MetaData()
Ensemble = Table('ensemble', metadata,
Column('meta_mixmod', ARRAY(String)),
Column('meta_quantiles', ARRAY(Float)),
Column('meta_histogram', ARRAY(Float)),
Column('meta_gridded', ARRAY(Float)),
Column('meta_samples', ARRAY(Float)))
In [ ]:
class Galaxy(Base):
__tablename__ = 'galaxy'
idno = Column(Integer, primary_key=True)
pdf = Column(BLOB)
In [ ]:
class Parametrizations(Base):
__tablename__ = 'parametrizations'
idno = Column(Integer, primary_key=True)
mixmod = Column(ARRAY(Float))
quantiles = Column(ARRAY(Float))
histogram = Column(ARRAY(Float))
gridded = Column(ARRAY(Float))
samples = Column(ARRAY(Float))
galaxy = Relationship(Galaxy)
In [ ]:
In [ ]:
testpdf = qp.PDF()
In [ ]:
In [ ]:
help(sqlalchemy)