In [1]:
%pylab inline
import datajoint as dj
import numpy as np
In [13]:
schema = dj.schema('examples_generic', locals())
In [4]:
@schema
class Experiment(dj.Manual):
    """Manual table: one row per experiment, keyed by an integer id only."""
    definition = """ # A simple experiment.
    experiment : int
    ----
    """
@schema
class Set(dj.Imported):
    """An imported set of noisy (x, y) datapoints, one set per experiment."""

    definition = """
    # A set of datapoints
    -> Experiment
    -----
    """

    class DataPoint(dj.Part):
        """Part table holding the individual datapoints of one Set."""

        definition = """
        # Collected data.
        -> Set
        datapoint : int
        -----
        x : float
        y : float
        """

    def _make_tuples(self, key):
        """Register the set, then simulate points near the line y = 2x."""
        n_points = 10
        noise_mean = 0
        noise_std = .1
        # Master row first, so the part rows have a parent to reference.
        self.insert1(key)
        rows = []
        for idx in range(n_points):
            rows.append(dict(
                key,
                datapoint=idx,
                x=idx + np.random.normal(noise_mean, noise_std),
                y=2 * idx + np.random.normal(noise_mean, noise_std)))
        self.DataPoint().insert(rows)
In [5]:
# Render the entity-relationship diagram of the schema defined so far.
dj.ERD(schema).draw()
In [6]:
## populate data
# Seed three experiments (ids 1-3); skip_duplicates makes this cell re-runnable.
Experiment().insert(([1],[2],[3]), skip_duplicates=True)
# Runs Set._make_tuples for every experiment not yet imported.
Set().populate()
In [7]:
# Preview the imported datapoints.
print(Set.DataPoint())
In [8]:
@schema
class LinearModel(dj.Computed):
    """Least-squares straight-line fit (y = m*x + b) for each Set."""

    definition = """
    # fits line a DataCollection. y=mx+b form
    -> Set
    -----
    m : float # Slope
    b : float # intercept
    """

    def _make_tuples(self, key):
        """Fetch the set's points and solve for slope/intercept via lstsq."""
        xs, ys = (Set.DataPoint() & key).fetch['x', 'y']
        # Design matrix [x, 1]: lstsq then yields slope and intercept together.
        design = np.column_stack([xs, np.ones_like(xs)])
        slope, intercept = np.linalg.lstsq(design, ys)[0]
        self.insert1(dict(key, m=slope, b=intercept))
@schema
class Stats(dj.Computed):
    """Goodness-of-fit metrics (MSE and R-squared) for each LinearModel."""

    definition = """
    # Computes Mean Square Error and R2 for a particular Model
    -> LinearModel
    -----
    mse : float # The MSE value.
    r2 : float # R-squared of linear fit
    """

    def _make_tuples(self, key):
        """Score the fitted line against the raw datapoints of its set."""
        xs, ys = (Set.DataPoint() & key).fetch['x', 'y']
        slope, intercept = (LinearModel() & key).fetch1['m', 'b']
        predicted = xs * slope + intercept
        residuals = ys - predicted
        # R^2 = 1 - SS_res / SS_tot
        ss_res = np.sum(residuals ** 2)
        ss_tot = np.sum((ys - np.mean(ys)) ** 2)
        self.insert1(dict(key,
                          mse=(residuals ** 2).mean(axis=0),
                          r2=1 - ss_res / ss_tot))
In [12]:
# Re-render the ERD, now including the computed tables.
dj.ERD(schema).draw()
In [9]:
# Build the models and compute the stats
# Fit one line per set, then score each fit; populate() skips existing rows.
LinearModel().populate()
Stats().populate()
In [10]:
# Show the computed fit statistics.
print(Stats())
In [16]:
# Clean up: drops the Experiment table and cascades to all dependent tables
# (Set, Set.DataPoint, LinearModel, Stats) — destructive, prompts for confirmation.
Experiment().drop()
In [ ]: