In [1]:
import pandas as pd
import numpy as np
from patsy import dmatrix, dmatrices

In [2]:
# Toy data set: x runs 1..4 and y is exactly twice x.
# y is derived from the x column with a vectorised multiply rather than a
# Python-level loop over the numpy array.
df = pd.DataFrame({'x': np.arange(1, 5)}).assign(y=lambda frame: 2 * frame['x'])

In [3]:
# Display the frame; at this point it holds the two columns x and y.
df


Out[3]:
x y
0 1 2
1 2 4
2 3 6
3 4 8

In [4]:
# Add z as the row-wise sum of the x and y columns.
df['z'] = df['x'] + df['y']

In [5]:
# Display the frame again to show the newly added z column.
df


Out[5]:
x y z
0 1 2 3
1 2 4 6
2 3 6 9
3 4 8 12

In [16]:
# Basic design matrix built directly from a pandas DataFrame.
# In patsy formulas `a:b` is the pure interaction term, while `a*b` is
# shorthand for `a + b + a:b`.  The main effects are already listed here,
# so the interaction is spelled `x:y` instead of repeating them via `x*y`;
# the resulting columns (Intercept, x, y, x:y) are unchanged.
dmat = dmatrix('x + y + x:y', df)
print(dmat)


[[ 1.  1.  2.  2.]
 [ 1.  2.  4.  8.]
 [ 1.  3.  6. 18.]
 [ 1.  4.  8. 32.]]

In [17]:
# Wrap the design matrix in a DataFrame; columns are unlabeled at this point.
dmat_df = pd.DataFrame(data=dmat)

In [18]:
# Label by column, not by term: one term can expand into several columns
# (e.g. a categorical factor), in which case term_names would have the
# wrong length.  column_names always has one entry per matrix column.
dmat_df.columns = dmat.design_info.column_names

In [19]:
# Display the labeled design-matrix frame.
dmat_df


Out[19]:
Intercept x y x:y
0 1.0 1.0 2.0 2.0
1 1.0 2.0 4.0 8.0
2 1.0 3.0 6.0 18.0
3 1.0 4.0 8.0 32.0

In [20]:
# Build the response and predictor design matrices from a single formula:
# the left-hand side of `~` becomes y, the right-hand side becomes x.
y, x = dmatrices('z ~ x + x:y', data=df)

In [23]:
# Display the response (left-hand side) design matrix.
y


Out[23]:
DesignMatrix with shape (4, 1)
   z
   3
   6
   9
  12
  Terms:
    'z' (column 0)

In [22]:
# A DesignMatrix can be handed straight to the DataFrame constructor;
# with no column labels supplied, the single column is named 0.
pd.DataFrame(data=y)


Out[22]:
0
0 3.0
1 6.0
2 9.0
3 12.0

In [24]:
# Display the predictor (right-hand side) design matrix.
x


Out[24]:
DesignMatrix with shape (4, 3)
  Intercept  x  x:y
          1  1    2
          1  2    8
          1  3   18
          1  4   32
  Terms:
    'Intercept' (column 0)
    'x' (column 1)
    'x:y' (column 2)

In [28]:
# Convert the predictor design matrix to a labeled pandas DataFrame.
# design_info.column_names has exactly one label per matrix column, whereas
# term_names has one per term and would mismatch as soon as a term (e.g. a
# categorical factor) expands into several columns.
pd.DataFrame(x, columns=x.design_info.column_names)


Out[28]:
Intercept x x:y
0 1.0 1.0 2.0
1 1.0 2.0 8.0
2 1.0 3.0 18.0
3 1.0 4.0 32.0

In [ ]: