In [5]:
from IPython.core.display import HTML
In [7]:
import cPickle
import collections
import gzip
import numpy
import os
import sys
import theano
from theano.tensor.shared_randomstreams import RandomStreams
import time
import theano.tensor as T
from numpy import dtype
In [37]:
# (Re-)imports — T and numpy are already imported in the first import cell;
# repeated here so this cell also runs standalone.
from theano import tensor as T
from theano import function, shared
import numpy
# Two 2x3 integer matrices wrapped as Theano shared variables.
x = shared(numpy.array([[0,1,2], [0,1,2]]))
z = shared(numpy.array([[0,1,1], [0,1,1]]))
# Number of leading columns used when slicing x/z in later cells.
size_of_x = 2
In [39]:
# Inspect the current contents of the shared variable (returns the plain numpy array).
x.get_value()
Out[39]:
In [41]:
# Symbolic mean over all elements of x — builds a graph node, nothing is
# evaluated until the expression is compiled or eval()'d.
y = theano.tensor.mean(x)
In [71]:
# Toy integer class labels in [0, 9], used to demo one-hot encoding below.
train_y = numpy.array([0, 1, 2, 4, 5, 6, 7, 8, 9, 2, 8])
In [72]:
# Column-vector view of the labels, shape (n, 1) — the layout OneHotEncoder expects.
train_y_T = train_y.reshape(-1, 1)
In [76]:
from sklearn.preprocessing import OneHotEncoder
# 10 output classes, dense (non-sparse) output, dtype matched to Theano's floatX.
# NOTE(review): `n_values` was deprecated in scikit-learn 0.20 and removed in
# 0.22 (replaced by `categories`); this cell targets an older sklearn release.
enc = OneHotEncoder(n_values = 10, dtype = theano.config.floatX, sparse=False)
In [74]:
# One-hot encode the (n, 1) label column into an (n, 10) float matrix.
encode_train_y = enc.fit_transform(train_y_T)
In [68]:
# Display the label column vector.
train_y_T
Out[68]:
In [75]:
# Display the resulting one-hot matrix.
encode_train_y
Out[75]:
In [36]:
# Slicing a Theano shared variable produces a *symbolic* TensorVariable,
# which has no .get_value() — the original `x[:,:size_of_x].get_value()`
# raises AttributeError. Fetch the numpy value first, then slice it.
x.get_value()[:, :size_of_x]
In [ ]:
# Scratch fragment (cell never executed — In [ ]): per-example binary
# cross-entropy between input self.x and reconstruction z, restricted to the
# first size_of_x columns. The leading '-' and the `self.` reference show this
# was pasted out of a class body (presumably an autoencoder cost — TODO
# confirm); it is not runnable as a standalone cell.
- T.mean(self.x[:,:size_of_x] * T.log(z[:,:size_of_x]) + (1 - self.x[:,:size_of_x]) * T.log(1 - z[:,:size_of_x]), axis=1)
In [32]:
# Small dict used below to demonstrate that .copy() is independent of the original.
a = {'s': 1, 'b': 2}
In [33]:
# Display the original dict.
a
Out[33]:
In [34]:
# Shallow copy of `a`: mutating the copy must not touch the original.
b = dict(a)
b['s'] = 3
b['c'] = 3
b
Out[34]:
In [35]:
# Show that `a` is unchanged by the mutations made to its copy `b`.
a
Out[35]:
In [8]:
import pandas as pd
import numpy as np
import rpy2.robjects as robjects
import rpy2.robjects as ro
In [5]:
# Evaluate R's built-in constant `pi`; rpy2 returns an R vector, so index [0]
# to get the scalar value.
pi = robjects.r('pi')
pi[0]
Out[5]:
In [10]:
%load_ext rmagic
%load_ext rpy2.ipython
Run linear regression in R, print out a summary, and pass the result variable error back to Python:
In [11]:
%%R -o error
# Fit a linear model on synthetic data and export the test RMSE (`error`) to
# Python via the -o flag. Order of the runif() draws matters for the seed.
set.seed(10)
y<-c(1:1000)
# Predictors: noisy linear, quadratic and log transforms of the index.
x1<-c(1:1000)*runif(1000,min=0,max=2)
x2<-(c(1:1000)*runif(1000,min=0,max=2))^2
x3<-log(c(1:1000)*runif(1000,min=0,max=2))
all_data<-data.frame(y,x1,x2,x3)
# 75/25 train/test split by sampled row indices.
positions <- sample(nrow(all_data),size=floor((nrow(all_data)/4)*3))
training<- all_data[positions,]
testing<- all_data[-positions,]
lm_fit<-lm(y~x1+x2+x3,data=training)
print(summary(lm_fit))
predictions<-predict(lm_fit,newdata=testing)
# Root-mean-squared prediction error on the held-out quarter.
error<-sqrt((sum((testing$y-predictions)^2))/nrow(testing))
In [12]:
# Print the RMSE exported from the R cell above. Parenthesized print works on
# both Python 2 and 3; the original statement form is Python-2-only.
print(error)
First we create the data in R:
In [13]:
%%R -o training,testing
# Regenerate the same synthetic data (identical seed and draw order as the
# earlier cell), but export the raw train/test frames to Python instead.
set.seed(10)
y<-c(1:1000)
x1<-c(1:1000)*runif(1000,min=0,max=2)
x2<-(c(1:1000)*runif(1000,min=0,max=2))^2
x3<-log(c(1:1000)*runif(1000,min=0,max=2))
all_data<-data.frame(y,x1,x2,x3)
# 75/25 train/test split, same procedure as before.
positions <- sample(nrow(all_data),size=floor((nrow(all_data)/4)*3))
training<- all_data[positions,]
testing<- all_data[-positions,]
The variables training and testing are now available as numpy array in Python namespace due to the -o flag in the cell above. We'll create pandas DataFrame from them:
In [8]:
# Rebuild pandas DataFrames from the arrays exported out of R.
# NOTE(review): this assumes iterating `training` yields its columns in
# (y, x1, x2, x3) order — consistent with rpy2's column-wise data.frame
# export, but worth confirming for the rpy2 version in use.
tr = pd.DataFrame(dict(zip(['y', 'x1', 'x2', 'x3'], training)))
te = pd.DataFrame(dict(zip(['y', 'x1', 'x2', 'x3'], testing)))
tr.head()
Out[8]:
Create linear regression model, print a summary:
In [9]:
# Same regression as the R cell, now via statsmodels' R-style formula API.
from statsmodels.formula.api import ols
lm = ols('y ~ x1 + x2 + x3', tr).fit()
# Rich summary table (coefficients, R^2, diagnostics) as the cell output.
lm.summary()
Out[9]:
Predict and compute RMSE:
In [10]:
pred = lm.predict(te)
# RMSE on the held-out set. Use np.sqrt explicitly: a bare `sqrt` is only
# defined when the session was started in pylab mode, so the original line
# raises NameError on a clean kernel.
error = np.sqrt((sum((te.y - pred) ** 2)) / len(te))
error
Out[10]:
First we create data (numpy array) in Python:
In [11]:
# Tiny 5-point dataset, passed into the R cell below with `-i X,Y`.
X = np.arange(5)
Y = np.array([3, 5, 4, 6, 7])
We pass them into R using the -i flag, run linear regression in R, print a summary and plot, output the result back in Python:
In [12]:
%%R -i X,Y -o XYcoef
# Simple regression of Y on X using the arrays passed in from Python (-i);
# prints the fit summary and exports the coefficient vector (-o).
XYlm = lm(Y~X)
XYcoef = coef(XYlm)
print(summary(XYlm))
# 2x2 panel of the standard lm diagnostic plots.
par(mfrow=c(2,2))
plot(XYlm)
We also pass the model coefficients from R as variable XYcoef:
In [13]:
# Intercept and slope exported from the R fit.
XYcoef
Out[13]:
In [ ]: