In [1]:
from swat import *
In [2]:
s = CAS('host_name', 5570)
s.sessionprop.setsessopt(caslib='yourcaslib')
s.loadactionset('deepLearn')
s.loadactionset('castmine')
s.loadactionset('fedsql')
Out[2]:
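If one of the loadActionSet calls fails (for example, when a required license is not available), it helps to confirm what is actually loaded before going further. An optional check, using the builtins.actionSetInfo action:

# List the action sets currently loaded in this session.
s.actionsetinfo()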
In [3]:
train = s.loadtable('yelp_review_train.sashdat', casout=dict(replace=True))['casTable']
val = s.loadtable('yelp_review_val.sashdat', casout=dict(replace=True))['casTable']
test = s.loadtable('yelp_review_test.sashdat', casout=dict(replace=True))['casTable']
In [4]:
s.fetch(train, to=5)
Out[4]:
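The fetch above previews the first five rows of the training split. As a quick sanity check on the size of each split, the table.recordCount action can be called on the same CASTable handles; this is just an optional sketch:

# Row counts for the training, validation, and test splits.
s.recordcount(table=train)
s.recordcount(table=val)
s.recordcount(table=test)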
In [5]:
# GloVe: Global Vectors for Word Representation. GloVe is an unsupervised learning algorithm for obtaining vector
# representations for words. Training is performed on aggregated global word-word co-occurrence statistics from a corpus,
# and the resulting representations showcase interesting linear substructures of the word vector space.
s.upload(r'..\folder_on_your_local_machine\glove_100d_tab_clean.txt',
         casout=dict(name='glove', replace=True),
         importoptions=dict(fileType='delimited', delimiter='\t'))
Out[5]:
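Before using the embeddings for training, it is worth confirming that the GloVe table landed with the expected layout (a term column followed by the 100 numeric vector columns implied by the file name). An optional inspection sketch:

# Check the column structure and preview a few rows of the embedding table.
s.columninfo(table='glove')
s.fetch(table='glove', to=3)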
In [6]:
# Sentiment classification
# In this example, a GRU model is used, as specified by the "rnnType" option. You can also specify the layer types "LSTM" and "RNN".
# In some layers, reverse=True is specified, which makes the GRU bi-directional. Specifically, layers rnn11 and rnn21
# run in the reverse direction, meaning the model scans the sentence from the end to the beginning, while rnn12 and rnn22 run
# in the usual forward direction. As a result, the state of a neuron is affected not only by the preceding words but also by the
# words that follow.
n = 64
init = 'msra'
s.buildmodel(model=dict(name='sentiment', replace=True), type='RNN')
s.addlayer(model='sentiment', name='data', layer=dict(type='input'))
s.addlayer(model='sentiment', name='rnn11', srclayers=['data'],
           layer=dict(type='recurrent', n=n, init=init, rnnType='GRU', outputType='samelength',
                      reverse=True))
s.addlayer(model='sentiment', name='rnn12', srclayers=['data'],
           layer=dict(type='recurrent', n=n, init=init, rnnType='GRU', outputType='samelength',
                      reverse=False))
s.addlayer(model='sentiment', name='rnn21', srclayers=['rnn11', 'rnn12'],
           layer=dict(type='recurrent', n=n, init=init, rnnType='GRU', outputType='samelength',
                      reverse=True))
s.addlayer(model='sentiment', name='rnn22', srclayers=['rnn11', 'rnn12'],
           layer=dict(type='recurrent', n=n, init=init, rnnType='GRU', outputType='samelength',
                      reverse=False))
s.addlayer(model='sentiment', name='rnn3', srclayers=['rnn21', 'rnn22'],
           layer=dict(type='recurrent', n=n, init=init, rnnType='GRU', outputType='encoding'))
s.addlayer(model='sentiment', name='outlayer', srclayers=['rnn3'],
           layer=dict(type='output'))
Out[6]:
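buildModel and addLayer store the architecture in a regular CAS table named 'sentiment', so the stacked bi-directional layout can be reviewed before training by fetching that table (an optional check):

# The model definition lives in a CAS table; fetching it lists the layers and their options.
s.fetch(table='sentiment', to=50)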
In [7]:
s.tableinfo()
Out[7]:
In [8]:
s.dlTrain(table=train, model='sentiment', validtable=val,
          modelWeights=dict(name='sentiment_trainedWeights', replace=True),
          textParms=dict(initEmbeddings='glove', hasInputTermIds=False, embeddingTrainable=False),
          target='sentiment',
          inputs=['review'],
          texts=['review'],
          nominals=['sentiment'],
          optimizer=dict(miniBatchSize=4, maxEpochs=20,
                         algorithm=dict(method='adam', beta1=0.9, beta2=0.999, gamma=0.5,
                                        learningRate=0.0005, clipGradMax=100, clipGradMin=-100,
                                        stepSize=20, lrPolicy='step')
                         ),
          seed=12345
          )
Out[8]:
In [9]:
s.save(table='sentiment_trainedWeights', caslib='casuser',
       name='demo_review_sentiment_trainedweights.sashdat', replace=True, saveAttrs=True)
Out[9]:
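Because the weights were saved with saveAttrs=True, they can be brought back into a later session and used for scoring without retraining. A sketch of reloading them, assuming the same 'casuser' caslib and file name used above:

# In a new session, reload the saved weights before calling dlScore.
s.loadtable('demo_review_sentiment_trainedweights.sashdat', caslib='casuser',
            casout=dict(name='sentiment_trainedWeights', replace=True))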
In [10]:
s.dlScore(table=test, model='sentiment', initWeights='sentiment_trainedWeights',
          copyVars=['review', 'sentiment'], textParms=dict(initInputEmbeddings='glove'),
          casout=dict(name='sentiment_out', replace=True))
Out[10]:
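The scored table keeps the copied review and sentiment columns next to the model's predictions. Since the names of the prediction columns can vary with the deepLearn release, fetch a few rows first; the accuracy query below is only a sketch that assumes the predicted label is in a character column named _DL_PredName_ and that sentiment is also stored as character (adjust, or add a cast, after checking the fetch output). It uses the fedSQL action set loaded earlier:

# Preview the scored output to confirm the prediction column names.
s.fetch(table='sentiment_out', to=5)

# Rough accuracy estimate; the _DL_PredName_ column name is an assumption.
s.fedsql.execdirect(query='''
    select avg(case when sentiment = "_DL_PredName_" then 1.0 else 0.0 end) as accuracy
    from sentiment_out
''')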