In [54]:
""" Imports """
import re
from nltk.tokenize import word_tokenize, sent_tokenize
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

"""Global definitons"""
_start = 'S_START'
_end = 'S_END'

In [55]:
""" util definitions"""

def hyperbolic(net):
    return np.tanh(net)

def relu(net):
    return np.maximum(0,net)

def softmax(net):
    _exp = np.exp(net)
    return _exp/np.sum(_exp)

def predict(scores):
    return np.argmax(scores)

In [56]:
class WordItem:
    def __init__(self,word,count=0):
        self.word = word
        self.count = count

In [57]:
class RNNlayer:
    
    """ 
    RNN nodes for decoder
    
    hidden state at time step t of decoder is conditioned on hidden state at time step t-1,
    output at time step t-1 and input at time step t
    """
    
    def __init__(self, inputSize, outputSize, bptt_truncate = 5, hiddenDim = 10):
        """
        inputSize = dimensions of the input embedding 
        outputSize = vocabulary size
        hiddenDim = size of the hidden unit in RNN
        bptt_truncate = truncate the number of time steps we calculate the gradient during backpropagation
        """
        self.inputSize = inputSize
        self.outputSize = outputSize
        self.hiddenDim = hiddenDim
        self. bptt_truncate = bptt_truncate
        
        self.w_in = np.random.uniform(-np.sqrt(1./inputSize), np.sqrt(1./inputSize),(hiddenDim, inputSize))
        self.w_hh = np.random.uniform(-np.sqrt(1./hiddenDim), np.sqrt(1./hiddenDim),(hiddenDim, hiddenDim))
        #self.w_outH = np.random.uniform(-np.sqrt(1./hiddenDim), np.sqrt(1./hiddenDim),(outputSize, hiddenDim))
        self.w_out = np.random.uniform(-np.sqrt(1./hiddenDim), np.sqrt(1./hiddenDim),(outputSize, hiddenDim))
        
    def forwardProp(self, inSentence, expSent):
        """
        inSentence: word indices in input language vocabulary
        expSent: word indices in target language vocabulary
        """
        
        #Total number of time steps equal to number of words in the sentence
        T = len(inSentence)
        
        #Saving all hidden states and outputs during forward propagation
        _h = np.zeros((T,self.hiddenDim))
        _o = np.zeros((T,self.outputSize))
        
        #Initializing initial output as the start token
        #_o[-1] = 
        
        #For each time step calculating hidden state and output
        for t in np.arange(T):
            #outIdx = predict(_o[t-1])
            _h[t] = hyperbolic(self.w_in.dot(inSentence[t]) + self.w_hh.dot(_h[t-1])) #+ self.w_outH[:,outIdx:outIdx+1])
            _o[t] = softmax(self.w_out.dot(_h[t]))
            
        return _o, _h
    
    def calculateLoss(self, inSentence, expSentence):
        
        #For each sentence
        o, h = self.forwardProp(inSentence, expSentence)
        #TODO recheck this part
        correctPred = o[np.arange(len(expSentence)), expSentence]
        #Loss for each sentence
        l = -1 * np.sum(np.log(correctPred))
        return l
    
    def calculateTotalLoss(self, inSentence, expSentences):
        
        L = 0.0
        for i in range(len(inSentence)):
            if len(inSentence[i]) == len(expSentences[i]) :
                L += self.calculateLoss(inSentence[i], expSentences[i])
            
        return L
    
    def backPropTT(self, inSentence, expSentence):
        
        # Total number of time steps equal to number of words in the sentence
        T = len(expSentence)
        
        # Performing forward propagation
        o, h = self.forwardProp(inSentence, expSentence)
        
        # Defining gradient variables
        dLdin = np.zeros(self.w_in.shape)
        dLdhh = np.zeros(self.w_hh.shape)
        #dLdoutH = np.zeros(self.w_outH.shape)
        dLdout = np.zeros(self.w_out.shape)
        
        # Calculating the difference between output and actual output
        delta_o = o
        delta_o[np.arange(T), expSentence] -= 1
        #print 'delta_o', delta_o
        
        # Calculating gradients backwards through time
        for t in np.arange(T)[::-1]:
            #Output gradient is only dependent on time step t
            dLdout += np.outer(delta_o[t], h[t])
            
            # Initial delta calculation propagating gradients from output
            delta_t = self.w_out.T.dot(delta_o[t]) * (1 - (h[t] ** 2))
            
            # Backpropagation through time (for at most self.bptt_truncate steps)
            for bptt_step in np.arange(max(0, t-self.bptt_truncate), t+1)[::-1]:
                # print "Backpropagation step t=%d bptt step=%d " % (t, bptt_step)
                # Add to gradients at each previous step
                dLdhh += np.outer(delta_t, h[bptt_step-1])              
                dLdin += np.outer(delta_t, inSentence[bptt_step-1])
                #dLdoutH += np.outer(delta_t, o[bptt_step-1])
                # Update delta for next step dL/dz at t-1
                delta_t = self.w_hh.T.dot(delta_t) * (1 - h[bptt_step-1] ** 2)
            """TODO review backprop implementation"""
            
        #return dLdin, dLdhh, dLdoutH, dLdout
        return dLdin, dLdhh, dLdout
    
    def sgd_step(self, inSentence, expSentence, learningRate):
        
        """ Performs a single stochastic gradient step"""
        
        # Calculating gradients
        #dLdin, dLdhh, dLdoutH, dLdout = self.backPropTT(inSentence, expSentence)
        dLdin, dLdhh, dLdout = self.backPropTT(inSentence, expSentence)
        
        # Updating parameters
        self.w_in -= learningRate * dLdin
        self.w_hh -= learningRate * dLdhh
        #self.w_outH -= learningRate * dLdoutH
        self.w_out -= learningRate * dLdout
        
    def train_Decoder_With_SGD(self, X_train, Y_train, learningRate = 0.05, nepochs = 200):
        """TODO evaluate losses and update learning rate if required"""
        loss = 100000000000
        for epoch in range(nepochs):
            for i in range(len(Y_train)):
                if len(X_train[i]) == len(Y_train[i]) :
                    self.sgd_step(X_train[i], Y_train[i], learningRate)
            newLoss = self.calculateTotalLoss(X_train, Y_train)
            print epoch, " ", newLoss
            if newLoss > loss :
                break
            loss = newLoss

In [58]:
""" Word preprocessing """
def dataset(_fi='/home/jazzycrazzy/PythonScripts/dataset.csv', _fo = 'testfile.txt'):
    file_in = open(_fi)
    #file_out = open(_fo,'wb')

    words = [] #stores unique words encountered in the document as WordItem objects
    _dict = {} #temporary dictionary to maintain count of each word
    
    _dict['UNK'] = 0

    for l in file_in:
        #file_out.write(l+'\n')
        l = _start+' '+l+' '+_end
        split = word_tokenize(l.decode('utf-8'))
        for w in split:
            if len(w)==0:
                continue
            elif len(w) > 15: #if word's length is greater than 15 counting it as unknown
                _dict['UNK'] += 1
                continue
            if w not in _dict:
                _dict[w] = 1
            _dict[w] += 1
            
    _vocab = {} #dictionary with words as keys and values as indices of them in 'word' list
    _vocab['UNK'] = len(words)
    words.append(WordItem('UNK',_dict['UNK']))
    for k,v in _dict.iteritems():
        #if v > 9 and k != 'UNK':
        if k != 'UNK':
            _vocab[k] = len(words)
            words.append(WordItem(k,v))
        else:
            words[0].count += 1
    
    #cleaning up unnecessary memory
    del _dict
    file_in.close()
    #file_out.close()
    
    return _vocab, words

def UnigramTable(_vocab, words):
    """ Calculates probabilities based on count of each word present"""
    pow = 0.75
    totalFreqPow = 0.0
    unigramTable = {}
    
    l = [words[i].count**pow for i in range(len(_vocab))]
    totalFreqPow = np.sum(l)
    
    for i in range(len(_vocab)):
        unigramTable[i] = (words[i].count**pow)/totalFreqPow
    
    del l
    return unigramTable

def hotVector(wordIndex,vocabSize):
    """ Returns hot vector representation of a word """
    hVector = np.zeros(vocabSize)
    hVector[wordIndex-1] = 1
    return hVector

def softmax(net):
    """ calculates softmax score - target score normalized with noise scores and calculated as probability"""
    _exp = np.exp(net)
    return _exp/np.sum(_exp)

def sigmoid(net):
    """ Applies sigmoid logistic function on net """
    return 1.0/(1+np.exp(-net))

def randomIdx(k, vocabSize, current):
    """ Returns k indices from with unigram table randomly with respect to each word's probablity """
    global _unigramTable
    idxs = list(np.random.choice(vocabSize, k+1, False, p = _unigramTable.values()))
    if current in idxs:
        idxs.remove(current)
    else:
        del idxs[-1]
    return idxs
    
def softmaxCostGradient(net, target):
    prob = softmax(net)
    print(prob)
    
    
def negSamplingCostGradient(out, context, emb, vocabSize, learningRate, W_Output, k = 10):
    
    errorHidden = np.zeros(shape=(emb.size,1))
    
    actOut = sigmoid(out[context])
    negSamples = randomIdx(k, vocabSize, context)
    _negSamples = [-out[sample] for sample in negSamples]
    
    # error for context word
    e = -np.log(actOut) - np.sum(np.log(sigmoid(np.array(_negSamples))))
    
    """ calculating gradients for output vectors for both target and negative samples
    calculating hidden layer error for each context word """
    # Updating output weight vector for context word
    delta = actOut - 1
    errorHidden += delta * W_Output[:,context:context+1]
    W_Output[:,context:context+1] -= learningRate * np.reshape(delta * emb,(emb.size,1))
    
    # Updating output weight vectors for negative sampling
    for sample in negSamples:
        delta = sigmoid(out[sample])
        errorHidden += delta * W_Output[:,sample:sample+1]
        W_Output[:,sample:sample+1] -= learningRate * np.reshape(delta * emb,(emb.size,1))
    
    return errorHidden,e    
    
def skipgram(target,contextWords, vocabSize, learningRate, W_Embedding, W_Output):
    
    """
    will be called on each window with
    target: Target word index
    contextWords: Arrray of integers representing context words
    """
    loss = 0
    k = 10 #Number of negative samples
    emb = W_Embedding[target]
    out = np.matmul(emb,W_Output) # [1 x EmbSize].[EmbSize x VocabSize]
    #print out.shape
    _predicted = []
    EH = np.zeros(shape=(emb.size,1))
    for context in contextWords:
        #predicted = hotVector(context, vocabSize)
        #softmaxCostGradient(out,context)
        _EH,_e = negSamplingCostGradient(out, context, emb, vocabSize, learningRate, W_Output, k)
        EH += _EH
        loss += _e
        #EH += sof
        
    #updating hidden layer input vector embedding
    W_Embedding[target] -= learningRate * EH.T[0]
    return loss

In [59]:
""" Creates word embeddings in vector space representation """

""" Feedforward Neural Net Language model """
#Input layer

#Projection layer

#Hidden layer

#Output layer

#Initialization
fin='/Users/preethikapachaiyappa/Documents/MachineLearning/Data/English-small.txt'#/home/jazzycrazzy/PythonScripts/dataset.csv'
fin1='/Users/preethikapachaiyappa/Documents/MachineLearning/Data/French-small.txt'
fout = 'testfile.txt'
fout1 = 'testfile1.txt'
_vocab, words = dataset(fin, fout)
_vocab_f, words_f = dataset(fin1, fout1)
_unigramTable = UnigramTable(_vocab, words)

learningRate = 0.1
vocabSize = len(words)
vocabSize_f = len(words_f)
emb_size = 10
win_size = 2
target = None
contextWords = []
epoch = 20

print _vocab
print _vocab_f


# No need of hidden layer since when the embedding matrix is multiplied with hot vector 
#it essentially gives that embedding row
W_Embedding = np.random.randn(vocabSize,emb_size) #Embedding matrix
W_Output = np.random.randn(emb_size,vocabSize) #Outputlayer weight matrix Emb_size x Vocab

for _ in np.arange(epoch):
    
    totalLoss = 0
    loss = 0
    
    fileIn = open(fin)
    for l in fileIn:
        l = _start+' '+l+' '+_end
        tokens = word_tokenize(l.decode('utf-8'))
        #print 'tokens',tokens
        for token in tokens:
            
            loss = 0
            contextWords = []
            
            if token in _vocab:
                target = _vocab[token]
                trgtIdx = tokens.index(token)
                cntxtIdxs = range(trgtIdx-win_size, trgtIdx+win_size+1)
                cntxtIdxs.remove(trgtIdx)
                for idx in cntxtIdxs:
                    #check for first word and last word and use UNK for context words for window where words not available
                    if idx >-1 and idx < len(tokens) and tokens[idx] in _vocab:
                        contextWords = np.append(contextWords, _vocab[tokens[idx]])
                    else:
                        contextWords = np.append(contextWords, _vocab['UNK'])
                loss += skipgram(target, contextWords, vocabSize, learningRate, W_Embedding, W_Output)
        totalLoss += loss
    print 'Total Loss:',totalLoss
                

print(W_Embedding)


{u'enjoy': 1, u'S_END': 34, u'have': 3, u'tired': 4, u'ran': 5, u'is': 6, u'am': 8, u'How': 10, u'see': 11, u'at': 12, u'want': 13, u'in': 7, u'go': 15, u'your': 17, u'speak': 18, u'are': 65, u'what': 20, u'her': 21, u'how': 26, u'sun': 23, u'friends': 49, u'day': 50, u'graduate': 16, u'write': 19, u'to': 22, u'of': 61, u'enjoys': 28, u'has': 30, u'beach': 31, u'?': 32, u'she': 39, u'dad': 2, u'be': 35, u'we': 36, u'good': 37, u'tomorrow': 24, u'read': 9, u'student': 41, u'birth': 46, u'here': 42, u'every': 44, u'food': 45, u'mom': 25, u'date': 47, u'president': 48, 'UNK': 0, u'come': 27, u'you': 29, u'died': 51, u'he': 52, u'me': 43, u'boy': 54, u'store': 60, u'I': 38, u'name': 55, u'shop': 56, u'did': 57, u'S_START': 58, u'work': 59, u'a': 53, u'can': 33, u'night': 40, u'the': 62, u'nice': 63, u'where': 64, u'left': 14}
{u'comment': 1, u'votre': 2, u'peux': 3, u'aller': 4, u'dipl\xf4m\xe9': 5, u'La': 6, u'appelez': 7, u'venue': 66, u'naissance': 9, u'allez': 10, u'peut': 11, u'au': 13, u'il': 27, u'\xe9crire': 15, u'passe': 16, u"m'a": 17, u'veux': 18, u'magasin': 57, u'es': 36, u'suis': 32, u'ici': 22, u'est': 23, u'couru': 24, u'quelle': 25, u'la': 26, u'tu': 28, u'parler': 29, u'mort': 14, u'appr\xe9cie': 31, u'avez': 21, u'demain': 33, u'chaque': 34, u'sais': 35, u'quitt\xe9': 37, u'nourriture': 38, u'lire': 19, u'?': 39, u'S_END': 41, u'vous': 42, u'venez': 43, u'travail': 44, u'o\xf9': 73, u'\xe0': 59, u'nuit': 50, u'de': 49, u'papa': 47, u'elle': 48, u'\xe9tudiant': 70, u'appr\xe9ci\xe9': 51, u'fatigue': 53, u'rendez': 54, u'amis': 75, u'date': 55, u'je': 56, 'UNK': 0, u'maman': 30, u'a': 58, u'on': 46, u'Peux': 60, u'bonne': 71, u'\xeatre': 12, u'\xcates': 62, u'plage': 63, u'S_START': 64, u'journ\xe9e': 65, u'dans': 8, u'venu': 68, u'gar\xe7on': 69, u'le': 52, u'un': 67, u"l'ai": 20, u'bon': 45, u'\xeates': 74, u'venir': 40, u'une': 72, u'soleil': 61, u'Pr\xe9sident': 76}
/Users/preethikapachaiyappa/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:92: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/preethikapachaiyappa/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:103: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
/Users/preethikapachaiyappa/anaconda/lib/python2.7/site-packages/ipykernel/__main__.py:104: VisibleDeprecationWarning: using a non-integer number instead of an integer will result in an error in the future
Total Loss: 548.577064146
Total Loss: 377.797238059
Total Loss: 388.049088995
Total Loss: 372.10636965
Total Loss: 385.26746931
Total Loss: 401.391704975
Total Loss: 385.30087946
Total Loss: 399.192853474
Total Loss: 381.443801897
Total Loss: 385.430852802
Total Loss: 398.984780239
Total Loss: 397.283081357
Total Loss: 398.250030996
Total Loss: 393.6448626
Total Loss: 404.114884112
Total Loss: 388.503000725
Total Loss: 386.54853421
Total Loss: 385.404129374
Total Loss: 392.885982835
Total Loss: 401.044350713
[[ -1.02463943e+00  -1.00421919e+00  -2.95574953e-02   7.67953492e-01
   -1.30218626e+00  -8.61392017e-01  -8.64616979e-01   1.54902026e+00
    7.80616089e-01   1.05430431e-01]
 [ -1.86139489e+00   9.10745822e-02   6.38293466e-01  -2.52006939e+00
    2.45838195e-01  -3.76749787e-01  -7.50167314e-01   9.59485177e-01
    4.09441406e-01  -9.95104751e-01]
 [ -2.63922584e-01   2.83264421e-01   4.05438001e-01  -4.50828581e-01
   -1.20673584e-01  -7.97935303e-01   1.14141033e+00  -1.84813507e-01
   -1.11011071e+00  -1.56486027e+00]
 [ -1.33716671e+00  -1.48608474e+00  -4.98208893e-01  -5.82551072e-01
   -4.38711732e-01  -1.07907343e-01   2.51349593e+00  -2.31861844e-01
    5.81750548e-01  -1.96025355e+00]
 [ -2.21813360e-01   7.66925294e-01   1.74123527e+00  -7.92468424e-01
   -9.75634961e-01   3.06982520e-01   3.68436167e-01  -6.01467949e-01
    1.48989273e+00  -8.91546289e-01]
 [  2.75737634e-01   1.31714083e+00   1.35132268e+00  -2.15860496e+00
   -7.37253092e-01   1.10860086e+00  -4.26960749e-01   1.34269276e+00
   -2.30923164e-01  -1.01979339e+00]
 [ -1.64478036e+00  -1.71269652e-02   1.13278134e+00  -6.35031100e-01
    4.94934702e-01   7.91517912e-01   2.68605239e-01   7.07875185e-01
   -8.74130533e-01  -5.94195215e-01]
 [ -1.59507346e+00  -1.39529691e-01  -3.17736074e-01  -2.03259005e+00
    1.24810332e+00   6.71715802e-02   5.71391323e-03   5.25258848e-01
   -4.36068199e-01  -1.60602610e+00]
 [  2.28203769e-01   5.41265923e-01   1.84786691e+00  -1.34001791e+00
   -1.43641204e+00  -6.74442300e-01   1.40858303e+00  -1.49718358e+00
   -1.79504656e-01  -6.77950639e-01]
 [ -7.00397302e-01   1.34430429e+00   1.93985747e+00   5.49936312e-02
   -1.45309339e-01   1.28734225e-01   7.04696519e-01  -8.91116273e-02
   -5.99081790e-01  -1.84320115e+00]
 [ -1.81262538e+00  -4.18627829e-01   8.17485647e-01  -2.03663045e+00
   -1.13568905e+00  -1.63775753e-01   1.16211623e+00   2.15818713e-01
    7.26607468e-01   2.84793702e-02]
 [ -9.75823324e-01  -5.11592587e-01  -3.76864982e-01  -1.34102155e+00
   -1.42555867e+00   2.76473464e-02   7.65602760e-01  -8.41039147e-01
    2.75943384e-01  -5.69371498e-01]
 [ -1.91213197e+00   8.89795547e-02  -7.56743951e-01  -1.16643949e+00
    1.29852631e+00   1.84717146e+00   7.54835494e-01  -1.70326913e+00
    6.22398017e-01  -1.36666096e+00]
 [  2.32244591e-01   2.07589963e+00   1.02561777e+00  -1.13278596e+00
   -1.48371311e+00   1.01899166e-01   1.41598858e+00  -1.92463996e-01
   -3.42223123e-01   8.42919788e-02]
 [  2.24483709e-01   1.10063066e+00   5.17446450e-01  -5.34539654e-02
   -1.40544191e+00  -7.92730255e-02   1.41401653e+00  -4.42463053e-01
   -4.82935258e-01  -9.20185464e-01]
 [  2.07841304e-02   2.86121522e+00  -6.18728940e-01  -1.56183197e+00
   -1.04416552e+00   4.08470375e-01   3.65037666e-03  -3.74213703e-01
   -6.87438677e-01  -2.02809241e+00]
 [ -1.01335143e+00  -2.16251126e-01  -1.49861541e+00  -1.14709832e-01
   -6.71632120e-01  -2.18054774e-01   1.12241893e-01  -7.04106965e-01
    1.83170945e+00  -2.71910218e+00]
 [ -1.85935173e+00  -8.40973234e-01   7.22703715e-01   2.20450304e-01
   -1.04859460e+00   1.90030262e-01  -5.65092524e-01   7.87742426e-01
    6.24378855e-01  -2.32819225e+00]
 [ -7.54678391e-01   1.55268373e+00   2.15696914e+00   8.78827093e-02
    2.18994712e-01  -3.87380121e-01   6.18822585e-01  -2.57595865e-01
   -9.51307941e-02  -1.47934542e+00]
 [ -9.55521814e-01   1.58918298e+00   1.64759804e+00  -5.20595971e-02
    4.56370067e-01   3.97389043e-01   6.43884048e-01  -2.39085805e-01
    2.03203225e-01  -1.82106630e+00]
 [ -8.36031500e-01  -5.90645441e-01  -4.33232739e-01  -4.51832380e-01
    3.76471545e-01   1.11548737e+00   6.40042419e-01   2.54908067e-01
   -1.42451996e+00  -2.72110461e+00]
 [  5.39057514e-01   1.90342478e+00   1.63085876e+00  -9.94133295e-01
   -8.80197417e-01   1.21304119e-02   2.26273993e+00  -1.31895320e+00
    1.14251301e+00  -1.22562964e+00]
 [  1.87188245e-02   2.12290246e+00   3.96946032e-01  -1.62826078e+00
   -1.81214701e+00   1.32310078e+00   7.70137939e-01   9.36481058e-01
    5.42515568e-01  -6.38971268e-01]
 [ -4.40855697e-01   1.15805543e+00   1.34520184e+00  -1.16466402e+00
   -1.36797880e+00   1.08484733e+00  -7.76665831e-01  -5.80721301e-01
   -5.25255503e-01  -1.47700317e+00]
 [ -4.91486950e-01  -4.41733768e-01   4.94602083e-01  -1.09051888e+00
   -4.19224221e-02   1.94941253e-01   8.41101422e-01   1.99903417e-01
    6.07211569e-01  -7.90782130e-01]
 [ -7.69214194e-02   3.32936597e-01   2.98734246e-01  -4.43404947e-01
   -1.35286557e-01  -9.45691701e-01   1.35394740e+00  -1.23588484e-01
   -1.20922199e+00  -1.58541787e+00]
 [ -1.66347728e+00   1.36801791e-01   1.33903828e-01  -1.43756168e+00
   -1.64968220e+00  -1.96692567e-01   1.36283004e+00   6.93479946e-01
    6.66011484e-01   6.28919660e-02]
 [ -1.21291807e+00   8.56603217e-01   2.49569733e-01   6.45233730e-01
   -8.04628240e-01   2.14526316e-01   5.98730035e-01  -8.62732892e-01
   -4.66243726e-01  -1.72889467e+00]
 [ -9.14838029e-01   1.10189599e+00   8.60182391e-01  -1.43468083e+00
   -1.58991963e+00   1.41780905e+00  -1.13142947e+00   4.75841860e-01
   -1.04874898e+00  -2.03373038e+00]
 [ -5.12698562e-01  -6.78415466e-01   1.52817997e+00  -1.46895538e+00
    1.62992243e-01  -3.58754525e-01   9.85200124e-01  -1.79161381e-01
    4.58530729e-01  -6.68220981e-01]
 [ -5.98020291e-01   6.89301949e-01   3.87993400e-01   1.01658807e-01
    6.02955410e-01   9.42370746e-01   2.13290238e+00  -3.26964594e-02
   -5.29238822e-01  -1.16739690e+00]
 [  1.10611120e+00   2.58782273e+00   5.73373855e-01  -1.40094650e+00
   -5.22003950e-01   4.45579889e-01   6.86236640e-01  -1.11850307e+00
   -3.51988111e-01  -1.81075020e+00]
 [ -1.12371508e+00   1.30078166e-01   2.76651711e-01  -6.41391950e-01
   -6.12535928e-01   1.28154591e+00   2.54893332e-01  -6.10638011e-01
    1.68212581e+00  -1.48467666e+00]
 [ -8.75963923e-01   9.83915660e-01   6.58557028e-01  -3.78542640e-01
   -4.35693579e-01   1.95275908e-01   1.60462511e+00  -1.14394461e+00
   -6.09163191e-01  -5.25937844e-01]
 [ -1.23845156e+00   1.08351199e+00   4.90598103e-02  -1.20830723e+00
    1.89516447e-01  -6.57073127e-01   3.64893241e-01   3.98002113e-01
   -1.06220219e-01  -1.03169341e+00]
 [ -2.88552249e+00   8.78171278e-01   8.50523861e-01  -5.88249809e-01
    5.93399872e-01  -1.07419990e+00   1.03672231e+00  -5.72456585e-02
    6.64017659e-01  -7.32686552e-01]
 [ -2.12925404e+00   6.29941962e-01   6.32334480e-01  -6.61694914e-01
    7.93539689e-02  -2.59069156e-01   4.27828171e-01  -3.14591124e-01
   -8.93769507e-02   3.02725095e-01]
 [ -5.41967498e-01  -2.96659039e-01   8.77638212e-02  -6.53089879e-01
    1.99855556e-01   1.03913779e+00   1.17116969e+00   8.25820766e-01
    1.13368327e+00  -2.32769336e+00]
 [ -6.40620592e-01   1.06246282e+00   6.08286559e-01  -1.35925070e+00
   -5.16650432e-01  -1.35983343e+00   1.07592760e+00  -1.99285396e-01
   -9.24554298e-01  -4.78861151e-01]
 [ -7.04948762e-01   8.68914655e-01  -1.65395346e-01  -5.49645420e-01
   -4.05865975e-01  -2.51170170e-01   1.68518202e+00   1.28496749e+00
   -8.20044694e-01  -7.74248778e-01]
 [ -3.58543698e-01  -8.77836366e-01  -3.18301766e-01   9.32570048e-01
   -3.64762505e-01   1.07003408e+00   2.71210806e+00  -3.58298355e-01
    7.35696137e-01  -2.59537301e+00]
 [ -4.34895898e-01  -1.52861620e+00   5.25993618e-01  -8.43103160e-01
    4.47747265e-01   2.83593366e-01   1.14426515e+00  -1.40847614e+00
    1.19744034e+00  -2.16941676e+00]
 [  1.94084938e-01   3.55640135e-01  -4.43012673e-02  -1.61087814e+00
    1.80067629e-01  -7.09415122e-02   1.53237725e+00  -6.90465136e-01
   -9.73608514e-01  -1.86287220e+00]
 [  1.10604398e+00   1.33757837e+00   1.16712759e+00  -4.81790223e-01
   -1.01603159e+00  -4.22711627e-01   2.17502354e+00  -1.40643936e-01
    1.83115805e-01  -1.67865499e+00]
 [ -1.38931480e-01   3.30000993e-02   1.00174770e+00  -1.09597198e+00
   -2.33650978e+00   3.10247460e-02   2.72972086e-01  -1.09656655e-01
   -1.02452864e+00  -1.78122956e+00]
 [ -1.14536705e-01   6.93319087e-02   8.52367561e-01  -2.48566520e+00
    3.36386011e-01   1.32112810e+00   5.89957892e-01   7.38354611e-01
   -6.66334913e-01  -1.32674429e+00]
 [ -1.56519852e+00  -8.45248709e-01   1.31758019e+00   6.11999492e-01
   -1.58030730e+00   4.57489790e-01  -2.59350326e-01  -1.89095926e+00
    1.61762858e-01  -2.27863583e+00]
 [ -1.33542972e+00  -7.20588708e-01  -2.36096627e-01   4.25365515e-01
   -1.24104774e+00   1.81426150e+00  -8.20173196e-01   1.91328037e-01
    5.49573277e-01  -2.41344902e+00]
 [ -3.62346265e-01   4.64647194e-02  -1.89354608e-01  -1.01388952e-01
   -5.43616789e-01  -7.21525415e-01   2.13636286e+00  -5.06432675e-01
   -3.90903310e-01  -1.56386108e+00]
 [ -1.56001916e+00   8.92564958e-01   1.12321629e+00  -9.57225943e-01
    8.39578500e-01   2.29356835e-01   3.48499280e-01  -1.86697844e+00
    1.78950092e+00  -6.24380055e-01]
 [ -9.20588908e-01   3.10387507e-01  -1.29152573e+00  -1.47031471e+00
    6.86797853e-01   4.09546494e-01   1.32573595e+00  -6.99850677e-01
    1.60761797e+00  -2.05097796e+00]
 [ -7.74390026e-01   1.06041609e+00   4.23988905e-01   1.31858296e+00
   -6.26918220e-01   7.84149888e-01   7.28389678e-01  -9.78367370e-01
   -6.35802062e-03  -2.56155503e+00]
 [ -1.85046208e+00   8.67958204e-01  -1.98043296e-01  -8.02612110e-01
   -2.95817325e-01  -7.49026495e-01   8.97377732e-01   4.27743324e-01
   -1.30099901e+00  -8.50817160e-01]
 [ -1.74987589e+00  -6.85031052e-01  -8.22678843e-01  -4.72732831e-01
   -7.11907941e-01   2.00179186e+00   1.73939855e+00   7.95298720e-01
    8.19880122e-01  -8.14912144e-01]
 [ -1.17072638e+00   1.02708399e+00   9.14676565e-01  -1.90674076e+00
   -1.76301121e+00   1.41206987e+00  -2.31903940e-01   8.07081894e-02
   -9.08399372e-01  -2.32648108e-02]
 [ -3.65438466e-01  -6.34875744e-01   2.83766579e-01  -1.72036438e-01
    4.75264845e-01   5.18028951e-01  -2.56712967e-01  -6.43450594e-01
   -8.10146742e-02  -2.86653603e+00]
 [ -2.86339117e-01   8.97501546e-01   1.40066918e+00  -1.75155491e+00
    3.63300804e-01   9.30620767e-01  -7.19899742e-02   5.25343544e-01
    2.35436200e-01  -1.63254073e+00]
 [ -1.16332883e+00   6.62707098e-02  -6.96979888e-01  -3.13054312e+00
   -1.63574785e+00   1.31352720e-01   4.09920605e-01  -6.90016194e-01
    3.56866929e-01  -1.05307630e+00]
 [ -9.32426501e-01   3.68403234e-01   6.85692851e-01  -1.43903983e-01
   -2.55148657e-01  -4.53330867e-01   5.17409600e-01   1.28504208e+00
    1.85385774e-01  -1.34983142e+00]
 [  1.11938366e-01   5.50510499e-01   1.56778491e-02  -1.51615664e+00
    8.08499257e-01   1.65965944e+00   4.64107354e-01   9.76263138e-02
    1.07204066e+00  -2.51720972e+00]
 [ -2.60113011e-01   1.98547441e+00   1.11535132e+00  -1.34256616e+00
    1.62264387e-01   1.31239325e+00  -1.07060367e-03  -3.95435581e-01
   -3.29045938e-01  -1.56108325e+00]
 [ -1.88719309e+00  -1.98953348e-01   5.88682657e-01  -5.99159707e-01
    5.70454028e-02   7.13131172e-01  -1.35272656e+00  -9.87733741e-01
    1.06037438e+00  -2.59872024e+00]
 [ -7.07502710e-01  -8.22483531e-01   5.27887003e-01  -1.23232622e+00
   -9.10151110e-01   2.59035388e-01   3.05093231e-01   4.46486512e-01
   -6.87991026e-01  -1.94615158e+00]
 [  1.77584036e-01   3.70791318e-02  -7.25867733e-01   2.54083911e-01
   -3.52020991e-01   1.96063014e+00   1.46159880e+00   4.68383959e-01
    7.74216859e-01  -2.27527654e+00]
 [ -1.23750943e+00   5.49588426e-01  -1.16337513e-02  -1.19021327e+00
   -4.16809436e-01   4.55732316e-01   1.07815893e+00   1.27692189e+00
   -6.29985922e-01  -5.86875060e-01]
 [ -1.12454035e+00  -1.00038973e+00   8.61082648e-01  -9.33158010e-01
   -1.30220871e+00  -6.70916388e-01   9.03548869e-01  -7.47194188e-01
    1.82807334e-01  -9.02811687e-01]]

In [60]:
mean_list = W_Embedding.mean(0)
print mean_list

W_Embedding_new = W_Embedding - mean_list
print W_Embedding_new


[-0.77099557  0.40361636  0.46337849 -0.82341657 -0.4389885   0.31120532
  0.69379779 -0.12915396  0.07898444 -1.37713765]
[[ -2.53643865e-01  -1.40783556e+00  -4.92935987e-01   1.59137006e+00
   -8.63197760e-01  -1.17259733e+00  -1.55841477e+00   1.67817423e+00
    7.01631647e-01   1.48256808e+00]
 [ -1.09039933e+00  -3.12541783e-01   1.74914974e-01  -1.69665282e+00
    6.84826695e-01  -6.87955103e-01  -1.44396511e+00   1.08863914e+00
    3.30456964e-01   3.82032901e-01]
 [  5.07072983e-01  -1.20351944e-01  -5.79404910e-02   3.72587986e-01
    3.18314916e-01  -1.10914062e+00   4.47612534e-01  -5.56595458e-02
   -1.18909515e+00  -1.87722622e-01]
 [ -5.66171148e-01  -1.88970111e+00  -9.61587385e-01   2.40865495e-01
    2.76767437e-04  -4.19112658e-01   1.81969814e+00  -1.02707883e-01
    5.02766106e-01  -5.83115903e-01]
 [  5.49182207e-01   3.63308929e-01   1.27785678e+00   3.09481425e-02
   -5.36646461e-01  -4.22279582e-03  -3.25361627e-01  -4.72313987e-01
    1.41090829e+00   4.85591363e-01]
 [  1.04673320e+00   9.13524466e-01   8.87944188e-01  -1.33518839e+00
   -2.98264592e-01   7.97395543e-01  -1.12075854e+00   1.47184672e+00
   -3.09907605e-01   3.57344263e-01]
 [ -8.73784790e-01  -4.20743330e-01   6.69402844e-01   1.88385466e-01
    9.33923202e-01   4.80312596e-01  -4.25192555e-01   8.37029146e-01
   -9.53114975e-01   7.82942436e-01]
 [ -8.24077897e-01  -5.43146056e-01  -7.81114566e-01  -1.20917349e+00
    1.68709182e+00  -2.44033736e-01  -6.88083881e-01   6.54412809e-01
   -5.15052641e-01  -2.28888449e-01]
 [  9.99199336e-01   1.37649559e-01   1.38448842e+00  -5.16601342e-01
   -9.97423540e-01  -9.85647616e-01   7.14785234e-01  -1.36802962e+00
   -2.58489097e-01   6.99187012e-01]
 [  7.05982654e-02   9.40687923e-01   1.47647897e+00   8.78410198e-01
    2.93679161e-01  -1.82471091e-01   1.08987249e-02   4.00423340e-02
   -6.78066231e-01  -4.66063500e-01]
 [ -1.04162982e+00  -8.22244194e-01   3.54107155e-01  -1.21321388e+00
   -6.96700553e-01  -4.74981068e-01   4.68318431e-01   3.44972674e-01
    6.47623026e-01   1.40561702e+00]
 [ -2.04827757e-01  -9.15208951e-01  -8.40243474e-01  -5.17604983e-01
   -9.86570167e-01  -2.83557970e-01   7.18049665e-02  -7.11885185e-01
    1.96958942e-01   8.07766154e-01]
 [ -1.14113641e+00  -3.14636810e-01  -1.22012244e+00  -3.43022919e-01
    1.73751481e+00   1.53596614e+00   6.10377005e-02  -1.57411517e+00
    5.43413575e-01   1.04766900e-02]
 [  1.00324016e+00   1.67228327e+00   5.62239273e-01  -3.09369394e-01
   -1.04472462e+00  -2.09306150e-01   7.22190789e-01  -6.33100351e-02
   -4.21207565e-01   1.46142963e+00]
 [  9.95479276e-01   6.97014300e-01   5.40679584e-02   7.69962601e-01
   -9.66453413e-01  -3.90478341e-01   7.20218732e-01  -3.13309092e-01
   -5.61919700e-01   4.56952187e-01]
 [  7.91779698e-01   2.45759886e+00  -1.08210743e+00  -7.38415408e-01
   -6.05177022e-01   9.72650586e-02  -6.90147417e-01  -2.45059742e-01
   -7.66423118e-01  -6.50954756e-01]
 [ -2.42355861e-01  -6.19867491e-01  -1.96199390e+00   7.08706734e-01
   -2.32643620e-01  -5.29260090e-01  -5.81555901e-01  -5.74953003e-01
    1.75272501e+00  -1.34196452e+00]
 [ -1.08835616e+00  -1.24458960e+00   2.59325223e-01   1.04386687e+00
   -6.09606103e-01  -1.21175054e-01  -1.25889032e+00   9.16896387e-01
    5.45394413e-01  -9.51054596e-01]
 [  1.63171765e-02   1.14906737e+00   1.69359064e+00   9.11299276e-01
    6.57983212e-01  -6.98585437e-01  -7.49752089e-02  -1.28441903e-01
   -1.74115236e-01  -1.02207763e-01]
 [ -1.84526247e-01   1.18556661e+00   1.18421955e+00   7.71356970e-01
    8.95358567e-01   8.61837266e-02  -4.99137458e-02  -1.09931844e-01
    1.24218783e-01  -4.43928646e-01]
 [ -6.50359328e-02  -9.94261806e-01  -8.96611231e-01   3.71584187e-01
    8.15460044e-01   8.04282057e-01  -5.37553753e-02   3.84062029e-01
   -1.50350440e+00  -1.34396696e+00]
 [  1.31005308e+00   1.49980842e+00   1.16748027e+00  -1.70716728e-01
   -4.41208917e-01  -2.99074904e-01   1.56894214e+00  -1.18979924e+00
    1.06352857e+00   1.51508010e-01]
 [  7.89714392e-01   1.71928610e+00  -6.64324605e-02  -8.04844209e-01
   -1.37315851e+00   1.01189546e+00   7.63401448e-02   1.06563502e+00
    4.63531126e-01   7.38166384e-01]
 [  3.30139870e-01   7.54439068e-01   8.81823348e-01  -3.41247451e-01
   -9.28990297e-01   7.73642010e-01  -1.47046363e+00  -4.51567339e-01
   -6.04239944e-01  -9.98655166e-02]
 [  2.79508617e-01  -8.45350133e-01   3.12235911e-02  -2.67102312e-01
    3.97066078e-01  -1.16264063e-01   1.47303628e-01   3.29057379e-01
    5.28227127e-01   5.86355522e-01]
 [  6.94074148e-01  -7.06797679e-02  -1.64644246e-01   3.80011619e-01
    3.03701942e-01  -1.25689702e+00   6.60149605e-01   5.56547724e-03
   -1.28820643e+00  -2.08280215e-01]
 [ -8.92481715e-01  -2.66814573e-01  -3.29474664e-01  -6.14145113e-01
   -1.21069370e+00  -5.07897883e-01   6.69032242e-01   8.22633907e-01
    5.87027042e-01   1.44002962e+00]
 [ -4.41922506e-01   4.52986853e-01  -2.13808759e-01   1.46865030e+00
   -3.65639740e-01  -9.66790003e-02  -9.50677588e-02  -7.33578931e-01
   -5.45228168e-01  -3.51757021e-01]
 [ -1.43842462e-01   6.98279627e-01   3.96803899e-01  -6.11264262e-01
   -1.15093113e+00   1.10660373e+00  -1.82522726e+00   6.04995821e-01
   -1.12773342e+00  -6.56592728e-01]
 [  2.58297005e-01  -1.08203183e+00   1.06480148e+00  -6.45538815e-01
    6.01980743e-01  -6.69959841e-01   2.91402330e-01  -5.00074195e-02
    3.79546287e-01   7.08916671e-01]
 [  1.72975276e-01   2.85685585e-01  -7.53850915e-02   9.25075374e-01
    1.04194391e+00   6.31165430e-01   1.43910458e+00   9.64575018e-02
   -6.08223264e-01   2.09740750e-01]
 [  1.87710677e+00   2.18420637e+00   1.09995363e-01  -5.77529935e-01
   -8.30154505e-02   1.34374574e-01  -7.56115383e-03  -9.89349105e-01
   -4.30972553e-01  -4.33612547e-01]
 [ -3.52719508e-01  -2.73538199e-01  -1.86726781e-01   1.82024617e-01
   -1.73547428e-01   9.70340596e-01  -4.38904462e-01  -4.81484050e-01
    1.60314137e+00  -1.07539009e-01]
 [ -1.04968356e-01   5.80299295e-01   1.95178536e-01   4.44873927e-01
    3.29492109e-03  -1.15929408e-01   9.10827313e-01  -1.01479065e+00
   -6.88147633e-01   8.51199808e-01]
 [ -4.67455995e-01   6.79895626e-01  -4.14318682e-01  -3.84890660e-01
    6.28504946e-01  -9.68278443e-01  -3.28904553e-01   5.27156074e-01
   -1.85204660e-01   3.45444237e-01]
 [ -2.11452693e+00   4.74554914e-01   3.87145369e-01   2.35166757e-01
    1.03238837e+00  -1.38540521e+00   3.42924515e-01   7.19083027e-02
    5.85033217e-01   6.44451100e-01]
 [ -1.35825847e+00   2.26325597e-01   1.68955988e-01   1.61721653e-01
    5.18342469e-01  -5.70274472e-01  -2.65969623e-01  -1.85437162e-01
   -1.68361392e-01   1.67986275e+00]
 [  2.29028069e-01  -7.00275404e-01  -3.75614671e-01   1.70326688e-01
    6.38844056e-01   7.27932479e-01   4.77371895e-01   9.54974727e-01
    1.05469883e+00  -9.50555705e-01]
 [  1.30374975e-01   6.58846451e-01   1.44908067e-01  -5.35834138e-01
   -7.76619324e-02  -1.67103875e+00   3.82129806e-01  -7.01314351e-02
   -1.00353874e+00   8.98276501e-01]
 [  6.60468050e-02   4.65298290e-01  -6.28773838e-01   2.73771147e-01
    3.31225244e-02  -5.62375486e-01   9.91384230e-01   1.41412145e+00
   -8.99029135e-01   6.02888873e-01]
 [  4.12451869e-01  -1.28145273e+00  -7.81680258e-01   1.75598661e+00
    7.42259951e-02   7.58828761e-01   2.01831027e+00  -2.29144393e-01
    6.56711695e-01  -1.21823536e+00]
 [  3.36099669e-01  -1.93223257e+00   6.26151261e-02  -1.96865939e-02
    8.86735765e-01  -2.76119504e-02   4.50467359e-01  -1.27932218e+00
    1.11845590e+00  -7.92279105e-01]
 [  9.65080505e-01  -4.79762297e-02  -5.07679759e-01  -7.87461576e-01
    6.19056128e-01  -3.82146828e-01   8.38579457e-01  -5.61311175e-01
   -1.05259296e+00  -4.85734547e-01]
 [  1.87703955e+00   9.33962006e-01   7.03749098e-01   3.41626344e-01
   -5.77043088e-01  -7.33916943e-01   1.48122575e+00  -1.14899746e-02
    1.04131363e-01  -3.01517336e-01]
 [  6.32064087e-01  -3.70616265e-01   5.38369212e-01  -2.72555411e-01
   -1.89752128e+00  -2.80180570e-01  -4.20825708e-01   1.94973061e-02
   -1.10351308e+00  -4.04091907e-01]
 [  6.56458862e-01  -3.34284456e-01   3.88989069e-01  -1.66224864e+00
    7.75374511e-01   1.00992278e+00  -1.03839902e-01   8.67508572e-01
   -7.45319355e-01   5.03933568e-02]
 [ -7.94202956e-01  -1.24886507e+00   8.54201703e-01   1.43541606e+00
   -1.14131880e+00   1.46284474e-01  -9.53148120e-01  -1.76180530e+00
    8.27784160e-02  -9.01498176e-01]
 [ -5.64434149e-01  -1.12420507e+00  -6.99475119e-01   1.24878208e+00
   -8.02059241e-01   1.50305618e+00  -1.51397099e+00   3.20481999e-01
    4.70588835e-01  -1.03631137e+00]
 [  4.08649302e-01  -3.57151645e-01  -6.52733100e-01   7.22027615e-01
   -1.04628289e-01  -1.03273073e+00   1.44256507e+00  -3.77278713e-01
   -4.69887751e-01  -1.86723426e-01]
 [ -7.89023589e-01   4.88948593e-01   6.59837802e-01  -1.33809376e-01
    1.27856700e+00  -8.18484811e-02  -3.45298514e-01  -1.73782448e+00
    1.71051648e+00   7.52757597e-01]
 [ -1.49593341e-01  -9.32288577e-02  -1.75490422e+00  -6.46898143e-01
    1.12578635e+00   9.83411777e-02   6.31938155e-01  -5.70696715e-01
    1.52863353e+00  -6.73840310e-01]
 [ -3.39445930e-03   6.56799726e-01  -3.93895869e-02   2.14199953e+00
   -1.87929720e-01   4.72944572e-01   3.45918841e-02  -8.49213409e-01
   -8.53424622e-02  -1.18441738e+00]
 [ -1.07946652e+00   4.64341839e-01  -6.61421788e-01   2.08044571e-02
    1.43171174e-01  -1.06023181e+00   2.03579938e-01   5.56897285e-01
   -1.37998345e+00   5.26320492e-01]
 [ -9.78880326e-01  -1.08864742e+00  -1.28605733e+00   3.50683735e-01
   -2.72919441e-01   1.69058654e+00   1.04560075e+00   9.24452681e-01
    7.40895680e-01   5.62225508e-01]
 [ -3.99730811e-01   6.23467624e-01   4.51298073e-01  -1.08332419e+00
   -1.32402271e+00   1.10086455e+00  -9.25701734e-01   2.09862151e-01
   -9.87383814e-01   1.35387284e+00]
 [  4.05557102e-01  -1.03849211e+00  -1.79611913e-01   6.51380129e-01
    9.14253345e-01   2.06823635e-01  -9.50510761e-01  -5.14296633e-01
   -1.59999116e-01  -1.48939837e+00]
 [  4.84656451e-01   4.93885181e-01   9.37290689e-01  -9.28138342e-01
    8.02289304e-01   6.19415451e-01  -7.65787768e-01   6.54497505e-01
    1.56451759e-01  -2.55403082e-01]
 [ -3.92333259e-01  -3.37345655e-01  -1.16035838e+00  -2.30712655e+00
   -1.19675935e+00  -1.79852596e-01  -2.83877189e-01  -5.60862233e-01
    2.77882488e-01   3.24061352e-01]
 [ -1.61430934e-01  -3.52131303e-02   2.22314359e-01   6.79512584e-01
    1.83839842e-01  -7.64536183e-01  -1.76388194e-01   1.41419604e+00
    1.06401332e-01   2.73062305e-02]
 [  8.82933933e-01   1.46894134e-01  -4.47700643e-01  -6.92740074e-01
    1.24748776e+00   1.34845412e+00  -2.29690440e-01   2.26780275e-01
    9.93056215e-01  -1.14007207e+00]
 [  5.10882556e-01   1.58185804e+00   6.51972831e-01  -5.19149593e-01
    6.01252887e-01   1.00118793e+00  -6.94868398e-01  -2.66281620e-01
   -4.08030379e-01  -1.83945595e-01]
 [ -1.11619753e+00  -6.02569713e-01   1.25304165e-01   2.24256860e-01
    4.96033903e-01   4.01925856e-01  -2.04652435e+00  -8.58579780e-01
    9.81389936e-01  -1.22158258e+00]
 [  6.34928576e-02  -1.22609990e+00   6.45085115e-02  -4.08909657e-01
   -4.71162610e-01  -5.21699277e-02  -3.88704563e-01   5.75640473e-01
   -7.66975468e-01  -5.69013928e-01]
 [  9.48579603e-01  -3.66537233e-01  -1.18924622e+00   1.07750048e+00
    8.69675086e-02   1.64942482e+00   7.67801002e-01   5.97537920e-01
    6.95232417e-01  -8.98138892e-01]
 [ -4.66513859e-01   1.45972061e-01  -4.75012243e-01  -3.66796708e-01
    2.21790642e-02   1.44527000e-01   3.84361139e-01   1.40607585e+00
   -7.08970364e-01   7.90262592e-01]
 [ -3.53544779e-01  -1.40400609e+00   3.97704156e-01  -1.09741444e-01
   -8.63220210e-01  -9.82121704e-01   2.09751075e-01  -6.18040227e-01
    1.03822893e-01   4.74325965e-01]]

In [61]:
inSentence = []
expSentence = []

fileIn0 = open(fin)
for l in fileIn0 :
    #l = _start+' '+l+' '+_end
    tokens = word_tokenize(l.decode('utf-8'))
    inSent = []
    for token in tokens :
        target = ""
        if token not in _vocab : 
            target = _vocab['UNK']
        else : 
            target = _vocab[token]
        vec = W_Embedding_new[target]
        vec_list = vec.tolist()
        inSent.append(vec_list)
    inSentence.append(inSent)

fileIn1 = open(fin1)
for l in fileIn1 :
    #l = _start+' '+l+' '+_end
    tokens = word_tokenize(l.decode('utf-8'))
    expSent = []
    for token in tokens :
        target = ""
        if token not in _vocab_f : 
            target = _vocab_f['UNK']
        else : 
            target = _vocab_f[token]
        expSent.append(target)
    expSentence.append(expSent)

#print inSentence
#print expSentence
        
a = RNNlayer(10,vocabSize_f)
a.train_Decoder_With_SGD(inSentence, expSentence, 0.1, 25)


0   676.343121879
1   559.678232753
2   466.106003846
3   403.321027958
4   363.39722914
5   337.228165175
6   303.39501963
7   291.722530273
8   280.906331054
9   260.910654161
10   242.253290972
11   224.447399452
12   251.125510319

In [68]:
inSentence = []
input = "dad has left"
#target = _vocab[input]
#vec = W_Embedding_new[target]
#inSentence.append(vec)
tokens = word_tokenize(input.decode('utf-8'))
inSent = []
for token in tokens :
    target = _vocab[token]
    vec = W_Embedding_new[target]
    vec_list = vec.tolist()
    inSent.append(vec_list)
inSentence.append(inSent)
print inSentence

o,h = a.forwardProp(inSentence[0],None)
print o
words1 = o.argmax(axis=1)
for i in range(len(words1)) :
    print words_f[words1[i]].word


[[[0.5070729833579013, -0.12035194421675688, -0.05794049099940912, 0.37258798584359093, 0.3183149159821348, -1.1091406192035431, 0.44761253365321696, -0.05565954576060961, -1.1890951527451588, -0.18772262211994373], [0.17297527561999215, 0.2856855845545315, -0.07538509154828271, 0.9250753737318159, 1.04194390939685, 0.6311654301418484, 1.4391045818256725, 0.0964575018490263, -0.6082232638144489, 0.2097407502281834], [0.9954792761758973, 0.6970142998677618, 0.05406795835104378, 0.769962601255683, -0.9664534127529022, -0.39047834148867144, 0.720218732148162, -0.31330909170275634, -0.561919700032848, 0.4569521874179211]]]
[[  2.00794550e-04   1.67446555e-05   9.71852397e-04   1.26703128e-03
    9.32719924e-05   3.34391420e-04   1.37314300e-03   1.82754577e-04
    2.28267731e-03   1.25485734e-04   9.75763515e-05   3.01249722e-03
    5.47627321e-05   1.41111660e-04   2.68871825e-04   4.50327984e-04
    3.69132929e-04   1.67834087e-03   3.85446407e-04   2.20911894e-02
    8.38569850e-04   2.16405717e-05   3.77156026e-03   6.14446182e-03
    3.50737315e-04   1.51607605e-03   3.73106812e-04   1.50905916e-02
    3.29866709e-04   9.07238981e-03   3.12625259e-01   1.14936918e-04
    2.57956225e-03   1.30488119e-05   4.40431147e-03   1.31193335e-03
    1.22146720e-03   1.45758948e-04   2.11768036e-03   1.52239677e-06
    1.38851420e-03   3.86468683e-04   5.07934424e-04   2.88518146e-03
    3.81216338e-05   2.10702509e-04   9.06076467e-05   5.17784362e-01
    1.13068633e-02   1.28722985e-04   5.09454758e-05   5.01042641e-04
    3.14722493e-03   1.41109465e-05   3.35847744e-06   1.77121747e-04
    4.55875430e-02   8.16249749e-03   8.63431816e-04   3.12466756e-04
    2.89262871e-04   1.78687871e-03   2.14772917e-04   2.98979922e-04
    2.75663313e-04   6.27670908e-05   5.82463581e-04   2.29972841e-05
    4.28120077e-04   1.73183762e-04   7.04314454e-05   1.06856654e-05
    3.21074968e-05   2.85145095e-04   2.84757735e-04   2.00308244e-05
    4.17264362e-03]
 [  6.83703210e-04   4.16010728e-03   1.36089117e-04   1.28117483e-02
    3.37275496e-03   1.07440694e-04   7.63349031e-04   1.56442934e-04
    2.43778930e-03   1.60268600e-05   3.00627441e-06   3.02532609e-02
    5.24307025e-04   4.56737697e-03   9.44496073e-04   4.39845640e-04
    1.11247806e-03   2.36658520e-02   1.96807483e-03   6.22912188e-04
    4.31389178e-03   1.41891984e-04   1.94557554e-03   4.19810703e-01
    1.08981956e-02   8.40193217e-04   8.54847487e-04   8.46196494e-02
    1.63317671e-05   3.27285534e-04   6.23405299e-03   1.25008304e-04
    5.40801136e-05   1.42188424e-04   5.24006685e-05   3.50990792e-03
    3.87009907e-04   2.10254510e-03   1.31742135e-03   2.27340063e-04
    1.43431400e-04   6.87294016e-04   1.57823904e-06   2.92189824e-04
    1.93927692e-03   6.59777436e-03   1.87919412e-03   1.25217937e-03
    7.68752012e-02   6.63240494e-05   3.86859408e-03   1.29041436e-04
    1.75233844e-04   1.04018587e-05   2.32346825e-05   2.40722763e-04
    5.12238364e-04   8.80128391e-04   1.03660233e-01   3.91939423e-03
    5.54611508e-04   2.75948776e-04   3.06443985e-05   7.48600817e-04
    6.38700066e-04   3.19890287e-03   4.61821582e-03   5.18294692e-03
    3.60063662e-03   2.40303290e-03   1.31295780e-03   1.26041474e-03
    1.97404641e-02   1.21849268e-01   2.84422437e-05   3.04492293e-04
    4.43047244e-03]
 [  5.54312456e-04   2.72509733e-06   2.09639177e-04   4.70771879e-03
    1.67402737e-03   8.68017164e-04   2.73112623e-05   3.37171711e-04
    3.58141973e-05   7.72697694e-05   2.60620751e-05   1.38905395e-01
    1.45559347e-03   9.36602217e-05   2.55134130e-02   2.48798991e-02
    7.78460551e-05   4.70485398e-02   5.11582881e-03   2.46780422e-02
    4.97578913e-03   1.00145527e-05   9.56555721e-04   2.71640062e-03
    1.08597743e-03   1.96712686e-05   9.47881390e-05   1.57287361e-03
    2.43948395e-04   9.67450906e-03   1.27063483e-03   1.07241721e-04
    9.38187831e-04   5.13665137e-07   1.78462708e-04   5.58471378e-03
    9.65038096e-04   3.49066627e-01   3.14794289e-05   3.34579487e-04
    1.70770949e-03   6.44974252e-04   1.92697036e-06   3.67185118e-02
    3.83110048e-04   5.89438169e-04   7.96934180e-04   3.15125050e-03
    8.24791536e-03   8.71318869e-05   1.11753054e-03   5.20371739e-06
    2.48082368e-05   1.44906088e-03   1.58492718e-05   1.69115635e-04
    1.11357846e-04   2.26395723e-03   5.94488698e-04   9.85387352e-04
    8.11645741e-03   2.83696597e-04   2.01624351e-04   4.53585768e-03
    3.63319658e-04   2.77558474e-04   1.19724444e-01   2.14645854e-04
    1.37618846e-01   1.42753444e-04   2.08763465e-03   1.97481416e-03
    8.16748795e-04   6.19263504e-04   1.16779247e-04   1.64208179e-03
    6.08151935e-03]]
papa
est
quitté