Training Regression on wassa data

emoNames = ['anger','fear','joy','sadness']
# emoNames_hashTag = ['sadness', 'disgust', 'surprise', 'anger', 'fear', 'joy']

from nltk.tokenize import TweetTokenizer
import nltk.tokenize.casual as casual
from drevicko.twitter_regexes import cleanString, setupRegexes, tweetPreprocessor
import preprocess_twitter

def preprocess_tweet(text):    
    text = casual.reduce_lengthening(text)
    text = cleanString(setupRegexes('twitterProAna'),text)   
    text = ' '.join([span for notentity,span in tweetPreprocessor(text, ("urls", "users", "lists")) if notentity])
    text = text.replace('\t','')
    text = text.replace('< ','<').replace(' >','>')
    text = text.replace('):', '<sadface>').replace('(:', '<smile>')
    text = text.replace(" 't", "t").replace('#','')
    return text

def tokenise_tweet(text):
    text = preprocess_twitter.tokenize(text)
    text = preprocess_tweet(text)     
    return ' '.join(text.split())

tokenise_tweet.regexes = setupRegexes('twitterProAna')

imported regex as re

import subprocess

datasetList = ['',

# ['wget'] + datasetList, stdout=subprocess.PIPE )

import os
import pandas as pd

def _read_csv_data(filename = "data.csv", header=True, columns=['id','tweet','emotion','label']):
    df = pd.DataFrame(pd.read_csv(filepath_or_buffer = filename,sep='\t',header=None))
    df.columns = columns
    tweets_list = []    
    for index, row in enumerate(df.iterrows()): 
    df['tweet'] = tweets_list

    return df

def get_input_files(directory):
    filenames_array = [filenames for root, dirnames, filenames in os.walk(directory)]
    files  = [val for sublist in filenames_array for val in sublist]
    return ["%s/%s" %(directory, file) for file in files if file.endswith(".txt")]

def _get_dfs(directory):
    dfs_train, dfs_test, dfs_dev = {},{},{}
    for dir in get_input_files(directory):

        for emo in emoNames:
            if emo in dir:
                if 'train' in dir:
                    dfs_train.update( {emo : _read_csv_data(dir)} )                
                    print('train.'+emo+' \t' + str(len(dfs_train[emo])) + '\t<' + dir + '>')
                elif '' in dir:
                    dfs_test.update( {emo : _read_csv_data(dir)} )                
                    print('test.'+emo+' \t' + str(len(dfs_test[emo])) + '\t<' + dir + '>')
                elif '' in dir:
                    dfs_dev.update( {emo : _read_csv_data(dir)} )                
                    print('dev.'+emo+' \t' + str(len(dfs_dev[emo])) + '\t<' + dir + '>')
    return {'train':dfs_train, 'test':dfs_test, 'dev':dfs_dev}

dfs = _get_dfs('/home/vlaand/IpythonNotebooks/wassa2017/data') 	79	</home/vlaand/IpythonNotebooks/wassa2017/data/>
test.fear 	995	</home/vlaand/IpythonNotebooks/wassa2017/data/>
dev.fear 	110	</home/vlaand/IpythonNotebooks/wassa2017/data/>
train.fear 	1147	</home/vlaand/IpythonNotebooks/wassa2017/data/fear-ratings-0to1.train.txt>
dev.anger 	84	</home/vlaand/IpythonNotebooks/wassa2017/data/>
test.anger 	760	</home/vlaand/IpythonNotebooks/wassa2017/data/>
dev.sadness 	74	</home/vlaand/IpythonNotebooks/wassa2017/data/>
train.sadness 	786	</home/vlaand/IpythonNotebooks/wassa2017/data/sadness-ratings-0to1.train.txt> 	823	</home/vlaand/IpythonNotebooks/wassa2017/data/joy-ratings-0to1.train.txt>
test.sadness 	673	</home/vlaand/IpythonNotebooks/wassa2017/data/> 	714	</home/vlaand/IpythonNotebooks/wassa2017/data/>
train.anger 	857	</home/vlaand/IpythonNotebooks/wassa2017/data/anger-ratings-0to1.train.txt>


from collections import Counter
from stop_words import get_stop_words

import os
from sklearn.externals import joblib

def ifExists(filename):
    dir = os.path.dirname(filename)
def checkFolder(filename):
    dir = os.path.dirname(filename)

def _get_unique_tokens(tweets):    
    return(Counter(token for tweet in tweets for token in tweet.split()))

def _save_unique_tokens(tokens, filename='wordFrequencies.dump'):
    _ = joblib.dump(tokens, filename=filename, compress=9)

def _plot_word_frequencies(wordFrequencies, WORD_FREQUENCY_TRESHOLD = 3):
    freqs = []
    for t,c in wordFrequencies.items():
    q = 0
    for t,c in wordFrequencies.items():
    print(q, len(wordFrequencies))
    %pylab inline

def _reduce_text(text, LANGUAGE='en', WORD_FREQUENCY_TRESHOLD = 3):    

    stop_words = get_stop_words(LANGUAGE)

    tweets_reduced = []

    for tw in tweets:
        tweet_r = []
        for token in tw.split():
            if(wordFrequencies[token] >= WORD_FREQUENCY_TRESHOLD):
                if(not token in stop_words):

        tweets_reduced.append( ' '.join(tweet_r)  )

Let the fun begin!

import numpy as np

seed = 1337

# maxlen = 65
maxlen = 55
batch_size = 32
nb_epoch = 50

hidden_dims1 = 50
hidden_dims2 = 25
hidden_dims3 = 3

path = '/home/vlaand/wassa2017'

_path_wordembeddings = '/home/vlaand/data/Glove/glove.twitter.27B/glove.twitter.27B.'+str(EMBEDDINGS_DIM)+'d.txt'

Load training data and word embeddinbgs

import numpy as np
import pandas as pd
import csv

from keras.preprocessing import sequence
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Activation, Embedding, Bidirectional, Dropout, LSTM
from keras.regularizers import l2

from imblearn.over_sampling import RandomOverSampler
# from sklearn.model_selection import StratifiedKFold

def _read_csv_data(filename = "data.csv", header=True):
    df = pd.read_csv(filepath_or_buffer = filename)     
    print('data loaded from <'+filename+'>')
    print('\t'+str(len(df))+' entries')    
    tweets_list = []    
    for row in df.iterrows():
        tweets_list.append([tokenise_tweet(row[1]['tweet'])] +[row[1][emo]for emo in emoNames])   
    return tweets_list

def _read_csv_we(filename = "data.csv"):
    embedding_index = {}

    for row in pd.read_csv(filepath_or_buffer=filename, sep = ' ', header=None).iterrows():
        word, coefs = row[1][0], np.asarray(row[1][1:])
        embedding_index[word] = coefs
    print('we vectors loaded from <'+filename+'>')
    return embedding_index

def _load_original_vectors(filename = 'wordvectors-glove.twitter.27B.100d.txt', sep = ' ', wordFrequencies = None):

    Dictionary, Indices  = {},{}
    for line in open(filename, 'rb'): 
        values = line.decode('utf-8').split(sep)
        token = values[0]
        token_vector = np.array(values[1:], dtype = 'float32')   
            if(token in wordFrequencies):                
                Dictionary[token] = token_vector
            Dictionary[token] = token_vector
    print('we vectors loaded from <'+filename+'>')
    print('\t'+str(len(Dictionary))+' entries') 
    return(Dictionary, Indices)

def pretrainedEmbeddings(EmbeddingPath):
        embedding_index = {}
        with open(EmbeddingPath) as f:
            embedding_wordsList = []
            for line in f:
                values = line.split(" ")
                word = values[0]
                coefs = np.asarray(values[1:])
                embedding_index[word] = coefs
        return (embedding_index, embedding_wordsList)

def _texts_to_sequences(train_tweets):
    train_sequences = []
    for i,tweet in enumerate(train_tweets): 
        tw = []
        for token in tweet.split():
        tw.extend([0]*( maxlen-len(tw)) )
    return train_sequences

def _data_to_lists(dataTrain):    
    train_tweets, train_labels = [], []
    print('stacking data to lists')
    for i in dataTrain:
        scores = []
        for score in i[1:]:
            if np.isnan(score):
                scores.append( 0 )
                print('\tWarning: Nan value present in dataset')
    print('data stacked to lists\n\t'+str(len(train_tweets))+' tweets\n\t'+str(len(train_labels))+' labels')
    return train_tweets, train_labels

Using Theano backend.

Dictionary, Indices = _load_original_vectors(
        filename = '/home/vlaand/data/Glove/glove.twitter.27B/glove.twitter.27B.'+str(EMBEDDINGS_DIM)+'d.txt', 
        sep = ' ',
        wordFrequencies = None)#wordFrequencies) # leave wordFrequencies=None for loading the entire WE file

Indices_reversed = {}
for key in Indices.keys():

we vectors loaded from </home/vlaand/data/Glove/glove.twitter.27B/glove.twitter.27B.100d.txt>
	1193514 entries

Data conversion to an input to the model

def dataframe_to_lists(df):

    train_tweets, train_labels = [], []

    for row in df.iterrows():
    return train_tweets, train_labels

def lists_to_vectors(train_tweets, train_labels):

    train_sequences = _texts_to_sequences(train_tweets)

    embedding_matrix = np.zeros((len(Indices)+1, EMBEDDINGS_DIM))

    print('matrix created\n\t',embedding_matrix.shape)
    for (word, i) in Indices.items():
        embedding_vector = Dictionary.get(word)
        if (embedding_vector != None):
            embedding_matrix[i] = embedding_vector.astype(np.float)

    _X = sequence.pad_sequences(train_sequences, maxlen=maxlen)
    _y = np.array(train_labels)

    print(len(_X), 'train sequences loaded')
    print('\t',_X.shape,'\n\t', _y.shape)
    return _X, _y, embedding_matrix

def _get_maxlen(tweets):
    max = 0
    for tw in tweets:
        if len(tw.split()) > max:
            max = len(tw.split())
    return max


train_tweets, train_labels = dataframe_to_lists(dfs['train'][emoNames[EMOTION]])
dev_tweets, dev_labels = dataframe_to_lists(dfs['dev'][emoNames[EMOTION]])
test_tweets, test_labels = dataframe_to_lists(dfs['test'][emoNames[EMOTION]])

print("max tweet length: %d tokens" %(_get_maxlen(train_tweets+dev_tweets+test_tweets)) )

meltTweets = []
# meltTweets.extend(train_tweets)
for dataset in ['train','test','dev']:
    for emo in emoNames:
            print('KeyError: ',emo)
print('all tweets melted into list, ',len(meltTweets))

def _get_unique_tokens(tweets):    
    return(Counter(token for tweet in tweets for token in tweet.split()))

wordFrequencies = _get_unique_tokens(meltTweets) 
_plot_word_frequencies(wordFrequencies, WORD_FREQUENCY_TRESHOLD = WORD_FREQUENCY_TRESHOLD)

max tweet length: 43 tokens
all tweets melted into list,  7102
7001 12927
Populating the interactive namespace from numpy and matplotlib
/home/vlaand/anaconda3/lib/python3.5/site-packages/IPython/core/magics/ UserWarning: pylab import has clobbered these variables: ['seed']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

Preparing for SVR

from sklearn.feature_extraction.text import CountVectorizer

def _save_ngramizer(ngramizer, filename = 'ngramizer.dump'):
    _ = joblib.dump(ngramizer, filename=filename, compress=9)
    print('ngramizer saved\t<'+filename+'>')
def _load_ngramizer(filename = 'ngramizer.dump'):
    ngramizer = joblib.load(filename = filename)
    print('ngramizer <'+filename+'> loaded')
    return ngramizer

vectorizer = CountVectorizer(ngram_range = (1,NGRAM_VALUE),token_pattern=r'\b\w+\b', min_df=WORD_FREQUENCY_TRESHOLD,max_df=1000)
# ngramizer =
ngramizer =

vec = ngramizer.transform(train_tweets+dev_tweets+test_tweets).toarray()
print(len(vec), len(vec[0]))
# _save_ngramizer(ngramizer, filename = '/home/vlaand/IpythonNotebooks/wassa2017/wassa_ngramizer.dump')
# _save_ngramizer(ngramizer, filename = '/home/vlaand/IpythonNotebooks/05_emotion_wassa_nuig/wassaRegression/wassa_ngramizer.dump')

1701 2895

from natsort import natsorted
train_data_features = vec#X_train_counts.toarray()
vocab = ngramizer.get_feature_names()
dist = np.sum(train_data_features, axis=0)
ngram_freq = {}

# For each, print the vocabulary word and the frequency
for tag, count in zip(vocab, dist):
    #print(tag, count)


import numpy as np
import math, itertools
from scipy import spatial
def _vectors_similarity(v1 , v2):
    return( 1 - spatial.distance.cosine(v1,v2) )
def similarityVector(vector_, vectors_):
    resVector = np.asarray([_vectors_similarity(vector_ , v_) for v_ in vectors_])
    return np.asarray([np.max(resVector), np.mean(resVector), np.std(resVector), np.min(resVector)])
def compareTokenToSentence(leftToken, sentence):
    sentence_vectors = []
    for token in sentence:
        if token in Dictionary:
            token = token.replace('#','')
            if token in Dictionary:
    return similarityVector( Dictionary[leftToken], sentence_vectors)  
def capitalRatio(tweet):
        firstCap, allCap = 0, 0
        length = len(tweet)
        if length==0:
            return np.array([0,0])
        for i,token in enumerate(tweet.split()):
            if( token.istitle() ):
                firstCap += 1
            elif( token.isupper() ):
                allCap += 1
        return np.asarray([firstCap/length,allCap/length]) 
def tweetToWordVectors(dictionary, tweet, fixedLength=False):
    output = []    
        for i in range(MAX_SEQUENCE_LENGTH):
        for i,token in enumerate(tweet.split()):
            if token in Dictionary:
                output[i] = Dictionary[token]                
         for i,token in enumerate(tweet.lower().split()):
            if token in Dictionary:
            elif token.replace('#','') in Dictionary:
    return output
def ModWordVectors(x, mod=True):
    if(len(x) == 0):       
            return(np.zeros(EMBEDDINGS_DIM*3, dtype='float32'))
            return(np.zeros(EMBEDDINGS_DIM, dtype='float32'))        
    m =  np.matrix(x)
        xMean = np.array(m.mean(0))[0]
        xMin = np.array(m.min(0))[0]
        xMax = np.array(m.max(0))[0]
        xX = np.concatenate((xMean,xMin,xMax))
        return xX
        return np.array(m.mean(0))[0]
def bindTwoVectors(x0,x1):
    return np.array(list(itertools.chain(x0,x1)),dtype='float32') 
def _bind_vectors(x):
    return np.concatenate(x)   
def myLog10(vector):
    for i,v in enumerate(vector):
        if v > 0:
            vector[i] = np.log(v)
    return vector            
def _convert_text_to_vector(tweets,  Dictionary, labels, ngramizer):
    _X = []
    _y = []
    vec = ngramizer.transform(tweets).toarray()
    for i, t in enumerate(tweets):
        embeddingsVector = ModWordVectors(tweetToWordVectors(Dictionary,tweets[i]))
#         capitalRatioVector = capitalRatio(dfs[st][emoNames[EMOTION]][i])
#         simVector = compareTokenToSentence(leftToken = emoNames[EMOTION], sentence = t)
        ngramVector = vec[i]
#         _X.append( _bind_vectors((ngramVector, embeddingsVector, simVector))  )
        _X.append( _bind_vectors((ngramVector, embeddingsVector))  )
    return(np.asarray(_X), np.asarray(_y))

# finalTraining = False

# if finalTraining:
#     print('chosen emotion:', emoNames[EMOTION])

#     svr_X, svr_y = _convert_text_to_vector(
#         tweets = train_tweets+dev_tweets+test_tweets,
#         labels = train_labels+dev_labels+test_labels, 
#         Dictionary = Dictionary, 
#         ngramizer = ngramizer)

#     print('\tdata shape:\t', svr_X.shape, svr_y.shape)  

#     svr_X_test, svr_y_test = _convert_text_to_vector(
#         tweets = test_tweets,
#         labels = test_labels, 
#         Dictionary = Dictionary, 
#         ngramizer = ngramizer)

#     print('\tdata shape:\t', svr_X_test.shape, svr_y_test.shape)
# else:

print('chosen emotion:', emoNames[EMOTION])

svr_X_train, svr_y_train = _convert_text_to_vector(
    tweets = train_tweets,
    labels = train_labels, 
    Dictionary = Dictionary, 
    ngramizer = ngramizer)

print('\tdata shape:\t', svr_X_train.shape, svr_y_train.shape)  

svr_X_dev, svr_y_dev = _convert_text_to_vector(
    tweets = dev_tweets,
    labels = dev_labels, 
    Dictionary = Dictionary, 
    ngramizer = ngramizer)

print('\tdata shape:\t', svr_X_dev.shape, svr_y_dev.shape)

svr_X_test, svr_y_test = _convert_text_to_vector(
    tweets = test_tweets,
    labels = test_labels, 
    Dictionary = Dictionary, 
    ngramizer = ngramizer)

print('\tdata shape:\t', svr_X_test.shape, svr_y_test.shape)

chosen emotion: anger
	data shape:	 (857, 3195) (857,)
	data shape:	 (84, 3195) (84,)
	data shape:	 (760, 3195) (760,)

from sklearn.svm import SVR, LinearSVR
from sklearn.externals import joblib
from sklearn.metrics import f1_score, r2_score
from sklearn.model_selection import cross_val_predict, cross_val_score, train_test_split
from sklearn.metrics import r2_score, f1_score, classification_report
from scipy.stats import pearsonr, spearmanr
from collections import Counter
from multiprocessing import Pool
import warnings

cv_folds = 5

def _greed_search(EMOTION=0):     

    list_acc = []    
    list_val = []
    if(ESTIMATOR == 'LinearSVR'):                             
        epsilon = 0.001
#         C = 0.01
        for tol in [1e-6,1e-5,1e-4]:
            for C in [0.001,0.01,0.1]:
#                 cvs = cross_val_score(estimator = LinearSVR(C=C, tol=tol), X=X, y=y, cv=cv_folds, n_jobs=cv_folds, scoring='r2') 
#                 meanScore = np.mean(np.asarray(cvs)) 
#                 meanScore = np.mean(np.asarray(cvs))
                svrTrained = LinearSVR(C=C, tol=tol) 
      , svr_y)

                svr_y_test_predict = svrTrained.predict(svr_X_test)
                prs = pearsonr(svr_y_test , svr_y_test_predict)[0]
                spr = spearmanr(svr_y_test , svr_y_test_predict)[0]
                list_val.append([emoNames[EMOTION],prs,ESTIMATOR, C, gamma,epsilon,tol,NGRAM_VALUE,EMBEDDINGS_DIM])
                print(emoNames[EMOTION]+': C='+str(C)+', tol='+str(tol)+', prs='+str(prs)+', spr='+str(spr))      
    elif(ESTIMATOR == 'SVR'):                          
        epsilon = 0.001
#         C = 1.0
#         tol = 1e-6
        for tol in [1e-5,1e-4]:
            for gamma in [0.001,0.01,0.1]:       
                for C in [0.1,1.0]:                    
#                     cvs = [0.5,0.5,0.5]
#                     cvs = cross_val_score(estimator = SVR(C=C,gamma=gamma, tol=tol), X=X, y=y, cv=cv_folds, n_jobs=cv_folds, scoring='r2') 
#                     meanScore = np.mean(np.asarray(cvs))
                    svrTrained = SVR(C=C, tol=tol,gamma=gamma) 
          , svr_y)

                    svr_y_test_predict = svrTrained.predict(svr_X_test)
                    prs = pearsonr(svr_y_test , svr_y_test_predict)[0]
                    spr = spearmanr(svr_y_test , svr_y_test_predict)[0]

                    list_val.append([emoNames[EMOTION],prs, ESTIMATOR, C, gamma,epsilon,tol,NGRAM_VALUE,EMBEDDINGS_DIM])
                    print(emoNames[EMOTION]+': C='+str(C)+', gamma='+str(gamma)+', tol='+str(tol)+', prs='+str(prs)+', spr='+str(spr))
    best = np.argmax(list_acc)    
    out0 = {
        'gamma': list_val[best][4],
        'epsilon': list_val[best][5],
        'tol': list_val[best][6],
        'ngrams': list_val[best][7],
        'EMBEDDINGS_DIM': list_val[best][8],
        'score': list_val[best][1]

    return {emoNames[EMOTION]:out0}
def _combine_best_results(pool_output, ESTIMATOR):
    new_p = {ESTIMATOR:{}}   
    for i in pool_output:
    return new_p            

pool_output = [_greed_search(EMOTION)]
temp_params = _combine_best_results(pool_output, ESTIMATOR)

    train_params = {}

anger: C=0.001, tol=1e-06, prs=0.795027862972, spr=0.79952275341
anger: C=0.01, tol=1e-06, prs=0.924740785403, spr=0.935437817059
anger: C=0.1, tol=1e-06, prs=0.966726681318, spr=0.972348849938
anger: C=0.001, tol=1e-05, prs=0.795026238536, spr=0.799525693465
anger: C=0.01, tol=1e-05, prs=0.924742059245, spr=0.935388260044
anger: C=0.1, tol=1e-05, prs=0.966848315417, spr=0.972361457987
anger: C=0.001, tol=0.0001, prs=0.79504664147, spr=0.799698226814
anger: C=0.01, tol=0.0001, prs=0.924738039014, spr=0.93543152671
anger: C=0.1, tol=0.0001, prs=0.966821785306, spr=0.972288763425
['anger', 0.96684831541719052, 'LinearSVR', 0.1, 1.0, 0.001, 1e-05, 4, 100]

train_params = {'LSTM': {'anger': {'nb_epoch': 12},
  'fear': {'nb_epoch': 36},
  'joy': {'nb_epoch': 8},
  'sadness': {'nb_epoch': 18}},
 'LinearSVR': {'anger': {'C': 0.1,
   'epsilon': 0.001,
   'gamma': 1.0,
   'ngrams': 4,
   'score': 0.95816165303133261,
   'tol': 1e-05},
  'fear': {'C': 0.1,
   'epsilon': 0.001,
   'gamma': 1.0,
   'ngrams': 4,
   'score': 0.96097704320011335,
   'tol': 1e-05},
  'joy': {'C': 0.1,
   'epsilon': 0.001,
   'gamma': 1.0,
   'ngrams': 4,
   'score': 0.96744036403654121,
   'tol': 1e-05},
  'sadness': {'C': 0.1,
   'epsilon': 0.001,
   'gamma': 1.0,
   'ngrams': 4,
   'score': 0.97601559873008892,
   'tol': 1e-05}},
 'SVR': {'anger': {'C': 1.0,
   'epsilon': 0.001,
   'gamma': 0.01,
   'ngrams': 4,
   'score': 0.5909199703343438,
   'tol': 0.0001},
  'fear': {'C': 1.0,
   'epsilon': 0.001,
   'gamma': 0.01,
   'ngrams': 4,
   'score': 0.6669056665602984,
   'tol': 0.0001},
  'joy': {'C': 1.0,
   'epsilon': 0.001,
   'gamma': 0.01,
   'ngrams': 4,
   'score': 0.6182783505906371,
   'tol': 1e-05},
  'sadness': {'C': 1.0,
   'epsilon': 0.001,
   'gamma': 0.001,
   'ngrams': 4,
   'score': 0.6839620329687072,
   'tol': 1e-05}}}

    svrTrained = SVR(C=train_params[ESTIMATOR][emoNames[EMOTION]]['C'], 
    svrTrained = LinearSVR(C=train_params[ESTIMATOR][emoNames[EMOTION]]['C'], 
                 verbose=True), svr_y)

def saveModelFor(model, ESTIMATOR, EMOTION=0, path='/home/vlaand/IpythonNotebooks/wassa2017/'):
    path = os.path.join(path,ESTIMATOR)
    filename = os.path.join(path,emoNames[EMOTION]+'.dump')
    _ = joblib.dump(model, filename, compress=9)
    print("'%s' model saved to <%s>" % (emoNames[EMOTION],filename))
# saveModelFor(svrTrained, ESTIMATOR=ESTIMATOR, EMOTION=EMOTION, path = '/home/vlaand/IpythonNotebooks/wassa2017/classifiers/')
# saveModelFor(svrTrained, ESTIMATOR=ESTIMATOR, EMOTION=EMOTION, path = '/home/vlaand/IpythonNotebooks/05_emotion_wassa_nuig/wassaRegression/classifiers/')

[LibSVM]SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.001, gamma=0.01,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.0001, verbose=True)

# load model
svrTrained = joblib.load(os.path.join('/home/vlaand/IpythonNotebooks/05_emotion_wassa_nuig/wassaRegression/classifiers/','SVR',emoNames[EMOTION]+'.dump'))

svrTrained.predict([ svr_X_dev[0] ])[0]


Preparing for LSTM

X_train, y_train, embedding_matrix = lists_to_vectors(train_tweets, train_labels)
X_dev, y_dev, embedding_matrix = lists_to_vectors(dev_tweets, dev_labels)
X_test, y_test, embedding_matrix = lists_to_vectors(test_tweets, test_labels)

# X, y, embedding_matrix = lists_to_vectors(train_tweets+dev_tweets+test_tweets, train_labels+dev_labels+test_labels)
# X, y, embedding_matrix = lists_to_vectors(train_tweets+dev_tweets, train_labels+dev_labels)

matrix created
	 (1193515, 100)
/home/vlaand/anaconda3/lib/python3.5/site-packages/ipykernel/ FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
857 train sequences loaded
	 (857, 55) 
matrix created
	 (1193515, 100)
84 train sequences loaded
	 (84, 55) 
matrix created
	 (1193515, 100)
760 train sequences loaded
	 (760, 55) 

Training on WASSA dataset

from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import StratifiedKFold
from multiprocessing import Pool, Manager
import os
import numpy as np

import keras.backend as K

def matthews_correlation(y_true, y_pred):
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    y_pred_neg = 1 - y_pred_pos

    y_pos = K.round(K.clip(y_true, 0, 1))
    y_neg = 1 - y_pos

    tp = K.sum(y_pos * y_pred_pos)
    tn = K.sum(y_neg * y_pred_neg)

    fp = K.sum(y_neg * y_pred_pos)
    fn = K.sum(y_pos * y_pred_neg)

    numerator = (tp * tn - fp * fn)
    denominator = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

    return numerator / (denominator + K.epsilon())

def _cross_validation_parallel(_input):
    train, test = _input
    model = Sequential()
    model.add(Embedding(len(Indices)+1,  EMBEDDINGS_DIM, weights=[embedding_matrix],
                                input_length=maxlen, trainable=True))
    model.add(Bidirectional(LSTM(EMBEDDINGS_DIM))) #dropout is same as regularisation
    model.add(Dense(hidden_dims1, W_regularizer=l2(0.01)), )
    model.add(Dense(hidden_dims2, W_regularizer=l2(0.01)), ) #!!!
    model.add(Dense(hidden_dims3, activation='softsign'))
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['accuracy', matthews_correlation])
   [train], y[train], batch_size=batch_size, nb_epoch=nb_epoch, validation_split=None)
#     scores = model.evaluate(X[test], y[test], verbose=0,)
    y_test_predict = model.predict(X[test])
    y_test_predict = np.reshape(y_test_predict, newshape=(len(y_test_predict),))
    scores =  [r2_score(y_test_predict, y[test]), pearson(y_test_predict, y[test]), spearman(y_test_predict, y[test])]

    print("%s: %.2f" % (model.metrics_names[2], scores[1]))
    return scores

nb_epoch = 10
n_splits = 5
hidden_dims1 = 50
hidden_dims2 = 25
hidden_dims3 = 1

# with open('senpy-plugins-development/fivePointRegression/classifiers/LSTM/log.out', "w") as log_file:
#     log_file.write(str(cvscores)+'\n')
#     log_file.write("%.2f (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

kfold = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=1337)

with Pool(processes = n_splits) as p:
    cvscores =, ((train, test) for (train, test) in kfold.split(X, y)))

# print("%.2f (+/- %.2f)" % (np.mean(cvscores), np.std(cvscores)))

print('%s' % (emoNames[EMOTION].upper()))
print('%d folds, %d epochs' % (n_splits,nb_epoch))

my_metrics = ['r2_score', 'pearson', 'spearman']
for func in range(3):
    print("%s:\t%.2f (+/- %.2f)" % (my_metrics[func],np.mean([i[func] for i in cvscores]), np.std([i[func] for i in cvscores])))

# p._pool[0].is_alive()

5 folds, 50 epochs

r2_score:	0.03 (+/- 0.18)
pearson:	0.67 (+/- 0.05)
spearman:	0.65 (+/- 0.05)

Final Training

            'sadness':{'nb_epoch':18 }}}
    train_params = {'LSTM':{
            'sadness':{'nb_epoch':18 }}}

hidden_dims1 = 50
hidden_dims2 = 25
hidden_dims3 = 1
model = Sequential()
model.add(Embedding(len(Indices)+1,  EMBEDDINGS_DIM, weights=[embedding_matrix],
                            input_length=maxlen, trainable=True))
model.add(Bidirectional(LSTM(EMBEDDINGS_DIM))) #dropout is same as regularisation
model.add(Dense(hidden_dims1, b_regularizer=l2(0.01)), )
model.add(Dense(hidden_dims2, b_regularizer=l2(0.01)), ) 
model.add(Dense(hidden_dims3, activation='softsign'))
model.compile(loss='mean_absolute_error', optimizer='adam', metrics=[matthews_correlation]), y_train, batch_size=batch_size, nb_epoch=train_params['LSTM'][emoNames[EMOTION]]['nb_epoch'],validation_split=None,)

Epoch 1/12
857/857 [==============================] - 63s - loss: 0.1632 - matthews_correlation: 0.0191    
Epoch 2/12
857/857 [==============================] - 56s - loss: 0.1237 - matthews_correlation: 0.1968    
Epoch 3/12
857/857 [==============================] - 58s - loss: 0.1110 - matthews_correlation: 0.4301    
Epoch 4/12
857/857 [==============================] - 66s - loss: 0.0999 - matthews_correlation: 0.5086    
Epoch 5/12
857/857 [==============================] - 76s - loss: 0.0856 - matthews_correlation: 0.6187    
Epoch 6/12
857/857 [==============================] - 65s - loss: 0.0773 - matthews_correlation: 0.6468    
Epoch 7/12
857/857 [==============================] - 51s - loss: 0.0704 - matthews_correlation: 0.6884    
Epoch 8/12
857/857 [==============================] - 55s - loss: 0.0626 - matthews_correlation: 0.7491    
Epoch 9/12
857/857 [==============================] - 58s - loss: 0.0619 - matthews_correlation: 0.7269    
Epoch 10/12
857/857 [==============================] - 63s - loss: 0.0571 - matthews_correlation: 0.7543    
Epoch 11/12
857/857 [==============================] - 62s - loss: 0.0567 - matthews_correlation: 0.7853    
Epoch 12/12
857/857 [==============================] - 50s - loss: 0.0511 - matthews_correlation: 0.7723    
<keras.callbacks.History at 0x7fa22de93f60>

from IPython.display import SVG
# from keras.utils.vis_utils import model_to_dot
from keras.utils.visualize_util import model_to_dot

s=SVG(model_to_dot(model,show_shapes=True,show_layer_names=False).create(prog='dot', format='svg'))

G 140340728101184 InputLayer input: output: (None, 55) (None, 55) 140340728100456 Embedding input: output: (None, 55) (None, 55, 100) 140340728101184->140340728100456 140341205210952 Bidirectional input: output: (None, 55, 100) (None, 200) 140340728100456->140341205210952 140340727988632 Dropout input: output: (None, 200) (None, 200) 140341205210952->140340727988632 140340686474936 Dense input: output: (None, 200) (None, 50) 140340727988632->140340686474936 140340686464336 Dense input: output: (None, 50) (None, 25) 140340686474936->140340686464336 140340686464280 Dense input: output: (None, 25) (None, 1) 140340686464336->140340686464280

# Load ready model
from keras.models import load_model, model_from_json

def _load_model_emo_and_weights(filename, emo):
        with open(filename+'.'+emo+'.json', 'r') as json_file:
            loaded_model_json =
            loaded_model = model_from_json(loaded_model_json)
        return loaded_model
# savePath = "/home/vlaand/IpythonNotebooks/05_emotion_wassa_nuig/wassaRegression/classifiers/LSTM/wassaRegression"
# model = _load_model_emo_and_weights(savePath, emoNames[EMOTION])

# y_t_pred = model.predict(X_dev)

# y_dev_pred = np.array([y_[0] for y_ in model.predict(X_dev)])
# print(min(y_dev_pred), max(y_dev_pred))
# print("%8s\t%.2f\t%.2f\t%.2f" % (emoNames[EMOTION],
#                                  r2_score(y_dev , y_dev_pred),                                 
#                                  pearsonr(y_dev , y_dev_pred)[0],
#                                  spearmanr(y_dev , y_dev_pred)[0]))

y_test_pred = np.array([y_[0] for y_ in model.predict(X_test)])
print(min(y_test_pred), max(y_test_pred))
print("%8s\t%.2f\t%.2f\t%.2f" % (emoNames[EMOTION],
                                 r2_score(y_test , y_test_pred),                                 
                                 pearsonr(y_test , y_test_pred)[0],
                                 spearmanr(y_test , y_test_pred)[0]))

0.167627 0.788088
   anger	0.38	0.64	0.62

len(svr_y_test_predict), len(y_test_predict), len(mix_y_test_predict)

(714, 714, 714)

svr_y_test_predict = svrTrained.predict(svr_X_test)
y_test_predict = np.array([y_[0] for y_ in model.predict(X_test)])
mix_y_test_predict = np.array([ np.mean([y1,y2]) for y1,y2 in zip(y_test_predict, svr_y_test_predict) ])

print("%8s\t%.2f\t%.2f\t%.2f" % (emoNames[EMOTION]+'.svr',
                                 r2_score(svr_y_test , svr_y_test_predict),                                 
                                 pearsonr(svr_y_test , svr_y_test_predict)[0],
                                 spearmanr(svr_y_test , svr_y_test_predict)[0]))
print("%8s\t%.2f\t%.2f\t%.2f" % (emoNames[EMOTION]+'.lstm',
                                 r2_score(y_test , y_test_predict),                                 
                                 pearsonr(y_test , y_test_predict)[0],
                                 spearmanr(y_test , y_test_predict)[0]))
print("%8s\t%.2f\t%.2f\t%.2f" % (emoNames[EMOTION]+'.avg',
                                 r2_score(y_test , mix_y_test_predict),                                 
                                 pearsonr(y_test , mix_y_test_predict)[0],
                                 spearmanr(y_test , mix_y_test_predict)[0]))

sadness.svr	0.44	0.67	0.67
sadness.lstm	0.50	0.71	0.70
sadness.avg	0.51	0.73	0.72
anger.svr 0.38 0.62 0.59 anger.lstm 0.36 0.62 0.59 anger.avg 0.43 0.65 0.63 fear.svr 0.45 0.67 0.64 fear.lstm 0.46 0.68 0.66 fear.avg 0.50 0.71 0.68 joy.svr 0.37 0.62 0.63 joy.lstm 0.37 0.64 0.65 joy.avg 0.44 0.66 0.67 sadness.svr 0.44 0.67 0.67 sadness.lstm 0.50 0.71 0.70 sadness.avg 0.51 0.73 0.72

import sys


def recursive(prevMax = []):
    if len(prevMax) >= len(difference):
        return 0

    gid, gp = 0, 0.0

    for i,d in enumerate(difference):
        if d>gp and not(i in prevMax):
            gid, gp = i, d

    print('%d, %f, %f, %f, %f, %f, "%s", "%s"' % (gid, difference[gid], y_test[gid],  mix_y_test_predict[gid], svr_y_test_predict[gid], y_test_predict[gid], test_tweets[gid], emoNames[EMOTION]))

difference = np.abs(mix_y_test_predict - y_test)
print('tweetId, difference, y_actual, y_ensemble, y_svr, y_lstm, tweet, emotion')

tweetId, difference, y_actual, y_ensemble, y_svr, y_lstm, tweet, emotion
161, 0.497370, 0.271000, 0.768370, 0.742622, 0.794118, "god just replaced my sadness with laughter , cant go the whole day sad . <repeat>", "sadness"
385, 0.459350, 0.125000, 0.584350, 0.531545, 0.637155, "i lost my wallet , then found it , then lost it again and <allcaps> then <allcaps> found <allcaps> it <allcaps> ! <repeat> \ ncollege is brazy", "sadness"
460, 0.438554, 0.292000, 0.730554, 0.648212, 0.812897, "cuddling literally kills depression , relieves anxiety , and strengthens the immune system .", "sadness"
415, 0.405078, 0.792000, 0.386922, 0.420783, 0.353062, "there will be no gaming video today . an old friend of mine passed last night , so i 'm taking some time to grieve . thank you standuptocancer", "sadness"
381, 0.379538, 0.688000, 0.308462, 0.340629, 0.276296, "dreams dashed and divided like million stars in the night sky .", "sadness"
528, 0.373470, 0.121000, 0.494470, 0.466861, 0.522079, "clean the sink and make your bathroom shine .", "sadness"
272, 0.364924, 0.292000, 0.656924, 0.655518, 0.658329, "lauren jauregui makes all my problem dissapear it amazes my sadness . she keeps me strong", "sadness"
242, 0.354138, 0.917000, 0.562862, 0.570284, 0.555440, "when you lose somebody close to your heart you lose yourself as well 💔 lost", "sadness"
508, 0.351571, 0.083000, 0.434571, 0.483281, 0.385860, "you forever straight so fix that frown u good 😇 <user>", "sadness"
179, 0.351528, 0.229000, 0.580528, 0.532596, 0.628460, "<user> your foot is frowning .", "sadness"
14, 0.344748, 0.083000, 0.427748, 0.475970, 0.379526, "i love having such a big family . there 's never a dull moment in my house 😂", "sadness"
391, 0.335866, 0.333000, 0.668866, 0.647737, 0.689996, "love is when all your happiness and all your sadness and all your feelings are dependent on another person .", "sadness"
636, 0.332361, 0.854000, 0.521639, 0.491407, 0.551871, "life is hard . , its harder if ur stupid life love sadderness moreofsad howdoestears whatislife", "sadness"
499, 0.319856, 0.771000, 0.451144, 0.495762, 0.406525, "if you love something , let it go . if it comes back , it is yours . if it doesnt , it never will . sadness accepting", "sadness"
162, 0.315784, 0.729000, 0.413216, 0.427583, 0.398848, "need advice on how to get out of this rut ! <repeat> needmotivation", "sadness"
623, 0.314993, 0.135000, 0.449993, 0.435578, 0.464407, "it 's still not sunk in that im seeing joe next month , im so grateful and excited shit", "sadness"
130, 0.313090, 0.917000, 0.603910, 0.527648, 0.680172, "overwhelming sadness . this too shall pass . lonley startingover", "sadness"
3, 0.310302, 0.271000, 0.581302, 0.579698, 0.582907, "<user> i nearly dropped my phone into the sink hahahaha <allcaps>", "sadness"
38, 0.306475, 0.229000, 0.535475, 0.426680, 0.644271, "if someone keeps laughing at you , dont fret . at least u r giving happiness . ' \ n quotes quotestoliveby", "sadness"
459, 0.306238, 0.396000, 0.702238, 0.629992, 0.774483, "<user> i like cold gloomy weather", "sadness"
10, 0.305282, 0.292000, 0.597282, 0.606167, 0.588398, "refuse to let myself get discouraged .", "sadness"
395, 0.303521, 0.188000, 0.491521, 0.529233, 0.453808, "but guess what ? i 'm sober", "sadness"
348, 0.302531, 0.250000, 0.552531, 0.512949, 0.592112, "<user> please dont sulk over your defeat . come on . head up !", "sadness"
193, 0.293996, 0.146000, 0.439996, 0.482020, 0.397973, "the new <user> song is mega 💥 reminds me of <user> blues", "sadness"
104, 0.288571, 0.271000, 0.559571, 0.558299, 0.560842, "one point : dsp <allcaps> claims yt <allcaps> ers lost half their viewership in 2012 if they werent using direct capture . that 's when the search changes happened", "sadness"
147, 0.286231, 0.167000, 0.453231, 0.461991, 0.444472, "hate to see y 'all frown but i 'd rather see him smiling 💕 ✨", "sadness"
568, 0.284825, 0.188000, 0.472825, 0.467759, 0.477891, "lost 11 lbs since i got married ( eating healthy ) & ive gone from a 9 : 47 mile to a 8 : 50 mile in 2 weeks . areyousureimagedout fitness", "sadness"
505, 0.282745, 0.896000, 0.613255, 0.567120, 0.659391, "i feel like a burden every day that i waste but i dont know how to get out of this bc i get so discouraged all i wanna do is lay around 🙃", "sadness"
96, 0.282434, 0.917000, 0.634566, 0.555475, 0.713656, "<user> i 'm wearing all black tomorrow to mourn . 😭 💔", "sadness"
463, 0.281926, 0.949000, 0.667074, 0.661098, 0.673049, "honestly dont know why i 'm so unhappy most of the time . i just want it all to stop <sadface> itnevergoes", "sadness"
112, 0.281447, 0.854000, 0.572553, 0.578113, 0.566993, "the last few weeks have been dreadful . opening up of old wounds . the gossip of others / evil that spill from thier lips melancholy sadnnes", "sadness"
383, 0.278441, 0.083000, 0.361441, 0.387527, 0.335355, "fun tidbit : you can fall asleep on someone while dancing . blues latenight", "sadness"
475, 0.278424, 0.833000, 0.554576, 0.506780, 0.602372, "<user> my heart just sunk .", "sadness"
492, 0.271916, 0.729000, 0.457084, 0.483921, 0.430246, "<user> dreadful day at work n now im gonna be late for class n im covered in paint , n now i have to work a night shift too : /", "sadness"
131, 0.271846, 0.889000, 0.617154, 0.524637, 0.709671, "overwhelming sadness . this too shall pass . lost lonley startingover", "sadness"
298, 0.271252, 0.833000, 0.561748, 0.522563, 0.600934, "i came to work for no reason 😩 🔫 \ ni could 've stayed in bed", "sadness"
497, 0.270917, 0.708000, 0.437083, 0.417913, 0.456253, "synth backing tracks = sadness \ n depresspop dark + + + alt fuckingmeup", "sadness"
618, 0.267744, 0.833000, 0.565256, 0.547283, 0.583228, "ibiza blues hitting me hard already wow", "sadness"
109, 0.264262, 0.250000, 0.514262, 0.511454, 0.517070, "so drunk me hid my keys very well sober me couldnt find it anywhere", "sadness"
353, 0.263743, 0.875000, 0.611257, 0.535136, 0.687378, "only god knows why things happen , sometimes it 's just hard to understand . sad prayingforyou", "sadness"
67, 0.259539, 0.259000, 0.518539, 0.505327, 0.531750, "<user> high fantasy , i feel like you could make a melancholy college age slice of life thing work too", "sadness"
290, 0.259478, 0.208000, 0.467478, 0.486605, 0.448350, "dont get weary in well doing . \ n faith god prayer", "sadness"
269, 0.255059, 0.854000, 0.598941, 0.604321, 0.593561, "i wish i could live in y 'all reality where i can grieve over people i never met . to bad i got brothers <allcaps> dying left and right .", "sadness"
158, 0.254665, 0.125000, 0.379665, 0.410998, 0.348331, "getting so excited for <user> 2016 ! <repeat> we play the main stage sunday oct . 16 at 3 : 30 ! <repeat> jazzholiday riesbrothers rock blues jam", "sadness"
591, 0.250235, 0.167000, 0.417235, 0.411795, 0.422674, "yes i am picking up sticks and pine cones in my front yard", "sadness"
542, 0.250025, 0.292000, 0.542025, 0.638084, 0.445966, "never let the sadness of your past ruin your future", "sadness"
212, 0.247739, 0.114000, 0.361739, 0.375295, 0.348183, "the two brians , <user> & <user> , also known as the nigella lawsons of blues & pizza .", "sadness"
548, 0.246567, 0.240000, 0.486567, 0.541714, 0.431421, "its a gloomy day , im cuddled in my bed watching brendon covering songs and i couldnt be more relaxed or happy 😋", "sadness"
206, 0.245265, 0.720000, 0.474735, 0.517158, 0.432311, "do you know how much it hurts to see you best friend sad ?", "sadness"
122, 0.244769, 0.812000, 0.567231, 0.506687, 0.627775, "it 's sad when you talk to someone about ya past n how it fucked you up and then they do the same thing like people really dont have hearts", "sadness"
In [324]:
idx = np.argmin(difference)
print("%s\n%f" % (test_tweets[idx], difference[idx]))

living life so relentless

In [325]:
idx = np.argmax(difference)
print("%s\n%f" % (test_tweets[idx], difference[idx]))

<user> u fucked my house up i 'll always hold a grudge

In [303]:
%pylab inline
import numpy as np
import matplotlib.pyplot as plt
import natsort

dimension = EMOTION

s = y_test#[:, dimension] #y_annotated[dimension]
order = sorted(range(len(s)), key=lambda k: s[k])

g1 = y_test#[:, dimension]

model = 'SVM'
if model == 'BLSTM':    
    g2 = y_test_predict
elif model == 'SVM':
    g2 = svr_y_test_predict
    g2 = mix_y_test_predict
g3 = difference
colorMapping = {
    'Actual': 'm.',
    'SVM': 'g.' ,
    'BLSTM': 'c.',

# g3 = svr_y_test_predict#[yy[dimension] for yy in y_wassa_test_predict]#[:, dimension] 
# g4 = y_test_predict#[yy[dimension] for yy in y_wassa_test_predict]#[:, dimension] 

line_0, = plt.plot(np.array(g1)[order], 'm.',  label='Actual')
line_1, = plt.plot(np.array(g2)[order], colorMapping[model], label=model)
line_2, = plt.plot(np.array(g3)[order], 'r.', label='Difference')
# line_2, = plt.plot(np.array(g3)[order], 'g.', label='SVM')
# line_3, = plt.plot(np.array(g4)[order], 'c.', label='BLSTM')
plt.legend(handles=[line_0, line_1, line_2])
plt.legend(bbox_to_anchor=(1.02, .4, .65, .0), loc=3,ncol=1, mode="expand", borderaxespad=1.0)
plt.ylabel('dimension: '+emoNames[dimension])
plt.title("%s model bulit on WASSA corpus" % model)

Populating the interactive namespace from numpy and matplotlib
r2 pearson spearman anger.svr 0.34 0.60 0.57 anger.lstm 0.36 0.63 0.61 anger.avg 0.42 0.66 0.63 fear.svr 0.44 0.67 0.63 fear.lstm 0.45 0.68 0.66 fear.avg 0.49 0.71 0.68 joy.svr 0.36 0.62 0.63 joy.lstm 0.35 0.59 0.59 joy.avg 0.41 0.65 0.65 sadness.svr 0.43 0.68 0.69 sadness.lstm 0.45 0.70 0.69 sadness.avg 0.49 0.73 0.72 # hashtags remained anger.svr 0.34 0.59 0.56 anger.lstm 0.28 0.53 0.51 anger.avg 0.36 0.62 0.59 fear.svr 0.43 0.67 0.63 fear.lstm 0.30 0.57 0.53 fear.avg 0.44 0.66 0.63 joy.svr 0.36 0.62 0.63 joy.lstm 0.32 0.58 0.59 joy.avg 0.40 0.64 0.64 sadness.svr 0.42 0.68 0.69 sadness.lstm 0.32 0.59 0.57 sadness.avg 0.44 0.67 0.66

In [20]:
from sklearn.metrics import r2_score, f1_score, classification_report
# from skll.metrics import pearson, spearman
from scipy.stats import pearsonr, spearmanr

# print('[%8s]\tR2\tpearson\tspearman' % emoNames[EMOTION])
# y_dev_predicts = []
# for i in range(20):
#     if i>0: 
#, y_train, batch_size=batch_size, nb_epoch=1,validation_split=None,)
#     y_dev_pred = np.array([y_[0] for y_ in model.predict(X_dev)])
#     print("%8s\t%.2f\t%.2f\t%.2f" % (i,
#                                  r2_score(y_dev , y_dev_pred),                                 
#                                  pearsonr(y_dev , y_dev_pred)[0],
#                                  spearmanr(y_dev , y_dev_pred)[0]))    
#     y_dev_predicts.append(y_dev_pred)

#, y_train, batch_size=batch_size, nb_epoch=nb_epoch,validation_split=None,)

In [73]:
# print('[%8s]\tR2\tpearson\tspearman' % emoNames[EMOTION])
# for i,y__ in enumerate(y_dev_predicts):  
#         y_test_predict = y__
#         print("%8s\t%.2f\t%.2f\t%.2f" % (i,
#                                  r2_score(y_dev , y_test_predict),                                 
#                                  pearsonr(y_dev , y_test_predict)[0],
#                                  spearmanr(y_dev , y_test_predict)[0]))

[ sadness]	R2	pearson	spearman
       0	-0.40	-0.12	-0.13
       1	0.07	0.43	0.44
       2	0.17	0.53	0.54
       3	0.11	0.46	0.50
       4	0.21	0.49	0.54
       5	0.17	0.47	0.49
       6	0.21	0.48	0.54
       7	0.11	0.56	0.59
       8	0.14	0.50	0.56
       9	0.20	0.49	0.54
      10	0.10	0.48	0.52
      11	0.16	0.51	0.54
      12	0.18	0.50	0.53
      13	0.15	0.50	0.55
      14	0.18	0.51	0.53
      15	0.18	0.51	0.55
      16	0.19	0.52	0.56
      17	0.21	0.53	0.54
      18	0.23	0.54	0.56
      19	0.17	0.50	0.54

In [48]:
%pylab inline
import numpy as np
import matplotlib.pyplot as plt
import natsort

dimension = EMOTION

s = y_dev#[:, dimension] #y_annotated[dimension]
order = sorted(range(len(s)), key=lambda k: s[k])

g1 = y_dev#[:, dimension]
g2 = y_dev_pred#[yy[dimension] for yy in y_wassa_test_predict]#[:, dimension]    

line_0, = plt.plot(np.array(g1)[order], 'm.',  label='Actual')
line_1, = plt.plot(np.array(g2)[order], 'b.', label='Prediction')
plt.legend(handles=[line_0, line_1])
plt.legend(bbox_to_anchor=(1.02, .4, .65, .0), loc=3,ncol=1, mode="expand", borderaxespad=1.0)
plt.ylabel('dimension: '+emoNames[dimension])
plt.title("Model bulit on WASSA corpus")

Populating the interactive namespace from numpy and matplotlib
/home/vlaand/anaconda3/lib/python3.5/site-packages/IPython/core/magics/ UserWarning: pylab import has clobbered these variables: ['dist']
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"

Method 1 (save architecture and weights separately)

In [49]:
def _save_model_wassa(model, savePath, emo, modelName):
    if emo == None:
        savePath = os.path.join(savePath,modelName)
        savePath = os.path.join(savePath,modelName)+"."+emo
    model_json = model.to_json()
    with open(savePath + ".json", "w") as json_file:
        print("<%s.json> " % (savePath))
    model.save_weights(savePath +".h5", overwrite=True)
    print("<%s.h5> " % (savePath))
# + "_.h5")
# savePath = "/home/vlaand/IpythonNotebooks/senpy-plugins-NUIG/fivePointRegression/classifiers/LSTM/"
savePath = "/home/vlaand/IpythonNotebooks/05_emotion_wassa_nuig/wassaRegression/classifiers/LSTM/"

_save_model_wassa(model=model, savePath=savePath, emo=emoNames[EMOTION], modelName="wassaRegression")
_save_model_wassa(model=model, savePath='/home/vlaand/IpythonNotebooks/wassa2017/classifiers/LSTM/', emo=emoNames[EMOTION], modelName="wassaRegression")



In [359]:
from keras.models import model_from_json

with open('/home/vlaand/IpythonNotebooks/wassa2017/classifiers/LSTM/wassaRegression.'+emoNames[EMOTION]+'.json', 'r') as json_file:
    loaded_model = model_from_json(
    print('<%s> loaded' %(
    print('<%s> weights loaded' % ('.json','.h5')))

</home/vlaand/IpythonNotebooks/wassa2017/classifiers/LSTM/wassaRegression.anger.json> loaded
</home/vlaand/IpythonNotebooks/wassa2017/classifiers/LSTM/wassaRegression.anger.h5> weights loaded