Image data

The goal of this notebook is to train and evaluate HT (human trafficking) risk classification using statistics computed from the images, and from the faces detected in those images, associated with the set of ads provided for CP1 during the MEMEX Winter QPR 2017. [Most of the code is from Mayank's repo.]

Input files

  1. per-cluster image and face statistics features, for training and testing

Outputs

  1. evaluation (eval) file with one score per cluster, in the CP1 submission format
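
Based on the parsing code further down, each line of the input file is tab-separated: a cluster id, a feature vector serialized as a bracketed comma-separated string, and a binary label. A minimal sketch of how one (invented) line is pulled apart:

In [ ]:
# sketch: the <id>\t<vector>\t<label> line format expected below (values invented)
example_line = 'some_cluster_id\t[0.0, 2.0, 1.5]\t1'
cols = example_line.split('\t')
print cols[0]                                         # cluster id
print [float(v) for v in cols[1][1:-1].split(', ')]   # feature vector
print int(cols[2])                                    # 0/1 label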

In [ ]:
import os
import json
import codecs
import re
import numpy as np
from random import shuffle

In [ ]:
def convert_string_to_float_list(string):
    # Parse a serialized vector such as "[1.0, 2.0, 3.0]" into a list of floats.
    return [float(i) for i in re.split(', ', string[1:-1])]

In [ ]:
def l2_norm_on_matrix(matrix):
    """
    Takes a np.matrix style object and l2-normalizes it row-wise.
    :param matrix:
    :return: the normalized matrix
    """
    import warnings
    from sklearn.preprocessing import normalize
    warnings.filterwarnings("ignore")
    return normalize(matrix)
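
For intuition, l2-normalization rescales each row to unit Euclidean length, x / ||x||_2. A quick check with invented values:

In [ ]:
# sketch: row-wise l2 normalization, via the helper and by hand (invented values)
m = np.matrix([[3.0, 4.0], [1.0, 0.0]])
print l2_norm_on_matrix(m)              # [[0.6, 0.8], [1.0, 0.0]]
row = np.array([3.0, 4.0])
print row / np.sqrt((row ** 2).sum())   # matches the first row above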

In [ ]:
def sample_and_extend(list_of_vectors, total_samples):
    """
    Oversampling code for balanced training. We do deep re-sampling, assuming that the vectors contain
    atomic values, so copying a vector with list() is a deep enough copy.
    :param list_of_vectors: the list of vectors that will be re-sampled (randomly)
    :param total_samples: the total number of vectors we want in the list. Make sure this number
    is higher than the length of list_of_vectors
    :return: the over-sampled list
    """
    if len(list_of_vectors) >= total_samples:
        raise Exception('Check your lengths!')

    indices = range(0, len(list_of_vectors))
    shuffle(indices)
    desired_samples = total_samples-len(list_of_vectors)
    # extend the shuffled index pool until it covers the number of extra samples needed
    while desired_samples > len(indices):
        new_indices = list(indices)
        shuffle(new_indices)
        indices += new_indices
    new_data = [list(list_of_vectors[i]) for i in indices[0:desired_samples]]
    return np.append(list_of_vectors, new_data, axis=0)
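
As a quick illustration (vectors invented): extending 4 vectors to 7 re-samples 3 of them at random, so the oversampled result necessarily contains repeats.

In [ ]:
# sketch: oversample 4 invented 2-d vectors up to 7
vecs = np.array([[0.0, 1.0], [1.0, 0.0], [0.5, 0.5], [0.2, 0.8]])
extended = sample_and_extend(vecs, total_samples=7)
print extended.shape   # (7, 2); the last 3 rows repeat rows of vecs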

In [ ]:
def prepare_for_ML_classification(pos_neg_file, normalize=False):
    """
    Read in the feature vectors (embeddings) and their binary labels.
    :param pos_neg_file: the file generated in one of the preprocess_filtered_* files
    :return: a dictionary where a 0/1 label references a list (or normalized matrix) of feature vectors
    """
    result = dict()
    pos_features = list()
    neg_features = list()
    with codecs.open(pos_neg_file, 'r', 'utf-8') as f:
        for line in f:
            line = line[0:-1]  # strip the trailing newline
            cols = re.split('\t', line)
            if int(cols[2]) == 1:
                pos_features.append(convert_string_to_float_list(cols[1]))
            elif int(cols[2]) == 0:
                neg_features.append(convert_string_to_float_list(cols[1]))
            else:
                print 'error; label not recognized'
    if normalize:
        result[0] = l2_norm_on_matrix(np.matrix(neg_features))
        result[1] = l2_norm_on_matrix(np.matrix(pos_features))
    else:
        if len(pos_features) != 0:
            result[1] = pos_features
        if len(neg_features) != 0:
            result[0] = neg_features
    return result

In [ ]:
def prepare_train_test_data(pos_neg_file, train_percent=0.3, randomize=True, balanced_training=True, data_vectors=None):
    """
    :param pos_neg_file:
    :param train_percent:
    :param randomize: if True, we randomize the data read in from pos_neg_file. Otherwise, the initial
    train_percent fraction goes into the training data and the rest into the test data
    :param balanced_training: if True, we equalize positive and negative training samples by oversampling
    the lesser class. For example, if we have 4 positive samples and 7 negative samples, we randomly re-sample
    3 positive samples from the 4 positive samples, meaning there will be repetition. Use with caution.
    :param data_vectors: this should be set if pos_neg_file is None. It is mostly for internal use, so
    that we can re-use this function by invoking it from some of the other _prepare_ files.
    :return: dictionary containing training/testing data/labels
    """
    import math
    if pos_neg_file:
        data = prepare_for_ML_classification(pos_neg_file)
    elif data_vectors:
        data = data_vectors
    else:
        raise Exception('Neither pos_neg_file nor data_vectors argument is specified. Exiting.')

    train_pos_num = int(math.ceil(len(data[1])*train_percent))
    train_neg_num = int(math.ceil(len(data[0])*train_percent))
    test_pos_num = len(data[1])-train_pos_num
    test_neg_num = len(data[0])-train_neg_num
    # guarantee at least one test sample per class (the last training vector gets re-used below)
    if test_pos_num == 0:
        test_pos_num = 1
    if test_neg_num == 0:
        test_neg_num = 1

    test_labels_pos = [[1] * test_pos_num]
    test_labels_neg = [[0] * test_neg_num]

    if not randomize:
        train_data_pos = data[1][0:train_pos_num]
        train_data_neg = data[0][0:train_neg_num]
        if train_pos_num < len(data[1]):
            test_data_pos = data[1][train_pos_num:]
        else:
            test_data_pos = [data[1][-1]]

        if train_neg_num < len(data[0]):
            test_data_neg = data[0][train_neg_num:]
        else:
            test_data_neg = [data[0][-1]]

    else:
        all_pos_indices = range(0, len(data[1]))
        all_neg_indices = range(0, len(data[0]))
        shuffle(all_pos_indices)
        shuffle(all_neg_indices)

        train_data_pos = [data[1][i] for i in all_pos_indices[0:train_pos_num]]
        train_data_neg = [data[0][i] for i in all_neg_indices[0:train_neg_num]]

        if train_pos_num < len(data[1]):
            test_data_pos = [data[1][i] for i in all_pos_indices[train_pos_num:]]
        else:
            test_data_pos = [data[1][-1]]

        if train_neg_num < len(data[0]):
            test_data_neg = [data[0][i] for i in all_neg_indices[train_neg_num:]]
        else:
            test_data_neg = [data[0][-1]]

    if balanced_training:
        if train_pos_num < train_neg_num:
            train_labels_pos = [[1] * train_neg_num]
            train_labels_neg = [[0] * train_neg_num]
            train_data_pos = sample_and_extend(train_data_pos, total_samples=train_neg_num)
        elif train_pos_num > train_neg_num:
            train_labels_pos = [[1] * train_pos_num]
            train_labels_neg = [[0] * train_pos_num]
            train_data_neg = sample_and_extend(train_data_neg, total_samples=train_pos_num)
        else:
            train_labels_pos = [[1] * train_pos_num]
            train_labels_neg = [[0] * train_neg_num]
    else:
        train_labels_pos = [[1] * train_pos_num]
        train_labels_neg = [[0] * train_neg_num]

    train_data = np.append(train_data_pos, train_data_neg, axis=0)
    test_data = np.append(test_data_pos, test_data_neg, axis=0)
    train_labels = np.append(train_labels_pos, train_labels_neg)
    test_labels = np.append(test_labels_pos, test_labels_neg)

    results = dict()
    results['train_data'] = train_data
    results['train_labels'] = train_labels
    results['test_data'] = test_data
    results['test_labels'] = test_labels

    return results

In [ ]:
def get_pos_neg_ids(pos_neg_file):
    result = list()
    with codecs.open(pos_neg_file, 'r', 'utf-8') as f:
        for line in f:
            line = line[0:-1]
            result.append(re.split('\t',line)[0])
    return result

In [ ]:
def prepare_train_test_data_separate_unseen(pos_neg_train_file, pos_neg_test_file, balanced_training=True):
    """
    Like prepare_train_test_data, but trains on one file and scores a separate, unseen test file.
    The test file is unlabelled: all of its rows come back under the 0 key with placeholder 0 labels,
    so the returned test labels are not meaningful for evaluation; the test_ids are what matter.
    """
    train = prepare_for_ML_classification(pos_neg_train_file)
    test = prepare_for_ML_classification(pos_neg_test_file)
    test_ids = get_pos_neg_ids(pos_neg_test_file)
    train_pos_num = len(train[1])
    train_neg_num = len(train[0])

    train_data_pos = train[1][0:train_pos_num]
    train_data_neg = train[0][0:train_neg_num]

    # the unlabelled test rows all carry a 0 label
    test_neg_num = len(test[0])
    test_data_neg = test[0][0:test_neg_num]
    test_labels_neg = [[0] * test_neg_num]

    if balanced_training:
        if train_pos_num < train_neg_num:
            train_labels_pos = [[1] * train_neg_num]
            train_labels_neg = [[0] * train_neg_num]
            train_data_pos = sample_and_extend(train_data_pos, total_samples=train_neg_num)
        elif train_pos_num > train_neg_num:
            train_labels_pos = [[1] * train_pos_num]
            train_labels_neg = [[0] * train_pos_num]
            train_data_neg = sample_and_extend(train_data_neg, total_samples=train_pos_num)
        else:
            train_labels_pos = [[1] * train_pos_num]
            train_labels_neg = [[0] * train_neg_num]
    else:
        train_labels_pos = [[1] * train_pos_num]
        train_labels_neg = [[0] * train_neg_num]

    train_data = np.append(train_data_pos, train_data_neg, axis=0)
    train_labels = np.append(train_labels_pos, train_labels_neg)

    results = dict()
    results['train_data'] = train_data
    results['train_labels'] = train_labels
    results['test_data'] = test_data_neg
    results['test_labels'] = test_labels_neg
    results['test_ids'] = test_ids

    return results

In [ ]:
def train_and_test_classifier(train_data, train_labels, test_data, test_labels, classifier_model, test_ids=None):
    """
    Take numpy matrices of train/test data and labels and compute a bunch of metrics.
    Hyperparameters must be changed manually; we do not take them in as input.
    This method is for BINARY CLASSIFICATION only, although there is some support for regression.
    :param train_data:
    :param train_labels:
    :param test_data:
    :param test_labels:
    :param classifier_model: one of 'random_forest', 'knn', 'logistic_regression', 'linear_regression'
    :param test_ids: if set, we return (test_ids, predicted probabilities) instead of metrics
    :return:
    """
    from sklearn.linear_model import LogisticRegression, LinearRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn import neighbors
    from sklearn.metrics import roc_auc_score, accuracy_score, precision_recall_fscore_support
    if classifier_model == 'random_forest':
        model = RandomForestClassifier()
    elif classifier_model == 'knn':
        k = 9
        model = neighbors.KNeighborsClassifier(n_neighbors=k, weights='uniform')
    elif classifier_model == 'logistic_regression':
        model = LogisticRegression()
    elif classifier_model == 'linear_regression':  # this is a regressor; be careful.
        model = LinearRegression()
    else:
        raise ValueError('Unknown classifier_model: '+str(classifier_model))

    model.fit(train_data, train_labels)
    predicted_labels = model.predict(test_data)
    print predicted_labels
    print test_labels
    predicted_probabilities = None
    if hasattr(model, 'predict_proba'):  # LinearRegression has no predict_proba
        predicted_probabilities = model.predict_proba(test_data)

    final_results = list()
    if test_ids is not None:
        final_results.append(test_ids)
        final_results.append(predicted_probabilities)
        return final_results, model
    else:
        print 'AUC (Area Under Curve): ',
        print roc_auc_score(test_labels, predicted_labels)

    if classifier_model not in ['linear_regression']:
        print 'Accuracy: ',
        print accuracy_score(test_labels, predicted_labels)
        prf = ['Precision: ', 'Recall: ', 'F-score: ', 'Support: ']
        print 'Class 0\tClass 1'
        k = precision_recall_fscore_support(test_labels, predicted_labels)
        for i in range(0, len(k)):
            print prf[i],
            print k[i]
        # return precision/recall/F-score for the positive class
        return [k[0][1], k[1][1], k[2][1]], model
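
One caveat: the AUC above is computed from the hard 0/1 predictions. AUC is normally computed from the positive-class score, which is threshold-independent; inside the function this would be roc_auc_score(test_labels, predicted_probabilities[:, 1]) for models that expose predict_proba. A self-contained sketch with invented values:

In [ ]:
# sketch: AUC from positive-class probabilities (invented labels and scores)
from sklearn.metrics import roc_auc_score
y_true = [0, 0, 1, 1]
y_scores = [0.1, 0.4, 0.35, 0.8]
print roc_auc_score(y_true, y_scores)  # 0.75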

In [ ]:
# set some parameters
data_dir = "../data"
prefix = "train"
#prefix = "test"
if prefix == "train":
    input_file = "train_adjusted.json"
else:
    input_file = "test_adjusted_unlabelled.json"

In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt

def print_model_weights(weights, labels="fmimi, fmima, fmime, fmami, fmama, fmame, fmemi, fmema, fmeme, ftmi, ftma, ftme, ftt, avg_ftt, imi, ima, ime, it, avg_it, ads_t"):
    # bar plot of per-feature weights/importances, labelled with the feature names
    fig, ax = plt.subplots()
    ax.bar(range(len(weights)), weights, width=0.5, color='r')
    ax.set_xticks(range(len(weights)))
    ax.set_xticklabels(tuple(labels.split(',')), rotation='vertical')
    plt.show()

In [ ]:
# train models and evaluate

In [ ]:
train_percent = 0.5
pos_neg_file = os.path.join(data_dir, prefix+"_images_faces_stats_mayank.tsv")
data_dict = prepare_train_test_data(pos_neg_file, train_percent=train_percent)
# 'logistic_regression', 'random_forest', 'knn'
data_dict['classifier_model'] = 'logistic_regression'
results, model = train_and_test_classifier(**data_dict)

In [ ]:
print model.coef_[0]
print_model_weights(model.coef_[0])

In [ ]:
data_dict['classifier_model'] = 'random_forest'
results, model = train_and_test_classifier(**data_dict)

In [ ]:
print model.feature_importances_
print_model_weights(model.feature_importances_)

In [ ]:
data_dict['classifier_model'] = 'knn'
results, model = train_and_test_classifier(**data_dict)

In [ ]:
# actual evaluation
train_pos_neg_file = os.path.join(data_dir, "train_images_faces_stats_mayank.tsv")
test_pos_neg_file = os.path.join(data_dir, "test_images_faces_stats_mayank.tsv")
data_dict = prepare_train_test_data_separate_unseen(train_pos_neg_file, test_pos_neg_file)
print len(data_dict['train_data']),len(data_dict['train_labels'])
print len(data_dict['test_data']),len(data_dict['test_labels'])
print data_dict['test_labels']

In [45]:
def normalize_feats(data, norm_values=None):
    """
    Min-max normalize each feature (column) to [0, 1]. If norm_values is None, the per-column
    min and max are computed from data and returned, so they can be re-used on unseen data.
    Note: a constant column (max == min) would cause a division by zero here.
    """
    data = np.asarray(data)
    print type(data)
    print data.shape
    new_norm_values = False
    if not norm_values:
        new_norm_values = True
        min_data = data.min(axis=0)
        print min_data
        print min_data.shape
        max_data = data.max(axis=0)
        print max_data
        print max_data.shape
        norm_values = [min_data, max_data]
    data = (data - norm_values[0])/(norm_values[1] - norm_values[0])
    if new_norm_values:
        return data, norm_values
    else:
        return data
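
This is plain column-wise min-max scaling, x' = (x - min) / (max - min), with the training min/max re-used on the held-out set (equivalent in spirit to sklearn's MinMaxScaler). A minimal sketch with invented values:

In [ ]:
# sketch: fit the min/max on invented 'train' rows, re-use them on an unseen row
train = [[0.0, 10.0], [2.0, 20.0], [4.0, 40.0]]
scaled_train, nv = normalize_feats(train)    # nv = [column mins, column maxes]
print scaled_train[:, 0]                     # [ 0.   0.5  1. ]
print normalize_feats([[2.0, 25.0]], nv)     # [[ 0.5  0.5]]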

In [48]:
# evaluation post submission with normalization
train_pos_neg_file = os.path.join(data_dir, "train_images_faces_stats_mayank.tsv")
test_pos_neg_file = os.path.join(data_dir, "test_images_faces_stats_mayank.tsv")
data_dict = prepare_train_test_data_separate_unseen(train_pos_neg_file, test_pos_neg_file)
# normalize feats between 0 and 1
#data_dict['train_data_nonorm'] = data_dict['train_data']
data_dict['train_data'], norm_values = normalize_feats(data_dict['train_data'])
#data_dict['test_data_nonorm'] = data_dict['test_data']
data_dict['test_data'] = normalize_feats(data_dict['test_data'], norm_values)
print len(data_dict['train_data']),len(data_dict['train_labels'])
print len(data_dict['test_data']),len(data_dict['test_labels'])
print data_dict['test_labels']


<type 'numpy.ndarray'>
(1120, 20)
[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  1.]
(20,)
[  2.00000000e+00   1.10000000e+01   2.00000000e+00   6.00000000e+00
   2.30000000e+01   6.00000000e+00   6.00000000e+00   1.10000000e+01
   6.00000000e+00   3.50000000e+01   9.04000000e+02   3.50000000e+01
   3.16890000e+04   2.20000000e+01   3.20000000e+01   9.04000000e+02
   3.20000000e+01   5.90830000e+04   2.88733927e+01   4.90300000e+03]
(20,)
<type 'numpy.ndarray'>
(130, 20)
1120 1120
130 1
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [49]:
# 'logistic_regression', 'random_forest', 'knn'
#data_dict['classifier_model'] = 'random_forest'
data_dict['classifier_model'] = 'logistic_regression'
results, model = train_and_test_classifier(**data_dict)


[0 0 1 0 0 0 1 1 1 1 1 1 1 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 1 1 0
 1 1 0 1 0 1 1 0 0 0 1 0 0 1 0 1 0 0 1 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 0 1
 0 1 0 1 0 1 0 1 0 1 0 0 1 0 1 1 1 1 1 0 1 0 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1
 0 0 1 0 1 1 1 1 0 0 1 0 1 0 0 0 1 0 0]
[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [ ]:
print results

In [ ]:
def show_model_weights(model, model_type):
    if model_type == 'logistic_regression':
        print model.coef_[0]
        print_model_weights(model.coef_[0])
    elif model_type == 'random_forest':
        print model.feature_importances_
        print_model_weights(model.feature_importances_)

In [ ]:
def prepare_CP1_eval_output(path, results):
    # results[0] holds the test cluster ids and results[1] the predicted probabilities;
    # [i][1] is the probability of the positive class, which we write out as the score.
    print path, results
    with codecs.open(path+'_test_results.jl', 'w', 'utf-8') as out:
        for i in range(0, len(results[0])):
            answer = dict()
            answer['cluster_id'] = str(results[0][i])
            answer['score'] = results[1][i][1]
            json.dump(answer, out)
            out.write('\n')
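
Each line of the resulting .jl (JSON lines) file holds one cluster and its positive-class score, e.g. (matching the first entry printed further down):

{"cluster_id": "a99a6739634570f1fbb9de84ddbf07e23c6a0a01", "score": 0.18661923627370786}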

In [ ]:
prepare_CP1_eval_output(os.path.join(data_dir,'columbia_faceimagestats_'+data_dict['classifier_model']), results)
# we should also save the model that generated these results
import pickle
pickle.dump(model,open(os.path.join(data_dir,'columbia_faceimagestats_'+data_dict['classifier_model']+'.pkl'),'wb'))

In [22]:
comp_classifiers = {}
first_clf = True
import pickle
import os
#for classifier in ['logistic_regression', 'random_forest']:
for classifier in ['logistic_regression']:
    model = pickle.load(open(os.path.join(data_dir,'columbia_faceimagestats_'+classifier+'.pkl'),'rb'))
    show_model_weights(model, classifier)
    with open(os.path.join(data_dir,'columbia_faceimagestats_'+classifier+'_test_results.jl'),'rt') as res:
        for line in res:
            line_dict = json.loads(line)
            #print line_dict
            if first_clf:
                comp_classifiers[line_dict['cluster_id']] = [line_dict['score']]
            else:
                comp_classifiers[line_dict['cluster_id']].append(line_dict['score'])
    first_clf = False


[ -3.93865237e-01  -1.57149189e-01   4.58832256e-01   2.55910151e-01
  -7.76340544e-02   5.99192996e-02   1.39735913e+00   2.69492338e-01
  -1.31727956e+00  -2.18106139e-01  -6.96740134e-03   1.62046151e-01
   1.08750311e-05  -1.43432609e-01  -5.37348145e-02  -3.55529319e-03
  -1.29864805e-01   2.57205315e-05   1.71065912e-01  -3.26583116e-04]

In [23]:
print comp_classifiers


{u'a99a6739634570f1fbb9de84ddbf07e23c6a0a01': [0.18661923627370786], u'8ba21382d6dc205fab0fc6cf8df44116a56acdd0': [0.29453571898999986], u'e529091bf4a603e1a3a1a02849c7f49a6b53802d': [0.4063466270066474], u'7b189426fb3f9e7f064c90a36b9535ebb7addcea': [0.6323372626960693], u'c9f09cb1780306b8c136662c5762c5c4389aa7d7': [0.4674205065018157], u'5c2d657f13a027488a9d94f709df02878c6dec62': [0.0924261414192076], u'4de79fc40dd8249d3e996cb2f288a98774143c2c': [0.41526144505331786], u'2eab54f516897c7a30892053cf51924e3c0c7c04': [0.5327997757734022], u'b4676faf0ff2d53d156b012f2e30fcf885696f56': [0.5851951743521504], u'137003eae9020b75371fbc2096705ca0e0d83172': [0.5218474391647989], u'd7e32da2f52840ef88aa1222c93dff5461cbb7e8': [0.38560451075120944], u'39b8888fb61cdf580a0e74f8ac02a1ad35aabb36': [0.5714384361929691], u'3989e43536a6100c20789671572243d2f4238642': [0.6209923506286281], u'2f791ad7f4ab32363a72704c1f93c44450895c6c': [0.6134021656420956], u'cc1d3542eba81bf4cf886a12b43becc1e0d3690a': [0.6204541478354944], u'509326c19ea93763d1fc710dceecd273b6270b7b': [0.6225607041612269], u'7d478f441360ca156c1949c433cf07efd443096a': [0.19373110405635205], u'f5bbf042be59c09f726c298e4f45806ff14893c7': [0.6798092969355289], u'6346c598e16d75fd72bcdb5b630ffa998fe79d8d': [0.33910662203914826], u'1904d428f12a6a6f3fb5ee79c13ca5ebf90c1de4': [0.6200695356771909], u'c50493d469c2909911ff79baffac52e2f23a1a33': [0.5505349376928577], u'a30057883fe42c943365dfa7444af9b1b5e6cf9b': [0.4306717855377005], u'4fe14fb5bc4711d3d79f4a782c643e5b3dbdaaf7': [0.38852194215393787], u'56949438db45216eab2db751e293c51c0cbdb9c6': [0.6209923506286281], u'21d19d5334457cd636b6bef5d4e52972ef0dfa0d': [0.2950018776529729], u'0180f95e45f37d3733c9641efb9d11226fc83419': [0.5159360767985703], u'5ceca7c64c00a9d689232ea6348ed33efd4c9341': [0.26365390587927245], u'7e30109744efb3061744237576c1599ed6237dab': [0.3029246508630919], u'ca45e0bc8100bf4fb6f7f06ca40504eb5dc84404': [0.6290891714111088], u'8ecfe209139f1a9d4256c75740fe9e7ea1f31291': [0.13926256231062184], u'e6502b28cb5f4d73d1042328bf09dadca7f482d5': [0.518834423513738], u'30706411d250414c77a8fa6e618a07a5087a04cd': [0.3437457499453293], u'bd0162c3ea804a2ee64889aa33bd1ccc7ba915ff': [0.3977690876581629], u'5b18371bdcf34dd853929159ff5fc37d3ef8d889': [0.6199925951304018], u'6c97d7f9fc79720ac67343a179b9edfcfb5ced63': [0.6132064058117322], u'429df829356076209dfb576b1906d98536404e09': [0.5267374745876541], u'1487b1e01d4e19d843d31e9cf4fa73c7bca3d73e': [0.4686521103155589], u'b3379cd44bbc07fb49a1372e7a4e7936f14b54d4': [0.3357464818832205], u'0d59c51931adfaf19d85df33948867eb2606af59': [0.6208386087237348], u'adceceddf7647f06f5cc5d6e555dfa940cdc138a': [0.5031888950697898], u'ec4c8b2be0aa1b71620b3fe2200d1b2324affc2e': [0.2806276847821773], u'3ddc80cb50734f7844dc19e95515a1a09dfa0503': [0.5350223702440967], u'a4eaea0a9fcd23ccf929036fc235b36959f897f3': [0.5472534005870958], u'84cdcf8f3f3155e66200923976a6d922c8387d2b': [0.4670517574243653], u'91ca05ed1cb4ab55cc002e10daf7337cf21e563c': [0.4089811712548283], u'd75c94f86282a3355d92375aca5c3969845f7400': [0.44905568269251295], u'710aa51b7c8d508052595a3080419b830f852649': [0.620684842547937], u'de1f1f4faf7fcd4d4c8fb424211c635eaa1f60c1': [0.6588321047393989], u'c5f1ab051c18ce17596f37f00ec26eb334f70a68': [0.32554262265393963], u'60909d9cc88c2df9882d4659f89c36e70b214669': [0.5776004270871011], u'a16e7fc8ffdf9f6497fbbca1ba7128201a9cbb86': [0.4789653335689734], u'217e0468c7eaa12291a41e02b4f0eabf38c14ebe': [0.693597604665693], u'764b64b81a72f0502133fe970c1bc94a3b504c2a': 
[0.2838534273058022], u'd1ec58d6e59fcc7dc8cb8fb5580dcc4996bd6b6f': [0.3057916229264449], u'bbb4c38df0ce28c0e481ee271e608ef0fa06bff2': [0.4092716173631471], u'463cd64c3e8a12bb41282edcd0b9c890ffe69179': [0.5584288108982343], u'd04601fa8f63f5317a81c2c3ac0846de15854d64': [0.543339133305288], u'1b311cce7eff248dc8b526c0bd2d9d89792e7076': [0.47132135036416195], u'428de28dc982947d5f570fb2362d557d0f57d20c': [0.6192228585477467], u'e83eee5a954dbd89a54d2cbe73b4ea81f1da0f18': [0.42117462576840736], u'826b473e788c7630b2602c3d45e795d844b8011c': [0.6377688924685438], u'd0c09bf712f6032372d75507120bfce1f977db7a': [0.5513826328948519], u'8e86801e3d009873466c7b2333d09f3da067ce83': [0.17666976972775028], u'fb73c8c2f33f96c1346ab47ef688af1e20106b7d': [0.134218836777998], u'10cddb536c76d74abc8af2bad0118b33efef0996': [0.7453763327797902], u'e0a43a3a48ce3be73f208e166b16f9f649a46218': [0.528007073451821], u'73315bad4fbe5c47c5e55f47c1644af38de66d7b': [0.5668044766209326], u'364b436514765db56666915f64f27f253b6016f7': [0.44358916384554237], u'07201eb30fffed42a5002774a2acf30e929b6e0f': [0.3162045400874634], u'867c2c0674a5ad9861f06ee22aaafc5f2459a22c': [0.4595920231778323], u'ee3cc04333060212834810870aea5d42e98b026b': [0.6212816733987729], u'1ab2bf3e18eb1bb36011e7a736a80d0f7f71660c': [0.5489676254854485], u'68ddf2d05c794a4d7395eac63dc37e6558396081': [0.5345780472471287], u'14df233f87ae8091f9d9d3c3837323aca43fd485': [0.4682275687144139], u'ccd2412e5122d6f349316244c4738740c5cffe04': [0.35267504376843634], u'9c30f3bac08f163b9a358d0abc31045cf223471d': [0.6864791899053758], u'84a7a66611f9c3bbcbad74efe9577b5ea5e442e1': [0.5397542893228409], u'd3cbf2e5b160ed5f2f84a48e2c3434df06bc9fe8': [0.5485689344287938], u'5add154255d238a20953aee90db71ea5d4f316f2': [0.3289629476797469], u'de4c7ed6ce1b732b7d5f61f188557624519d8b00': [0.5447363684137477], u'03778bd89fb77881a3300a4888f8a6bbdf6494b2': [0.07480859421189491], u'd040a28055c038f3330f596611d7a545da6ba2b0': [0.6191458518528087], u'afd5b363d9f7ccc283e46fbb27af588e47e2928e': [0.4627337779560779], u'92631ee561a6f63de414c24167f8a7fc8f89950f': [0.5911085262616874], u'78309ff6c7eca192cb8094c23bbf7690d5bb0ae7': [0.38897789489991996], u'2abb648ef094058960e74bab3d438ac613b8a38e': [0.6859502297963591], u'2bfaf153d0588a50859e7355896ce6755e9a6f64': [0.327172538663806], u'04ba2065bf8a6abbcea760721d720678b45a0de0': [0.47023324333995614], u'91b66d358155a5c3a0dfadf1219fa759b3a741fb': [0.41505351421921477], u'cf50e7e83373670e6a4e4254bd9e7d756bcdc4c7': [0.42996546564912513], u'd300f62791384782ae75dd234d4ea84448762d15': [0.6193768539448422], u'757e36d7867f628b1c6ebb17b33df5dd3674c595': [0.6209154827117351], u'23dfb4578fd88d9661215bb6e1b5063f60826868': [0.6033131658583328], u'ce3ce5b5b55580424e2f008247309c9e5104327c': [0.512374838728102], u'f8a58e64a61ab00d999c205cf63e4310a3d8f5bd': [0.27794490457199367], u'f526e6b3f5ac8b6bcdc56ca79491cdb3f37df1b5': [0.6188377651766223], u'537cca294f96ca510b134e69aa8c4c1fccd7722d': [0.14057525536839904], u'c28e9e4ba5d18ea84e77d75e4c457d4de4bf765a': [0.5632198278123333], u'c6170ac0f46d1d9adf559ad3a2fd2c3f92711e2b': [0.6203772374918883], u'edfe41a52c0fc18f7de7aaa11cfbfb8af02116fc': [0.07041275353446083], u'ff9f213f43e0d9ab479af22617dda03bbc4f6e15': [0.700484905065804], u'c1671399fc5fedcaf20d48f6dd3adb9c819304fa': [0.6562780054927901], u'f9034404dfadebe763f6ce048c1a9bbdeccb8b59': [0.32498334350306995], u'ce3fb900eb7ca45e4a1dab30624b822fa40418d1': [0.6071965699855486], u'376a717c40a45d6db74f5b4c60bc607346661868': [0.6209923506286281], 
u'cf983138fd7ad07867f01a2439af23b03c29589c': [0.5792794687760974], u'd758ce5ba722d3fddda88a3a72852df84a3578ea': [0.301383948056177], u'27a2450c16c2380192d93087405f981ef4841058': [0.6044332663258497], u'c589fe6b7ee09585253dd669887d6cef1ee0100e': [0.5034623709522601], u'd9948542c966fc4cae1f13249f5c5ff88dd34883': [0.5396521936075307], u'd4e9556e9ea9a6e85fa0a0717023379d377570e5': [0.48184875036008], u'45a5fb6230119139230250f19c29b17a0b25e529': [0.6130758789277859], u'79e1e3a24194de81707cbdfb4dba15505f356689': [0.3311097024309546], u'fa91c6426380c478a2d867021858c3b11a870b20': [0.2972183502612583], u'29dd516a2c5c6230208e31ed664ca35b89f597a1': [0.6840512530968336], u'908fa17e0f0f7b2f8c74402007dab7c5cfa5feea': [0.3194971422344008], u'72139b3ab03d16c3de2078296e3a57bdcadaf86e': [0.6197617373208544], u'8a421d5128ff7f2e15acc3a47ce9abb51b1b020b': [0.5949917612919913], u'439909717b559b69b3f56f4bc65542741e5a00e5': [0.6200695356771909], u'e9b443bb11d426dbfbe4de2a29d26a78220828a0': [0.23146195184386617], u'47cb8a8f0fa7e73c1e0d7011aa1ecb16a9468922': [0.5278158631181414], u'30e1338ef1c4663a7f58aa844e56d5429dd458bd': [0.4158862367125925], u'21a481c408152dc0436c432ba3d614e4341199a5': [0.6965914454093562], u'bdc8bbab55339be5dbc2b051bf2183dcaafe5f52': [0.5368466565105856], u'a46b1bb98b2eda196a4edacacf411576b74fb61d': [0.6843640608759153], u'a17fd5ced8db4083b37ab8403c26f83be2a4a05f': [0.4317052726152673], u'7b1659324a197f9c9ed17214ea88ebb4f26982e9': [0.4262590951784745], u'50188d032b43bb9632691e9d0808e7508f34d6af': [0.5287983123811054], u'99aa37c3a01d0947939a6f09f6dcf737df83af00': [0.6204541478354944], u'7d44c773c4e497bb21816fa9ed18e1c9dd4207b7': [0.47074110982676676]}
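
With more than one classifier enabled in the loop above, comp_classifiers maps each cluster id to one score per model; a simple way to combine them (illustrative only, not what was submitted) would be the per-cluster mean:

In [ ]:
# sketch: average the per-classifier scores for each cluster (illustrative only)
mean_scores = {cid: sum(scores) / float(len(scores))
               for cid, scores in comp_classifiers.items()}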
