Training model using processed data


In [1]:
import os
import sys
import numpy as np
import pickle
    
# Make the `src` directory that sits next to the current working directory
# (i.e. <cwd>/../src) importable, so the `features` import below resolves.
src = os.path.join(os.getcwd(), os.pardir, 'src')
# Guard the append: notebook cells are routinely re-executed, and an
# unconditional append would add a duplicate sys.path entry on every run.
if src not in sys.path:
    sys.path.append(src)

from features import feature_proc

In [2]:
# Load the preprocessed dataset from disk. The pickle is unpacked into exactly
# three objects: X, y and labels (a ValueError is raised if it holds anything else).
# The path is relative, so it resolves against the kernel's working directory.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files produced by this project's own preprocessing step.
dataset_path = '../data/processed/SentiRuEval2016.pickle'
with open(dataset_path, 'rb') as bin_data:
    X, y, labels = pickle.load(bin_data)

In [3]:
# Names passed to feature_proc.get_sample_case to decide, per label, how each
# sample is treated by the split.
# NOTE(review): both sets are identical; confirm that get_sample_case tells
# train from test by something else in the label, otherwise the same samples
# may land in both splits.
train_data = {"bank", "ttk"}
test_data = {"bank", "ttk"}

# One selection marker per entry in `labels`, in the same order.
selected = [
    feature_proc.get_sample_case(label, train_data, test_data)
    for label in labels
]

# Partition features and targets according to the per-sample markers.
X_train, X_test, y_train, y_test = feature_proc.split_data(X, y, selected)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-3-285b28135ac7> in <module>()
      2 test_data = set(["bank", "ttk"])
      3 selected = [feature_proc.get_sample_case(label, train_data, test_data) for label in labels]
----> 4 X_train, X_test, y_train, y_test = split_data(X, y, selected)

NameError: name 'split_data' is not defined

In [6]:



Out[6]:
6

In [ ]: