In [1]:
import os
import sys
root_path = os.path.abspath("../../../")
if root_path not in sys.path:
    sys.path.append(root_path)
import time
import numpy as np
from Util.Util import DataUtil
from Util.ProgressBar import ProgressBar
train_num = 6000
(x_train, y_train), (x_test, y_test) = DataUtil.get_dataset(
"mushroom", "../../../_Data/mushroom.txt",
n_train=train_num, tar_idx=0
)
x_train, y_train, wc, features, feat_dicts, label_dict = DataUtil.quantize_data(x_train, y_train)
x_test, y_test = DataUtil.transform_data(x_test, y_test, wc, feat_dicts, label_dict)
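A quick sanity check (not part of the original cell) is to confirm that the quantized train and test splits line up before fitting anything; only NumPy is assumed here.
# Hypothetical check: the train/test splits should share the same feature dimension
print(np.asarray(x_train).shape, np.asarray(y_train).shape)
print(np.asarray(x_test).shape, np.asarray(y_test).shape)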
In [2]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
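The cell above fits and predicts but never scores the model; a minimal follow-up sketch, assuming the standard scikit-learn metrics API, would be:
# Hypothetical evaluation step: compare predictions with the held-out labels
from sklearn.metrics import accuracy_score
print("Test accuracy: {:.4f}".format(accuracy_score(y_test, y_pred)))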
In [3]:
with open("../../../_Data/prices.txt", "r") as file:
    data = np.array([line.strip().split(",") for line in file], dtype=np.float32)
data[:3]
Out[3]:
In [4]:
length = len(data)
n_train, n_cv = int(0.7*length), int(0.15*length)
idx = np.random.permutation(length)
train_idx, cv_idx = idx[:n_train], idx[n_train:n_train+n_cv]
test_idx = idx[n_train+n_cv:]
train, cv, test = data[train_idx], data[cv_idx], data[test_idx]
print(len(train), len(cv), len(test), length)
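The 70/15/15 split above is not reproducible because the permutation is unseeded; a small hypothetical helper wrapping the same logic with an optional seed could look like this (the seed value and default ratios are assumptions, not part of the original notebook):
# Hypothetical helper generalizing the split above
def split_data(data, train_ratio=0.7, cv_ratio=0.15, seed=None):
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(data))
    n_train, n_cv = int(train_ratio * len(data)), int(cv_ratio * len(data))
    return data[idx[:n_train]], data[idx[n_train:n_train + n_cv]], data[idx[n_train + n_cv:]]

train, cv, test = split_data(data, seed=142857)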
In [5]:
letters = np.array(list("qwertyuiopasdfghjklzxcvbnm"))
data = letters[np.random.randint(0, 26, 10000)]
features = set(data)
feat_dic = {c: i for i, c in enumerate(sorted(features))}
def transform(data, feat_dic):
    return [feat_dic[c] for c in data]
print(data[:3], transform(data, feat_dic)[:3])
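Building on the integer indices returned by transform, a one-hot representation (a common next step, not shown in the original notebook) can be produced with a single NumPy indexing trick:
# Hypothetical extension: one-hot encode the integer indices
indices = np.asarray(transform(data, feat_dic))
one_hot = np.eye(len(feat_dic), dtype=np.float32)[indices]
print(one_hot.shape)  # (10000, 26)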
In [6]:
data = ["a", "c", "b"]
print(data, transform(data, feat_dic))
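One caveat: transform raises a KeyError for any character that never appeared in features; a defensive variant (the unknown index of -1 is an assumption, not part of the original) is straightforward:
def transform_safe(data, feat_dic, unknown=-1):
    # Hypothetical variant: unseen characters map to `unknown` instead of raising KeyError
    return [feat_dic.get(c, unknown) for c in data]

print(transform_safe(["a", "?", "b"], feat_dic))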
In [7]:
def task(cost=0.5, epoch=3, name="", sub_task=None):
    def sub():
        # one progress bar per task; each epoch sleeps, runs the nested sub-task (if any), then advances the bar
        bar = ProgressBar(max_value=epoch, name=name)
        for _ in range(epoch):
            time.sleep(cost)
            if sub_task is not None:
                sub_task()
            bar.update()
    return sub
task(name="Task1", sub_task=task(
name="Task2", sub_task=task(
name="Task3")))()