In [3]:
import mglearn
import numpy as np
import pandas as pd
import os
from scipy import signal
from sklearn.datasets import load_boston
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from mglearn.make_blobs import make_blobs
#DATA_PATH = os.path.join(os.path.dirname(__file__), "data")
def make_forge():
    """Build the small hand-tuned two-class "forge" toy dataset.

    Thirty samples are drawn with ``make_blobs`` (two centers, fixed
    ``random_state=4``). Two of them are then relabelled as class 0 and
    four rows are dropped, so the result is fully deterministic.

    Returns
    -------
    X : ndarray
        Feature rows that survive the mask.
    y : ndarray
        Class labels aligned with ``X``.
    """
    X, y = make_blobs(centers=2, random_state=4, n_samples=30)
    # Hand-picked tweak: force samples 7 and 27 into class 0.
    y[np.array([7, 27])] = 0
    # The ``np.bool`` alias was removed in NumPy 1.24; the builtin ``bool``
    # yields the same boolean dtype on every NumPy version.
    mask = np.ones(len(X), dtype=bool)
    # Hand-picked tweak: drop samples 0, 1, 5 and 26.
    mask[np.array([0, 1, 5, 26])] = False
    X, y = X[mask], y[mask]
    return X, y
def make_wave(n_samples=100):
    """Generate the single-feature "wave" regression toy dataset.

    The feature is sampled uniformly from [-3, 3) with a fixed seed (42).
    The target is ``sin(4 * x) + x`` plus standard-normal noise, with the
    sum halved.

    Parameters
    ----------
    n_samples : int, default 100
        Number of samples to draw.

    Returns
    -------
    X : ndarray
        The feature reshaped into a single column.
    y : ndarray
        The noisy target values.
    """
    rng = np.random.RandomState(42)
    # Draw order matters for reproducibility: features first, noise second.
    feature = rng.uniform(-3, 3, size=n_samples)
    trend = np.sin(4 * feature) + feature
    noise = rng.normal(size=len(feature))
    target = (trend + noise) / 2
    return feature.reshape(-1, 1), target
def load_extended_boston():
    """Return the Boston housing data expanded with degree-2 features.

    The raw features are min-max scaled, then passed through
    ``PolynomialFeatures(degree=2, include_bias=False)``, which adds the
    squares and pairwise products of the scaled features.

    NOTE(review): ``load_boston`` was deprecated in scikit-learn 1.0 and
    removed in 1.2, so this function needs an older scikit-learn release.

    Returns
    -------
    X : ndarray
        Scaled and polynomially expanded feature matrix.
    y : ndarray
        The dataset's ``target`` values, unchanged.
    """
    boston = load_boston()
    # Scale first so the polynomial terms are built from rescaled inputs.
    scaled = MinMaxScaler().fit_transform(boston.data)
    expanded = PolynomialFeatures(degree=2, include_bias=False).fit_transform(scaled)
    return expanded, boston.target
def load_citibike():
    """Load the Citi Bike trip log and count rentals per 3-hour window.

    Reads ``citibike.csv``, parses the ``starttime`` column as datetimes,
    and resamples the rows into 3-hour bins. A helper column of ones is
    summed per bin, so each value is the number of trips that started in
    that window.

    Returns
    -------
    pandas.Series
        Trip counts indexed by the start of each 3-hour bin.
    """
    # ``DATA_PATH`` is not defined when this code runs as a notebook cell
    # (``__file__`` is unavailable there), which made the original lookup
    # raise NameError. Use the module constant when it exists, otherwise
    # fall back to the data directory of the installed ``mglearn`` package.
    # NOTE(review): assumes mglearn bundles citibike.csv under
    # ``<package>/data`` - confirm for the installed version.
    data_dir = globals().get("DATA_PATH")
    if data_dir is None:
        data_dir = os.path.join(os.path.dirname(mglearn.__file__), "data")

    data_mine = pd.read_csv(os.path.join(data_dir, "citibike.csv"))
    # Column of ones: summing it per bin yields the trip count.
    data_mine['one'] = 1
    data_mine['starttime'] = pd.to_datetime(data_mine.starttime)
    data_starttime = data_mine.set_index("starttime")
    data_resampled = data_starttime.resample("3h").sum().fillna(0)
    return data_resampled.one
def make_signals():
    """Create three noisy synthetic source signals.

    Builds a sine wave, a square wave and a sawtooth wave over 2000 evenly
    spaced time points in [0, 8]. Seeded Gaussian noise (scale 0.2) is
    added, each column is divided by its own standard deviation, and the
    whole array is shifted so that its global minimum is zero.

    Returns
    -------
    ndarray of shape (2000, 3)
        One column per signal: sine, square, sawtooth.
    """
    n_samples = 2000
    # Fixed seed keeps the added noise reproducible.
    noise_rng = np.random.RandomState(42)
    t = np.linspace(0, 8, n_samples)

    sine = np.sin(2 * t)                   # signal 1: sinusoid
    square = np.sign(np.sin(3 * t))        # signal 2: square wave
    saw = signal.sawtooth(2 * np.pi * t)   # signal 3: sawtooth

    # Stack the signals as columns, then perturb every entry.
    mixed = np.column_stack([sine, square, saw])
    mixed = mixed + 0.2 * noise_rng.normal(size=mixed.shape)
    # Scale each column to unit standard deviation.
    mixed = mixed / mixed.std(axis=0)
    # Shift by the global (not per-column) minimum so all values are >= 0.
    return mixed - mixed.min()
In [ ]: