In [2]:
from pandas import Series
from sklearn.preprocessing import MinMaxScaler
In [6]:
# Ten evenly spaced observations, 10.0 through 100.0, used for the
# min-max normalisation demo.
data = [float(v) for v in range(10, 101, 10)]
series = Series(data)
print(series)
In [8]:
# Turn the 1-D series into a single-column 2-D array (one row per
# observation) before handing it to the scaler.
values = series.values.reshape(-1, 1)
In [11]:
# Show the column array followed by its shape, each on its own line.
print(values, values.shape, sep='\n')
In [22]:
# Configure a scaler whose requested output range is 0 to 1, then show
# its configuration.
scaler = MinMaxScaler(feature_range=(0, 1))
print(scaler)
In [26]:
# Learn the minimum and maximum of the single feature column.
scaler = scaler.fit(values)
# data_min_ / data_max_ hold one entry per feature, so pull out the scalar
# for our only column explicitly; formatting the whole array with %f relies
# on implicit array-to-scalar conversion, which newer NumPy deprecates.
print('Min: %f, Max: %f' % (scaler.data_min_[0], scaler.data_max_[0]))
In [27]:
# Apply the fitted scaler to the same column it was fitted on and show
# the rescaled values.
normalized = scaler.transform(values)
print(normalized)
In [29]:
# Undo the scaling; the trailing bare expression displays the result as
# the cell output.
inversed = scaler.inverse_transform(normalized)
inversed
Out[29]:
In [31]:
from pandas import Series
from sklearn.preprocessing import StandardScaler
from math import sqrt
In [32]:
# Small sample of measurements used for the standardisation demo.
data = [
    1.0, 5.5, 9.0,
    2.6, 8.8, 3.0,
    4.1, 7.9, 6.3,
]
series = Series(data=data)
print(series)
In [36]:
# Reshape the series into a single-column 2-D array and report its shape.
values = series.values.reshape(-1, 1)
print(values.shape)
In [39]:
# Fit a standard scaler on the single feature column.
scaler = StandardScaler()
scaler = scaler.fit(values)
# mean_ and var_ hold one entry per feature. Index the only column so that
# %f and math.sqrt receive plain scalars instead of 1-element arrays
# (implicit array-to-scalar conversion is deprecated in newer NumPy).
print('Mean: %f, StandardDeviation: %f' % (scaler.mean_[0], sqrt(scaler.var_[0])))
# Standardise the data, then invert the transform to recover the input.
standardized = scaler.transform(values)
print(standardized)
inversed = scaler.inverse_transform(standardized)
print(inversed)
In [55]:
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
In [56]:
# Categorical observations used for the label / one-hot encoding demo.
data = [
    'cold', 'cold', 'warm', 'cold', 'hot',
    'hot', 'warm', 'cold', 'warm', 'hot',
]
values = array(data)
In [57]:
# Display the label array as the cell output.
values
Out[57]:
In [72]:
# integer encode: learn the set of distinct labels, then map every label
# to its integer code.
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit(values).transform(values)
print(integer_encoded)
In [73]:
# onehot encode
# The `sparse=False` constructor argument was renamed and later removed in
# scikit-learn, so keep the encoder's default sparse output and densify the
# result explicitly; this works regardless of the installed version.
onehot_encoder = OneHotEncoder()
# The encoder expects a 2-D input: one row per sample, one column per feature.
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded).toarray()
print(onehot_encoded)
In [69]:
# Recover the original label of the first sample: the position of the
# largest entry in its one-hot row is the integer code to decode.
inverted = label_encoder.inverse_transform([argmax(onehot_encoded[0])])
inverted
Out[69]:
In [81]:
import numpy as np
from keras.preprocessing.sequence import pad_sequences
In [92]:
# Three sequences of different lengths (4, 3 and 1) for the padding demo.
sequences = [[1, 2, 3, 4], [1, 2, 3], [1]]
In [93]:
# Display the raw, unpadded sequences as the cell output.
sequences
Out[93]:
In [94]:
# Pad every sequence to the length of the longest one. 'pre' is the
# library default and is spelled out here for contrast with the
# post-padding example.
padded = pad_sequences(sequences, padding='pre')
print(padded)
In [95]:
# Same padding as before, but with padding='post' so the fill values are
# placed after each sequence's own values.
padded = pad_sequences(sequences, padding='post')
print(padded)
In [96]:
from keras.preprocessing.sequence import pad_sequences
In [97]:
# Three sequences of different lengths (4, 3 and 1) for the truncation demo.
sequences = [[1, 2, 3, 4], [1, 2, 3], [1]]
In [100]:
# Limit every sequence to two steps. 'pre' is the library default for
# truncating and is spelled out here for contrast with the post-truncation
# example.
truncated = pad_sequences(sequences, maxlen=2, truncating='pre')
print(truncated)
In [101]:
# Limit every sequence to two steps, this time with truncating='post' so
# that over-long sequences lose values from their end.
truncated = pad_sequences(sequences, maxlen=2, truncating='post')
print(truncated)
In [104]:
from pandas import DataFrame

# Build a toy frame with the current observation 't' and its lag 't-1'.
df = DataFrame()
df['t'] = list(range(10))
# A positive shift moves values down one row, so each row holds the value
# from the previous time step (the first row has no predecessor and is NaN).
# A negative shift would instead produce the *next* value, i.e. t+1, which
# would contradict the 't-1' column label.
df['t-1'] = df['t'].shift(1)
print(df)