In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# One million evenly spaced sample points on the interval [0, 10]
x_data = np.linspace(start=0.0, stop=10.0, num=1000000)
In [3]:
noise = np.random.randn(len(x_data))
In [4]:
# Ground truth: y = m*x + b + noise, with slope m = 0.5 and intercept b = 5.
# Use the `b` constant instead of repeating the literal 5 (it was defined but unused).
b = 5
y_true = (0.5 * x_data) + b + noise
In [5]:
# Collect the inputs and noisy outputs into one two-column DataFrame
my_data = pd.DataFrame({'X Data': x_data, 'Y': y_true})
In [6]:
my_data.head()
Out[6]:
In [7]:
# Scatter a random 250-point sample — plotting all 1M points would be slow and unreadable
my_data.sample(n = 250).plot(kind = 'scatter',
x = 'X Data',
y = 'Y',
figsize = (8, 8))
Out[7]:
In [8]:
import tensorflow as tf
In [9]:
batch_size = 10
Variables
In [10]:
# Trainable slope and intercept, with arbitrary starting guesses
# (true values are m = 0.5 and b = 5)
m = tf.Variable(0.5)
b = tf.Variable(1.0)
Placeholders
In [11]:
# Placeholders of type float32 with shape [batch_size]: one mini-batch
# of x values and the matching y targets is fed in per training step
xph = tf.placeholder(tf.float32,[batch_size])
yph = tf.placeholder(tf.float32,[batch_size])
Graph
In [12]:
y_model = m * xph + b
Loss Function
In [13]:
error = tf.reduce_sum(tf.square(yph - y_model))
Optimizer
In [14]:
# Plain gradient descent; running `train` performs one update of m and b
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
train = optimizer.minimize(error)
Initialize Variables
In [15]:
init = tf.global_variables_initializer()
In [16]:
# NOTE: the notebook export stripped the cell's indentation; restored here
# so the session/loop structure actually parses.
with tf.Session() as sess:
    sess.run(init)

    # 1000 mini-batch gradient-descent steps
    batches = 1000
    for i in range(batches):
        # Pick `batch_size` random indexes into the 1M-point dataset
        rand_ind = np.random.randint(len(x_data), size=batch_size)

        # Feed the sampled inputs/targets into the placeholders
        feed = {xph: x_data[rand_ind],
                yph: y_true[rand_ind]}

        # One training step
        sess.run(train, feed_dict=feed)

    # Fetch the learned slope and intercept after training
    model_m, model_b = sess.run([m, b])
In [17]:
model_m
Out[17]:
In [18]:
model_b
Out[18]:
In [19]:
y_hat = x_data * model_m + model_b
In [20]:
# Overlay the fitted line (red) on a 250-point sample of the data
my_data.sample(n=250).plot(kind='scatter',
x = 'X Data',
y = 'Y',
figsize = (8, 8))
plt.plot(x_data,y_hat,'r')
Out[20]:
In [21]:
# Input feature preparation: a single numeric feature column named 'x'
# (the key must match the dict key used in the input functions below)
feat_cols = [tf.feature_column.numeric_column('x',
shape = [1])]
In [22]:
# Initializing a linear regression estimator (canned tf.estimator model)
estimator = tf.estimator.LinearRegressor(feature_columns = feat_cols)
In [23]:
from sklearn.model_selection import train_test_split
In [24]:
# 70/30 train/eval split; fixed random_state so the split is reproducible
x_train, x_eval, y_train, y_eval = train_test_split(x_data,
y_true,
test_size = 0.3,
random_state = 101)
In [25]:
# Confirm the 70/30 split: 700k training points, 300k eval points
for split in (x_train, y_train, x_eval, y_eval):
    print(split.shape)
In [26]:
# Can also do .pandas_input_fn
# Training input function: num_epochs=None cycles the data indefinitely
# (the number of steps is capped by estimator.train below); shuffle=True
# randomizes batch order, which is what we want for training.
input_func = tf.estimator.inputs.numpy_input_fn({'x': x_train},
y_train,
batch_size = 4,
num_epochs = None,
shuffle = True)
In [27]:
# Input function for evaluating on the TRAINING split:
# shuffle=False so the metric is computed deterministically
train_input_func_for_eval = tf.estimator.inputs.numpy_input_fn({'x': x_train},
y_train,
batch_size = 4,
num_epochs = 1000,
shuffle = False)
In [28]:
# Input function for evaluating on the held-out EVAL split
eval_input_func_for_eval = tf.estimator.inputs.numpy_input_fn({'x' : x_eval},
y_eval,
batch_size = 4,
num_epochs = 1000,
shuffle = False)
In [29]:
# Train the estimator for 1000 steps of batch_size-4 mini-batches
estimator.train(input_fn = input_func,
steps = 1000)
Out[29]:
In [30]:
# Average loss over 1000 evaluation steps on the training split
train_metrics = estimator.evaluate(input_fn = train_input_func_for_eval,
steps = 1000)
In [31]:
# Average loss over 1000 evaluation steps on the held-out split
eval_metrics = estimator.evaluate(input_fn = eval_input_func_for_eval,
steps = 1000)
In [32]:
# Compare in-sample vs held-out loss; similar values suggest no overfitting
print(f"train metrics: {train_metrics}")
print(f"eval metrics: {eval_metrics}")
In [33]:
# Prediction input: 10 evenly spaced x values on [0, 10], no labels;
# shuffle=False keeps the predictions in input order
input_fn_predict = tf.estimator.inputs.numpy_input_fn({'x' : np.linspace(0, 10, 10)},
shuffle = False)
In [34]:
list(estimator.predict(input_fn = input_fn_predict))
Out[34]:
In [35]:
# Collect the point predictions for the 10 evenly spaced inputs
predictions = [pred['predictions']
               for pred in estimator.predict(input_fn = input_fn_predict)]
In [36]:
predictions
Out[36]:
In [37]:
# Overlay the estimator's fitted line (red) on a 250-point data sample
my_data.sample(n = 250).plot(kind = 'scatter',
x = 'X Data',
y = 'Y',
figsize = (8, 8))
plt.plot(np.linspace(0, 10, 10), predictions,'r')
Out[37]: