In [1]:
##############################################################################
#
# Workshop: How to develop a personalised machine learning-based application
#
# Notebook 1: Introduction in Machine Learning, Linear Regression 
#
##############################################################################

In [2]:
# jupyter notebook instructions:
# - Every cell can be executed seperately from the rest.
# - You can execute cells in a non-sequential order (but be carefull of 
#   the dependencies between them).
# - Execute a cell by pressing the play button or Shift+Enter.

In [4]:
# Import necessary modules
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
# Define a function to model and create data.
# The function of this example will be a simple linear equation
# of the form f(x) = ax + b
def func(x, a, b):
    return a * x + b

In [6]:
# Generate some clean data
x = np.linspace(0, 10, 100)
y = func(x, 1, 2)

In [7]:
# Add some noise in the data, in the form of the error factor,
# epsilon: f(x) = ax + b + ε
y_noisy = y + 0.9 * np.random.normal(size=len(x))

In [8]:
# Fit a curve on the noisy data
popt, pcov = curve_fit(func, x, y_noisy)
print(popt)


[ 0.99951627  2.02179924]

In [9]:
# Assign the coefficients in better named variables
a = popt[0]
b = popt[1]

In [10]:
# Calculate the predicted values of y (y hat) for every x
y_predicted = [func(xi, a, b) for xi in x]

In [11]:
plt.scatter(x, y_noisy)
plt.plot(x, y_predicted )
plt.title('Fitting noisy data with a linear equation')
plt.show()



In [12]:
# Let's try the same for a Gaussian profile of a non-linear function
#         -(x - μ)^2
# a * exp ----------
#            2σ^2 
#
# define the function
def func2(x, a, b, c):
    return a * np.exp(-(x-b)**2/(2*c**2))

In [13]:
# Generate some fresh data
x2 = np.linspace(0, 10, 100)
y2 = func2(x2, 1, 5, 2)

In [14]:
# Add some noise
y_noisy2 = y2 + 0.2 * np.random.normal(size=len(x2))

In [15]:
# Fit the curve
popt2, pcov2 = curve_fit(func2, x2, y_noisy2)
print(popt2)


[ 1.00437185  5.04277047 -2.05398973]

In [16]:
# Assign the coefficients in better named variables
a2 = popt2[0]
b2 = popt2[1]
c2 = popt2[2]

In [17]:
# Calculate the predicted values of y (y hat) for every x
y_predicted2 = [func2(xi, a2, b2, c2) for xi in x2]

In [18]:
plt.scatter(x2, y_noisy2)
plt.plot(x2, y_predicted2 )
plt.title('Fitting noisy data with a linear equation')
plt.show()



In [ ]: