Praktikum Maschinelles Lernen WS 15/16

Name Vorname Matrikelnummer Datum
Alt Tobias 282385 18.12.2015
Hieke Manuel 283912 08.01.2016

Aufgabe 3.1 - Perzeptron


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
from numpy import linalg as LA
import scipy as sp
import urllib2
from urllib2 import urlopen, URLError, HTTPError
import zipfile
import tarfile
import sys
import os
from skimage import data, io, filter
from PIL import Image

Teil A - Toy Dataset


In [2]:
# Funktion zum Erstellen des Datensatzes
#-----------------------------------------------------------------------------
# loc : float Mean (“centre”) of the distribution.
# scale : float Standard deviation (spread or “width”) of the distribution.
# size : int or tuple of ints, optional
# numpy.random.normal(loc=0.0, scale=1.0, size=None)

def createToyDataSet(ypos,numberOfData,clusterDistance,varianz):
    #sigma = sqrt(clusterBright) # mean and standard deviation
    #data1 = np.random.normal(mu, sigma, numberOfData)
    #data2 = np.random.normal(mu, sigma, numberOfData)

    mu = clusterDistance         #loc Paramter -> Abstand
    sigma = sqrt(varianz)        #scale Parameter -> Clusterbreite
    sizeOfData = numberOfData    #Anzahl Daten

    #np.vstack -> Stack arrays in sequence vertically
    X = np.vstack([np.random.normal(ypos+mu, sigma, (sizeOfData, 2)), np.random.normal(ypos-mu, sigma, (sizeOfData, 2))])
    return X

In [3]:
#Graphische Darstellung
#-------------------------------------------------------------------------
def plotToyData(data,mu,varianz):
    fig = plt.figure()
    
    fig, ax = subplots(figsize=(14,6))
    data1 = data[0]
    data2 = data[1]
    #plot data histogramm
    ax = plt.subplot(1,2,1)
    title('x/y Histogramm')
    count, bins, ignored = ax.hist(data, 30, normed=True)
    ax.plot(bins, 1/(sqrt(varianz) * np.sqrt(2 * np.pi)) * np.exp( - (bins)**2 / (2 * varianz) ),
         linewidth=2, color='g')
    # 1. Gaussverteilungen - Cluster 1
    x_plot = np.linspace(mu - 4*sqrt(varianz), mu + 4*sqrt(varianz), 100) # the x-values to use in the plot
    # compute the values of this density at the locations given by x_plot
    py = 1/np.sqrt(4*np.pi*varianz)*np.exp(-0.5*(x_plot-mu)**2/varianz)
    # sample some random values from this density
    x_samps = data
    # Plot the density
    ax.plot(x_plot, py)
    
    # 2. Gaussverteilungen - Cluster 2
    x_plot = np.linspace(-mu - 4*sqrt(varianz), -mu + 4*sqrt(varianz), 100) # the x-values to use in the plot
    # compute the values of this density at the locations given by x_plot
    py = 1/np.sqrt(4*np.pi*varianz)*np.exp(-0.5*(x_plot+mu)**2/varianz)
    # sample some random values from this density
    x_samps = data
    # Plot the density
    ax.plot(x_plot, py)

    # Scatter plot
    ax = plt.subplot(1,2,2)
    colors = np.hstack([np.zeros(len(data)/2), np.ones(len(data)/2)])
    plt.scatter(data[:, 0], data[:, 1], c=colors, edgecolors='none',cmap=plt.cm.Accent)

In [4]:
#Erzeugen der Daten (wie gewünscht einstellbar)
#---------------------------------------------------------------------------
varianz = 0.5      #Clusterbreite
numberOfData = 200 #Anzahl neuer Datenpunkte pro Cluster
mean= 1.5          #Abstand
ypos = 0           #y-Achsen-Verschiebung

toyData = createToyDataSet(ypos,numberOfData,mean,varianz)

In [5]:
plotToyData(toyData, mean, varianz)


<matplotlib.figure.Figure at 0x7f4d66146cd0>

In [6]:
#Erzeugen des zugehörigen Labelvektor mit den Werten ±1
#-------------------------------------------------------------------
labelvector = np.ones(len(toyData)) 
labelvector[len(toyData)/2:] *= -1
print 'ToyData Größe    :',shape(toyData),' 1.Klasse: ',toyData[0][0],'  2.Klasse: ', toyData[1][0]
print 'Labelvektor Größe:',shape(labelvector),' 1.Klasse: ',labelvector[0],'\t\t2.Klasse: ', labelvector[200]


ToyData Größe    : (400, 2)  1.Klasse:  1.7575363978   2.Klasse:  1.56887151226
Labelvektor Größe: (400,)  1.Klasse:  1.0 		2.Klasse:  -1.0

Teil B - Perzeptron


In [ ]: