Bayesian Classifier

Erste Übungsaufgabe aus dem Fach WT-2 Text- und Suchtechnologien bei Herrn Prof. Dr. Gefei Zhang

  • Sebastian Schmid -- S0543196

In [1]:
import numpy as np
$$ P(class \mid day \land season \land wind \land rain) \propto P(c) \cdot P(d \mid c) \cdot P(s \mid c) \cdot P(w \mid c) \cdot P(r \mid c) $$

In [2]:
def calculate_score(classname, _dat, dataset=None):
    """Return the unnormalised naive Bayes score of ``classname`` for one sample.

    The score is ``P(class) * prod_i P(attribute_i | class)``, where every
    probability is estimated as a relative frequency in the training table.

    Parameters
    ----------
    classname
        Class label to score; compared with ``==`` against the label column.
    _dat : sequence
        Attribute values of the sample, in column order. The label column is
        the one at index ``len(_dat)``.
    dataset : 2-D numpy array, optional
        Training table with one row per example. When omitted, the
        notebook-level global ``data`` is used, so existing two-argument
        calls keep working.

    Returns
    -------
    float
        The score. ``0.0`` is returned when the table is empty or holds no
        row labelled ``classname`` (instead of dividing zero by zero).
    """
    table = data if dataset is None else dataset
    n_total = table.shape[0]

    # Rows whose label column equals the requested class.
    in_class = table[table[:, len(_dat)] == classname]
    n_class = in_class.shape[0]

    # An unseen class (or an empty table) has no support; without this guard
    # the likelihood terms below would evaluate 0 / 0.
    if n_total == 0 or n_class == 0:
        return 0.0

    prior = n_class / n_total
    # P(attribute_i | class): share of class rows matching the sample in column i.
    likelihoods = [
        np.count_nonzero(in_class[:, i] == attr) / n_class
        for i, attr in enumerate(_dat)
    ]
    return prior * np.prod(likelihoods)

In [1]:
def classifier(_dat, classes):
    """Return the entry of ``classes`` with the highest ``calculate_score``.

    Parameters
    ----------
    _dat : sequence
        Attribute values of the sample to classify.
    classes : sequence
        Candidate class labels; each one is scored against ``_dat``.

    Returns
    -------
    The element of ``classes`` at the position of the maximum score.
    """
    scores = [calculate_score(label, _dat) for label in classes]
    best_index = np.argmax(scores)
    return classes[best_index]

In [4]:
# Read the weather table as strings; skip_header is a line count, so 1 drops
# the first line of the file.
data = np.genfromtxt(
    './weather.txt',
    dtype=str,
    delimiter=', ',
    skip_header=1,
)
# Candidate labels handed to the classifier.
classes = ['on time', 'late', 'very late']
# Classify a single sample; the result is the cell's displayed output.
classifier(['weekday', 'spring', 'normal', 'none'], classes)


Out[4]:
'on time'

In [5]:
# Read the lens table as integers; skip_header is a line count, so 1 drops
# the first line of the file.
data = np.genfromtxt(
    './lens24.dat',
    dtype=int,
    delimiter=',',
    skip_header=1,
)
# Candidate labels handed to the classifier.
classes = [1, 2, 3]
# NOTE(review): calculate_score reads the label from column index 4 for a
# four-value sample -- confirm that lens24.dat stores the class there.
classifier([1, 1, 1, 1], classes)


Out[5]:
3