In [1]:
%pdb
In [2]:
from IPython.display import display
import pprint
import bow
import numpy as np
import string
In [3]:
def laplacian_smoothing(v1: int, v2: int, k: int, k_class: int):
    """Return the add-k (Laplace) smoothed ratio ``(v1 + k) / (v2 + k * k_class)``.

    Args:
        v1: Observed count for the outcome of interest.
        v2: Total observed count the outcome is measured against.
        k: Pseudo-count added to the outcome.
        k_class: Number of possible outcomes; each one receives ``k``
            pseudo-counts in the denominator.

    Returns:
        The smoothed estimate as a float.
    """
    smoothed_count = v1 + k
    smoothed_total = v2 + k * k_class
    return smoothed_count / smoothed_total
In [4]:
def maximum_likelihood(v1: int, v2: int, **kwargs: list):
    """Return the unsmoothed ratio ``v1 / v2``.

    Args:
        v1: Observed count for the outcome of interest.
        v2: Total observed count the outcome is measured against.
        **kwargs: Accepted and ignored, so the function can be called with
            the same keyword arguments as ``laplacian_smoothing``
            (e.g. ``k`` and ``k_class``).

    Returns:
        ``v1 / v2`` as a float.

    Raises:
        ZeroDivisionError: If ``v2`` is zero.
    """
    estimate = v1 / v2
    return estimate
In [5]:
def tokenize(text: str):
    """Split ``text`` into upper-case word tokens.

    The text is upper-cased and split on whitespace, then leading and
    trailing ASCII punctuation (``string.punctuation``) is stripped from
    each word. Words that consist only of punctuation (for example a
    free-standing ``-``) are dropped rather than kept as empty tokens.

    Args:
        text: Raw text to tokenize.

    Returns:
        A list of non-empty, upper-case tokens in their original order.
    """
    stripped_words = (
        word.strip(string.punctuation) for word in text.upper().split()
    )
    # Skip tokens that became empty after stripping punctuation; an empty
    # string would otherwise be counted as a word.
    return [token for token in stripped_words if token]
In [6]:
def p_query(
    query: list,
    domain: bow.BagOfWords,
    bag: bow.BagOfWords,
    p_method: object,
    k: int = 1,
):
    """Estimate the probability of each query word within one class.

    Args:
        query: Sequence of word tokens to score.
        domain: Word counts for the class being scored. It is used like a
            mapping (``in``, ``[]`` and ``.values()``).
        bag: Word counts for all classes combined; only ``len(bag)`` is
            used, as the ``k_class`` smoothing argument.
        p_method: Callable invoked as
            ``p_method(v1=..., v2=..., k=..., k_class=...)`` that returns
            the probability estimate, e.g. ``laplacian_smoothing`` or
            ``maximum_likelihood``.
        k: Smoothing pseudo-count forwarded to ``p_method``. Defaults to 1.

    Returns:
        A list with one estimate per word of ``query``, in the same order.
    """
    # Loop-invariant: sum of all counts stored for this class
    # (presumably the total number of word occurrences -- confirm against
    # bow.BagOfWords.values()).
    total_count = sum(domain.values())
    # NOTE: this is the size of the combined bag, not the number of classes
    # (presumably the number of distinct words -- confirm against
    # bow.BagOfWords.__len__).
    bag_size = len(bag)
    return [
        p_method(
            # Words never seen in this class count as zero occurrences.
            v1=domain[word] if word in domain else 0,
            v2=total_count,
            k=k,
            k_class=bag_size,
        )
        for word in query
    ]
In [7]:
# Training corpus: one title per line for each class. The leading and
# trailing newlines are part of each value.
s_movie = (
    "\n"
    "A PERFECT WORLD\n"
    "MY PERFECT WOMAN\n"
    "PRETTY WOMAN\n"
)
s_song = (
    "\n"
    "A PERFECT DAY\n"
    "ELECTRIC STORM\n"
    "ANOTHER RAIN DAY\n"
)
# Queries to classify, each stored as the token list produced by tokenize().
queries = [tokenize('Perfect Storm')]
In [8]:
def test_perfect_storm(
    movie: str, song: str, p_method: object, queries: list
):
    """Print a two-class (MOVIE vs SONG) Naive Bayes walk-through.

    Builds a bag of words for each class from the given texts, prints the
    bags, the class priors, and for every query the class-conditional
    likelihoods and the posteriors obtained with Bayes' rule.

    Args:
        movie: Movie titles, one per line.
        song: Song titles, one per line.
        p_method: Probability estimator called with the keywords ``v1``,
            ``v2``, ``k`` and ``k_class`` (e.g. ``laplacian_smoothing`` or
            ``maximum_likelihood``). It is used for both the priors and
            the word likelihoods.
        queries: List of tokenized queries (each a list of words).

    Returns:
        None. All results are printed.
    """
    # Build the bags from the arguments rather than from the module-level
    # s_movie / s_song, so the function works for any corpus passed in.
    movie_bag = bow.BagOfWords(tokenize(movie))
    song_bag = bow.BagOfWords(tokenize(song))
    bag = movie_bag + song_bag

    print('\nDICTIONARIES')
    print('\nmovies:')
    pprint.pprint(str(movie_bag))
    print('\nsongs:')
    pprint.pprint(str(song_bag))
    print('\nall:')
    pprint.pprint(str(bag))

    # NOTE(review): this heading repeats 'DICTIONARIES' although the section
    # prints entry counts and priors -- confirm the intended label.
    print('\nDICTIONARIES')
    # One entry per non-blank line; unlike counting newline characters this
    # does not depend on how the text is padded with blank lines.
    n_movie = sum(1 for line in movie.splitlines() if line.strip())
    n_song = sum(1 for line in song.splitlines() if line.strip())
    print('movie entries:', n_movie, '| song entries:', n_song)

    # Class priors, estimated from the number of entries in each class.
    n_total = n_movie + n_song
    p_movie = p_method(
        v1=n_movie,
        v2=n_total,
        k=1,
        k_class=2  # movie and song
    )
    p_song = p_method(
        v1=n_song,
        v2=n_total,
        k=1,
        k_class=2  # movie and song
    )
    print('P(MOVIE): ', p_movie, '| P(SONG):', p_song)
    print('query:', queries)

    for query in queries:
        print('\nQuery: ', query)
        label = ','.join(query)
        # Naive Bayes: the query likelihood is the product of the
        # per-word likelihoods.
        p_q_movie = np.prod(p_query(query, movie_bag, bag, p_method))
        p_q_song = np.prod(p_query(query, song_bag, bag, p_method))
        print('P(%s|MOVIE)' % label, p_q_movie)
        print('P(%s|SONG)' % label, p_q_song)

        joint_movie = p_q_movie * p_movie
        joint_song = p_q_song * p_song
        evidence = joint_movie + joint_song
        if evidence == 0:
            # Both classes give the query zero probability (possible with
            # an unsmoothed estimator), so the posteriors are undefined.
            p_movie_q = p_song_q = float('nan')
        else:
            p_movie_q = joint_movie / evidence
            p_song_q = joint_song / evidence
        print('P(MOVIE|%s):' % label, p_movie_q)
        print('P(SONG|%s):' % label, p_song_q)
In [9]:
# Walk through the example using add-one (Laplace) smoothed estimates.
test_perfect_storm(
    movie=s_movie,
    song=s_song,
    p_method=laplacian_smoothing,
    queries=queries,
)
In [10]:
# Walk through the same example using unsmoothed maximum-likelihood estimates.
test_perfect_storm(
    movie=s_movie,
    song=s_song,
    p_method=maximum_likelihood,
    queries=queries,
)