In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
%matplotlib inline
from sklearn.neighbors import KNeighborsClassifier
import math
In [16]:
# Toy training set: one row per song, built in a single constructor call.
# 'jazz' is the known label for each song (1 = jazz, 0 = not jazz).
music = pd.DataFrame({
    'duration': [184, 134, 243, 186, 122, 197, 294, 382, 102, 264,
                 205, 110, 307, 110, 397, 153, 190, 192, 210, 403,
                 164, 198, 204, 253, 234, 190, 182, 401, 376, 102],
    'loudness': [18, 34, 43, 36, 22, 9, 29, 22, 10, 24,
                 20, 10, 17, 51, 7, 13, 19, 12, 21, 22,
                 16, 18, 4, 23, 34, 19, 14, 11, 37, 42],
    'jazz': [1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
             0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
             1, 1, 1, 1, 0, 0, 1, 1, 0, 0],
})
In [17]:
#Introduce the data point to classify
# a = duration (it is subtracted from music['duration'] in the distance step)
# b = loudness (it is subtracted from music['loudness'] in the distance step)
a = 134
b = 34
In [18]:
#Euclidean distance from the new point (a, b) to every song
# The arithmetic is vectorised over the whole column, so no explicit loop is
# needed: a is measured against duration and b against loudness.
# `distance` holds the squared distance; the square root is taken below.
distance = (a - music['duration'])**2 + (b - music['loudness'])**2
# Kept as a one-element list because the next cell builds a DataFrame from it.
distances = [distance**0.5]
In [19]:
# `distances` is a one-element list holding a Series of per-song distances.
# Building a frame from it yields a single row, so flip it into one column.
Dis = pd.DataFrame(distances).T
Dis.columns = ["Distance"]
# Put the distances next to the song features, keeping only the row labels
# that appear in both frames.
frames_to_join = [music, Dis]
NNdata = pd.concat(frames_to_join, join='inner', axis=1)
In [20]:
# Smallest distance between the new point and any song in the table.
minimum = NNdata['Distance'].min()
# Rows whose distance equals that minimum (more than one if there is a tie).
is_nearest = NNdata['Distance'] == minimum
# The 'jazz' label of the first such row is the 1-nearest-neighbour answer
# (1 = jazz, 0 = not jazz); leaving it as the last expression displays it.
NNdata.loc[is_nearest, 'jazz'].iloc[0]
Out[20]:
In [21]:
#Check the hand-rolled result against scikit-learn's 1-nearest-neighbour classifier
neighbors = KNeighborsClassifier(n_neighbors=1)
X = music[['loudness', 'duration']]
Y = music.jazz
neighbors.fit(X,Y)
## Predict for the new song. b is its loudness and a is its duration, so the
## query row must list them in the same column order the model was fitted on
## (loudness first, then duration). Passing [[a, b]] would swap the features.
query = pd.DataFrame([[b, a]], columns=X.columns)
neighbors.predict(query)
Out[21]: