In [15]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
%matplotlib inline
from sklearn.neighbors import KNeighborsClassifier
import math

In [16]:
# Toy training set: one row per song.
# 'duration' and 'loudness' are the two features; 'jazz' is the known label
# (1 = jazz, 0 = not jazz). Column order is pinned explicitly.
music = pd.DataFrame(
    {
        'duration': [
            184, 134, 243, 186, 122, 197, 294, 382, 102, 264,
            205, 110, 307, 110, 397, 153, 190, 192, 210, 403,
            164, 198, 204, 253, 234, 190, 182, 401, 376, 102,
        ],
        'loudness': [
            18, 34, 43, 36, 22, 9, 29, 22, 10, 24,
            20, 10, 17, 51, 7, 13, 19, 12, 21, 22,
            16, 18, 4, 23, 34, 19, 14, 11, 37, 42,
        ],
        'jazz': [
            1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
            0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
            1, 1, 1, 1, 0, 0, 1, 1, 0, 0,
        ],
    },
    columns=['duration', 'loudness', 'jazz'],
)

In [17]:
# Introduce the data point to classify.
# a = duration (it is compared against music['duration'] in the distance step)
# b = loudness (it is compared against music['loudness'] in the distance step)
a = 134
b = 34

In [18]:
# Euclidean distance from the query point (a, b) to every song.
# `a` is subtracted from the 'duration' column and `b` from the 'loudness'
# column. pandas broadcasts the scalars over all rows, so a single
# vectorised expression covers the whole table and no loop is needed.
distance = (a - music['duration'])**2 + (b - music['loudness'])**2

# Kept as a one-element list holding one Series of per-song distances,
# because code further down consumes `distances` in that shape.
distances = [distance**0.5]

In [19]:
# `distances` holds a single Series; building a frame from it gives one row,
# so flip it into a single column and label that column.
Dis = pd.DataFrame(distances).T
Dis.columns = ["Distance"]

# Attach the distances to the songs by row label, keeping only the labels
# that exist in both frames.
NNdata = music.join(Dis, how='inner')

In [20]:
# Smallest distance between the query point and any song.
minimum = NNdata['Distance'].min()

# Row label of the first song at that smallest distance.
nearest_label = NNdata['Distance'].idxmin()

# Its known label is the prediction (1 = jazz, 0 = not jazz).
NNdata.at[nearest_label, 'jazz']


Out[20]:
0

In [21]:
# Cross-check the hand-rolled nearest-neighbour result against scikit-learn.
neighbors = KNeighborsClassifier(n_neighbors=1)
X = music[['loudness', 'duration']]
Y = music.jazz
neighbors.fit(X, Y)

# Predict for the same query point used above. In the distance computation
# `a` is matched with 'duration' and `b` with 'loudness', while the model was
# fitted on columns ordered ['loudness', 'duration']. The query is therefore
# built with named columns in the fitted order: loudness = b, duration = a.
query = pd.DataFrame([[b, a]], columns=['loudness', 'duration'])
neighbors.predict(query)


Out[21]:
array([0], dtype=int64)