Tutorial: kNN#

[1]:

# Make the directory three levels above the working directory importable.
# Only necessary when running this notebook from its own folder.
import os
import sys

_import_root = os.path.abspath('../../../')
sys.path.insert(0, _import_root)

Imports#

[2]:

import pandas as pd
import numpy as np

from pyml.neighbors.knn import kNNClassifier
from pyml.utils.accuracy import MultiClassAccuracy

Read data#

[3]:

# Column labels supplied by hand: the file is read without a header row
# (header=None is what pandas already does when `names` is given).
columns = [
    'sepal length in cm',
    'sepal width in cm',
    'petal length in cm',
    'petal width in cm',
    'class',
]
df = pd.read_csv('../../../data/iris.data', header=None, names=columns)

# Show the first rows as the cell's rich output.
df.head()

[3]:

	sepal length in cm	sepal width in cm	petal length in cm	petal width in cm	class
0	5.1	3.5	1.4	0.2	Iris-setosa
1	4.9	3.0	1.4	0.2	Iris-setosa
2	4.7	3.2	1.3	0.2	Iris-setosa
3	4.6	3.1	1.5	0.2	Iris-setosa
4	5.0	3.6	1.4	0.2	Iris-setosa

Create train-test-split#

[4]:

# Seed the split so that "Restart Kernel -> Run All" always produces the same
# train/test partition (and therefore the same reported accuracy).
SPLIT_SEED = 42
TRAIN_FRACTION = 0.8
rng = np.random.default_rng(SPLIT_SEED)

# One uniform draw in [0, 1) per row: rows whose draw falls below
# TRAIN_FRACTION go to the training set, all others to the test set.
# The resulting sizes are therefore only approximately 80% / 20%.
msk = rng.random(len(df)) < TRAIN_FRACTION
train = df[msk]
test = df[~msk]

# The mask and its negation must partition the frame without loss or overlap.
assert len(train) + len(test) == len(df)

# First four columns are used as features, the last column as the label.
X_train, y_train = train.iloc[:, 0:4].values, train.iloc[:, -1].values
X_test, y_test = test.iloc[:, 0:4].values, test.iloc[:, -1].values

“Train” / fit the model on the train data#

[5]:

# Number of neighbours handed to the classifier.
N_NEIGHBORS = 3

# Build the kNN model.
model = kNNClassifier(k=N_NEIGHBORS)

# kNN is a lazy learner, so fitting on the training data is expected to be fast.
model.fit(X_train, y_train)

Make predictions and calculate accuracy#

[9]:

# Predict a label for every sample in the held-out test features.
y_predictions = model.predict(X_test)

# Calculate accuracy: correct predictions divided by total number of predictions.
# Predictions are passed first, the true test labels second.
evaluation = MultiClassAccuracy()
accuracy = evaluation.calculate(y_predictions, y_test)

# Report the score for this particular train/test split.
print(f'The accuracy of this model is {accuracy}.')

The accuracy of this model is 0.96.