Tutorial: kNN#

[1]:
# Prepend the directory three levels up to the module search path
# (only necessary for this notebook).
import os
import sys

sys.path.insert(0, os.path.abspath('../../../'))

Imports#

[2]:
import pandas as pd
import numpy as np

from pyml.neighbors.knn import kNNClassifier
from pyml.utils.accuracy import MultiClassAccuracy

Read data#

[3]:
# Column names for the CSV, which is read without a header row
# (the names are supplied explicitly via `names=`).
columns = [
    'sepal length in cm',
    'sepal width in cm',
    'petal length in cm',
    'petal width in cm',
    'class',
]
df = pd.read_csv('../../../data/iris.data', names=columns)

# Show the first rows as a quick sanity check of the parsed frame.
df.head()
[3]:
sepal length in cm sepal width in cm petal length in cm petal width in cm class
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa

Create train-test-split#

[4]:
# Fix the seed so the random split -- and every number computed from it --
# is identical after Restart Kernel -> Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Boolean mask: each row lands in the training set with probability 0.8,
# so the split is roughly (not exactly) 80/20.
msk = rng.random(len(df)) < 0.8
train = df[msk]
test = df[~msk]

# Every row must end up in exactly one of the two subsets.
assert len(train) + len(test) == len(df)

# The first four columns are the features, the last column is the class label.
X_train, y_train = train.iloc[:, 0:4].values, train.iloc[:, -1].values
X_test, y_test = test.iloc[:, 0:4].values, test.iloc[:, -1].values

“Train” / fit the model on the train data#

[5]:
# Create the kNN classifier with k=3
model = kNNClassifier(k=3)

# kNN is a lazy learner, so fitting is expected to be fast.
model.fit(X_train, y_train)

Make predictions and calculate accuracy#

[9]:
# Run the fitted model on the test features
y_predictions = model.predict(X_test)

# Calculate accuracy: correct predictions divided by total number of predictions
evaluation = MultiClassAccuracy()
accuracy = evaluation.calculate(y_predictions, y_test)

print(f'The accuracy of this model is {accuracy}.')
The accuracy of this model is 0.96.