Tutorial: kNN#
[1]:
# Add the project root to sys.path; only necessary for this notebook
import sys, os
sys.path.insert(0, os.path.abspath('../../../'))
Imports#
[2]:
import pandas as pd
import numpy as np
from pyml.neighbors.knn import kNNClassifier
from pyml.utils.accuracy import MultiClassAccuracy
Read data#
[3]:
columns = ['sepal length in cm', 'sepal width in cm', 'petal length in cm', 'petal width in cm', 'class']
df = pd.read_csv('../../../data/iris.data', names=columns)
df.head()
[3]:
  | sepal length in cm | sepal width in cm | petal length in cm | petal width in cm | class
---|---|---|---|---|---
0 | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa
1 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa
2 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa
3 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa
4 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa
Create train-test-split#
[4]:
# Randomly assign roughly 80% of the rows to the training set, the rest to the test set
msk = np.random.rand(len(df)) < 0.8
train = df[msk]
test = df[~msk]
# The first four columns are the features, the last column is the class label
X_train, y_train = train.iloc[:, 0:4].values, train.iloc[:, -1].values
X_test, y_test = test.iloc[:, 0:4].values, test.iloc[:, -1].values
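Note that np.random.rand is not seeded here, so the exact split (and the accuracy reported below) will vary between runs. As an optional variation, not part of the original notebook, you could seed NumPy's random number generator to make the split reproducible:

import numpy as np

rng = np.random.default_rng(0)      # 0 is an arbitrary example seed
msk = rng.random(len(df)) < 0.8     # same ~80/20 split, now reproducible
train, test = df[msk], df[~msk]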
“Train” / fit the model on the training data#
[5]:
# Initialize an instance of the kNN model
model = kNNClassifier(k=3)
# Fitting is fast because kNN is a lazy learner: it only stores the training data.
model.fit(X_train, y_train)
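"Lazy learner" means that fitting essentially memorizes the training set and the real work happens at prediction time: for each query point, the distances to all training points are computed, the k closest are selected, and their majority label is returned. The sketch below illustrates this idea in plain NumPy; it is not pyml's implementation, and the function name knn_predict_one is made up for illustration.

import numpy as np
from collections import Counter

def knn_predict_one(x, X_train, y_train, k=3):
    # Euclidean distance from the query point to every training point
    distances = np.sqrt(((X_train - x) ** 2).sum(axis=1))
    # Indices of the k nearest training points
    nearest = np.argsort(distances)[:k]
    # Majority vote among the neighbors' labels
    return Counter(y_train[nearest]).most_common(1)[0][0]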
Make predictions and calculate accuracy#
[9]:
y_predictions = model.predict(X_test)
# Calculate accuracy: correct predictions divided by total number of predictions
evaluation = MultiClassAccuracy()
accuracy = evaluation.calculate(y_predictions, y_test)
print(f'The accuracy of this model is {accuracy}.')
The accuracy of this model is 0.96.
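Accuracy here is simply the fraction of test samples whose predicted class matches the true class. Assuming MultiClassAccuracy implements plain accuracy, the same number can be computed directly with NumPy; because the train-test split above is random and unseeded, the exact value (0.96 in this run) will differ slightly from run to run.

import numpy as np

# Fraction of predictions that match the true labels
accuracy = np.mean(np.asarray(y_predictions) == np.asarray(y_test))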