Introduction to k nearest neighbors

  1. classification algorithm

    Untitled

Untitled

  1. Changing the value of k will change your result

Untitled

Untitled

  1. only parameters are k and the distance metric
  2. cons
    1. High Prediction Cost
    2. not good for high-dimensional data
    3. categorical features don’t work well

KNN with Python

# %%
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

# %%
# Load the classified dataset; the first CSV column is the row index.
df = pd.read_csv('Classified Data',index_col=0)

# %%
df.info()

# %%
# KNN is distance-based, so standardize the features first — otherwise
# columns with larger numeric ranges would dominate the distance.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# %%
# Fit on the feature columns only (everything except the label).
features = df.drop('TARGET CLASS', axis=1)
scaler.fit(features)

# %%
# Transform and wrap back into a DataFrame with the original feature names.
scaled_features = scaler.transform(features)
df_feat = pd.DataFrame(scaled_features, columns=df.columns[:-1])
df_feat

# %%
# Hold out 30% of the rows for evaluation.
from sklearn.model_selection import train_test_split

X = df_feat
y = df['TARGET CLASS']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

# %%
# First attempt: a (deliberately large) neighborhood of k=100.
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=100)
knn.fit(X_train, y_train)

# %%
# Evaluate on the held-out split.
from sklearn.metrics import classification_report, confusion_matrix

prediction = knn.predict(X_test)
print(classification_report(y_test, prediction))
confusion_matrix(y_test, prediction)

# %%
# Elbow method: record the misclassification rate for each k in 1..99.
error_rate = []
for k in range(1, 100):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    mismatches = knn.predict(X_test) != y_test
    error_rate.append(np.mean(mismatches))

# %%
# Plot error rate vs. k; look for where the curve flattens out.
plt.figure(figsize=(10, 6))
plt.plot(range(1, 100), error_rate, color='blue', linestyle='--', marker='o')

# %%
# Retrain with k=17, chosen from the elbow plot, and report final metrics.
# (KNeighborsClassifier.fit returns self, so fit and predict can chain.)
knn = KNeighborsClassifier(n_neighbors=17)
predict = knn.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, predict))

Exercise


import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

# Load the exercise dataset (no index column this time).
df = pd.read_csv('KNN_Project_Data')
df.head()

# Pairplot colored by the label to eyeball class separation.
sns.pairplot(df,hue='TARGET CLASS',palette='bwr')

# Standardize the features so the distance metric treats all columns equally.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# FIX: removed a stray `df.drop('TARGET CLASS',axis=1)` statement whose
# result was discarded — it had no effect. Compute the feature frame once
# and reuse it for both fit and transform.
features = df.drop('TARGET CLASS', axis=1)
scaler.fit(features)
scaled_features = scaler.transform(features)
df_scaled = pd.DataFrame(scaled_features, columns=df.columns[:-1])

#-------------------------------------------------------------------------
X = df_scaled
y = df['TARGET CLASS']
from sklearn.model_selection import train_test_split
# BUG FIX: the original passed train_size=0.3, which trains on only 30% of
# the data. The lesson above holds out 30% for *testing*, so this was
# almost certainly meant to be test_size=0.3.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

# BUG FIX: KNeighborsClassifier was used in the loop below before it was
# imported (the import only appeared further down), raising a NameError
# when the script runs top to bottom.
from sklearn.neighbors import KNeighborsClassifier

# Elbow method: misclassification rate for each k in 1..39.
err_list = []
for x in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=x)
    knn.fit(X_train, y_train)
    predict_i = knn.predict(X_test)
    err_list.append(np.mean(predict_i != y_test))

plt.plot(range(1, 40), err_list)

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Final exercise model: k=30, picked from the elbow plot above.
knn = KNeighborsClassifier(n_neighbors=30)
knn.fit(X_train, y_train)
prediction = knn.predict(X_test)

print(classification_report(y_test, prediction))
confusion_matrix(y_test, prediction)