# K-nearest-neighbours (KNN) classification algorithm
# %%
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# %%
# Load the data set; the first CSV column is used as the row index.
df = pd.read_csv('Classified Data', index_col=0)
# %%
df.info()
# %%
# KNN classifies by distance between observations, so a feature measured on a
# larger scale would dominate the distance. Put every feature on the same
# scale before modelling.
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# %%
# Select the feature columns once, by name. Reusing this frame for fitting,
# transforming and labelling keeps the column names of the scaled frame
# correct wherever 'TARGET CLASS' sits in the file (a positional
# df.columns[:-1] slice is only right when the target is the last column).
features = df.drop('TARGET CLASS', axis=1)
# NOTE(review): the scaler is fitted on every row, including rows that the
# train/test split further down assigns to the test set.
scaler.fit(features)
# %%
scaled_features = scaler.transform(features)
df_feat = pd.DataFrame(scaled_features, columns=features.columns)
df_feat
# %%
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The seed is fixed so the split is the
# same on every run; without it the error-rate curve computed below, and any
# n_neighbors value hard-coded from it, would change between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_feat, df['TARGET CLASS'], test_size=0.30, random_state=101
)
# %%
from sklearn.neighbors import KNeighborsClassifier

# Baseline model: classify each test row from its 100 nearest training rows.
knn = KNeighborsClassifier(n_neighbors=100).fit(X_train, y_train)
# %%
from sklearn.metrics import classification_report, confusion_matrix

prediction = knn.predict(X_test)
# Per-class precision / recall / F1 for the baseline.
report = classification_report(y_test, prediction)
print(report)
# Last expression of the cell: shown when the cell is run interactively.
confusion_matrix(y_test, prediction)
# %%
# Fit one classifier per candidate k and record its test error, so a good
# n_neighbors can be read off the plot below. The candidate range is defined
# once and shared by the loop and the plot so the two cannot drift apart.
k_values = range(1, 100)
error_rate = []
for i in k_values:
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    # Mean of the element-wise mismatch mask = fraction of wrong predictions.
    error_rate.append(np.mean(pred_i != y_test))
# %%
plt.figure(figsize=(10, 6))
plt.plot(k_values, error_rate, color='blue', linestyle='--', marker='o')
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')
# %%
# Refit with k=17 and report the per-class metrics on the held-out rows.
knn = KNeighborsClassifier(n_neighbors=17).fit(X_train, y_train)
predict = knn.predict(X_test)
print(classification_report(y_true=y_test, y_pred=predict))
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
df = pd.read_csv('KNN_Project_Data')
df.head()
# Pairwise plots of the columns, coloured by class label.
sns.pairplot(df, hue='TARGET CLASS', palette='bwr')
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Select the feature columns once, by name, instead of dropping the target
# three separate times (one of which discarded its result). Labelling the
# scaled frame from this same selection keeps the column names correct
# wherever 'TARGET CLASS' sits in the file.
features = df.drop('TARGET CLASS', axis=1)
scaled_features = scaler.fit_transform(features)
df_scaled = pd.DataFrame(scaled_features, columns=features.columns)
#-------------------------------------------------------------------------
X = df_scaled
y = df['TARGET CLASS']
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing and train on the remaining 70%.
# The original passed train_size=0.3, which trains on only 30% of the data;
# the earlier split in this file uses test_size=0.30, so this one now matches.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)
# Imported here because the loop needs the classifier before the import that
# appears further down in the file.
from sklearn.neighbors import KNeighborsClassifier

# Fit one classifier per candidate k and record its test error. The range is
# defined once and shared by the loop and the plot so they stay in step.
k_values = range(1, 40)
err_list = []
for x in k_values:
    knn = KNeighborsClassifier(n_neighbors=x)
    knn.fit(X_train, y_train)
    predict_i = knn.predict(X_test)
    # Mean of the element-wise mismatch mask = fraction of wrong predictions.
    err_list.append(np.mean(predict_i != y_test))
plt.plot(k_values, err_list)
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

# Final model: k=30, evaluated on the held-out rows.
knn = KNeighborsClassifier(n_neighbors=30).fit(X_train, y_train)
prediction = knn.predict(X_test)
report = classification_report(y_test, prediction)
print(report)
confusion_matrix(y_test, prediction)