Untitled

Untitled

Untitled

Untitled

Untitled

Untitled

Untitled

Untitled

Type-I error: False Positive

Type-II error: False Negative

Untitled

Untitled

FP → read "Positive" first: the model predicted positive → "False" then means that prediction was wrong.

lec2: Logistic Regression with Python

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the training data from a CSV file, resolved relative to the current
# working directory.
train_df = pd.read_csv('titanic_train.csv')
train_df.head()
# Data analysis
train_df.isnull() # boolean DataFrame: True wherever a value is missing
# sns.heatmap(train_df)
# Heatmap of the missing-value mask, with row labels and colour bar disabled.
sns.heatmap(train_df.isnull(),yticklabels=False,cbar=False)
sns.set_style('whitegrid') # personal choice; this style looks better
# Count of each 'Survived' value, split by 'Pclass'.
sns.countplot(x = 'Survived',hue='Pclass',data=train_df)

# Distribution of the non-missing 'Age' values, without a KDE curve.
sns.displot(train_df['Age'].dropna(),kde=False)

# Histogram of the 'SibSp' column.
train_df['SibSp'].plot.hist()

# Histogram of the 'Fare' column.
train_df['Fare'].plot.hist()
# Interactive plots
# NOTE(review): cufflinks presumably adds the .iplot accessor used below and
# go_offline() presumably switches it to offline rendering — confirm.
import cufflinks as cf
cf.go_offline()
train_df['Fare'].iplot(kind='hist')

lec3: Cleaning our Data

# Data cleaning
# Many 'Age' values are missing
def impute_age(col):
    """Return the age from a row, substituting a class-based default if missing.

    Args:
        col: A row holding ``age`` at position 0 and ``pclass`` at position 1.
            May be a pandas Series (as passed by ``DataFrame.apply(axis=1)``)
            or a plain sequence such as a list or tuple.

    Returns:
        The original age when it is not null; otherwise 37 for class 1,
        29 for class 2 and 24 for any other class.
        NOTE(review): the three defaults are presumably typical ages per
        passenger class — confirm against the data.
    """
    # Read by position explicitly. Plain ``col[0]`` on a label-indexed Series
    # relies on deprecated integer-as-position lookup and becomes a label
    # lookup (KeyError here) in newer pandas releases.
    values = col.iloc if hasattr(col, 'iloc') else col
    age = values[0]
    pclass = values[1]

    if not pd.isnull(age):
        return age
    if pclass == 1:
        return 37
    if pclass == 2:
        return 29
    return 24
# Replace missing ages row by row using the (Age, Pclass) pair.
train_df['Age'] = train_df[['Age', 'Pclass']].apply(impute_age, axis=1)

# Re-plot the missing-value mask to check the result of the imputation.
sns.heatmap(train_df.isnull())

# DataFrame.drop returns a new frame by default. The result used to be
# discarded, so 'Cabin' was never removed and the dropna() below also threw
# away every row whose Cabin was missing. Drop the column in place instead.
train_df.drop('Cabin', axis=1, inplace=True)

# Remove any row that still contains a missing value.
train_df.dropna(inplace=True)

# Text categories are converted to indicator (dummy) columns.
pd.get_dummies(train_df['Sex'])

# drop_first=True removes the first category's indicator column. For a
# two-valued column the remaining indicator fully determines the dropped one.
sex, embark = (
    pd.get_dummies(train_df[column], drop_first=True)
    for column in ('Sex', 'Embarked')
)
train_df = pd.concat([train_df, sex, embark], axis=1)

# Drop the original text columns and the identifier column in one call.
train_df.drop(
    columns=['Sex', 'Name', 'Embarked', 'Ticket', 'PassengerId'],
    inplace=True,
)