Apply classification algorithms to predict whether a passenger survived the Titanic disaster, using features from the real Kaggle dataset such as Age, Sex (gender), Fare, and Passenger Class.
The Titanic dataset contains details for 891 passengers. Key columns include Survived (the prediction target), Pclass (passenger class), Sex, Age, and Fare.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
# Load the passenger records; "titanic.csv" is read relative to the current
# working directory.
df = pd.read_csv("titanic.csv")

# Handle missing values: replace missing ages with the median age.
# The filled Series is assigned back to the column rather than calling
# fillna(inplace=True) on the df['Age'] selection. That chained in-place form
# triggers a FutureWarning on recent pandas and, with Copy-on-Write enabled,
# modifies only a temporary object, leaving the NaNs in df.
df['Age'] = df['Age'].fillna(df['Age'].median())

# Encode categorical variables: 'male' -> 0, 'female' -> 1.
# Any other value in the column maps to NaN.
df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

# Select the feature matrix and the binary target column.
X = df[['Pclass', 'Sex', 'Age', 'Fare']]
y = df['Survived']

# Split dataset: hold out 20% of the rows for evaluation; the fixed
# random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train model: max_iter is raised to 200 to give the solver more iterations
# on these unscaled features.
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Predict & evaluate on the held-out rows.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
