import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
# Load the iris dataset bundled with scikit-learn.
iris = load_iris()

# Features as a DataFrame (one column per entry in iris.feature_names)
# and the class labels as a Series named 'target'.
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.Series(iris.target, name='target')

# Combine features and target into one frame so seaborn can colour by class.
df = pd.concat([X, y], axis=1)

# Pairwise plots of the features, coloured by class.
sns.pairplot(df, hue='target', palette='bright')
# y=1.02 puts the title slightly above the top edge of the figure so it
# does not overlap the top row of plots.
plt.suptitle('Pairplot of Iris Dataset', y=1.02)
plt.show()

# Check class distribution.
# seaborn >= 0.13 deprecates passing `palette` without `hue`; assigning the
# x variable to `hue` and disabling the legend keeps the same per-class bar
# colours without the FutureWarning.
sns.countplot(x='target', hue='target', data=df, palette='bright',
              legend=False)
plt.title('Class Distribution')
plt.show()
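# Output (stderr):
# /tmp/ipython-input-868451768.py:6: FutureWarning:
# Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0.
# Assign the `x` variable to `hue` and set `legend=False` for the same effect.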
# Hold out 20% of the samples for testing. `stratify=y` keeps the class
# proportions the same in both splits; `random_state` makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows. Fitting on the full dataset before splitting would let
# test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full feature matrix scaled with the training statistics; kept so any later
# code that refers to `X_scaled` still finds it.
X_scaled = scaler.transform(X)

# One-vs-rest logistic regression; max_iter=200 raises the solver's
# iteration limit above the library default.
# NOTE(review): recent scikit-learn releases deprecate the `multi_class`
# argument. It is left as-is here because later code reads `logreg.coef_`;
# confirm the migration path against the installed version.
logreg = LogisticRegression(multi_class='ovr', max_iter=200)
logreg.fit(X_train, y_train)
# Output (stderr; warning message truncated in the export):
# /usr/local/lib/python3.12/dist-packages/sklearn/linear_model/_logistic.py:1256:
#   warnings.warn(
# Output (cell result, estimator repr):
# LogisticRegression(max_iter=200, multi_class='ovr')
# Predict on the held-out split and report overall accuracy.
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Confusion matrix: counts from confusion_matrix(y_test, y_pred), drawn as
# an annotated heatmap with the class names on both axes.
cm = confusion_matrix(y_test, y_pred)
class_labels = iris.target_names
plt.figure(figsize=(6, 4))
heatmap_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Greens',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
heatmap_ax.set_xlabel('Predicted Label')
heatmap_ax.set_ylabel('True Label')
heatmap_ax.set_title('Confusion Matrix')
plt.show()

# Per-class precision / recall / F1 summary.
print("Classification Report:")
report_text = classification_report(
    y_test, y_pred, target_names=class_labels)
print(report_text)
# Output:
# Accuracy: 0.90
# Classification Report:
#               precision    recall  f1-score   support
#       setosa       1.00      1.00      1.00        10
#   versicolor       0.89      0.80      0.84        10
#    virginica       0.82      0.90      0.86        10
# Tabulate the fitted coefficients: rows are labelled with the class names,
# columns with the feature names.
# (The original line was cut off mid-expression; the `index=` argument and
# the closing parenthesis are restored here.)
coefficients = pd.DataFrame(
    logreg.coef_,
    columns=iris.feature_names,
    index=iris.target_names,
)

# Grouped bar chart: one group per class, one bar per feature.
coefficients.plot(kind='bar', figsize=(8, 6))
plt.title('Logistic Regression Coefficients by Class')
plt.ylabel('Coefficient Value')
# Rotate the class-name tick labels 45 degrees.
plt.xticks(rotation=45)
plt.show()