11/13/24, 12:09 AM DL_LAB_5
U21EC082 - Jinhal Maheshwari
In [1]: # Aim: To study and implement the logistic regression algorithm with and withou
In [2]: import numpy as np
In [3]: import pandas as pd
# Load the SUV purchase dataset into a DataFrame.
# NOTE(review): absolute, machine-specific Windows path - the notebook only runs
# where suv_data.csv exists at exactly this location.
data = pd.read_csv(r"C:\Users\aksha\Downloads\suv_data.csv")
# Preview the first rows to confirm which columns were read.
print(data.head())
User ID Gender Age EstimatedSalary Purchased
0 15624510 Male 19 19000 0
1 15810944 Male 35 20000 0
2 15668575 Female 26 43000 0
3 15603246 Female 27 57000 0
4 15804002 Male 19 76000 0
In [4]: from sklearn.model_selection import train_test_split
# Features: the Age and EstimatedSalary columns, extracted as a NumPy array.
X = data[['Age', 'EstimatedSalary']].values
# Target: the Purchased column (0/1 in the preview rows printed above).
y = data['Purchased'].values
# Hold out 25% of the rows as the test split.
# NOTE(review): the statement below is cut off in this export (it ends
# mid-argument at "random"); the rest of the call is not visible here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random
In [5]: from sklearn.preprocessing import StandardScaler
# Learn the per-feature scaling statistics from the training split only, then
# apply that same fitted transformation to both splits so that no test-set
# statistics influence the model.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)
In [6]: import numpy as np
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The result is built from exp(-|z|), which always lies in (0, 1], so inputs
    of large magnitude cannot overflow the exponential. (The direct formula
    evaluates exp(-z), which overflows for very negative z and makes NumPy
    emit an overflow RuntimeWarning.)

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); plain lists and tuples are accepted.

    Returns
    -------
    numpy scalar or ndarray
        Values in [0, 1] with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # For z >= 0 this is the textbook form. For z < 0 numerator and denominator
    # are both multiplied by exp(z), which keeps every term bounded.
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar and
    # leaves arrays of any other rank unchanged.
    return result[()]
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to every gradient update.
    num_iterations : int, default 1000
        Number of gradient-descent passes over the training data.

    Attributes
    ----------
    weights : ndarray of shape (num_features,) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    bias : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Estimate ``weights`` and ``bias`` from training data.

        Both parameters start at zero and are updated ``num_iterations`` times
        with the mean gradient of the log-loss over all samples.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features)
            Training features.
        y : array-like with num_samples entries
            Binary targets (0 or 1). A column vector is flattened.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or its row count does
            not match the number of targets.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the targets: an (n, 1) column would otherwise broadcast
        # against the (n,) predictions into an (n, n) matrix and corrupt the
        # gradient.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-D, got an array with {X.ndim} dimension(s)")
        num_samples, num_features = X.shape
        if num_samples == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != num_samples:
            raise ValueError(
                f"X has {num_samples} samples but y has {y.shape[0]} targets"
            )

        self.weights = np.zeros(num_features)
        self.bias = 0.0
        scale = 1 / num_samples  # loop-invariant averaging factor

        for _ in range(self.num_iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = sigmoid(linear_model)
            error = y_predicted - y

            # Mean log-loss gradients with respect to the weights and the bias.
            dw = scale * np.dot(X.T, error)
            db = scale * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        """Return hard class labels for ``X``.

        Parameters
        ----------
        X : array-like of shape (num_samples, num_features)
            Samples to classify.

        Returns
        -------
        ndarray of int
            1 where the predicted probability is strictly greater than 0.5,
            otherwise 0.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        # Vectorized threshold instead of a per-element Python loop.
        return (sigmoid(linear_model) > 0.5).astype(int)
# Instantiate the from-scratch classifier with its gradient-descent settings,
# then fit it on the standardized training split.
model = LogisticRegression(
    learning_rate=0.01,
    num_iterations=1000,
)
model.fit(X_train, y_train)
# Predict hard 0/1 class labels for the held-out test split.
In [7]: y_pred = model.predict(X_test)
# Accuracy = number of predictions equal to the true label / number of test rows.
In [8]: accuracy = np.sum(y_pred == y_test) / len(y_test)
print(f"Accuracy (Custom Logistic Regression): {accuracy * 100:.2f}%")
Accuracy (Custom Logistic Regression): 86.00%
In [9]: import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Plot a classifier's decision regions over two features and overlay the
# labelled points.
#   X     : feature array; column 0 goes on the x-axis, column 1 on the y-axis
#   y     : class labels aligned with the rows of X
#   model : object whose predict() accepts an (n, 2) array of grid points
# Draws with matplotlib and calls plt.show(); nothing is returned.
def plot_decision_boundary(X, y, model):
X_set, y_set = X, y
# Build a grid covering the data range, starting 1 unit below each feature's minimum.
# NOTE(review): the next four lines are cut off at the right edge in this
# export; their trailing arguments are not visible here.
X1, X2 = np.meshgrid(np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0]
np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1]
# Classify every grid point and draw the predicted regions as filled contours.
predictions = model.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1
plt.contourf(X1, X2, predictions, alpha=0.75, cmap=ListedColormap([(0.6, 0.6
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# One scatter call per distinct label so each class gets its own colour and legend entry.
for i, j in enumerate(np.unique(y_set)):
# NOTE(review): passing a single RGB tuple through c= is what produces the
# matplotlib UserWarning printed below this cell; that warning recommends the
# color= keyword instead.
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
c=[(0, 0, 1), (0, 1, 0)][i], label=j, edgecolor='black') #
plt.title('Logistic Regression Decision Boundary (Custom Implementation)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()
# NOTE(review): X_test was standardized earlier in the notebook, so the axes show
# scaled values rather than raw age / salary.
plot_decision_boundary(X_test, y_test, model)
C:\Users\aksha\AppData\Local\Temp\ipykernel_16312\520669100.py:16: UserWarning:
*c* argument looks like a single numeric RGB or RGBA sequence, which should be
avoided as value-mapping will have precedence in case its length matches with
*x* & *y*. Please use the *color* keyword-argument or provide a 2D array with a
single row if you intend to specify the same RGB or RGBA value for all points.
plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
file:///C:/Users/aksha/Downloads/DL_LAB_5 (4).html 2/3
11/13/24, 12:09 AM DL_LAB_5
In [10]: from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression
from sklearn.metrics import accuracy_score
# Baseline for comparison: scikit-learn's LogisticRegression with its default
# settings, trained and evaluated on the same splits as the custom model.
sklearn_model = SklearnLogisticRegression()
sklearn_model.fit(X_train, y_train)
sklearn_y_pred = sklearn_model.predict(X_test)
# True labels are passed first, predictions second.
sklearn_accuracy = accuracy_score(y_test, sklearn_y_pred)
# NOTE(review): the print statement below is cut off at the right edge in this export.
print(f"Accuracy (Scikit-learn Logistic Regression): {sklearn_accuracy * 100:.2f
Accuracy (Scikit-learn Logistic Regression): 86.00%
file:///C:/Users/aksha/Downloads/DL_LAB_5 (4).html 3/3