Name - Ankit Kumar
Roll - BTECH/10066/22
LAB-5
Program1:
import csv
import numpy as np
import matplotlib.pyplot as plt
# Load the data
data = []
with open('Salary_data.csv', 'r') as file:
    reader = csv.reader(file)
    next(reader)  # Skip header row
    for row in reader:
        data.append([float(row[0]), float(row[1])])
# Separate the data into Experience (X) and Salary (Y)
X = np.array([row[0] for row in data])
Y = np.array([row[1] for row in data])
# Plot Experience vs. Salary
plt.scatter(X, Y, color='blue')
plt.xlabel('Experience (years)')
plt.ylabel('Salary')
plt.title('Experience vs. Salary')
plt.show()
# Initialize parameters
m = 0 # Slope
b = 0 # Intercept
learning_rate = 0.01
iterations = 1000
n = len(X)
# Function to compute Mean Squared Error
def compute_mse(X, Y, m, b):
    total_error = 0
    for i in range(len(X)):
        total_error += (Y[i] - (m * X[i] + b)) ** 2
    return total_error / len(X)  # Use len(X) instead of relying on the global n
# Gradient Descent
errors = []
for _ in range(iterations):
    m_grad = 0
    b_grad = 0
    for i in range(len(X)):
        m_grad += -2 * X[i] * (Y[i] - (m * X[i] + b))
        b_grad += -2 * (Y[i] - (m * X[i] + b))
    m -= (m_grad / n) * learning_rate
    b -= (b_grad / n) * learning_rate
    mse = compute_mse(X, Y, m, b)
    errors.append(mse)
print(f"Final Parameters: m = {m}, b = {b}")
# Plot Training Error at Each Iteration
plt.plot(range(iterations), errors, color='red')
plt.xlabel('Iteration')
plt.ylabel('Mean Squared Error')
plt.title('Training Error at Each Iteration')
plt.show()
# Plot Experience vs. Salary with Best Fit Line
plt.scatter(X, Y, color='blue')
plt.plot(X, [m * x + b for x in X], color='red') # Best fit line
plt.xlabel('Experience (years)')
plt.ylabel('Salary')
plt.title('Experience vs. Salary with Best Fit Line')
plt.show()
OUTPUT:
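Note (sanity check, not part of the submitted program): the slope and intercept found by gradient descent can be compared against the closed-form least-squares line. np.polyfit(X, Y, 1) minimizes the same mean squared error, so m and b from the loop above should approach these values as the number of iterations grows.

# Closed-form least-squares fit for comparison (degree-1 polynomial)
m_ls, b_ls = np.polyfit(X, Y, 1)
print(f"Closed-form least squares: m = {m_ls}, b = {b_ls}")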
Program2:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Load the dataset
data = pd.read_csv('insurance.csv')
# Display the top 10 samples of the dataset
print(data.head(10))
# Display the features and label
features = data.columns[:-1]
label = data.columns[-1]
print("Features (Independent Variables):", features.tolist())
print("Label (Dependent Variable):", label)
# Remove missing value samples
data = data.dropna()
print("Number of samples after removing missing values:", len(data))
# Convert categorical variables to numeric using one-hot encoding
data = pd.get_dummies(data, columns=['sex', 'smoker', 'region'], drop_first=True)
# Update the features to reflect the one-hot encoded columns. After
# get_dummies the label is no longer the last column of the frame, so
# select the features by dropping the label column by name rather than
# slicing off the final column.
features = data.columns.drop(label)
# Normalize the feature set
scaler = MinMaxScaler()
data[features] = scaler.fit_transform(data[features])
print("Normalized feature set:")
print(data.head(10))
# Split the data into training and testing sets
X = data[features]
y = data[label]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Number of training samples:", len(X_train))
print("Number of testing samples:", len(X_test))
# Train the regression model
model = LinearRegression()
model.fit(X_train, y_train)
print("Model coefficients:", model.coef_)
print("Model intercept:", model.intercept_)
# Predict the test data
y_pred = model.predict(X_test)
# Calculate and display the testing error (Mean Squared Error)
mse = mean_squared_error(y_test, y_pred)
print("Testing Error (Mean Squared Error):", mse)
OUTPUT:
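Note (optional follow-up, not part of the submitted program): the MSE is in squared units of the label, so the root mean squared error and the R^2 score are often easier to interpret. A minimal sketch, reusing mse, y_test, and y_pred from above:

from sklearn.metrics import r2_score
rmse = mse ** 0.5  # Error in the same units as the label
r2 = r2_score(y_test, y_pred)  # Fraction of label variance explained
print("Testing RMSE:", rmse)
print("R^2 score:", r2)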
Program3:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
# Load the Iris dataset from the local CSV file
data = pd.read_csv('iris.csv')
# Display the top 10 samples of the dataset
print(data.head(10))
# Check the column names to identify the target variable
print("Column names:", data.columns)
# The target variable column name is 'Species'
target_variable = 'Species'
# Encode the class labels into numeric values
label_encoder = LabelEncoder()
data[target_variable] = label_encoder.fit_transform(data[target_variable])
# Split the data into features (X) and labels (y)
X = data.iloc[:, 1:-1] # Exclude the 'Id' column and the target variable column
y = data.iloc[:, -1]
# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Logistic Regression implementation
class LogisticRegression:
    def __init__(self, learning_rate=0.01, iterations=1000):
        self.learning_rate = learning_rate
        self.iterations = iterations

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        self.m, self.n = X.shape
        self.weights = np.zeros(self.n)
        self.bias = 0
        self.errors = []
        epsilon = 1e-7  # Small epsilon value to avoid log(0)
        for _ in range(self.iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_pred = self.sigmoid(linear_model)
            dw = (1 / self.m) * np.dot(X.T, (y_pred - y))
            db = (1 / self.m) * np.sum(y_pred - y)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
            loss = -(1 / self.m) * np.sum(y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon))
            self.errors.append(loss)

    def predict(self, X):
        # Binary decision rule: this model separates only two classes, so
        # accuracy on the three encoded Iris labels (0, 1, 2) is limited.
        linear_model = np.dot(X, self.weights) + self.bias
        y_pred = self.sigmoid(linear_model)
        return [1 if i > 0.5 else 0 for i in y_pred]
# Train the logistic regression model
log_reg = LogisticRegression(learning_rate=0.01, iterations=1000)
log_reg.fit(X_train, y_train)
print("Model weights:", log_reg.weights)
print("Model bias:", log_reg.bias)
# Predict the test data
y_pred = log_reg.predict(X_test)
# Calculate and display the accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
OUTPUT:
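Note (optional cross-check, not part of the submitted program): the scratch model above is binary, while the Iris labels have three classes, so its accuracy is inherently limited. scikit-learn's LogisticRegression handles all three classes out of the box and gives a useful reference accuracy. A minimal sketch, reusing the train/test split from above:

from sklearn.linear_model import LogisticRegression as SkLogisticRegression
sk_model = SkLogisticRegression(max_iter=1000)  # Multiclass handled by default
sk_model.fit(X_train, y_train)
print("scikit-learn accuracy:", sk_model.score(X_test, y_test))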