ml-regression
October 23, 2024
[3]: # Load the car dataset
import pandas as pd
# Read the used-car listings from the uploaded CSV file.
data = pd.read_csv('/content/car data.csv')
# Check for missing values or inconsistencies
missing_values = data.isnull().sum()  # number of missing cells in each column
data_summary = data.describe()        # count/mean/std/quantiles of the numeric columns
# Leave both results as the cell's last expression so the notebook displays them.
missing_values, data_summary
[3]: (Car_Name 0
Year 0
Selling_Price 0
Present_Price 0
Kms_Driven 0
Fuel_Type 0
Seller_Type 0
Transmission 0
Owner 0
dtype: int64,
Year Selling_Price Present_Price Kms_Driven Owner
count 301.000000 301.000000 301.000000 301.000000 301.000000
mean 2013.627907 4.661296 7.628472 36947.205980 0.043189
std 2.891554 5.082812 8.644115 38886.883882 0.247915
min 2003.000000 0.100000 0.320000 500.000000 0.000000
25% 2012.000000 0.900000 1.200000 15000.000000 0.000000
50% 2014.000000 3.600000 6.400000 32000.000000 0.000000
75% 2016.000000 6.000000 9.900000 48767.000000 0.000000
max 2018.000000 35.000000 92.600000 500000.000000 3.000000)
[8]: import numpy as np
# Select the model inputs and the regression target.
feature_columns = ['Owner', 'Kms_Driven']
# get_dummies one-hot encodes any categorical columns in the selection.
X_car = pd.get_dummies(data[feature_columns], drop_first=True)
y = data['Selling_Price']
1
def train_test_split_manual(X, y, test_size=0.2, random_state=None):
    """Randomly split features and target into train and test subsets.

    Args:
        X: pandas object holding the features; rows are selected with ``.iloc``.
        y: pandas object holding the target, with one entry per row of ``X``.
        test_size: Fraction of rows assigned to the test set. The test set
            receives ``int(len(X) * test_size)`` rows (rounded down).
        random_state: Optional seed. When given, the shuffle is reproducible
            and matches what ``np.random.seed(random_state)`` followed by
            ``np.random.permutation`` would produce. When ``None``, NumPy's
            global random state is used.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.

    Raises:
        ValueError: If ``X`` and ``y`` have different lengths, or
            ``test_size`` is outside the range [0, 1].
    """
    n_rows = len(X)
    if len(y) != n_rows:
        raise ValueError(
            f"X and y must have the same length, got {n_rows} and {len(y)}"
        )
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size}")

    if random_state is None:
        indices = np.random.permutation(n_rows)
    else:
        # A private generator yields the same permutation as seeding the
        # global one, without resetting random state for the rest of the
        # notebook.
        indices = np.random.RandomState(random_state).permutation(n_rows)

    test_set_size = int(n_rows * test_size)
    test_indices = indices[:test_set_size]
    train_indices = indices[test_set_size:]
    return (
        X.iloc[train_indices],
        X.iloc[test_indices],
        y.iloc[train_indices],
        y.iloc[test_indices],
    )
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split_manual(
    X_car, y, test_size=0.2, random_state=42
)
# Implementing a simple linear regression model
class SimpleLinearRegression:
    """Ordinary least-squares linear regression with an intercept term.

    Attributes:
        coefficients: Fitted parameters with the intercept first, followed by
            one weight per feature column. ``None`` until ``fit`` is called.
    """

    def __init__(self):
        self.coefficients = None

    @staticmethod
    def _add_bias(X):
        """Return ``X`` as a float array with a leading column of ones."""
        X = np.asarray(X, dtype=float)
        return np.column_stack([np.ones(X.shape[0]), X])

    def fit(self, X, y):
        """Estimate the coefficients that minimise the squared error.

        Args:
            X: Feature matrix with one row per sample (array-like or DataFrame).
            y: Target values, one per row of ``X``.
        """
        # Add a bias term (column of 1s) to the input matrix
        design = self._add_bias(X)
        target = np.asarray(y, dtype=float)
        # Solve the least-squares problem directly instead of inverting
        # X^T X: the result is the same when X^T X is invertible, but this
        # stays accurate with badly scaled features and still returns a
        # (minimum-norm) solution when columns are collinear.
        self.coefficients, *_ = np.linalg.lstsq(design, target, rcond=None)

    def predict(self, X):
        """Predict targets for ``X`` using the fitted coefficients.

        Args:
            X: Feature matrix with the same columns used in ``fit``.

        Returns:
            NumPy array of predictions, one per row of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.coefficients is None:
            raise RuntimeError("SimpleLinearRegression must be fitted before predict()")
        # Add the bias term to the input matrix, then apply the coefficients
        return self._add_bias(X).dot(self.coefficients)
# Instantiate the model and fit it on the training split
model_car = SimpleLinearRegression()
model_car.fit(X_train, y_train)

# Predict selling prices for the held-out test rows
y_car = model_car.predict(X_test)
def calculate_rmse(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    Args:
        y_true: Observed values (array-like, list or pandas Series).
        y_pred: Predicted values, in the same order as ``y_true``.

    Returns:
        The square root of the mean squared difference.
    """
    # Compare by position: subtracting two pandas Series directly would align
    # them on index labels and yield NaN wherever the labels differ.
    errors = np.asarray(y_true, dtype=float) - np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean(errors ** 2))
# Calculate RMSE
rmse_car = calculate_rmse(y_test, y_car)
# Pair each held-out actual price with the model's prediction
predicted_sales = pd.DataFrame({'Actual Selling Price': y_test, 'Predicted Price': y_car})
# Display the first few rows of the predictions
print(predicted_sales.head())
print(f'RMSE score: {rmse_car}')
Actual Selling Price Predicted Price
177 0.35 4.638736
289 10.11 4.570413
228 4.95 4.827647
198 0.15 2.985661
60 6.95 4.722701
RSME score: 4.786033522433056