import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Read the housing dataset from the current working directory.
df = pd.read_csv("Housing.csv")
# Preview the first five rows. `display` is not imported in this file; it is
# presumably provided by the notebook (IPython) session -- confirm before
# running this as a plain script.
display(df.head(n=5))
      price  area  bedrooms  bathrooms  stories mainroad guestroom basement  \
0  13300000  7420         4          2        3      yes        no       no
1  12250000  8960         4          4        4      yes        no       no
2  12250000  9960         3          2        2      yes        no      yes
3  12215000  7500         4          2        2      yes        no      yes
4  11410000  7420         4          1        2      yes       yes      yes
hotwaterheating airconditioning parking prefarea furnishingstatus
0 no yes 2 yes furnished
1 no yes 3 no furnished
2 no no 2 yes semi-furnished
3 no yes 3 yes furnished
4 no yes 2 no furnished
# Single predictor: the `price` column. Target: the `area` column.
# NOTE(review): this fits area as a function of price -- confirm that this
# direction (rather than price from area) is what is intended.
X = df[["price"]]  # selecting with a list keeps X two-dimensional
y = df["area"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so the test data does not influence them.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Ordinary least-squares fit on the standardized feature; `fit` returns the
# estimator itself, so the construction and training can be chained.
model = LinearRegression().fit(X_train_scaled, y_train)
LinearRegression()
# Score the fitted model on the held-out split.
y_pred = model.predict(X_test_scaled)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")

# "Efficiency" here is simply the R² score expressed as a percentage.
efficiency_percentage = 100 * r2
print(f"Model Efficiency: {efficiency_percentage:.2f}%")
Mean Squared Error: 2935124.1250945274
R² Score: 0.2863227465807434
Model Efficiency: 28.63%
# Report the fitted line. The model is trained with `area` as the target and
# the standardized `price` column as its only input, so the equation is
# labelled accordingly (the previous text called the target "Price").
coefficients = model.coef_
intercept = model.intercept_
# The message is built from two adjacent f-string literals inside parentheses:
# a single-quoted string literal cannot span physical lines.
print(
    f"Model Equation: Area = {intercept:.2f} + "
    f"{coefficients[0]:.2f} * Price (standardized)"
)
Model Equation: Price = 5154.14 + 1175.36 * Feature1
# A 2-D scatter/line plot only makes sense when there is exactly one feature.
if X.shape[1] == 1:
    # Held-out observations, plotted against the unscaled price.
    plt.scatter(X_test, y_test, color='blue', label='Actual Data')
    # Model predictions for the same rows; for a single-feature linear model
    # these points all fall on one straight line.
    plt.plot(
        X_test,
        y_pred,
        color='red',
        linewidth=2,
        label='Predicted Line',
    )
    plt.xlabel('Price')
    plt.ylabel('Area')
    plt.legend()
    plt.title('Linear Regression Prediction')
    plt.show()