CODE
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor, plot_importance
import joblib
# Load dataset
dataset_path = 'C:\\Users\\Manoj N\\Desktop\\AIML PAPERS\\fuel_efficiency_large_dataset.xlsx'
df = pd.read_excel(dataset_path)
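# Note: pandas reads .xlsx files through the openpyxl engine, which must be
# installed separately (pip install openpyxl) if it is not already available.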
# Data exploration
print("First few rows of the dataset:")
print(df.head())
print("\nDataset Information:")
df.info()  # DataFrame.info() prints its summary directly, so no print() wrapper is needed
print("\nStatistical Summary:")
print(df.describe())
print("\nMissing values in each column:")
print(df.isnull().sum())
# Drop missing values
df = df.dropna()
# Define features and target variable
features = df[['Aircraft_Weight_lbs', 'Engine_Power_HP', 'Altitude_feet',
'Cruise_Speed_knots', 'Number_of_Cylinders',
'Flight_Duration_hours', 'Outside_Air_Temperature_C',
'Fuel_Flow_Rate_gallons_hour']]
target = df['Fuel_Efficiency_mpg']
# Split the data
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2,
                                                    random_state=42)
# Initialize the model
model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6,
                     random_state=42)
# Cross-validation
cv_scores = cross_val_score(model, X_train, y_train, cv=5)
print(f"Cross-Validation Scores: {cv_scores}")
print(f"Mean CV Score: {cv_scores.mean()}")
# Train the model
model.fit(X_train, y_train)
# Predict and evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"\nMean Squared Error: {mse}")
print(f"R² Score: {r2}")
# Plot Actual vs Predicted
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_test, color="red", label="Actual Fuel Efficiency", alpha=0.6)
plt.scatter(y_test, y_pred, color="blue", label="Predicted Fuel Efficiency", alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color='black',
         linestyle='--', label='Ideal Prediction')
plt.xlabel("Fuel Efficiency (mpg)")
plt.ylabel("Fuel Efficiency (mpg)")
plt.title("Actual vs Predicted Fuel Efficiency")
plt.legend()
plt.grid()
plt.show()
# Plot Actual Fuel Efficiency (Reference)
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_test, color="red", label="Actual Fuel Efficiency", alpha=0.6)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], color='black',
         linestyle='--', label='Ideal Line')
plt.xlabel("Fuel Efficiency (mpg)")
plt.ylabel("Fuel Efficiency (mpg)")
plt.title("Actual Fuel Efficiency (Reference)")
plt.legend()
plt.grid()
plt.show()
# Plot Predicted Fuel Efficiency (Reference)
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, color="blue", label="Predicted Fuel Efficiency", alpha=0.6)
plt.plot([y_pred.min(), y_pred.max()], [y_pred.min(), y_pred.max()], color='black',
         linestyle='--', label='Ideal Line')
plt.xlabel("Fuel Efficiency (mpg)")
plt.ylabel("Fuel Efficiency (mpg)")
plt.title("Predicted Fuel Efficiency (Reference)")
plt.legend()
plt.grid()
plt.show()
# Feature Importance
print("\nFeature Importance:")
plot_importance(model)
plt.show()
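# plot_importance uses importance_type='weight' (the number of times a feature
# is used to split) by default; a gain-based ranking can be drawn instead,
# e.g. (optional sketch): plot_importance(model, importance_type='gain')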
# Save the model
joblib.dump(model, 'xgb_model_fuel_efficiency.joblib')
print("Model saved as xgb_model_fuel_efficiency.joblib")
RESULTS:
Mean Squared Error: 0.0004593857667961739
R² Score: 0.9936998769838343
Figure 5.1: Predicted fuel efficiency
Figure 5.2: Actual fuel efficiency
Figure 5.3: Actual vs Predicted fuel efficiency
Figure 5.4: Feature importance of parameters affecting fuel efficiency