# Python Prediction Project by Dikiza
# Copyright © All Rights Reserved

#

# Project: Predictive Academic Performance Model


# Description: This Python script demonstrates a simple machine learning model
# to predict a student's final grade based on a simulated dataset.
# It uses the pandas library for data handling and scikit-learn
# for the machine learning model itself.
#
# How to Run:
#   1. Ensure you have Python installed.
#   2. Install the required libraries:
#        pip install numpy pandas scikit-learn
#   3. Save this code as a .py file (e.g., 'predictive_model.py').
#   4. Run it from your terminal:
#        python predictive_model.py
#
# This code is fully self-contained for a competition presentation.
#

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# --- 1. Create a Simulated Dataset ---

# A real project would read its records from a CSV file. This demo instead
# builds a small synthetic table in memory, one list per column.
#   Features: 'Hours_Studied', 'Attendance_Percentage', 'Midterm_Grade'
#   Target  : 'Final_Grade' (the value the model learns to predict)

print("--- Creating a simulated dataset... ---")

# Position i in every list describes the same student.
data = dict(
    Hours_Studied=[2, 5, 3, 7, 4, 6, 8, 3, 5, 9, 1, 6, 4, 8, 2],
    Attendance_Percentage=[
        85, 95, 88, 98, 92, 90, 99, 80, 94, 97, 75, 91, 89, 96, 82,
    ],
    Midterm_Grade=[70, 85, 75, 90, 80, 88, 95, 65, 83, 92, 60, 89, 78, 93, 72],
    Final_Grade=[75, 90, 80, 95, 85, 92, 98, 68, 87, 96, 62, 90, 82, 97, 76],
)

# Turn the column lists into a DataFrame (one row per student) and show it.
df = pd.DataFrame(data=data)
print("\nSimulated Dataset:", df, sep="\n")
print("\n" + 50 * "=" + "\n")

# --- 2. Data Preparation ---

# Separate the model inputs from the value to be predicted:
#   X holds the three feature columns, y holds 'Final_Grade'.

print("--- Preparing data for the model... ---")

X = df.loc[:, ['Hours_Studied', 'Attendance_Percentage', 'Midterm_Grade']]
y = df.loc[:, 'Final_Grade']

# Hold out 20% of the rows for testing and train on the remaining 80%.
# The fixed random_state makes the shuffle, and so the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)
print(f"Training set size: {len(X_train)} samples")
print(f"Testing set size: {len(X_test)} samples")
print("\n" + 50 * "=" + "\n")

# --- 3. Model Training ---

print("--- Training the Linear Regression model... ---")

# Create the Linear Regression estimator and fit it on the training rows in
# one step; fit() hands back the fitted estimator, which is kept as `model`.
# Fitting is where the relationship between X_train and y_train is learned.
model = LinearRegression().fit(X_train, y_train)
print("Model training complete.")
print("\n" + 50 * "=" + "\n")

# --- 4. Model Evaluation ---

print("--- Evaluating the model's performance... ---")

# Predict the final grade for the held-out test rows.
y_pred = model.predict(X_test)

# Score the predictions against the true test grades:
#   MSE - Mean Squared Error; lower means a better fit.
#   R2  - R-squared; closer to 1.0 means a better fit.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R-squared (R²): {r2:.2f}",
    sep="\n",
)
print("\n" + 50 * "=" + "\n")

# --- 5. Making a New Prediction ---

# Use the fitted model to estimate the final grade of a student who is not
# part of the dataset.

print("--- Making a new prediction... ---")

# One row, with values in the same order as the training feature columns:
# hours studied, attendance percentage, midterm grade.
new_student_data = np.array([[6, 95, 85]])
# Wrap the row in a DataFrame carrying the training column names so the
# model receives input labelled the same way as the data it was fitted on.
new_student_df = pd.DataFrame(
    new_student_data,
    columns=['Hours_Studied', 'Attendance_Percentage', 'Midterm_Grade'],
)

# predict() returns one value per input row; there is a single row here.
predicted_grade = model.predict(new_student_df)

# to_string() renders a multi-line table (header row + value row). Start it
# on its own line: appended directly after the label, the header row is
# pushed to the right and no longer lines up with the values beneath it.
print(f"New student data:\n{new_student_df.to_string(index=False)}")

print(f"\nPredicted Final Grade for this student: {predicted_grade[0]:.2f}")
print("\n" + "="*50 + "\n")

# Optional: show the fitted coefficients to give some insight into the model.
# The table has one row per feature column of X, in X's column order, and a
# single 'Coefficient' column holding that feature's learned weight.
# NOTE(review): the features are on different scales (hours vs. percentages),
# so raw coefficient sizes are not directly comparable between features.

print("--- Model Coefficients (insight into the model) ---")
coefficients = pd.DataFrame(
    data=model.coef_,
    index=X.columns,
    columns=['Coefficient'],
)
print(coefficients)
print("\n(This shows the influence of each feature on the final grade.)")

# End of script.