0% found this document useful (0 votes)
12 views1 page

Data Bytes Code

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
12 views1 page

Data Bytes Code

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT or PDF, or read online on Scribd

import matplotlib.pyplot as plt
import pandas as pd

# Load the spreadsheet.
# NOTE(review): the original input file name was lost in extraction (it
# appeared as '[Link]'). It is presumed here to match the name of the cleaned
# output file written later in this script -- confirm, and update the path if
# your workbook lives elsewhere.
file_path = 'StudentPerformanceFactors.xlsx'

# Open the workbook in a context manager so the underlying file handle is
# released as soon as the first sheet has been read.
with pd.ExcelFile(file_path) as excel_data:
    df = excel_data.parse(excel_data.sheet_names[0])  # Load the first sheet

# Display basic information about the raw dataset.
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None" line.
print("Initial dataset info:")
df.info()

# Ensure that numeric columns have a numeric dtype. Adjust this list to match
# the columns of your dataset. Values that cannot be parsed become NaN
# (errors='coerce').
numeric_columns = [
    'Hours_Studied',
    'Attendance',
    'Sleep_Hours',
    'Previous_Scores',
    'Tutoring_Sessions',
    'Physical_Activity',
    'Exam_Score',
]
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Remove rows with any missing values. This runs after the numeric conversion
# so that rows whose values were coerced to NaN are dropped as well.
df.dropna(inplace=True)

# Drop irrelevant columns if necessary (customize as needed), for example:
# df.drop(columns=['Unnecessary_Column_Name'], inplace=True)

# Display cleaned dataset info.
print("\nDataset info after cleaning:")
df.info()

# Save the cleaned data to a new file. The file name is defined once so the
# path that is written and the path that is reported cannot drift apart.
cleaned_file_path = 'Cleaned_StudentPerformanceFactors.xlsx'
df.to_excel(cleaned_file_path, index=False)
print(f"Cleaned dataset saved as '{cleaned_file_path}'")

# Visualization

# Bar Chart: Average Exam Score by Attendance Level
attendance_bins = [0, 50, 75, 100]  # Bin edges for the attendance levels
attendance_labels = ['Low (0-50%)', 'Medium (50-75%)', 'High (75-100%)']

# include_lowest=True keeps an attendance of exactly 0 in the 'Low' bin; by
# default the first interval is open on the left and 0 would become NaN.
df['Attendance_Level'] = pd.cut(
    df['Attendance'],
    bins=attendance_bins,
    labels=attendance_labels,
    include_lowest=True,
)

# observed=False keeps every attendance level on the chart even when a level
# has no rows, and makes the categorical grouping behaviour explicit instead
# of relying on the library default.
avg_exam_score_by_attendance = (
    df.groupby('Attendance_Level', observed=False)['Exam_Score'].mean()
)

plt.figure(figsize=(8, 5))
avg_exam_score_by_attendance.plot(kind='bar', color='skyblue', edgecolor='black')
plt.title('Average Exam Score by Attendance Level')
plt.xlabel('Attendance Level')
plt.ylabel('Average Exam Score')
plt.xticks(rotation=45)
plt.tight_layout()  # Keep the rotated tick labels inside the figure
plt.show()

# Histogram: Distribution of Hours Studied
# Missing values are excluded explicitly so the bin range is always computed
# from real observations, regardless of how the column was cleaned upstream.
hours_studied = df['Hours_Studied'].dropna()

plt.figure(figsize=(8, 5))
plt.hist(hours_studied, bins=10, color='lightgreen', edgecolor='black')
plt.title('Distribution of Hours Studied')
plt.xlabel('Hours Studied')
plt.ylabel('Frequency')
plt.show()

You might also like