0% found this document useful (0 votes)

4 views, 6 pages

Program 1

The document contains three Python programs that perform data analysis and preprocessing using pandas, numpy, and sklearn. Program 1 visualizes IPL 2022 batters' runs and matches, Program 2 handles missing data, applies one-hot encoding, and scales features on a sample dataset, and Program 3 analyzes the iris dataset, including visualizations such as pairplots, boxplots, and correlation heatmaps. Each program includes data loading, processing, and visualization steps.

Uploaded by

ca245213206

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

4 views, 6 pages

Program 1

Uploaded by

ca245213206

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

Program 1

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Raw string so the Windows backslashes are taken literally instead of being
# parsed as (invalid) escape sequences such as "\M" and "\I".
file_path = r"S:\ML\IPL 2022 Batters.csv"
data = pd.read_csv(file_path)

# Quick look at the data: sample rows, schema, and summary statistics.
print("First 5 rows of the dataset: ")
print(data.head())
print("\nData set Info: ")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would emit a stray "None" line.
data.info()
print("\nStatistical Summary: ")
print(data.describe())

# Scatter plot of the 'Runs' column (x axis) against the 'Mat' column (y axis).
x = data['Runs']
y = data['Mat']  # per an earlier correction, the column is 'Mat', not 'Matches'

plt.xlabel("Runs")
plt.ylabel("Matches")
plt.scatter(x, y, color='red')
plt.show()
Program 2
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
# Sample data: eight records, with NaN gaps in the numeric columns so the
# imputation step further down has something to fill in.
data = dict(
    Age=[25, 30, np.nan, 45, 22, 38, 50, np.nan],
    Salary=[50000, 60000, 75000, np.nan, 48000, 8000, 9000, 65000],
    Gender=['Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female'],
    city=['New York', 'London', 'Paris', 'New York', 'London', 'Paris', 'New York', 'London'],
    Purchase=['No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes'],
)

df = pd.DataFrame(data)

# One call emitting the heading, the frame, and the trailing blank lines.
print("Original DataFrame: ", df, "\n", sep="\n")

# Step 1: Imputation
# Numeric gaps are filled with the column mean, categorical gaps with the
# most frequent value of the column.
numerical_features = ['Age', 'Salary']
categorical_features = ['Gender', 'city']

imputer_numerical = SimpleImputer(strategy='mean')
imputer_categorical = SimpleImputer(strategy='most_frequent')

imputation_steps = [
    ('num_imputer', imputer_numerical, numerical_features),
    ('cat_imputer', imputer_categorical, categorical_features),
]
preprocessor = ColumnTransformer(
    transformers=imputation_steps,
    remainder='passthrough',
)

# The transformer output lists the numeric columns, then the categorical
# columns, then the passed-through remainder ('Purchase').
imputed_columns = [*numerical_features, *categorical_features, 'Purchase']
imputed_values = preprocessor.fit_transform(df)
df_imputed = pd.DataFrame(imputed_values, columns=imputed_columns)

print("DataFrame after Imputation: ", df_imputed, "\n", sep="\n")

# Step 2: One-Hot Encoding
# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in
# 1.4. Prefer the new keyword and fall back only for older installations,
# where the unknown keyword raises TypeError.
try:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
categorical_transformer = Pipeline(steps=[('onehot', encoder)])

preprocessor_encoding = ColumnTransformer(
    transformers=[('cat', categorical_transformer, ['Gender', 'city'])],
    remainder='passthrough'
)

df_encoded_array = preprocessor_encoding.fit_transform(df_imputed)

# Reach into the fitted 'cat' pipeline for its 'onehot' step to obtain the
# generated dummy-column names. This must be a single expression: splitting
# it before ['onehot'] turns the second half into a list literal.
encoded_feature_names = (
    preprocessor_encoding.named_transformers_['cat']['onehot']
    .get_feature_names_out(['Gender', 'city'])
)

# Encoded columns come first in the output, followed by the untouched ones.
remaining_features = [col for col in df_imputed.columns if col not in ['Gender', 'city']]
df_encoded = pd.DataFrame(df_encoded_array,
                          columns=list(encoded_feature_names) + remaining_features)

print("DataFrame after One-Hot Encoding: ")

print(df_encoded)
print("\n")

# Step 3: Feature Scaling

scaler = StandardScaler()
numerical_transformer = Pipeline(steps=[('scaler', scaler)])

scaled_features = ['Age', 'Salary']
preprocessor_scaling = ColumnTransformer(
    transformers=[('num', numerical_transformer, scaled_features)],
    remainder='passthrough'
)

df_scaled_array = preprocessor_scaling.fit_transform(df_encoded)

# ColumnTransformer emits the transformed columns first and the remainder
# afterwards (in their original order), so the output does NOT share
# df_encoded's column order. Label the array by its real layout, then
# reorder back to df_encoded's layout so the two frames line up.
passthrough_features = [col for col in df_encoded.columns if col not in scaled_features]
df_scaled = pd.DataFrame(df_scaled_array,
                         columns=scaled_features + passthrough_features)
df_scaled = df_scaled[list(df_encoded.columns)]

print("DataFrame after Feature Scaling (Standardization): ")

print(df_scaled)

Program 3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets # Fixed: 'dataset' → 'datasets'

# Load the iris dataset into a DataFrame with one column per feature
# (the attribute is `feature_names`, not `features_names`).
iris = datasets.load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Add species column and map target numbers to names
df['species'] = iris.target
df['species'] = df['species'].map(dict(enumerate(iris.target_names)))

# Print the first 5 rows
print("First 5 rows of the dataset:")
print(df.head())

# Dataset info
print("\nDataset Info:")
# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line.
df.info()

# Summary statistics
print("\nSummary statistics:")
print(df.describe())

# Class distribution: number of rows per species
print("\nClass distribution:")
print(df['species'].value_counts())

# Pairplot: grid of pairwise feature plots, coloured by species
pairplot_options = {'hue': 'species', 'palette': 'Set2'}
sns.pairplot(df, **pairplot_options)
# y=1.02 places the title slightly above the top edge of the figure
plt.suptitle("Pairplot of Iris Features", y=1.02)
plt.show()

# Boxplot of the feature columns, drawn horizontally
plt.figure(figsize=(10, 6))
# iloc[:, :-1] drops the last column ('species') so only numeric columns remain
sns.boxplot(data=df.iloc[:, :-1], orient="h", palette="Set3")
plt.title("Boxplot of Iris Features")
plt.show()

# Correlation heatmap
plt.figure(figsize=(8, 6))
# Correlate every column except the last ('species'); annotate each cell
# with its coefficient formatted to two decimals.
sns.heatmap(df.iloc[:, :-1].corr(), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Features Correlation Heatmap")
plt.show()

# Violin plots: one subplot per feature column, split by species
plt.figure(figsize=(12, 8))
for i, col in enumerate(df.columns[:-1]):  # every column except 'species'
    plt.subplot(2, 2, i + 1)  # subplot indices are 1-based on a 2x2 grid
    sns.violinplot(x='species', y=col, data=df, palette='pastel')
# Lay out and display once, after all subplots have been drawn.
plt.tight_layout()
plt.show()

Data Preprocessing Example Programs1
No ratings yet
Data Preprocessing Example Programs1
9 pages
Train
No ratings yet
Train
17 pages
Advanced Machine Learning Course Guide
No ratings yet
Advanced Machine Learning Course Guide
36 pages
Data Preprocessing Techniques in Python
No ratings yet
Data Preprocessing Techniques in Python
27 pages
Abhiml ML File
No ratings yet
Abhiml ML File
74 pages
Dealing With Categorical
No ratings yet
Dealing With Categorical
25 pages
ML Manual
No ratings yet
ML Manual
30 pages
Data Mining with Python Lab Guide
No ratings yet
Data Mining with Python Lab Guide
39 pages
Da 012307
No ratings yet
Da 012307
8 pages
AIML Project
No ratings yet
AIML Project
4 pages
ML-Lab05-Data Preprocessing Techniques in Python
No ratings yet
ML-Lab05-Data Preprocessing Techniques in Python
7 pages
AI&ML
No ratings yet
AI&ML
9 pages
Deep Learning Perceptron
No ratings yet
Deep Learning Perceptron
10 pages
Record
No ratings yet
Record
22 pages
ML Manual Final
No ratings yet
ML Manual Final
35 pages
1
No ratings yet
1
13 pages
Advanced Feature Engineering and Data Preprocessing in Machine Learning
No ratings yet
Advanced Feature Engineering and Data Preprocessing in Machine Learning
7 pages
Lab 08 - Data Preprocessing
No ratings yet
Lab 08 - Data Preprocessing
9 pages
ML Functions
No ratings yet
ML Functions
12 pages
DSBDA Practicals
No ratings yet
DSBDA Practicals
16 pages
ML Complete Notes Hridoy
No ratings yet
ML Complete Notes Hridoy
5 pages
Quantl Fairness 1758011712
No ratings yet
Quantl Fairness 1758011712
14 pages
Iii Aid - ML
No ratings yet
Iii Aid - ML
30 pages
ML Manual
No ratings yet
ML Manual
9 pages
Lab Manual 5 Solved 40
No ratings yet
Lab Manual 5 Solved 40
13 pages
83 Sklearn Pipeline
No ratings yet
83 Sklearn Pipeline
8 pages
Machine Learning Algorithms Guide
No ratings yet
Machine Learning Algorithms Guide
34 pages
ML Lab
No ratings yet
ML Lab
29 pages
Day 4 S3
No ratings yet
Day 4 S3
1 page
S3 Data Processing and Classification
No ratings yet
S3 Data Processing and Classification
25 pages
MLLab Manual
No ratings yet
MLLab Manual
24 pages
ML Lab Codes
No ratings yet
ML Lab Codes
14 pages
Data Preprocessing and Model Training
No ratings yet
Data Preprocessing and Model Training
21 pages
Final ML Programs 075005
No ratings yet
Final ML Programs 075005
15 pages
Data Mining Lab: Regression & Clustering
No ratings yet
Data Mining Lab: Regression & Clustering
36 pages
ML Spy Programs
No ratings yet
ML Spy Programs
16 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
9 pages
Machine Learning Lab Assignment 1
No ratings yet
Machine Learning Lab Assignment 1
23 pages
Lecture 5 Encoding
No ratings yet
Lecture 5 Encoding
35 pages
House Price Prediction with ML in Python
No ratings yet
House Price Prediction with ML in Python
13 pages
Machine Learning Data Prep Guide
No ratings yet
Machine Learning Data Prep Guide
17 pages
Practical 1: A. Design A Simple Machine Learning Model To Train The Training Instances and Test The Same
No ratings yet
Practical 1: A. Design A Simple Machine Learning Model To Train The Training Instances and Test The Same
30 pages
Aiml Programs
No ratings yet
Aiml Programs
12 pages
(Feature Engineering) (Extended-Cheatsheet)
100% (1)
(Feature Engineering) (Extended-Cheatsheet)
9 pages
Dav Lab Manual
No ratings yet
Dav Lab Manual
28 pages
Machine Learning Practicals
No ratings yet
Machine Learning Practicals
30 pages
Student Abandonment Classification in Brazil
No ratings yet
Student Abandonment Classification in Brazil
59 pages
Data Science Record - 05
No ratings yet
Data Science Record - 05
20 pages
ML Lab
No ratings yet
ML Lab
14 pages
Machine Learning Record VR19
No ratings yet
Machine Learning Record VR19
46 pages
ML File Divya Goyal
No ratings yet
ML File Divya Goyal
28 pages
Parth ML
No ratings yet
Parth ML
24 pages
23BCE7199 ML Lab Assignment
No ratings yet
23BCE7199 ML Lab Assignment
15 pages
CP4252 Machine Learning Laboratory
No ratings yet
CP4252 Machine Learning Laboratory
37 pages
Machinelearning
No ratings yet
Machinelearning
26 pages
Exp 6
No ratings yet
Exp 6
9 pages
ML Final Prac
No ratings yet
ML Final Prac
47 pages
Echoes From The Silent Station
No ratings yet
Echoes From The Silent Station
2 pages
The Lightbreath of Whispering Woods
No ratings yet
The Lightbreath of Whispering Woods
2 pages
Bound by Moonlight
No ratings yet
Bound by Moonlight
1 page
The Enchanted Brew
No ratings yet
The Enchanted Brew
1 page
Project Structure: Complete Android Studio Project Structure With All Necessary Code and XML
No ratings yet
Project Structure: Complete Android Studio Project Structure With All Necessary Code and XML
6 pages
Some Properties of Corn Grains and Their Flours I: Physicochemical, Functional and Chapati-Making Properties of Flours
No ratings yet
Some Properties of Corn Grains and Their Flours I: Physicochemical, Functional and Chapati-Making Properties of Flours
9 pages
7 220 V & 48 V DC Systems: 7.1 Guaranteed Technical Particulars Contractor's Data 220 V Battery Charger
No ratings yet
7 220 V & 48 V DC Systems: 7.1 Guaranteed Technical Particulars Contractor's Data 220 V Battery Charger
4 pages
Miller 2018
No ratings yet
Miller 2018
41 pages
Mobile Hydraulics Solutions Guide
No ratings yet
Mobile Hydraulics Solutions Guide
16 pages
Heat Transfer Lab Setup Guide
No ratings yet
Heat Transfer Lab Setup Guide
1 page
Rotary Dryer and Furnace Specs
No ratings yet
Rotary Dryer and Furnace Specs
4 pages
PM ICX Protocol EN V99 0623
No ratings yet
PM ICX Protocol EN V99 0623
81 pages
AI Algorithms and Programs
No ratings yet
AI Algorithms and Programs
15 pages
23uca04 - Java Class Notes
No ratings yet
23uca04 - Java Class Notes
232 pages
Fault Code Definition: No. FMI Number Definition
No ratings yet
Fault Code Definition: No. FMI Number Definition
2 pages
Midea V8 Wall Mounted VRF Units Guide
No ratings yet
Midea V8 Wall Mounted VRF Units Guide
18 pages
UNIT 35 Trigonometric Problems CSEC Revision Test: 17.6 First Window
No ratings yet
UNIT 35 Trigonometric Problems CSEC Revision Test: 17.6 First Window
8 pages
Chemistry Course Schedule Guide
0% (1)
Chemistry Course Schedule Guide
9 pages
Class Test 2 TE
No ratings yet
Class Test 2 TE
2 pages
Glorious Innings of Prof.a R Rao
100% (1)
Glorious Innings of Prof.a R Rao
24 pages
Exponential Fourier Series Lab Guide
No ratings yet
Exponential Fourier Series Lab Guide
2 pages
Relay Operation Guide
No ratings yet
Relay Operation Guide
6 pages
Junior Inter BIPC Study Guide
No ratings yet
Junior Inter BIPC Study Guide
2 pages
Protecting Groups in Luminescent Metal Nanoclusters
No ratings yet
Protecting Groups in Luminescent Metal Nanoclusters
19 pages
8085 Microprocessor 8251 Interface Guide
No ratings yet
8085 Microprocessor 8251 Interface Guide
14 pages
4 Mirror Anastigmatic Thesis
No ratings yet
4 Mirror Anastigmatic Thesis
274 pages
NEET Model Grand Test 2 - Physics
No ratings yet
NEET Model Grand Test 2 - Physics
25 pages
Engineering Student Project Report
No ratings yet
Engineering Student Project Report
7 pages
Online Chapter Tests: 7. Integrals
No ratings yet
Online Chapter Tests: 7. Integrals
3 pages
Catalogue
No ratings yet
Catalogue
27 pages
Pet 8 Rustpreventatives en PDF
No ratings yet
Pet 8 Rustpreventatives en PDF
7 pages
Seed Germination Methods and Results
No ratings yet
Seed Germination Methods and Results
9 pages
Sec 3 A Math WA1 Mock Exam 2023
No ratings yet
Sec 3 A Math WA1 Mock Exam 2023
5 pages
0 Method For Effective Color Change in Extrusion Blow Molding Accumulator Heads
No ratings yet
0 Method For Effective Color Change in Extrusion Blow Molding Accumulator Heads
5 pages
Replicating ChatGPT with LangChain
No ratings yet
Replicating ChatGPT with LangChain
6 pages

Program 1

Uploaded by

Program 1

Uploaded by

Program 1

numerical_features = ['Age', 'Salary']

print("DataFrame after Imputation: ")

# Step 2: One-Hot Encoding

print("DataFrame after One-Hot Encoding: ")

# Step 3: Feature Scaling

print("DataFrame after Feature Scaling (Standardization): ")

# Load the iris dataset

# Add species column and map target numbers to names

# Print the first 5 rows

You might also like