11/30/24, 3:34 PM Practical1c.ipynb - Colab
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, Binarizer
# Build a small demonstration table: one three-level categorical column,
# two integer numeric columns, and one yes/no column.
raw_columns = {
    'Category': ['A', 'B', 'C', 'A', 'B', 'C'],  # multi-class categorical feature
    'Age': [23, 45, 31, 22, 35, 30],  # numeric feature
    'Income': [50000, 60000, 70000, 80000, 90000, 100000],  # numeric feature
    'Has_Car': ['Yes', 'No', 'Yes', 'No', 'Yes', 'No'],  # two-level categorical feature
}
data = pd.DataFrame(raw_columns)

# Show the untouched table so each later preprocessing step can be compared to it.
print("Sample Dataset:")
print(data)
Sample Dataset:
Category Age Income Has_Car
0 A 23 50000 Yes
1 B 45 60000 No
2 C 31 70000 Yes
3 A 22 80000 No
4 B 35 90000 Yes
5 C 30 100000 No
# Label Encoding for 'Category' column.
# Each distinct label is replaced by an integer code (here A -> 0, B -> 1, C -> 2).
label_encoder = LabelEncoder()
data['Category_Encoded'] = label_encoder.fit_transform(data['Category'])

# Label Encoding for binary column 'Has_Car' (here No -> 0, Yes -> 1).
# A dedicated encoder is used on purpose: refitting `label_encoder` would
# overwrite the classes it learned for 'Category', so those codes could no
# longer be mapped back to labels with inverse_transform.
has_car_encoder = LabelEncoder()
data['Has_Car_Encoded'] = has_car_encoder.fit_transform(data['Has_Car'])

print("\nAfter Label Encoding:")
print(data)
After Label Encoding:
Category Age Income Has_Car Category_Encoded Has_Car_Encoded
0 A 23 50000 Yes 0 1
1 B 45 60000 No 1 0
2 C 31 70000 Yes 2 1
3 A 22 80000 No 0 0
4 B 35 90000 Yes 1 1
5 C 30 100000 No 2 0
# Create both scalers up front; each is fitted on exactly one column below.
min_max_scaler = MinMaxScaler()
standard_scaler = StandardScaler()

# Min-Max scaling of 'Income': the smallest income becomes 0.0 and the
# largest becomes 1.0. Double brackets keep the selection 2-D, which is the
# input shape the scalers work on.
income_frame = data[['Income']]
data['Income_MinMax'] = min_max_scaler.fit_transform(income_frame)

# Standard scaling of 'Age': subtract the column mean and divide by the
# (population) standard deviation, so the mean age maps to 0.0.
age_frame = data[['Age']]
data['Age_Standardized'] = standard_scaler.fit_transform(age_frame)

print("\nAfter Scaling:")
print(data)
After Scaling:
Category Age Income Has_Car Category_Encoded Has_Car_Encoded \
0 A 23 50000 Yes 0 1
1 B 45 60000 No 1 0
2 C 31 70000 Yes 2 1
3 A 22 80000 No 0 0
4 B 35 90000 Yes 1 1
5 C 30 100000 No 2 0
Income_MinMax Age_Standardized
0 0.0 -1.035676
1 0.2 1.812434
2 0.4 0.000000
3 0.6 -1.165136
4 0.8 0.517838
5 1.0 -0.129460
# Turn 'Income' into a 0/1 flag: incomes above the 75,000 threshold become 1,
# all others become 0.
binarizer = Binarizer(threshold=75000)
income_flags = binarizer.fit_transform(data[['Income']])
data['Income_Binary'] = income_flags

print("\nAfter Binarization:")
print(data)
After Binarization:
Category Age Income Has_Car Category_Encoded Has_Car_Encoded \
0 A 23 50000 Yes 0 1
1 B 45 60000 No 1 0
https://colab.research.google.com/drive/1vzCv7xFKj-Mru4D-MXvHU496haU-bL0I#scrollTo=8V8mxZ5Uhops&printMode=true 1/2
11/30/24, 3:34 PM Practical1c.ipynb - Colab
2 C 31 70000 Yes 2 1
3 A 22 80000 No 0 0
4 B 35 90000 Yes 1 1
5 C 30 100000 No 2 0
Income_MinMax Age_Standardized Income_Binary
0 0.0 -1.035676 0
1 0.2 1.812434 0
2 0.4 0.000000 0
3 0.6 -1.165136 1
4 0.8 0.517838 1
5 1.0 -0.129460 1
# Write the fully processed table (original plus derived columns) to CSV.
# index=False leaves the DataFrame's row index out of the file.
output_path = 'processed_data.csv'
data.to_csv(output_path, index=False)

print("\nProcessed dataset saved as 'processed_data.csv'")
Processed dataset saved as 'processed_data.csv'
https://colab.research.google.com/drive/1vzCv7xFKj-Mru4D-MXvHU496haU-bL0I#scrollTo=8V8mxZ5Uhops&printMode=true 2/2