0% found this document useful (0 votes)
4 views6 pages

Program 1

The document contains three Python programs that perform data analysis and preprocessing using pandas, numpy, and sklearn. Program 1 visualizes IPL 2022 batters' runs and matches, Program 2 handles missing data, applies one-hot encoding, and scales features on a sample dataset, and Program 3 analyzes the iris dataset, including visualizations such as pairplots, boxplots, and correlation heatmaps. Each program includes data loading, processing, and visualization steps.

Uploaded by

ca245213206
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views6 pages

Program 1

The document contains three Python programs that perform data analysis and preprocessing using pandas, numpy, and sklearn. Program 1 visualizes IPL 2022 batters' runs and matches, Program 2 handles missing data, applies one-hot encoding, and scales features on a sample dataset, and Program 3 analyzes the iris dataset, including visualizations such as pairplots, boxplots, and correlation heatmaps. Each program includes data loading, processing, and visualization steps.

Uploaded by

ca245213206
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd

Program 1

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Raw string prevents "\M" and "\I" from being treated as (invalid) escape
# sequences in the Windows path.
file_path = r"S:\ML\IPL 2022 Batters.csv"
data = pd.read_csv(file_path)

print("First 5 rows of the dataset: ")
print(data.head())

# DataFrame.info() prints its report directly and returns None, so wrapping it
# in print() would emit a spurious "None" line — call it bare instead.
print("\nData set Info: ")
data.info()

print("\nStatistical Summary: ")
print(data.describe())

# Scatter plot: runs scored (x) vs matches played (y).
x = data['Runs']
y = data['Mat']

plt.xlabel("Runs")
plt.ylabel("Matches")
plt.scatter(x, y, color='red')
plt.show()
Program 2
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Sample data with missing values (NaN) in 'Age' and 'Salary'.
data = {
    'Age': [25, 30, np.nan, 45, 22, 38, 50, np.nan],
    'Salary': [50000, 60000, 75000, np.nan, 48000, 8000, 9000, 65000],
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male', 'Female', 'Male', 'Female'],
    'city': ['New York', 'London', 'Paris', 'New York', 'London', 'Paris', 'New York',
             'London'],
    'Purchase': ['No', 'Yes', 'No', 'Yes', 'No', 'Yes', 'No', 'Yes']
}

df = pd.DataFrame(data)
print("Original DataFrame: ")
print(df)
print("\n")

# Step 1: Imputation — mean for numeric columns, most-frequent for categoricals.
imputer_numerical = SimpleImputer(strategy='mean')
imputer_categorical = SimpleImputer(strategy='most_frequent')

numerical_features = ['Age', 'Salary']
categorical_features = ['Gender', 'city']

preprocessor = ColumnTransformer(
    transformers=[
        ('num_imputer', imputer_numerical, numerical_features),
        ('cat_imputer', imputer_categorical, categorical_features)
    ],
    remainder='passthrough'  # 'Purchase' passes through untouched, appended last
)

df_imputed = pd.DataFrame(
    preprocessor.fit_transform(df),
    columns=numerical_features + categorical_features + ['Purchase']
)
# ColumnTransformer returns an object-dtype array when mixing numeric and
# string columns; restore numeric dtypes so StandardScaler works in Step 3.
df_imputed[numerical_features] = df_imputed[numerical_features].astype(float)

print("DataFrame after Imputation: ")
print(df_imputed)
print("\n")

# Step 2: One-Hot Encoding of the categorical columns.
# NOTE: the 'sparse' keyword was removed in scikit-learn 1.4;
# 'sparse_output' is the current parameter name.
encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
categorical_transformer = Pipeline(steps=[('onehot', encoder)])

preprocessor_encoding = ColumnTransformer(
    transformers=[('cat', categorical_transformer, ['Gender', 'city'])],
    remainder='passthrough'
)

df_encoded_array = preprocessor_encoding.fit_transform(df_imputed)
# Recover readable column names for the encoded output (single expression —
# must not be split into separate statements).
encoded_feature_names = (
    preprocessor_encoding.named_transformers_['cat']['onehot']
    .get_feature_names_out(['Gender', 'city'])
)
remaining_features = [col for col in df_imputed.columns if col not in ['Gender', 'city']]
df_encoded = pd.DataFrame(
    df_encoded_array,
    columns=list(encoded_feature_names) + remaining_features
)

print("DataFrame after One-Hot Encoding: ")
print(df_encoded)
print("\n")

# Step 3: Feature scaling (standardization) of 'Age' and 'Salary'.
scaler = StandardScaler()
numerical_transformer = Pipeline(steps=[('scaler', scaler)])

preprocessor_scaling = ColumnTransformer(
    transformers=[('num', numerical_transformer, ['Age', 'Salary'])],
    remainder='passthrough'
)

df_scaled_array = preprocessor_scaling.fit_transform(df_encoded)
# ColumnTransformer puts the transformed columns FIRST and the passthrough
# remainder after them, so the labels must follow that order — reusing
# df_encoded.columns as-is would mislabel every column.
scaled_column_order = (
    ['Age', 'Salary']
    + [col for col in df_encoded.columns if col not in ['Age', 'Salary']]
)
df_scaled = pd.DataFrame(df_scaled_array, columns=scaled_column_order)

print("DataFrame after Feature Scaling (Standardization): ")
print(df_scaled)

Program 3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets

# Load the iris dataset into a DataFrame with named feature columns.
iris = datasets.load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Add a species column, mapping target integers (0-2) to species names.
df['species'] = iris.target
df['species'] = df['species'].map({i: name for i, name in enumerate(iris.target_names)})

# Print the first 5 rows
print("First 5 rows of the dataset:")
print(df.head())

# DataFrame.info() prints directly and returns None — calling it inside
# print() would add a stray "None" line to the output.
print("\nDataset Info:")
df.info()

# Summary statistics
print("\nSummary statistics:")
print(df.describe())

# Class distribution (should be 50 samples per species)
print("\nClass distribution:")
print(df['species'].value_counts())

# Pairplot of all numeric features, colored by species.
sns.pairplot(df, hue='species', palette='Set2')
plt.suptitle("Pairplot of Iris Features", y=1.02)
plt.show()

# Boxplot — use only the numeric columns (everything except 'species').
plt.figure(figsize=(10, 6))
sns.boxplot(data=df.iloc[:, :-1], orient="h", palette="Set3")
plt.title("Boxplot of Iris Features")
plt.show()

# Correlation heatmap of the numeric features.
plt.figure(figsize=(8, 6))
sns.heatmap(df.iloc[:, :-1].corr(), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Features Correlation Heatmap")
plt.show()

# Violin plots — one 2x2 subplot per numeric feature, split by species.
plt.figure(figsize=(12, 8))
for i, col in enumerate(df.columns[:-1]):
    plt.subplot(2, 2, i + 1)
    sns.violinplot(x='species', y=col, data=df, palette='pastel')
plt.tight_layout()
plt.show()

You might also like