import pandas as pd
import numpy as np

# Load the gemstone (diamond price) dataset.
# NOTE: forward slashes are portable (pandas accepts them on Windows too);
# the original r"..\\notebooks\\..." raw string embedded doubled backslashes.
df = pd.read_csv("../notebooks/data/gemstone.csv")

# 'id' is a pure row identifier with no predictive value — drop it in place.
df.drop(labels=['id'], axis=1, inplace=True)

# Notebook-style preview of the first five rows.
df.head()
carat cut color clarity depth table x y z
price
0 1.52 Premium F VS2 62.2 58.0 7.27 7.33 4.55
13619
1 2.03 Very Good J SI2 62.0 58.0 8.06 8.12 5.05
13387
2 0.70 Ideal G VS1 61.2 57.0 5.69 5.73 3.50
2772
3 0.32 Ideal G VS1 61.6 56.0 4.38 4.41 2.71
666
4 1.70 Premium G VS2 62.6 59.0 7.65 7.61 4.77
14453
# Separate predictors from the target.
# y keeps 'price' as a one-column DataFrame (double brackets);
# X holds every remaining feature column.
y = df[['price']]
X = df.drop(columns=['price'])

# Preview the feature matrix.
X.head()
carat cut color clarity depth table x y z
0 1.52 Premium F VS2 62.2 58.0 7.27 7.33 4.55
1 2.03 Very Good J SI2 62.0 58.0 8.06 8.12 5.05
2 0.70 Ideal G VS1 61.2 57.0 5.69 5.73 3.50
3 0.32 Ideal G VS1 61.6 56.0 4.38 4.41 2.71
4 1.70 Premium G VS2 62.6 59.0 7.65 7.61 4.77
price
0 13619
1 13387
2 2772
3 666
4 14453
... ...
193568 1130
193569 2874
193570 3036
193571 681
193572 2258
[193573 rows x 1 columns]
# Columns holding string labels — these need encoding before modelling.
cat_features = X.select_dtypes(include=["object"]).columns
print(cat_features)
Index(['cut', 'color', 'clarity'], dtype='object')
# Everything that is not an object dtype — the numeric measurement columns.
num_features = X.select_dtypes(exclude=["object"]).columns
print(num_features)
Index(['carat', 'depth', 'table', 'x', 'y', 'z'], dtype='object')
# Explicit worst-to-best orderings for the three ordinal gem-quality variables.
# OrdinalEncoder maps each label to its position in these lists, so the encoded
# integers preserve the quality ranking.
cut_categories = ['Fair', 'Good', 'Very Good', 'Premium', 'Ideal']          # cut quality, worst -> best
color_categories = ['D', 'E', 'F', 'G', 'H', 'I', 'J']                      # color grade, best (D) -> worst (J)
clarity_categories = ['I1', 'SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']  # clarity, worst -> best
# Preprocessing building blocks.
# (The pasted original had the comment "handling feature scaling" hard-wrapped,
# leaving a bare token `scaling` on its own line — a NameError at runtime.)
from sklearn.impute import SimpleImputer          # handling missing values
from sklearn.preprocessing import StandardScaler  # handling feature scaling
from sklearn.preprocessing import OrdinalEncoder  # ordinal encoding

## Pipelines
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
# Numerical pipeline: mean imputation (SimpleImputer default) followed by
# zero-mean / unit-variance standardisation.
# (The pasted original was missing the closing ')' of this Pipeline call and
# had several comments hard-wrapped into bare tokens — both syntax errors.)
num_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer()),    # handling missing values
        ('scaler', StandardScaler()),    # handling scaling of values
    ]
)

# Categorical pipeline: impute with the most frequent label, then map each
# ordinal category to its rank using the explicit orderings defined above.
cat_pipeline = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),  # handling missing values
        ('ordinalencoder', OrdinalEncoder(
            categories=[cut_categories, color_categories, clarity_categories]
        )),  # categorical -> numerical conversion
    ]
)

# Route each column subset through its pipeline. Columns not listed are
# dropped (ColumnTransformer's default remainder='drop'), which matches the
# 9-column output shown below.
preprocessor = ColumnTransformer(
    [
        ('num_pipeline', num_pipeline, num_features),
        ('cat_pipeline', cat_pipeline, cat_features),
    ]
)
## Train/test split
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; fixed seed for reproducibility.
# (The pasted original had `random_state` split across two lines — a syntax error.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=30
)

# Fit the preprocessor on the training data only (avoids test-set leakage)
# and transform it; the resulting array is displayed below.
preprocessor.fit_transform(X_train)
array([[-0.97543926, -0.84960654, -0.12153081, ..., 4. ,
5. , 5. ],
[ 0.2351953 , 1.83363716, -0.12153081, ..., 1. ,
1. , 2. ],
[ 0.49461699, 0.81585507, 0.39980029, ..., 3. ,
3. , 4. ],
...,
[ 0.45138004, 1.55606023, -0.6428619 , ..., 1. ,
3. , 2. ],
[ 0.66756478, -1.77486298, 1.44246248, ..., 4. ,
3. , 4. ],
[ 0.25681377, 0.81585507, -0.12153081, ..., 4. ,
3. , 2. ]])
# Re-apply the already-fitted preprocessor to the same data; the output below
# matches the fit_transform result above (sanity check, result discarded).
preprocessor.transform(X_train)
array([[-0.97543926, -0.84960654, -0.12153081, ..., 4. ,
5. , 5. ],
[ 0.2351953 , 1.83363716, -0.12153081, ..., 1. ,
1. , 2. ],
[ 0.49461699, 0.81585507, 0.39980029, ..., 3. ,
3. , 4. ],
...,
[ 0.45138004, 1.55606023, -0.6428619 , ..., 1. ,
3. , 2. ],
[ 0.66756478, -1.77486298, 1.44246248, ..., 4. ,
3. , 4. ],
[ 0.25681377, 0.81585507, -0.12153081, ..., 4. ,
3. , 2. ]])
# Inspect the generated output column names ('<transformer>__<column>' format).
preprocessor.get_feature_names_out()
array(['num_pipeline__carat', 'num_pipeline__depth',
'num_pipeline__table', 'num_pipeline__x', 'num_pipeline__y',
'num_pipeline__z', 'cat_pipeline__cut', 'cat_pipeline__color',
'cat_pipeline__clarity'], dtype=object)
# Wrap the transformed arrays back into DataFrames, labelling columns with the
# ColumnTransformer's generated names (e.g. 'num_pipeline__carat').
# Fit on the training split only; the test split is transform-only, so no
# test-set statistics leak into the scaler/imputer.
# (The pasted original had the `preprocessor` identifier split across lines —
# a syntax error.)
X_train = pd.DataFrame(
    preprocessor.fit_transform(X_train),
    columns=preprocessor.get_feature_names_out(),
)
X_test = pd.DataFrame(
    preprocessor.transform(X_test),
    columns=preprocessor.get_feature_names_out(),
)
# Notebook-style display of the preprocessed training frame (135501 rows x 9 columns).
X_train
num_pipeline__carat num_pipeline__depth num_pipeline__table
\
0 -0.975439 -0.849607 -0.121531
1 0.235195 1.833637 -0.121531
2 0.494617 0.815855 0.399800
3 -1.018676 0.260701 0.921131
4 -0.953821 -0.664555 -0.642862
... ... ... ...
135496 -1.040295 -0.016876 -0.642862
135497 0.991842 0.168176 -0.642862
135498 0.451380 1.556060 -0.642862
135499 0.667565 -1.774863 1.442462
135500 0.256814 0.815855 -0.121531
num_pipeline__x num_pipeline__y num_pipeline__z
cat_pipeline__cut \
0 -1.042757 -1.080970 -1.123150
4.0
1 0.318447 0.279859 0.485354
1.0
2 0.570855 0.606458 0.673737
3.0
3 -1.214034 -1.244270 -1.195605
3.0
4 -1.069801 -1.044681 -1.094168
4.0
... ... ... ...
...
135496 -1.268122 -1.244270 -1.239078
4.0
135497 1.048629 1.114501 1.079486
4.0
135498 0.516768 0.588314 0.702719
1.0
135499 0.868337 0.951202 0.688228
4.0
135500 0.381549 0.415942 0.470863
4.0
cat_pipeline__color cat_pipeline__clarity
0 5.0 5.0
1 1.0 2.0
2 3.0 4.0
3 3.0 3.0
4 6.0 5.0
... ... ...
135496 1.0 2.0
135497 3.0 1.0
135498 3.0 2.0
135499 3.0 4.0
135500 3.0 2.0
[135501 rows x 9 columns]
# Notebook-style display of the preprocessed test frame (58072 rows x 9 columns).
X_test
num_pipeline__carat num_pipeline__depth
num_pipeline__table \
0 -0.564688 -0.942132 -0.642862
1 -0.175556 1.000906 -0.121531
2 -1.061913 0.260701 -0.121531
3 0.970223 -0.201927 1.963794
4 -0.932202 -1.312235 0.399800
... ... ... ...
58067 1.013460 1.185958 -0.642862
58068 -0.997058 0.260701 -1.164193
58069 -0.197174 -3.347799 1.442462
58070 -0.824110 -0.201927 -0.121531
58071 2.613227 -0.757081 1.442462
num_pipeline__x num_pipeline__y num_pipeline__z
cat_pipeline__cut \
0 -0.429765 -0.464061 -0.500036
3.0
1 -0.042137 -0.028595 0.036132
2.0
2 -1.304180 -1.298703 -1.268060
4.0
3 1.048629 0.996563 0.978049
3.0
4 -1.006699 -0.990248 -1.065186
3.0
... ... ... ...
...
58067 1.003556 1.041924 1.151941
2.0
58068 -1.141917 -1.126331 -1.108659
4.0
58069 0.102096 0.071199 -0.224706
3.0
58070 -0.853450 -0.881382 -0.876803
4.0
58071 2.139394 2.039865 2.006912
3.0
cat_pipeline__color cat_pipeline__clarity
0 1.0 3.0
1 4.0 2.0
2 4.0 7.0
3 3.0 3.0
4 1.0 4.0
... ... ...
58067 4.0 3.0
58068 2.0 6.0
58069 6.0 3.0
58070 3.0 2.0
58071 6.0 3.0
[58072 rows x 9 columns]
So far we have only learned Linear Regression and Logistic Regression, and we are using them to
build an end-to-end project.
But after that, will you be covering the remaining ML algorithms and building end-to-end projects with them?