0% found this document useful (0 votes)

6 views5 pages

Correlation Matrix

Uploaded by

schlaggen

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

6 views5 pages

Correlation Matrix

Uploaded by

schlaggen

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

Import

# Numerical and tabular tooling.
import numpy as np
import pandas as pd

# Plotting.
import matplotlib.pyplot as plt
import seaborn as sns

# Modelling utilities from scikit-learn.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the raw dataset into a DataFrame.
# NOTE(review): the path literal below is a placeholder left behind by text
# extraction; the real CSV file name cannot be recovered from this file and
# must be filled in before the script can run.
df = pd.read_csv('[Link]')

# Print a summary of the frame (index range, column count, dtypes, memory).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38291 entries, 0 to 38290
Columns: 428 entries, Time & Date to cpatToolfaceTar
dtypes: float64(34), int64(390), object(4)
memory usage: 125.0+ MB

# Collect every column that carries too little information to keep:
# more than 49.9% of its values are missing, or every value equals 0.
# NOTE(review): `df.columns.tolist()` is reconstructed from a garbled token;
# confirm it matches the original accessor.
cols = df.columns.tolist()

# The missing-value limit is the same for every column, so compute it once.
null_limit = 0.499 * len(df)

drop_col = [
    col
    for col in cols
    if df[col].isnull().sum() > null_limit or (df[col] == 0).all()
]

len(drop_col)

376

# Remove the columns listed in drop_col.
df = df.drop(drop_col, axis=1)

# Keep only the int64 and float64 columns.
df = df.select_dtypes(include=['int64', 'float64'])

# Drop a further set of columns by name.
df = df.drop(
    [
        'Prop Spec Vol',
        'Prp Spc Vol Ann',
        'N2 Temp Ann',
        'CO2 Temp Ann',
        'Trt Num',
        'Ann Pr @WH LOR',
    ],
    axis=1,
)

# df = df.drop('Time & Date', axis=1)

plt.figure(figsize=(50, 50))

# Compute the correlation matrix once and reuse it for the mask and the plot.
corr = df.corr()

# Boolean triangular mask with the same shape as the correlation matrix;
# masked cells are not drawn, so each pair of columns appears only once.
# NOTE(review): `np.triu` is reconstructed from a garbled token (it could
# have been `np.tril`) — confirm which triangle was hidden.
mask = np.triu(np.ones_like(corr, dtype=bool))

heatmap = sns.heatmap(corr, mask=mask, vmin=-1, vmax=1, annot=True)
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize': 18}, pad=12)

Text(0.5, 1.0, 'Correlation Heatmap')

# Snapshot of the frame before any correlated columns are pruned.
df_2 = df.copy()

# Absolute pairwise correlations, computed once. The correlation of a pair of
# columns does not depend on which other columns are present, so the loop
# below can slice this matrix instead of recomputing it on every pass.
abs_corr = df.corr().abs()

# Zero the diagonal (self-correlation) so it never counts as a "high"
# correlation. Done with `mask` rather than writing into `.values`, which can
# be a read-only view (e.g. under pandas Copy-on-Write).
abs_corr = abs_corr.mask(np.eye(len(abs_corr), dtype=bool), 0)

# Greedy pruning: repeatedly drop one column until no remaining pair has an
# absolute correlation above 0.5.
# NOTE(review): every column of df takes part in this pruning, including any
# column later used as a regression target — confirm that is intended.
final_cols = df.columns.tolist()
while True:
    corr_matrix = abs_corr.loc[final_cols, final_cols]

    # Take the first column that has any partner above the threshold and
    # pick the first such partner as the column to drop.
    drop_corr = None
    for col in corr_matrix.columns:
        high_corrs = corr_matrix[col][corr_matrix[col] > 0.5]
        if not high_corrs.empty:
            drop_corr = high_corrs.index[0]
            break

    # Compare against None explicitly: a falsy column label (0, '') is still
    # a valid column and must not end the loop early.
    if drop_corr is None:
        break

    print(f"Dropping '{drop_corr}' due to correlation > 0.5")
    final_cols.remove(drop_corr)

Dropping 'Trt Time' due to correlation > 0.5

Dropping 'Stg Time' due to correlation > 0.5
Dropping 'Slurry Rate' due to correlation > 0.5
Dropping 'CT Running Len' due to correlation > 0.5
Dropping 'CT Run Speed' due to correlation > 0.5
Dropping 'SCC Cycle' due to correlation > 0.5
Dropping 'Max Allow CT Pr' due to correlation > 0.5
Dropping 'Clean Density' due to correlation > 0.5
Dropping 'Cln Density Ann' due to correlation > 0.5
Dropping 'Slurry Density' due to correlation > 0.5
Dropping 'Slurry Den Ann' due to correlation > 0.5
Dropping 'Stage At Bldr' due to correlation > 0.5
Dropping 'SF Calc Limit' due to correlation > 0.5
Dropping 'SF Allow Limit' due to correlation > 0.5
Dropping 'SFAllowInWell' due to correlation > 0.5
Dropping 'Annulus Pr @WH' due to correlation > 0.5
Dropping 'Max Allow WH Pr' due to correlation > 0.5
Dropping 'N2 Pressure' due to correlation > 0.5
Dropping 'N2 Pressure Ann' due to correlation > 0.5
Dropping 'CO2 Pressure' due to correlation > 0.5
Dropping 'CO2 Press Ann' due to correlation > 0.5
Dropping 'Tubing Guide Pr' due to correlation > 0.5
Dropping 'Pr @ Slry Den' due to correlation > 0.5
Dropping 'Pr@Slry Den Ann' due to correlation > 0.5
Dropping 'CO2 Temp' due to correlation > 0.5
Dropping 'SF Reel Inlet' due to correlation > 0.5
Dropping 'SF Tubing Guide' due to correlation > 0.5
Dropping 'SF Abv Stripper' due to correlation > 0.5
Dropping 'SF Blw Stripper' due to correlation > 0.5
Dropping 'Max OD Growth' due to correlation > 0.5
Dropping 'CT Depth' due to correlation > 0.5
Dropping 'CT Pipe Weight' due to correlation > 0.5
Dropping 'Warning Code' due to correlation > 0.5

len(final_cols)

# Restrict the frame to the columns that survived the correlation pruning.
df = df[final_cols]

# NOTE(review): only the figure creation is present here; the statements that
# drew and titled this second plot appear to be missing from this extract.
plt.figure(figsize=(12, 8))

Text(0.5, 1.0, 'Correlation Heatmap')

# Separate the predictors from the regression target.
X = df.drop('Treating Pr LOR', axis=1)
y = df['Treating Pr LOR']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same scaling to
# the test rows so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Same preparation as for the pruned frame, applied to df_2.
X_2 = df_2.drop('Treating Pr LOR', axis=1)
y_2 = df_2['Treating Pr LOR']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X_2, y_2, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same scaling to
# the test rows.
scaler2 = StandardScaler()
X_train_2 = scaler2.fit_transform(X_train_2)
X_test_2 = scaler2.transform(X_test_2)

# Fit an ordinary least-squares model on the scaled training data.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Predict on the held-out rows and score the predictions.
y_pred = lr.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 31261.686890693905

R-squared: 0.13832227402498298

# Fit a second ordinary least-squares model on the "_2" training data.
lr2 = LinearRegression()
lr2.fit(X_train_2, y_train_2)

# Predict on the held-out rows and score the predictions.
y_pred2 = lr2.predict(X_test_2)

mse2 = mean_squared_error(y_test_2, y_pred2)
r2_2 = r2_score(y_test_2, y_pred2)

print(f"Mean Squared Error: {mse2}")
print(f"R-squared: {r2_2}")

Mean Squared Error: 29792.2029402794

R-squared: 0.17882621717997915

Project 11 Hotel Management
No ratings yet
Project 11 Hotel Management
3 pages
Project 07 Inventory
No ratings yet
Project 07 Inventory
3 pages
Project 03 Sales Management
No ratings yet
Project 03 Sales Management
3 pages
BA64 Group6 Geo-Economy
No ratings yet
BA64 Group6 Geo-Economy
30 pages
Ngan Thanh Tran Tran Thanh Ngan-11214243 415062 1736623253
No ratings yet
Ngan Thanh Tran Tran Thanh Ngan-11214243 415062 1736623253
2 pages
Student Stock Market Awareness
No ratings yet
Student Stock Market Awareness
6 pages
Chap - 24
No ratings yet
Chap - 24
61 pages
Lesson 1 WINDOWS AND INTERNET
No ratings yet
Lesson 1 WINDOWS AND INTERNET
18 pages
Chapter 2. Data Analysis and Processing - Full
No ratings yet
Chapter 2. Data Analysis and Processing - Full
49 pages
Lesson 2 WORD 2010
No ratings yet
Lesson 2 WORD 2010
14 pages
Econometricstrix Meeting 2020 December
No ratings yet
Econometricstrix Meeting 2020 December
5 pages
Athey, Imbens (2019) - Machine Learning Methods Economists Should Know About
No ratings yet
Athey, Imbens (2019) - Machine Learning Methods Economists Should Know About
41 pages
Statistics Explained An Introductory Guide For Life Scientists, 2nd Edition Unrestricted Download
100% (20)
Statistics Explained An Introductory Guide For Life Scientists, 2nd Edition Unrestricted Download
16 pages
Business Statistics For Contemporary Decision Making 7th Edition Black Get It Now
No ratings yet
Business Statistics For Contemporary Decision Making 7th Edition Black Get It Now
324 pages
Concrete Strength Evaluation Method
No ratings yet
Concrete Strength Evaluation Method
8 pages
Graduate Admission Prediction - Data Analytics
No ratings yet
Graduate Admission Prediction - Data Analytics
32 pages
Qaisar Khalid Mahmood Sociology HSR 2017 04.07.2018
No ratings yet
Qaisar Khalid Mahmood Sociology HSR 2017 04.07.2018
228 pages
Course Syllabus03 F24
No ratings yet
Course Syllabus03 F24
6 pages
Econometrics For Finance Lecture Note
No ratings yet
Econometrics For Finance Lecture Note
72 pages
For Reliance Company Provide Its Last 10 Years Data of Its Sales Revenue, PAT, EBIT, Stock Returns, Corelation and Regression.
No ratings yet
For Reliance Company Provide Its Last 10 Years Data of Its Sales Revenue, PAT, EBIT, Stock Returns, Corelation and Regression.
4 pages
Exam - Measurement Technique, ELA210, TEN1, 2023-12-08: Please Read These Instructions Carefully!
No ratings yet
Exam - Measurement Technique, ELA210, TEN1, 2023-12-08: Please Read These Instructions Carefully!
5 pages
The Impact of Social Support and Self-Esteem On Adolescent Substance Abuse Treatment Outcome
No ratings yet
The Impact of Social Support and Self-Esteem On Adolescent Substance Abuse Treatment Outcome
17 pages
FinQuiz Level2Mock2016Version1JunePMSolutions
No ratings yet
FinQuiz Level2Mock2016Version1JunePMSolutions
56 pages
Toward A Framework of Determinants of Financial
No ratings yet
Toward A Framework of Determinants of Financial
7 pages
IEEE Access Estimating Compressive Strength of CO2 Incorporated Concrete With Data Augmentation and E
No ratings yet
IEEE Access Estimating Compressive Strength of CO2 Incorporated Concrete With Data Augmentation and E
16 pages
Bivariate Versus Multivariate Smart Spectrophotometric Calibration
No ratings yet
Bivariate Versus Multivariate Smart Spectrophotometric Calibration
10 pages
Logistics Service Mode Selection For Last Mile Delivery An Analysis Method Considering Customer Utilit
No ratings yet
Logistics Service Mode Selection For Last Mile Delivery An Analysis Method Considering Customer Utilit
22 pages
PDF Business Analytics A Management Approach 2019 PDF - Compress
100% (1)
PDF Business Analytics A Management Approach 2019 PDF - Compress
180 pages
HG VP
No ratings yet
HG VP
59 pages
Inflation's Nonlinear Impact on Growth
No ratings yet
Inflation's Nonlinear Impact on Growth
75 pages
Bohannon SitToStand IES2007 PDF
No ratings yet
Bohannon SitToStand IES2007 PDF
5 pages
appendixAERnunn Slave Trade&originsofmistrustinafrica
No ratings yet
appendixAERnunn Slave Trade&originsofmistrustinafrica
20 pages
Consecration of Popular Music PDF
No ratings yet
Consecration of Popular Music PDF
14 pages
Forecasting Models - PPT
No ratings yet
Forecasting Models - PPT
57 pages
Affine Term Structure Models
No ratings yet
Affine Term Structure Models
42 pages
Nesting Classical Actuarial Models Into Neural Network
No ratings yet
Nesting Classical Actuarial Models Into Neural Network
27 pages
AP Stats: Scatterplots & Regression
0% (1)
AP Stats: Scatterplots & Regression
3 pages
Kerala VDO & Finger Length Study
No ratings yet
Kerala VDO & Finger Length Study
10 pages
Supervised Machine Learning - Javatpoint
No ratings yet
Supervised Machine Learning - Javatpoint
9 pages

Correlation Matrix

Uploaded by

Correlation Matrix

Uploaded by

Import

from sklearn.model_selection import train_test_split

for col in cols:

# df = [Link]('Time & Date', axis=1)

[Link](figsize = (50, 50))

Text(0.5, 1.0, 'Correlation Heatmap')

Dropping 'Trt Time' due to correlation > 0.5

[Link](figsize = (12, 8))

Text(0.5, 1.0, 'Correlation Heatmap')

X_train, X_test, y_train, y_test = train_test_split(X, y,

X_2 = df_2.drop('Treating Pr LOR', axis=1)

X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(X_2, y_2,

mse = mean_squared_error(y_test, y_pred)

print(f"Mean Squared Error: {mse}")

Mean Squared Error: 31261.686890693905

mse2 = mean_squared_error(y_test_2, y_pred2)

print(f"Mean Squared Error: {mse2}")

Mean Squared Error: 29792.2029402794

You might also like