0% found this document useful (0 votes)
6 views5 pages

Correlation Matrix

Uploaded by

schlaggen
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
6 views5 pages

Correlation Matrix

Uploaded by

schlaggen
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

Import

# Numerical / tabular tooling and plotting.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Modelling utilities: splitting, feature scaling, the regressor and metrics.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# NOTE(review): the CSV path was lost when this notebook was exported (only
# the placeholder '[Link]' survives); substitute the real file path.
df = pd.read_csv('[Link]')

# Print the index range, column count, dtypes and memory usage of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38291 entries, 0 to 38290
Columns: 428 entries, Time & Date to cpatToolfaceTar
dtypes: float64(34), int64(390), object(4)
memory usage: 125.0+ MB

cols = df.columns.tolist()

# Collect the columns that carry no usable signal: those whose null count
# exceeds 49.9% of the rows, or whose values are all equal to zero.
drop_col = [
    col
    for col in cols
    if df[col].isnull().sum() > 0.499 * len(df) or (df[col] == 0).all()
]

# Number of columns flagged for removal.
len(drop_col)

376

# Remove the columns collected in drop_col.
df = df.drop(drop_col, axis=1)

# Keep only the int64 / float64 columns; every other dtype is discarded.
df = df.select_dtypes(include=['int64', 'float64'])

# Hand-picked columns excluded from the analysis.
# NOTE(review): the reason for excluding these six is not recorded here.
df = df.drop(
    ['Prop Spec Vol', 'Prp Spc Vol Ann', 'N2 Temp Ann', 'CO2 Temp Ann',
     'Trt Num', 'Ann Pr @WH LOR'],
    axis=1,
)

# df = df.drop('Time & Date', axis=1)

# Large canvas: every remaining column gets an annotated cell.
plt.figure(figsize=(50, 50))

# Compute the correlation matrix once and reuse it for the mask and the plot.
corr = df.corr()

# True on and above the diagonal; seaborn hides the cells where the mask is
# True, so only the lower triangle is drawn.
mask = np.triu(np.ones_like(corr, dtype=bool))
heatmap = sns.heatmap(corr, mask=mask, vmin=-1, vmax=1, annot=True)
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize': 18}, pad=12)

Text(0.5, 1.0, 'Correlation Heatmap')

# Keep an unpruned copy; it is used further down to train a second model on
# all of the columns for comparison.
df_2 = df.copy()

# Greedy pruning: remove one column per pass until no pair of surviving
# columns has an absolute correlation above 0.5.
# NOTE(review): every column is a pruning candidate, including
# 'Treating Pr LOR', which is used as the regression target further down;
# confirm that it can never be the column removed.
final_cols = df.columns.tolist()
while True:
    corr_matrix = df[final_cols].corr().abs()
    # Zero the diagonal so a column's correlation with itself is ignored.
    # mask() returns a new frame; writing into `corr_matrix.values` in place
    # fails when pandas hands back a read-only array (Copy-on-Write).
    corr_matrix = corr_matrix.mask(np.eye(len(corr_matrix), dtype=bool), 0)

    max_corr = corr_matrix.values.max()
    if max_corr <= 0.5:
        break

    # Scan columns in order; for the first one that has any partner above
    # the threshold, select that first partner (not the column itself).
    drop_corr = None
    for col in corr_matrix.columns:
        high_corrs = corr_matrix[col][corr_matrix[col] > 0.5]
        if not high_corrs.empty:
            drop_corr = high_corrs.index[0]
            break

    # Compare against None explicitly so a falsy column label (0, '') is
    # still treated as a real column to drop.
    if drop_corr is None:
        break

    print(f"Dropping '{drop_corr}' due to correlation > 0.5")
    final_cols.remove(drop_corr)

Dropping 'Trt Time' due to correlation > 0.5


Dropping 'Stg Time' due to correlation > 0.5
Dropping 'Slurry Rate' due to correlation > 0.5
Dropping 'CT Running Len' due to correlation > 0.5
Dropping 'CT Run Speed' due to correlation > 0.5
Dropping 'SCC Cycle' due to correlation > 0.5
Dropping 'Max Allow CT Pr' due to correlation > 0.5
Dropping 'Clean Density' due to correlation > 0.5
Dropping 'Cln Density Ann' due to correlation > 0.5
Dropping 'Slurry Density' due to correlation > 0.5
Dropping 'Slurry Den Ann' due to correlation > 0.5
Dropping 'Stage At Bldr' due to correlation > 0.5
Dropping 'SF Calc Limit' due to correlation > 0.5
Dropping 'SF Allow Limit' due to correlation > 0.5
Dropping 'SFAllowInWell' due to correlation > 0.5
Dropping 'Annulus Pr @WH' due to correlation > 0.5
Dropping 'Max Allow WH Pr' due to correlation > 0.5
Dropping 'N2 Pressure' due to correlation > 0.5
Dropping 'N2 Pressure Ann' due to correlation > 0.5
Dropping 'CO2 Pressure' due to correlation > 0.5
Dropping 'CO2 Press Ann' due to correlation > 0.5
Dropping 'Tubing Guide Pr' due to correlation > 0.5
Dropping 'Pr @ Slry Den' due to correlation > 0.5
Dropping 'Pr@Slry Den Ann' due to correlation > 0.5
Dropping 'CO2 Temp' due to correlation > 0.5
Dropping 'SF Reel Inlet' due to correlation > 0.5
Dropping 'SF Tubing Guide' due to correlation > 0.5
Dropping 'SF Abv Stripper' due to correlation > 0.5
Dropping 'SF Blw Stripper' due to correlation > 0.5
Dropping 'Max OD Growth' due to correlation > 0.5
Dropping 'CT Depth' due to correlation > 0.5
Dropping 'CT Pipe Weight' due to correlation > 0.5
Dropping 'Warning Code' due to correlation > 0.5

# Number of columns that survived the correlation pruning.
len(final_cols)

# Restrict the working frame to the surviving columns.
df = df[final_cols]

plt.figure(figsize=(12, 8))

# Compute the correlation matrix once and reuse it for the mask and the plot.
corr = df.corr()

# True on and above the diagonal; seaborn hides the cells where the mask is
# True, so only the lower triangle is drawn.
mask = np.triu(np.ones_like(corr, dtype=bool))
heatmap = sns.heatmap(corr, mask=mask, vmin=-1, vmax=1, annot=True)
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize': 18}, pad=12)

Text(0.5, 1.0, 'Correlation Heatmap')


# Features are every pruned column except the target 'Treating Pr LOR'.
X = df.drop('Treating Pr LOR', axis=1)
y = df['Treating Pr LOR']

# 80/20 split with a fixed seed so the result is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test-set statistics leak into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Same preparation for the unpruned copy df_2: every column except the
# target 'Treating Pr LOR' is a feature.
X_2 = df_2.drop('Treating Pr LOR', axis=1)
y_2 = df_2['Treating Pr LOR']

# Same split parameters as the pruned pipeline (80/20, seed 42).
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(
    X_2, y_2, test_size=0.2, random_state=42
)

# A separate scaler, fitted on this pipeline's training rows only.
scaler2 = StandardScaler()
X_train_2 = scaler2.fit_transform(X_train_2)
X_test_2 = scaler2.transform(X_test_2)

# Ordinary least-squares regression on the pruned, scaled features.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Evaluate on the held-out test rows.
y_pred = lr.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 31261.686890693905


R-squared: 0.13832227402498298

# Ordinary least-squares regression on the unpruned, scaled features, for
# comparison with the model trained on the pruned columns.
lr2 = LinearRegression()
lr2.fit(X_train_2, y_train_2)

# Evaluate on the held-out test rows.
y_pred2 = lr2.predict(X_test_2)

mse2 = mean_squared_error(y_test_2, y_pred2)
r2_2 = r2_score(y_test_2, y_pred2)

print(f"Mean Squared Error: {mse2}")
print(f"R-squared: {r2_2}")

Mean Squared Error: 29792.2029402794


R-squared: 0.17882621717997915

You might also like