Final Project
Kan Grizzel A. F. Kamagi (12191041) & Santy Sarah Zhafirah (12191073)
6/4/2021
# Preview the first rows of `dataku` to check column names and value scales.
head(dataku)
## # A tibble: 6 x 9
## ph Hardness Solids Chloramines Sulfate Conductivity Organic_carbon
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 8.32 214. 22018. 8.06 357. 363. 18.4
## 2 9.09 181. 17979. 6.55 310. 398. 11.6
## 3 5.58 188. 28749. 7.54 327. 280. 8.40
## 4 10.2 248. 28750. 7.51 394. 284. 13.8
## 5 8.64 203. 13672. 4.56 303. 475. 12.4
## 6 11.2 227. 25485. 9.08 404. 564. 17.9
## # ... with 2 more variables: Trihalomethanes <dbl>, Turbidity <dbl>
# Show the dimensions of `dataku` and the storage type of every column.
str(dataku)
## tibble[,9] [50 x 9] (S3: tbl_df/tbl/data.frame)
## $ ph : num [1:50] 8.32 9.09 5.58 10.22 8.64 ...
## $ Hardness : num [1:50] 214 181 188 248 203 ...
## $ Solids : num [1:50] 22018 17979 28749 28750 13672 ...
## $ Chloramines : num [1:50] 8.06 6.55 7.54 7.51 4.56 ...
## $ Sulfate : num [1:50] 357 310 327 394 303 ...
## $ Conductivity : num [1:50] 363 398 280 284 475 ...
## $ Organic_carbon : num [1:50] 18.4 11.6 8.4 13.8 12.4 ...
## $ Trihalomethanes: num [1:50] 100.3 32 54.9 84.6 62.8 ...
## $ Turbidity : num [1:50] 4.63 4.08 2.56 2.67 4.4 ...
# Per-column five-number summary and mean, before any rows are removed.
summary(dataku)
## ph Hardness Solids Chloramines
## Min. : 3.445 Min. :100.5 Min. : 6743 Min. : 3.606
## 1st Qu.: 6.037 1st Qu.:174.2 1st Qu.:18167 1st Qu.: 5.777
## Median : 7.153 Median :198.7 Median :23428 Median : 6.834
## Mean : 7.226 Mean :194.8 Mean :23084 Mean : 6.779
## 3rd Qu.: 8.727 3rd Qu.:214.5 3rd Qu.:28749 3rd Qu.: 7.624
## Max. :11.180 Max. :300.3 Max. :41065 Max. :10.057
## Sulfate Conductivity Organic_carbon Trihalomethanes
## Min. :247.2 Min. :269.4 Min. : 6.016 Min. : 17.92
## 1st Qu.:310.3 1st Qu.:392.6 1st Qu.: 9.898 1st Qu.: 51.97
## Median :327.2 Median :441.8 Median :12.436 Median : 66.11
## Mean :332.9 Mean :440.2 Mean :12.759 Mean : 64.73
## 3rd Qu.:355.3 3rd Qu.:497.8 3rd Qu.:14.817 3rd Qu.: 78.12
## Max. :404.0 Max. :669.7 Max. :23.918 Max. :100.34
## Turbidity
## Min. :2.293
## 1st Qu.:3.629
## Median :3.932
## Mean :3.982
## 3rd Qu.:4.454
## Max. :5.326
library(corrplot)
## corrplot 0.84 loaded
# Pairwise correlation matrix of all columns in `dataku`
# (Pearson, which is the default method of cor()).
crx <- cor(dataku, method = "pearson")
print(crx)
## ph Hardness Solids Chloramines
Sulfate
## ph 1.00000000 0.200750509 -0.36290358 -0.063470965
0.22070876
## Hardness 0.20075051 1.000000000 0.09003876 0.194117356
0.81324934
## Solids -0.36290358 0.090038757 1.00000000 0.464992982
0.41905328
## Chloramines -0.06347096 0.194117356 0.46499298 1.000000000
0.57463377
## Sulfate 0.22070876 0.813249342 0.41905328 0.574633768
1.00000000
## Conductivity -0.01018464 -0.165871008 0.07819625 0.079775598
-0.04307355
## Organic_carbon 0.35522635 0.022194555 -0.07288260 0.008957478
0.11694356
## Trihalomethanes -0.03662006 0.006917402 -0.14121243 -0.049074757
-0.12282116
## Turbidity 0.06118153 0.085096568 -0.07968185 0.370918433
0.17525484
## Conductivity Organic_carbon Trihalomethanes Turbidity
## ph -0.010184644 0.355226351 -0.036620063 0.06118153
## Hardness -0.165871008 0.022194555 0.006917402 0.08509657
## Solids 0.078196250 -0.072882603 -0.141212428 -0.07968185
## Chloramines 0.079775598 0.008957478 -0.049074757 0.37091843
## Sulfate -0.043073550 0.116943562 -0.122821158 0.17525484
## Conductivity 1.000000000 0.068074662 0.005177331 0.07043398
## Organic_carbon 0.068074662 1.000000000 -0.051785130 0.05466444
## Trihalomethanes 0.005177331 -0.051785130 1.000000000 0.05715654
## Turbidity 0.070433983 0.054664441 0.057156536 1.00000000
# Colour-coded lower triangle of the correlation matrix.
# `tl.srt` sets the rotation (in degrees) of the variable labels; the argument
# name had been garbled into `[Link]`, which is not valid R.
corrplot(crx, method = "color", type = "lower", tl.srt = 30)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
library(lattice)
library(ggplot2)
# Ask caret which variables to drop so that pairwise correlations stay at or
# below the 0.8 cutoff. `verbose = TRUE` prints each comparison and
# `names = TRUE` returns column names instead of column indices.
findCorrelation(crx, cutoff = 0.8, verbose = TRUE, names = TRUE)
## Compare row 5 and column 2 with corr 0.813
## Means: 0.311 vs 0.156 so flagging column 5
## All correlations <= 0.8
## [1] "Sulfate"
# Scatter plot of the two variables compared by findCorrelation().
sulfate <- dataku$Sulfate
hardness <- dataku$Hardness
# NOTE(review): the two colours are recycled over the points in row order, so
# they alternate black/red and do not encode any variable.
plot(sulfate, hardness, col = c("black", "red"))

# Draw the two boxplots side by side, then restore the previous graphics
# settings so that the 1 x 2 layout does not leak into later plots.
old_par <- par(mfrow = c(1, 2))
boxplot(dataku$Sulfate, main = "Sulfate")
boxplot(dataku$Hardness, main = "Hardness")
par(old_par)

# Values of Hardness that fall beyond the boxplot whiskers.
outliers <- boxplot(dataku$Hardness, plot = FALSE)$out
outliers
## [1] 100.4576 300.2925
# Show the full rows whose Hardness value was reported as an outlier.
is_outlier <- dataku$Hardness %in% outliers
dataku[is_outlier, ]
## # A tibble: 2 x 9
## ph Hardness Solids Chloramines Sulfate Conductivity Organic_carbon
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7.81 100. 12014. 5.21 247. 605. 9.61
## 2 10.2 300. 12652. 6.66 399. 487. 9.35
## # ... with 2 more variables: Trihalomethanes <dbl>, Turbidity <dbl>
# Drop the rows whose Hardness is a boxplot outlier. A logical mask is used
# rather than `-which(...)`: with no matches, `-which(...)` is an empty index
# and would select zero rows, silently emptying the data set.
dataku <- dataku[!(dataku$Hardness %in% outliers), ]
boxplot(dataku$Hardness, main = "Hardness")

# Re-apply the boxplot rule to the trimmed data; removing points changes the
# quartiles, so new values can fall outside the whiskers.
outliers <- boxplot(dataku$Hardness, plot = FALSE)$out
outliers
## [1] 273.8138
# Show the full rows flagged by the second pass of the boxplot rule.
is_outlier <- dataku$Hardness %in% outliers
dataku[is_outlier, ]
## # A tibble: 1 x 9
## ph Hardness Solids Chloramines Sulfate Conductivity Organic_carbon
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 9.18 274. 24041. 6.90 398. 478. 13.4
## # ... with 2 more variables: Trihalomethanes <dbl>, Turbidity <dbl>
# Second removal pass. The logical mask keeps every row when `outliers` is
# empty, whereas `-which(...)` would have returned a zero-row table.
dataku <- dataku[!(dataku$Hardness %in% outliers), ]
boxplot(dataku$Hardness, main = "Hardness")

# Preview the cleaned data.
head(dataku)
## # A tibble: 6 x 9
## ph Hardness Solids Chloramines Sulfate Conductivity Organic_carbon
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 8.32 214. 22018. 8.06 357. 363. 18.4
## 2 9.09 181. 17979. 6.55 310. 398. 11.6
## 3 5.58 188. 28749. 7.54 327. 280. 8.40
## 4 10.2 248. 28750. 7.51 394. 284. 13.8
## 5 8.64 203. 13672. 4.56 303. 475. 12.4
## 6 11.2 227. 25485. 9.08 404. 564. 17.9
## # ... with 2 more variables: Trihalomethanes <dbl>, Turbidity <dbl>
# Confirm the row count and column types after the outlier rows were dropped.
str(dataku)
## tibble[,9] [47 x 9] (S3: tbl_df/tbl/data.frame)
## $ ph : num [1:47] 8.32 9.09 5.58 10.22 8.64 ...
## $ Hardness : num [1:47] 214 181 188 248 203 ...
## $ Solids : num [1:47] 22018 17979 28749 28750 13672 ...
## $ Chloramines : num [1:47] 8.06 6.55 7.54 7.51 4.56 ...
## $ Sulfate : num [1:47] 357 310 327 394 303 ...
## $ Conductivity : num [1:47] 363 398 280 284 475 ...
## $ Organic_carbon : num [1:47] 18.4 11.6 8.4 13.8 12.4 ...
## $ Trihalomethanes: num [1:47] 100.3 32 54.9 84.6 62.8 ...
## $ Turbidity : num [1:47] 4.63 4.08 2.56 2.67 4.4 ...
# Per-column summary statistics of the cleaned data.
summary(dataku)
## ph Hardness Solids Chloramines
## Min. : 3.445 Min. :116.3 Min. : 6743 Min. : 3.606
## 1st Qu.: 5.841 1st Qu.:175.0 1st Qu.:19096 1st Qu.: 5.797
## Median : 7.120 Median :198.6 Median :23827 Median : 6.846
## Mean : 7.110 Mean :192.9 Mean :23522 Mean : 6.812
## 3rd Qu.: 8.578 3rd Qu.:213.7 3rd Qu.:28765 3rd Qu.: 7.767
## Max. :11.180 Max. :259.0 Max. :41065 Max. :10.057
## Sulfate Conductivity Organic_carbon Trihalomethanes
## Min. :266.9 Min. :269.4 Min. : 6.016 Min. : 17.92
## 1st Qu.:310.5 1st Qu.:380.3 1st Qu.:10.136 1st Qu.: 50.79
## Median :326.7 Median :430.5 Median :12.509 Median : 65.73
## Mean :331.9 Mean :434.9 Mean :12.885 Mean : 64.49
## 3rd Qu.:349.9 3rd Qu.:491.3 3rd Qu.:15.243 3rd Qu.: 78.61
## Max. :404.0 Max. :669.7 Max. :23.918 Max. :100.34
## Turbidity
## Min. :2.293
## 1st Qu.:3.631
## Median :3.924
## Mean :3.982
## 3rd Qu.:4.418
## Max. :5.326
library(corrplot)
# Recompute the pairwise correlation matrix on the cleaned data
# (Pearson, which is the default method of cor()).
correlation_matrix <- cor(dataku, method = "pearson")
print(correlation_matrix)
## ph Hardness Solids Chloramines
Sulfate
## ph 1.00000000 0.12164401 -0.3394913 -0.05805491
0.172850305
## Hardness 0.12164401 1.00000000 0.1164963 0.18197992
0.752346046
## Solids -0.33949132 0.11649632 1.0000000 0.45533802
0.474210029
## Chloramines -0.05805491 0.18197992 0.4553380 1.00000000
0.609290272
## Sulfate 0.17285030 0.75234605 0.4742100 0.60929027
1.000000000
## Conductivity -0.05329541 -0.15938732 0.1635334 0.12791796
0.010886676
## Organic_carbon 0.40959835 0.03216390 -0.1364765 -0.01316106
0.121523719
## Trihalomethanes -0.05187998 -0.01707589 -0.1406342 -0.04821940
-0.159162914
## Turbidity 0.02219425 -0.14413893 -0.1152796 0.35731988
0.009823881
## Conductivity Organic_carbon Trihalomethanes Turbidity
## ph -0.053295415 0.40959835 -0.051879980 0.022194254
## Hardness -0.159387316 0.03216390 -0.017075886 -0.144138925
## Solids 0.163533366 -0.13647649 -0.140634202 -0.115279595
## Chloramines 0.127917965 -0.01316106 -0.048219400 0.357319878
## Sulfate 0.010886676 0.12152372 -0.159162914 0.009823881
## Conductivity 1.000000000 0.12219983 -0.003709589 0.142812251
## Organic_carbon 0.122199831 1.00000000 -0.048890699 0.045568504
## Trihalomethanes -0.003709589 -0.04889070 1.000000000 0.054071620
## Turbidity 0.142812251 0.04556850 0.054071620 1.000000000
# Colour-coded lower triangle of the cleaned-data correlation matrix.
# `tl.srt` sets the rotation (in degrees) of the variable labels; the argument
# name had been garbled into `[Link]`, which is not valid R.
corrplot(correlation_matrix, method = "color", type = "lower", tl.srt = 30)

# Principal component analysis on the correlation matrix (`cor = TRUE`), so
# every variable is standardised first; `scores = TRUE` keeps the component
# scores of each observation.
pca <- princomp(dataku, cor = TRUE, scores = TRUE)
summary(pca, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.542230 1.2865755 1.1905802 1.0413163 0.9585635
## Proportion of Variance 0.264275 0.1839196 0.1574979 0.1204822 0.1020938
## Cumulative Proportion 0.264275 0.4481946 0.6056925 0.7261747 0.8282684
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.8016643 0.72607634 0.55639248 0.257213679
## Proportion of Variance 0.0714073 0.05857632 0.03439696 0.007350986
## Cumulative Proportion 0.8996757 0.95825206 0.99264901 1.000000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
Comp.9
## ph 0.647 0.156 0.688 0.234
0.154
## Hardness 0.425 0.290 -0.351 -0.220 -0.132 0.411 -0.340
0.507
## Solids 0.421 -0.391 0.273 -0.153 -0.383 0.152 0.586
0.233
## Chloramines 0.490 -0.157 0.318 -0.186 0.143 -0.232 0.231 -0.624
0.288
## Sulfate 0.613 0.177
-0.759
## Conductivity -0.168 0.493 0.385 -0.509 0.557
## Organic_carbon 0.504 0.322 0.280 -0.233 -0.500 -0.503
## Trihalomethanes -0.135 -0.668 -0.684 -0.207 0.113
## Turbidity 0.627 -0.408 0.392 0.142 -0.255 0.441
# List the components stored in the princomp result (standard deviations,
# loadings, centring/scaling values, scores, and the original call).
str(pca)
## List of 7
## $ sdev : Named num [1:9] 1.542 1.287 1.191 1.041 0.959 ...
## ..- attr(*, "names")= chr [1:9] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ loadings: 'loadings' num [1:9, 1:9] 0.00255 0.42501 0.42149 0.49009
0.61305 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:9] "ph" "Hardness" "Solids" "Chloramines" ...
## .. ..$ : chr [1:9] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ center : Named num [1:9] 7.11 192.9 23521.59 6.81 331.89 ...
## ..- attr(*, "names")= chr [1:9] "ph" "Hardness" "Solids"
"Chloramines" ...
## $ scale : Named num [1:9] 2 33.66 7650.66 1.54 33.29 ...
## ..- attr(*, "names")= chr [1:9] "ph" "Hardness" "Solids"
"Chloramines" ...
## $ n.obs : int 47
## $ scores : num [1:47, 1:9] 0.835 -0.734 0.182 2.01 -1.584 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:9] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ call : language princomp(x = dataku, cor = TRUE, scores = TRUE)
## - attr(*, "class")= chr "princomp"
# Component scores: one row per observation in `dataku`, one column per
# principal component.
pca$scores
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
Comp.6
## [1,] 0.83537697 1.459207088 0.83328767 -1.87713760 -0.83711976
-1.305154144
## [2,] -0.73368175 0.689620847 -0.01808034 0.79579704 1.74579905
0.490445980
## [3,] 0.18182662 -1.105247619 -2.51539852 0.19553031 0.75440820
-1.008353084
## [4,] 2.00999656 1.962542178 -2.24314592 -0.89884430 -0.85086815
-1.088067028
## [5,] -1.58420090 0.980378893 0.23267778 -0.12477974 0.10601668
1.360302281
## [6,] 2.70559338 2.092424982 1.81588643 0.02274932 -1.12646024
0.337759522
## [7,] 0.17557736 -0.312320922 0.34876183 0.27018560 -0.80356994
-1.569307523
## [8,] -2.83586662 0.620859338 -0.77917218 0.14764064 -0.51445622
-1.110968219
## [9,] 2.38352909 -1.727336230 1.08085334 -0.06576437 -0.69531580
-0.627311827
## [10,] -0.36849969 0.427483989 -1.78728888 1.20670052 -1.23646571
0.554642386
## [11,] -0.59346021 -0.212291202 0.81754537 1.05773370 -1.57146813
-0.906640733
## [12,] 0.09132290 -1.396843646 1.91854363 -0.45624587 1.20399045
-0.123693706
## [13,] 0.36749487 -0.065481054 1.46751397 -1.50514356 0.28595422
-0.485783020
## [14,] 2.59087705 -1.297940064 -0.02359614 1.09552260 1.01670547
-0.236064824
## [15,] -0.46587506 0.060789475 -0.30476304 -2.00903699 1.24615680
1.005791442
## [16,] -0.97062834 0.174380102 2.19051202 -0.38260610 1.22203962
-0.996373415
## [17,] 2.10308155 0.072711157 -1.74759037 0.81812003 0.19072866
0.478483764
## [18,] -0.52505073 -0.667410745 -0.49552160 0.40544275 0.57056958
0.711384894
## [19,] -1.42115764 -1.958825548 -1.00067309 0.03859002 -0.61775485
0.344277344
## [20,] 0.12027186 1.393758836 0.28351674 1.25486261 1.35688146
-0.172615151
## [21,] -0.28411415 -0.631577362 -0.27281403 -1.15078432 1.41504880
-0.002154818
## [22,] 1.85365524 0.261005723 -0.36075398 -1.53613813 0.96962674
0.220267644
## [23,] -0.47052963 3.433083829 1.29680664 -0.75977132 0.28454905
-1.340317411
## [24,] -1.79944338 1.060642312 -1.36271687 -0.80065954 -1.03010688
0.416520003
## [25,] -1.71788086 1.158407696 -0.50490671 -1.15011829 0.58882473
-0.122349319
## [26,] -0.79770261 1.077199079 -0.73337190 0.87377995 0.42354572
0.262447104
## [27,] -2.21143926 -0.917195710 0.48505411 0.69094918 -0.79298055
0.648842623
## [28,] 0.09828236 2.072451971 0.96111006 0.98296578 -0.16833507
0.785450529
## [29,] 0.24562663 1.697046576 0.78294831 1.48858170 0.88643952
-0.033755433
## [30,] 2.40188613 -0.325936173 -0.68538202 0.23641566 0.90726808
1.775685302
## [31,] -0.26398252 -1.573087540 -0.14200601 -2.03623827 -0.53220012
-0.007085688
## [32,] 1.19931822 -1.195168113 -2.07183785 -0.98281135 0.77557452
-0.616557054
## [33,] -0.13107457 0.574634295 -0.99829126 1.40344865 1.81878838
0.510330924
## [34,] 2.83938809 -1.366925461 2.02485127 -0.20345891 -1.57399949
1.276730397
## [35,] 0.08126472 0.627190918 0.35371310 1.55735205 -0.47542742
-0.161136502
## [36,] -3.22787465 0.316428527 -0.59562096 -1.21998698 -0.56780720
1.150236429
## [37,] -0.85433398 -2.493674687 0.88696516 1.33505375 0.67114624
-0.335475903
## [38,] -1.26034081 -0.975593451 2.70009760 0.38769758 0.52438028
0.274293235
## [39,] -1.53899514 -1.905853598 -0.99861024 -1.02670277 0.41253834
-1.158650427
## [40,] 0.39939077 -1.859421174 -0.06167952 1.64279259 -0.23804273
-1.078003866
## [41,] -0.82101562 0.180761939 -0.98688213 1.93770354 -1.19392487
-0.598639565
## [42,] 2.46303992 -0.740992341 -0.59259982 -0.56026464 -0.15587938
-0.117490689
## [43,] -1.18791755 0.479241076 0.13455350 -0.77238019 -0.80321638
1.276026909
## [44,] 2.10655163 -0.009092158 -1.15993082 -0.26483347 -1.96724509
0.807079867
## [45,] -2.59584035 -1.740273092 1.13325879 -0.12920269 -1.48485401
0.407608573
## [46,] 1.47449313 0.030589662 0.78867771 -0.60809855 0.08580463
0.408030798
## [47,] -0.06693902 1.575647403 -0.09450085 0.67539236 -0.22528723
-0.300688601
## Comp.7 Comp.8 Comp.9
## [1,] -0.49955243 0.072140378 -0.019598764
## [2,] 0.58678736 -0.130597329 0.330011004
## [3,] 0.72491435 -0.787045119 0.167140746
## [4,] 1.09390449 0.035743435 -0.128529295
## [5,] -0.17798408 0.557459615 0.212975591
## [6,] 0.71817481 -0.079304148 -0.259829623
## [7,] 0.45018884 0.168640767 0.126731333
## [8,] -0.44214871 0.558632331 -0.184278655
## [9,] 0.75496100 0.357838034 0.131330785
## [10,] 0.12432910 0.313567879 -0.115332232
## [11,] -0.41742979 0.578682768 0.165801940
## [12,] 0.21951394 -1.628166963 -0.410533452
## [13,] -0.67528439 -0.216041412 0.482449830
## [14,] -1.31061800 -0.402150126 -0.454939756
## [15,] -0.11862205 0.594927581 -0.404287250
## [16,] 1.68997292 0.242620053 -0.115214845
## [17,] 0.41685849 0.348282520 -0.298236432
## [18,] -0.65080119 -0.311672057 -0.220335724
## [19,] -0.47529904 -0.095707158 0.365639994
## [20,] -0.26675413 0.702400636 0.390341399
## [21,] -0.45734191 -0.837674867 0.209874344
## [22,] -0.54834102 1.400620864 -0.432390500
## [23,] -1.25896693 -0.348742610 -0.054541361
## [24,] -0.51094083 -0.408336961 -0.297188300
## [25,] 1.49001792 0.455995081 -0.030711736
## [26,] 0.04359672 0.551672905 0.198791866
## [27,] 0.44435773 0.092778607 -0.239340471
## [28,] -0.40908103 -1.105325563 0.131160536
## [29,] 0.61270045 -0.005327059 0.035269600
## [30,] 1.25564031 0.075177539 0.091362881
## [31,] 0.07180436 0.139542919 -0.118371347
## [32,] -0.92503335 0.450769962 0.207726793
## [33,] -1.14623164 0.190523065 0.141388535
## [34,] -0.84064824 0.409048604 0.326761704
## [35,] -0.48296540 0.130075486 -0.603953357
## [36,] -0.05529957 -0.048074643 -0.101786496
## [37,] 0.95675212 0.343096578 -0.114422493
## [38,] -0.33539619 -0.015791436 -0.287573588
## [39,] -0.36930270 -0.502217185 0.112149872
## [40,] -0.79826151 0.399575379 -0.095551065
## [41,] -0.11273598 -0.764219573 0.002449963
## [42,] 1.21616556 -0.679284732 0.116621529
## [43,] 0.06630628 -1.009207155 0.133451816
## [44,] -0.24432055 -0.591491432 -0.050757889
## [45,] 0.72647562 0.601690174 0.123687627
## [46,] -0.72412898 0.432640668 0.468794504
## [47,] 0.59006726 -0.237766299 0.365790438
# Append the scores of the first two principal components to the data.
data2 <- cbind(dataku, pca$scores[, 1:2])

# Correlate every original variable with the two component scores. The score
# columns are selected by name: `dataku` has 9 columns, so the appended scores
# sit in columns 10:11, and the former `data2[, 8:9]` picked Trihalomethanes
# and Turbidity instead.
# NOTE: the table printed after this call in the report was produced with the
# old column selection and must be regenerated.
cxx <- cor(dataku, data2[, c("Comp.1", "Comp.2")])
cxx
## Trihalomethanes Turbidity
## ph -0.051879980 0.022194254
## Hardness -0.017075886 -0.144138925
## Solids -0.140634202 -0.115279595
## Chloramines -0.048219400 0.357319878
## Sulfate -0.159162914 0.009823881
## Conductivity -0.003709589 0.142812251
## Organic_carbon -0.048890699 0.045568504
## Trihalomethanes 1.000000000 0.054071620
## Turbidity 0.054071620 1.000000000
# Scree plot drawn by the princomp plot method.
plot(pca, type = "l", main = "Scree Diagram")

# The same diagram drawn by hand: squared standard deviations are the
# component variances. The x-axis label is kept on one line; the string
# literal was previously split across two lines, which put a newline inside
# the label.
plot(pca$sdev^2, type = "l", main = "Scree Diagram",
     xlab = "Component Number", ylab = "Component Variance")

# Loadings of every variable on the first principal component.
component_1 <- pca$loadings[, 1]
component_1
## ph Hardness Solids Chloramines
Sulfate
## 0.002550478 0.425010709 0.421494476 0.490086466
0.613050771
## Conductivity Organic_carbon Trihalomethanes Turbidity
## 0.059677905 0.030176610 -0.135417797 0.053625983
# Biplot of the fitted PCA (observations and variable directions).
biplot(x = pca)

# Fit the same correlation-based PCA again and print its summary with loadings.
pca <- princomp(
  x = dataku,
  cor = TRUE,
  scores = TRUE
)
summary(object = pca, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.542230 1.2865755 1.1905802 1.0413163 0.9585635
## Proportion of Variance 0.264275 0.1839196 0.1574979 0.1204822 0.1020938
## Cumulative Proportion 0.264275 0.4481946 0.6056925 0.7261747 0.8282684
## Comp.6 Comp.7 Comp.8 Comp.9
## Standard deviation 0.8016643 0.72607634 0.55639248 0.257213679
## Proportion of Variance 0.0714073 0.05857632 0.03439696 0.007350986
## Cumulative Proportion 0.8996757 0.95825206 0.99264901 1.000000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
Comp.9
## ph 0.647 0.156 0.688 0.234
0.154
## Hardness 0.425 0.290 -0.351 -0.220 -0.132 0.411 -0.340
0.507
## Solids 0.421 -0.391 0.273 -0.153 -0.383 0.152 0.586
0.233
## Chloramines 0.490 -0.157 0.318 -0.186 0.143 -0.232 0.231 -0.624
0.288
## Sulfate 0.613 0.177
-0.759
## Conductivity -0.168 0.493 0.385 -0.509 0.557
## Organic_carbon 0.504 0.322 0.280 -0.233 -0.500 -0.503
## Trihalomethanes -0.135 -0.668 -0.684 -0.207 0.113
## Turbidity 0.627 -0.408 0.392 0.142 -0.255 0.441
# Default plot of the princomp object (component variances).
plot(pca)

# p-value of the "k factors are sufficient" test for k = 1, 2, 3.
# The previous call ended with a stray empty argument and passed `method`,
# which is not an argument of factanal(); factanal() always fits by maximum
# likelihood, so both were dropped without changing the fit.
sapply(1:3, function(f) factanal(dataku, factors = f)$PVAL)
## objective objective objective
## 2.172024e-05 5.541861e-03 2.415850e-01
# Three-factor maximum-likelihood factor analysis with varimax rotation;
# factor scores are estimated by the regression method.
pca.fa1 <- factanal(
  dataku,
  factors = 3,
  rotation = "varimax",
  scores = "regression"
)
print(pca.fa1)
##
## Call:
## factanal(x = dataku, factors = 3, scores = "regression", rotation =
"varimax")
##
## Uniquenesses:
## ph Hardness Solids Chloramines
Sulfate
## 0.107 0.005 0.429 0.416
0.005
## Conductivity Organic_carbon Trihalomethanes Turbidity
## 0.930 0.798 0.950 0.944
##
## Loadings:
## Factor1 Factor2 Factor3
## ph 0.943
## Hardness 0.464 0.878
## Solids 0.660 -0.320 -0.180
## Chloramines 0.742 -0.183
## Sulfate 0.911 0.200 0.355
## Conductivity 0.113 -0.238
## Organic_carbon 0.445
## Trihalomethanes -0.192
## Turbidity -0.215
##
## Factor1 Factor2 Factor3
## SS loadings 2.091 1.247 1.079
## Proportion Var 0.232 0.139 0.120
## Cumulative Var 0.232 0.371 0.491
##
## Test of the hypothesis that 3 factors are sufficient.
## The chi square statistic is 15 on 12 degrees of freedom.
## The p-value is 0.242
# Estimated factor scores: one row per observation, one column per factor.
pca.fa1$scores
## Factor1 Factor2 Factor3
## [1,] 0.52535875 0.66204824 0.36902454
## [2,] -0.85689787 0.81499195 -0.03779627
## [3,] 0.04736087 -0.81513350 -0.08875855
## [4,] 1.29374519 1.42235192 1.01751202
## [5,] -1.44102769 0.55410166 1.03621995
## [6,] 1.90823291 2.08407983 -0.07380404
## [7,] 0.20957298 0.11899883 -1.03529444
## [8,] -1.45704035 0.01293190 -0.43323171
## [9,] 1.52699051 -0.36866700 -0.97276122
## [10,] -0.21352129 0.03665845 0.83295054
## [11,] -0.39440696 -0.22456610 -0.59409785
## [12,] 0.78320815 -0.48810562 -2.08483774
## [13,] -0.31965922 -0.32721346 0.38013935
## [14,] 2.16333604 -1.40996791 -0.47179376
## [15,] -0.04444010 0.05264123 0.58386032
## [16,] -0.14151963 1.63737180 -2.59558456
## [17,] 1.51243027 0.15875901 0.60425171
## [18,] -0.13709599 -0.86492782 0.13811080
## [19,] -1.28963796 -1.82732087 0.55954006
## [20,] -0.42958314 0.92746477 0.30681354
## [21,] -0.44654772 -0.85357028 0.38434746
## [22,] 1.41767605 0.09017500 0.78163313
## [23,] -0.28766705 1.53881289 0.32033311
## [24,] -0.94777518 -0.05441126 1.07012926
## [25,] -1.03237047 1.54154390 -0.26826137
## [26,] -0.82288050 0.64577488 0.60182463
## [27,] -0.98180000 -0.21175146 -1.05095006
## [28,] -0.20910713 1.04654596 0.62031441
## [29,] 0.15071807 1.67482560 -0.62218684
## [30,] 1.18667504 0.52432458 0.83080749
## [31,] -0.01602592 -1.03210743 0.03216364
## [32,] 0.39084245 -1.62064102 1.31082285
## [33,] -0.41435503 -0.35514227 0.92195735
## [34,] 1.25754241 -1.00923937 0.94889944
## [35,] 0.79837983 0.33371034 -0.86865893
## [36,] -2.04114165 -0.09366887 0.75551199
## [37,] -0.04511002 -0.63671035 -2.45508837
## [38,] -0.25876909 -0.21746334 -1.89986613
## [39,] -0.90600134 -1.72840111 -0.47968469
## [40,] 0.58452059 -1.48863346 -1.11882004
## [41,] -0.38279794 -0.26726007 -0.30626381
## [42,] 1.43723109 0.05000297 0.05879165
## [43,] -1.03235316 0.06950782 0.92954911
## [44,] 1.16144739 -0.54579111 1.64815846
## [45,] -1.56902896 -0.49627493 -1.09768167
## [46,] 0.22416349 -0.27734451 1.19803488
## [47,] -0.46087074 1.21668958 0.31372037
# Scatter plot of the factor scores (first two columns of the score matrix).
plot(pca.fa1$scores)

# Loading plot: each variable is placed by its loadings on Factor1 and
# Factor2 and labelled with its column name to the left of the point.
load <- pca.fa1$loadings[, 1:2]
plot(
  load,
  type = "p", pch = 20, cex = 2, col = "red",
  xlim = c(-1.0, 1.0),
  ylim = c(-1.0, 1.4)
)
text(load, labels = names(dataku), pos = 2, cex = 0.75)