0% found this document useful (0 votes)
21 views2 pages

Solution 2-Programming

Uploaded by

poonlam167
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
21 views2 pages

Solution 2-Programming

Uploaded by

poonlam167
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

Solution 2

Code

Problem7
Hide

X <- c(-0.6, 3.1, 25.3, -16.8, -7.1, -6.2, 25.2, 22.6, 26)

qqnorm(X)
qqline(X)

Hide

qj <- qnorm(ppoints(X))
rq <- cor(sort(X), qj)
rq

[1] 0.9383158

Problem8
Hide

sales<-c(108.28,152.36,95.04,65.45,62.97,263.99,265.19,285.06,92.01,165.68)
profits<-c(17.05,16.59,10.91,14.14,9.52,25.33,18.54,15.73,8.10,11.13)
assets<-c(1484.10,750.33,766.42,1110.46,1031.29,195.26,193.83,191.11,1175.16,211.15)

World<-[Link]([Link](sales=sales,profits=profits,assets=assets))
n <- nrow(World)

es_cov <- cov(World)


es_mean <- colMeans(World)
hat_World <- World - matrix(1, nrow = n, ncol = 1) %*% matrix(es_mean, nrow = 1)

dj <- rowSums((World %*% solve(es_cov)) * World)

qqplot(qchisq(ppoints(n,a=1/2),3),dj)

library(ggplot2)
ggplot2:: last_plot()+qqline(dj,distribution=function(p) qchisq(p,df=4),prob=c(0.1,0.6))

integer(0)

Problem9
(a)
Hide

#Psych <- [Link](file = "[Link]", head = T, sep = c(",", " "))


Psych <- [Link](Psych)
nrow(Psych)

[1] 130

Hide

qqnorm(Psych[,1],main = "Independence")
qqline(Psych[,1])

Hide

qj1 <- qnorm(ppoints(Psych[,1]))


rq1 <- cor(sort(Psych[,1]), qj1)
rq1

[1] 0.9881301

Hide

qqnorm(Psych[,2],main = "Support")
qqline(Psych[,2])

Hide

qj2 <- qnorm(ppoints(Psych[,2]))


rq2 <- cor(sort(Psych[,2]), qj2)
rq2

[1] 0.989288

Hide

qqnorm(Psych[,3],main = "Benevolence")
qqline(Psych[,3])

Hide

qj3 <- qnorm(ppoints(Psych[,3]))


rq3 <- cor(sort(Psych[,3]), qj3)
rq3

[1] 0.9925086

Hide

qqnorm(Psych[,4],main = "Conformity")
qqline(Psych[,4])

Hide

qj4 <- qnorm(ppoints(Psych[,4]))


rq4 <- cor(sort(Psych[,4]), qj4)
rq4

[1] 0.99338

Hide

qqnorm(Psych[,5],main = "Leadership")
qqline(Psych[,5])

Hide

qj5 <- qnorm(ppoints(Psych[,5]))


rq5 <- cor(sort(Psych[,5]), qj5)
rq5

[1] 0.9812888

Compare with critical points of n = 150 and we accept Benevolence and Conformity to be normal
(b)
Check Multivariate Distribution

Hide

n <- nrow(Psych)

es_cov <- cov(Psych[,1:5])


es_mean <- colMeans(Psych[,1:5])
hat_Psych <- Psych[,1:5] - matrix(1, nrow = n, ncol = 1) %*% matrix(es_mean, nrow = 1)

dj <- rowSums((hat_Psych %*% solve(es_cov)) * hat_Psych)

# chi - test

qqplot(qchisq(ppoints(n), df = 5), dj, main = "chi-test Psychological")


library(ggplot2)
ggplot2:: last_plot()+qqline(dj,distribution=function(p) qchisq(p,df=5),prob=c(0.1,0.6))

integer(0)

For random varianble y ~ chi(5), with probability 95%, y < 11.07 . And then check the proportion:
Hide

length(which(dj<11.07))/n

[1] 0.9615385

We accept it is multi-Gaussian

(c)
Here is the method of selecting transformation: (You may also select by intuition but we need to guarantee the correlation is larger than the critical
point)

Hide

lambda <- seq(from = 0, to = 5, by = 0.1)


loss <- matrix(0, nrow = length(lambda))
X <- Psych[,5]

for (i in 1:length(lambda)) {
if(lambda[i]==0){trans_data <- log(X)}else{trans_data <- ((X^lambda[i])-1)/lambda[i]}
loss[i] <- (lambda[i]-1) * (sum(log(X))) - n * log(var(trans_data))/2
}
plot(lambda, loss)

Hide

lambda[[Link](loss)]

[1] 0.4

You may also write the selection of λ as a function. Here are the selectio results:

Hide

# lambda = 0.5
qqnorm(Psych[,1]^(1/2),main = "Independence")
qqline(Psych[,1]^(1/2))

Hide

qj1 <- qnorm(ppoints(Psych[,1]^(1/2)))


rq1 <- cor(sort(Psych[,1]^(1/2)), qj1)
rq1

[1] 0.9949123

Hide

# lambda = 1.4
qqnorm(Psych[,2]^1.4,main = "Support")
qqline(Psych[,2]^1.4)

Hide

qj2 <- qnorm(ppoints(Psych[,2]^1.4))


rq2 <- cor(sort(Psych[,2]^1.4), qj2)
rq2

[1] 0.9919774

Hide

# lambda = 0.4
qqnorm(Psych[,5]^0.4,main = "Support")
qqline(Psych[,5]^0.4)

Hide

qj2 <- qnorm(ppoints(Psych[,5]^0.4))


rq2 <- cor(sort(Psych[,5]^0.4), qj2)
rq2

[1] 0.9964541

Problem12
(a).
Hide

#Sweat <- [Link](file = "[Link]", head = T, sep = c(",", " "))


Sweat <- [Link](Sweat)
n <- nrow(Sweat)
p <- ncol(Sweat)

es_mean <- colMeans(Sweat)


es_cov <- cov(Sweat)

eigen1 <- eigen(es_cov)


eigen_vector <- eigen1$vectors
eigen_value <- eigen1$values

crit_value = p*(n-1)/(n*(n-p)) * qf(0.9,p,n-p)


distance1 = sqrt(eigen_value[1])*sqrt(crit_value)
distance1

[1] 9.050674

Hide

distance2 = sqrt(eigen_value[2])*sqrt(crit_value)
distance2

[1] 1.360786

Hide

distance3 = sqrt(eigen_value[3])*sqrt(crit_value)
distance3

[1] 0.7292367

(b)
Hide

qqnorm(Sweat[,1], main = "Sweat Rate")


qqline(Sweat[,1])

Hide

qj1 <- qnorm(ppoints(Sweat[,1]))


rq1 <- cor(sort(Sweat[,1]), qj1)
rq1

[1] 0.9866356

Hide

qqnorm(Sweat[,2], main = "Sodium")


qqline(Sweat[,2])

Hide

qj2 <- qnorm(ppoints(Sweat[,2]))


rq2 <- cor(sort(Sweat[,2]), qj2)
rq2

[1] 0.9919817

Hide

qqnorm(Sweat[,3], main = "Potassium")


qqline(Sweat[,3])

Hide

qj3 <- qnorm(ppoints(Sweat[,3]))


rq3 <- cor(sort(Sweat[,3]), qj3)
rq3
[1] 0.9846733

Hide

pairs(Sweat)

It seems that it justifies the multivariate normal distribution.

(c)
T 2 confidence interval:
Hide

# T2
crit_value = p*(n-1)/(n*(n-p)) * qf(0.95,p,n-p)
mu1_interval_T <- c(es_mean[1] - sqrt(crit_value * es_cov[1,1]), es_mean[1] + sqrt(crit_value * es_cov[1,1]))
mu1_interval_T

[Link] [Link]
3.397768 5.882232

Hide

mu2_interval_T <- c(es_mean[2] - sqrt(crit_value * es_cov[2,2]), es_mean[2] + sqrt(crit_value * es_cov[2,2]))


mu2_interval_T

Sodium Sodium
35.05241 55.74759

Hide

mu3_interval_T <- c(es_mean[3] - sqrt(crit_value * es_cov[3,3]), es_mean[3] + sqrt(crit_value * es_cov[3,3]))


mu3_interval_T

Potassium Potassium
8.570664 11.359336

Bonferroni Interval

Hide

qt95 <- qt(1-0.05/(2*p), df= n-1)

# Bonferroni
mu1_interval_F <- c(es_mean[1] - qt95 * sqrt( es_cov[1,1]/n), es_mean[1] + qt95 * sqrt( es_cov[1,1]/n))
mu1_interval_F

[Link] [Link]
3.643952 5.636048

Hide

mu2_interval_F <- c(es_mean[2] - qt95 * sqrt( es_cov[2,2]/n), es_mean[2] + qt95 * sqrt( es_cov[2,2]/n))
mu2_interval_F

Sodium Sodium
37.10308 53.69692

Hide

mu3_interval_F <- c(es_mean[3] - qt95 * sqrt( es_cov[3,3]/n), es_mean[3] + qt95 * sqrt( es_cov[3,3]/n))
mu3_interval_F

Potassium Potassium
8.846992 11.083008

Problem 13
a.

Hide

xbar=c(95.52,164.38,55.69,93.39,17.98,31.13)
S=matrix(c(3266.46,1343.97,731.54,1175.5,162.68,238.37,1343.97,721.91,324.25,537.35,80.17,117.73,731.54,324.25,17
9.28,281.17,39.15,56.8,1175.5,537.35,281.17,474.98,63.73,94.85,162.68,80.17,39.15,63.73,9.95,13.88,238.37,117.73,
56.8,94.85,13.88,21.26),nrow=6,byrow=TRUE)
n=61
p=6
for(i in 1:p)
{
print(c(xbar[i]-sqrt(qchisq(1-0.05,df=p))*sqrt(S[i,i]/n),
xbar[i]+sqrt(qchisq(1-0.05,df=p))*sqrt(S[i,i]/n)))
}

[1] 69.55347 121.48653


[1] 152.1728 176.5872
[1] 49.60667 61.77333
[1] 83.48823 103.29177
[1] 16.54687 19.41313
[1] 29.03513 33.22487

b.

Hide

library(MVQuickGraphs)
[Link]=xbar[c(1,4)]
[Link]=S[c(1,4),c(1,4)]
confidenceEllipse([Link],eigen([Link]),n=61,p=2,alpha = 0.05)

c.

Hide

t=qt(1-0.05/(2*p),n-1)
for(i in 1:p)
{
print(c(xbar[i]-t*sqrt(S[i,i]/n),
xbar[i]+t*sqrt(S[i,i]/n)))
}

[1] 75.55331 115.48669


[1] 154.9934 173.7666
[1] 51.01229 60.36771
[1] 85.77614 101.00386
[1] 16.87801 19.08199
[1] 29.51917 32.74083

d.

Hide

confidenceEllipse([Link],eigen([Link]),n=61,p=2,alpha = 0.05)
rect( xbar[1]-t*sqrt(S[1,1]/n),xbar[4]-t*sqrt(S[4,4]/n), xbar[1]+t*sqrt(S[1,1]/n),xbar[4]+t*sqrt(S[4,4]/n))

The confidence rectangle is larger because Bonferroni confidence intervals consider multi-variable tests simultaneously. For only two dimensions,
the confidence rectangle is always loose.

e.

Hide

m=7
a=c(0,0,0,0,-1,1)
t=qt(1-0.05/(2*m),n-1)
print(c(t(a)%*%xbar-t*sqrt(t(a)%*%S%*%a/n),
t(a)%*%xbar+t*sqrt(t(a)%*%S%*%a/n)))

[1] 12.48757 13.81243

Problem 14
a.

Hide

#Femal_Bear <-[Link]("Female_Bear.txt",header=TRUE,sep=",")
Femal_Bear <-[Link](Femal_Bear)
xbar=colMeans(Femal_Bear)
S=cov(Femal_Bear)
#(a)
t=matrix(0,nrow=4,ncol=4)
s=diag(4)
y=cbind(t,s)
ybar=y%*%xbar
yS=y%*%S%*%t(y)
n=7
p=4
a=diag(4)
for(i in 1:p)
{
ai=a[i,]
b=sqrt(p*(n-1)/n/(n-p)*qf(1-0.05,df1=p,df2=n-p)*yS[i,i])
print(c(t(ai)%*%ybar-b,t(ai)%*%ybar+b))
}

[1] 130.6851 155.8863


[1] 127.0216 191.5498
[1] 160.3082 185.9776
[1] 155.3749 198.9108

b.

Hide

#(b)
z=Femal_Bear[,5:8]
t=matrix(0,nrow=3,ncol=4)
s=diag(3)
s<-cbind(-s,c(0,0,0))+cbind(c(0,0,0),s)
s

[,1] [,2] [,3] [,4]


[1,] -1 1 0 0
[2,] 0 -1 1 0
[3,] 0 0 -1 1

Hide

zbar=colMeans(z)
zS=cov(z)
for(i in 1:3)
{
ai=s[i,]
b=sqrt(p*(n-1)/n/(n-p)*qf(1-0.05,df1=p,df2=n-p)*t(ai)%*%zS%*%ai)
print(c(t(ai)%*%zbar-b,t(ai)%*%zbar+b))
}

[1] -21.22649 53.22649


[1] -22.73077 50.44505
[1] -20.65385 28.65385

c.

Hide

#(c)
A=matrix(c(-1,1,0,0,0,0,-1,1),nrow=2,byrow=TRUE)
B=matrix(0,nrow=2,ncol=4)
A=cbind(B,A)
zbar=A%*%xbar
zS=A%*%S%*%t(A)
confidenceEllipse(zbar,eigen(zS),n=7,p=2,alpha = 0.05)

Hide

NA
NA

d.

Hide

#(d)
A=matrix(c(-1,1,-0,0,0,-1,1,0,0,0,-1,1),nrow=3,byrow=TRUE)
A=rbind(diag(4),A)
A=cbind(matrix(0,nrow=7,ncol=4),A)
zbar=A%*%xbar
zS=A%*%S%*%t(A)
p=7
n=dim(Femal_Bear)[1]
t=qt(1-0.05/(2*p),n-1)
for(i in 1:p)
{
print(c(zbar[i]-t*sqrt(zS[i,i]/n),
zbar[i]+t*sqrt(zS[i,i]/n)))
}

[1] 137.3884 149.1831


[1] 144.1854 174.3860
[1] 167.1359 179.1498
[1] 166.9550 187.3307
[1] -1.422784 33.422784
[1] -3.266772 30.981058
[1] -7.538521 15.538521

e.

Hide

#(e)
A=matrix(c(-1,1,0,0,0,0,-1,1),nrow=2,byrow=TRUE)
A=cbind(matrix(0,nrow=2,ncol=4),A)
zbar=A%*%xbar
zS=A%*%S%*%t(A)
zbar

[,1]
[1,] 16
[2,] 4

Hide

p=2
n=dim(Femal_Bear)[1]
t=qt(1-0.05/(2*p),n-1)
for(i in 1:p)
{
print(c(zbar[i]-t*sqrt(zS[i,i]/n),
zbar[i]+t*sqrt(zS[i,i]/n)))
}

[1] 3.059795 28.940205


[1] -4.56986 12.56986

Hide

confidenceEllipse(zbar,eigen(zS),n=7,p=2,alpha = 0.05)
#rect(3.059795,-4.56986,28.940205,12.56986)
rect(zbar[1]-t*sqrt(zS[1,1]/n),zbar[2]-t*sqrt(zS[2,2]/n),zbar[1]+t*sqrt(zS[1,1]/n), zbar[2]+t*sqrt(zS[2,2]/n))

You might also like