R-code for Question 1
# Load necessary libraries
library(MASS)
library(car)
# Generate Dataset
set.seed(123)
n <- 50
x1 <- rnorm(n, mean=3, sd=0.75)
x2 <- rnorm(n, mean=5, sd=0.5)
y <- 3 + 2 * x1 - 1.5 * x2 + rnorm(n, mean=0, sd = 3)
data <- data.frame(y, x1, x2)
# Fit OLS model
ols_model <- lm(y ~ x1 + x2, data = data)
summary(ols_model)
# Fit robust regression model
robust_model <- rlm(y ~ x1 + x2, data = data, method = "M")
summary(robust_model)
# Residual Analysis
residuals <- residuals(robust_model)
# Plot residuals to check for patterns
par(mfrow = c(2, 2))
plot(residuals, main = "Robust Residuals", ylab = "Residuals", xlab = "Index")
abline(h = 0, col = "red", lty = 2)
# Plot to check for normality of residuals
hist(residuals, main = "Residuals Histogram", xlab = "Residuals", breaks = 10)
qqnorm(residuals, main = "Q-Q Plot of Residuals")
qqline(residuals, col = "red")
# Plot Residuals vs Fitted Values (Homoscedasticity check)
plot(fitted(robust_model), residuals, main = "Residuals vs Fitted", xlab =
"Fitted Values", ylab = "Residuals")
abline(h = 0, col = "red", lty = 2)
# Standardized residuals (for detecting outliers) (Residuals > 2 are flagged)
robust_std_res <- residuals/summary(robust_model)$sigma
outliers <- which(abs(robust_std_res) > 2)
cat(outliers)
# Highlight outliers in residual plot
plot(fitted(robust_model), residuals, main = "Outliers Highlighted", xlab =
"Fitted Values", ylab = "Residuals", col = ifelse(abs(robust_std_res) > 2, "red",
"black"))
abline(h = 0, col = "blue", lty = 2)
# Influence Diagnostics
# Leverage: Hat values
leverage <- hatvalues(robust_model)
plot(leverage, main = "Leverage Plot", ylab = "Leverage", xlab = "Index", col =
ifelse(leverage > 2 * mean(leverage), "red", "black"))
abline(h = 2 * mean(leverage), col = "red", lty = 2)
#Identify leverage ponits (# Threshold for leverage (2 * mean leverage))
leverage_points <- which(leverage > 2 * mean(leverage))
cat(leverage_points)
# Cook's Distance
cooks_d <- cooks.distance(ols_model)
plot(cooks_d, main = "Cook's Distance", ylab = "Cook's Distance", xlab = "Index",
col = ifelse(cooks_d > 4/n, "red", "black"))
abline(h = 4 / n, col = "red", lty = 2)
# Identify influential points
influential_points <- which(cooks_d > (4 / n))
cat(influential_points)
R code for Question 2:
library(MASS)
library(car)
data(mtcars)
head(mtcars)
#Fit a linear model
ols_model <- lm(mpg ~ wt + hp, data = mtcars)
summary(ols_model)
# Fit a robust regression model
robust_model <- rlm(mpg ~ wt + hp, data = mtcars, method="M")
summary(robust_model)
# Residual analysis
residuals <- residuals(robust_model)
# Standardized residuals (for detecting outliers)
std_residuals <- residuals/summary(robust_model)$sigma
outliers <- which(abs(std_residuals) > 2)
cat(outliers)
# Highlight outliers in residual plot
par(mfrow = c(2, 2))
plot(fitted(robust_model), residuals, main = "Outliers Highlighted", xlab =
"Fitted Values", ylab = "Residuals", col = ifelse(abs(std_residuals) > 2, "red",
"black"))
abline(h = 0, col = "blue", lty = 2)
# Leverage values
leverage <- hatvalues(robust_model)
leverage_points <- which(leverage > 2 * mean(leverage))
cat(leverage_points)
plot(leverage, main = "Leverage Plot", ylab = "Leverage", xlab = "Index", col =
ifelse(leverage > 2 * mean(leverage), "red", "black"))
abline(h = 2 * mean(leverage), col = "red", lty = 2)
# Cook's distance
cooks_d <- cooks.distance(ols_model)
influential_points <- which(cooks_d > (4 / length(mtcars$mpg)))
cat(influential_points)
plot(cooks_d, main = "Cook's Distance", ylab = "Cook's Distance", xlab = "Index",
col = ifelse(cooks_d > 4/length(mtcars$mpg), "red", "black"))
abline(h = 4 / length(mtcars$mpg), col = "red", lty = 2)
R-code for Question 3:
df <- data.frame(
x1 = c(1, 3, 3, 4, 4, 6, 6, 8, 9, 3, 11, 16, 16, 18, 19, 20, 23, 23, 24, 25),
x2 = c(7, 7, 4, 29, 13, 34, 17, 19, 20, 12, 25, 26, 26, 26, 27, 29, 30, 31, 31,
32),
y = c(17, 170, 19, 194, 24, 2, 25, 29, 30, 32, 44, 60, 61, 63, 63, 64, 61, 67,
59, 70)
)
# Fit the linear model
ols_model <- lm(y ~ x1 + x2, data = df)
summary(ols_model)
# Load the MASS package
library(MASS)
# Fit the robust regression model
robust_model <- rlm(y ~ x1 + x2, data = df)
summary(robust_model)
#Compare between lm & rlm by residual standard error
summary(ols_model)$sigma
summary(robust_model)$sigma
# Residual analysis
residuals <- residuals(robust_model)
# Standardized residuals (for detecting outliers)
std_residuals <- residuals/summary(robust_model)$sigma
outliers <- which(abs(std_residuals) > 2)
cat(outliers)
# Highlight outliers in residual plot
par(mfrow = c(2, 2))
plot(fitted(robust_model), residuals, main = "Outliers Highlighted", xlab =
"Fitted Values", ylab = "Residuals", col = ifelse(abs(std_residuals) > 2, "red",
"black"))
abline(h = 0, col = "blue", lty = 2)
# Leverage values
leverage <- hatvalues(robust_model)
leverage_points <- which(leverage > 2 * mean(leverage))
cat(leverage_points)
plot(leverage, main = "Leverage Plot", ylab = "Leverage", xlab = "Index", col =
ifelse(leverage > 2 * mean(leverage), "red", "black"))
abline(h = 2 * mean(leverage), col = "red", lty = 2)
# Cook's distance
cooks_d <- cooks.distance(ols_model)
influential_points <- which(cooks_d > (4 / length(df$y)))
cat(influential_points)
plot(cooks_d, main = "Cook's Distance", ylab = "Cook's Distance", xlab = "Index",
col = ifelse(cooks_d > 4/length(df$y), "red", "black"))
abline(h = 4 / length(df$y), col = "red", lty = 2)
R-code for Question 4:
# Install and load the quantreg package
install.packages("quantreg")
library(quantreg)
# Data
data(mtcars)
# Perform quantile regression at the 0.25th, 0.5th (median), and 0.75th quantiles
qr_model25 <- rq(mpg ~ wt + hp, data = mtcars, tau = 0.25)
qr_model50 <- rq(mpg ~ wt + hp, data = mtcars, tau = 0.50)
qr_model75 <- rq(mpg ~ wt + hp, data = mtcars, tau = 0.75)
# Display results
summary(qr_model25)
summary(qr_model50)
summary(qr_model75)
# Fit quantile regression for multiple quantiles
qr_multiple <- rq(mpg ~ wt + hp, data = mtcars, tau = c(0.25, 0.50, 0.75))
summary(qr_multiple)