Nrupathunga University
Department of Computer Science
VI Sem BSc (NEP) 2023-24
Statistical Computing and R Programming Language Lab
Manual
Part-A
1. Program to demonstrate Mathematical functions in R
# Demonstrates R's arithmetic operators and built-in mathematical functions.
# Each result is stored in its own variable and echoed with cat().

# Operands shared by the arithmetic examples
a <- 10
b <- 5

# Arithmetic operators: compute everything first, then report
sum_ab <- a + b
diff_ab <- a - b
prod_ab <- a * b
div_ab <- a / b
exp_ab <- a^b
sqrt_a <- sqrt(a)
cat("Addition:", sum_ab, "\n")
cat("Subtraction:", diff_ab, "\n")
cat("Multiplication:", prod_ab, "\n")
cat("Division:", div_ab, "\n")
cat("Exponentiation:", exp_ab, "\n")
cat("Square Root of a:", sqrt_a, "\n")

# Trigonometric functions: R works in radians, so convert the angle once
angle <- 45 # in degrees
radians <- angle * (pi / 180)
sin_val <- sin(radians)
cos_val <- cos(radians)
tan_val <- tan(radians)
cat("Sine:", sin_val, "\n")
cat("Cosine:", cos_val, "\n")
cat("Tangent:", tan_val, "\n")

# Logarithms: log() is the natural logarithm, log10() is base 10
x <- 100
ln_x <- log(x)
log_x <- log10(x)
cat("Natural Logarithm of x:", ln_x, "\n")
cat("Common Logarithm (Base 10) of x:", log_x, "\n")

# Random numbers: 10 uniform draws on [0, 1]
random_num <- runif(10, min = 0, max = 1)
cat("Random Number:", random_num, "\n")
OUTPUT:
# Define variables a and b
> a <- 10
> b <- 5
>
> # Addition
> sum_ab <- a + b
> cat("Addition:", sum_ab, "\n")
Addition: 15
>
> # Subtraction
> diff_ab <- a - b
> cat("Subtraction:", diff_ab, "\n")
Subtraction: 5
>
> # Multiplication
> prod_ab <- a * b
> cat("Multiplication:", prod_ab, "\n")
Multiplication: 50
>
> # Division
> div_ab <- a / b
> cat("Division:", div_ab, "\n")
Division: 2
>
> # Exponentiation
> exp_ab <- a ^ b
> cat("Exponentiation:", exp_ab, "\n")
Exponentiation: 1e+05
>
> # Square Root
> sqrt_a <- sqrt(a)
> cat("Square Root of a:", sqrt_a, "\n")
Square Root of a: 3.162278
>
> # Trigonometric Functions
> angle <- 45 # in degrees
>
> # Sine
> sin_val <- sin(angle * (pi / 180))
> cat("Sine:", sin_val, "\n")
Sine: 0.7071068
>
> # Cosine
> cos_val <- cos(angle * (pi / 180))
> cat("Cosine:", cos_val, "\n")
Cosine: 0.7071068
>
> # Tangent
> tan_val <- tan(angle * (pi / 180))
> cat("Tangent:", tan_val, "\n")
Tangent: 1
>
> # Logarithmic Functions
> x <- 100
>
> # Natural Logarithm
> ln_x <- log(x)
> cat("Natural Logarithm of x:", ln_x, "\n")
Natural Logarithm of x: 4.60517
>
> # Common Logarithm (Base 10)
> log_x <- log10(x)
> cat("Common Logarithm (Base 10) of x:", log_x, "\n")
Common Logarithm (Base 10) of x: 2
>
> # Random Number Generation
> random_num <- runif(10, 0, 1) # Generate 10 random numbers b
etween 0 and 1
> cat("Random Number:", random_num, "\n")
Random Number: 0.5486608 0.3227999 0.4051977 0.2274602 0.76546
66 0.3616712 0.07021952 0.9024233 0.2425885 0.901664
2. Program to demonstrate String functions in R
# Demonstrates common base-R string functions on a sample string.

# Create a sample string
my_string <- "Hello, World!"

# 1. Length of the string (number of characters)
string_length <- nchar(my_string)
cat("1. Length of the string: ", string_length, "\n")

# 2. Convert to uppercase
uppercase_string <- toupper(my_string)
cat("2. Uppercase: ", uppercase_string, "\n")

# 3. Convert to lowercase
lowercase_string <- tolower(my_string)
cat("3. Lowercase: ", lowercase_string, "\n")

# 4. Substring (characters 1 to 5)
substring <- substr(my_string, start = 1, stop = 5)
cat("4. Substring (1-5): ", substring, "\n")

# 5. Split the string into words.
# strsplit() returns a list with one element per input string; [[1]] extracts
# the character vector of words for our single input.
my_string1 <- "Hello welcome to the World of R!"
words <- strsplit(my_string1, split = " ", fixed = TRUE)[[1]]
# Loop over however many words there are instead of hard-coding seven indices.
for (word in words) {
  cat("5. Split into words: ", word, "\n")
}

# 6. Concatenate strings.
# string1 already ends with a space, so use paste0() (no separator);
# paste() would insert a second space between the two pieces.
string1 <- "Hello, "
string2 <- "World!"
concatenated_string <- paste0(string1, string2)
cat("6. Concatenated: ", concatenated_string, "\n")

# 7. Replace a substring (fixed = TRUE: treat the pattern as literal text)
replaced_string <- gsub("World", "R", my_string, fixed = TRUE)
cat("7. Replaced: ", replaced_string, "\n")

# 8. Check if a string contains a substring
contains_substring <- grepl("Hello", my_string, fixed = TRUE)
cat("8. Contains 'Hello': ", contains_substring, "\n")
OUTPUT:
# Create a sample string
> my_string <- "Hello, World!"
>
> # 1. Length of the string
> string_length <- nchar(my_string)
> cat("1. Length of the string: ", string_length, "\n")
1. Length of the string: 13
>
> # 2. Convert to uppercase
> uppercase_string <- toupper(my_string)
> cat("2. Uppercase: ", uppercase_string, "\n")
2. Uppercase: HELLO, WORLD!
>
> # 3. Convert to lowercase
> lowercase_string <- tolower(my_string)
> cat("3. Lowercase: ", lowercase_string, "\n")
3. Lowercase: hello, world!
>
> # 4. Substring
> substring <- substr(my_string, start = 1, stop = 5)
> cat("4. Substring (1-5): ", substring, "\n")
4. Substring (1-5): Hello
>
> # 5. Split the string into words
> my_string1 <- "Hello welcome to the World of R!"
> words <- strsplit(my_string1, split = " ")[[1]]
> words1 <- words[[1]]
> words2 <- words[[2]]
> words3 <- words[[3]]
> words4 <- words[[4]]
> words5 <- words[[5]]
> words6 <- words[[6]]
> words7 <- words[[7]]
> cat("5. Split into words: ", words1, "\n")
5. Split into words: Hello
> cat("5. Split into words: ", words2, "\n")
5. Split into words: welcome
> cat("5. Split into words: ", words3, "\n")
5. Split into words: to
> cat("5. Split into words: ", words4, "\n")
5. Split into words: the
> cat("5. Split into words: ", words5, "\n")
5. Split into words: World
> cat("5. Split into words: ", words6, "\n")
5. Split into words: of
> cat("5. Split into words: ", words7, "\n")
5. Split into words: R!
>
> # 6. Concatenate strings
> string1 <- "Hello, "
> string2 <- "World!"
> concatenated_string <- paste(string1, string2)
> cat("6. Concatenated: ", concatenated_string, "\n")
6. Concatenated: Hello, World!
>
> # 7. Replace a substring
> replaced_string <- gsub("World", "R", my_string)
> cat("7. Replaced: ", replaced_string, "\n")
7. Replaced: Hello, R!
>
> # 8. Check if a string contains a substring
> contains_substring <- grepl("Hello", my_string)
> cat("8. Contains 'Hello': ", contains_substring, "\n")
8. Contains 'Hello': TRUE
3. Program to demonstrate the matrix operations described
below
# Demonstrates basic matrix operations: arithmetic, transpose, inverse,
# determinant, eigen decomposition, concatenation, and index-based filling.

# Create two example matrices (2 x 3, filled column by column)
matrix_A <- matrix(1:6, nrow = 2)
matrix_B <- matrix(7:12, nrow = 2)
# Print the matrices
print("Matrix A:")
print(matrix_A)
print("Matrix B:")
print(matrix_B)
# Matrix Addition
matrix_sum <- matrix_A + matrix_B
print("Matrix Addition Result:")
print(matrix_sum)
# Matrix Subtraction
matrix_diff <- matrix_A - matrix_B
print("Matrix Subtraction Result:")
print(matrix_diff)
# Element-wise multiplication: `*` multiplies matching entries, it is NOT
# the matrix product.
matrix_product <- matrix_A * matrix_B
print("Element-wise Matrix Multiplication Result:")
print(matrix_product)
# True matrix multiplication uses %*%. A and B are both 2 x 3 and therefore
# not conformable, so multiply A by the transpose of B (3 x 2) instead.
matrix_true_product <- matrix_A %*% t(matrix_B)
print("Matrix Multiplication Result (A %*% t(B)):")
print(matrix_true_product)
# Transpose of Matrix A
matrix_A_transpose <- t(matrix_A)
print("Transpose of Matrix A:")
print(matrix_A_transpose)
# Inverse and determinant of Matrix A (only defined for square matrices)
if (ncol(matrix_A) == nrow(matrix_A)) {
  determinant_A <- det(matrix_A)
  # Compare with a tolerance: det() is a floating-point result, so an exact
  # `!= 0` test can misclassify a numerically singular matrix.
  if (!isTRUE(all.equal(determinant_A, 0))) {
    inverse_matrix_A <- solve(matrix_A)
    print("Inverse of Matrix A:")
    print(inverse_matrix_A)
  } else {
    print("Matrix A is singular and cannot be inverted.")
  }
  # Determinant of Matrix A
  print(paste("Determinant of Matrix A:", determinant_A))
} else {
  print("Matrix A is not square and determinant cannot be computed.")
}
# Eigenvalues and Eigenvectors of Matrix A (if it's square)
if (nrow(matrix_A) == ncol(matrix_A)) {
  eigen_A <- eigen(matrix_A)
  eigenvalues_A <- eigen_A$values
  eigenvectors_A <- eigen_A$vectors
  print("Eigenvalues of Matrix A:")
  print(eigenvalues_A)
  print("Eigenvectors of Matrix A:")
  print(eigenvectors_A)
} else {
  print("Matrix A is not square and does not have eigenvalues or eigenvectors.")
}
# Matrix Concatenation (Horizontally)
matrix_concatenated <- cbind(matrix_A, matrix_B)
print("Matrix Concatenation (Horizontally):")
print(matrix_concatenated)
# Creating a new matrix "tmp": the 3 values are recycled to fill 15 x 3
tmp <- matrix(c(10, -10, 10), ncol = 3, nrow = 15)
# Print the matrix "tmp"
print("Matrix tmp:")
print(tmp)
# Find the transpose of the matrix "tmp"
tmp_transpose <- t(tmp)
print("Transpose of Matrix tmp:")
print(tmp_transpose)
# Find the product of Transpose of matrix "tmp" and original matrix "tmp"
tmp_product <- tmp_transpose %*% tmp
print("Product of Transpose of Matrix tmp and Matrix tmp:")
print(tmp_product)
# Print the matrix with 0 entries
matE <- matrix(0, nrow = 6, ncol = 6)
print("Matrix with 0 entries:")
print(matE)
# Distance of every cell from the main diagonal, |column - row|.
# Kept in its own variable so that matE stays a zero matrix; overwriting matE
# here would turn the assignment below into a no-op.
index_distance <- abs(col(matE) - row(matE))
print("Matrix with values of column and row numbers:")
print(index_distance)
# Initialize to 1 the entries directly above and below the main diagonal
matE[index_distance == 1] <- 1
print("Matrix with initialized values:")
print(matE)
OUTPUT:
# Create two example matrices
> matrix_A <- matrix(1:6, nrow = 2)
> matrix_B <- matrix(7:12, nrow = 2)
>
> # Print the matrices
> print("Matrix A:")
[1] "Matrix A:"
> print(matrix_A)
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
> print("Matrix B:")
[1] "Matrix B:"
> print(matrix_B)
[,1] [,2] [,3]
[1,] 7 9 11
[2,] 8 10 12
>
> # Matrix Addition
> matrix_sum <- matrix_A + matrix_B
> print("Matrix Addition Result:")
[1] "Matrix Addition Result:"
> print(matrix_sum)
[,1] [,2] [,3]
[1,] 8 12 16
[2,] 10 14 18
>
> # Matrix Subtraction
> matrix_diff <- matrix_A - matrix_B
> print("Matrix Subtraction Result:")
[1] "Matrix Subtraction Result:"
> print(matrix_diff)
[,1] [,2] [,3]
[1,] -6 -6 -6
[2,] -6 -6 -6
>
> # Matrix Multiplication
> matrix_product <- matrix_A * matrix_B
> print("Matrix Multiplication Result:")
[1] "Matrix Multiplication Result:"
> print(matrix_product)
[,1] [,2] [,3]
[1,] 7 27 55
[2,] 16 40 72
>
> # Transpose of Matrix A
> matrix_A_transpose <- t(matrix_A)
> print("Transpose of Matrix A:")
[1] "Transpose of Matrix A:"
> print(matrix_A_transpose)
[,1] [,2]
[1,] 1 2
[2,] 3 4
[3,] 5 6
>
> # Inverse of Matrix A (if possible)
> if (ncol(matrix_A) == nrow(matrix_A)) { # Ensure it's square before computing the determinant
+ if (det(matrix_A) != 0) {
+ inverse_matrix_A <- solve(matrix_A)
+ print("Inverse of Matrix A:")
+ print(inverse_matrix_A)
+ } else {
+ print("Matrix A is singular and cannot be inverted.")
+ }
+
+ # Determinant of Matrix A
+ determinant_A <- det(matrix_A)
+ print(paste("Determinant of Matrix A:", determinant_A))
+ } else {
+ print("Matrix A is not square and determinant cannot be computed.")
+}
[1] "Matrix A is not square and determinant cannot be computed."
>
> # Eigenvalues and Eigenvectors of Matrix A (if it's square)
> if (nrow(matrix_A) == ncol(matrix_A)) {
+ eigen_A <- eigen(matrix_A)
+ eigenvalues_A <- eigen_A$values
+ eigenvectors_A <- eigen_A$vectors
+ print("Eigenvalues of Matrix A:")
+ print(eigenvalues_A)
+ print("Eigenvectors of Matrix A:")
+ print(eigenvectors_A)
+ } else {
+ print("Matrix A is not square and does not have eigenvalues or eigenvectors.")
+}
[1] "Matrix A is not square and does not have eigenvalues or eigenvectors."
>
> # Matrix Concatenation (Horizontally)
> matrix_concatenated <- cbind(matrix_A, matrix_B)
> print("Matrix Concatenation (Horizontally):")
[1] "Matrix Concatenation (Horizontally):"
> print(matrix_concatenated)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 3 5 7 9 11
[2,] 2 4 6 8 10 12
>
> # Creating a new matrix "tmp"
> tmp <- matrix(c(10,-10,10), nc=3, nr=15)
>
> # Print the matrix "tmp"
> print("Matrix tmp:")
[1] "Matrix tmp:"
> print(tmp)
[,1] [,2] [,3]
[1,] 10 10 10
[2,] -10 -10 -10
[3,] 10 10 10
[4,] 10 10 10
[5,] -10 -10 -10
[6,] 10 10 10
[7,] 10 10 10
[8,] -10 -10 -10
[9,] 10 10 10
[10,] 10 10 10
[11,] -10 -10 -10
[12,] 10 10 10
[13,] 10 10 10
[14,] -10 -10 -10
[15,] 10 10 10
>
> # Find the transpose of the matrix "tmp"
> tmp_transpose <- t(tmp)
> print("Transpose of Matrix tmp:")
[1] "Transpose of Matrix tmp:"
> print(tmp_transpose)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15]
[1,] 10 -10 10 10 -10 10 10 -10 10 10 -10 10 10 -10 10
[2,] 10 -10 10 10 -10 10 10 -10 10 10 -10 10 10 -10 10
[3,] 10 -10 10 10 -10 10 10 -10 10 10 -10 10 10 -10 10
>
> # Find the product of Transpose of matrix "tmp" and original matrix "tmp"
> tmp_product <- tmp_transpose %*% tmp
> print("Product of Transpose of Matrix tmp and Matrix tmp:")
[1] "Product of Transpose of Matrix tmp and Matrix tmp:"
> print(tmp_product)
[,1] [,2] [,3]
[1,] 1500 1500 1500
[2,] 1500 1500 1500
[3,] 1500 1500 1500
>
> # Print the matrix with 0 entries
> matE <- matrix(0, nr = 6, nc = 6)
> print("Matrix with 0 entries:")
[1] "Matrix with 0 entries:"
> print(matE)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 0 0 0 0 0
[3,] 0 0 0 0 0 0
[4,] 0 0 0 0 0 0
[5,] 0 0 0 0 0 0
[6,] 0 0 0 0 0 0
>
> # Print the matrix with the values of column and row numbers
> matE <- abs(col(matE) - row(matE))
> print("Matrix with values of column and row numbers:")
[1] "Matrix with values of column and row numbers:"
> print(matE)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 1 2 3 4 5
[2,] 1 0 1 2 3 4
[3,] 2 1 0 1 2 3
[4,] 3 2 1 0 1 2
[5,] 4 3 2 1 0 1
[6,] 5 4 3 2 1 0
>
> # Intialize the values of row and col -1 to 1 using abs
> matE[abs(col(matE) - row(matE)) == 1] <- 1
> print("Matrix with initialized values:")
[1] "Matrix with initialized values:"
> print(matE)
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 1 2 3 4 5
[2,] 1 0 1 2 3 4
[3,] 2 1 0 1 2 3
[4,] 3 2 1 0 1 2
[5,] 4 3 2 1 0 1
[6,] 5 4 3 2 1 0
4. Write a program to print a Sudoku-style 3*3 matrix game
with three numbers
# Fills a 3 x 3 grid so that each of the numbers 1..3 appears at most once
# in every row and every column. 0 marks an empty cell.

# Assign a matrix with zero entries
sudoku <- matrix(0, nrow = 3, ncol = 3)
print(sudoku)
# Pre-filled clues on the main diagonal
sudoku[1, 1] <- 3
sudoku[2, 2] <- 2
sudoku[3, 3] <- 1

# is_valid: can `value` be placed at (row, col)?
#   row, col  cell position (1-based)
#   value     candidate number
#   grid      matrix to check; defaults to the current state of `sudoku`
#             (default arguments are evaluated lazily, at call time)
# Returns TRUE when `value` is not yet present in that row or that column.
is_valid <- function(row, col, value, grid = sudoku) {
  !(value %in% grid[row, ] || value %in% grid[, col])
}

# Visit the cells row by row and put the first valid value into each empty
# cell. There is no backtracking: a cell is never revisited once filled.
grid_size <- nrow(sudoku)
for (row in seq_len(grid_size)) {
  for (col in seq_len(grid_size)) {
    if (sudoku[row, col] == 0) {
      for (value in seq_len(grid_size)) {
        if (is_valid(row, col, value)) {
          sudoku[row, col] <- value
          break
        }
      }
    }
  }
}
print(sudoku)
OUTPUT:
# Assign a matrix with zero entries
> sudoku <- matrix(0, nrow = 3, ncol = 3)
> print(sudoku)
[,1] [,2] [,3]
[1,] 0 0 0
[2,] 0 0 0
[3,] 0 0 0
>
> sudoku[1, 1] <- 3
> sudoku[2, 2] <- 2
> sudoku[3, 3] <- 1
>
> is_valid <- function(row, col, value) {
+ if (value %in% sudoku[row, ] || value %in% sudoku[, col]) {
+ return(FALSE)
+ }
+ return(TRUE)
+ }
>
> for (row in 1:3) {
+ for (col in 1:3) {
+ if (sudoku[row, col] == 0) {
+ for (value in 1:3) {
+ if (is_valid(row, col, value)) {
+ sudoku[row, col] <- value
+ break
+ }
+ }
+ }
+ }
+ }
>
> print(sudoku)
[,1] [,2] [,3]
[1,] 3 1 2
[2,] 1 2 3
[3,] 2 3 1
>
5. A. Write an R program to perform following operations on Dataframes
1. Write an R program to create an empty data frame
Part 5.A: Create an empty dataframe
# Create an empty dataframe (zero rows) with one column per basic type
df <- data.frame(
  Ints = integer(),
  Doubles = double(),
  Characters = character(),
  Logicals = logical(),
  Factors = factor(),
  stringsAsFactors = FALSE
)
# Print the structure of the empty dataframe.
# str() writes the structure itself and returns NULL invisibly, so it must
# not be wrapped in print(): that would add a stray "NULL" line.
print("Structure of the empty dataframe:")
str(df)
OUTPUT:
# Create an empty dataframe with specified column types
> df <- data.frame(
+ Ints = integer(),
+ Doubles = double(),
+ Characters = character(),
+ Logicals = logical(),
+ Factors = factor(),
+ stringsAsFactors = FALSE
+ )
>
> # Print the structure of the empty dataframe
> print("Structure of the empty dataframe:")
[1] "Structure of the empty dataframe:"
> print(str(df))
'data.frame': 0 obs. of 5 variables:
$ Ints : int
$ Doubles : num
$ Characters: chr
$ Logicals : logi
$ Factors : Factor w/ 0 levels:
NULL
Part 5.B: Create a dataframe from given vectors
# Build a dataframe out of four parallel vectors (one entry per student).
name <- c(
  "Anastasia", "Dima", "Katherine", "James", "Emily",
  "Michael", "Matthew", "Laura", "Kevin", "Jonas"
)
score <- c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19)
attempts <- c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1)
qualify <- c("yes", "no", "yes", "no", "no", "yes", "yes", "no", "no", "yes")

# Show the raw vectors first, in the order they become columns
print("Original data frame:")
for (column in list(name, score, attempts, qualify)) {
  print(column)
}

# Column names are taken from the variable names
df <- data.frame(name, score, attempts, qualify)
print(df)
OUTPUT:
# Create a dataframe from given vectors
> name <- c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael',
'Matthew', 'Laura', 'Kevin', 'Jonas')
> score <- c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19)
> attempts <- c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1)
> qualify <- c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', '
yes')
>
> print("Original data frame:")
[1] "Original data frame:"
> print(name)
[1] "Anastasia" "Dima" "Katherine" "James" "Emily" "Michael"
"Matthew"
[8] "Laura" "Kevin" "Jonas"
> print(score)
[1] 12.5 9.0 16.5 12.0 9.0 20.0 14.5 13.5 8.0 19.0
> print(attempts)
[1] 1 3 2 3 2 3 1 1 2 1
> print(qualify)
[1] "yes" "no" "yes" "no" "no" "yes" "yes" "no" "no" "yes"
>
> df <- data.frame(name, score, attempts, qualify)
> print(df)
name score attempts qualify
1 Anastasia 12.5 1 yes
2 Dima 9.0 3 no
3 Katherine 16.5 2 yes
4 James 12.0 3 no
5 Emily 9.0 2 no
6 Michael 20.0 3 yes
7 Matthew 14.5 1 yes
8 Laura 13.5 1 no
9 Kevin 8.0 2 no
10 Jonas 19.0 1 yes
Part 5.C: Get the structure of a given dataframe
# Given dataframe: exam results, one row per student
exam_data <- data.frame(
  name = c(
    "Anastasia", "Dima", "Katherine", "James", "Emily",
    "Michael", "Matthew", "Laura", "Kevin", "Jonas"
  ),
  score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
  attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
  qualify = c("yes", "no", "yes", "no", "no", "yes", "yes", "no", "no", "yes")
)
# Print original dataframe and its structure
print("Original dataframe:")
print(exam_data)
print("Structure of the said data frame:")
# str() prints the structure itself and returns NULL invisibly; wrapping it
# in print() would add a stray "NULL" line to the output.
str(exam_data)
OUTPUT:
# Given dataframe
> exam_data <- data.frame(
+ name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael'
, 'Matthew', 'Laura', 'Kevin', 'Jonas'),
+ score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
+ attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
+ qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes')
+ )
>
> # Print original dataframe and its structure
> print("Original dataframe:")
[1] "Original dataframe:"
> print(exam_data)
name score attempts qualify
1 Anastasia 12.5 1 yes
2 Dima 9.0 3 no
3 Katherine 16.5 2 yes
4 James 12.0 3 no
5 Emily 9.0 2 no
6 Michael 20.0 3 yes
7 Matthew 14.5 1 yes
8 Laura 13.5 1 no
9 Kevin 8.0 2 no
10 Jonas 19.0 1 yes
> print("Structure of the said data frame:")
[1] "Structure of the said data frame:"
> print(str(exam_data))
'data.frame': 10 obs. of 4 variables:
$ name : chr "Anastasia" "Dima" "Katherine" "James" ...
$ score : num 12.5 9 16.5 12 9 20 14.5 13.5 8 19
$ attempts: num 1 3 2 3 2 3 1 1 2 1
$ qualify : chr "yes" "no" "yes" "no" ...
NULL
Part 5.D: Reorder a given dataframe by column name
# Given dataframe: exam results, one row per student
exam_data <- data.frame(
  name = c(
    "Anastasia", "Dima", "Katherine", "James", "Emily",
    "Michael", "Matthew", "Laura", "Kevin", "Jonas"
  ),
  score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
  attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
  qualify = c("yes", "no", "yes", "no", "no", "yes", "yes", "no", "no", "yes")
) # this closing parenthesis was missing, which made the listing unparseable
# Print original dataframe
print("Original dataframe:")
print(exam_data)
# Reorder dataframe by column name: indexing with a character vector returns
# the columns in exactly the order listed
exam_data <- exam_data[c("name", "attempts", "score", "qualify")]
print("Reorder by column name:")
print(exam_data)
OUTPUT:
# Given dataframe
> exam_data <- data.frame(
+ name = c('Anastasia', 'Dima', 'Katherine', 'James', 'Emily', 'Michael'
, 'Matthew', 'Laura', 'Kevin', 'Jonas'),
+ score = c(12.5, 9, 16.5, 12, 9, 20, 14.5, 13.5, 8, 19),
+ attempts = c(1, 3, 2, 3, 2, 3, 1, 1, 2, 1),
+ qualify = c('yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no',
'yes')
+ )
>
> # Print original dataframe
> print("Original dataframe:")
[1] "Original dataframe:"
> print(exam_data)
name score attempts qualify
1 Anastasia 12.5 1 yes
2 Dima 9.0 3 no
3 Katherine 16.5 2 yes
4 James 12.0 3 no
5 Emily 9.0 2 no
6 Michael 20.0 3 yes
7 Matthew 14.5 1 yes
8 Laura 13.5 1 no
9 Kevin 8.0 2 no
10 Jonas 19.0 1 yes
>
> # Reorder dataframe by column name
> exam_data <- exam_data[c("name", "attempts", "score", "qualify")]
> print("Reorder by column name:")
[1] "Reorder by column name:"
> print(exam_data)
name attempts score qualify
1 Anastasia 1 12.5 yes
2 Dima 3 9.0 no
3 Katherine 2 16.5 yes
4 James 3 12.0 no
5 Emily 2 9.0 no
6 Michael 3 20.0 yes
7 Matthew 1 14.5 yes
8 Laura 1 13.5 no
9 Kevin 2 8.0 no
10 Jonas 1 19.0 yes
Part 5.E: Find elements present in two dataframes
# Two character vectors to compare (the printed messages call them dataframes)
a <- letters[1:5]
b <- letters[4:7]
print("Original Dataframes:")
print(a)
print(b)
# Elements that occur in both a and b
result <- intersect(x = a, y = b)
print("Elements which are present in both dataframes:")
print(result)
OUTPUT:
# Given dataframes
> a <- c("a", "b", "c", "d", "e")
> b <- c("d", "e", "f", "g")
>
> print("Original Dataframes:")
[1] "Original Dataframes:"
> print(a)
[1] "a" "b" "c" "d" "e"
> print(b)
[1] "d" "e" "f" "g"
>
> # Find elements present in both dataframes
> result <- intersect(a, b)
> print("Elements which are present in both dataframes:")
[1] "Elements which are present in both dataframes:"
> print(result)
[1] "d" "e"
Part 5.F: Call the air quality dataset, check whether it is a
dataframe, and order it by the first and second column
# Load the built-in airquality dataset into a working copy
data <- airquality
# Report its class and show the first ten rows
print("Original data: Daily air quality measurements in New York, May to September 1973.")
print(class(data))
print(head(data, 10))
# Sort all rows by column 1, breaking ties with column 2.
# order() places NA values last by default.
row_order <- order(data[[1]], data[[2]])
result <- data[row_order, ]
print("Order the entire data frame by the first and second column:")
print(result)
OUTPUT:
# Call the air quality dataset
> data <- airquality
>
> # Check whether it is a dataframe
> print("Original data: Daily air quality measurements in New York, May to
September 1973.")
[1] "Original data: Daily air quality measurements in New York, May to Sep
tember 1973."
> print(class(data))
[1] "data.frame"
> print(head(data, 10))
Ozone Solar.R Wind Temp Month Day
1 41 190 7.4 67 5 1
2 36 118 8.0 72 5 2
3 12 149 12.6 74 5 3
4 18 313 11.5 62 5 4
5 NA NA 14.3 56 5 5
6 28 NA 14.9 66 5 6
7 23 299 8.6 65 5 7
8 19 99 13.8 59 5 8
9 8 19 20.1 61 5 9
10 NA 194 8.6 69 5 10
>
> # Order the entire data frame by the first and second column
> result <- data[order(data[,1], data[,2]),]
> print("Order the entire data frame by the first and second column:")
[1] "Order the entire data frame by the first and second column:"
> print(result)
Ozone Solar.R Wind Temp Month Day
21 1 8 9.7 59 5 21
23 4 25 9.7 61 5 23
18 6 78 18.4 57 5 18
76 7 48 14.3 80 7 15
147 7 49 10.3 69 9 24
11 7 NA 6.9 74 5 11
9 8 19 20.1 61 5 9
94 9 24 13.8 81 8 2
137 9 24 10.9 71 9 14
114 9 36 14.3 72 8 22
73 10 264 14.3 73 7 12
20 11 44 9.7 62 5 20
13 11 290 9.2 66 5 13
22 11 320 16.6 73 5 22
50 12 120 11.5 73 6 19
3 12 149 12.6 74 5 3
141 13 27 10.3 76 9 18
138 13 112 11.5 71 9 15
51 13 137 10.3 76 6 20
144 13 238 12.6 64 9 21
148 14 20 16.6 63 9 25
151 14 191 14.3 75 9 28
14 14 274 10.9 68 5 14
16 14 334 11.5 64 5 16
82 16 7 6.9 74 7 21
95 16 77 7.4 82 8 3
143 16 201 8.0 82 9 20
12 16 256 9.7 69 5 12
15 18 65 13.2 58 5 15
152 18 131 8.0 76 9 29
140 18 224 13.8 67 9 17
4 18 313 11.5 62 5 4
8 19 99 13.8 59 5 8
49 20 37 9.2 65 6 18
87 20 81 8.6 82 7 26
153 20 223 11.5 68 9 30
130 20 252 10.9 80 9 7
47 21 191 14.9 77 6 16
132 21 230 10.9 75 9 9
113 21 259 15.5 77 8 21
135 21 259 15.5 76 9 12
108 22 71 10.3 77 8 16
28 23 13 12.0 67 5 28
145 23 14 9.2 71 9 22
110 23 115 7.4 76 8 18
44 23 148 8.0 82 6 13
131 23 220 10.3 78 9 8
7 23 299 8.6 65 5 7
142 24 238 10.3 68 9 19
133 24 259 9.7 73 9 10
74 27 175 14.9 81 7 13
136 28 238 6.3 77 9 13
105 28 273 11.5 82 8 13
6 28 NA 14.9 66 5 6
38 29 127 9.7 82 6 7
149 30 193 6.9 70 9 26
19 30 322 11.5 68 5 19
111 31 244 10.9 78 8 19
24 32 92 12.0 61 5 24
129 32 92 15.5 84 9 6
64 32 236 9.2 81 7 3
17 34 307 12.0 66 5 17
78 35 274 10.3 82 7 17
97 35 NA 7.4 85 8 5
2 36 118 8.0 72 5 2
146 36 139 10.3 81 9 23
31 37 279 7.4 76 5 31
48 37 284 20.7 72 6 17
93 39 83 6.9 81 8 1
41 39 323 11.5 87 6 10
67 40 314 10.9 83 7 6
1 41 190 7.4 67 5 1
112 44 190 10.3 78 8 20
104 44 192 11.5 86 8 12
134 44 236 14.9 81 9 11
116 45 212 9.7 79 8 24
29 45 252 14.9 81 5 29
139 46 237 6.9 78 9 16
128 47 95 7.4 87 9 5
77 48 260 6.9 81 7 16
63 49 248 9.2 85 7 2
90 50 275 7.4 86 7 29
88 52 82 12.0 86 7 27
109 59 51 6.3 79 8 17
92 59 254 9.2 81 7 31
79 61 285 6.3 84 7 18
81 63 220 11.5 85 7 20
66 64 175 4.6 83 7 5
91 64 253 7.4 83 7 30
106 65 157 9.7 80 8 14
98 66 NA 4.6 87 8 6
40 71 291 13.8 90 6 9
126 73 183 2.8 93 9 3
118 73 215 8.0 86 8 26
120 76 203 9.7 97 8 28
68 77 276 5.1 88 7 7
125 78 197 5.1 92 9 2
96 78 NA 6.9 86 8 4
80 79 187 5.1 87 7 19
85 80 294 8.6 86 7 24
89 82 213 7.4 88 7 28
122 84 237 6.3 96 8 30
71 85 175 7.4 89 7 10
123 85 188 6.3 94 8 31
100 89 229 10.3 90 8 8
127 91 189 4.6 93 9 4
124 96 167 6.9 91 9 1
69 97 267 6.3 92 7 8
70 97 272 5.7 92 7 9
86 108 223 8.0 85 7 25
101 110 207 8.0 90 8 9
30 115 223 5.7 79 5 30
121 118 225 2.3 94 8 29
99 122 255 4.0 89 8 7
62 135 269 4.1 84 7 1
117 168 238 3.4 81 8 25
60 NA 31 14.9 77 6 29
58 NA 47 10.3 73 6 27
53 NA 59 1.7 76 6 22
107 NA 64 11.5 79 8 15
25 NA 66 16.6 57 5 25
54 NA 91 4.6 76 6 23
59 NA 98 11.5 80 6 28
65 NA 101 10.9 84 7 4
57 NA 127 8.0 78 6 26
56 NA 135 8.0 75 6 25
103 NA 137 11.5 86 8 11
61 NA 138 8.0 83 6 30
72 NA 139 8.6 82 7 11
150 NA 145 13.2 77 9 27
52 NA 150 6.3 77 6 21
119 NA 153 5.7 88 8 27
35 NA 186 9.2 84 6 4
10 NA 194 8.6 69 5 10
36 NA 220 8.6 85 6 5
102 NA 222 8.6 92 8 10
34 NA 242 16.1 67 6 3
43 NA 250 9.2 92 6 12
55 NA 250 6.3 76 6 24
115 NA 255 12.6 75 8 23
83 NA 258 9.7 81 7 22
42 NA 259 10.9 93 6 11
37 NA 264 14.3 79 6 6
26 NA 266 14.9 58 5 26
39 NA 273 6.9 87 6 8
32 NA 286 8.6 78 6 1
33 NA 287 9.7 74 6 2
75 NA 291 14.9 91 7 14
84 NA 295 11.5 82 7 23
46 NA 322 11.5 79 6 15
45 NA 332 13.8 80 6 14
5 NA NA 14.3 56 5 5
27 NA NA 8.0 57 5 27
6. Write an R program to Toss 2 coins using a sample
function that demonstrates probability distribution
for 100 trials.
# Simulates tossing two coins many times and compares the observed frequency
# of each outcome with its theoretical probability.

coin <- c("HH", "HT", "TH", "TT") # the four equally likely outcomes
n_trials <- 1000 # number of simulated tosses

# Draw exactly ONE outcome per trial. Drawing two values and assigning them
# to a single element raises a warning and discards the second draw.
# sample() is vectorised, so no loop (and no per-trial printing) is needed.
# Storing the result as a factor keeps all four outcomes in the table even if
# one of them is never drawn.
result <- factor(sample(coin, size = n_trials, replace = TRUE), levels = coin)

# Frequency table of the outcomes, also as a data frame (columns: result, Freq)
flips <- table(result)
print(flips)
result1 <- as.data.frame(flips)
print(result1)

# Observed share of each outcome
rel_freq <- result1$Freq / n_trials

# Plot Line graph.
# The x values are outcome categories, so plot their positions and label the
# axis by hand; passing the factor itself to plot() would draw box plots.
plot(seq_along(rel_freq), rel_freq,
  type = "b", # points joined by lines
  lwd = 2, # width of line
  col = "red", # color of line
  las = 1, # orientation of tick-mark labels
  ylim = c(0, 1), # relative frequencies lie between 0 and 1
  xaxt = "n", # suppress the numeric x-axis; custom axis added below
  xlab = "outcome", # x-axis label
  ylab = "relative frequency" # y-axis label
)
axis(1, at = seq_along(rel_freq), labels = as.character(result1$result))
# Reference line: theoretical probability of each outcome (1/4)
abline(h = 1 / length(coin), col = "gray50")

# Plot Bar Graph of the absolute frequencies
barplot(result1$Freq, names.arg = as.character(result1$result), col = "red")
OUTPUT:
> flips <- table(result)
> flips
result
HH HT TH TT
241 259 256 244
> result1 <- as.data.frame(flips)
> result1
result Freq
1 HH 241
2 HT 259
3 TH 256
4 TT 244
> # Plot Line graph
> plot(result1$result, result1$Freq,
+ type = '2', # line type
+ lwd = 2, # width of line
+ col = "red",# color of line
+ las = 1, # orientation of tick-mark labels
+ ylim = c(50, 1000), # range of y-axis
+ xlab = "number of tosses", # x-axis label
+ ylab = "relative frequency") # y-axis label
> abline(h = 0.5, col = 'gray50')
>
> # Plot Bar Graph
> barplot(result1$Freq, names.arg = result1$result, col = "red")
>
7. Find the 25th quantile of a binomial distribution with 25
trials and probability of success on each trial = 0.5
# Quantiles and random draws for a binomial distribution with
# 25 trials and success probability 0.5.

# 25th percentile: smallest x with P(X <= x) >= 0.25
quantile_25 <- qbinom(0.25, size = 25, prob = 0.5)
print(quantile_25)
# Upper-tail quantile for probability 0.25: smallest x with P(X > x) <= 0.25
# (equivalently the 75th percentile)
quantile_complement <- qbinom(0.25, size = 25, prob = 0.5, lower.tail = FALSE)
print(quantile_complement)
# Generating 10 random observations from the same binomial distribution.
# The values are actually drawn with rbinom(), so they change on every run;
# call set.seed() beforehand if reproducible output is needed.
random_values <- rbinom(10, size = 25, prob = 0.5)
print(random_values)
OUTPUT :
> # Finding the 25th quantile of a binomial distribution
> quantile_25 <- qbinom(0.25, 25, 0.5)
> print(quantile_25)
[1] 11
>
> # Binomial quantile for the probability 1-0.4
> quantile_complement <- qbinom(0.25, 25, 0.5, lower.tail = FALSE)
> print(quantile_complement)
[1] 14
>
> # Generating random observations from a binomial distribution
> random_values <- c(8, 14, 10, 12, 10, 14, 16, 7, 13, 12)
> print(random_values)
[1] 8 14 10 12 10 14 16 7 13 12
8.A. Bob succeeds in 60% of his free-throw attempts. If he shoots
12 free throws, what is the probability that he achieves exactly 10
successes?
# P(X = 10) for a binomial variable X with 12 trials and success
# probability 0.6 on each trial
dbinom(10, size = 12, prob = 0.6)
OUTPUT :
> #find the probability of 10 successes during 12 trials where the probabi
lity of
> #success on each trial is 0.6
> dbinom(x=10, size=12, prob=.6)
[1] 0.06385228
8.B. Subha flips a fair coin 20 times. What is the probability that the
coin lands on heads exactly 7 times?
# P(X = 7) where X counts heads in 20 flips of a fair coin
probability <- dbinom(7, size = 20, prob = 1 / 2)
print(probability)
OUTPUT :
> # Probability of achieving exactly 7 heads in 20 coin flips
> probability <- dbinom(x = 7, size = 20, prob = 0.5)
> print(probability)
[1] 0.07392883
8.C. A hospital database shows that 65% of the patients suffering from
cancer recover from it. What is the probability that exactly 3 of 5
randomly chosen patients will recover?
# P(X = 3) for 5 patients, each recovering with probability 0.65
dbinom(x = 3, size = 5, prob = 0.65)
OUTPUT :
> dbinom(3, size=5, prob=0.65)
[1] 0.3364156
8. D. A bowler scores a wicket on 20% of his attempts when he
bowls. If he bowls 5 times, what is the probability that he
scores 4 or fewer wickets?
# Cumulative probability P(X <= 4) for 5 attempts with success probability 0.2
pbinom(q = 4, size = 5, prob = 0.2)
OUTPUT:
> pbinom(4, size=5, prob=.2)
[1] 0.99968
8. E. Suppose you have a large population of students that’s 50%
female. If students are assigned to classrooms at random, and you
visit 100 classrooms with 20 students each, then how many girls
might you expect to see in each classroom?
# Simulate the number of girls in each of 100 classrooms of 20 students,
# every student being female with probability 0.5
rbinom(n = 100, size = 20, prob = 0.5)
OUTPUT:
> rbinom(100,20,0.5)
[1] 14 14 7 10 9 8 10 12 11 11 12 9 8 14 9 9 7 10 10 8 8
8 7
[24] 11 13 10 10 7 11 10 8 10 11 4 10 7 11 10 12 7 7 9 9 10
13 13
[47] 8 8 12 6 11 11 10 7 6 13 9 9 10 10 13 11 8 10 9 10 9
6 13
[70] 12 7 6 14 9 12 8 9 12 12 8 6 8 10 12 10 10 13 9 10 12
5 4
[93] 8 10 13 14 8 10 8 11
9. Write a function in R that randomly draws five cards from a deck of cards
# Builds a standard 52-card deck and draws a random hand from it.

# NOTE: `suit` is kept from the original listing but is not used to build the
# deck; the deck below lists the suits in its own order.
suit <- c("clubs", "diamonds", "hearts", "spades")
face <- c(
  "king", "queen", "jack", "ten", "nine", "eight", "seven",
  "six", "five", "four", "three", "two", "ace"
)
num <- c(13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)

# One row per card: the 13 faces repeated once for each of the 4 suits
deck <- data.frame(
  face = rep(face, times = 4),
  suit = rep(c("spades", "clubs", "diamonds", "hearts"), each = 13),
  num = rep(num, times = 4)
) # this closing parenthesis was missing in the original listing

# draw_n_random_cards: draw `n` distinct cards at random.
#   deck  data frame with one row per card
#   n     number of cards to draw (0 <= n <= nrow(deck))
# Returns a data frame holding the `n` sampled rows of `deck`.
draw_n_random_cards <- function(deck, n) {
  stopifnot(is.data.frame(deck), length(n) == 1, n >= 0, n <= nrow(deck))
  # seq_len() gives sample() an explicit index vector; drop = FALSE keeps the
  # result a data frame even if the deck has a single column
  deck[sample(seq_len(nrow(deck)), n), , drop = FALSE]
} # this closing brace was missing in the original listing

result <- draw_n_random_cards(deck, 5)
print(result)
OUTPUT :
> suit <- c("clubs", "diamonds", "hearts", "spades")
> face <- c("king", "queen", "jack", "ten", "nine", "eight", "seven", "six
", "five", "four", "three", "two", "ace")
> num <- c(13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
> deck <- data.frame(
+ face = rep(face, 4),
+ suit = c(rep("spades", 13), rep("clubs", 13), rep("diamonds", 13), rep
("hearts", 13)),
+ num = rep(num, 4)
+ )
>
> draw_n_random_cards <- function(deck, n) {
+ deck[sample(nrow(deck), n), ]
+ }
>
> result <- draw_n_random_cards(deck, 5)
> print(result)
face suit num
28 queen diamonds 12
51 two hearts 2
34 six diamonds 6
7 seven spades 7
4 ten spades 10
10.A. Find the value of the standard normal distribution pdf at x=0
# Value of the standard normal density at x = 0
dnorm(x = 0, mean = 0, sd = 1)
# [1] 0.3989423
# by default, R uses mean=0 and sd=1
dnorm(x = 0)
# [1] 0.3989423
# find the value of the normal distribution pdf at x=10 with mean=20 and sd=5
# (this comment was split across two lines, leaving an uncommented fragment
# that R could not parse)
dnorm(x = 10, mean = 20, sd = 5)
OUTPUT:
> dnorm(x=0, mean=0, sd=1)
[1] 0.3989423
> # [1] 0.3989423
> # by default, R uses mean=0 and sd=1
> dnorm(x=0)
[1] 0.3989423
> # [1] 0.3989423
> #find the value of the normal distribution pdf at x=10 with mean=2
0 and sd=5
> dnorm(x=10, mean=20, sd=5)
[1] 0.01079819
10.B. Suppose the height of males at a certain school is normally distributed with
a mean of μ=70 inches and a standard deviation of σ = 2 inches. Approximately
what percentage of males at this school are taller than 74 inches?
# Upper-tail probability P(X > 74) for X ~ Normal(mean = 70, sd = 2):
# the proportion of males taller than 74 inches (multiply by 100 for a percentage).
print(pnorm(q = 74, mean = 70, sd = 2, lower.tail = FALSE))
OUTPUT:
> #find the percentage of males that are taller than 74 inches in a
population with
> #mean = 70 and sd = 2
> pnorm(74, mean=70,
+ sd=2, lower.tail=FALSE)
[1] 0.02275013
10.C. Suppose the weight of a certain species of otters is normally distributed with
a mean of μ = 30 lbs and a standard deviation of σ = 5 lbs. Approximately what
percentage of this species of otters weigh less than 22 lbs?
# Lower-tail probability P(X < 22) for X ~ Normal(mean = 30, sd = 5):
# the proportion of otters weighing less than 22 lbs.
print(pnorm(q = 22, mean = 30, sd = 5, lower.tail = TRUE))
OUTPUT:
> #find percentage of otters that weight less than 22 lbs in a popul
ation with
> #mean = 30 and sd = 5
> pnorm(22, mean=30, sd=5)
[1] 0.05479929
10.D. Suppose the height of plants in a certain region is normally distributed with
a mean of μ = 13 inches and a standard deviation of σ = 2 inches. Approximately
what percentage of plants in this region are between 10 and 14 inches tall?
# P(10 < X < 14) for X ~ Normal(mean = 13, sd = 2): evaluate the cumulative
# distribution at both bounds and take the difference F(14) - F(10).
print(diff(pnorm(c(10, 14), mean = 13, sd = 2)))
OUTPUT :
> #find percentage of plants that are less than 14 inches tall, then
subtract the
> #percentage of plants that are less than 10 inches tall, based on
a population
> #with mean = 13 and sd = 2
> pnorm(14, mean=13, sd=2) - pnorm(10, mean=13, sd=2)
[1] 0.6246553
10.E. Suppose that you have a machine that packages rice inside boxes. The
process follows a Normal distribution and it is known that the mean of the
weight of each box is 1000 grams and the standard deviation is 10 grams.
What is the probability of a box weighing exactly 950 grams?
# Box weights follow Normal(mean = 1000, sd = 10).
# dnorm() evaluates the density of that distribution at 950 grams.
print(dnorm(x = 950, mean = 1000, sd = 10))
# What is the probability of a box weighing more than 980 grams?
print(pnorm(q = 980, mean = 1000, sd = 10, lower.tail = FALSE))
# Calculate the quantile for probability 0.5 for the above scenario.
print(qnorm(p = 0.5, mean = 1000, sd = 10))
# Simulate the above scenario for 10 observations.
print(rnorm(n = 10, mean = 1000, sd = 10))
OUTPUT:
> dnorm( 950, 1000,10)
[1] 1.48672e-07
> #What is the probability of a box weighing more than 980 grams?
> pnorm(980,1000,10,lower.tail=FALSE)
[1] 0.9772499
> #Calculate the quantile for probability 0.5 for the above scenario.
> qnorm(0.5,1000,10)
[1] 1000
> #Simulate the above scenario for 10 observations.
> rnorm(10,1000,10)
[1] 1011.0971 998.3551 1006.1722 997.9489 1020.7643 1006.1655 1002.1544
[8] 1000.9453 998.5481 991.4324
11. Apply Linear Regression Technique
1. derive a model
2. test the data
3. Calculate the Predictive error for the following
data
Steps to execute this program:
Step 1: Open MS Excel and create the experience and salary data sheet as given below
Step 2: Click on File > Save As, then give the file name: salary_data.csv
Then change "Save as type" to: CSV (Comma delimited)
As shown in the figure
Step 3: Open RStudio and type the program
# Define the path to your CSV file (replace with the actual location)
csv_file_path <- "C:/Users/MOIN PASHA/OneDrive/Desktop/salary_data.csv"
# Read the salary and experience data from the CSV file
data <- read.csv(csv_file_path)

# Predict salary from years of experience using simple linear regression.
#   experience: years of experience to predict for
# Returns the salary predicted by a model of Salary on Experience that is
# fitted to the global `data` frame read above.
predict_salary <- function(experience) {
  # Create the linear regression model inside the function
  # (ensure "Salary" and "Experience" match your column names)
  model <- lm(Salary ~ Experience, data = data)
  # Predict salary for the given experience
  predict(model, newdata = data.frame(Experience = experience))
}

# Call the function to create the model and get predicted salary for 11 years
predicted_salary_11 <- predict_salary(11)
# Test the model with experience of 11 years: compare the prediction with the
# salary recorded in the data for that experience
actual_salary_11 <- data[data$Experience == 11, "Salary"]
prediction_error_11 <- abs(actual_salary_11 - predicted_salary_11)
# Test the model with experience of 8 years (similar process)
predicted_salary_8 <- predict_salary(8)
actual_salary_8 <- data[data$Experience == 8, "Salary"]
prediction_error_8 <- abs(actual_salary_8 - predicted_salary_8)

# Print the results for 11 years
cat("Predicted Salary for 11 years experience:", predicted_salary_11, "\n")
# Assuming "Salary" is the column name for actual salary
cat("Actual Salary for 11 years experience:", actual_salary_11, "\n")
cat("Prediction Error for 11 years experience:", prediction_error_11, "\n\n")
# Print the results for 8 years
cat("Predicted Salary for 8 years experience:", predicted_salary_8, "\n")
cat("Actual Salary for 8 years experience:", actual_salary_8, "\n")
cat("Prediction Error for 8 years experience:", prediction_error_8, "\n\n")

# Print additional messages about successful file reading or errors (optional)
if (!is.null(data)) {
  cat("Salary and experience data loaded successfully from the CSV file.\n")
} else {
  cat("Error: Could not read data from the CSV file. Please check the file path.\n")
}
Step 4: Go to the folder where you saved salary_data.csv and copy its
path
Copy this path, then paste it in the program
Step 5: Paste the path in the program
Step 6: Change every backslash (\) in the path to a forward slash (/)
OUTPUT:
# Define the path to your CSV file (replace with the actual location)
> csv_file_path <- "C:/Users/MOIN PASHA/OneDrive/Desktop/salary_data.csv"
> # Read the salary and experience data from the CSV file
> data <- read.csv(csv_file_path)
>
> # Function to predict salary based on experience
> predict_salary <- function(experience) {
+ # Create the linear regression model inside the function
+ model <- lm(Salary ~ Experience, data = data) # Ensure "Salary" match
es your column name
+
+ # Predict salary for the given experience
+ predicted_salary <- predict(model, newdata = data.frame(Experience = e
xperience))
+ return(predicted_salary)
+ }
>
> # Call the function to create the model and get predicted salary for 11
years
> predicted_salary_11 <- predict_salary(11)
>
> # Test the model with experience of 11 years
> actual_salary_11 <- data[data$Experience == 11, "Salary"]
> prediction_error_11 <- abs(actual_salary_11 - predicted_salary_11)
>
> # Test the model with experience of 8 years (similar process)
> predicted_salary_8 <- predict_salary(8)
> actual_salary_8 <- data[data$Experience == 8, "Salary"]
> prediction_error_8 <- abs(actual_salary_8 - predicted_salary_8)
>
> # Print the results for 11 years
> cat("Predicted Salary for 11 years experience:", predicted_salary_11, "\
n")
Predicted Salary for 11 years experience: 62.1212
> cat("Actual Salary for 11 years experience:", actual_salary_11, "\n") #
Assuming "Salary" is the column name for actual salary
Actual Salary for 11 years experience: 59
> cat("Prediction Error for 11 years experience:", prediction_error_11, "\
n\n")
Prediction Error for 11 years experience: 3.121204
>
> # Print the results for 8 years
> cat("Predicted Salary for 8 years experience:", predicted_salary_8, "\n"
)
Predicted Salary for 8 years experience: 51.50878
> cat("Actual Salary for 8 years experience:", actual_salary_8, "\n")
Actual Salary for 8 years experience: 57
> cat("Prediction Error for 8 years experience:", prediction_error_8, "\n\
n")
Prediction Error for 8 years experience: 5.491223
>
> # Print additional messages about successful file reading or errors (opt
ional)
> if (!is.null(data)) {
+ cat("Salary and experience data loaded successfully from the CSV file.
\n")
+ } else {
+ cat("Error: Could not read data from the CSV file. Please check the fi
le path.\n")
+ }
Salary and experience data loaded successfully from the CSV file.
12. Demonstrate Multilinear Regression for mtcars
# Load the necessary dataset
data(mtcars)
# Extract the relevant columns for analysis
input <- mtcars[, c("mpg", "disp", "hp", "wt")]
print(head(input))
# Create the relationship model: mpg explained by disp, hp and wt
model <- lm(mpg ~ disp + hp + wt, data = input)
# Show the model
print(model)
# Get the Intercept and coefficients as vector elements
cat("# # # # The Coefficient Values # # # ", "\n")
# coef(model) is ordered as: (Intercept), disp, hp, wt
a <- coef(model)[1]
print(a)
Xdisp <- coef(model)[2]
Xhp <- coef(model)[3]
Xwt <- coef(model)[4]
print(Xdisp)
print(Xhp)
print(Xwt)
# Note: Statistical Inference
# Based on the above intercept and coefficient values, we create the
# mathematical equation.
# Y = a + Xdisp*x1 + Xhp*x2 + Xwt*x3
# or
# Y = 37.10551 + (-0.000937)*x1 + (-0.03115655)*x2 + (-3.800891)*x3
OUTPUT:
> # Load the necessary dataset
> data(mtcars)
>
> # Extract the relevant columns for analysis
> input <- mtcars[, c("mpg", "disp", "hp", "wt")]
> print(head(input))
mpg disp hp wt
Mazda RX4 21.0 160 110 2.620
Mazda RX4 Wag 21.0 160 110 2.875
Datsun 710 22.8 108 93 2.320
Hornet 4 Drive 21.4 258 110 3.215
Hornet Sportabout 18.7 360 175 3.440
Valiant 18.1 225 105 3.460
>
> # Create the relationship model
> model <- lm(mpg ~ disp + hp + wt, data = input)
>
> # Show the model
> print(model)
Call:
lm(formula = mpg ~ disp + hp + wt, data = input)
Coefficients:
(Intercept) disp hp wt
37.105505 -0.000937 -0.031157 -3.800891
>
> # Get the Intercept and coefficients as vector elements
> cat("# # # # The Coefficient Values # # # ", "\n")
# # # # The Coefficient Values # # #
> a <- coef(model)[1]
> print(a)
(Intercept)
37.10551
> Xdisp <- coef(model)[2]
> Xhp <- coef(model)[3]
> Xwt <- coef(model)[4]
>
> print(Xdisp)
disp
-0.0009370091
> print(Xhp)
hp
-0.03115655
> print(Xwt)
wt
-3.800891
>
> # Note: Statistical Inference
> # Based on the above intercept and coefficient values, we create the mat
hematical equation.
> # Y = a + Xdisp*x1 + Xhp*x2 + Xwt*x3
> # or
> # Y = 37.10551 + (-0.000937)*x1 + (-0.03115655)*x2 + (-3.800891)*x3
13. Demonstrate Sampling and Sampling Distribution using Iris Data set
# Load the necessary dataset and view its structure
str(iris)
# Convert the iris dataset to a data frame and open it in the data viewer.
# View() opens a GUI viewer, so it is only called in an interactive session;
# this keeps the script runnable in batch mode (e.g. via Rscript).
iris_df <- data.frame(iris)
if (interactive()) {
  View(iris_df)
}
# Set the number of iterations and sample size
iter <- 100
n <- 5
# Preallocate a numeric vector to store the sample means
means <- numeric(iter)
# Draw `iter` random samples of `n` petal lengths and record each sample's mean
for (i in seq_len(iter)) {
  petal_sample <- sample(iris$Petal.Length, n)
  means[i] <- mean(petal_sample)
}
# Create a histogram of the means (an empirical sampling distribution)
hist(means)
OUTPUT:
> # Load the necessary dataset and view its structure
> str(iris)
'data.frame': 150 obs. of 5 variables:
$ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1
1 1 1 1 ...
>
> # Convert the iris dataset to a data frame
> iris_df <- data.frame(iris)
> View(iris_df)
>
> # Set the number of iterations and sample size
> iter <- 100
> n <- 5
>
> # Initialize a vector to store the means
> means <- rep(NA, iter)
>
> # Perform the iterations
> for (i in 1:iter) {
+ mean_of_each_sample <- sample(iris$Petal.Length, n)
+ means[i] <- mean(mean_of_each_sample)
+ }
>
> # Create a histogram of the means
> hist(means)
14. Demonstrate the following queries on data sets of BSc A and
B section Results.
library("xlsx")
# Load datasets (first sheet of each workbook); replace the paths with the
# actual location of the files on your system
bsc_a <- read.xlsx(
  "C:/Users/MOIN PASHA/OneDrive/Documents/R programming/BSc 1.xlsx",
  sheetIndex = 1
)
bsc_b <- read.xlsx(
  "C:/Users/MOIN PASHA/OneDrive/Documents/R programming/BSc 2.xlsx",
  sheetIndex = 1
)

# Print column names and number of rows
print("Columns in BSc A dataset:")
print(names(bsc_a))
print("Columns in BSc B dataset:")
print(names(bsc_b))
print(paste("Number of rows in BSc A dataset:", nrow(bsc_a)))
print(paste("Number of rows in BSc B dataset:", nrow(bsc_b)))

# Analysis on BSc B dataset: count students below 40 and above 90 in
# R.Programming, ignoring missing marks
students_below_40_b <- sum(bsc_b$R.Programming < 40, na.rm = TRUE)
students_greater_90_b <- sum(bsc_b$R.Programming > 90, na.rm = TRUE)
print(paste(
  "Number of students below 40 in R.Programming in BSc B dataset:",
  students_below_40_b
))
print(paste(
  "Number of students above 90 in R.Programming in BSc B dataset:",
  students_greater_90_b
))

# Return the most frequent value of `v`; when several values tie, the one
# that appears first in `v` is returned.
get_mode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}

# Summary statistics for one column.
#   column: vector of marks
# Returns a list with elements mean, median, sd (each computed with missing
# values removed) and mode.
summary_stats <- function(column) {
  mean_val <- mean(column, na.rm = TRUE)
  median_val <- median(column, na.rm = TRUE)
  sd_val <- sd(column, na.rm = TRUE)
  mode_val <- get_mode(column)
  list(mean = mean_val, median = median_val, sd = sd_val, mode = mode_val)
}

relevant_columns <- c(
  "R.Programming", "Web.Development", "Optical.Fiber", "IoT.and.Robotics"
)
stats_results <- lapply(bsc_b[relevant_columns], summary_stats)
for (i in seq_along(relevant_columns)) {
  print(paste("Summary statistics for", relevant_columns[i], ":"))
  print(paste("Mean:", stats_results[[i]]$mean))
  print(paste("Median:", stats_results[[i]]$median))
  print(paste("Standard deviation:", stats_results[[i]]$sd))
  print(paste("Mode:", stats_results[[i]]$mode))
}

# Multilinear Regression: model Avg on the four subject columns
input_b <- bsc_b[, c(relevant_columns, "Avg")]
model_b <- lm(Avg ~ ., data = input_b)
print(coef(model_b))

# Prediction and inference for one student's marks
moin_data <- data.frame(
  R.Programming = 78, Web.Development = 106, Optical.Fiber = 71,
  IoT.and.Robotics = 93
)
predicted_avg_moin <- predict(model_b, newdata = moin_data)
actual_avg_moin <- 89 # assuming the actual average score
predictive_error_moin <- actual_avg_moin - predicted_avg_moin
print(predicted_avg_moin)
print(predictive_error_moin)
Step 1: Check that your system has the BSc A and B section Excel sheets
Step 2: Enter the whole program as given in the lab manual
Step 3: Check whether the xlsx package is available in RStudio. If
you need to download the package, click on Tools, click on Install
Packages, then type xlsx
Step 4: After typing xlsx, click on Install
Step 5: Check whether the package is installed in your system
Step 6: Copy the path of the Excel sheets for BSc 1 and 2
Step 7: Paste the path in your program
Step 8: In the path, change every backslash (\) to a forward slash (/)
as seen in the above photo
Step 9: Run the program
Output:
> library("xlsx")
>
> # Load datasets
> bsc_a <- read.xlsx("C:/Users/MOIN PASHA/OneDrive/Documents/R programming
/BSc 1.xlsx", sheetIndex = 1)
> bsc_b <- read.xlsx("C:/Users/MOIN PASHA/OneDrive/Documents/R programming
/BSc 2.xlsx", sheetIndex = 1)
>
> # Print column names and number of rows
> print("Columns in BSc A dataset:")
[1] "Columns in BSc A dataset:"
> print(names(bsc_a))
[1] "Name" "Age" "Gender"
[4] "University" "Graduation.Year" "Current.Job.Title"
[7] "Company" "Email" "Subject"
[10] "Subject.Marks"
>
> print("Columns in BSc B dataset:")
[1] "Columns in BSc B dataset:"
> print(names(bsc_b))
[1] "BSc.Course" "Reg.No" "Name"
[4] "R.Programming" "Web.Development" "Optical.Fiber"
[7] "IoT.and.Robotics" "Avg"
>
> print(paste("Number of rows in BSc A dataset:", nrow(bsc_a)))
[1] "Number of rows in BSc A dataset: 41"
> print(paste("Number of rows in BSc B dataset:", nrow(bsc_b)))
[1] "Number of rows in BSc B dataset: 30"
>
> # Analysis on BSc B dataset
> students_below_40_b <- sum(bsc_b$R.Programming < 40, na.rm = TRUE)
> students_greater_90_b <- sum(bsc_b$R.Programming > 90, na.rm = TRUE)
>
> print(paste("Number of students below 40 in R.Programming in BSc B datas
et:", students_below_40_b))
[1] "Number of students below 40 in R.Programming in BSc B dataset: 0"
> print(paste("Number of students above 90 in R.Programming in BSc B datas
et:", students_greater_90_b))
[1] "Number of students above 90 in R.Programming in BSc B dataset: 19"
>
> # Summary statistics
> summary_stats <- function(column) {
+ mean_val <- mean(column, na.rm = TRUE)
+ median_val <- median(column, na.rm = TRUE)
+ sd_val <- sd(column, na.rm = TRUE)
+ mode_val <- get_mode(column)
+
+ return(list(mean = mean_val, median = median_val, sd = sd_val, mode =
mode_val))
+ }
>
> get_mode <- function(v) {
+ uniqv <- unique(v)
+ uniqv[which.max(tabulate(match(v, uniqv)))]
+ }
>
> relevant_columns <- c("R.Programming", "Web.Development", "Optical.Fiber
", "IoT.and.Robotics")
> stats_results <- lapply(bsc_b[relevant_columns], summary_stats)
>
> for (i in seq_along(relevant_columns)) {
+ print(paste("Summary statistics for", relevant_columns[i], ":"))
+ print(paste("Mean:", stats_results[[i]]$mean))
+ print(paste("Median:", stats_results[[i]]$median))
+ print(paste("Standard deviation:", stats_results[[i]]$sd))
+ print(paste("Mode:", stats_results[[i]]$mode))
+ }
[1] "Summary statistics for R.Programming :"
[1] "Mean: 102.266666666667"
[1] "Median: 104"
[1] "Standard deviation: 24.3224507234654"
[1] "Mode: 81"
[1] "Summary statistics for Web.Development :"
[1] "Mean: 114.8"
[1] "Median: 115.5"
[1] "Standard deviation: 18.888785214951"
[1] "Mode: 115"
[1] "Summary statistics for Optical.Fiber :"
[1] "Mean: 72.5"
[1] "Median: 72.5"
[1] "Standard deviation: 13.3719985352821"
[1] "Mode: 70"
[1] "Summary statistics for IoT.and.Robotics :"
[1] "Mean: 74.2666666666667"
[1] "Median: 77"
[1] "Standard deviation: 11.5726590194998"
[1] "Mode: 84"
>
> # Multilinear Regression
> input_b <- bsc_b[, c(relevant_columns, "Avg")]
> model_b <- lm(Avg ~ ., data = input_b)
>
> print(coef(model_b))
(Intercept) R.Programming Web.Development Optical.Fiber
10.0844831 0.2224124 0.2270819 0.2000564
IoT.and.Robotics
0.1932278
>
> # Prediction and inference
> moin_data <- data.frame(R.Programming = 78, Web.Development = 106, Optic
al.Fiber = 71, IoT.and.Robotics = 93)
> predicted_avg_moin <- predict(model_b, newdata = moin_data)
> actual_avg_moin <- 89 # assuming the actual average score
> predictive_error_moin <- actual_avg_moin - predicted_avg_moin
>
> print(predicted_avg_moin)
1
83.67752
> print(predictive_error_moin)
1
5.322478
15. Demonstrate the Z-test
# Sample data for two groups
group1_data <- c(22, 25, 28, 30, 32)
group2_data <- c(18, 20, 24, 26, 28)
# Parameters
alpha <- 0.05 # Significance level

# Two-sample Z-test for a difference in means.
#   data1, data2: numeric vectors holding the two samples
# Returns a list with:
#   z_score: difference of the sample means divided by its standard error,
#            sqrt(sd1^2 / n1 + sd2^2 / n2), using the sample standard deviations
#   p_value: two-tailed p-value of z_score under the standard normal distribution
two_sample_z_test <- function(data1, data2) {
  mean1 <- mean(data1)
  mean2 <- mean(data2)
  sd1 <- sd(data1)
  sd2 <- sd(data2)
  n1 <- length(data1)
  n2 <- length(data2)
  z_score <- (mean1 - mean2) / sqrt((sd1^2 / n1) + (sd2^2 / n2))
  # Two-tailed test: double the lower-tail probability of -|z|
  p_value <- 2 * pnorm(-abs(z_score))
  list(
    z_score = z_score,
    p_value = p_value
  )
}

# Perform the two-sample Z-test
result_two_sample <- two_sample_z_test(group1_data, group2_data)
# Display the result
cat("Z-Score:", result_two_sample$z_score, "\n")
cat("P-Value:", result_two_sample$p_value, "\n")
# Make a decision based on the p-value
if (result_two_sample$p_value < alpha) {
  cat("Reject the null hypothesis. There is a significant difference between the two groups.\n")
} else {
  cat("Fail to reject the null hypothesis. There is no significant difference between the two groups.\n")
}
OUTPUT:
> # Sample data for two groups
> group1_data <- c(22, 25, 28, 30, 32)
> group2_data <- c(18, 20, 24, 26, 28)
>
> # Parameters
> alpha <- 0.05 # Significance level
>
> # Two-sample Z-test function
> two_sample_z_test <- function(data1, data2) {
+ mean1 <- mean(data1)
+ mean2 <- mean(data2)
+ sd1 <- sd(data1)
+ sd2 <- sd(data2)
+ n1 <- length(data1)
+ n2 <- length(data2)
+ z_score <- (mean1 - mean2) / sqrt((sd1^2 / n1) + (sd2^2 / n2))
+ p_value <- 2 * pnorm(-abs(z_score)) # Two-tailed test
+ result <- list(
+ z_score = z_score,
+ p_value = p_value
+ )
+ return(result)
+ }
>
> # Perform the two-sample Z-test
> result_two_sample <- two_sample_z_test(group1_data, group2_data)
>
> # Display the result
> cat("Z-Score:", result_two_sample$z_score, "\n")
Z-Score: 1.634848
> cat("P-Value:", result_two_sample$p_value, "\n")
P-Value: 0.102081
>
> # Make a decision based on the p-value
> if (result_two_sample$p_value < alpha) {
+ cat("Reject the null hypothesis. There is a significant difference bet
ween the two groups.\n")
+ } else {
+ cat("Fail to reject the null hypothesis. There is no significant diffe
rence between the two groups.\n")
+ }
Fail to reject the null hypothesis. There is no significant difference bet
ween the two groups.