# R Programming Tutorial for Beginners
###################################################
# #
# R Programming Tutorial for Beginners #
# Joaquim Schork #
# #
###################################################
## Taken from: https://statisticsglobe.com/wp-content/uploads/2022/01/Introduction-to-the-R-Programming-Language-by-Statistics-Globe.txt
## Set the working directory. The folder is machine-specific, so switch to it
## only when it exists; otherwise keep the current directory instead of
## aborting the whole script with "cannot change working directory".
if (dir.exists("C:/datos")) {
  setwd("C:/datos")
} else {
  message("Directory 'C:/datos' not found; staying in: ", getwd())
}
## Remove every object returned by ls() (leftovers from a previous session).
rm(list = ls())
##### Data Manipulation in R #####
# Numeric vector with six values; typing its name prints it.
vec_1 <- c(1, 1, 5, 3, 1, 5)
vec_1

# Data frame with a numeric, a text and an integer column (six rows).
data_1 <- data.frame(
  x1 = c(7, 2, 8, 3, 3, 7),
  x2 = c("x", "y", "x", "x", "x", "y"),
  x3 = 11:16
)
data_1

# List holding three objects of different types: an integer sequence,
# the vector above and the data frame above.
list_1 <- list(1:5, vec_1, data_1)
list_1

# Class of the numeric vector.
class(vec_1)

# Character vector: create, print, inspect class.
vec_2 <- c("a", "b", "a", "c")
vec_2
class(vec_2)

# Factor vector: create, print, inspect class.
vec_3 <- factor(c("gr1", "gr1", "gr2", "gr3", "gr2"))
vec_3
class(vec_3)

# Convert the factor back to plain character strings and check the result.
vec_4 <- as.character(vec_3)
vec_4
class(vec_4)
# Work on a copy so data_1 stays untouched, then append vec_1 as column x4.
data_2 <- data_1
data_2$x4 <- vec_1
data_2

# Remove column x2 by keeping every column whose name is not "x2".
# drop = FALSE guarantees the result stays a data frame even if only a
# single column were left.
data_3 <- data_2[, colnames(data_2) != "x2", drop = FALSE]
data_3

# Rename the three remaining columns on a copy.
data_4 <- data_3
colnames(data_4) <- c("col_A", "col_B", "col_C")
data_4

# Append the values 101, 102, 103 as a new row.
data_5 <- rbind(data_3, 101:103)
data_5

# Keep only the rows whose x1 is greater than 3 (all other rows are removed).
data_6 <- data_5[data_5$x1 > 3, ]
data_6
# First table: IDs 101-106 with two letter columns (x2 is x1 in reverse).
data_7 <- data.frame(
  ID = 101:106,
  x1 = letters[1:6],
  x2 = rev(letters[1:6])
)
data_7

# Second table: IDs 104-108 with an ascending, a descending and a constant
# column (the single value 5 is recycled to every row).
data_8 <- data.frame(
  ID = 104:108,
  y1 = 1:5,
  y2 = rev(1:5),
  y3 = 5
)
data_8

# Full outer join on ID: rows found in only one table are kept and the
# columns coming from the other table are filled with NA.
data_9 <- merge(data_7, data_8, by = "ID", all.x = TRUE, all.y = TRUE)
data_9
# Copy of vec_1 in which every element equal to 1 becomes 99.
vec_5 <- replace(vec_1, vec_1 == 1, 99)
vec_5

# Copy of data_1 in which every "y" in column x2 becomes "new".
data_10 <- data_1
data_10$x2 <- replace(data_10$x2, data_10$x2 == "y", "new")
data_10
# Show the working directory before changing it.
getwd()

# Switch to the data folder only when it exists on this machine; otherwise
# keep the current directory so the export below still runs instead of the
# script stopping with "cannot change working directory".
if (dir.exists("C:/datos")) {
  setwd("C:/datos")
}

# Show the working directory that the CSV file will be written to.
getwd()

# Export the data frame to a CSV file without the row-name column.
write.csv(data_10, file = "data_10.csv", row.names = FALSE)

# Read the file back in and print the imported data frame.
data_11 <- read.csv("data_10.csv")
data_11
##### Creating Graphics in R #####
# Load the built-in iris data set and look at its first rows.
data(iris)
head(iris)

# Scatterplot of sepal width against sepal length, coloured by species.
plot(
  x = iris$Sepal.Length,
  y = iris$Sepal.Width,
  col = iris$Species
)

# Kernel density estimate of sepal length.
plot(density(x = iris$Sepal.Length))

# Histogram of sepal length.
hist(x = iris$Sepal.Length)

# One box of sepal length per species.
boxplot(iris$Sepal.Length ~ iris$Species)
# Install ggplot2 only when it is not available yet, so re-running the script
# does not download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
# Attach ggplot2 for the plots below.
library("ggplot2")
# Scatterplot: sepal width against sepal length, one colour per species.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point()

# Density of sepal length over the whole data set.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_density()

# One density outline per species.
ggplot(iris, aes(x = Sepal.Length, colour = Species)) +
  geom_density()

# Densities per species with the area under each curve filled.
ggplot(iris, aes(x = Sepal.Length, colour = Species, fill = Species)) +
  geom_density()

# Same as above, but semi-transparent so overlapping areas remain visible.
ggplot(iris, aes(x = Sepal.Length, colour = Species, fill = Species)) +
  geom_density(alpha = 0.3)

# Histogram of sepal length.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram()

# Boxplot of sepal length by species.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_boxplot()

# Boxplot of sepal length by species, boxes filled by species.
ggplot(iris, aes(x = Species, y = Sepal.Length, fill = Species)) +
  geom_boxplot()
# Work on a copy of iris and tag the rows with the subgroups a, b, c in
# repeating order (rep_len makes the recycling explicit).
iris_groups <- iris
iris_groups$Sub <- rep_len(letters[1:3], nrow(iris_groups))

# Mean sepal length for every Species x Sub combination. The formula is
# passed positionally: R >= 4.2 renamed the first argument of the formula
# method from `formula` to `x`, so `aggregate(formula = ...)` fails there,
# while the unnamed form works on old and new versions alike.
iris_groups <- aggregate(
  Sepal.Length ~ Species + Sub,
  data = iris_groups,
  FUN = mean
)
iris_groups
# Barplot of the aggregated values per species; stat = "identity" uses the
# y values as bar heights.
ggplot(iris_groups, aes(x = Species, y = Sepal.Length)) +
  geom_bar(stat = "identity")

# Stacked barplot: each bar is split into its subgroups by fill colour.
ggplot(iris_groups, aes(x = Species, y = Sepal.Length, fill = Sub)) +
  geom_bar(stat = "identity")

# Grouped barplot: subgroups are drawn side by side instead of stacked.
ggplot(iris_groups, aes(x = Species, y = Sepal.Length, fill = Sub)) +
  geom_bar(stat = "identity", position = "dodge")
##### Data Analysis & Descriptive Statistics in R #####
# Measures of location for vec_1.
mean(vec_1)
median(vec_1)

# Smallest and largest value.
min(vec_1)
max(vec_1)

# Total of all elements.
sum(vec_1)

# Measures of spread: variance and standard deviation.
var(vec_1)
sd(vec_1)

# Several descriptive statistics in one call.
summary(vec_1)

# Frequency of each distinct value in vec_1.
table(vec_1)

# Contingency table of columns x1 and x2 of data_1.
table(data_1[c("x1", "x2")])
# Estimate a simple linear regression of sepal width on sepal length.
mod_1 <- lm(
  formula = Sepal.Width ~ Sepal.Length,
  data = iris
)
# Summary statistics of the fitted model.
summary(mod_1)
# Scatterplot of sepal width against sepal length with a fitted
# linear-model line drawn on top of the points.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  geom_smooth(method = "lm")
# Model with multiple predictors: sepal length plus species.
mod_2 <- lm(
  formula = Sepal.Width ~ Sepal.Length + Species,
  data = iris
)
# Summary statistics of the fitted model.
summary(mod_2)
# Scatterplot coloured by species; because colour is mapped to Species,
# a separate linear-model line is drawn for each species.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, colour = Species)) +
  geom_point() +
  geom_smooth(method = "lm")
##### Advanced Techniques in R #####
# Start from an empty numeric vector and print it to show that it is empty.
vec_6 <- numeric()
vec_6

# Add each element's position to its value. seq_along() is used instead of
# 1:length(): for an empty vec_1 the latter would iterate over c(1, 0).
for (i in seq_along(vec_1)) {
  vec_6[i] <- vec_1[i] + i
}

# Print the input and the result for comparison.
vec_1
vec_6
# Copy data_1 and add a column x4 that initially contains only NA.
data_12 <- data_1
data_12$x4 <- NA
data_12

# Fill x4 row by row with x1 + row index * x3. The loop runs over the rows
# of data_12, the object actually being modified, and seq_len() yields no
# iterations for a data frame with zero rows.
for (i in seq_len(nrow(data_12))) {
  data_12$x4[i] <- data_12$x1[i] + i * data_12$x3[i]
}
data_12
# Start from an empty character vector and print it to show that it is empty.
vec_7 <- character()
vec_7

# Label every element of vec_1 as "high" (greater than 3) or "low" using a
# for loop with an if/else statement. seq_along() avoids the c(1, 0)
# sequence that 1:length() would produce for an empty vec_1.
for (i in seq_along(vec_1)) {
  if (vec_1[i] > 3) {
    vec_7[i] <- "high"
  } else {
    vec_7[i] <- "low"
  }
}
vec_7

# The same labelling in one vectorized call.
vec_8 <- ifelse(test = vec_1 > 3, yes = "high", no = "low")
vec_8
#' Simple user-defined function: evaluate x^2 + 5 * x.
#'
#' @param x Numeric value or vector; the arithmetic is applied element-wise.
#' @return The value of x^2 + 5 * x, with the same length as `x`.
fun_1 <- function(x) {
  x^2 + 5 * x
}
# Apply fun_1 to every element of vec_1.
fun_1(vec_1)
#' More complex user-defined function: x^2 + 5 * x scaled by a divisor
#' that depends on y.
#'
#' @param x Numeric value or vector.
#' @param y Single number used in the `if` condition: the divisor is `y`
#'   itself when y > 3 and `10 * y` otherwise.
#' @return (x^2 + 5 * x) divided by the chosen divisor.
fun_2 <- function(x, y) {
  divisor <- if (y > 3) y else 10 * y
  (x^2 + 5 * x) / divisor
}
# Call fun_2 with x = 5 and y = 1, ..., 5 and print one message per
# iteration that contains the iteration number and the result.
for (i in seq_len(5)) {
  print(paste0("This is the result of iteration ", i, ": ", fun_2(x = 5, y = i)))
}