R
What is R?
• R is a popular programming language used for
statistical computing and graphical presentation.
• Its most common use is to analyze and visualize data.
Why Use R?
• It is a great resource for data analysis, data visualization,
data science and machine learning
• It provides many statistical techniques (such as statistical
tests, classification, clustering and data reduction)
• It is easy to draw graphs in R, like pie charts, histograms,
box plots, scatter plots, etc.
• It works on different platforms (Windows, Mac, Linux)
• It is open-source and free
• It has large community support
• It has many packages (libraries of functions) that can be
used to solve different problems
Try in RStudio
• print("Hello World!")
• for (x in 1:10)
{
print(x)
}
• "Hello World!"
Creating Variables in R
• name <- "John"
age <- 40
• name
• print(name)
Data Type
•numeric - (10.5, 55, 787)
•integer - (1L, 55L, 100L, where the letter "L"
declares this as an integer)
•complex - (9 + 3i, where "i" is the imaginary part)
•character (a.k.a. string) - ("k", "R is exciting",
"FALSE", "11.5")
•logical (a.k.a. boolean) - (TRUE or FALSE)
Data Types
• # numeric
x <- 10.5
class(x)
# integer
x <- 1000L
class(x)
# complex
x <- 9i + 3
class(x)
# character/string
x <- "R is exciting"
class(x)
# logical/boolean
x <- TRUE
class(x)
R Numbers
• x <- 10.5 # numeric
y <- 10L # integer
z <- 1i # complex
• x <- 10.5
y <- 55
# Print values of x and y
x
y
# Print the class name of x and y
class(x)
class(y)
• x <- 1000L
y <- 55L
# Print values of x and y
x
y
# Print the class name of x and y
class(x)
class(y)
• x <- 3+5i
y <- 5i
# Print values of x and y
x
y
# Print the class name of x and y
class(x)
class(y)
Math
• max(5, 10, 15)
min(5, 10, 15)
• sqrt(16)
• abs(-4.7)
• ceiling(1.4)
floor(1.4)
R Strings
• "hello"
'hello'
• str <- "Hello"
str # print the value of str
• str <- "You are best."
str # print the value of str
• cat(str)
• str <- "Hello World!"
nchar(str)
• str <- "Hello World!"
grepl("H", str)
grepl("Hello", str)
grepl("X", str)
• str1 <- "Hello"
str2 <- "World"
paste(str1, str2)
• # Error: the unescaped double quotes around Vikings end the string early
str <- "We are the so-called "Vikings", from the north."
str
• str <- "We are the so-called \"Vikings\", from the north."
str
cat(str)
R Booleans / Logical Values
• 10 > 9 # TRUE because 10 is greater than 9
10 == 9 # FALSE because 10 is not equal to 9
10 < 9 # FALSE because 10 is greater than 9
• a <- 10
b <- 9
a > b
• a <- 200
b <- 33
if (b > a) {
print ("b is greater than a")
} else {
print("b is not greater than a")
}
R Operators
R If ... Else
• a <- 33
b <- 200
if (b > a) {
print("b is greater than a")
}
• a <- 33
b <- 33
if (b > a) {
print("b is greater than a")
} else if (a == b) {
print ("a and b are equal")
}
• a <- 200
b <- 33
if (b > a) {
print("b is greater than a")
} else if (a == b) {
print("a and b are equal")
} else {
print("a is greater than b")
}
{
x <- as.integer(readline(prompt = "Enter first number :"))
y <- as.integer(readline(prompt = "Enter second number :"))
z <- as.integer(readline(prompt = "Enter third number :"))
if (x > y) {
if (x > z)
print(paste("Greatest is :", x))
else
print(paste("Greatest is :", z))
} else {
if (y > z)
print(paste("Greatest is :", y))
else{
print(paste("Greatest is :", z))
}
}
}
• x <- 41
if (x > 10) {
print("Above ten")
if (x > 20) {
print("and also above 20!")
} else {
print("but not above 20.")
}
} else {
print("below 10.")
}
• a <- 200
b <- 33
c <- 500
if (a > b & c > a){
print("Both conditions are true")
}
• a <- 200
b <- 33
c <- 500
if (a > b | a > c){
print("At least one of the conditions is true")
}
R While Loop
• i <- 1
while (i < 6) {
print(i)
i <- i + 1
}
R For Loop
• for (x in 1:10) {
print(x)
}
• fruits <- list("apple", "banana", "cherry")
for (x in fruits) {
print(x)
}
• dice <- c(1, 2, 3, 4, 5, 6)
for (x in dice) {
print(x)
}
• dice <- 1:6
for(x in dice) {
if (x == 6) {
print(paste("The dice number is",
x, "Yahtzee!"))
} else {
print(paste("The dice number is", x, "Not Yahtzee"))
}
}
R Vectors
A vector is simply a list of items that are of
the same type.
To combine the list of items into a vector, use
the c() function and separate the items with
commas.
• # Vector of strings
fruits <- c("banana", "apple", "orange")
# Print fruits
fruits
• # Vector of numerical values
numbers <- c(1, 2, 3)
# Print numbers
numbers
• # Vector with numerical values in a sequence
numbers <- 1:10
numbers
• # Vector with numerical decimals in a sequence
numbers1 <- 1.5:6.5
numbers1
• # Vector with numerical decimals in a sequence
# where the last element is not used
numbers2 <- 1.5:6.3
numbers2
• # Vector of logical values
log_values <- c(TRUE, FALSE, TRUE, FALSE)
log_values
• fruits <- c("banana", "apple", "orange")
length(fruits)
• fruits <-
c("banana", "apple", "orange", "mango", "lemon")
numbers <- c(13, 3, 5, 7, 20, 2)
sort(fruits) # Sort a string
sort(numbers) # Sort numbers
• fruits <- c("banana", "apple", "orange")
# Access the first item (banana)
fruits[1]
• fruits <-
c("banana", "apple", "orange", "mango", "lemon")
# Access the first and third item (banana and orange)
fruits[c(1, 3)]
• fruits <-
c("banana", "apple", "orange", "mango", "lemon")
# Access all items except for the first item
fruits[c(-1)]
• fruits <-
c("banana", "apple", "orange", "mango", "lemon")
# Change "banana" to "pear"
fruits[1] <- "pear"
# Print fruits
fruits
• repeat_each <- rep(c(1,2,3), each = 3)
repeat_each
• repeat_times <- rep(c(1,2,3), times = 3)
repeat_times
• repeat_independent <- rep(c(1,2,3), times = c(5,2,1))
repeat_independent
• numbers <- 1:10
numbers
• numbers <- seq(from = 0, to = 100, by = 20)
numbers
Lists
• # List of strings
thislist <- list("apple", "banana", "cherry")
# Print the list
thislist
• thislist <- list("apple", "banana", "cherry")
thislist[1]
• thislist <- list("apple", "banana", "cherry")
thislist[1] <- "blackcurrant"
# Print the updated list
thislist
• thislist <- list("apple", "banana", "cherry")
length(thislist)
Matrices
A matrix is a two-dimensional data set with columns and rows.
A column is a vertical representation of data, while a row is a horizontal representation of data.
A matrix can be created with the matrix() function. Specify the nrow and ncol parameters to set the number of rows and columns:
• # Create a matrix
thismatrix <- matrix(c(1,2,3,4,5,6), nrow = 3, ncol
= 2)
# Print the matrix
thismatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"), nrow
= 2, ncol = 2)
thismatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
thismatrix[1, 2]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
thismatrix[2,]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"), nrow
= 2, ncol = 2)
thismatrix[,2]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange","grape",
"pineapple", "pear", "melon", "fig"), nrow = 3, ncol
= 3)
thismatrix[c(1,2),]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange", "grape",
"pineapple", "pear", "melon", "fig"), nrow = 3, ncol = 3)
thismatrix[, c(1,2)]
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange", "grape",
"pineapple", "pear", "melon", "fig"), nrow = 3, ncol = 3)
newmatrix <- cbind(thismatrix,
c("strawberry", "blueberry", "raspberry"))
# Print the new matrix
newmatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange", "grape",
"pineapple", "pear", "melon", "fig"), nrow = 3, ncol = 3)
newmatrix <- rbind(thismatrix,
c("strawberry", "blueberry", "raspberry"))
# Print the new matrix
newmatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange", "mango",
"pineapple"), nrow = 3, ncol = 2)
#Remove the first row and the first column
thismatrix <- thismatrix[-c(1), -c(1)]
thismatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
"apple" %in% thismatrix
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
dim(thismatrix)
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
dim(thismatrix)
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
length(thismatrix)
• thismatrix <-
matrix(c("apple", "banana", "cherry", "orange"),
nrow = 2, ncol = 2)
for (rows in 1:nrow(thismatrix)) {
for (columns in 1:ncol(thismatrix)) {
print(thismatrix[rows, columns])
}
}
# Combine matrices
Matrix1 <- matrix(c("apple", "banana", "cherry", "grape"),
nrow = 2, ncol = 2)
Matrix2 <-
matrix(c("orange", "mango", "pineapple", "watermelon"), nrow
= 2, ncol = 2)
# Add it as rows
Matrix_Combined <- rbind(Matrix1, Matrix2)
Matrix_Combined
# Add it as columns
Matrix_Combined <- cbind(Matrix1, Matrix2)
Matrix_Combined
Arrays
• # An array with one dimension with values ranging
# from 1 to 24
thisarray <- c(1:24)
thisarray
# An array with more than one dimension
multiarray <- array(thisarray, dim = c(4, 3, 2))
multiarray
• thisarray <- c(1:24)
multiarray <- array(thisarray, dim = c(4, 3, 2))
multiarray[2, 3, 2]
• thisarray <- c(1:24)
# Access all the items from the first row from matrix one
multiarray <- array(thisarray, dim = c(4, 3, 2))
multiarray[c(1),,1]
# Access all the items from the first column from matrix one
multiarray <- array(thisarray, dim = c(4, 3, 2))
multiarray[,c(1),1]
• thisarray <- c(1:24)
multiarray <- array(thisarray, dim = c(4, 3, 2))
2 %in% multiarray
• thisarray <- c(1:24)
multiarray <- array(thisarray, dim = c(4, 3, 2))
dim(multiarray)
• thisarray <- c(1:24)
multiarray <- array(thisarray, dim = c(4, 3, 2))
length(multiarray)
• thisarray <- c(1:24)
multiarray <- array(thisarray, dim = c(4, 3, 2))
for(x in multiarray){
print(x)
}
R Data Frames
• Data frames are data displayed in a table format.
• A data frame can hold different types of data. While the first
column can be character, the second and third can be numeric or
logical. However, each column should hold a single type of data.
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
Data_Frame
summary(Data_Frame)
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
Data_Frame[1]
Data_Frame[["Training"]]
Data_Frame$Training
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
# Add a new row
New_row_DF <- rbind(Data_Frame,
c("Strength", 110, 110))
# Print the new row
New_row_DF
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
# Add a new column
New_col_DF <- cbind(Data_Frame, Steps
= c(1000, 6000, 2000))
# Print the new column
New_col_DF
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
# Remove the first row and column
Data_Frame_New <- Data_Frame[-c(1), -c(1)]
# Print the new data frame
Data_Frame_New
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
dim(Data_Frame)
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
ncol(Data_Frame)
nrow(Data_Frame)
• Data_Frame <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
length(Data_Frame)
• Data_Frame1 <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
Data_Frame2 <- data.frame (
Training = c("Stamina", "Stamina", "Strength"),
Pulse = c(140, 150, 160),
Duration = c(30, 30, 20)
)
New_Data_Frame <- rbind(Data_Frame1, Data_Frame2)
New_Data_Frame
• Data_Frame3 <- data.frame (
Training = c("Strength", "Stamina", "Other"),
Pulse = c(100, 150, 120),
Duration = c(60, 30, 45)
)
Data_Frame4 <- data.frame (
Steps = c(3000, 6000, 2000),
Calories = c(300, 400, 300)
)
New_Data_Frame1 <- cbind(Data_Frame3, Data_Frame4)
New_Data_Frame1
R Factors
• Factors are used to categorize data. Examples of factors
are:
• Demography: Male/Female
• Music: Rock, Pop, Classic, Jazz
• Training: Strength, Stamina
• # Create a factor
music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
# Print the factor
music_genre
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
levels(music_genre)
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"), levels =
c("Classic", "Jazz", "Pop", "Rock", "Other"))
levels(music_genre)
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
length(music_genre)
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
music_genre[3]
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
music_genre[3] <- "Pop"
music_genre[3]
Error example ("Opera" is not one of the factor's levels, so R warns and stores NA):
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"))
music_genre[3] <- "Opera"
music_genre[3]
• music_genre <-
factor(c("Jazz", "Rock", "Classic", "Classic", "Pop",
"Jazz", "Rock", "Jazz"), levels =
c("Classic", "Jazz", "Pop", "Rock", "Opera"))
music_genre[3] <- "Opera"
music_genre[3]
R plot
• plot(1, 3)
• plot(c(1, 8), c(3, 10))
• plot(c(1, 2, 3, 4, 5), c(3, 7, 8, 9, 12))
• x <- c(1, 2, 3, 4, 5)
y <- c(3, 7, 8, 9, 12)
plot(x, y)
• plot(1:10)
• plot(1:10, type="l")
• plot(1:10, main="My Graph", xlab="The x-axis",
ylab="The y axis")
• plot(1:10, col="red")
• plot(1:10, cex=2)
• # Use cex=number to change the size of the points (1
# is default, while 0.5 means 50% smaller, and 2
# means 100% larger)
• plot(1:10, pch=25, cex=2)
R Line
• plot(1:10, type="l")
• plot(1:10, type="l", col="blue")
• plot(1:10, type="l", lwd=2)
• plot(1:10, type="l", lwd=5, lty=3)
• #lty parameter with a value from 0 to 6
• line1 <- c(1,2,3,4,5,10)
line2 <- c(2,5,7,8,9,10)
plot(line1, type = "l", col = "blue")
lines(line2, type="l", col = "red")
R Scatter Plot
• x <- c(5,7,8,7,2,2,9,4,11,12,9,6)
y <- c(99,86,87,88,111,103,87,94,78,77,85,86)
plot(x, y)
• x <- c(5,7,8,7,2,2,9,4,11,12,9,6)
y <- c(99,86,87,88,111,103,87,94,78,77,85,86)
plot(x, y, main="Observation of Cars", xlab="Car age",
ylab="Car speed")
• # day one, the age and speed of 12 cars:
x1 <- c(5,7,8,7,2,2,9,4,11,12,9,6)
y1 <- c(99,86,87,88,111,103,87,94,78,77,85,86)
# day two, the age and speed of 15 cars:
x2 <- c(2,2,8,1,15,8,12,9,7,3,11,4,7,14,12)
y2 <-
c(100,105,84,105,90,99,90,95,94,100,79,112,91,80,85)
plot(x1, y1, main="Observation of Cars", xlab="Car age",
ylab="Car speed", col="red", cex=2)
points(x2, y2, col="blue", cex=2)
• # Create a vector of pies
x <- c(10,20,30,40)
# Display the pie chart
pie(x)
• # Create a vector of pies
x <- c(10,20,30,40)
# Display the pie chart and start the first pie at 90 degrees
pie(x, init.angle = 90)
• # Create a vector of pies
x <- c(10,20,30,40)
# Create a vector of labels
mylabel <-
c("Apples", "Bananas", "Cherries", "Dates")
# Display the pie chart with labels
pie(x, label = mylabel, main = "Fruits")
• # Create a vector of colors
colors <- c("blue", "yellow", "green", "black")
# Display the pie chart with colors
pie(x, label = mylabel, main = "Fruits", col =
colors)
• # Create a vector of labels
mylabel <- c("Apples", "Bananas", "Cherries", "Dates")
# Create a vector of colors
colors <- c("blue", "yellow", "green", "black")
# Display the pie chart with colors
pie(x, label = mylabel, main = "Pie Chart", col = colors)
# Display the explanation box
legend("bottomright", mylabel, fill = colors)
• The legend can be positioned as either:
bottomright, bottom, bottomleft, left, topleft, top,
topright, right, center
R Bar Charts
• # x-axis values
x <- c("A", "B", "C", "D")
# y-axis values
y <- c(2, 4, 6, 8)
barplot(y, names.arg = x)
• x <- c("A", "B", "C", "D")
y <- c(2, 4, 6, 8)
barplot(y, names.arg = x, col = "red")
• x <- c("A", "B", "C", "D")
y <- c(2, 4, 6, 8)
barplot(y, names.arg = x, horiz = TRUE)
• x <- c("A", "B", "C", "D")
y <- c(2, 4, 6, 8)
barplot(y, names.arg = x, density = 10)
• x <- c("A", "B", "C", "D")
y <- c(2, 4, 6, 8)
barplot(y, names.arg = x, width = c(1,2,3,4))
• https://rpubs.com/Yogesh20/653388
R Statistics
• mtcars
• ?mtcars
• # Create a variable of the mtcars data set for better organization
Data_Cars <- mtcars
# Use dim() to find the dimension of the data set
dim(Data_Cars)
# Use names() to find the names of the variables from the data set
names(Data_Cars)
• Data_Cars <- mtcars
rownames(Data_Cars)
• Data_Cars <- mtcars
Data_Cars$cyl
• Data_Cars <- mtcars
sort(Data_Cars$cyl)
• Data_Cars <- mtcars
summary(Data_Cars)
• Data_Cars <- mtcars
max(Data_Cars$hp)
min(Data_Cars$hp)
• Data_Cars <- mtcars
which.max(Data_Cars$hp)
which.min(Data_Cars$hp)
• Data_Cars <- mtcars
rownames(Data_Cars)[which.max(Data_Cars$hp)]
rownames(Data_Cars)[which.min(Data_Cars$hp)]
• Data_Cars <- mtcars
mean(Data_Cars$wt)
• Data_Cars <- mtcars
# c() specifies which percentile you want
quantile(Data_Cars$wt, c(0.75))
• Data_Cars <- mtcars
quantile(Data_Cars$wt)