# Workspace housekeeping ----
# List the objects currently defined in the workspace.
ls()
# rm() warns when the object does not exist, so only remove x if it is there.
if (exists("x", inherits = FALSE)) {
  rm(x)
}
# CAUTION: this wipes every object in the current workspace. It is shown here
# for teaching purposes; avoid it in scripts that other people will source.
rm(list = ls())
help(rm)
# Mixing numbers with a string coerces the whole vector to character.
x <- c(1:4, 10, "Ram")
x
y <- 3:10
# x exists at this point, so it can be removed without a warning.
rm(x)
# Setting the working directory ----
getwd()
# setwd() stops with an error when the folder is missing, so only switch to the
# course folder when it actually exists on this machine.
week_dir <- "E:\\DataScience\\Week1"
if (dir.exists(week_dir)) {
  setwd(week_dir)
} else {
  warning("Working directory not changed; folder not found: ", week_dir,
          call. = FALSE)
}
# The command history is only available in an interactive console.
if (interactive()) {
  history()
}
# NOTE(review): the call that originally followed history() was lost -- the
# file contained the placeholder "[Link]()", which is not valid R. Restore the
# intended call from the original script.
### ctrl+L (presumably a reminder of the clear-console shortcut -- confirm)
version
# Installing and using packages ----
# Install installr only when it is missing, then load it. library() stops with
# an error if loading fails, whereas require() would only return FALSE.
if (!requireNamespace("installr", quietly = TRUE)) {
  install.packages("installr")
}
library(installr)
# updateR() starts an R upgrade; only offer it in an interactive session.
# NOTE(review): installr targets Windows -- confirm before running elsewhere.
if (interactive()) {
  updateR()
}
# Same install-if-missing pattern for reshape2.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library(reshape2)
# NOTE(review): the last call of this section was lost -- the file contained
# the placeholder `[Link]("installR")`, which is not valid R. The function name
# cannot be recovered from here (possibly remove.packages() or
# install.packages()); restore it from the original script.
# Basic arithmetic operators ----
# Addition; the result is stored in x and reused by the functions below.
x <- 3 + 7
# Subtraction
7 - 3
# Multiplication
3 * 7
# Division
7 / 3
# Exponentiation
2^3
# Modulo: the remainder of dividing 8 by 3
8 %% 3

# Common mathematical functions applied to x ----
log2(x)   # logarithm to base 2
log10(x)  # logarithm to base 10
exp(x)    # exponential
cos(x)    # cosine
sin(x)    # sine
tan(x)    # tangent
abs(x)    # absolute value
sqrt(x)   # square root
# Logical and numeric vectors ----
logi <- c(TRUE, FALSE, TRUE, TRUE)
class(logi)
age <- c(34, 35, 36, 34, 34, 35, 36, 35)
class(age)
# NOTE(review): the five calls below were unreadable in the file (each function
# name had been replaced by the placeholder "[Link]", which is not valid R).
# They are reconstructed as type checks followed by a type conversion; confirm
# the exact functions against the original script.
is.numeric(age)
is.character(age)
is.logical(age)
is.logical(logi)
age <- as.integer(age)
# `=` inside a call names an argument: no variable x is created here.
median(x = 1:10)
# `<-` inside a call is an assignment: x now also exists in the workspace.
median(x <- 1:10)
# Chained assignment works with `<-` ...
# x <- y <- 5
# ... but mixing the two operators does not parse the way it reads.
# x <- y = 5
# Vectors ----
# Three parallel vectors describing the same three cars.
car_name <- c("Honda", "BMW", "Ferrari")
car_color <- c("Black", "Blue", "Red")
car_cc <- c(2000, 3400, 4000)

# List ----
# Unlike a data frame, a list may hold components of different lengths:
# cc has five values while name and color have three.
cars <- list(
  name = c("Honda", "BMW", "Ferrari"),
  color = c("Black", "Blue", "Red"),
  cc = c(2000, 3400, 4000, 6000, 5000)
)
# An unnamed list built from the three vectors.
list(car_name, car_color, car_cc)
cars
class(cars)
# Matrix ----
# A 2 x 3 matrix filled column by column, with row and column labels.
mdat <- matrix(
  c(1, 2, 3, 11, 12, 13),
  nrow = 2,
  ncol = 3,
  byrow = FALSE,
  dimnames = list(c("row1", "row2"), c("C.1", "C.2", "C.3"))
)
mdat
# Transpose
t(mdat)
# Deconstruction: flatten the matrix back into a plain vector
c(mdat)
# Row and column totals
rowSums(mdat)
colSums(mdat)
# Data frame ----
# Every column of a data frame must have the same length (three cars here).
# The constructor name was unreadable in the file ("[Link]"); the named,
# equal-length column arguments identify it as data.frame().
cars <- data.frame(
  name = c("Honda", "BMW", "Ferrari"),
  color = c("Black", "Blue", "Red"),
  cc = c(2000, 3400, 4000)
)
cars
# Factors ----
# A character vector with repeated values ...
apple_colors <- c(
  "green", "green", "yellow", "red", "red", "red", "green"
)
class(apple_colors)
# ... converted to a factor, which stores the distinct values as levels.
factor_apple <- factor(apple_colors)
nlevels(factor_apple)
levels(factor_apple)
# Strings ----
x <- "Hello World!"
print(x)
class(x)
# length() counts vector elements (one string); nchar() counts characters.
length(x)
nchar(x)
# Characters 2 to 4
substr(x, 2, 4)
# substring() is vectorised over the end position: ends at 4, 5 and 6.
substring(x, 2, 4:6)
###### Indexing #######
# A sample vector, and a longer one made by appending two values to it
v <- c(1, 4, 4, 3, 2, 2, 3)
y <- c(v, c(1, 23))
# Select by position
v[c(2, 3, 4)]
v[c(1, 3, 6)]
# Negative positions drop elements
v[-1]
v[c(-1, -3)]
# A range of positions
v[2:4]
# The last element, found via the vector length
length(v)
v[length(v)]
# Create a sample data frame from inline text; the first text row is the header.
# The reader's name was unreadable in the file ("[Link]"); the header= and
# text= arguments identify it as read.table().
data <- read.table(header = TRUE, text = '
subject gender size
1 M 7
2 F 6
3 F 9
4 M 11
')
data
# Names and dimensions
names(data)
colnames(data)
dimnames(data)
rownames(data)
dim(data)
# Get the element at row 1, column 3 (by position, then by column name)
data[1, 3]
data[1, "size"]
# Get rows 1 and 2, and all columns
data[1:2, ]
# Get all rows, and columns 1 and 2
data[, 1:2]
dimnames(data)
# The first component of dimnames() holds the row names
dimnames(data)[[1]]
# Get rows 1 and 3, and all columns
data[c(1, 3), ]
# Get rows 1 and 2, and columns 1 and 2
data[1:2, c(1, 2)]
# Get rows 1 and 3, and only column 2 (a single column comes back as a vector)
data[c(1, 3), 2]
# Get rows 1 and 2, and only the columns named "gender" and "size"
data[1:2, c("gender", "size")]
data[c(1, 2), c(2, 3)]
# Extract one column as a vector
data[["size"]]
### Indexing with a logical vector
# The comparison yields one TRUE/FALSE per element ...
v > 2
# ... which can be used directly as an index.
v[v > 2]
v[c(FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE)]
# The same idea applied to the rows of a data frame
data$subject < 3
data[data$subject < 3, ]
data[c(TRUE, TRUE, FALSE, FALSE), ]
# which() turns the logical vector into row positions
which(data$subject < 3)
data

## Negative indexing
# Drop the first element
v[-1]
# Drop the first three
v[-1:-3]
# Drop just the last element
v[-length(v)]
##### Getting a subset of a data structure
# subset() and logical indexing return the same elements.
subset(v, v < 3)
v[v < 3]
# Another vector
# (the name t shadows base::t as a variable; calls such as t(m) still work)
t <- c("small", "small", "large", "medium")
# Remove "small" entries
subset(t, t != "small")
t[t != "small"]
# One important difference between the two methods is that you can assign
# values to elements with square bracket indexing, but you cannot with subset().
v[v > 3] <- 9
# Assigning through subset() fails at run time. try() reports the error
# without aborting the rest of the script.
try(subset(v, v < 3) <- 9)
subset(data, subject < 3)
data[data$subject < 3, ]
# Subset of particular rows and columns
subset(data, subject < 3, select = -subject)
subset(data, subject < 3, select = c(gender, size))
subset(data, subject < 3, select = gender:size)
data[data$subject < 3, c("gender", "size")]
# Logical AND of two conditions
subset(data, subject < 3 & gender == "M")
data[data$subject < 3 & data$gender == "M", ]
# Logical OR of two conditions
subset(data, subject < 3 | gender == "M")
data[data$subject < 3 | data$gender == "M", ]
# Condition based on transformed data
subset(data, log2(size) > 3)
data[log2(data$size) > 3, ]
# Subset if elements are in another vector
# subset(data, subject %in% c(1,3))
# data[data$subject %in% c(1,3), ]
#### Vectors filled with values ########
# Repeat a single value
rep(1, 50)
rep(FALSE, 20)
# Repeat a whole vector four times, then repeat each element four times
rep(1:5, 4)
rep(1:5, each = 4)
# Use it on a factor
rep(factor(LETTERS[1:3]), 5)
# Sequences: fixed step, then fixed number of points
seq(0, 10, by = 2)
seq(0, 10, length.out = 20)
round(seq(0, 10, length.out = 20), 2)
### Information about variables ####
n <- 1:4
let <- LETTERS[1:4]
# The constructor name was unreadable in the file ("[Link]"); the str(),
# nrow() and ncol() calls below show that df is a data frame.
df <- data.frame(n, let)
# List currently defined variables
ls()
# Check if a variable named "x" exists
exists("x")
# Delete variable x (only when it is defined here, to avoid a warning)
if (exists("x", inherits = FALSE)) {
  rm(x)
}
### Information about size/structure
# Get information about structure
str(n)
str(df)
# Get the length of a vector
length(n)
# Length probably doesn't give us what we want here:
# for a data frame it is the number of columns.
length(df)
# Number of rows
nrow(df)
# Number of columns
ncol(df)
# Get rows and columns
dim(df)
######## Working with NULL, NA, and NaN #######
x <- 5
x > 2
# Comparisons with NA or NaN yield NA rather than TRUE/FALSE.
y <- NA
y > 5
z <- NaN
z > 5
# NOTE(review): the three test functions were unreadable in the file (each
# name had been replaced by "[Link]"). They are reconstructed as the matching
# predicate for each kind of value -- confirm against the original script.
is.null(x)
is.na(y)
is.nan(z)
# NA makes the sum NA unless it is removed first. The argument name was also
# unreadable; na.rm is the argument of sum() that takes TRUE/FALSE.
vy <- c(1, 2, 3, NA, 5)
sum(vy)
sum(vy, na.rm = TRUE)
# na.rm = TRUE drops NaN as well.
vz <- c(1, 2, 3, NaN, 5)
sum(vz, na.rm = TRUE)
# NULL simply disappears when a vector is built, so nothing needs removing.
vx <- c(1, 2, 3, NULL, 5)
sum(vx)
# Empty brackets select every element.
vy[]
vz[]
# R datasets ----
# List the datasets that are available, then load mtcars.
data()
data(mtcars)
# Structure and documentation
str(mtcars)
help(mtcars)
# First and last rows
head(mtcars)
tail(mtcars)
# Size, column names and per-column summary statistics
dim(mtcars)
colnames(mtcars)
summary(mtcars)
# Apply functions ----
# The same twenty values arranged column-wise, then row-wise; only the
# row-wise matrix is kept and printed.
data <- matrix(c(1:10, 21:30), nrow = 5, ncol = 4)
data <- matrix(c(1:10, 21:30), nrow = 5, ncol = 4, byrow = TRUE)
data
# apply: margin 1 works on rows, margin 2 on columns
apply(data, 1, sum)
apply(data, 2, mean)
# lapply: one result per list component, returned as a list
data <- list(x = 1:5, y = 6:10, z = 11:15)
lapply(data, FUN = mean)
# sapply: like lapply, but the result is simplified to a vector when possible
sapply(data, FUN = mean)
# A list of the sequences 1:3, 1:4, ..., 1:9 and their five-number summaries
i39 <- sapply(3:9, seq)
sapply(i39, fivenum)
# tapply
# tapply splits a vector into groups (usually by factor levels) and then
# applies the function to each group.
library(datasets)
data()
data(mtcars)
help(mtcars)
str(mtcars)
head(mtcars)
# Column means. vapply() works on the data frame column by column, so the
# data frame is not converted to a matrix first as apply() would do.
vapply(mtcars, mean, numeric(1))
# Mean weight for each number of cylinders
tapply(mtcars$wt, mtcars$cyl, mean)
# The conversion function's name was unreadable in the file ("[Link]");
# as.factor() fits because groups is used as the grouping index below.
groups <- as.factor(c(1, 1, 1, 3, 4, 4, 5, 6, 2, 3, 5))
# Calculate the number of times each number repeats
tapply(groups, groups, length)
# The output is similar to that of the function table
table(groups)
# mapply
# mapply is a multivariate version of sapply. It will apply the specified
# function to the first element of each argument first, followed by the
# second element, and so on.
# x <- 1:5
# b <- 6:10
# mapply(sum, x, b)
# # vapply
# vapply(i39, fivenum,c("Min." =0, "1st Qu." =0, "Median" =0, "3rd Qu." =0, "Max." =0))
# Row bind ----
x <- 1:10
y <- 11:20
z <- 21:30
# Each vector becomes one row of a 3 x 10 matrix.
a <- rbind(x, y, z)
a
# Column bind ----
# Each vector becomes one column of a 10 x 3 matrix.
b <- cbind(x, y, z)
b
# Data description ----
data(mtcars)
head(mtcars$mpg)
help(mtcars)
# A bare column name such as mpg is not visible outside the data frame, so
# head(mpg) on its own stops with "object 'mpg' not found". with() evaluates
# the expression inside the data frame without changing the search path,
# which avoids the name clashes attach()/detach() can cause.
with(mtcars, head(mpg))
with(mtcars, head(disp))
# Size, structure and class of the dataset
dim(mtcars)
str(mtcars)
class(mtcars)
# First and last rows
head(mtcars)
tail(mtcars)
# User-defined functions (UDF) ----
# Return the sum of all elements of x.
# The function body was never closed in the file; the brace is restored here.
totalsum <- function(x) {
  sum(x)
}
x <- 1:130
totalsum(x)
# Build a Fibonacci-style sequence in place.
#
# Each VALUE of Fibonacci_terms is used as a position: that position is
# overwritten with the sum of the two positions before it. Positions beyond
# the current length are appended. For the input 3:15 this yields a
# fifteen-element sequence seeded by the first two values (3 and 4).
#
# The original body wrote to an undefined vector (Fibonacci_ter1), returned
# from inside the loop and never closed its braces; the results are now
# accumulated in the argument itself and returned once the loop has finished.
# Values below 3 are not supported (there are no two preceding positions).
fib <- function(Fibonacci_terms) {
  for (i in Fibonacci_terms) {
    Fibonacci_terms[i] <- Fibonacci_terms[i - 2] + Fibonacci_terms[i - 1]
  }
  Fibonacci_terms
}
xterms <- seq(3, 15, by = 1)
fib(xterms)
# A small two-column data frame: the numbers 1-4 next to the letters A-D.
# The constructor name was unreadable in the file ("[Link]") and is
# reconstructed here as data.frame().
n <- 1:4
let <- LETTERS[1:4]
df <- data.frame(n, let)
# Subtract 5 from every element of x.
# The function body was never closed in the file; the brace is restored here.
myfunction <- function(x) {
  x - 5
}
x <- 1:10
myfunction(x)
x <- 1:10
y <- 11:20
z <- 21:30
# Element-wise sum of three vectors. This replaces the earlier one-argument
# myfunction. The function body was never closed in the file; the brace is
# restored here.
myfunction <- function(x, y, z) {
  x + y + z
}
# One row per element, with the element-wise total as a fourth column.
# The constructor name was unreadable ("[Link]"); data.frame() fits the
# column arguments.
mytable <- data.frame(x, y, z, total = myfunction(x, y, z))
mytable
x <- 1:10
y <- 11:20
z <- 21:30
total <- myfunction(x, y, z)
# Label each value of x as "yes" when it is greater than 5, otherwise "no".
# The original ignored its argument and read the global `total` instead, and
# its body was never closed; it now works on whatever vector it is given.
check_total <- function(x) {
  ifelse(x > 5, "yes", "no")
}
# The constructor name was unreadable ("[Link]"); data.frame() fits the
# column arguments and the mydata$status2 assignment below.
mydata <- data.frame(x, y, z, total, status = check_total(total))
mydata
# A second flag with a higher threshold
mydata$status2 <- ifelse(total > 40, "yes", "no")
mydata
# Split the rows by status; subset() looks column names up in mydata itself.
mydata1 <- subset(mydata, status == "yes")
mydata2 <- subset(mydata, status == "no")
mydata1
mydata2
xterm <- seq(3, 15, by = 1)
xterm
# Build a Fibonacci-style sequence in place.
#
# Each VALUE of fibonacii_terms is used as a position: that position is
# overwritten with the sum of the two positions before it, and positions past
# the end are appended. For 3:15 the result has fifteen elements seeded by
# the first two values (3 and 4). Values below 3 are not supported.
#
# The function body was never closed in the file; the brace is restored here
# and the loop body is braced explicitly.
fib <- function(fibonacii_terms) {
  for (i in fibonacii_terms) {
    fibonacii_terms[i] <- fibonacii_terms[i - 2] + fibonacii_terms[i - 1]
  }
  fibonacii_terms
}
y <- fib(xterm)
mydata
# Export mydata as a tab-separated text file with a header row and no row
# names. The function and argument names were unreadable in the file
# ("[Link]"); write.table() with row.names/col.names fits the sep = "\t" call.
# NOTE(review): the file name was also lost ("d:/[Link]"); "mydata.txt" is a
# placeholder -- confirm the intended name and that drive d: exists.
write.table(mydata, "d:/mydata.txt", row.names = FALSE, col.names = TRUE,
            sep = "\t")
str(mtcars)
# Arranging (sorting) rows ----
# A bare `arrange` is not defined in base R and would stop the script, so it
# is kept only as this section heading.
# order() returns row POSITIONS, not the sorted data: mydata becomes the
# permutation that sorts mtcars by mpg, then cyl, both in decreasing order.
mydata <- order(mtcars$mpg, mtcars$cyl, decreasing = TRUE)
head(mydata)
# Use the permutation as a row index to see the sorted rows themselves.
head(mtcars[mydata, ])