0% found this document useful (0 votes)

22 views20 pages

Data Science

The document contains a comprehensive guide on using R programming for data manipulation and analysis, covering topics such as variable management, data structures (vectors, lists, matrices, data frames), and basic arithmetic operations. It also includes instructions for installing and updating packages, applying functions, and subsetting data. Additionally, the document demonstrates the use of user-defined functions and data input/output methods.

Uploaded by

algobeetrading

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

22 views20 pages

Data Science

Uploaded by

algobeetrading

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

# List the names of the objects currently defined in the workspace.
ls()
# Remove one object by name (R warns if no object called x exists).
rm(x)
# Remove every object that ls() reports.
rm(list = ls())
# Open the documentation page for rm().
help(rm)
# Mixing numbers with a string makes c() coerce every element to character.
x <- c(1:4, 10, "Ram")
x
y <- 3:10
rm(x)

# Setting the working directory
getwd()

# Only switch directory when the course folder exists, so the script keeps
# running on machines that do not have this path.
course_dir <- "E:\\DataScience\\Week1"
if (dir.exists(course_dir)) {
  setwd(course_dir)
}

# The command history is only available in an interactive session.
if (interactive()) {
  history()
}

# NOTE(review): the name of the next call was lost when the document was
# extracted (it appears only as "[Link]()"), which does not parse. It is kept
# here as a comment until the original call can be confirmed.
# [Link]()

### Ctrl+L clears the console

# Print details of the running R version.
version

# Install installr only when it is not available yet, then attach it.
# library() stops with an error if the package is missing, whereas require()
# would only return FALSE and let the script continue.
if (!requireNamespace("installr", quietly = TRUE)) {
  install.packages("installr")
}
library(installr)

# Presumably provided by installr -- TODO confirm.
updateR()

### Installing and using packages
install.packages("reshape2")
library(reshape2)

# NOTE(review): the function name of the next call was lost in extraction
# (only "[Link](\"installR\")" survives) and cannot be told apart between
# e.g. install.packages() and remove.packages(). Kept as a comment.
# [Link]("installR")

# The basic arithmetic operators

# Addition (the result is stored in x and reused by the functions below)
x <- 3 + 7
# Subtraction
7 - 3
# Multiplication
3 * 7
# Division
7 / 3
# Exponentiation
2^3
# Modulo: returns the remainder of the division 8 / 3
8 %% 3

# Common mathematical functions applied to x
log2(x)   # logarithm base 2 of x
log10(x)  # logarithm base 10 of x
exp(x)    # exponential of x
cos(x)    # cosine of x
sin(x)    # sine of x
tan(x)    # tangent of x
abs(x)    # absolute value of x
sqrt(x)   # square root of x

# Logical
logi <- c(TRUE, FALSE, TRUE, TRUE)
class(logi)

age <- c(34, 35, 36, 34, 34, 35, 36, 35)
class(age)

# NOTE(review): the three function names below were lost in extraction (each
# appeared only as "[Link]"). is.numeric(), is.logical() and as.integer() are
# assumed from the surrounding type-checking examples -- confirm against the
# original document.
is.numeric(age)
is.logical(logi)
age <- as.integer(age)

# `=` inside a call only names the argument; no variable x is created.
median(x = 1:10)
# `<-` inside a call assigns 1:10 to x in the workspace and then passes it on.
median(x <- 1:10)

# x <- y <- 5
# x <- y = 5

# Vectors: three parallel vectors describing the same cars
car_name <- c("Honda", "BMW", "Ferrari")
car_color <- c("Black", "Blue", "Red")
car_cc <- c(2000, 3400, 4000)

# List: components may have different lengths (cc holds five values here,
# name and color three each)
cars <- list(
  name = c("Honda", "BMW", "Ferrari"),
  color = c("Black", "Blue", "Red"),
  cc = c(2000, 3400, 4000, 6000, 5000)
)

# An unnamed list built from the vectors above
list(car_name, car_color, car_cc)
cars
class(cars)
# Matrix: six values laid out column by column in 2 rows and 3 columns
mdat <- matrix(
  c(1, 2, 3, 11, 12, 13),
  nrow = 2,
  ncol = 3,
  byrow = FALSE,
  dimnames = list(c("row1", "row2"), c("C.1", "C.2", "C.3"))
)
mdat

# Transpose
t(mdat)

# Deconstruction: flatten the matrix into a plain vector
c(mdat)

# Row and column totals
rowSums(mdat)
colSums(mdat)

# Data frame: one row per car, one column per attribute.
# (The constructor name was garbled to "[Link]" in extraction; the named,
# equal-length column arguments identify it as data.frame().)
cars <- data.frame(
  name = c("Honda", "BMW", "Ferrari"),
  color = c("Black", "Blue", "Red"),
  cc = c(2000, 3400, 4000)
)
cars

# Factors: a character vector turned into a categorical variable
apple_colors <- c("green", "green", "yellow", "red", "red", "red", "green")
class(apple_colors)

factor_apple <- factor(apple_colors)
# Number of distinct categories, then the category labels themselves
nlevels(factor_apple)
levels(factor_apple)

# String
x <- c("Hello World!")
print(x)
class(x)
# length() counts vector elements (one string); nchar() counts characters.
length(x)
nchar(x)
# Characters 2 to 4
substr(x, 2, 4)
# substring() is vectorised over the end position: three results, each
# starting at character 2 and ending at 4, 5 and 6
substring(x, 2, 4:6)

###### Indexing #######

# A sample vector
v <- c(1, 4, 4, 3, 2, 2, 3)
# Appending: y is v followed by two more values
y <- c(v, c(1, 23))

# Pick elements by position
v[c(2, 3, 4)]
v[c(1, 3, 6)]
# Negative positions drop elements instead of selecting them
v[-1]
v[c(-1, -3)]
# A range of positions
v[2:4]
# Number of elements, and the last element
length(v)
v[length(v)]

# Create a sample data frame from inline text.
# (The reader's name was garbled to "[Link]" in extraction; the header= and
# text= arguments identify it as read.table(). Blank lines inside the text
# are skipped by read.table's default blank.lines.skip = TRUE.)
data <- read.table(header = TRUE, text = '

subject gender size

1 M 7
2 F 6

3 F 9

4 M 11

')
data

# Column names, row names and dimensions
names(data)
colnames(data)
dimnames(data)
rownames(data)
dim(data)

# Get the element at row 1, column 3 (by position, then by column name)
data[1, 3]
data[1, "size"]

# Get rows 1 and 2, and all columns
data[1:2, ]
# Get all rows, and columns 1 and 2
data[, 1:2]

dimnames(data)
# First component of dimnames: the row names
dimnames(data)[[1]]

# Get rows 1 and 3, and all columns
data[c(1, 3), ]

# Get rows 1 and 2, and columns 1 and 2
data[1:2, c(1, 2)]
# Get rows 1 and 3, and only column 2 (a single column comes back as a vector)
data[c(1, 3), 2]

# Get rows 1 and 2, and only the columns named "gender" and "size"
data[1:2, c("gender", "size")]
# The same selection by position
data[c(1, 2), c(2, 3)]

# Extract one column as a vector
data[["size"]]

### Indexing with a boolean vector

# The comparison yields one TRUE/FALSE per element ...
v > 2
# ... which can be used directly as an index
v[v > 2]
# The same kind of mask written out by hand
v[c(FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE)]

# A boolean vector built from a data frame column
data$subject < 3
# Keep only the rows where the mask is TRUE
data[data$subject < 3, ]
data[c(TRUE, TRUE, FALSE, FALSE), ]
# which() converts the mask into row positions
which(data$subject < 3)
data

## Negative indexing

# Drop the first element
v[-1]

# Drop the first three elements
v[-(1:3)]

# Drop just the last element
v[-length(v)]

##### Getting a subset of a data structure

# subset() and logical indexing select the same elements
subset(v, v < 3)
v[v < 3]

# Another vector
t <- c("small", "small", "large", "medium")

# Remove "small" entries
subset(t, t != "small")
t[t != "small"]

# One important difference between the two methods is that you can assign
# values to elements with square bracket indexing, but you cannot with
# subset().
v[v > 3] <- 9

# There is no replacement form of subset(), so this assignment always fails.
# try() reports the error without aborting the rest of the script.
try(subset(v, v < 3) <- 9)

# Row filtering: subset() versus logical indexing
subset(data, subject < 3)
data[data$subject < 3, ]

# Subset of particular rows and columns
subset(data, subject < 3, select = -subject)
subset(data, subject < 3, select = c(gender, size))
subset(data, subject < 3, select = gender:size)
data[data$subject < 3, c("gender", "size")]

# Logical AND of two conditions
subset(data, subject < 3 & gender == "M")
data[data$subject < 3 & data$gender == "M", ]

# Logical OR of two conditions
subset(data, subject < 3 | gender == "M")
data[data$subject < 3 | data$gender == "M", ]

# Condition based on transformed data
subset(data, log2(size) > 3)
data[log2(data$size) > 3, ]

# Subset if elements are in another vector
# subset(data, subject %in% c(1,3))
# data[data$subject %in% c(1,3), ]

#### Vector filled with values ########

# Repeat a single value
rep(1, 50)
rep(FALSE, 20)

# Repeat a whole vector four times: 1 2 3 4 5 1 2 3 4 5 ...
rep(1:5, 4)
# Repeat each element four times: 1 1 1 1 2 2 2 2 ...
rep(1:5, each = 4)

# Use it on a factor
rep(factor(LETTERS[1:3]), 5)

# Sequence from 0 to 10 in steps of 2
seq(0, 10, by = 2)
# Twenty evenly spaced values from 0 to 10; the argument is spelled out as
# length.out instead of relying on partial matching of `length`
seq(0, 10, length.out = 20)
round(seq(0, 10, length.out = 20), 2)

### Information about variables ####
n <- 1:4
let <- LETTERS[1:4]
# (The constructor name was garbled to "[Link]" in extraction; data.frame()
# is restored because str(), nrow() and ncol() are applied to df below.)
df <- data.frame(n, let)

# List currently defined variables
ls()

# Check if a variable named "x" exists
exists("x")

# Delete variable x
rm(x)

### Information about size/structure

# Get information about structure
str(n)
str(df)

# Get the length of a vector
length(n)

# Length probably doesn't give us what we want here:
# for a data frame it is the number of columns
length(df)

# Number of rows
nrow(df)

# Number of columns
ncol(df)

# Get rows and columns
dim(df)

######## Working with NULL, NA, and NaN #######
x <- 5
x > 2

# Comparisons involving NA or NaN yield NA
y <- NA
y > 5

z <- NaN
z > 5

# NOTE(review): the names of the three test functions below were lost in
# extraction (each appeared only as "[Link]"). is.null(), is.na() and
# is.nan() are assumed from the section title -- confirm against the original.
is.null(x)
is.na(y)
is.nan(z)

# NA makes the plain sum NA; na.rm = TRUE drops missing values first
vy <- c(1, 2, 3, NA, 5)
sum(vy)
sum(vy, na.rm = TRUE)

# na.rm = TRUE also drops NaN
vz <- c(1, 2, 3, NaN, 5)
sum(vz, na.rm = TRUE)

# NULL is simply dropped when building a vector
vx <- c(1, 2, 3, NULL, 5)
sum(vx)

# Keep only the non-missing entries
vy[!is.na(vy)]
# NOTE(review): is.nan() assumed here; is.na() would select the same elements
vz[!is.nan(vz)]

# R Datasets

# List the available datasets
data()

# Load mtcars into the workspace and inspect it
data(mtcars)
str(mtcars)       # structure: column types and first values
help(mtcars)      # documentation page of the dataset
head(mtcars)      # first rows
tail(mtcars)      # last rows
dim(mtcars)       # number of rows and columns
colnames(mtcars)  # column names
summary(mtcars)   # per-column summary statistics

# Apply functions

# The same 20 values arranged in 5 rows and 4 columns: first filled column by
# column (the default), then reassigned filled row by row
data <- matrix(c(1:10, 21:30), nrow = 5, ncol = 4)
data <- matrix(c(1:10, 21:30), nrow = 5, ncol = 4, byrow = TRUE)
data

# apply: margin 1 works over rows, margin 2 over columns
apply(data, 1, sum)
apply(data, 2, mean)

# lapply: apply a function to every component of a list, returning a list
data <- list(x = 1:5, y = 6:10, z = 11:15)
lapply(data, FUN = mean)

# sapply
# sapply is the same as lapply, but returns a vector instead of a list.
sapply(data, FUN = mean)

# The sequences 1:3, 1:4, ..., 1:9 differ in length, so sapply keeps them in
# a list
i39 <- sapply(3:9, seq)
# fivenum returns five numbers for each sequence
sapply(i39, fivenum)
# tapply
# tapply splits the array based on specified data, usually factor levels, and
# then applies the function to it.
library(datasets)
data()
data(mtcars)
help(mtcars)
str(mtcars)
head(mtcars)

# Mean of every column
apply(mtcars, 2, mean)

# Mean weight for each number of cylinders
tapply(mtcars$wt, mtcars$cyl, mean)

# (The conversion name was garbled to "[Link]" in extraction; as.factor() is
# restored because the vector is used as the grouping for tapply() and
# table() below.)
groups <- as.factor(c(1, 1, 1, 3, 4, 4, 5, 6, 2, 3, 5))

# Calculate the number of times each number repeats
tapply(groups, groups, length)

# The output is similar to the function table
table(groups)

# mapply
# mapply is a multivariate version of sapply. It will apply the specified
# function to the first element of each argument first, followed by the
# second element, and so on.
# x <- 1:5
# b <- 6:10
# mapply(sum, x, b)

# # vapply
# vapply(i39, fivenum,c("Min." =0, "1st Qu." =0, "Median" =0, "3rd Qu." =0, "Max." =0))

# Row bind: each vector becomes one row (3 rows, 10 columns)
x <- 1:10
y <- 11:20
z <- 21:30
a <- rbind(x, y, z)
a

# Column bind: each vector becomes one column (10 rows, 3 columns)
b <- cbind(x, y, z)
b

# Data description
data(mtcars)

# Columns are reached through the data frame ...
head(mtcars$mpg)
help(mtcars)

# ... because a bare column name such as mpg is not defined in the workspace.
# with() evaluates the expression inside the data frame, which gives access to
# the columns by name without attach()/detach() changing the search path.
with(mtcars, head(mpg))
with(mtcars, head(disp))

dim(mtcars)
str(mtcars)
class(mtcars)
head(mtcars)
tail(mtcars)

# UDF (user-defined functions)

#' Sum all elements of a vector.
#'
#' @param x A vector accepted by sum().
#' @return The total of the elements of `x`.
totalsum <- function(x) {
  sum(x)
}

x <- 1:130
totalsum(x)

#' Fill a vector with a Fibonacci-style recurrence.
#'
#' Each value of `Fibonacci_terms` is used as a position: that position of the
#' result is overwritten with the sum of the two positions before it. The
#' first two elements of the input therefore act as the seeds.
#'
#' NOTE(review): the original body wrote into `Fibonacci_ter1` without ever
#' creating it and had lost its closing braces. The result vector is now
#' initialised from the input and the running values are read back from it,
#' matching the later `fib` definition in this script -- confirm intent.
#'
#' @param Fibonacci_terms Numeric vector; its values must all be 3 or greater
#'   so that two preceding positions exist.
#' @return The filled vector (it grows when a position exceeds its length).
fib <- function(Fibonacci_terms) {
  Fibonacci_ter1 <- Fibonacci_terms
  for (i in Fibonacci_terms) {
    Fibonacci_ter1[i] <- Fibonacci_ter1[i - 2] + Fibonacci_ter1[i - 1]
  }
  Fibonacci_ter1
}

xterms <- seq(3, 15, by = 1)
fib(xterms)
# Small data frame pairing the numbers 1 to 4 with the letters A to D.
# (The constructor name was garbled to "[Link]" in extraction; data.frame()
# matches the identical definition of df earlier in this script.)
n <- 1:4
let <- LETTERS[1:4]
df <- data.frame(n, let)

#' Subtract 5 from every element.
#'
#' @param x A numeric vector.
#' @return `x - 5`, the same length as `x`.
myfunction <- function(x) {
  x - 5
}

x <- 1:10
myfunction(x)

x <- 1:10
y <- 11:20
z <- 21:30

#' Element-wise total of three vectors.
#'
#' @param x,y,z Numeric vectors.
#' @return `x + y + z`.
myfunction <- function(x, y, z) {
  x + y + z
}

# One row per position, with the row total as a fourth column.
# (The constructor name was garbled to "[Link]" in extraction; the named
# column argument identifies it as data.frame().)
mytable <- data.frame(x, y, z, total = myfunction(x, y, z))
mytable

# Rebuild the three input vectors and keep their element-wise total
x <- 1:10
y <- 11:20
z <- 21:30
total <- myfunction(x, y, z)

#' Flag which values exceed 5.
#'
#' The original body ignored its argument and read the workspace variable
#' `total`; it now tests the values it is given, so `check_total(total)`
#' returns what the old body computed.
#'
#' @param x A numeric vector.
#' @return A character vector the same length as `x`: "yes" where the value
#'   is greater than 5, "no" otherwise.
check_total <- function(x) {
  ifelse(x > 5, "yes", "no")
}

# Combine the inputs, their total and a yes/no flag (total above 5).
# (The constructor name was garbled to "[Link]" in extraction; the `$` column
# assignment and subset() calls below show that mydata is a data frame.)
mydata <- data.frame(x, y, z, total, status = ifelse(total > 5, "yes", "no"))
mydata

# Add a second flag with a higher threshold
mydata$status2 <- ifelse(total > 40, "yes", "no")
mydata

# Split the rows by the first flag
mydata1 <- subset(mydata, status == "yes")
mydata2 <- subset(mydata, status == "no")
mydata1
mydata2

xterm <- seq(3, 15, by = 1)
xterm

#' Fill a vector with a Fibonacci-style recurrence.
#'
#' Each value of `fibonacii_terms` (as it was when the loop started) is used
#' as a position: that position is overwritten with the sum of the two
#' positions before it. The first two elements therefore act as the seeds.
#'
#' @param fibonacii_terms Numeric vector; its values must all be 3 or greater
#'   so that two preceding positions exist.
#' @return The updated vector (it grows when a position exceeds its length).
fib <- function(fibonacii_terms) {
  for (i in fibonacii_terms) {
    fibonacii_terms[i] <- fibonacii_terms[i - 2] + fibonacii_terms[i - 1]
  }
  fibonacii_terms
}

y <- fib(xterm)

mydata

# Write the data frame as a tab-separated file with a header row and no row
# names.
# NOTE(review): the function, file and argument names were garbled to "[Link]"
# in extraction. write.table(), row.names and col.names follow from the
# argument pattern; the file name "mydata.txt" is an assumption -- confirm.
write.table(mydata, "d:/mydata.txt", row.names = FALSE, col.names = TRUE, sep = "\t")

str(mtcars)

# arrange
# (the original had the bare word `arrange` here, which fails as an undefined
# object unless a package providing it is attached)

# order() returns row positions, sorted by mpg and then cyl, both decreasing
mydata <- order(mtcars$mpg, mtcars$cyl, decreasing = TRUE)
head(mydata)
# Applying the positions to the data frame gives the sorted rows
head(mtcars[mydata, ])

R Programming Basics: Vectors, Matrices, Dataframes
No ratings yet
R Programming Basics: Vectors, Matrices, Dataframes
13 pages
R File Code
No ratings yet
R File Code
16 pages
Lab 02 - Compound Data Structures
No ratings yet
Lab 02 - Compound Data Structures
12 pages
A Short List of Some Useful R Commands: Input and Display
No ratings yet
A Short List of Some Useful R Commands: Input and Display
2 pages
Essential R Commands Guide
No ratings yet
Essential R Commands Guide
11 pages
R Machine Learning Lab Guide
0% (1)
R Machine Learning Lab Guide
9 pages
R Programming Cheat Sheet
No ratings yet
R Programming Cheat Sheet
1 page
A Short List of The Most Useful R Commands
No ratings yet
A Short List of The Most Useful R Commands
8 pages
R Studio
No ratings yet
R Studio
8 pages
R Notes.
No ratings yet
R Notes.
9 pages
DMPA Codes
No ratings yet
DMPA Codes
16 pages
Applied Statistics MAT1011
No ratings yet
Applied Statistics MAT1011
22 pages
R Syntax Examples 1
No ratings yet
R Syntax Examples 1
6 pages
Practical 1 - Basics of R
No ratings yet
Practical 1 - Basics of R
8 pages
Apply Functions With Purrr::: Cheat Sheet
No ratings yet
Apply Functions With Purrr::: Cheat Sheet
2 pages
Simple Tutorial in R
No ratings yet
Simple Tutorial in R
15 pages
Arunav Da Prac
No ratings yet
Arunav Da Prac
55 pages
Basics: TH TH TH TH TH TH TH
No ratings yet
Basics: TH TH TH TH TH TH TH
3 pages
Statistic and R Programming Lab Exercise
No ratings yet
Statistic and R Programming Lab Exercise
8 pages
R Programming Basics for Beginners
No ratings yet
R Programming Basics for Beginners
2 pages
R Program
No ratings yet
R Program
22 pages
Teaching R
No ratings yet
Teaching R
15 pages
R Programming: Vector and Matrix Basics
No ratings yet
R Programming: Vector and Matrix Basics
3 pages
R Basics for Beginners
No ratings yet
R Basics for Beginners
24 pages
Unit - 3 Learning Notes
No ratings yet
Unit - 3 Learning Notes
8 pages
Introduction to R Programming Basics
No ratings yet
Introduction to R Programming Basics
32 pages
Standard Deviation in RStudio Guide
No ratings yet
Standard Deviation in RStudio Guide
10 pages
Session Set Working Directory Choose Directlry
No ratings yet
Session Set Working Directory Choose Directlry
17 pages
R Reference Guide for Programmers
No ratings yet
R Reference Guide for Programmers
6 pages
R Reference Card
No ratings yet
R Reference Card
6 pages
Analysis Using Statistical: Introduction & Data Exploration
No ratings yet
Analysis Using Statistical: Introduction & Data Exploration
23 pages
RSTUDIO
No ratings yet
RSTUDIO
44 pages
Day 2
No ratings yet
Day 2
5 pages
R Programming
No ratings yet
R Programming
50 pages
UL2
No ratings yet
UL2
2 pages
R Cheatsheet Base R
No ratings yet
R Cheatsheet Base R
2 pages
R Commands
No ratings yet
R Commands
18 pages
R Lecture 2-1
No ratings yet
R Lecture 2-1
28 pages
18 3 24 Upto Week 6 A B Latest 1
No ratings yet
18 3 24 Upto Week 6 A B Latest 1
25 pages
R Practicals
No ratings yet
R Practicals
32 pages
CH 3
No ratings yet
CH 3
33 pages
Nishant R File
No ratings yet
Nishant R File
49 pages
Workshop Activity: X Seq y Length
No ratings yet
Workshop Activity: X Seq y Length
3 pages
Stastistics and Probability With R Programming Language: Lab Report
67% (3)
Stastistics and Probability With R Programming Language: Lab Report
44 pages
R Programming Cheat Sheet for Biometrics
100% (2)
R Programming Cheat Sheet for Biometrics
35 pages
R Programming Cheat Sheet Guide
No ratings yet
R Programming Cheat Sheet Guide
4 pages
R Programming Cheat Sheet for Biometrics
100% (1)
R Programming Cheat Sheet for Biometrics
4 pages
Purrr
No ratings yet
Purrr
2 pages
R Network Analysis with igraph Guide
No ratings yet
R Network Analysis with igraph Guide
62 pages
Base R
No ratings yet
Base R
9 pages
Model 1
No ratings yet
Model 1
14 pages
R Intro STAT5000
No ratings yet
R Intro STAT5000
17 pages
R Programming-1
No ratings yet
R Programming-1
6 pages
Matrix, Dataframes, List
No ratings yet
Matrix, Dataframes, List
8 pages
My First Script.r
No ratings yet
My First Script.r
32 pages
An R Tutorial Starting Out
No ratings yet
An R Tutorial Starting Out
9 pages
Natural Gas Monthly
No ratings yet
Natural Gas Monthly
8 pages
Natural Gas Daily
No ratings yet
Natural Gas Daily
109 pages
D9 - R
No ratings yet
D9 - R
5 pages
D7 R
No ratings yet
D7 R
1 page
SOFARSOLAR Intelligent Anti-Reflux Box SAR-100 Installation Instructions
100% (1)
SOFARSOLAR Intelligent Anti-Reflux Box SAR-100 Installation Instructions
37 pages
Structural Rehabilitation Table Content
No ratings yet
Structural Rehabilitation Table Content
4 pages
MATH 135 Midterm Solutions Analysis
No ratings yet
MATH 135 Midterm Solutions Analysis
4 pages
Lesson 3 Differnetial Leveling
No ratings yet
Lesson 3 Differnetial Leveling
26 pages
Pokemon The Last Fire Red
100% (1)
Pokemon The Last Fire Red
11 pages
Baron & Ensley (2006)
No ratings yet
Baron & Ensley (2006)
14 pages
Revision Worksheet ICT & Self
No ratings yet
Revision Worksheet ICT & Self
5 pages
100 Report Card Comments You Can Use Now The Teacher Next Door
No ratings yet
100 Report Card Comments You Can Use Now The Teacher Next Door
20 pages
Computer Science XII Exam Paper 2024
No ratings yet
Computer Science XII Exam Paper 2024
5 pages
افعال السجايا
No ratings yet
افعال السجايا
9 pages
C++ Program: Stack, Linked List, Queue
100% (1)
C++ Program: Stack, Linked List, Queue
6 pages
Bristol Lair
No ratings yet
Bristol Lair
40 pages
Elektrikal LT4
No ratings yet
Elektrikal LT4
2 pages
Higer Bus 3906 Maintenance Guidelines
No ratings yet
Higer Bus 3906 Maintenance Guidelines
10 pages
REERTR
No ratings yet
REERTR
4 pages
CHAPTR 10 Summary
No ratings yet
CHAPTR 10 Summary
4 pages
Section I - Installation Preparation Kit Contents General Applicability
No ratings yet
Section I - Installation Preparation Kit Contents General Applicability
3 pages
Thinking Like An Economist
100% (1)
Thinking Like An Economist
40 pages
Household Electrical Safety Guide
No ratings yet
Household Electrical Safety Guide
64 pages
Hemrayev Maksat. Technology of Obtaining Fire Resistant Materials
No ratings yet
Hemrayev Maksat. Technology of Obtaining Fire Resistant Materials
5 pages
Thermax - Safety
No ratings yet
Thermax - Safety
67 pages
Professional Tarot Reader Course
No ratings yet
Professional Tarot Reader Course
16 pages
Phosphoric Acid Production Guide
No ratings yet
Phosphoric Acid Production Guide
75 pages
Structuring a TOK Essay Guide
No ratings yet
Structuring a TOK Essay Guide
3 pages
Durga Mongo
100% (2)
Durga Mongo
5 pages
CameraTracker Release Notes
No ratings yet
CameraTracker Release Notes
11 pages
Effectiveness of A 2D TLD and Its Numerical Modeling: M. J. Tait, A.M.ASCE N. Isyumov, F.ASCE and A. A. El Damatty
No ratings yet
Effectiveness of A 2D TLD and Its Numerical Modeling: M. J. Tait, A.M.ASCE N. Isyumov, F.ASCE and A. A. El Damatty
13 pages
Kubota Diesel Tractor: F12/R12 Transmission
No ratings yet
Kubota Diesel Tractor: F12/R12 Transmission
16 pages
Ex HM 2300
No ratings yet
Ex HM 2300
1 page
Pyhton Potential Interview Questions
No ratings yet
Pyhton Potential Interview Questions
34 pages