0% found this document useful (0 votes)
55 views3 pages

R Cheatsheet ABCD

Uploaded by

perazajesus26
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
55 views3 pages

R Cheatsheet ABCD

Uploaded by

perazajesus26
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

Advanced Statistics and Data Science (ABCD) CHEAT SHEET

Word Equations Summary Tables Simple Statistics


outcome = explanatory + other stuff # compute five-number summary mean(data_set$Y)
favstats(~ Y, data = data_set) var(data_set$Y)
Y = X + other stuff sd(data_set$Y)
# create frequency table
Basics tally(data_set$Y) cohensD(Y ~ X, data = data_set)
tally(~ Y, data = data_set) cor(Y ~ X, data = data_set)
print("Hello world!")
# tally by condition b1(Y ~ X, data = data_set)
# assign value to object tally(~ Y < 1900, data = data_set) b1(one_model)
my_number <- 5
# two-way frequency table pre(Y ~ X, data = data_set)
# combine values into vector tally(Y ~ X, data = data_set, margin = TRUE, f(Y ~ X, data = data_set)
my_vector <- c(1, 2, 3) format = "proportion")
# sample F for X2
# first element in vector f(Y ~ X1 + X2,
my_vector[1] data = data_set, predictor = ~X2)

# orders values or cases


sort(my_vector) Data Frame
# arithmetic operations # structure of data frame # arrange rows by variable
sum(1, 2, 100), +, -, *, / str(data_set) arrange(data_set, Y)
sqrt(157)
abs(data_set$Y) # view first/last six rows # creates data frame from csv file
head(data_set) data_set <- read.csv("file_name", header = TRUE)
# logical operations tail(data_set)
>, <, >=, <=, ==, !=, |, & # convert quantitative variable
# select multiple variables # to categorical
# results in a new variable with values select(data_set, Y1, Y2) factor(data_set$Y)
# of TRUE or FALSE factor(data_set$Y,
data_set$C <- data_set$A > data_set$B # first six rows of selected variables levels = c(1,2),
head(select(data_set, Y1, Y2)) labels = c("A", "B"))

Probability Distribution # transform values


# select variable (a column) recode(data_set$Y, "0" = 0, "1" = 50, "2" = 100)
# calculate the probability area
xpnorm(65.1, data_set$mean, data_set$sd) data_set$Y
# creates two equal sized groups
# find rows that meet condition ntile(data_set$Y, 2)
zscore(data_set$Y)
data_set[data_set$Y > 40, ] # convert categorical variable
# returns t at this probability filter(data_set, Y > 300) # to quantitative
qt(.975, df = 999) as.numeric(data_set$Y)
# returns F at this probability # find rows that do not have NA
qf(.95, df1 = 1, df2 = 100) filter(data_set, is.na(Y) == FALSE)
filter(data_set, !is.na(Y))
# CI using t distribution
confint(empty_model)

# calculate p-value using F-distribution


xpf(sample_f, df1 = 2, df2 = 10)

Page: 1 ▷ Updated: 2024-10 ▷ Learn more about CourseKata @ https://coursekata.org


Advanced Statistics and Data Science (ABCD) CHEAT SHEET
Simulation Fitting and Evaluating Models
# sample without replacement # randomize sampling distribution of b1s, # empty model
sample(data_set, 6) # centered on 0 empty_model <- lm(Y ~ NULL,
sdob1 <- do(1000) * data = data_set)
# sample with replacement b1(shuffle(Y) ~ X, data = data_set)
resample(data_set, 10) # use one explanatory variable
# bootstrap sampling distribution of b1s, one_model <- lm(Y ~ X, data = data_set)
do(3) * resample(data_set, 10) # centered on sample b1
sdob1_boot <- do(1000) * # use more than one explanatory variable
# mixes up values in a variable b1(Y ~ X, data = resample(data_set)) # multivariate model
shuffle(data_set$Y) multi_model <- lm(Y ~ X1 + X2, data = data_set)
# count the number of b1s at the upper
# simulate sampling 10000 Ys # and lower extreme # all the model comparisons that can be
# from normal distribution tally(sdob1$b1 > sample_b1 | # made in relation to the multivariate model
sim_Y <- rnorm(10000, Y_stats$mean, sdob1$b1 < -sample_b1) generate_models(multi_model)
Y_stats$sd)
# model predictions and residuals
# put simulated Ys into dataframe # return TRUE for middle 95% of distribution data_set$empty_predict <- predict(empty_model)
data_set <- data.frame(sim_Y) middle(sdob1$b1, .95) data_set$empty_resid <- resid(empty_model)
# randomize sampling distribution of PREs # produce ANOVA table
# simulate
sdopre <- do(1000) * pre(shuffle(Y) ~ X, anova(empty_model)
# sampling distribution of means data = data_set)
sdom_sim <- do(10000) * mean(rnorm(157, supernova(one_model)
Y_stats$mean, Y_stats$sd)) # randomize sampling distribution of Fs
sdof <- do(1000) * # t-test, using pooled variance
# bootstrap f(shuffle(Y) ~ X, data = data_set) t.test(Tip ~ Condition, data = data_set,
# sampling distribution of means var.equal=TRUE)
sdom_boot <- do(10000) * # counts extreme Fs
mean(resample(data_set$Y, 157)) sample_f <- f(Y ~ X, data = data_set) # pairwise comparison corrections:
tally(~f > sample_f, data = sdof) # "Tukey","Bonferroni","none"
pairwise(one_model, correction = "none")

Page: 2 ▷ Updated: 2024-10 ▷ Learn more about CourseKata @ https://coursekata.org


Advanced Statistics and Data Science (ABCD) CHEAT SHEET
Visualizations
gf_histogram(~ Y, data = data_set) %>% # sampling distribution of b1
gf_point(Y ~ X, data = data_set) # change labels gf_histogram(~b1, data = sdob1,
gf_labs(title = "Graph Title", fill = ~middle(b1, .95)) %>%
x = "Y_Name", y = "Frequency") # modify the limits on x- and y-axes
gf_lims(x = c(-12, 12), y = c(0, 70))

gf_jitter(Y ~ X, data = data_set)


# faceted grid of histograms
gf_histogram(~ Y, data = data_set) %>%
gf_facet_grid(X ~ .)

gf_point(Y ~ X, data = data_set) %>%


# add model predictions as red points
gf_point(Y ~ X, shape = 1, size = 3,
color = "firebrick") %>%
# add best fitting model as a red line
gf_boxplot(Y ~ X, data = data_set) gf_model(one_model, color = "red")

gf_dhistogram(~ Y, data = data_set,


fill = "orange") %>%
gf_density()

gf_boxplot(Y ~ X, data = data_set, fill = pairwise(one_model, plot = TRUE)


"orange") %>%
gf_jitter(height = 0, alpha = .2, size = 3)
gf_bar( ~ Y, data = data_set)

Page: 3 ▷ Updated: 2024-10 ▷ Learn more about CourseKata @ https://coursekata.org

You might also like