0% found this document useful (0 votes)

8 views8 pages

Week 3 Lecture

The document discusses statistical methods applied to datasets, including bootstrapping and permutation testing, using R programming with libraries such as tidyverse and readxl. It presents analyses on birth weights and arsenic levels, showcasing original and bootstrap sample statistics, histograms, and confidence intervals. Additionally, it examines diving scores to assess differences between semifinal and final rounds, concluding that the mean scores do not significantly differ.

Uploaded by

christelsiaw

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

8 views8 pages

Week 3 Lecture

Uploaded by

christelsiaw

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

week 3 lecture

2025-08-25

library(tidyverse)
library(magrittr) # piping operator %>%
library(tinytex) # knit to PDF
library(readxl) # read Excel files
library(resampledata) # data sets from Chihara and Hesterberg's book

# Load the birth weight data; character columns are converted to factors.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
df <- read.csv("birthweight.csv", stringsAsFactors = TRUE)

# Preview the first rows.
head(df)

## id age tobacco alcohol gender weight gestation smoker

## 1 1 30-34 No No Male 3827 40 No
## 2 2 30-34 No No Male 3629 38 No
## 3 3 35-39 No No Female 3062 37 No
## 4 4 20-24 No No Female 3430 39 No
## 5 5 25-29 No No Male 3827 38 No
## 6 6 35-39 No No Female 3119 39 No

# Size, mean and standard deviation of the weight column in the original data.
ori.sample <- summarise(
  df,
  n = n(),
  Mean = mean(weight),
  SD = sd(weight)
)

ori.sample

## n Mean SD
## 1 1009 3448.26 487.736

# Bootstrap the mean of the weight column.
set.seed(123) # fix the RNG state so the resamples are reproducible

weight <- df$weight
n <- length(weight)

N <- 10^4 # number of bootstrap resamples
weight.boot <- numeric(N) # one slot per resample mean

for (i in seq_len(N)) {
  samp <- sample(weight, size = n, replace = TRUE) # draw resample
  weight.boot[i] <- mean(samp) # compute mean and store it in weight.boot
}

Shape

1
# Wrap the bootstrap means in a data frame so ggplot2 can plot them.
resamples <- as.data.frame(weight.boot)

# Histogram of the bootstrap means (bins of width 10).
# The labels had lost their word spacing; it is restored here.
ggplot(data = resamples, mapping = aes(x = weight.boot)) +
  geom_histogram(binwidth = 10, fill = "salmon", color = "black") +
  labs(
    x = "Birth Weight (in grams)",
    y = "Frequency",
    title = "Distribution of Birth Weight"
  ) +
  theme_classic()

DistributionofBirthWeight

2500

2000

1500
Frequency

1000

500

3400 3440 3480 3520

BirthWeight(ingrams)

Spread

# Count, mean and standard deviation of the bootstrap means.
boot.sample <- summarise(
  resamples,
  n = n(),
  Mean = mean(weight.boot),
  SD = sd(weight.boot)
)
boot.sample

## n Mean SD
## 1 10000 3448.297 15.36148

# Stack the original-sample and bootstrap summaries into one table.
a <- rbind(ori.sample, boot.sample)
rownames(a) <- c("Original Sample", "Bootstrap Sample")

# Display without scientific notation.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
a %>% format(scientific = FALSE)

## n Mean SD
## Original Sample 1009 3448.260 487.73604
## Bootstrap Sample 10000 3448.297 15.36148

2
Percentile Interval

# 2.5th and 97.5th percentiles of the bootstrap means.
quantile(weight.boot, probs = c(0.025, 0.975))

## 2.5% 97.5%
## 3418.475 3478.640

# Load the arsenic data; character columns are converted to factors.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
dat <- read.csv("arsenic.csv", stringsAsFactors = TRUE)

# Percentage of rows whose Arsenic value is above 10.
highcontam <- dat %>% filter(Arsenic > 10)
100 * (nrow(highcontam) / nrow(dat))

## [1] 57.56458

Shape, Center & Spread for means

# Bootstrap the mean of the Arsenic column.
set.seed(123) # fix the RNG state so the resamples are reproducible

arsenic <- dat$Arsenic
n.arsenic <- length(arsenic)

N <- 10^4 # number of bootstrap resamples

# One slot per resample mean: the length must be N, not the sample size.
# With numeric(n.arsenic), slots past N would stay 0 whenever n.arsenic > N
# and would be plotted and summarised as if they were bootstrap means.
boot.arsenic <- numeric(N)

for (i in seq_len(N)) {
  x <- sample(arsenic, size = n.arsenic, replace = TRUE) # draw resample
  boot.arsenic[i] <- mean(x) # store the resample mean
}

hist(boot.arsenic)

3
Histogram of boot.arsenic
2000
1500
Frequency

1000
500
0

60 80 100 120 140 160 180 200

boot.arsenic

95% CI

# Mean of the bootstrap means, rounded to two decimals.
paste("Bootstrap Mean =", round(mean(boot.arsenic), digits = 2))

## [1] "Bootstrap Mean = 125.32"

# Standard deviation of the bootstrap means, rounded to two decimals.
paste("Bootstrap SE =", round(sd(boot.arsenic), digits = 2))

## [1] "Bootstrap SE = 18.16"

# 2.5th and 97.5th percentiles of the bootstrap means.
quantile(boot.arsenic, probs = c(0.025, 0.975))

## 2.5% 97.5%
## 92.80887 163.96348

Permutation testing & Bootstrapping

# Diving2017 comes from library(resampledata).
dive <- Diving2017

dive

## Name Country Semifinal Final

## 1 CHEONG Jun Hoong Malaysia 325.50 397.50
## 2 SI Yajie China 382.80 396.00

4
## 3 REN Qian China 367.50 391.95
## 4 KIM Mi Rae North Korea 346.00 385.55
## 5 WU Melissa Australia 318.70 370.20
## 6 KIM Kuk Hyang North Korea 360.85 360.00
## 7 ITAHASHI Minami Japan 313.70 357.85
## 8 BENFEITO Meaghan Canada 355.15 331.40
## 9 PAMG Pandelela Malaysia 322.75 322.40
## 10 CHAMANDY Olivia Canada 320.55 307.15
## 11 PARRATTO Jessica USA 322.75 302.35
## 12 MURILLO URREA Carolina Colombia 325.75 283.35

# Mean of the Semifinal column.
mean(dive[["Semifinal"]])

## [1] 338.5

# Mean of the Final column.
mean(dive[["Final"]])

## [1] 350.475

# Difference of the two column means (Final minus Semifinal).
mean(dive[["Final"]]) - mean(dive[["Semifinal"]])

## [1] 11.975

# Paired permutation test on the per-row Final - Semifinal differences:
# each difference keeps its magnitude but receives a random sign.
set.seed(987)

Diff <- dive$Final - dive$Semifinal # difference of two scores
obs <- mean(Diff) # observed mean difference

N <- 10^5 - 1 # number of random sign assignments
result <- numeric(N)
for (i in seq_len(N)) {
  # random vector of 1's and -1's, one per difference (was hard-coded as 12)
  Sign <- sample(c(-1, 1), size = length(Diff), replace = TRUE)
  Diff2 <- Diff * Sign # random pairs of (a - b) or (b - a)
  result[i] <- mean(Diff2) # mean of the sign-flipped differences
}

hist(result, col = "steelblue")
# obs is already a single mean, so it is drawn directly.
abline(v = obs, col = "red", lty = 2)

5
Histogram of result
15000
Frequency

10000
5000
0

−30 −20 −10 0 10 20 30

result

# P-value: twice the share of values at least as large as obs, with one
# added to both the count and the number of replications.
2 * (1 + sum(obs <= result)) / (1 + N)

## [1] 0.25664

# Reshape the Semifinal and Final columns to long format: the column name
# goes to "round" and the value to "time".
dive.tidy <- pivot_longer(
  select(dive, Semifinal, Final),
  cols = everything(),
  names_to = "round",
  values_to = "time"
)

head(dive.tidy, n = 3)

## # A tibble: 3 x 2
## round time
## <chr> <dbl>
## 1 Semifinal 326.
## 2 Final 398.
## 3 Semifinal 383.

# Set-up for the two-sample permutation test.
set.seed(987)

# Observed difference of the round means (Final minus Semifinal).
obs.ind <- mean(dive$Final) - mean(dive$Semifinal)

time <- dive.tidy$time # the "time" column of the long-format data
N.ind <- 10^5 # number of random splits
result.ind <- numeric(N.ind) # one slot per split

6
# For each replication, split the pooled values at random into two halves
# and record the difference between the two half means.
for (i in seq_len(N.ind)) {
  # indices of the randomly chosen half, drawn without replacement
  index.ind <- sample(length(time), size = 0.5 * length(time), replace = FALSE)
  result.ind[i] <- mean(time[-index.ind]) - mean(time[index.ind])
}

hist(result.ind, col = "steelblue")

# obs.ind is already a single number, so it is drawn directly.
abline(v = obs.ind, col = "red", lty = 2)

Histogram of result.ind
12000
Frequency

8000
4000
0

−40 −20 0 20 40

result.ind

# P-value for the two-sample permutation test. The denominator must use
# N.ind, the number of replications stored in result.ind; N belongs to the
# earlier paired test and holds a different count.
2 * (sum(result.ind >= obs.ind) + 1) / (N.ind + 1)

## [1] 0.37566

Bootstrap

# Add the per-row Final - Semifinal difference.
# Columns are referenced by bare name so mutate() reads them from the data it
# was given rather than from the global dive object.
dive.boot <- dive %>% mutate(difference = Final - Semifinal)

head(dive.boot)

## Name Country Semifinal Final difference

## 1 CHEONG Jun Hoong Malaysia 325.50 397.50 72.00
## 2 SI Yajie China 382.80 396.00 13.20
## 3 REN Qian China 367.50 391.95 24.45
## 4 KIM Mi Rae North Korea 346.00 385.55 39.55
## 5 WU Melissa Australia 318.70 370.20 51.50
## 6 KIM Kuk Hyang North Korea 360.85 360.00 -0.85

7
# Bootstrap the mean of the difference column.
set.seed(987) # fix the RNG state so the resamples are reproducible

N.boot <- 10^5 # number of bootstrap resamples
result.boot <- numeric(N.boot) # one slot per resample mean
dif <- dive.boot$difference

for (i in seq_len(N.boot)) {
  x.boot <- sample(dif, size = length(dif), replace = TRUE) # draw resample
  result.boot[i] <- mean(x.boot) # store the resample mean
}

hist(result.boot)

Histogram of result.boot
10000 15000 20000
Frequency

5000
0

−20 0 20 40

result.boot

95% CI

# 2.5th and 97.5th percentiles of the bootstrap mean differences.
quantile(result.boot, probs = c(0.025, 0.975))

## 2.5% 97.5%
## -6.61250 31.04583

Since 0 is part of the interval, we cannot conclude that the mean scores for divers differ between the semifinal
and final rounds.
Quiz 3
#```{r}
#quiz3 <- read.csv(" .csv")
#```

R Statistical Analysis and Sampling Techniques
No ratings yet
R Statistical Analysis and Sampling Techniques
38 pages
R Programming Basics and Data Analysis
No ratings yet
R Programming Basics and Data Analysis
18 pages
ProbList5 24 SLN
No ratings yet
ProbList5 24 SLN
9 pages
Svy Cautions X
No ratings yet
Svy Cautions X
12 pages
Homework 1: Statistics 109 Due February 17, 2019 at 11:59pm EST
No ratings yet
Homework 1: Statistics 109 Due February 17, 2019 at 11:59pm EST
23 pages
q3 Stat2100 Bautista-Lhuriely
No ratings yet
q3 Stat2100 Bautista-Lhuriely
11 pages
Diet Impact on Weight Loss Analysis
No ratings yet
Diet Impact on Weight Loss Analysis
13 pages
Stata Commands for Data Analysis
No ratings yet
Stata Commands for Data Analysis
8 pages
Programming With R Test 2
50% (2)
Programming With R Test 2
5 pages
HW4 Solutions: Problem 6.2
No ratings yet
HW4 Solutions: Problem 6.2
8 pages
Introductory Statics For The Life and Biomedical Sciences
100% (1)
Introductory Statics For The Life and Biomedical Sciences
348 pages
Assignment# 06
No ratings yet
Assignment# 06
16 pages
Lab Test
No ratings yet
Lab Test
7 pages
Stata Commands for Data Analysis
No ratings yet
Stata Commands for Data Analysis
14 pages
ANOVA Models
No ratings yet
ANOVA Models
44 pages
Experimental Design & Data Analysis
No ratings yet
Experimental Design & Data Analysis
310 pages
Analyzing Birth Weight and Ethnicity Data
No ratings yet
Analyzing Birth Weight and Ethnicity Data
5 pages
Advanced Statistical Methods Using R
No ratings yet
Advanced Statistical Methods Using R
32 pages
R Survival Analysis Guide
No ratings yet
R Survival Analysis Guide
16 pages
R - Tutorial 1 - Survival Analysis in R
100% (1)
R - Tutorial 1 - Survival Analysis in R
16 pages
Lab Wk1soln PDF
No ratings yet
Lab Wk1soln PDF
14 pages
Project of Biostatistics#02-RaeesaAli-MS - BIOTECH
No ratings yet
Project of Biostatistics#02-RaeesaAli-MS - BIOTECH
27 pages
3.ANOVA IIb-laboratory - Solution
No ratings yet
3.ANOVA IIb-laboratory - Solution
13 pages
BAN5
No ratings yet
BAN5
2 pages
Chapter 10 Analysis Examples Replication Fall 2011 R
No ratings yet
Chapter 10 Analysis Examples Replication Fall 2011 R
7 pages
Bio Stat
No ratings yet
Bio Stat
472 pages
Algorithm M
No ratings yet
Algorithm M
8 pages
Q3 - Stat2100 Dupol Melkiancaesar
No ratings yet
Q3 - Stat2100 Dupol Melkiancaesar
12 pages
BM-1, Applied Statistics, Lesson 2: Comparing Two Groups (And One Group)
No ratings yet
BM-1, Applied Statistics, Lesson 2: Comparing Two Groups (And One Group)
39 pages
Biostatistics Course Notes & Exercises
No ratings yet
Biostatistics Course Notes & Exercises
300 pages
R
No ratings yet
R
4 pages
Data Analysis with R: Tables & Plots
No ratings yet
Data Analysis with R: Tables & Plots
13 pages
Hypothesis Testing in R
No ratings yet
Hypothesis Testing in R
13 pages
EDUC/PSY 6600: Unit 3 Homework: Your Name Spring 2018
No ratings yet
EDUC/PSY 6600: Unit 3 Homework: Your Name Spring 2018
73 pages
WEEK 3 Activity - Assignment 1
No ratings yet
WEEK 3 Activity - Assignment 1
5 pages
Prob Lab
No ratings yet
Prob Lab
10 pages
Statistical Test Selection Guide
No ratings yet
Statistical Test Selection Guide
7 pages
Openintro Statistics 4Th Edition David Diez PDF
No ratings yet
Openintro Statistics 4Th Edition David Diez PDF
135 pages
Textbook Practice Problems 1
No ratings yet
Textbook Practice Problems 1
39 pages
R Notes For Data Analysis and Statistical Inference
No ratings yet
R Notes For Data Analysis and Statistical Inference
10 pages
F24 Lab-01
No ratings yet
F24 Lab-01
4 pages
Logit Probit
No ratings yet
Logit Probit
66 pages
R Code Default Data PDF
No ratings yet
R Code Default Data PDF
10 pages
Logistic Regression (With R) : 1 Theory
No ratings yet
Logistic Regression (With R) : 1 Theory
15 pages
W11 Exercisesolutions
No ratings yet
W11 Exercisesolutions
6 pages
Fundamentals of Biostatistics 8th Edition by Rosner ISBN 130526892X Solution Manual
100% (76)
Fundamentals of Biostatistics 8th Edition by Rosner ISBN 130526892X Solution Manual
19 pages
R For Health Data Science - 1st Edition Optimized DOCX Download
100% (13)
R For Health Data Science - 1st Edition Optimized DOCX Download
14 pages
Department of Statistics: COURSE STATS 330/762
No ratings yet
Department of Statistics: COURSE STATS 330/762
8 pages
Caderno 2 - Exercícios 5 A 11
No ratings yet
Caderno 2 - Exercícios 5 A 11
16 pages
Approximate
No ratings yet
Approximate
4 pages
R Course
No ratings yet
R Course
7 pages
Applied Economics: Module No. 5: Week 5: First Quarter
No ratings yet
Applied Economics: Module No. 5: Week 5: First Quarter
9 pages
Log
No ratings yet
Log
322 pages
Dpsa Circular
No ratings yet
Dpsa Circular
4 pages
Common CAE Engineering Mistakes
No ratings yet
Common CAE Engineering Mistakes
8 pages
How To Read An IEC Metric Motor Nameplate - Emotors Direct
No ratings yet
How To Read An IEC Metric Motor Nameplate - Emotors Direct
6 pages
2025 Innovation and Technology Scholarship Application - PDF
No ratings yet
2025 Innovation and Technology Scholarship Application - PDF
8 pages
Manual de Usuario Lanzar VIBE 416 (9 Páginas) 3
No ratings yet
Manual de Usuario Lanzar VIBE 416 (9 Páginas) 3
1 page
Eng Hmudvbeuj-1.312
No ratings yet
Eng Hmudvbeuj-1.312
194 pages
Elza's Motivation and Performance Analysis
No ratings yet
Elza's Motivation and Performance Analysis
4 pages
Introduction To Merger and Acquisition (6 Files Merged)
No ratings yet
Introduction To Merger and Acquisition (6 Files Merged)
132 pages
Liquid Diffusion Coefficient Experiment
No ratings yet
Liquid Diffusion Coefficient Experiment
9 pages
Technicolor DWA0120 User Guide
No ratings yet
Technicolor DWA0120 User Guide
8 pages
Overview and Applications of Profinet: Andy Verwer Verwer Training & Consultancy LTD
No ratings yet
Overview and Applications of Profinet: Andy Verwer Verwer Training & Consultancy LTD
33 pages
Recent Progress and Future Prospects of Silicon Solar Module Recycling
No ratings yet
Recent Progress and Future Prospects of Silicon Solar Module Recycling
9 pages
Basic Issue in Inventory Valuation
No ratings yet
Basic Issue in Inventory Valuation
2 pages
Full Paper - Case Study Format
No ratings yet
Full Paper - Case Study Format
4 pages
Stages of Taxes
No ratings yet
Stages of Taxes
2 pages
Omeprazole: by Jennica Mae V. Cuico
No ratings yet
Omeprazole: by Jennica Mae V. Cuico
7 pages
Machining Aerospace Materials
No ratings yet
Machining Aerospace Materials
10 pages
Good Practices in Usability Testing On People With Disabilities
No ratings yet
Good Practices in Usability Testing On People With Disabilities
4 pages
Integration Pathways For Traditional and Digital M
No ratings yet
Integration Pathways For Traditional and Digital M
4 pages
Getting Started Guide
No ratings yet
Getting Started Guide
62 pages
Parent Consent Letter for UK Visa
100% (2)
Parent Consent Letter for UK Visa
1 page
Vendor Compliance Guidelines for Kerry
100% (1)
Vendor Compliance Guidelines for Kerry
3 pages
PWM Circuit Design and Analysis
No ratings yet
PWM Circuit Design and Analysis
8 pages
Minnesota Housing Discrimination Case
No ratings yet
Minnesota Housing Discrimination Case
64 pages
PTS 2 - Bahasa Inggris Sastra - Kelas 10
No ratings yet
PTS 2 - Bahasa Inggris Sastra - Kelas 10
3 pages
Successful - Business - Planning - Energising - Your - Compa... - (TWO - The - Strategic - Business - Plan - Tactical - Section)
No ratings yet
Successful - Business - Planning - Energising - Your - Compa... - (TWO - The - Strategic - Business - Plan - Tactical - Section)
54 pages
NHSFPX4000 Assessment 3 Applying Ethical Principles
No ratings yet
NHSFPX4000 Assessment 3 Applying Ethical Principles
6 pages
Lokpal and Lokayukta - UPSC Notes
No ratings yet
Lokpal and Lokayukta - UPSC Notes
6 pages

Week 3 Lecture

Uploaded by

Week 3 Lecture

Uploaded by

week 3 lecture

df <- read.csv("birthweight.csv", stringsAsFactors = T)

## id age tobacco alcohol gender weight gestation smoker

ori.sample <- df %>% summarise(n = n(),

weight <- df$weight

3400 3440 3480 3520

boot.sample <- resamples %>% summarise(n = n(),

a <- rbind(ori.sample, boot.sample)

dat <- read.csv("arsenic.csv", stringsAsFactors = T)

Shape, Center & Spread for means

arsenic <- dat$Arsenic

60 80 100 120 140 160 180 200

paste("Bootstrap Mean =", round(mean(boot.arsenic),2))

## [1] "Bootstrap Mean = 125.32"

paste("Bootstrap SE =", round(sd(boot.arsenic),2))

## [1] "Bootstrap SE = 18.16"

Permutation testing & Bootstrapping

dive <- Diving2017 # data from library(resampledata)

## Name Country Semifinal Final

Diff <- dive$Final-dive$Semifinal #differenceoftwoscores

−30 −20 −10 0 10 20 30

2 *(sum(result >=obs) + 1)/ (N+1) #P-value

dive.tidy <- dive %>%

obs.ind <- mean(dive$Final)- mean(dive$Semifinal)

hist(result.ind, col = "steelblue")

2*(sum(result.ind >= obs.ind) + 1) / (N+1)

dive.boot <- dive %>% mutate(difference=dive$Final-dive$Semifinal)

## Name Country Semifinal Final difference

You might also like