BAN430: Lecture 2 notes
Sondre Hølleland
2023-01-18
Warm-up
library(fpp3) # note that this also loads the tidyverse packages
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.8 v tsibble 1.1.3
## v dplyr 1.0.10 v tsibbledata 0.4.1
## v tidyr 1.2.1 v feasts 0.3.0
## v lubridate 1.9.0 v fable 0.3.2
## v ggplot2 3.4.0
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(readxl)
# -- Read data --
# Load the "Mid" sheet of the workbook into `dat`.
dat <- read_excel(path = "data/US10YTRR.xlsx", sheet = "Mid")
# Upper-case the column names so that columns are easy to tell apart from
# function names.
colnames(dat) <- toupper(colnames(dat))
# Date conversion with base R; the result is only printed, `dat` is unchanged.
mutate(dat, DATE = as.Date(DATE))
## # A tibble: 8,804 x 2
## DATE PRICE
## <date> <dbl>
## 1 2022-08-30 96.9
## 2 2022-08-29 96.9
## 3 2022-08-26 97.6
## 4 2022-08-25 97.6
## 5 2022-08-24 96.9
## 6 2022-08-23 97.4
## 7 2022-08-22 97.7
## 8 2022-08-19 98.1
## 9 2022-08-18 98.8
## 10 2022-08-17 98.7
## # ... with 8,794 more rows
# Convert DATE with lubridate's date(); the conflict list printed when fpp3 was
# loaded shows lubridate::date() masking base::date().
dat <- dat %>% mutate(DATE = date(DATE)) # lubridate function
# Monthly mean price for the years 2010 to 2021 (both inclusive).
mdat <- dat %>%
  mutate(YEAR = year(DATE)) %>% # point 4
  # same as: filter(YEAR >= 2010, YEAR < 2022)
  filter(between(YEAR, 2010, 2021)) %>%
  select(-YEAR) %>% # helper column, only needed for the filter
  mutate(YEARMONTH = tsibble::yearmonth(DATE)) %>%
  group_by(YEARMONTH) %>%
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned.
  summarize(MEANPRICE = mean(PRICE, na.rm = TRUE))
# Line chart of the monthly mean price against the year-month.
ggplot(data = mdat, mapping = aes(x = YEARMONTH, y = MEANPRICE)) +
  geom_line()
108
104
MEANPRICE
100
96
2010 Jan 2015 Jan 2020 Jan
YEARMONTH
# Same data drawn as individual points instead of a line.
ggplot(data = mdat, mapping = aes(x = YEARMONTH, y = MEANPRICE)) +
  geom_point()
2
108
104
MEANPRICE
100
96
2010 Jan 2015 Jan 2020 Jan
YEARMONTH
# Layers are added with `+`: a line first, then the points on top of it.
ggplot(data = mdat, mapping = aes(x = YEARMONTH, y = MEANPRICE)) +
  geom_line() +
  geom_point()
108
104
MEANPRICE
100
96
2010 Jan 2015 Jan 2020 Jan
YEARMONTH
# Fixed colours are set per layer, outside aes(): blue line, green points.
ggplot(data = mdat, mapping = aes(x = YEARMONTH, y = MEANPRICE)) +
  geom_line(colour = "blue") +
  geom_point(colour = "green")
3
108
104
MEANPRICE
100
96
2010 Jan 2015 Jan 2020 Jan
YEARMONTH
# As before, with the point size set to 3.
ggplot(data = mdat, mapping = aes(x = YEARMONTH, y = MEANPRICE)) +
  geom_line(colour = "blue") +
  geom_point(colour = "green", size = 3)
108
104
MEANPRICE
100
96
2010 Jan 2015 Jan 2020 Jan
YEARMONTH
# Polished version of the chart: black-and-white theme, x-axis breaks every
# 12 months from 2010 Jan to 2022 Jan labelled with the year only, no minor
# grid lines, and descriptive axis titles.
ggplot(data = mdat, mapping = aes(x = YEARMONTH, y = MEANPRICE)) +
  geom_line(colour = "blue") +
  geom_point(colour = "green", size = 3) +
  scale_x_yearmonth(
    breaks = seq(yearmonth("2010 Jan"), yearmonth("2022 Jan"), by = 12),
    labels = 2010:2022
  ) +
  xlab("Time (month)") +
  ylab("Monthly 10Y US bond index") +
  # theme_bw() must come before theme() so the tweak below is not overwritten
  theme_bw() +
  theme(panel.grid.minor = element_blank())
4
Monthly 10Y US bond index 108
104
100
96
2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
Time (month)
tsibble
Now, we will move from the tidyverse to the tidyverts!
# Declare DATE as the time index to turn the tibble into a tsibble, then
# inspect the class of the result.
dat_tsbl <- as_tsibble(dat, index = DATE)
class(dat_tsbl)
## [1] "tbl_ts" "tbl_df" "tbl" "data.frame"
# No variable is given, so autoplot() selects `.vars = PRICE` by itself and
# prints a message saying so.
autoplot(dat_tsbl)
## Plot variable not specified, automatically selected `.vars = PRICE`
110
PRICE
100
90
1990 2000 2010 2020
DATE [1D]
# Change the default theme:
theme_set(new = theme_bw())
# Same plot with the plotted variable named explicitly and a magenta line.
autoplot(dat_tsbl, .vars = PRICE, color = "magenta")
110
PRICE
100
90
1990 2000 2010 2020
DATE [1D]
With a tsibble, it is easy to aggregate to different time scales:
# MONTHLY
# Bucket the index by year-month and plot the mean price of each bucket.
index_by(dat_tsbl, YEARMONTH = ~ yearmonth(.)) %>%
  summarise(MEANPRICE = mean(PRICE, na.rm = TRUE)) %>%
  autoplot(.vars = MEANPRICE)
110
MEANPRICE
105
100
95
1990 Jan 2000 Jan 2010 Jan 2020 Jan
YEARMONTH [1M]
# QUARTERLY
# Bucket the index by year-quarter and plot the mean price of each bucket.
index_by(dat_tsbl, YEARQUARTER = ~ yearquarter(.)) %>%
  summarise(MEANPRICE = mean(PRICE, na.rm = TRUE)) %>%
  autoplot(.vars = MEANPRICE)
105.0
102.5
MEANPRICE
100.0
97.5
1990 Q1 2000 Q1 2010 Q1 2020 Q1
YEARQUARTER [1Q]
# WEEKLY
# Bucket the index by year-week and plot the mean price of each bucket.
index_by(dat_tsbl, YEARWEEK = ~ yearweek(.)) %>%
  summarise(MEANPRICE = mean(PRICE, na.rm = TRUE)) %>%
  autoplot(.vars = MEANPRICE)
115
110
MEANPRICE
105
100
95
90
1990 W01 1999 W52 2009 W53 2020 W01
YEARWEEK [1W]
# YEARLY
# Aggregate to one mean price per calendar year. The new index is named YEAR;
# it used to be called YEARWEEK (copied from the weekly example) although it
# holds years, so the x-axis title of the plot now reads YEAR.
dat_tsbl %>%
  index_by(YEAR = ~ year(.)) %>%
  summarise(
    MEANPRICE = mean(PRICE, na.rm = TRUE)
  ) %>%
  autoplot(MEANPRICE)
102
101
MEANPRICE
100
99
98
1990 2000 2010 2020
YEARWEEK [1Y]
Weather data from NY airports
# Keep the airport code, the timestamp and three measurements from the
# nycflights13 weather table.
weather <- select(nycflights13::weather, origin, time_hour, temp, humid, precip)
# One series per airport (key = origin), indexed by time_hour.
weather_tsbl <- weather %>%
  as_tsibble(key = origin, index = time_hour)
weather_tsbl
## # A tsibble: 26,115 x 5 [1h] <America/New_York>
## # Key: origin [3]
## origin time_hour temp humid precip
## <chr> <dttm> <dbl> <dbl> <dbl>
## 1 EWR 2013-01-01 01:00:00 39.0 59.4 0
## 2 EWR 2013-01-01 02:00:00 39.0 61.6 0
## 3 EWR 2013-01-01 03:00:00 39.0 64.4 0
## 4 EWR 2013-01-01 04:00:00 39.9 62.2 0
## 5 EWR 2013-01-01 05:00:00 39.0 64.4 0
## 6 EWR 2013-01-01 06:00:00 37.9 67.2 0
## 7 EWR 2013-01-01 07:00:00 39.0 64.4 0
## 8 EWR 2013-01-01 08:00:00 39.9 62.2 0
## 9 EWR 2013-01-01 09:00:00 39.9 62.2 0
## 10 EWR 2013-01-01 10:00:00 41 59.6 0
## # ... with 26,105 more rows
# Insert the rows missing from the time index with precip set to 0, then,
# within each key, fill temp and humid downwards from the previous row.
full_weather <- fill_gaps(weather_tsbl, precip = 0) %>%
  group_by_key() %>%
  tidyr::fill(temp, humid, .direction = "down")
full_weather
## # A tsibble: 26,190 x 5 [1h] <America/New_York>
## # Key: origin [3]
## # Groups: origin [3]
## origin time_hour temp humid precip
## <chr> <dttm> <dbl> <dbl> <dbl>
## 1 EWR 2013-01-01 01:00:00 39.0 59.4 0
## 2 EWR 2013-01-01 02:00:00 39.0 61.6 0
## 3 EWR 2013-01-01 03:00:00 39.0 64.4 0
## 4 EWR 2013-01-01 04:00:00 39.9 62.2 0
## 5 EWR 2013-01-01 05:00:00 39.0 64.4 0
## 6 EWR 2013-01-01 06:00:00 37.9 67.2 0
## 7 EWR 2013-01-01 07:00:00 39.0 64.4 0
## 8 EWR 2013-01-01 08:00:00 39.9 62.2 0
## 9 EWR 2013-01-01 09:00:00 39.9 62.2 0
## 10 EWR 2013-01-01 10:00:00 41 59.6 0
## # ... with 26,180 more rows
# Monthly aggregates per key: mean temperature and total precipitation.
# Only the total precipitation is plotted.
group_by_key(full_weather) %>%
  index_by(year_month = ~ yearmonth(.)) %>%
  summarise(
    avg_temp = mean(temp, na.rm = TRUE),
    ttl_precip = sum(precip, na.rm = TRUE)
  ) %>%
  autoplot(.vars = ttl_precip)
7.5
origin
ttl_precip
5.0 EWR
JFK
LGA
2.5
0.0
2013 Jan 2013 Apr 2013 Jul 2013 Oct
year_month [1M]
Power example
# Read the stored wind power data and declare it a tsibble with one series
# per Place, indexed by datetime.
windpower <- readRDS(file = "data/OffshoreWindtwoLocationsFiveYears.rds")
wind_tsbl <- as_tsibble(windpower, key = Place, index = datetime)
# Plot the power production at the original resolution.
autoplot(wind_tsbl, .vars = powerprod)
9
1.5e+07
1.0e+07
powerprod
Place
Sørlig Nordsjø 2
Utsira Nord
5.0e+06
0.0e+00
2016 2018 2020
datetime [1h]
# Mean power production per calendar date, computed separately for each key.
group_by_key(wind_tsbl) %>%
  index_by(date = ~ as.Date(.)) %>%
  summarise(Power = mean(powerprod, na.rm = TRUE)) %>%
  autoplot(.vars = Power)
1.5e+07
1.0e+07
Place
Power
Sørlig Nordsjø 2
Utsira Nord
5.0e+06
0.0e+00
2016 2018 2020
date [1D]
# Mean power production per year-month, computed separately for each key.
group_by_key(wind_tsbl) %>%
  index_by(YEARMONTH = ~ yearmonth(.)) %>%
  summarise(Power = mean(powerprod, na.rm = TRUE)) %>%
  autoplot(.vars = Power)
10
1.3e+07
1.1e+07
Place
Power
Sørlig Nordsjø 2
9.0e+06
Utsira Nord
7.0e+06
2016 Jan 2018 Jan 2020 Jan
YEARMONTH [1M]
11