We pick up where the first post in this series left us: tackling the task of multi-step time-series forecasting.
Our first attempt was a workaround of sorts. The model had been trained to deliver a single prediction, corresponding to the very next point in time. Thus, if we needed a longer forecast, all we could do was use that prediction and feed it back to the model, moving the input sequence by one value (from \((x_{t-n}, \ldots, x_t)\) to \((x_{t-n+1}, \ldots, x_{t+1})\), say).
In contrast, the new model will be designed (and trained) to forecast a configurable number of observations at once. The architecture will still be basic (about as basic as possible, given the task) and can thus serve as a baseline for later attempts.
We work with the same data as before, `vic_elec` from `tsibbledata`.
Compared to last time, though, the `dataset` class has to change. While, previously, for each batch item the target (`y`) was a single value, it now is a vector, just like the input, `x`. And just like `n_timesteps` was (and still is) used to specify the length of the input sequence, there is now a second parameter, `n_forecast`, to configure the target size.
In our example, `n_timesteps` and `n_forecast` are set to the same value, but this does not need to be the case. You could equally well train on week-long sequences and then forecast developments over a single day, or a month.
Apart from the fact that `.getitem()` now returns a vector for `y` as well as `x`, there is not much to be said about dataset creation. Here is the complete code to set up the data input pipeline:
# Window lengths, counted in observations: 7 * 24 * 2 = 336.
n_timesteps <- 7 * 24 * 2 # length of every input sequence
n_forecast <- n_timesteps # length of every target sequence (same value here)
# Number of (input, target) pairs per batch.
batch_size <- 32
# Extract the `Demand` column of `vic_elec` for one calendar year, optionally
# restricted to a single month.
#
# yr:    year to keep; compared against `year(Date)`.
# month: month number to keep; the default `NULL` keeps every month.
#
# Returns a tibble containing only the `Demand` column.
vic_elec_get_year <- function(yr, month = NULL) {
  vic_elec %>%
    # With `month = NULL` the second condition compares month(Date) with
    # itself, so it is always TRUE and no month filtering takes place.
    filter(year(Date) == yr, month(Date) == if (is.null(month)) month(Date) else month) %>%
    as_tibble() %>%
    select(Demand)
}
# Training (2012), validation (2013) and test (January 2014) data, each
# converted to a matrix.
elec_train <- vic_elec_get_year(2012) %>% as.matrix()
elec_valid <- vic_elec_get_year(2013) %>% as.matrix()
elec_test <- vic_elec_get_year(2014, 1) %>% as.matrix()

# Standardization statistics are computed on the training data only.
train_mean <- mean(elec_train)
train_sd <- sd(elec_train)
# torch dataset yielding (input window, target window) pairs from one series.
#
# Constructor arguments:
#   x:           the series; standardized with the global `train_mean` and
#                `train_sd` before being turned into a tensor.
#   n_timesteps: number of observations in each input window.
#   n_forecast:  number of observations in each target window.
#   sample_frac: fraction of the possible window start positions to keep
#                (default 1 keeps all of them).
elec_dataset <- dataset(
  name = "elec_dataset",

  initialize = function(x, n_timesteps, n_forecast, sample_frac = 1) {
    self$n_timesteps <- n_timesteps
    self$n_forecast <- n_forecast
    self$x <- torch_tensor((x - train_mean) / train_sd)

    # Number of start positions that leave room for one full input window
    # immediately followed by one full target window.
    n <- length(self$x) - self$n_timesteps - self$n_forecast + 1

    # Random subset of the start positions, stored in ascending order.
    self$begins <- sort(sample.int(
      n = n,
      size = n * sample_frac
    ))
  },

  # Returns list(x = input window, y = target window) for the i-th start.
  .getitem = function(i) {
    start <- self$begins[i]
    end <- start + self$n_timesteps - 1
    pred_length <- self$n_forecast

    list(
      x = self$x[start:end],
      # The target starts right after the input window; squeeze(2) removes
      # the second dimension of the slice.
      y = self$x[(end + 1):(end + pred_length)]$squeeze(2)
    )
  },

  # Number of windows kept after sampling.
  .length = function() {
    length(self$begins)
  }
)
# Training data: half of the available windows, served with shuffle = TRUE.
train_ds <- elec_dataset(elec_train, n_timesteps, n_forecast, sample_frac = 0.5)
train_dl <- dataloader(train_ds, batch_size = batch_size, shuffle = TRUE)

# Validation data: half of the available windows, default (unshuffled) order.
valid_ds <- elec_dataset(elec_valid, n_timesteps, n_forecast, sample_frac = 0.5)
valid_dl <- dataloader(valid_ds, batch_size = batch_size)

# Test data: every window, one window per batch.
test_ds <- elec_dataset(elec_test, n_timesteps, n_forecast)
test_dl <- dataloader(test_ds, batch_size = 1)
The model replaces the single linear layer that, in the previous post, had been tasked with outputting the final prediction, with a small network, complete with two linear layers and, optionally, dropout.
In `forward()`, we first apply the RNN and, just like in the previous post, make use of the outputs only; or more specifically, the output corresponding to the final time step. (See that previous post for a detailed discussion of what a `torch` RNN returns.)
# Recurrent network followed by a small MLP that emits the complete forecast
# in one go.
#
# Constructor arguments:
#   sort:           "gru" builds an nn_gru(); any other value builds an
#                   nn_lstm().
#   input_size:     passed to the RNN as `input_size`.
#   hidden_size:    RNN hidden size; also the input width of the MLP.
#   linear_size:    width of the MLP's hidden layer.
#   output_size:    width of the MLP's output layer.
#   num_layers:     passed to the RNN (default 1).
#   dropout:        passed to the RNN (default 0).
#   linear_dropout: dropout probability between the two linear layers
#                   (default 0).
mannequin <- nn_module(
  initialize = function(sort, input_size, hidden_size, linear_size, output_size,
                        num_layers = 1, dropout = 0, linear_dropout = 0) {
    self$sort <- sort
    self$num_layers <- num_layers
    self$linear_dropout <- linear_dropout

    self$rnn <- if (self$sort == "gru") {
      nn_gru(
        input_size = input_size,
        hidden_size = hidden_size,
        num_layers = num_layers,
        dropout = dropout,
        batch_first = TRUE
      )
    } else {
      nn_lstm(
        input_size = input_size,
        hidden_size = hidden_size,
        num_layers = num_layers,
        dropout = dropout,
        batch_first = TRUE
      )
    }

    self$mlp <- nn_sequential(
      nn_linear(hidden_size, linear_size),
      nn_relu(),
      nn_dropout(linear_dropout),
      nn_linear(linear_size, output_size)
    )
  },

  forward = function(x) {
    x <- self$rnn(x)
    # The first element of the RNN's return value holds the outputs; index -1
    # on the second dimension keeps only the final time step, which is then
    # passed through the MLP.
    x[[1]][, -1, ..] %>%
      self$mlp()
  }
)
For model instantiation, we now have an additional configuration parameter, related to the amount of dropout between the two linear layers.
# Instantiate the GRU variant; the output width equals the forecast length.
internet <- mannequin(
  "gru",
  input_size = 1, hidden_size = 32, linear_size = 512,
  output_size = n_forecast, linear_dropout = 0
)

# training RNNs on the GPU currently prints a warning that may clutter
# the console
# see https://github.com/mlverse/torch/issues/461
# alternatively, use
# machine <- "cpu"
machine <- torch_device(if (cuda_is_available()) "cuda" else "cpu")

# `$to()` receives the target device through its `device` argument.
internet <- internet$to(device = machine)
The training procedure is completely unchanged.
# Adam over all parameters of the network, with a learning rate of 0.001.
optimizer <- optim_adam(params = internet$parameters, lr = 0.001)

# Number of passes over the training dataloader.
num_epochs <- 30
# Perform one optimization step on batch `b`.
#
# b: a batch with elements `x` (inputs) and `y` (targets).
#
# Returns the batch's MSE loss as a plain R number.
train_batch <- function(b) {
  # Clear gradients left over from the previous step.
  optimizer$zero_grad()

  output <- internet(b$x$to(device = machine))
  target <- b$y$to(device = machine)

  loss <- nnf_mse_loss(output, target)
  loss$backward()
  optimizer$step()

  # item() extracts the scalar value from the loss tensor.
  loss$item()
}
# Compute the loss on batch `b` without updating the model.
#
# b: a batch with elements `x` (inputs) and `y` (targets).
#
# Returns the batch's MSE loss as a plain R number.
valid_batch <- function(b) {
  output <- internet(b$x$to(device = machine))
  target <- b$y$to(device = machine)

  loss <- nnf_mse_loss(output, target)

  # item() extracts the scalar value from the loss tensor.
  loss$item()
}
# Train for `num_epochs` epochs, printing the mean training and validation
# loss after each one. seq_len() keeps the loop from running when
# num_epochs is 0 (1:0 would iterate over 1 and 0).
for (epoch in seq_len(num_epochs)) {
  # Training mode.
  internet$train()
  train_loss <- c()

  coro::loop(for (b in train_dl) {
    loss <- train_batch(b)
    train_loss <- c(train_loss, loss)
  })

  cat(sprintf("\nEpoch %d, training: loss: %3.5f \n", epoch, mean(train_loss)))

  # Evaluation mode.
  internet$eval()
  valid_loss <- c()

  coro::loop(for (b in valid_dl) {
    loss <- valid_batch(b)
    valid_loss <- c(valid_loss, loss)
  })

  cat(sprintf("\nEpoch %d, validation: loss: %3.5f \n", epoch, mean(valid_loss)))
}
# Epoch 1, training: loss: 0.65737
#
# Epoch 1, validation: loss: 0.54586
#
# Epoch 2, training: loss: 0.43991
#
# Epoch 2, validation: loss: 0.50588
#
# Epoch 3, training: loss: 0.42161
#
# Epoch 3, validation: loss: 0.50031
#
# Epoch 4, training: loss: 0.41718
#
# Epoch 4, validation: loss: 0.48703
#
# Epoch 5, training: loss: 0.39498
#
# Epoch 5, validation: loss: 0.49572
#
# Epoch 6, training: loss: 0.38073
#
# Epoch 6, validation: loss: 0.46813
#
# Epoch 7, training: loss: 0.36472
#
# Epoch 7, validation: loss: 0.44957
#
# Epoch 8, training: loss: 0.35058
#
# Epoch 8, validation: loss: 0.44440
#
# Epoch 9, training: loss: 0.33880
#
# Epoch 9, validation: loss: 0.41995
#
# Epoch 10, training: loss: 0.32545
#
# Epoch 10, validation: loss: 0.42021
#
# Epoch 11, training: loss: 0.31347
#
# Epoch 11, validation: loss: 0.39514
#
# Epoch 12, training: loss: 0.29622
#
# Epoch 12, validation: loss: 0.38146
#
# Epoch 13, training: loss: 0.28006
#
# Epoch 13, validation: loss: 0.37754
#
# Epoch 14, training: loss: 0.27001
#
# Epoch 14, validation: loss: 0.36636
#
# Epoch 15, training: loss: 0.26191
#
# Epoch 15, validation: loss: 0.35338
#
# Epoch 16, training: loss: 0.25533
#
# Epoch 16, validation: loss: 0.35453
#
# Epoch 17, training: loss: 0.25085
#
# Epoch 17, validation: loss: 0.34521
#
# Epoch 18, training: loss: 0.24686
#
# Epoch 18, validation: loss: 0.35094
#
# Epoch 19, training: loss: 0.24159
#
# Epoch 19, validation: loss: 0.33776
#
# Epoch 20, training: loss: 0.23680
#
# Epoch 20, validation: loss: 0.33974
#
# Epoch 21, training: loss: 0.23070
#
# Epoch 21, validation: loss: 0.34069
#
# Epoch 22, training: loss: 0.22761
#
# Epoch 22, validation: loss: 0.33724
#
# Epoch 23, training: loss: 0.22390
#
# Epoch 23, validation: loss: 0.34013
#
# Epoch 24, training: loss: 0.22155
#
# Epoch 24, validation: loss: 0.33460
#
# Epoch 25, training: loss: 0.21820
#
# Epoch 25, validation: loss: 0.33755
#
# Epoch 26, training: loss: 0.22134
#
# Epoch 26, validation: loss: 0.33678
#
# Epoch 27, training: loss: 0.21061
#
# Epoch 27, validation: loss: 0.33108
#
# Epoch 28, training: loss: 0.20496
#
# Epoch 28, validation: loss: 0.32769
#
# Epoch 29, training: loss: 0.20223
#
# Epoch 29, validation: loss: 0.32969
#
# Epoch 30, training: loss: 0.20022
#
# Epoch 30, validation: loss: 0.33331
From the way the loss decreases on the training set, we conclude that, yes, the model is learning something. It would probably continue to improve for quite some epochs still. We do, however, see less of an improvement on the validation set.
Naturally, we are now curious about the test-set predictions. (Remember, for testing we chose the “particularly hard” month of January 2014, particularly hard because of a heat wave that resulted in exceptionally high demand.)
With no loop to be coded, evaluation now becomes pretty straightforward:
# Collect one forecast per test window.
internet$eval()

# Pre-allocated list with one slot per batch of the test dataloader.
test_preds <- vector(mode = "list", length = length(test_dl))

i <- 1

coro::loop(for (b in test_dl) {
  input <- b$x
  output <- internet(input$to(device = machine))
  preds <- as.numeric(output)

  test_preds[[i]] <- preds
  # Plain `<-` suffices here, exactly as for `test_preds[[i]]` just above.
  i <- i + 1
})
# Actual observations for the test month.
vic_elec_jan_2014 <- vic_elec %>%
  filter(year(Date) == 2014, month(Date) == 1)

# Three example forecasts, each padded with NA to the length of the month.
# The forecast for window k starts after the k - 1 preceding start positions
# plus the window's own n_timesteps input observations.

# First window: the forecast directly follows the first n_timesteps rows.
test_pred1 <- test_preds[[1]]
test_pred1 <- c(rep(NA, n_timesteps), test_pred1, rep(NA, nrow(vic_elec_jan_2014) - n_timesteps - n_forecast))

# Window 408: shifted by 407 observations relative to the first one.
test_pred2 <- test_preds[[408]]
test_pred2 <- c(rep(NA, n_timesteps + 407), test_pred2, rep(NA, nrow(vic_elec_jan_2014) - 407 - n_timesteps - n_forecast))

# Window 817: its forecast is placed at the very end of the month.
test_pred3 <- test_preds[[817]]
test_pred3 <- c(rep(NA, nrow(vic_elec_jan_2014) - n_forecast), test_pred3)

# Undo the standardization and reshape to long format, one series per key.
preds_ts <- vic_elec_jan_2014 %>%
  select(Demand) %>%
  add_column(
    mlp_ex_1 = test_pred1 * train_sd + train_mean,
    mlp_ex_2 = test_pred2 * train_sd + train_mean,
    mlp_ex_3 = test_pred3 * train_sd + train_mean) %>%
  pivot_longer(-Time) %>%
  # pivot_longer() stores the former column names in a column called `name`.
  update_tsibble(key = name)
# Plot all series in `preds_ts`, using one manually chosen colour per series.
autoplot(preds_ts) +
  scale_colour_manual(
    values = c("#08c5d1", "#00353f", "#ffbf66", "#d46f4d")
  ) +
  theme_minimal()
Figure 1: Predictions for one week in January 2014.
Compare this to the forecast obtained by feeding back predictions. The demand profiles over the day look a lot more realistic now. How about the phases of extreme demand? Evidently, these are not reflected in the forecast, any more than they were with the “loop technique”. In fact, the forecast allows for interesting insights into this model's personality: apparently, it really likes fluctuating around the mean; “prime” it with inputs that oscillate around a significantly higher level, and it will quickly shift back to its comfort zone.
Seeing how, above, we provided an option to use dropout inside the MLP, you may be wondering whether this would help with forecasts on the test set. It turns out it did not, in my experiments. Maybe this is not so strange either: how, absent external cues (temperature), should the network know that high demand is coming up?
In our analysis, we can make an additional distinction. With the first week of predictions, what we see is a failure to anticipate something that could not reasonably have been anticipated (two, or two and a half, days of exceptionally high demand). In the second, all the network would have had to do was stay at the current, elevated level. It will be interesting to see how this is handled by the architectures we discuss next.
Finally, an additional idea you may have had is: what if we used temperature as a second input variable? As a matter of fact, training performance did improve, but no performance impact was observed on the validation and test sets. Still, you may find the code useful: it is easily extended to datasets with more predictors. For that reason, we reproduce it in the appendix.
Thanks for reading!
# Data input code modified to accommodate two predictors

# Window lengths, counted in observations: 7 * 24 * 2 = 336.
n_timesteps <- 7 * 24 * 2 # length of every input sequence
n_forecast <- n_timesteps # length of every target sequence (same value here)
# Extract the `Demand` and `Temperature` columns of `vic_elec` for one
# calendar year, optionally restricted to a single month.
#
# yr:    year to keep; compared against `year(Date)`.
# month: month number to keep; the default `NULL` keeps every month.
#
# Returns a tibble with the columns `Demand` and `Temperature`.
vic_elec_get_year <- function(yr, month = NULL) {
  vic_elec %>%
    # With `month = NULL` the second condition compares month(Date) with
    # itself, so it is always TRUE and no month filtering takes place.
    filter(year(Date) == yr, month(Date) == if (is.null(month)) month(Date) else month) %>%
    as_tibble() %>%
    select(Demand, Temperature)
}
# Training (2012), validation (2013) and test (January 2014) data as
# two-column matrices: column 1 is demand, column 2 is temperature.
elec_train <- vic_elec_get_year(2012) %>% as.matrix()
elec_valid <- vic_elec_get_year(2013) %>% as.matrix()
elec_test <- vic_elec_get_year(2014, 1) %>% as.matrix()

# Per-column standardization statistics, computed on the training data only.
train_mean_demand <- mean(elec_train[, 1])
train_sd_demand <- sd(elec_train[, 1])

train_mean_temp <- mean(elec_train[, 2])
train_sd_temp <- sd(elec_train[, 2])
# torch dataset yielding (input window, target window) pairs from a
# two-column series: both columns feed the input, the first one is the target.
#
# Constructor arguments:
#   information: matrix whose first column is demand and whose second column
#                is temperature; each column is standardized with the
#                matching global training mean and standard deviation.
#   n_timesteps: number of observations in each input window.
#   n_forecast:  number of observations in each target window.
#   sample_frac: fraction of the possible window start positions to keep
#                (default 1 keeps all of them).
elec_dataset <- dataset(
  name = "elec_dataset",

  initialize = function(information, n_timesteps, n_forecast, sample_frac = 1) {
    demand <- (information[, 1] - train_mean_demand) / train_sd_demand
    temp <- (information[, 2] - train_mean_temp) / train_sd_temp
    self$x <- cbind(demand, temp) %>% torch_tensor()

    self$n_timesteps <- n_timesteps
    self$n_forecast <- n_forecast

    # Number of start positions that leave room for one full input window
    # immediately followed by one full target window.
    n <- nrow(self$x) - self$n_timesteps - self$n_forecast + 1

    # Random subset of the start positions, stored in ascending order.
    self$begins <- sort(sample.int(
      n = n,
      size = n * sample_frac
    ))
  },

  # Returns list(x = input window, y = target window) for the i-th start.
  .getitem = function(i) {
    start <- self$begins[i]
    end <- start + self$n_timesteps - 1
    pred_length <- self$n_forecast

    list(
      # Inputs use both columns.
      x = self$x[start:end, ],
      # The target is the first column (demand) only, starting right after
      # the input window.
      y = self$x[(end + 1):(end + pred_length), 1]
    )
  },

  # Number of windows kept after sampling.
  .length = function() {
    length(self$begins)
  }
)
### rest identical to single-predictor code above
Photo by Monica Bourgeau on Unsplash