
A first go at multi-step prediction


We pick up where the first post in this series left off: tackling the task of multi-step time series forecasting.

Our first attempt was a workaround of sorts. The model had been trained to deliver a single prediction, corresponding to the very next point in time. Thus, if we needed a longer forecast, all we could do was use that prediction and feed it back to the model, shifting the input sequence by one value (from \((x_{t-n}, \dots, x_t)\) to \((x_{t-n+1}, \dots, x_{t+1})\), say).
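For reference, a rough sketch of that loop technique could look like the following. Names here are hypothetical: one_step_net stands for a model trained to predict just the next observation, and x for an input tensor of shape (1, n_timesteps, 1); this is not the exact code from the previous post.

```r
# sketch of the "loop technique": repeatedly predict one step and feed it back
preds <- numeric(0)
for (i in 1:n_forecast) {
  # the hypothetical one-step model returns just the next value, shape (1, 1)
  next_val <- one_step_net(x)
  preds <- c(preds, as.numeric(next_val))
  # shift the input window by one: drop the oldest value, append the prediction
  x <- torch_cat(list(x[ , 2:n_timesteps, ], next_val$view(c(1, 1, 1))), dim = 2)
}
```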

Instead, the new model will be designed (and trained) to forecast a configurable number of observations at once. The architecture will remain basic (as basic as possible, given the task) and can thus serve as a baseline for later attempts.

We work with the same data as before: vic_elec from tsibbledata.

However, compared to last time, the dataset class has to change. While previously, for each batch item, the target (y) was a single value, it now is a vector, just like the input, x. And just like n_timesteps was (and still is) used to specify the length of the input sequence, there is now a second parameter, n_forecast, to set the size of the target.

In our example, n_timesteps and n_forecast are set to the same value, but this does not have to be the case. You could just as well train on week-long sequences and then forecast developments over a single day, or a month.
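For instance, with this half-hourly dataset, training on week-long sequences while forecasting a single day ahead might be configured like so (an illustrative variation, not the settings used below):

```r
n_timesteps <- 7 * 24 * 2 # one week of half-hourly observations
n_forecast <- 24 * 2      # a single day
```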

Apart from the fact that .getitem() now returns a vector for y as well as for x, there is not much to be said about dataset creation. Here is the complete code to set up the data input pipeline:

n_timesteps <- 7 * 24 * 2
n_forecast <- 7 * 24 * 2 
batch_size <- 32

vic_elec_get_year <- function(year, month = NULL) {
  vic_elec %>%
    filter(year(Date) == year, month(Date) == if (is.null(month)) month(Date) else month) %>%
    as_tibble() %>%
    select(Demand)
}

elec_train <- vic_elec_get_year(2012) %>% as.matrix()
elec_valid <- vic_elec_get_year(2013) %>% as.matrix()
elec_test <- vic_elec_get_year(2014, 1) %>% as.matrix()

train_mean <- mean(elec_train)
train_sd <- sd(elec_train)

elec_dataset <- dataset(
  name = "elec_dataset",
  
  initialize = function(x, n_timesteps, n_forecast, sample_frac = 1) {
    
    self$n_timesteps <- n_timesteps
    self$n_forecast <- n_forecast
    # scale using training-set mean and standard deviation
    self$x <- torch_tensor((x - train_mean) / train_sd)
    
    n <- length(self$x) - self$n_timesteps - self$n_forecast + 1
    
    self$starts <- sort(sample.int(
      n = n,
      size = n * sample_frac
    ))
    
  },
  
  .getitem = function(i) {
    
    start <- self$starts[i]
    end <- start + self$n_timesteps - 1
    pred_length <- self$n_forecast
    
    list(
      x = self$x[start:end],
      y = self$x[(end + 1):(end + pred_length)]$squeeze(2)
    )
    
  },
  
  .length = function() {
    length(self$starts) 
  }
)

train_ds <- elec_dataset(elec_train, n_timesteps, n_forecast, sample_frac = 0.5)
train_dl <- train_ds %>% dataloader(batch_size = batch_size, shuffle = TRUE)

valid_ds <- elec_dataset(elec_valid, n_timesteps, n_forecast, sample_frac = 0.5)
valid_dl <- valid_ds %>% dataloader(batch_size = batch_size)

test_ds <- elec_dataset(elec_test, n_timesteps, n_forecast)
test_dl <- test_ds %>% dataloader(batch_size = 1)
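As a quick check of the pipeline (not part of the original workflow), we can peek at a single batch from the training dataloader; x should come out with shape (batch_size, n_timesteps, 1), and y with shape (batch_size, n_forecast):

```r
# inspect a single batch
b <- train_dl %>% dataloader_make_iter() %>% dataloader_next()
dim(b$x) # should be 32 336 1
dim(b$y) # should be 32 336
```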

The model replaces the single linear layer that, in the previous post, had been tasked with producing the final prediction, with a small network, complete with two linear layers and, optionally, dropout.

In forward(), we first apply the RNN, and just like in the previous post, we make use of the outputs only; or more specifically, the output corresponding to the final time step. (See that previous post for a detailed discussion of what a torch RNN returns.)
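In case the details are not fresh in memory, here is a tiny, standalone illustration with toy dimensions (assuming batch_first = TRUE): the first element of the returned list holds the outputs for all time steps, the second the final hidden state.

```r
rnn <- nn_gru(input_size = 1, hidden_size = 4, batch_first = TRUE)
x <- torch_randn(2, 5, 1)  # (batch_size, seq_len, input_size)
out <- rnn(x)
dim(out[[1]])              # 2 5 4: outputs at every time step
dim(out[[2]])              # 1 2 4: final hidden state (num_layers, batch_size, hidden_size)
out[[1]][ , -1, ..]        # output at the last time step only, shape (2, 4)
```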

model <- nn_module(
  
  initialize = function(type, input_size, hidden_size, linear_size, output_size,
                        num_layers = 1, dropout = 0, linear_dropout = 0) {
    
    self$type <- type
    self$num_layers <- num_layers
    self$linear_dropout <- linear_dropout
    
    self$rnn <- if (self$type == "gru") {
      nn_gru(
        input_size = input_size,
        hidden_size = hidden_size,
        num_layers = num_layers,
        dropout = dropout,
        batch_first = TRUE
      )
    } else {
      nn_lstm(
        input_size = input_size,
        hidden_size = hidden_size,
        num_layers = num_layers,
        dropout = dropout,
        batch_first = TRUE
      )
    }
    
    self$mlp <- nn_sequential(
      nn_linear(hidden_size, linear_size),
      nn_relu(),
      nn_dropout(linear_dropout),
      nn_linear(linear_size, output_size)
    )
    
  },
  
  forward = function(x) {
    
    x <- self$rnn(x)
    # use the outputs only (first list element), restricted to the final time step
    x[[1]][ , -1, ..] %>% 
      self$mlp()
    
  }
  
)

For model instantiation, we now have an additional configuration parameter, governing the amount of dropout between the two linear layers.

net <- model(
  "gru", input_size = 1, hidden_size = 32, linear_size = 512, output_size = n_forecast, linear_dropout = 0
  )

# training RNNs on the GPU currently prints a warning that may clutter 
# the console
# see https://github.com/mlverse/torch/issues/461
# alternatively, use 
# device <- "cpu"
device <- torch_device(if (cuda_is_available()) "cuda" else "cpu")

net <- net$to(device = device)
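Optionally, a quick sanity check (again, an addition to the original workflow) confirms that the network maps a batch of inputs to a batch of forecast vectors of length n_forecast:

```r
# one forward pass on a single batch, before training
b <- train_dl %>% dataloader_make_iter() %>% dataloader_next()
output <- net(b$x$to(device = device))
dim(output) # should be 32 336
```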

The training procedure is unchanged.

optimizer <- optim_adam(net$parameters, lr = 0.001)

num_epochs <- 30

train_batch <- function(b) {
  
  optimizer$zero_grad()
  output <- net(b$x$to(device = device))
  target <- b$y$to(device = device)
  
  loss <- nnf_mse_loss(output, target)
  loss$backward()
  optimizer$step()
  
  loss$item()
}

valid_batch <- function(b) {
  
  output <- net(b$x$to(device = device))
  target <- b$y$to(device = device)
  
  loss <- nnf_mse_loss(output, target)
  loss$item()
  
}

for (epoch in 1:num_epochs) {
  
  net$train()
  train_loss <- c()
  
  coro::loop(for (b in train_dl) {
    loss <- train_batch(b)
    train_loss <- c(train_loss, loss)
  })
  
  cat(sprintf("\nEpoch %d, training: loss: %3.5f \n", epoch, mean(train_loss)))
  
  net$eval()
  valid_loss <- c()
  
  coro::loop(for (b in valid_dl) {
    loss <- valid_batch(b)
    valid_loss <- c(valid_loss, loss)
  })
  
  cat(sprintf("\nEpoch %d, validation: loss: %3.5f \n", epoch, mean(valid_loss)))
}
# Epoch 1, training: loss: 0.65737 
# Epoch 1, validation: loss: 0.54586 
# Epoch 2, training: loss: 0.43991 
# Epoch 2, validation: loss: 0.50588 
# Epoch 3, training: loss: 0.42161 
# Epoch 3, validation: loss: 0.50031 
# Epoch 4, training: loss: 0.41718 
# Epoch 4, validation: loss: 0.48703 
# Epoch 5, training: loss: 0.39498 
# Epoch 5, validation: loss: 0.49572 
# Epoch 6, training: loss: 0.38073 
# Epoch 6, validation: loss: 0.46813 
# Epoch 7, training: loss: 0.36472 
# Epoch 7, validation: loss: 0.44957 
# Epoch 8, training: loss: 0.35058 
# Epoch 8, validation: loss: 0.44440 
# Epoch 9, training: loss: 0.33880 
# Epoch 9, validation: loss: 0.41995 
# Epoch 10, training: loss: 0.32545 
# Epoch 10, validation: loss: 0.42021 
# Epoch 11, training: loss: 0.31347 
# Epoch 11, validation: loss: 0.39514 
# Epoch 12, training: loss: 0.29622 
# Epoch 12, validation: loss: 0.38146 
# Epoch 13, training: loss: 0.28006 
# Epoch 13, validation: loss: 0.37754 
# Epoch 14, training: loss: 0.27001 
# Epoch 14, validation: loss: 0.36636 
# Epoch 15, training: loss: 0.26191 
# Epoch 15, validation: loss: 0.35338 
# Epoch 16, training: loss: 0.25533 
# Epoch 16, validation: loss: 0.35453 
# Epoch 17, training: loss: 0.25085 
# Epoch 17, validation: loss: 0.34521 
# Epoch 18, training: loss: 0.24686 
# Epoch 18, validation: loss: 0.35094 
# Epoch 19, training: loss: 0.24159 
# Epoch 19, validation: loss: 0.33776 
# Epoch 20, training: loss: 0.23680 
# Epoch 20, validation: loss: 0.33974 
# Epoch 21, training: loss: 0.23070 
# Epoch 21, validation: loss: 0.34069 
# Epoch 22, training: loss: 0.22761 
# Epoch 22, validation: loss: 0.33724 
# Epoch 23, training: loss: 0.22390 
# Epoch 23, validation: loss: 0.34013 
# Epoch 24, training: loss: 0.22155 
# Epoch 24, validation: loss: 0.33460 
# Epoch 25, training: loss: 0.21820 
# Epoch 25, validation: loss: 0.33755 
# Epoch 26, training: loss: 0.22134 
# Epoch 26, validation: loss: 0.33678 
# Epoch 27, training: loss: 0.21061 
# Epoch 27, validation: loss: 0.33108 
# Epoch 28, training: loss: 0.20496 
# Epoch 28, validation: loss: 0.32769 
# Epoch 29, training: loss: 0.20223 
# Epoch 29, validation: loss: 0.32969 
# Epoch 30, training: loss: 0.20022 
# Epoch 30, validation: loss: 0.33331 

From the way loss decreases on the training set, we conclude that yes, the model is learning something; it would probably keep improving for quite a few epochs still. We see less of an improvement on the validation set, however.

Naturally, we are now curious about predictions on the test set. (Remember, for testing we chose the "particularly hard" month of January, 2014; particularly hard because of a heat wave that resulted in exceptionally high demand.)

With no loop to be coded, evaluation now becomes pretty straightforward:

net$eval()

test_preds <- vector(mode = "list", length = length(test_dl))

i <- 1

coro::loop(for (b in test_dl) {
  
  input <- b$x
  output <- net(input$to(device = device))
  preds <- as.numeric(output)
  
  test_preds[[i]] <- preds
  i <<- i + 1
  
})
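Below, we single out three of these forecasts: the first, one from the middle, and the last one obtainable. To see where the indices 1, 408, and 817 come from, a quick computation (added for clarity) helps: January, 2014 comprises 31 * 48 half-hourly observations, so the number of possible forecast origins is

```r
# number of obtainable forecasts for January, 2014: 31 days of half-hourly data,
# minus room for the input sequence and the forecast itself
31 * 48 - n_timesteps - n_forecast + 1 # 817
length(test_ds)                        # same value
```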

vic_elec_jan_2014 <- vic_elec %>%
  filter(year(Date) == 2014, month(Date) == 1)

test_pred1 <- test_preds[[1]]
test_pred1 <- c(rep(NA, n_timesteps), test_pred1, rep(NA, nrow(vic_elec_jan_2014) - n_timesteps - n_forecast))

test_pred2 <- test_preds[[408]]
test_pred2 <- c(rep(NA, n_timesteps + 407), test_pred2, rep(NA, nrow(vic_elec_jan_2014) - 407 - n_timesteps - n_forecast))

test_pred3 <- test_preds[[817]]
test_pred3 <- c(rep(NA, nrow(vic_elec_jan_2014) - n_forecast), test_pred3)


preds_ts <- vic_elec_jan_2014 %>%
  select(Demand) %>%
  add_column(
    mlp_ex_1 = test_pred1 * train_sd + train_mean,
    mlp_ex_2 = test_pred2 * train_sd + train_mean,
    mlp_ex_3 = test_pred3 * train_sd + train_mean) %>%
  pivot_longer(-Time) %>%
  update_tsibble(key = name)


preds_ts %>%
  autoplot() +
  scale_colour_manual(values = c("#08c5d1", "#00353f", "#ffbf66", "#d46f4d")) +
  theme_minimal()

Figure 1: One-week-ahead predictions for January, 2014.

Compare this to the forecast obtained by feeding back predictions. The demand profiles over the day look a lot more realistic now. What about the phases of extreme demand? Evidently, these are not reflected in the forecast, just as they were not with the "loop technique". In fact, the forecast allows for interesting insights into this model's personality: apparently, it really likes fluctuating around the mean; "prime" it with inputs that oscillate around a significantly higher level, and it will quickly shift back to its comfort zone.

Seeing how, above, we provided the option to use dropout inside the MLP, you may be wondering whether that would help with forecasts on the test set. It did not, in my experiments. Perhaps this is not so strange either: absent external cues (such as temperature), how should the network know that high demand is coming up?

In our analysis, we can make an additional distinction. With the first week of predictions, what we see is a failure to anticipate something that could not reasonably have been anticipated (two, or two-and-a-half, days of exceptionally high demand, say). In the second, all the network would have had to do was stay at the current, elevated level. It will be interesting to see how the architectures we discuss next handle this.

Finally, an additional idea you may have had is: what if we used temperature as a second input variable? As it turns out, training performance indeed improved, but no impact was observed on the validation and test sets. Still, you may find the code useful; it is easily extended to datasets with more predictors. For that reason, we reproduce it in the appendix.

Thanks for reading!

# Data input code, modified to accommodate two predictors

n_timesteps <- 7 * 24 * 2
n_forecast <- 7 * 24 * 2

vic_elec_get_year <- function(year, month = NULL) {
  vic_elec %>%
    filter(year(Date) == year, month(Date) == if (is.null(month)) month(Date) else month) %>%
    as_tibble() %>%
    select(Demand, Temperature)
}

elec_train <- vic_elec_get_year(2012) %>% as.matrix()
elec_valid <- vic_elec_get_year(2013) %>% as.matrix()
elec_test <- vic_elec_get_year(2014, 1) %>% as.matrix()

train_mean_demand <- mean(elec_train[ , 1])
train_sd_demand <- sd(elec_train[ , 1])

train_mean_temp <- mean(elec_train[ , 2])
train_sd_temp <- sd(elec_train[ , 2])

elec_dataset <- dataset(
  name = "elec_dataset",
  
  initialize = function(data, n_timesteps, n_forecast, sample_frac = 1) {
    
    # scale each predictor using its own training-set mean and standard deviation
    demand <- (data[ , 1] - train_mean_demand) / train_sd_demand
    temp <- (data[ , 2] - train_mean_temp) / train_sd_temp
    self$x <- cbind(demand, temp) %>% torch_tensor()
    
    self$n_timesteps <- n_timesteps
    self$n_forecast <- n_forecast
    
    n <- nrow(self$x) - self$n_timesteps - self$n_forecast + 1
    self$starts <- sort(sample.int(
      n = n,
      size = n * sample_frac
    ))
    
  },
  
  .getitem = function(i) {
    
    start <- self$starts[i]
    end <- start + self$n_timesteps - 1
    pred_length <- self$n_forecast
    
    list(
      x = self$x[start:end, ],
      y = self$x[(end + 1):(end + pred_length), 1]
    )
    
  },
  
  .length = function() {
    length(self$starts)
  }
  
)

### rest identical to the single-predictor code above
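One caveat worth noting (an addition, not part of the original appendix): with Demand and Temperature both present, each time step now carries two features, so model instantiation would presumably use input_size = 2:

```r
# each time step now holds two features (demand and temperature)
net <- model(
  "gru", input_size = 2, hidden_size = 32, linear_size = 512,
  output_size = n_forecast, linear_dropout = 0
)
```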

Photo by Monica Bourgeau on Unsplash
