ion") %>%
  select(value) %>%
  pull()
test_vals <- df_processed_tbl %>%
  filter(key == "testing") %>%
  select(value) %>%
  pull()
# build the windowed matrices
train_matrix <- build_matrix(train_vals, n_timesteps + n_predictions)
valid_matrix <- build_matrix(valid_vals, n_timesteps + n_predictions)
test_matrix  <- build_matrix(test_vals,  n_timesteps + n_predictions)
# split each matrix into inputs (X) and targets (y)
# (the indexing below relies on n_predictions being equal to n_timesteps)
# also, discard trailing samples that don't fill a complete batch of batch_size
# (a purely technical requirement)
X_train <- train_matrix[, 1:n_timesteps]
y_train <- train_matrix[, (n_timesteps + 1):(n_timesteps * 2)]
X_train <- X_train[1:(nrow(X_train) %/% batch_size * batch_size), ]
y_train <- y_train[1:(nrow(y_train) %/% batch_size * batch_size), ]

X_valid <- valid_matrix[, 1:n_timesteps]
y_valid <- valid_matrix[, (n_timesteps + 1):(n_timesteps * 2)]
X_valid <- X_valid[1:(nrow(X_valid) %/% batch_size * batch_size), ]
y_valid <- y_valid[1:(nrow(y_valid) %/% batch_size * batch_size), ]

X_test <- test_matrix[, 1:n_timesteps]
y_test <- test_matrix[, (n_timesteps + 1):(n_timesteps * 2)]
X_test <- X_test[1:(nrow(X_test) %/% batch_size * batch_size), ]
y_test <- y_test[1:(nrow(y_test) %/% batch_size * batch_size), ]
# add on the required third axis
X_train <- reshape_X_3d(X_train)
X_valid <- reshape_X_3d(X_valid)
X_test <- reshape_X_3d(X_test)
y_train <- reshape_X_3d(y_train)
y_valid <- reshape_X_3d(y_valid)
y_test <- reshape_X_3d(y_test)
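Keras LSTM layers expect their input as a 3-d array of shape (samples, timesteps, features); since we are working with a single univariate series, the feature dimension is 1. A minimal sanity check might look like this (the number of samples depends on the series length and on batch_size):

# each array should now be 3-dimensional: samples x n_timesteps x 1
dim(X_train)
stopifnot(length(dim(X_train)) == 3, dim(X_train)[3] == 1)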
Building the LSTM model
Now that our data has the required form, let's build the final model. As always in deep learning, an important and often time-consuming part of the job is tuning hyperparameters. To keep this post self-contained, and considering this is primarily a tutorial on how to use LSTMs in R, let's assume the following settings were found after extensive experimentation (experimentation did take place, but not to a degree that guarantees the best possible performance).
Instead of hard-coding the hyperparameters, we use tfruns to set up an environment in which a grid search can easily be run. We'll comment on what these parameters do, but leave a detailed explanation for later.
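For illustration, once these flags live in a training script, a grid search could be launched with tfruns::tuning_run(); the script name "train.R" and the flag values below are just placeholders, not the settings used in this post:

library(tfruns)

# run the training script once per combination of flag values
runs <- tuning_run(
  "train.R",
  flags = list(
    dropout           = c(0.1, 0.2),
    recurrent_dropout = c(0.1, 0.2),
    n_epochs          = c(50, 100)
  )
)
runs  # one row per run, with flag values and recorded metrics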
FLAGS <- flags(
  # There is a so-called "stateful LSTM" in Keras. While LSTM is stateful per se,
  # this adds a further tweak where the hidden states get initialized with values
  # from the item at the same position in the previous batch.
  # This is helpful just under specific circumstances, or if you want to create an
  # "infinite stream" of states, in which case you'd use 1 as the batch size.
  # Below, we show how the code would have to be changed to use this, but it won't be
  # discussed further here.
  flag_boolean("stateful", FALSE),
  # Should we use several layers of LSTM?
  # Again, just included for completeness, it did not yield any superior performance on this task.
  # This will actually stack exactly one additional layer of LSTM units.
  flag_boolean("stack_layers", FALSE),
  # number of samples fed to the model in one go
  flag_integer("batch_size", 10),
  # size of the hidden state, which equals the size of the predictions
  flag_integer("n_timesteps", 12),
  # how many epochs to train for
  flag_integer("n_epochs", 100),
  # fraction of the units to drop for the linear transformation of the inputs
  flag_numeric("dropout", 0.2),
  # fraction of the units to drop for the linear transformation of the recurrent state
  flag_numeric("recurrent_dropout", 0.2),
  # loss function; found to work better for this specific case than mean squared error
  flag_string("loss", "logcosh"),
  # optimizer = stochastic gradient descent. Seemed to work better than adam or rmsprop here
  # (as indicated b