y limited testing)
flag_string("optimizer_type", "sgd"),
# size of the LSTM layer
flag_integer("n_units", 128),
# learning rate
flag_numeric("lr", 0.003),
# momentum, an additional parameter to the SGD optimizer
flag_numeric("momentum", 0.9),
# parameter to the early stopping callback
flag_integer("patience", 10))
# the length of the hidden state also fixes how many predictions we emit
n_predictions <- FLAGS$n_timesteps

# number of predictor features per timestep
n_features <- 1

# dispatch on the configured optimizer type; further optimizers could be
# added as extra cases of this switch
optimizer <- switch(
  FLAGS$optimizer_type,
  sgd = optimizer_sgd(lr = FLAGS$lr, momentum = FLAGS$momentum))

# callbacks handed to fit(): just one here — stop training before n_epochs
# if the validation loss stops decreasing (by a configurable amount, over a
# configurable number of epochs)
callbacks <- list(
  callback_early_stopping(patience = FLAGS$patience))
经过所有这些准备工作,构建和训练模型的代码就相当简短了!我们先快速浏览“完整版本”——它允许你测试堆叠多个 LSTM 层或使用状态(stateful)LSTM——然后再看最终的简短版本(两者都不使用)并对其加以说明。
下面先给出完整版代码,仅供参考。
# ---- "long" version: supports stacked and/or stateful LSTMs ----
model <- keras_model_sequential()

# First (and possibly only) LSTM layer. As the first layer of the model it
# must declare the shape of the incoming batches.
model %>%
  layer_lstm(
    units = FLAGS$n_units,
    batch_input_shape = c(
      FLAGS$batch_size,
      FLAGS$n_timesteps,
      n_features),
    dropout = FLAGS$dropout,
    recurrent_dropout = FLAGS$recurrent_dropout,
    # emit the whole sequence of hidden states, one per timestep
    return_sequences = TRUE,
    stateful = FLAGS$stateful)

# Optionally stack a second LSTM on top of the first.
if (FLAGS$stack_layers) {
  model %>%
    layer_lstm(
      units = FLAGS$n_units,
      dropout = FLAGS$dropout,
      recurrent_dropout = FLAGS$recurrent_dropout,
      return_sequences = TRUE,
      stateful = FLAGS$stateful)
}

# One dense unit applied independently at every timestep.
model %>% time_distributed(
  layer_dense(units = 1))

model %>%
  compile(
    loss = FLAGS$loss,
    optimizer = optimizer,
    metrics = list("mean_squared_error"))

if (!FLAGS$stateful) {
  model %>% fit(
    x = X_train,
    y = y_train,
    validation_data = list(X_valid, y_valid),
    batch_size = FLAGS$batch_size,
    epochs = FLAGS$n_epochs,
    callbacks = callbacks)
} else {
  # Stateful training: drive the epochs manually so cell states can be
  # reset between epochs, and never shuffle within an epoch.
  # seq_len() (rather than 1:FLAGS$n_epochs) runs zero iterations when
  # n_epochs is 0 instead of iterating over c(1, 0).
  for (epoch in seq_len(FLAGS$n_epochs)) {
    model %>% fit(
      x = X_train,
      y = y_train,
      validation_data = list(X_valid, y_valid),
      callbacks = callbacks,
      batch_size = FLAGS$batch_size,
      epochs = 1,
      shuffle = FALSE)
    model %>% reset_states()
  }
}

# Defensive final reset so a stateful model never carries training state
# into prediction (redundant after the loop above, but harmless).
if (FLAGS$stateful) {
  model %>% reset_states()
}
现在让我们逐步完成下面更简单但更好(或同样)的配置。
# ---- the simpler (and equally good) configuration ----
model <- keras_model_sequential()

# Just two layers: the LSTM and the time-distributed dense output.
model %>%
  layer_lstm(
    units = FLAGS$n_units,
    # as the first layer, it has to declare the shape of the input batches
    batch_input_shape = c(
      FLAGS$batch_size, FLAGS$n_timesteps, n_features),
    dropout = FLAGS$dropout,
    recurrent_dropout = FLAGS$recurrent_dropout,
    # an LSTM returns only its final state by default; we want every timestep
    return_sequences = TRUE) %>%
  time_distributed(layer_dense(units = 1))

model %>%
  compile(
    loss = FLAGS$loss,
    optimizer = optimizer,
    # besides the loss, Keras will report the running MSE while training
    metrics = list("mean_squared_error"))

history <- model %>% fit(
  x = X_train,
  y = y_train,
  validation_data = list(X_valid, y_valid),
  batch_size = FLAGS$batch_size,
  epochs = FLAGS$n_epochs,
  callbacks = callbacks)
正如我们所见,训练在约 55 个周期后便停止了,因为验证集上的损失不再下降。我们还发现模型在验证集上的表现差于训练集——这通常是过拟合的迹象。
这个话题,我们将在另一个时间单独讨论,但有趣的是,使用更高 dropout
和 recurrent_dropout
(与增加的模型容量