This article collects typical usage examples of the Python function tensor2tensor.layers.common_hparams.basic_params1. If you are unsure what basic_params1 does, how to call it, or what real-world usage looks like, the hand-picked code examples below may help.
The following 20 code examples of basic_params1 are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
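All of the examples below follow the same pattern: build the base set with basic_params1(), override or add fields, and register the result so it can be selected by name. A minimal sketch of that pattern (the function name my_tiny_hparams and its fields are made up for illustration; the registry decorator and the --hparams_set flag are the standard tensor2tensor mechanisms):

from tensor2tensor.layers import common_hparams
from tensor2tensor.utils import registry


@registry.register_hparams
def my_tiny_hparams():
  """Toy hparams set derived from basic_params1 (name is hypothetical)."""
  hparams = common_hparams.basic_params1()
  hparams.hidden_size = 32             # Override an existing field.
  hparams.add_hparam("my_flag", True)  # Add a model-specific field.
  return hparams

# Select it by name, e.g.: t2t-trainer --hparams_set=my_tiny_hparams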
Example 1: autoencoder_basic
def autoencoder_basic():
  """Basic autoencoder model."""
  hparams = common_hparams.basic_params1()
  hparams.optimizer = "Adam"
  hparams.learning_rate_constant = 0.0002
  hparams.learning_rate_warmup_steps = 500
  hparams.learning_rate_schedule = "constant * linear_warmup"
  hparams.label_smoothing = 0.0
  hparams.batch_size = 128
  hparams.hidden_size = 64
  hparams.num_hidden_layers = 5
  hparams.initializer = "uniform_unit_scaling"
  hparams.initializer_gain = 1.0
  hparams.weight_decay = 0.0
  hparams.kernel_height = 4
  hparams.kernel_width = 4
  hparams.dropout = 0.1
  hparams.add_hparam("max_hidden_size", 1024)
  hparams.add_hparam("bottleneck_bits", 128)
  hparams.add_hparam("bottleneck_noise", 0.1)
  hparams.add_hparam("bottleneck_warmup_steps", 3000)
  hparams.add_hparam("bottleneck_max_prob", 1.0)
  hparams.add_hparam("sample_height", 32)
  hparams.add_hparam("sample_width", 32)
  hparams.add_hparam("discriminator_batchnorm", True)
  hparams.add_hparam("num_sliced_vecs", 4096)
  hparams.add_hparam("gan_loss_factor", 0.0)
  return hparams
Author: kltony | Project: tensor2tensor | Lines: 28 | Source: autoencoders.py
Example 2: next_frame_base
def next_frame_base():
  """Common HParams for next_frame models."""
  hparams = common_hparams.basic_params1()
  # Loss cutoff.
  hparams.add_hparam("video_modality_loss_cutoff", 0.01)
  # Optionally resize the frames before feeding them to the model.
  hparams.add_hparam("preprocess_resize_frames", None)
  # How many data points to shuffle. Ideally this should be part of the
  # problem, not the model!
  hparams.add_hparam("shuffle_buffer_size", 128)
  # Tiny mode, for faster tests.
  hparams.add_hparam("tiny_mode", False)
  # In case a model supports a smaller/faster version.
  hparams.add_hparam("small_mode", False)
  # In case a model has a stochastic version.
  hparams.add_hparam("stochastic_model", False)
  # Internal loss for recurrent models.
  hparams.add_hparam("internal_loss", True)
  # Choose from: concat, multiplicative, multi_additive.
  hparams.add_hparam("action_injection", "multi_additive")
  # Scheduled sampling method. Choose between
  # ground_truth_only, prediction_only, prob, count, prob_inverse_exp.
  hparams.add_hparam("scheduled_sampling_mode", "prediction_only")
  hparams.add_hparam("scheduled_sampling_decay_steps", 10000)
  hparams.add_hparam("scheduled_sampling_max_prob", 1.0)
  hparams.add_hparam("scheduled_sampling_k", 900.0)
  return hparams
Author: qixiuai | Project: tensor2tensor | Lines: 26 | Source: base.py
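The scheduled_sampling_* fields above control how often the decoder is fed its own prediction instead of the ground-truth frame. As a rough sketch of one such schedule, here is the inverse-sigmoid decay of Bengio et al. (2015) keyed by scheduled_sampling_k; this is illustrative and not necessarily the exact curve tensor2tensor implements for these modes:

import math

def ground_truth_prob(step, k=900.0, max_prob=1.0):
  """Hypothetical helper: probability of feeding ground truth at a step,
  p = k / (k + exp(step / k)), capped at scheduled_sampling_max_prob."""
  return min(max_prob, k / (k + math.exp(step / k)))

# Decays slowly at first, then drops sharply:
for step in (0, 1000, 5000, 10000):
  print(step, round(ground_truth_prob(step), 3))  # ~0.999, 0.997, 0.777, 0.013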
Example 3: ppo_base_v1
def ppo_base_v1():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.learning_rate = 1e-4
  hparams.add_hparam("init_mean_factor", 0.1)
  hparams.add_hparam("init_logstd", 0.1)
  hparams.add_hparam("policy_layers", (100, 100))
  hparams.add_hparam("value_layers", (100, 100))
  hparams.add_hparam("num_agents", 30)
  hparams.add_hparam("clipping_coef", 0.2)
  hparams.add_hparam("gae_gamma", 0.99)
  hparams.add_hparam("gae_lambda", 0.95)
  hparams.add_hparam("entropy_loss_coef", 0.01)
  hparams.add_hparam("value_loss_coef", 1)
  hparams.add_hparam("optimization_epochs", 15)
  hparams.add_hparam("epoch_length", 200)
  hparams.add_hparam("epochs_num", 2000)
  hparams.add_hparam("eval_every_epochs", 10)
  hparams.add_hparam("num_eval_agents", 3)
  hparams.add_hparam("video_during_eval", False)
  hparams.add_hparam("save_models_every_epochs", 30)
  hparams.add_hparam("optimization_batch_size", 50)
  hparams.add_hparam("max_gradients_norm", 0.5)
  hparams.add_hparam("simulated_environment", False)
  hparams.add_hparam("simulation_random_starts", False)
  hparams.add_hparam("intrinsic_reward_scale", 0.)
  return hparams
Author: kltony | Project: tensor2tensor | Lines: 27 | Source: rl.py
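The gae_gamma and gae_lambda fields feed Generalized Advantage Estimation. A minimal NumPy sketch of that computation (illustrative only, not the tensor2tensor rl code; the function name is made up):

import numpy as np

def gae_advantages(rewards, values, gamma=0.99, lam=0.95):
  """Generalized Advantage Estimation. `values` has one extra entry
  (the bootstrap value of the final state)."""
  deltas = rewards + gamma * values[1:] - values[:-1]  # TD residuals.
  advantages = np.zeros_like(rewards)
  running = 0.0
  for t in reversed(range(len(rewards))):
    running = deltas[t] + gamma * lam * running
    advantages[t] = running
  return advantages

# Toy 4-step rollout.
rewards = np.array([1.0, 0.0, 0.0, 1.0])
values = np.array([0.5, 0.4, 0.3, 0.2, 0.0])
print(gae_advantages(rewards, values))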
Example 4: revnet_base
def revnet_base():
  """Default hparams for Revnet."""
  hparams = common_hparams.basic_params1()
  hparams.add_hparam('num_channels', [64, 128, 256, 416])
  hparams.add_hparam('num_layers_per_block', [1, 1, 10, 1])
  hparams.add_hparam('bottleneck', True)
  hparams.add_hparam('first_batch_norm', [False, True, True, True])
  hparams.add_hparam('init_stride', 2)
  hparams.add_hparam('init_kernel_size', 7)
  hparams.add_hparam('init_maxpool', True)
  hparams.add_hparam('strides', [1, 2, 2, 2])
  hparams.add_hparam('num_channels_init_block', 64)
  hparams.add_hparam('dim', '2d')
  # Variable init
  hparams.initializer = 'normal_unit_scaling'
  hparams.initializer_gain = 2.
  # Optimization
  hparams.optimizer = 'Momentum'
  hparams.optimizer_momentum_momentum = 0.9
  hparams.optimizer_momentum_nesterov = True
  hparams.weight_decay = 1e-4
  hparams.clip_grad_norm = 0.0
  # (base_lr=0.1) * (batch_size=128*8 (on TPU, or 8 GPUs)=1024) / (256.)
  hparams.learning_rate = 0.4
  hparams.learning_rate_decay_scheme = 'cosine'
  # For image_imagenet224, 120k training steps, which effectively makes this a
  # cosine decay (i.e. no cycles).
  hparams.learning_rate_cosine_cycle_steps = 120000
  # Can run with a batch size of 128 with Problem ImageImagenet224
  hparams.batch_size = 128
  return hparams
Author: qixiuai | Project: tensor2tensor | Lines: 34 | Source: revnet.py
Example 5: testNeuralGPU
def testNeuralGPU(self):
  hparams = common_hparams.basic_params1()
  batch_size = 3
  input_length = 5
  target_length = input_length
  input_vocab_size = 9
  target_vocab_size = 11
  p_hparams = problem_hparams.test_problem_hparams(input_vocab_size,
                                                   target_vocab_size)
  inputs = -1 + np.random.random_integers(
      input_vocab_size, size=(batch_size, input_length, 1, 1))
  targets = -1 + np.random.random_integers(
      target_vocab_size, size=(batch_size, target_length, 1, 1))
  with self.test_session() as session:
    features = {
        "inputs": tf.constant(inputs, dtype=tf.int32),
        "targets": tf.constant(targets, dtype=tf.int32)
    }
    model = neural_gpu.NeuralGPU(hparams, tf.estimator.ModeKeys.TRAIN,
                                 p_hparams)
    logits, _ = model(features)
    session.run(tf.global_variables_initializer())
    res = session.run(logits)
  self.assertEqual(res.shape, (batch_size, target_length, 1, 1,
                               target_vocab_size))
Author: AranKomat | Project: tensor2tensor | Lines: 25 | Source: neural_gpu_test.py
Example 6: resnet_base
def resnet_base():
  """Set of hyperparameters."""
  # For imagenet on TPU:
  # Set train_steps=120000
  # Set eval_steps=48
  # Base
  hparams = common_hparams.basic_params1()
  # Model-specific parameters
  hparams.add_hparam("layer_sizes", [3, 4, 6, 3])
  hparams.add_hparam("filter_sizes", [64, 64, 128, 256, 512])
  hparams.add_hparam("block_fn", "bottleneck")
  hparams.add_hparam("use_nchw", True)
  # Variable init
  hparams.initializer = "normal_unit_scaling"
  hparams.initializer_gain = 2.
  # Optimization
  hparams.optimizer = "Momentum"
  hparams.optimizer_momentum_momentum = 0.9
  hparams.optimizer_momentum_nesterov = True
  hparams.weight_decay = 1e-4
  hparams.clip_grad_norm = 0.0
  # (base_lr=0.1) * (batch_size=128*8 (on TPU, or 8 GPUs)=1024) / (256.)
  hparams.learning_rate = 0.4
  hparams.learning_rate_decay_scheme = "cosine"
  # For image_imagenet224, 120k training steps, which effectively makes this a
  # cosine decay (i.e. no cycles).
  hparams.learning_rate_cosine_cycle_steps = 120000
  hparams.batch_size = 128
  return hparams
Author: kltony | Project: tensor2tensor | Lines: 34 | Source: resnet.py
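The learning-rate comment above is the linear scaling rule spelled out: 0.1 × (1024 / 256) = 0.4. For the "cosine" decay scheme it is paired with, a single-cycle sketch over the 120k cycle steps might look like this (illustrative only; the actual tensor2tensor schedule also handles warmup and cycling):

import math

def cosine_lr(step, base_lr=0.4, cycle_steps=120000):
  """Single-cycle cosine decay from base_lr down to 0 at cycle_steps."""
  t = min(step, cycle_steps) / float(cycle_steps)
  return 0.5 * base_lr * (1.0 + math.cos(math.pi * t))

for step in (0, 30000, 60000, 90000, 120000):
  print(step, round(cosine_lr(step), 4))  # 0.4, 0.3414, 0.2, 0.0586, 0.0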
Example 7: bluenet_base
def bluenet_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.batch_size = 4096
  hparams.hidden_size = 256
  hparams.dropout = 0.2
  hparams.symbol_dropout = 0.5
  hparams.label_smoothing = 0.1
  hparams.clip_grad_norm = 2.0
  hparams.num_hidden_layers = 8
  hparams.kernel_height = 3
  hparams.kernel_width = 3
  hparams.learning_rate_decay_scheme = "exp10k"
  hparams.learning_rate = 0.05
  hparams.learning_rate_warmup_steps = 3000
  hparams.initializer_gain = 1.0
  hparams.weight_decay = 3.0
  hparams.num_sampled_classes = 0
  hparams.sampling_method = "argmax"
  hparams.optimizer_adam_epsilon = 1e-6
  hparams.optimizer_adam_beta1 = 0.85
  hparams.optimizer_adam_beta2 = 0.997
  hparams.add_hparam("anneal_until", 40000)
  hparams.add_hparam("batch_deviation_loss_factor", 5.0)
  return hparams
Author: AranKomat | Project: tensor2tensor | Lines: 25 | Source: bluenet.py
Example 8: testSymbolModalityTargetsFactored
def testSymbolModalityTargetsFactored(self):
  batch_size = 10
  num_datashards = 5
  length = 6
  height = 7
  hidden_size = 9
  vocab_size = 11
  model_hparams = common_hparams.basic_params1()
  model_hparams.factored_logits = True
  model_hparams.hidden_size = hidden_size
  model_hparams.mode = tf.estimator.ModeKeys.TRAIN
  body_output = -1 + np.random.random_integers(
      100, size=(batch_size, length, height, hidden_size))
  targets = -1 + np.random.random_integers(
      vocab_size, size=(batch_size, length, height, 1))
  m = modalities.SymbolModality(model_hparams, vocab_size)
  data_parallelism = expert_utils.Parallelism(
      ["/device:CPU:0"] * num_datashards)
  with self.test_session() as session:
    sharded_body_output = tf.split(tf.to_float(body_output), num_datashards)
    sharded_targets = tf.split(targets, num_datashards)
    sharded_logits = m.top_sharded(sharded_body_output, sharded_targets,
                                   data_parallelism)
    train_loss = m.loss_sharded(sharded_logits, sharded_targets,
                                data_parallelism)
    logits = tf.concat(sharded_logits, 0)
    session.run(tf.global_variables_initializer())
    res1, res2 = session.run((logits, train_loss))
  self.assertEqual(res1.shape, (batch_size, length, height, 1, vocab_size))
  self.assertEqual(res2.shape, ())
Author: chqiwang | Project: tensor2tensor | Lines: 30 | Source: modalities_test.py
Example 9: ppo_base_v1
def ppo_base_v1():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.learning_rate = 1e-4
  hparams.add_hparam("init_mean_factor", 0.1)
  hparams.add_hparam("init_logstd", 0.1)
  hparams.add_hparam("policy_layers", (100, 100))
  hparams.add_hparam("value_layers", (100, 100))
  hparams.add_hparam("clipping_coef", 0.2)
  hparams.add_hparam("gae_gamma", 0.99)
  hparams.add_hparam("gae_lambda", 0.95)
  hparams.add_hparam("entropy_loss_coef", 0.01)
  hparams.add_hparam("value_loss_coef", 1)
  hparams.add_hparam("optimization_epochs", 15)
  hparams.add_hparam("epoch_length", 200)
  hparams.add_hparam("epochs_num", 2000)
  hparams.add_hparam("eval_every_epochs", 10)
  hparams.add_hparam("save_models_every_epochs", 30)
  hparams.add_hparam("optimization_batch_size", 50)
  hparams.add_hparam("max_gradients_norm", 0.5)
  hparams.add_hparam("intrinsic_reward_scale", 0.)
  hparams.add_hparam("logits_clip", 0.0)
  hparams.add_hparam("dropout_ppo", 0.1)
  hparams.add_hparam("effective_num_agents", None)
  return hparams
Author: qixiuai | Project: tensor2tensor | Lines: 25 | Source: rl.py
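The clipping_coef above is the epsilon of PPO's clipped surrogate objective. A small NumPy sketch of that objective (illustrative only; not the tensor2tensor rl code, and the function name is made up):

import numpy as np

def ppo_clipped_objective(ratio, advantage, clipping_coef=0.2):
  """Clipped surrogate: min(r * A, clip(r, 1 - eps, 1 + eps) * A),
  where r = pi_new(a|s) / pi_old(a|s)."""
  unclipped = ratio * advantage
  clipped = np.clip(ratio, 1.0 - clipping_coef, 1.0 + clipping_coef) * advantage
  return np.minimum(unclipped, clipped)

# With positive advantages, ratios above 1 + eps earn no extra credit.
print(ppo_clipped_objective(np.array([0.5, 1.0, 1.5]), np.ones(3)))  # [0.5 1.  1.2]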
Example 10: shakeshake_cifar10
def shakeshake_cifar10():
  """Parameters for CIFAR-10."""
  tf.logging.warning("shakeshake_cifar10 hparams have not been verified to "
                     "achieve good performance.")
  hparams = common_hparams.basic_params1()
  # This leads to effective batch size 128 when the number of GPUs is 1.
  hparams.batch_size = 4096 * 8
  hparams.hidden_size = 16
  hparams.dropout = 0
  hparams.label_smoothing = 0.0
  hparams.clip_grad_norm = 2.0
  hparams.num_hidden_layers = 26
  hparams.kernel_height = -1  # Unused
  hparams.kernel_width = -1  # Unused
  hparams.learning_rate_decay_scheme = "cosine"
  # Model should be run for 700000 steps with batch size 128 (~1800 epochs).
  hparams.learning_rate_cosine_cycle_steps = 700000
  hparams.learning_rate = 0.2
  hparams.learning_rate_warmup_steps = 3000
  hparams.initializer = "uniform_unit_scaling"
  hparams.initializer_gain = 1.0
  # TODO(rshin): Adjust so that effective value becomes ~1e-4
  hparams.weight_decay = 3.0
  hparams.optimizer = "Momentum"
  hparams.optimizer_momentum_momentum = 0.9
  hparams.add_hparam("base_filters", 16)
  hparams.add_hparam("shakeshake_type", "batch")
  return hparams
Author: zeyu-h | Project: tensor2tensor | Lines: 28 | Source: shake_shake.py
Example 11: testSymbolTupleModalityInputs
def testSymbolTupleModalityInputs(self):
  """Adapted from tensor2tensor/layers/modalities_test.py."""
  batch_size = 10
  num_datashards = 5
  length = 5
  vocab_size = [2000, 500, 2500]
  hidden_size = 9
  model_hparams = common_hparams.basic_params1()
  model_hparams.hidden_size = hidden_size
  model_hparams.mode = tf.estimator.ModeKeys.TRAIN
  x = np.stack([
      -1 + np.random.random_integers(
          vocab_size[i], size=(batch_size, length, 1))
      for i in range(len(vocab_size))
  ], axis=3)
  m = modalities.SymbolTupleModality(model_hparams, vocab_size)
  data_parallelism = expert_utils.Parallelism(
      ['/device:CPU:0'] * num_datashards)
  with self.test_session() as session:
    xs = tf.split(x, num_datashards)
    sharded_output = m.bottom_sharded(xs, data_parallelism)
    output = tf.concat(sharded_output, 0)
    session.run(tf.global_variables_initializer())
    res = session.run(output)
  self.assertEqual(res.shape, (batch_size, length, 1, hidden_size))
Author: cghawthorne | Project: magenta | Lines: 25 | Source: modalities_test.py
Example 12: attention_lm_base
def attention_lm_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.hidden_size = 1024
  hparams.batch_size = 8192
  hparams.max_length = 256
  hparams.dropout = 0.0
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer_adam_epsilon = 1e-9
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate = 0.1
  hparams.learning_rate_warmup_steps = 2000
  hparams.initializer_gain = 1.0
  hparams.num_hidden_layers = 6
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.98
  hparams.label_smoothing = 0.0
  hparams.shared_embedding_and_softmax_weights = False
  hparams.add_hparam("filter_size", 4096)  # Add new ones like this.
  # Attention-related flags.
  hparams.add_hparam("num_heads", 8)
  hparams.add_hparam("attention_key_channels", 0)
  hparams.add_hparam("attention_value_channels", 0)
  # All hyperparameters ending in "dropout" are automatically set to 0.0
  # when not in training mode.
  hparams.add_hparam("attention_dropout", 0.0)
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam("encoder_full_attention", False)
  return hparams
Author: zeyu-h | Project: tensor2tensor | Lines: 33 | Source: attention_lm.py
Example 13: my_very_own_hparams
def my_very_own_hparams():
  # Start with the base set.
  hp = common_hparams.basic_params1()
  # Modify existing hparams.
  hp.num_hidden_layers = 2
  # Add new hparams.
  hp.add_hparam("filter_size", 2048)
  return hp
Author: kltony | Project: tensor2tensor | Lines: 8 | Source: my_submodule.py
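Since the object returned by basic_params1() is a standard HParams instance, a registered set like the one above can still be tweaked at run time. A short usage sketch (the override string is arbitrary; this is essentially what the t2t-trainer --hparams flag does):

hp = my_very_own_hparams()
hp.parse("num_hidden_layers=4,filter_size=1024")  # Comma-separated overrides.
print(hp.num_hidden_layers, hp.filter_size)       # -> 4 1024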
Example 14: lstm_seq2seq
def lstm_seq2seq():
  """hparams for LSTM."""
  hparams = common_hparams.basic_params1()
  hparams.daisy_chain_variables = False
  hparams.batch_size = 1024
  hparams.hidden_size = 128
  hparams.num_hidden_layers = 2
  hparams.initializer = "uniform_unit_scaling"
  hparams.initializer_gain = 1.0
  hparams.weight_decay = 0.0
  return hparams
Author: chqiwang | Project: tensor2tensor | Lines: 11 | Source: lstm.py
Example 15: resnet_base
def resnet_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.add_hparam("layer_sizes", [3, 4, 6, 3])
  hparams.add_hparam("use_nchw", True)
  hparams.add_hparam("num_filters", [64, 128, 256, 512])
  hparams.add_hparam("strides", [1, 2, 2, 2])
  # Can run with a batch size of 128 with Problem ImageImagenet224
  hparams.tpu_batch_size_per_shard = 128
  return hparams
Author: zeyu-h | Project: tensor2tensor | Lines: 11 | Source: resnet.py
Example 16: basic_fc_small
def basic_fc_small():
  """Small fully connected model."""
  hparams = common_hparams.basic_params1()
  hparams.learning_rate = 0.1
  hparams.batch_size = 128
  hparams.hidden_size = 256
  hparams.num_hidden_layers = 2
  hparams.initializer = "uniform_unit_scaling"
  hparams.initializer_gain = 1.0
  hparams.weight_decay = 0.0
  hparams.dropout = 0.0
  return hparams
Author: qixiuai | Project: tensor2tensor | Lines: 12 | Source: basic.py
Example 17: vanilla_gan
def vanilla_gan():
  """Basic parameters for a vanilla_gan."""
  hparams = common_hparams.basic_params1()
  hparams.batch_size = 32
  hparams.label_smoothing = 0.0
  hparams.add_hparam("hidden_dim", 128)
  hparams.add_hparam("random_sample_size", 100)
  hparams.add_hparam("height", 28)
  hparams.add_hparam("width", 28)
  hparams.add_hparam("epsilon", 1e-4)
  return hparams
Author: chqiwang | Project: tensor2tensor | Lines: 12 | Source: vanilla_gan.py
Example 18: transformer_base_v1
def transformer_base_v1():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.norm_type = "layer"
  hparams.hidden_size = 512
  hparams.batch_size = 4096
  hparams.max_length = 256
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer_adam_epsilon = 1e-9
  hparams.learning_rate_schedule = "legacy"
  hparams.learning_rate_decay_scheme = "noam"
  hparams.learning_rate = 0.1
  hparams.learning_rate_warmup_steps = 4000
  hparams.initializer_gain = 1.0
  hparams.num_hidden_layers = 6
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  hparams.optimizer_adam_beta1 = 0.9
  hparams.optimizer_adam_beta2 = 0.98
  hparams.num_sampled_classes = 0
  hparams.label_smoothing = 0.1
  hparams.shared_embedding_and_softmax_weights = True
  hparams.symbol_modality_num_shards = 16
  # Add new ones like this.
  hparams.add_hparam("filter_size", 2048)
  # Layer-related flags. If zero, these fall back on hparams.num_hidden_layers.
  hparams.add_hparam("num_encoder_layers", 0)
  hparams.add_hparam("num_decoder_layers", 0)
  # Attention-related flags.
  hparams.add_hparam("num_heads", 8)
  hparams.add_hparam("attention_key_channels", 0)
  hparams.add_hparam("attention_value_channels", 0)
  hparams.add_hparam("ffn_layer", "dense_relu_dense")
  hparams.add_hparam("parameter_attention_key_channels", 0)
  hparams.add_hparam("parameter_attention_value_channels", 0)
  # All hyperparameters ending in "dropout" are automatically set to 0.0
  # when not in training mode.
  hparams.add_hparam("attention_dropout", 0.0)
  hparams.add_hparam("attention_dropout_broadcast_dims", "")
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("relu_dropout_broadcast_dims", "")
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam("nbr_decoder_problems", 1)
  hparams.add_hparam("proximity_bias", False)
  hparams.add_hparam("use_pad_remover", True)
  hparams.add_hparam("self_attention_type", "dot_product")
  hparams.add_hparam("max_relative_position", 0)
  return hparams
Author: chqiwang | Project: tensor2tensor | Lines: 49 | Source: transformer.py
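The learning_rate_decay_scheme = "noam" line refers to the warmup-then-inverse-square-root schedule from the Transformer paper. A sketch of its shape (the tensor2tensor implementation further scales this by hparams.learning_rate and internal constants, so treat the absolute values as illustrative only):

import math

def noam_lr(step, hidden_size=512, warmup_steps=4000):
  """Linear warmup for warmup_steps, then decay proportional to 1/sqrt(step)."""
  step = max(step, 1)
  return hidden_size ** -0.5 * min(step ** -0.5, step * warmup_steps ** -1.5)

for step in (100, 4000, 100000):
  print(step, round(noam_lr(step), 6))  # Rises until warmup ends, then decays.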
Example 19: transformer_symshard_base
def transformer_symshard_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.hidden_size = 256
  hparams.batch_size = 2048
  hparams.max_length = 0
  # All hyperparameters ending in "dropout" are automatically set to 0.0
  # when not in training mode.
  hparams.layer_prepostprocess_dropout = 0.2
  hparams.add_hparam("attention_dropout", 0.1)
  hparams.add_hparam("relu_dropout", 0.0)
  hparams.add_hparam("relu_dropout_broadcast_dims", "1")
  hparams.layer_prepostprocess_dropout = 0.1
  hparams.layer_prepostprocess_dropout_broadcast_dims = "1"  # length
  hparams.label_smoothing = 0.1
  hparams.clip_grad_norm = 0.  # i.e. no gradient clipping
  hparams.optimizer = "Adafactor"
  hparams.learning_rate_schedule = "rsqrt_decay"
  hparams.learning_rate_warmup_steps = 10000
  hparams.initializer_gain = 1.0
  hparams.initializer = "uniform_unit_scaling"
  hparams.weight_decay = 0.0
  # TODO(noam): use this to control sharing. We now share always.
  hparams.shared_embedding_and_softmax_weights = True
  # We only want one data shard.
  hparams.no_data_parallelism = True
  # Bypass the symbol modality so that we can use model parallelism.
  hparams.modality = {
      "inputs": modalities.IdentitySymbolModality,
      "targets": modalities.IdentitySymbolModality,
  }
  hparams.add_hparam("filter_size", 1280)
  hparams.add_hparam("mix_fraction", 0.5)
  # Attention-related flags.
  hparams.add_hparam("multihead_attention_num_heads", 4)
  hparams.add_hparam("multihead_attention_key_channels", 0)
  hparams.add_hparam("multihead_attention_value_channels", 0)
  hparams.add_hparam("pos", "timing")  # timing, none
  hparams.add_hparam(
      "encoder_layers", ("n,att,m,d,a," "n,ffn,m,d,a,") * 6 + "n,d")
  hparams.add_hparam(
      "decoder_layers",
      ("n,att,m,d,a," "n,enc-att,m,d,a," "n,ffn,m,d,a,") * 6 + "n,d")
  # Number of model shards - each one has separate parameters.
  # Changing this number invalidates checkpoints.
  hparams.add_hparam("num_model_shards", 8)
  return hparams
Author: qixiuai | Project: tensor2tensor | Lines: 47 | Source: transformer_symshard.py
Example 20: mtf_image_transformer_base
def mtf_image_transformer_base():
  """Set of hyperparameters."""
  hparams = common_hparams.basic_params1()
  hparams.no_data_parallelism = True
  hparams.use_fixed_batch_size = True
  hparams.batch_size = 1
  hparams.max_length = 3072
  hparams.hidden_size = 256
  hparams.label_smoothing = 0.0
  # 8-way model-parallelism
  hparams.add_hparam("mesh_shape", "batch:8")
  hparams.add_hparam("layout", "batch:batch")
  hparams.add_hparam("mtf_mode", True)
  hparams.add_hparam("num_heads", 8)
  hparams.add_hparam("filter_size", 1024)
  hparams.add_hparam("num_encoder_layers", 0)
  hparams.add_hparam("num_decoder_layers", 6)
  hparams.add_hparam("attention_key_size", 256)
  hparams.add_hparam("attention_value_size", 256)
  # Share weights between input and target embeddings.
  hparams.shared_embedding = True
  # Mixture-of-experts hparams.
  hparams.add_hparam("ffn_layer", "dense_relu_dense")
  hparams.add_hparam("moe_overhead_train", 1.0)
  hparams.add_hparam("moe_overhead_eval", 2.0)
  hparams.moe_num_experts = 16
  hparams.moe_loss_coef = 1e-3
  hparams.shared_embedding_and_softmax_weights = True
  hparams.optimizer = "Adafactor"
  hparams.learning_rate_schedule = "rsqrt_decay"
  hparams.learning_rate_warmup_steps = 10000
  hparams.add_hparam("d_kv", 64)
  hparams.add_hparam("d_ff", 2048)
  # Image-related hparams.
  hparams.add_hparam("img_len", 32)
  hparams.add_hparam("num_channels", 3)
  hparams.add_hparam("unconditional", True)
  # Local-attention-related params.
  hparams.add_hparam("block_length", 128)
  hparams.add_hparam("block_height", 16)
  hparams.add_hparam("block_width", 16)
  hparams.add_hparam("attention_type", "local1d")
  return hparams
Author: qixiuai | Project: tensor2tensor | Lines: 47 | Source: mtf_image_transformer.py
Note: The tensor2tensor.layers.common_hparams.basic_params1 examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. Please consult each project's license before redistributing or reusing the code, and do not republish without permission.