# Serialized model configuration. The root mapping is tagged !ModelConfig and
# each nested section carries its own application tag (!DataConfig,
# !TransformerConfig, !EmbeddingConfig, !LossConfig), so the consumer must
# register constructors for these tags to load the file.
# NOTE(review): this looks machine-generated -- confirm before hand-editing.
!ModelConfig
config_data: !DataConfig
  # Corpus statistics. The three per-bucket lists below
  # (average_len_target_per_bucket, buckets, num_sents_per_bucket) each hold
  # 12 entries and are presumably index-aligned -- TODO confirm.
  data_statistics: !DataStatistics
    average_len_target_per_bucket:
    - 6.075394407762467
    - 13.5217993203292
    - 22.17636290425995
    - 31.063518715307275
    - 40.245343656474404
    - 49.23867075571288
    - 58.56875369202314
    - 68.33582061837383
    - 72.61340015501695
    - 67.74327701094705
    - 62.73646425366436
    - 67.12499999999999
    # Each bucket is a two-element sequence tagged !!python/tuple, a
    # Python-specific tag that safe YAML loaders reject.
    # NOTE(review): presumably (source length, target length) -- confirm.
    buckets:
    - !!python/tuple
      - 9
      - 10
    - !!python/tuple
      - 18
      - 20
    - !!python/tuple
      - 27
      - 30
    - !!python/tuple
      - 36
      - 40
    - !!python/tuple
      - 45
      - 50
    - !!python/tuple
      - 54
      - 60
    - !!python/tuple
      - 63
      - 70
    - !!python/tuple
      - 72
      - 80
    - !!python/tuple
      - 81
      - 90
    - !!python/tuple
      - 90
      - 100
    - !!python/tuple
      - 99
      - 101
    - !!python/tuple
      - 101
      - 101
    length_ratio_mean: 1.1733827330799247
    length_ratio_std: 0.4908778147879578
    # Both observed maxima equal the max_seq_len_* limits (101) set below.
    max_observed_len_source: 101
    max_observed_len_target: 101
    num_discarded: 3512
    num_sents: 35909760
    # The 12 counts below sum to num_sents (35909760).
    num_sents_per_bucket:
    - 9761281
    - 9958338
    - 6832083
    - 4360762
    - 2502339
    - 1340491
    - 670364
    - 350047
    - 113536
    - 16808
    - 3343
    - 368
    num_tokens_source: 660500004
    num_tokens_target: 720441559
    num_unks_source: 5469537
    num_unks_target: 48167
    # Same value (30004) as every vocab_size / vocab_*_size key in this file.
    size_vocab_source: 30004
    size_vocab_target: 30004
  # Same limits (101) as max_seq_len_* in config_encoder and config_decoder.
  max_seq_len_source: 101
  max_seq_len_target: 101
  # Matches num_factors in config_embed_source.
  num_source_factors: 1
  source_with_eos: true
# Decoder settings. Every key/value here is identical to config_encoder;
# NOTE(review): whether the two must stay in sync is not visible here.
config_decoder: !TransformerConfig
  act_type: relu
  attention_heads: 8
  conv_config: null
  dropout_act: 0.1
  dropout_attention: 0.1
  dropout_prepost: 0.1
  dtype: float32
  feed_forward_num_hidden: 1024
  # Both `lhuc` and `use_lhuc` (below) are present and false.
  # NOTE(review): unclear which one the loader reads -- keep them consistent.
  lhuc: false
  # Same limits (101) as config_data.max_seq_len_*.
  max_seq_len_source: 101
  max_seq_len_target: 101
  # Same width (512) as num_embed in both embedding sections.
  model_size: 512
  num_layers: 4
  positional_embedding_type: fixed
  # NOTE(review): `dr` / `n` look like per-letter step codes
  # (presumably d=dropout, r=residual, n=normalization) -- confirm.
  postprocess_sequence: dr
  preprocess_sequence: n
  use_lhuc: false
# Source-side embedding settings; values are identical to config_embed_target.
config_embed_source: !EmbeddingConfig
  dropout: 0.0
  dtype: float32
  factor_configs: null
  # Same width (512) as model_size in config_encoder / config_decoder.
  num_embed: 512
  # Matches config_data.num_source_factors (1); factor_configs is null.
  num_factors: 1
  source_factors_combine: concat
  # Matches data_statistics.size_vocab_source and vocab_source_size (30004).
  vocab_size: 30004
# Target-side embedding settings; values are identical to config_embed_source.
config_embed_target: !EmbeddingConfig
  dropout: 0.0
  dtype: float32
  factor_configs: null
  # Same width (512) as model_size in config_encoder / config_decoder.
  num_embed: 512
  num_factors: 1
  # NOTE(review): a source_* key on the target embedding; presumably unused
  # on this side -- confirm.
  source_factors_combine: concat
  # Matches data_statistics.size_vocab_target and vocab_target_size (30004).
  vocab_size: 30004
# Encoder settings. Every key/value here is identical to config_decoder;
# NOTE(review): whether the two must stay in sync is not visible here.
config_encoder: !TransformerConfig
  act_type: relu
  attention_heads: 8
  conv_config: null
  dropout_act: 0.1
  dropout_attention: 0.1
  dropout_prepost: 0.1
  dtype: float32
  feed_forward_num_hidden: 1024
  # Both `lhuc` and `use_lhuc` (below) are present and false.
  # NOTE(review): unclear which one the loader reads -- keep them consistent.
  lhuc: false
  # Same limits (101) as config_data.max_seq_len_*.
  max_seq_len_source: 101
  max_seq_len_target: 101
  # Same width (512) as num_embed in both embedding sections.
  model_size: 512
  num_layers: 4
  positional_embedding_type: fixed
  # NOTE(review): `dr` / `n` look like per-letter step codes
  # (presumably d=dropout, r=residual, n=normalization) -- confirm.
  postprocess_sequence: dr
  preprocess_sequence: n
  use_lhuc: false
# Training loss settings.
config_loss: !LossConfig
  label_smoothing: 0.1
  name: cross-entropy
  # NOTE(review): meaning of `valid` is not visible here; presumably
  # normalizes by the number of non-padding tokens -- confirm.
  normalization_type: valid
  # Matches data_statistics.size_vocab_target and vocab_target_size (30004).
  vocab_size: 30004
# Model-level flags on the root !ModelConfig mapping.
# `lhuc` here is false, as are lhuc / use_lhuc in the encoder and decoder.
lhuc: false
# Both sizes equal the vocab_size used by the embedding and loss sections
# (30004).
vocab_source_size: 30004
vocab_target_size: 30004
weight_normalization: false
# Weight tying is off, and weight_tying_type is null accordingly.
weight_tying: false
weight_tying_type: null
