!ModelConfig
# Data configuration: statistics recorded for the training corpus, plus the
# sequence-length limits and source-factor settings stored alongside them.
config_data: !DataConfig
  data_statistics: !DataStatistics
    # 12 entries, matching the 12 buckets listed under `buckets`.
    average_len_target_per_bucket:
    - 4.51099398085715
    - 13.470692179532469
    - 22.478389764393924
    - 31.02405649966898
    - 39.40296897090557
    - 47.61306532663318
    - 53.78662126693611
    - 63.26897762176396
    - 70.94335788671569
    - 77.0696832579184
    - 81.85032139577592
    - 84.21917808219183
    # Each bucket is a 2-element Python tuple, written here in flow style.
    # NOTE(review): presumably (source length, target length) — confirm
    # against the consumer. The !!python/tuple tag is language-specific and
    # is rejected by safe YAML loaders.
    buckets:
    - !!python/tuple [9, 10]
    - !!python/tuple [18, 20]
    - !!python/tuple [27, 30]
    - !!python/tuple [36, 40]
    - !!python/tuple [45, 50]
    - !!python/tuple [54, 60]
    - !!python/tuple [63, 70]
    - !!python/tuple [72, 80]
    - !!python/tuple [81, 90]
    - !!python/tuple [90, 100]
    - !!python/tuple [99, 101]
    - !!python/tuple [101, 101]
    length_ratio_mean: 1.096520828033781
    length_ratio_std: 0.4227809108350488
    max_observed_len_source: 101
    max_observed_len_target: 101
    num_discarded: 1140
    num_sents: 372006
    # 12 entries; they sum to num_sents (372006).
    num_sents_per_bucket:
    - 121612
    - 79228
    - 57681
    - 40779
    - 26878
    - 17512
    - 14097
    - 6837
    - 3937
    - 2210
    - 1089
    - 146
    num_tokens_source: 7617718
    num_tokens_target: 7812285
    num_unks_source: 3005
    num_unks_target: 3558
    size_vocab_source: 4004
    size_vocab_target: 4004
  # Both limits equal the max_observed_len_* values recorded above (101).
  max_seq_len_source: 101
  max_seq_len_target: 101
  num_source_factors: 1
  source_with_eos: true
# Decoder settings (TransformerConfig). Every value here is identical to the
# corresponding value under config_encoder in this file.
config_decoder: !TransformerConfig
  act_type: relu
  attention_heads: 8
  conv_config: null
  # All three dropout rates are set to the same value.
  dropout_act: 0.1
  dropout_attention: 0.1
  dropout_prepost: 0.1
  dtype: float32
  feed_forward_num_hidden: 1024
  # NOTE(review): both `lhuc` and `use_lhuc` are present (both false); which
  # one the consumer reads cannot be determined from this file.
  lhuc: false
  max_seq_len_source: 101
  max_seq_len_target: 101
  # Equals num_embed (256) in both embedding configs of this file.
  model_size: 256
  num_layers: 4
  positional_embedding_type: fixed
  # NOTE(review): `dr` / `n` look like per-character step codes; their
  # meaning is defined by the consumer — confirm there.
  postprocess_sequence: dr
  preprocess_sequence: n
  use_lhuc: false
# Source-side embedding settings (EmbeddingConfig). The values are identical
# to those under config_embed_target in this file.
config_embed_source: !EmbeddingConfig
  dropout: 0.0
  dtype: float32
  factor_configs: null
  # Equals model_size (256) in the encoder and decoder configs.
  num_embed: 256
  num_factors: 1
  source_factors_combine: concat
  # Equals vocab_source_size at the top level of this file (4004).
  vocab_size: 4004
# Target-side embedding settings (EmbeddingConfig). The values are identical
# to those under config_embed_source in this file.
config_embed_target: !EmbeddingConfig
  dropout: 0.0
  dtype: float32
  factor_configs: null
  # Equals model_size (256) in the encoder and decoder configs.
  num_embed: 256
  num_factors: 1
  # NOTE(review): a source_* key also appears on the target embedding;
  # whether the consumer uses it here cannot be told from this file.
  source_factors_combine: concat
  # Equals vocab_target_size at the top level of this file (4004).
  vocab_size: 4004
# Encoder settings (TransformerConfig). Every value here is identical to the
# corresponding value under config_decoder in this file.
config_encoder: !TransformerConfig
  act_type: relu
  attention_heads: 8
  conv_config: null
  # All three dropout rates are set to the same value.
  dropout_act: 0.1
  dropout_attention: 0.1
  dropout_prepost: 0.1
  dtype: float32
  feed_forward_num_hidden: 1024
  # NOTE(review): both `lhuc` and `use_lhuc` are present (both false); which
  # one the consumer reads cannot be determined from this file.
  lhuc: false
  max_seq_len_source: 101
  max_seq_len_target: 101
  # Equals num_embed (256) in both embedding configs of this file.
  model_size: 256
  num_layers: 4
  positional_embedding_type: fixed
  # NOTE(review): `dr` / `n` look like per-character step codes; their
  # meaning is defined by the consumer — confirm there.
  postprocess_sequence: dr
  preprocess_sequence: n
  use_lhuc: false
# Loss settings (LossConfig).
config_loss: !LossConfig
  label_smoothing: 0.1
  name: cross-entropy
  normalization_type: valid
  # Same value as the vocabulary sizes recorded elsewhere in this file (4004).
  vocab_size: 4004
# Model-level fields of the root ModelConfig object.
lhuc: false
# Both sizes equal the size_vocab_* values under config_data (4004).
vocab_source_size: 4004
vocab_target_size: 4004
weight_normalization: false
# Weight tying is off, and its type is left null.
weight_tying: false
weight_tying_type: null
