# Serialized model configuration. The `!Name` tags are application-defined
# and `!!python/tuple` is Python-specific, so a safe YAML loader without
# matching constructors will reject this file.
!ModelConfig
# Data-side settings: corpus statistics followed by sequence-length limits.
config_data: !DataConfig
  # Statistics over the bucketed parallel data (presumably the training
  # set -- confirm against the producer).
  data_statistics: !DataStatistics
    # 13 entries, one per entry of `buckets` below; presumably index-aligned
    # with it -- TODO confirm.
    average_len_target_per_bucket:
    - 6.10284636288689
    - 12.536993737509563
    - 21.081802763510098
    - 29.540386918275082
    - 37.554834536465805
    - 45.404880205457694
    - 53.387818836018646
    - 61.549137453381974
    - 67.85985072820927
    - 71.67227158935447
    - 73.54065071770343
    - 73.65676735405214
    - 73.59915884826914
    # 13 two-element tuples. First elements step by 8 (8..96), second
    # elements by 10 (10..100); both are capped at 101, the max_seq_len_*
    # value below. Presumably (source, target) length bounds -- TODO confirm.
    buckets:
    - !!python/tuple
      - 8
      - 10
    - !!python/tuple
      - 16
      - 20
    - !!python/tuple
      - 24
      - 30
    - !!python/tuple
      - 32
      - 40
    - !!python/tuple
      - 40
      - 50
    - !!python/tuple
      - 48
      - 60
    - !!python/tuple
      - 56
      - 70
    - !!python/tuple
      - 64
      - 80
    - !!python/tuple
      - 72
      - 90
    - !!python/tuple
      - 80
      - 100
    - !!python/tuple
      - 88
      - 101
    - !!python/tuple
      - 96
      - 101
    - !!python/tuple
      - 101
      - 101
    # Not the corpus-level token ratio: num_tokens_target / num_tokens_source
    # is about 1.09. Presumably a per-sentence ratio -- confirm the direction.
    length_ratio_mean: 1.186632233857654
    length_ratio_std: 0.40395285865136443
    # Both observed maxima equal the max_seq_len_* limits (101).
    max_observed_len_source: 101
    max_observed_len_target: 101
    # Presumably sentence pairs dropped during preparation -- confirm.
    num_discarded: 6264
    num_sents: 49177708
    # 13 counts, one per bucket; they sum to num_sents (49177708).
    num_sents_per_bucket:
    - 19078769
    - 14315869
    - 5564812
    - 3766170
    - 2657504
    - 1680544
    - 999691
    - 590171
    - 314058
    - 141007
    - 52250
    - 13772
    - 3091
    num_tokens_source: 758934830
    num_tokens_target: 826788620
    # Presumably counts of out-of-vocabulary tokens -- confirm.
    num_unks_source: 103949
    num_unks_target: 8371
    # Same value (30004) as every vocab_size / vocab_*_size key in this file.
    size_vocab_source: 30004
    size_vocab_target: 30004
  # Same limits (101) as in config_encoder and config_decoder.
  max_seq_len_source: 101
  max_seq_len_target: 101
  # Matches num_factors in the embedding configs.
  num_source_factors: 1
  source_with_eos: true
# Transformer decoder hyperparameters. Every value here is the same as the
# corresponding key in config_encoder.
config_decoder: !TransformerConfig
  act_type: relu
  attention_heads: 8
  conv_config: null
  # All three dropout rates are 0.1.
  dropout_act: 0.1
  dropout_attention: 0.1
  dropout_prepost: 0.1
  dtype: float32
  # 2 x model_size.
  feed_forward_num_hidden: 1024
  # NOTE(review): both `lhuc` and `use_lhuc` (last key) are present;
  # presumably one is a legacy alias -- confirm which one the loader reads.
  lhuc: false
  # Same limits as config_data.max_seq_len_source / max_seq_len_target.
  max_seq_len_source: 101
  max_seq_len_target: 101
  # Equals num_embed in both embedding configs.
  model_size: 512
  num_layers: 4
  positional_embedding_type: fixed
  # Op-code strings applied around each sub-layer; presumably d = dropout,
  # r = residual, n = layer norm -- confirm against the consumer.
  postprocess_sequence: dr
  # Quoted on purpose: a bare `n` is a boolean literal under YAML 1.1
  # resolution, and this value must stay the string "n".
  preprocess_sequence: 'n'
  use_lhuc: false
# Source-side embedding settings.
config_embed_source: !EmbeddingConfig
  dropout: 0.0
  dtype: float32
  # No additional factor configs; consistent with num_factors: 1 below.
  factor_configs: null
  # Equals model_size in config_encoder and config_decoder.
  num_embed: 512
  num_factors: 1
  source_factors_combine: concat
  # Matches config_data.data_statistics.size_vocab_source.
  vocab_size: 30004
# Target-side embedding settings; the values are the same as in
# config_embed_source.
config_embed_target: !EmbeddingConfig
  dropout: 0.0
  dtype: float32
  factor_configs: null
  # Equals model_size in config_encoder and config_decoder.
  num_embed: 512
  num_factors: 1
  # NOTE(review): a source-factor key on the target side; presumably
  # serialized only because the config type is shared -- confirm it is unused.
  source_factors_combine: concat
  # Matches config_data.data_statistics.size_vocab_target.
  vocab_size: 30004
# Transformer encoder hyperparameters. Every value here is the same as the
# corresponding key in config_decoder.
config_encoder: !TransformerConfig
  act_type: relu
  attention_heads: 8
  conv_config: null
  # All three dropout rates are 0.1.
  dropout_act: 0.1
  dropout_attention: 0.1
  dropout_prepost: 0.1
  dtype: float32
  # 2 x model_size.
  feed_forward_num_hidden: 1024
  # NOTE(review): both `lhuc` and `use_lhuc` (last key) are present;
  # presumably one is a legacy alias -- confirm which one the loader reads.
  lhuc: false
  # Same limits as config_data.max_seq_len_source / max_seq_len_target.
  max_seq_len_source: 101
  max_seq_len_target: 101
  # Equals num_embed in both embedding configs.
  model_size: 512
  num_layers: 4
  positional_embedding_type: fixed
  # Op-code strings applied around each sub-layer; presumably d = dropout,
  # r = residual, n = layer norm -- confirm against the consumer.
  postprocess_sequence: dr
  # Quoted on purpose: a bare `n` is a boolean literal under YAML 1.1
  # resolution, and this value must stay the string "n".
  preprocess_sequence: 'n'
  use_lhuc: false
# Training loss settings.
config_loss: !LossConfig
  label_smoothing: 0.1
  name: cross-entropy
  # Presumably normalizes the loss by the number of valid (non-padding)
  # tokens -- confirm against the consumer.
  normalization_type: valid
  # Same value as the target vocabulary size elsewhere in this file.
  vocab_size: 30004
# Model-level switches and vocabulary sizes.
lhuc: false
# Both equal the vocab_size values in the embedding and loss configs.
vocab_source_size: 30004
vocab_target_size: 30004
weight_normalization: false
# Weight tying is off, so no tying type is set.
weight_tying: false
weight_tying_type: null
