{
  "valid_metrics": [
    "BLEU"
  ],
  "n_sample": 0,
  "tgt_vocab_size": 20000,
  "vocab_size_multiple": 8,
  "save_data": "data",
  "transforms": [
    "sentencepiece",
    "filtertoolong"
  ],
  "overwrite": true,
  "src_vocab_size": 20000,
  "tensorboard_log_dir_dated": "tensorboard/Sep-01_20-43-08",
  "share_vocab": false,
  "tgt_vocab": "th.eole.vocab",
  "report_every": 100,
  "tensorboard_log_dir": "tensorboard",
  "src_vocab": "en.eole.vocab",
  "tensorboard": true,
  "seed": 1234,
  "training": {
    "accum_steps": [
      0
    ],
    "model_path": "quickmt-en-th-eole-model",
    "max_grad_norm": 0.0,
    "dropout_steps": [
      0
    ],
    "learning_rate": 2.0,
    "decay_method": "noam",
    "accum_count": [
      10
    ],
    "batch_type": "tokens",
    "attention_dropout": [
      0.1
    ],
    "gpu_ranks": [
      0
    ],
    "compute_dtype": "torch.float16",
    "prefetch_factor": 32,
    "bucket_size": 128000,
    "num_workers": 0,
    "batch_size_multiple": 8,
    "optim": "adamw",
    "train_steps": 100000,
    "average_decay": 0.0001,
    "normalization": "tokens",
    "param_init_method": "xavier_uniform",
    "label_smoothing": 0.1,
    "batch_size": 8000,
    "warmup_steps": 4000,
    "valid_batch_size": 4096,
    "valid_steps": 5000,
    "adam_beta2": 0.998,
    "world_size": 1,
    "dropout": [
      0.1
    ],
    "save_checkpoint_steps": 5000,
    "keep_checkpoint": 4
  },
  "transforms_configs": {
    "filtertoolong": {
      "src_seq_length": 256,
      "tgt_seq_length": 256
    },
    "sentencepiece": {
      "src_subword_model": "${MODEL_PATH}/en.spm.model",
      "tgt_subword_model": "${MODEL_PATH}/th.spm.model"
    }
  },
  "data": {
    "corpus_1": {
      "path_src": "train.en",
      "path_align": null,
      "path_tgt": "train.th",
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ]
    },
    "valid": {
      "path_src": "valid.en",
      "path_align": null,
      "path_tgt": "valid.th",
      "transforms": [
        "sentencepiece",
        "filtertoolong"
      ]
    }
  },
  "model": {
    "hidden_size": 1024,
    "share_embeddings": false,
    "position_encoding_type": "SinusoidalInterleaved",
    "share_decoder_embeddings": false,
    "transformer_ff": 4096,
    "heads": 8,
    "architecture": "transformer",
    "embeddings": {
      "src_word_vec_size": 1024,
      "word_vec_size": 1024,
      "position_encoding_type": "SinusoidalInterleaved",
      "tgt_word_vec_size": 1024
    },
    "decoder": {
      "tgt_word_vec_size": 1024,
      "hidden_size": 1024,
      "decoder_type": "transformer",
      "n_positions": null,
      "position_encoding_type": "SinusoidalInterleaved",
      "transformer_ff": 4096,
      "heads": 8,
      "layers": 2
    },
    "encoder": {
      "hidden_size": 1024,
      "encoder_type": "transformer",
      "src_word_vec_size": 1024,
      "n_positions": null,
      "position_encoding_type": "SinusoidalInterleaved",
      "transformer_ff": 4096,
      "heads": 8,
      "layers": 8
    }
  }
}