|
wandb_version: 1 |
|
|
|
_wandb: |
|
desc: null |
|
value: |
|
cli_version: 0.12.2 |
|
framework: huggingface |
|
huggingface_version: 4.12.2 |
|
is_jupyter_run: false |
|
is_kaggle_kernel: false |
|
python_version: 3.8.11 |
|
start_time: 1636233370 |
|
t: |
|
1: |
|
- 1 |
|
- 11 |
|
3: |
|
- 16 |
|
4: 3.8.11 |
|
5: 0.12.2 |
|
6: 4.12.2 |
|
8: |
|
- 5 |
|
backend: |
|
desc: null |
|
value: nccl |
|
deepspeed_plugin: |
|
desc: null |
|
value: None |
|
device: |
|
desc: null |
|
value: cuda:0 |
|
distributed_type: |
|
desc: null |
|
value: DistributedType.MULTI_GPU |
|
gradient_accumulation_steps: |
|
desc: null |
|
value: 1 |
|
gradient_checkpointing: |
|
desc: null |
|
value: false |
|
initialized: |
|
desc: null |
|
value: 'True' |
|
learning_rate: |
|
desc: null |
|
value: 0.0005 |
|
local_process_index: |
|
desc: null |
|
value: '0' |
|
lr_scheduler_type: |
|
desc: null |
|
value: cosine |
|
max_eval_steps: |
|
desc: null |
|
value: -1 |
|
max_train_steps: |
|
desc: null |
|
value: 150000 |
|
num_processes: |
|
desc: null |
|
value: '16' |
|
num_warmup_steps: |
|
desc: null |
|
value: 2000 |
|
process_index: |
|
desc: null |
|
value: '0' |
|
save_checkpoint_steps: |
|
desc: null |
|
value: 15000 |
|
seed: |
|
desc: null |
|
value: 1 |
|
seq_length: |
|
desc: null |
|
value: 1024 |
|
shuffle_buffer: |
|
desc: null |
|
value: 1000 |
|
train_batch_size: |
|
desc: null |
|
value: 12 |
|
use_fp16: |
|
desc: null |
|
value: 'True' |
|
valid_batch_size: |
|
desc: null |
|
value: 12 |
|
weight_decay: |
|
desc: null |
|
value: 0.1 |
|
|