# Web-page residue captured along with this file; not part of the configuration.
# lvwerra's picture
# lvwerra HF staff
# step 15000
# acc8d6f
# raw
# history blame
# 1.38 kB
# Weights & Biases run configuration (format version in `wandb_version`).
# Every entry below is a mapping with two keys: `desc` (null for all entries
# in this file) and `value` (the recorded setting).
wandb_version: 1

# Run metadata: library/interpreter versions and environment flags.
_wandb:
  desc: null
  value:
    cli_version: 0.12.2
    framework: huggingface
    huggingface_version: 4.12.2
    is_jupyter_run: false
    is_kaggle_kernel: false
    python_version: 3.8.11
    # NOTE(review): looks like Unix epoch seconds -- confirm.
    start_time: 1636233370
    # Integer-keyed table. Entries 4, 5 and 6 repeat python_version,
    # cli_version and huggingface_version from above. The meaning of the
    # remaining codes is not visible in this file -- presumably wandb
    # telemetry identifiers; TODO confirm.
    t:
      1:
        - 1
        - 11
      3:
        - 16
      4: 3.8.11
      5: 0.12.2
      6: 4.12.2
      8:
        - 5

# Recorded run settings. Several values are stored as strings rather than
# native YAML types ('True', '16', '0', and the plain scalar None); they are
# kept exactly as recorded, so consumers must convert them explicitly.
backend:
  desc: null
  value: nccl
deepspeed_plugin:
  desc: null
  value: None  # plain scalar: loads as the string "None", not YAML null
device:
  desc: null
  value: cuda:0
distributed_type:
  desc: null
  value: DistributedType.MULTI_GPU
gradient_accumulation_steps:
  desc: null
  value: 1
gradient_checkpointing:
  desc: null
  value: false
initialized:
  desc: null
  value: 'True'  # quoted: a string, not a boolean
learning_rate:
  desc: null
  value: 0.0005
local_process_index:
  desc: null
  value: '0'  # quoted: a string, not an integer
lr_scheduler_type:
  desc: null
  value: cosine
max_eval_steps:
  desc: null
  value: -1
max_train_steps:
  desc: null
  value: 150000
num_processes:
  desc: null
  value: '16'  # quoted: a string, not an integer
num_warmup_steps:
  desc: null
  value: 2000
process_index:
  desc: null
  value: '0'  # quoted: a string, not an integer
save_checkpoint_steps:
  desc: null
  value: 15000
seed:
  desc: null
  value: 1
seq_length:
  desc: null
  value: 1024
shuffle_buffer:
  desc: null
  value: 1000
train_batch_size:
  desc: null
  value: 12
use_fp16:
  desc: null
  value: 'True'  # quoted: a string, not a boolean
valid_batch_size:
  desc: null
  value: 12
weight_decay:
  desc: null
  value: 0.1