{
  "add_lora": "False",
  "apply_kd": "False",
  "begintask_idx": "0",
  "compute_replay_cross_entropy": "False",
  "cross_replay": "False",
  "data_path": "'/mnt/data/minghaoli/projects/api-bank-training/output-0620/lv1-lv2-lv3-train.json'",
  "deepscale": "False",
  "deepscale_config": "None",
  "deepspeed": "True",
  "deepspeed_config": "'/mnt/data/yingxiu/EFLOP/CODE/API-Bank/configs/ds_new_config.json'",
  "deepspeed_mpi": "False",
  "distill_first": "False",
  "exp": "'0620eflop_api_response_lv1_lv2_lv3_run0'",
  "gradient_accumulation_steps": "16",
  "kd_curr": "False",
  "kd_prev": "False",
  "local_rank": "2",
  "logging_dir": "'/mnt/data/yingxiu/EFLOP/LLMOUT/apitblogs/0620eflop_api_response_lv1_lv2_lv3_run0'",
  "lora_alpha": "32",
  "lora_r": "8",
  "lr": "2e-05",
  "max_grad_norm": "1.0",
  "max_train_steps": "-1",
  "model_max_length": "2048",
  "model_name_or_path": "'/mnt/data/yingxiu/EFLOP/MODELS/chavinlo-alpaca-native'",
  "num_epochs": "3",
  "num_fuse": "-1",
  "optimizer": "'AdamW'",
  "output_dir": "'/mnt/data/yingxiu/EFLOP/LLMOUT/apioutputs/0620eflop_api_response_lv1_lv2_lv3_run0'",
  "per_device_eval_batch_size": "4",
  "per_device_train_batch_size": "1",
  "print_steps": "20",
  "progressive_fuse": "False",
  "pseudo_instruction_path": "None",
  "replay": "False",
  "replay_first": "False",
  "replay_num": "500",
  "replay_ratio": "0.02",
  "scheduler": "'cosine'",
  "seed": "42",
  "tasks": "['debug']",
  "train_curr": "True",
  "twolora": "False",
  "use_pseudo": "False",
  "val_size": "2000",
  "warmup_ratio": "0.03",
  "warmup_steps": "0",
  "weight_decay": "0.0"
}