Update model_config.yaml
Browse files- model_config.yaml +7 -7
model_config.yaml
CHANGED
@@ -80,7 +80,7 @@ reduce_amax: true
|
|
80 |
use_emha: false
|
81 |
optim:
|
82 |
name: distributed_fused_adam
|
83 |
-
lr: 3.
|
84 |
weight_decay: 0.1
|
85 |
betas:
|
86 |
- 0.9
|
@@ -110,7 +110,7 @@ data:
|
|
110 |
num_workers: 2
|
111 |
dataloader_type: single
|
112 |
train_ds:
|
113 |
-
file_path: /dataset/
|
114 |
global_batch_size: 128
|
115 |
micro_batch_size: 1
|
116 |
shuffle: true
|
@@ -153,7 +153,7 @@ data:
|
|
153 |
hf_dataset: true
|
154 |
truncation_method: right
|
155 |
validation_ds:
|
156 |
-
file_path: /dataset/
|
157 |
names: null
|
158 |
global_batch_size: 128
|
159 |
micro_batch_size: 1
|
@@ -238,13 +238,13 @@ data:
|
|
238 |
index_mapping_dir: /indexmap_dir
|
239 |
data_prefix:
|
240 |
train:
|
241 |
-
- /datasets/
|
242 |
validation:
|
243 |
-
- /datasets/
|
244 |
test:
|
245 |
-
- /datasets/
|
246 |
answer_only_loss: true
|
247 |
-
restore_from_path: /models/
|
248 |
save_nemo_on_validation_end: true
|
249 |
use_flash_attention: null
|
250 |
pipeline_model_parallel_split_rank: 0
|
|
|
80 |
use_emha: false
|
81 |
optim:
|
82 |
name: distributed_fused_adam
|
83 |
+
lr: 3.001e-07
|
84 |
weight_decay: 0.1
|
85 |
betas:
|
86 |
- 0.9
|
|
|
110 |
num_workers: 2
|
111 |
dataloader_type: single
|
112 |
train_ds:
|
113 |
+
file_path: /dataset/train.jsonl
|
114 |
global_batch_size: 128
|
115 |
micro_batch_size: 1
|
116 |
shuffle: true
|
|
|
153 |
hf_dataset: true
|
154 |
truncation_method: right
|
155 |
validation_ds:
|
156 |
+
file_path: /dataset/val.jsonl
|
157 |
names: null
|
158 |
global_batch_size: 128
|
159 |
micro_batch_size: 1
|
|
|
238 |
index_mapping_dir: /indexmap_dir
|
239 |
data_prefix:
|
240 |
train:
|
241 |
+
- /datasets/train.jsonl
|
242 |
validation:
|
243 |
+
- /datasets/val.jsonl
|
244 |
test:
|
245 |
+
- /datasets/val.jsonl
|
246 |
answer_only_loss: true
|
247 |
+
restore_from_path: /models/340B_base
|
248 |
save_nemo_on_validation_end: true
|
249 |
use_flash_attention: null
|
250 |
pipeline_model_parallel_split_rank: 0
|