roneneldan
committed on
Commit
•
85c27af
1
Parent(s):
d1936ec
Update README.md
Browse files
README.md
CHANGED
@@ -11,13 +11,13 @@ License: mit
|
|
11 |
---
|
12 |
hyperparams used to train this model:
|
13 |
|
14 |
-
lr = 5e-4
|
15 |
-
lr_schedule = constant
|
16 |
-
wd=0.1
|
17 |
-
adam_beta1=0.9, adam_beta2 = 0.95
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
|
22 |
------ EXAMPLE USAGE ---
|
23 |
|
|
|
11 |
---
|
12 |
hyperparams used to train this model:
|
13 |
|
14 |
+
lr = 5e-4,
|
15 |
+
lr_schedule = constant,
|
16 |
+
wd=0.1,
|
17 |
+
adam_beta1=0.9, adam_beta2 = 0.95,
|
18 |
+
context_length=512,
|
19 |
+
batch_size=80,
|
20 |
+
gradient_accumulation_steps=16
|
21 |
|
22 |
------ EXAMPLE USAGE ---
|
23 |
|