File size: 2,027 Bytes
198bc78
 
 
 
 
 
 
 
 
 
 
d57ec63
198bc78
 
d57ec63
 
198bc78
 
 
d57ec63
198bc78
 
d57ec63
 
198bc78
 
 
d57ec63
198bc78
 
d57ec63
 
198bc78
 
 
d57ec63
198bc78
 
d57ec63
 
198bc78
 
 
d57ec63
198bc78
 
d57ec63
 
198bc78
 
 
d57ec63
198bc78
 
d57ec63
 
198bc78
 
 
d57ec63
198bc78
d57ec63
 
 
198bc78
 
 
d57ec63
198bc78
d57ec63
 
 
198bc78
 
 
d57ec63
198bc78
d57ec63
 
 
198bc78
 
 
d57ec63
198bc78
 
d57ec63
 
198bc78
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.38441208976022295,
  "eval_steps": 500,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "ep_loss": 4.4238,
      "epoch": 0.04,
      "learning_rate": 2.5e-05,
      "loss": 11.1467,
      "mlm_loss": 6.7229,
      "step": 500
    },
    {
      "ep_loss": 0.4305,
      "epoch": 0.08,
      "learning_rate": 5e-05,
      "loss": 2.9841,
      "mlm_loss": 2.5537,
      "step": 1000
    },
    {
      "ep_loss": 0.3724,
      "epoch": 0.12,
      "learning_rate": 7.5e-05,
      "loss": 2.2371,
      "mlm_loss": 1.8647,
      "step": 1500
    },
    {
      "ep_loss": 0.364,
      "epoch": 0.15,
      "learning_rate": 0.0001,
      "loss": 1.8477,
      "mlm_loss": 1.4837,
      "step": 2000
    },
    {
      "ep_loss": 0.3678,
      "epoch": 0.19,
      "learning_rate": 0.00012495,
      "loss": 1.5215,
      "mlm_loss": 1.1538,
      "step": 2500
    },
    {
      "ep_loss": 0.3617,
      "epoch": 0.23,
      "learning_rate": 0.00014995,
      "loss": 1.4119,
      "mlm_loss": 1.0501,
      "step": 3000
    },
    {
      "ep_loss": 0.3336,
      "epoch": 0.27,
      "learning_rate": 0.0001749,
      "loss": 1.3027,
      "mlm_loss": 0.9691,
      "step": 3500
    },
    {
      "ep_loss": 0.3348,
      "epoch": 0.31,
      "learning_rate": 0.0001999,
      "loss": 1.2441,
      "mlm_loss": 0.9093,
      "step": 4000
    },
    {
      "ep_loss": 0.3348,
      "epoch": 0.35,
      "learning_rate": 0.0002249,
      "loss": 1.1942,
      "mlm_loss": 0.8594,
      "step": 4500
    },
    {
      "ep_loss": 0.3331,
      "epoch": 0.38,
      "learning_rate": 0.0002499,
      "loss": 1.1466,
      "mlm_loss": 0.8135,
      "step": 5000
    }
  ],
  "logging_steps": 500,
  "max_steps": 520240,
  "num_train_epochs": 40,
  "save_steps": 500,
  "total_flos": 4.774086359751066e+18,
  "trial_name": null,
  "trial_params": null
}