File size: 2,199 Bytes
fc3de75
 
 
 
 
 
 
 
 
 
 
6757bbe
fc3de75
 
6757bbe
 
fc3de75
 
 
6757bbe
fc3de75
 
6757bbe
 
fc3de75
 
 
6757bbe
fc3de75
 
6757bbe
 
fc3de75
 
 
6757bbe
fc3de75
 
6757bbe
 
fc3de75
 
 
6757bbe
fc3de75
 
6757bbe
 
fc3de75
 
 
6757bbe
fc3de75
 
6757bbe
 
fc3de75
 
 
6757bbe
fc3de75
6757bbe
 
 
fc3de75
 
 
6757bbe
fc3de75
6757bbe
 
 
fc3de75
 
 
6757bbe
fc3de75
6757bbe
 
 
fc3de75
 
 
6757bbe
fc3de75
 
6757bbe
 
fc3de75
 
 
6757bbe
fc3de75
 
6757bbe
 
fc3de75
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.4228532987362453,
  "eval_steps": 500,
  "global_step": 5500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "ep_loss": 4.4238,
      "epoch": 0.04,
      "learning_rate": 2.5e-05,
      "loss": 11.1467,
      "mlm_loss": 6.7229,
      "step": 500
    },
    {
      "ep_loss": 0.4305,
      "epoch": 0.08,
      "learning_rate": 5e-05,
      "loss": 2.9841,
      "mlm_loss": 2.5537,
      "step": 1000
    },
    {
      "ep_loss": 0.3724,
      "epoch": 0.12,
      "learning_rate": 7.5e-05,
      "loss": 2.2371,
      "mlm_loss": 1.8647,
      "step": 1500
    },
    {
      "ep_loss": 0.364,
      "epoch": 0.15,
      "learning_rate": 0.0001,
      "loss": 1.8477,
      "mlm_loss": 1.4837,
      "step": 2000
    },
    {
      "ep_loss": 0.3678,
      "epoch": 0.19,
      "learning_rate": 0.00012495,
      "loss": 1.5215,
      "mlm_loss": 1.1538,
      "step": 2500
    },
    {
      "ep_loss": 0.3617,
      "epoch": 0.23,
      "learning_rate": 0.00014995,
      "loss": 1.4119,
      "mlm_loss": 1.0501,
      "step": 3000
    },
    {
      "ep_loss": 0.3336,
      "epoch": 0.27,
      "learning_rate": 0.0001749,
      "loss": 1.3027,
      "mlm_loss": 0.9691,
      "step": 3500
    },
    {
      "ep_loss": 0.3348,
      "epoch": 0.31,
      "learning_rate": 0.0001999,
      "loss": 1.2441,
      "mlm_loss": 0.9093,
      "step": 4000
    },
    {
      "ep_loss": 0.3348,
      "epoch": 0.35,
      "learning_rate": 0.0002249,
      "loss": 1.1942,
      "mlm_loss": 0.8594,
      "step": 4500
    },
    {
      "ep_loss": 0.3331,
      "epoch": 0.38,
      "learning_rate": 0.0002499,
      "loss": 1.1466,
      "mlm_loss": 0.8135,
      "step": 5000
    },
    {
      "ep_loss": 0.3268,
      "epoch": 0.42,
      "learning_rate": 0.00027489999999999996,
      "loss": 1.1067,
      "mlm_loss": 0.7799,
      "step": 5500
    }
  ],
  "logging_steps": 500,
  "max_steps": 520240,
  "num_train_epochs": 40,
  "save_steps": 500,
  "total_flos": 5.251494995726172e+18,
  "trial_name": null,
  "trial_params": null
}