jinal26 commited on
Commit
92cf09a
1 Parent(s): aa9bc64

Model save

Browse files
Files changed (4) hide show
  1. all_results.json +18 -0
  2. eval_results.json +12 -0
  3. train_results.json +9 -0
  4. trainer_state.json +353 -0
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_accuracy": 0.9768446160123027,
4
+ "eval_f1": 0.9388393674827523,
5
+ "eval_loss": 0.05124881863594055,
6
+ "eval_precision": 0.9288864055090761,
7
+ "eval_recall": 0.9490079303854105,
8
+ "eval_runtime": 122.2255,
9
+ "eval_samples": 20927,
10
+ "eval_samples_per_second": 171.216,
11
+ "eval_steps_per_second": 10.702,
12
+ "total_flos": 1.64925784065024e+16,
13
+ "train_loss": 0.0881471913733378,
14
+ "train_runtime": 3285.9718,
15
+ "train_samples": 167408,
16
+ "train_samples_per_second": 101.893,
17
+ "train_steps_per_second": 6.368
18
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_accuracy": 0.9768446160123027,
4
+ "eval_f1": 0.9388393674827523,
5
+ "eval_loss": 0.05124881863594055,
6
+ "eval_precision": 0.9288864055090761,
7
+ "eval_recall": 0.9490079303854105,
8
+ "eval_runtime": 122.2255,
9
+ "eval_samples": 20927,
10
+ "eval_samples_per_second": 171.216,
11
+ "eval_steps_per_second": 10.702
12
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "total_flos": 1.64925784065024e+16,
4
+ "train_loss": 0.0881471913733378,
5
+ "train_runtime": 3285.9718,
6
+ "train_samples": 167408,
7
+ "train_samples_per_second": 101.893,
8
+ "train_steps_per_second": 6.368
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.05124881863594055,
3
+ "best_model_checkpoint": "my_Pytorch_pii_model/checkpoint-20926",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 20926,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04778744146038421,
13
+ "grad_norm": 1.7837579250335693,
14
+ "learning_rate": 1.997195242749148e-05,
15
+ "loss": 0.8081,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.09557488292076842,
20
+ "grad_norm": 2.2397186756134033,
21
+ "learning_rate": 1.9887742837017725e-05,
22
+ "loss": 0.2477,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.14336232438115262,
27
+ "grad_norm": 1.895089030265808,
28
+ "learning_rate": 1.9747845385097936e-05,
29
+ "loss": 0.1609,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.19114976584153684,
34
+ "grad_norm": 1.4316082000732422,
35
+ "learning_rate": 1.9553047975508295e-05,
36
+ "loss": 0.1269,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.23893720730192106,
41
+ "grad_norm": 0.5299736857414246,
42
+ "learning_rate": 1.9304447709107316e-05,
43
+ "loss": 0.1052,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 0.28672464876230525,
48
+ "grad_norm": 1.0913931131362915,
49
+ "learning_rate": 1.9003444704953408e-05,
50
+ "loss": 0.0967,
51
+ "step": 3000
52
+ },
53
+ {
54
+ "epoch": 0.33451209022268946,
55
+ "grad_norm": 1.8749414682388306,
56
+ "learning_rate": 1.865248697176191e-05,
57
+ "loss": 0.0889,
58
+ "step": 3500
59
+ },
60
+ {
61
+ "epoch": 0.3822995316830737,
62
+ "grad_norm": 2.014411211013794,
63
+ "learning_rate": 1.825214512447262e-05,
64
+ "loss": 0.0843,
65
+ "step": 4000
66
+ },
67
+ {
68
+ "epoch": 0.4300869731434579,
69
+ "grad_norm": 0.6492653489112854,
70
+ "learning_rate": 1.780532711756845e-05,
71
+ "loss": 0.0812,
72
+ "step": 4500
73
+ },
74
+ {
75
+ "epoch": 0.4778744146038421,
76
+ "grad_norm": 1.5026785135269165,
77
+ "learning_rate": 1.7314549434302465e-05,
78
+ "loss": 0.0779,
79
+ "step": 5000
80
+ },
81
+ {
82
+ "epoch": 0.5256618560642263,
83
+ "grad_norm": 0.5166222453117371,
84
+ "learning_rate": 1.6782576139241983e-05,
85
+ "loss": 0.0719,
86
+ "step": 5500
87
+ },
88
+ {
89
+ "epoch": 0.5734492975246105,
90
+ "grad_norm": 0.9983482956886292,
91
+ "learning_rate": 1.62124033110307e-05,
92
+ "loss": 0.0746,
93
+ "step": 6000
94
+ },
95
+ {
96
+ "epoch": 0.6212367389849948,
97
+ "grad_norm": 0.8525393605232239,
98
+ "learning_rate": 1.5608485174626132e-05,
99
+ "loss": 0.0713,
100
+ "step": 6500
101
+ },
102
+ {
103
+ "epoch": 0.6690241804453789,
104
+ "grad_norm": 1.3193893432617188,
105
+ "learning_rate": 1.4971803628070286e-05,
106
+ "loss": 0.067,
107
+ "step": 7000
108
+ },
109
+ {
110
+ "epoch": 0.7168116219057632,
111
+ "grad_norm": 1.7481768131256104,
112
+ "learning_rate": 1.4307120836322233e-05,
113
+ "loss": 0.0677,
114
+ "step": 7500
115
+ },
116
+ {
117
+ "epoch": 0.7645990633661474,
118
+ "grad_norm": 0.7293727993965149,
119
+ "learning_rate": 1.3618180299169794e-05,
120
+ "loss": 0.0649,
121
+ "step": 8000
122
+ },
123
+ {
124
+ "epoch": 0.8123865048265316,
125
+ "grad_norm": 0.4748859405517578,
126
+ "learning_rate": 1.2908862136252995e-05,
127
+ "loss": 0.0675,
128
+ "step": 8500
129
+ },
130
+ {
131
+ "epoch": 0.8601739462869158,
132
+ "grad_norm": 0.9915094971656799,
133
+ "learning_rate": 1.218462628244987e-05,
134
+ "loss": 0.0648,
135
+ "step": 9000
136
+ },
137
+ {
138
+ "epoch": 0.9079613877473001,
139
+ "grad_norm": 2.1113200187683105,
140
+ "learning_rate": 1.1446650257897558e-05,
141
+ "loss": 0.0638,
142
+ "step": 9500
143
+ },
144
+ {
145
+ "epoch": 0.9557488292076842,
146
+ "grad_norm": 0.7165895700454712,
147
+ "learning_rate": 1.0700526685370893e-05,
148
+ "loss": 0.0615,
149
+ "step": 10000
150
+ },
151
+ {
152
+ "epoch": 1.0,
153
+ "eval_accuracy": 0.9751704843504109,
154
+ "eval_f1": 0.9315204370483973,
155
+ "eval_loss": 0.05543896183371544,
156
+ "eval_precision": 0.9214454994188416,
157
+ "eval_recall": 0.9418181257135804,
158
+ "eval_runtime": 190.6513,
159
+ "eval_samples_per_second": 109.766,
160
+ "eval_steps_per_second": 6.861,
161
+ "step": 10463
162
+ },
163
+ {
164
+ "epoch": 1.0035362706680684,
165
+ "grad_norm": 0.7908827066421509,
166
+ "learning_rate": 9.950457739907535e-06,
167
+ "loss": 0.0591,
168
+ "step": 10500
169
+ },
170
+ {
171
+ "epoch": 1.0513237121284527,
172
+ "grad_norm": 0.8281264901161194,
173
+ "learning_rate": 9.200667816922936e-06,
174
+ "loss": 0.0559,
175
+ "step": 11000
176
+ },
177
+ {
178
+ "epoch": 1.099111153588837,
179
+ "grad_norm": 0.6375882625579834,
180
+ "learning_rate": 8.456863027173128e-06,
181
+ "loss": 0.0528,
182
+ "step": 11500
183
+ },
184
+ {
185
+ "epoch": 1.146898595049221,
186
+ "grad_norm": 0.8089446425437927,
187
+ "learning_rate": 7.720252707524596e-06,
188
+ "loss": 0.0561,
189
+ "step": 12000
190
+ },
191
+ {
192
+ "epoch": 1.1946860365096053,
193
+ "grad_norm": 0.5335150957107544,
194
+ "learning_rate": 6.9964819462513174e-06,
195
+ "loss": 0.0563,
196
+ "step": 12500
197
+ },
198
+ {
199
+ "epoch": 1.2424734779699895,
200
+ "grad_norm": 0.7552749514579773,
201
+ "learning_rate": 6.289627027145937e-06,
202
+ "loss": 0.053,
203
+ "step": 13000
204
+ },
205
+ {
206
+ "epoch": 1.2902609194303736,
207
+ "grad_norm": 1.121811866760254,
208
+ "learning_rate": 5.603668963817577e-06,
209
+ "loss": 0.0537,
210
+ "step": 13500
211
+ },
212
+ {
213
+ "epoch": 1.3380483608907578,
214
+ "grad_norm": 1.5103745460510254,
215
+ "learning_rate": 4.942471078585398e-06,
216
+ "loss": 0.0529,
217
+ "step": 14000
218
+ },
219
+ {
220
+ "epoch": 1.3858358023511421,
221
+ "grad_norm": 1.125802755355835,
222
+ "learning_rate": 4.309757244210676e-06,
223
+ "loss": 0.0558,
224
+ "step": 14500
225
+ },
226
+ {
227
+ "epoch": 1.4336232438115264,
228
+ "grad_norm": 2.0246198177337646,
229
+ "learning_rate": 3.709090911010067e-06,
230
+ "loss": 0.051,
231
+ "step": 15000
232
+ },
233
+ {
234
+ "epoch": 1.4814106852719107,
235
+ "grad_norm": 0.31454652547836304,
236
+ "learning_rate": 3.1449479986513345e-06,
237
+ "loss": 0.0503,
238
+ "step": 15500
239
+ },
240
+ {
241
+ "epoch": 1.5291981267322947,
242
+ "grad_norm": 1.1729323863983154,
243
+ "learning_rate": 2.618245735460083e-06,
244
+ "loss": 0.0496,
245
+ "step": 16000
246
+ },
247
+ {
248
+ "epoch": 1.576985568192679,
249
+ "grad_norm": 1.942895531654358,
250
+ "learning_rate": 2.1331175823046777e-06,
251
+ "loss": 0.0508,
252
+ "step": 16500
253
+ },
254
+ {
255
+ "epoch": 1.624773009653063,
256
+ "grad_norm": 0.7527912259101868,
257
+ "learning_rate": 1.692295785545267e-06,
258
+ "loss": 0.0474,
259
+ "step": 17000
260
+ },
261
+ {
262
+ "epoch": 1.6725604511134473,
263
+ "grad_norm": 0.9754999279975891,
264
+ "learning_rate": 1.299002907484831e-06,
265
+ "loss": 0.0505,
266
+ "step": 17500
267
+ },
268
+ {
269
+ "epoch": 1.7203478925738316,
270
+ "grad_norm": 1.4431662559509277,
271
+ "learning_rate": 9.538783949020436e-07,
272
+ "loss": 0.0507,
273
+ "step": 18000
274
+ },
275
+ {
276
+ "epoch": 1.7681353340342159,
277
+ "grad_norm": 0.9095632433891296,
278
+ "learning_rate": 6.602380824095455e-07,
279
+ "loss": 0.0484,
280
+ "step": 18500
281
+ },
282
+ {
283
+ "epoch": 1.8159227754946001,
284
+ "grad_norm": 0.47108370065689087,
285
+ "learning_rate": 4.185593937350141e-07,
286
+ "loss": 0.0503,
287
+ "step": 19000
288
+ },
289
+ {
290
+ "epoch": 1.8637102169549842,
291
+ "grad_norm": 1.9777565002441406,
292
+ "learning_rate": 2.3084346943755388e-07,
293
+ "loss": 0.0492,
294
+ "step": 19500
295
+ },
296
+ {
297
+ "epoch": 1.9114976584153685,
298
+ "grad_norm": 0.7806472182273865,
299
+ "learning_rate": 9.814752738334654e-08,
300
+ "loss": 0.0493,
301
+ "step": 20000
302
+ },
303
+ {
304
+ "epoch": 1.9592850998757525,
305
+ "grad_norm": 0.42741918563842773,
306
+ "learning_rate": 2.1218912376697043e-08,
307
+ "loss": 0.051,
308
+ "step": 20500
309
+ },
310
+ {
311
+ "epoch": 2.0,
312
+ "eval_accuracy": 0.9768446160123027,
313
+ "eval_f1": 0.9388393674827523,
314
+ "eval_loss": 0.05124881863594055,
315
+ "eval_precision": 0.9288864055090761,
316
+ "eval_recall": 0.9490079303854105,
317
+ "eval_runtime": 115.4429,
318
+ "eval_samples_per_second": 181.276,
319
+ "eval_steps_per_second": 11.33,
320
+ "step": 20926
321
+ },
322
+ {
323
+ "epoch": 2.0,
324
+ "step": 20926,
325
+ "total_flos": 1.64925784065024e+16,
326
+ "train_loss": 0.0881471913733378,
327
+ "train_runtime": 3285.9718,
328
+ "train_samples_per_second": 101.893,
329
+ "train_steps_per_second": 6.368
330
+ },
331
+ {
332
+ "epoch": 2.0,
333
+ "eval_accuracy": 0.9768446160123027,
334
+ "eval_f1": 0.9388393674827523,
335
+ "eval_loss": 0.05124881863594055,
336
+ "eval_precision": 0.9288864055090761,
337
+ "eval_recall": 0.9490079303854105,
338
+ "eval_runtime": 122.2255,
339
+ "eval_samples_per_second": 171.216,
340
+ "eval_steps_per_second": 10.702,
341
+ "step": 20926
342
+ }
343
+ ],
344
+ "logging_steps": 500,
345
+ "max_steps": 20926,
346
+ "num_input_tokens_seen": 0,
347
+ "num_train_epochs": 2,
348
+ "save_steps": 500,
349
+ "total_flos": 1.64925784065024e+16,
350
+ "train_batch_size": 16,
351
+ "trial_name": null,
352
+ "trial_params": null
353
+ }