quyanh committed
Commit 4f2d703
1 Parent(s): 95c7aa2

Upload folder using huggingface_hub

adapter_config.json CHANGED
@@ -16,14 +16,14 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "lm_head",
     "k_proj",
+    "v_proj",
+    "q_proj",
+    "lm_head",
     "o_proj",
-    "up_proj",
-    "gate_proj",
     "down_proj",
-    "q_proj",
-    "v_proj"
+    "up_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
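
Note: the target_modules list above is the set of layers the PEFT LoRA adapter attaches to. A minimal sketch of a LoraConfig that would emit this list and task type is shown below; the rank, alpha, and dropout values are illustrative placeholders, not values taken from this commit.

# Hypothetical sketch only: reproduces the target_modules and task_type seen
# in the updated adapter_config.json. r, lora_alpha, and lora_dropout are
# placeholder values, not read from this repository.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,                # placeholder rank
    lora_alpha=16,      # placeholder scaling factor
    lora_dropout=0.05,  # placeholder dropout
    target_modules=[
        "k_proj", "v_proj", "q_proj", "lm_head",
        "o_proj", "down_proj", "up_proj", "gate_proj",
    ],
    task_type="CAUSAL_LM",
)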
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3de4953061370a63e932ab9adef3d1b37a5e265460e924e74f17ca025920c147
-size 5031039
+oid sha256:b6bac0fe4393fb334aa3f0c9d417e30a9a3d89300be07d4634893d07f4327a25
+size 5031484
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b79a623491a90333d1ce22b9d9de2a35bcb25c991ac6e28b032ff67f8daec5b8
-size 2525407
+oid sha256:d8d12cc31694a253150fb4b864dd9db0fd34d6085684fd2b497291115cd92e47
+size 2525844
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:465720dc59f05c874da3430080ff334622b516143a99546883486e56c8841221
-size 14575
+oid sha256:2d9b97ed8e6c4eaad8cb1ef6ae1e3ca5ff2f998cbd086280107ebe6396171944
+size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d6a51cd429314aae6c96ca15be43ce2d4ced57459a1e34e51c06283906510b4
-size 627
+oid sha256:eedfec4bc6f5a00a07b48568793720af057cf404e35f584cd69e8f806039a34d
+size 1064
trainer_state.json CHANGED
@@ -1,31 +1,139 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04828002414001207,
+  "epoch": 0.9656004828002414,
   "eval_steps": 500,
-  "global_step": 20,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
-    {
-      "epoch": 0.02,
-      "learning_rate": 1.3333333333333333e-05,
-      "loss": 1.8743,
-      "step": 10
-    },
     {
       "epoch": 0.05,
-      "learning_rate": 0.0,
-      "loss": 1.8553,
+      "learning_rate": 1.9325842696629215e-05,
+      "loss": 1.8646,
       "step": 20
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 1.8426966292134835e-05,
+      "loss": 1.9283,
+      "step": 40
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 1.752808988764045e-05,
+      "loss": 1.9027,
+      "step": 60
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 1.662921348314607e-05,
+      "loss": 1.8583,
+      "step": 80
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 1.5730337078651687e-05,
+      "loss": 1.8618,
+      "step": 100
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 1.4831460674157305e-05,
+      "loss": 1.8233,
+      "step": 120
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 1.3932584269662923e-05,
+      "loss": 1.9046,
+      "step": 140
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 1.303370786516854e-05,
+      "loss": 1.8505,
+      "step": 160
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 1.213483146067416e-05,
+      "loss": 1.8608,
+      "step": 180
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 1.1235955056179778e-05,
+      "loss": 1.7992,
+      "step": 200
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 1.0337078651685396e-05,
+      "loss": 1.8869,
+      "step": 220
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 9.438202247191012e-06,
+      "loss": 1.839,
+      "step": 240
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 8.53932584269663e-06,
+      "loss": 1.8787,
+      "step": 260
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 7.640449438202247e-06,
+      "loss": 1.8828,
+      "step": 280
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 6.741573033707865e-06,
+      "loss": 1.8479,
+      "step": 300
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 5.842696629213483e-06,
+      "loss": 1.9063,
+      "step": 320
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 4.943820224719101e-06,
+      "loss": 1.8693,
+      "step": 340
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 4.04494382022472e-06,
+      "loss": 1.827,
+      "step": 360
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 3.146067415730337e-06,
+      "loss": 1.8259,
+      "step": 380
+    },
+    {
+      "epoch": 0.97,
+      "learning_rate": 2.2471910112359554e-06,
+      "loss": 1.8419,
+      "step": 400
     }
   ],
-  "logging_steps": 10,
-  "max_steps": 20,
-  "num_train_epochs": 1,
-  "save_steps": 10,
-  "total_flos": 1.393049758334976e+16,
+  "logging_steps": 20,
+  "max_steps": 450,
+  "num_train_epochs": 2,
+  "save_steps": 20,
+  "total_flos": 2.786099516669952e+17,
   "trial_name": null,
   "trial_params": null
 }
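
Note: the updated trainer_state.json is consistent with a run configured roughly as sketched below (logging and checkpointing every 20 steps, 450 max steps over 2 epochs, linear decay from about 2e-5; the logged learning rates also fit a short warmup of roughly 5 steps). This is a hedged reconstruction, not the actual contents of training_args.bin.

# Hedged reconstruction only: inferred from trainer_state.json, not read
# from training_args.bin. output_dir is a placeholder.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="outputs",        # placeholder path
    learning_rate=2e-5,          # matches the logged linear schedule
    num_train_epochs=2,
    max_steps=450,
    warmup_steps=5,              # inferred: fits the logged learning rates
    lr_scheduler_type="linear",
    logging_steps=20,
    save_steps=20,
)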
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e9f8e5ad467430e401cff9fca7527f5ec3d19e2a691d6c74591579cb046ba0f
-size 4027
+oid sha256:050755ce01080bd5005c27dae5043cd1a94e5e57b091a9a27315291e7463c9fb
+size 4472