KimByeongSu committed on
Commit
e2ec98b
1 Parent(s): 83037d9

gpt-neo-125m-cs-finetuning-5000

Browse files
README.md CHANGED
@@ -1,59 +1,59 @@
1
- ---
2
- license: mit
3
- base_model: EleutherAI/gpt-neo-125m
4
- tags:
5
- - generated_from_trainer
6
- model-index:
7
- - name: gpt-neo-125m-cs-finetuning-5000
8
- results: []
9
- ---
10
-
11
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
- should probably proofread and complete it, then remove this comment. -->
13
-
14
- # gpt-neo-125m-cs-finetuning-5000
15
-
16
- This model is a fine-tuned version of [EleutherAI/gpt-neo-125m](https://huggingface.co/EleutherAI/gpt-neo-125m) on the None dataset.
17
- It achieves the following results on the evaluation set:
18
- - Loss: 3.3585
19
-
20
- ## Model description
21
-
22
- More information needed
23
-
24
- ## Intended uses & limitations
25
-
26
- More information needed
27
-
28
- ## Training and evaluation data
29
-
30
- More information needed
31
-
32
- ## Training procedure
33
-
34
- ### Training hyperparameters
35
-
36
- The following hyperparameters were used during training:
37
- - learning_rate: 2e-05
38
- - train_batch_size: 8
39
- - eval_batch_size: 8
40
- - seed: 42
41
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
- - lr_scheduler_type: linear
43
- - num_epochs: 3.0
44
-
45
- ### Training results
46
-
47
- | Training Loss | Epoch | Step | Validation Loss |
48
- |:-------------:|:-----:|:----:|:---------------:|
49
- | No log | 1.0 | 65 | 3.4337 |
50
- | No log | 2.0 | 130 | 3.3733 |
51
- | No log | 3.0 | 195 | 3.3585 |
52
-
53
-
54
- ### Framework versions
55
-
56
- - Transformers 4.36.2
57
- - Pytorch 1.13.1+cu117
58
- - Datasets 2.14.6
59
- - Tokenizers 0.15.0
 
1
+ ---
2
+ license: mit
3
+ base_model: EleutherAI/gpt-neo-125m
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: gpt-neo-125m-cs-finetuning-5000
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # gpt-neo-125m-cs-finetuning-5000
15
+
16
+ This model is a fine-tuned version of [EleutherAI/gpt-neo-125m](https://huggingface.co/EleutherAI/gpt-neo-125m) on an unspecified dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 3.4019
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 2e-05
38
+ - train_batch_size: 8
39
+ - eval_batch_size: 8
40
+ - seed: 42
41
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
+ - lr_scheduler_type: linear
43
+ - num_epochs: 3.0
44
+
45
+ ### Training results
46
+
47
+ | Training Loss | Epoch | Step | Validation Loss |
48
+ |:-------------:|:-----:|:----:|:---------------:|
49
+ | No log | 1.0 | 64 | 3.4786 |
50
+ | No log | 2.0 | 128 | 3.4167 |
51
+ | No log | 3.0 | 192 | 3.4019 |
52
+
53
+
54
+ ### Framework versions
55
+
56
+ - Transformers 4.36.2
57
+ - Pytorch 1.13.1+cu117
58
+ - Datasets 2.14.6
59
+ - Tokenizers 0.15.0
config.json CHANGED
@@ -1,55 +1,55 @@
1
- {
2
- "_name_or_path": "EleutherAI/gpt-neo-125m",
3
- "activation_function": "gelu_new",
4
- "architectures": [
5
- "GPTNeoForCausalLM"
6
- ],
7
- "attention_dropout": 0,
8
- "attention_layers": [
9
- "global",
10
- "local",
11
- "global",
12
- "local",
13
- "global",
14
- "local",
15
- "global",
16
- "local",
17
- "global",
18
- "local",
19
- "global",
20
- "local"
21
- ],
22
- "attention_types": [
23
- [
24
- [
25
- "global",
26
- "local"
27
- ],
28
- 6
29
- ]
30
- ],
31
- "bos_token_id": 50256,
32
- "classifier_dropout": 0.1,
33
- "embed_dropout": 0,
34
- "eos_token_id": 50256,
35
- "gradient_checkpointing": false,
36
- "hidden_size": 768,
37
- "initializer_range": 0.02,
38
- "intermediate_size": null,
39
- "layer_norm_epsilon": 1e-05,
40
- "max_position_embeddings": 2048,
41
- "model_type": "gpt_neo",
42
- "num_heads": 12,
43
- "num_layers": 12,
44
- "resid_dropout": 0,
45
- "summary_activation": null,
46
- "summary_first_dropout": 0.1,
47
- "summary_proj_to_labels": true,
48
- "summary_type": "cls_index",
49
- "summary_use_proj": true,
50
- "torch_dtype": "float32",
51
- "transformers_version": "4.36.2",
52
- "use_cache": true,
53
- "vocab_size": 50257,
54
- "window_size": 256
55
- }
 
1
+ {
2
+ "_name_or_path": "EleutherAI/gpt-neo-125m",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPTNeoForCausalLM"
6
+ ],
7
+ "attention_dropout": 0,
8
+ "attention_layers": [
9
+ "global",
10
+ "local",
11
+ "global",
12
+ "local",
13
+ "global",
14
+ "local",
15
+ "global",
16
+ "local",
17
+ "global",
18
+ "local",
19
+ "global",
20
+ "local"
21
+ ],
22
+ "attention_types": [
23
+ [
24
+ [
25
+ "global",
26
+ "local"
27
+ ],
28
+ 6
29
+ ]
30
+ ],
31
+ "bos_token_id": 50256,
32
+ "classifier_dropout": 0.1,
33
+ "embed_dropout": 0,
34
+ "eos_token_id": 50256,
35
+ "gradient_checkpointing": false,
36
+ "hidden_size": 768,
37
+ "initializer_range": 0.02,
38
+ "intermediate_size": null,
39
+ "layer_norm_epsilon": 1e-05,
40
+ "max_position_embeddings": 2048,
41
+ "model_type": "gpt_neo",
42
+ "num_heads": 12,
43
+ "num_layers": 12,
44
+ "resid_dropout": 0,
45
+ "summary_activation": null,
46
+ "summary_first_dropout": 0.1,
47
+ "summary_proj_to_labels": true,
48
+ "summary_type": "cls_index",
49
+ "summary_use_proj": true,
50
+ "torch_dtype": "float32",
51
+ "transformers_version": "4.36.2",
52
+ "use_cache": true,
53
+ "vocab_size": 50257,
54
+ "window_size": 256
55
+ }
emissions.csv CHANGED
@@ -1,2 +1,2 @@
1
- timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2
- 2024-03-20T15:03:31,codecarbon,c0176a42-645f-4601-b986-12cb392fbd8b,80.7582950592041,0.002230554984087869,2.7620134655550165e-05,42.5,168.03,11.905189990997316,0.0009533025234937669,0.003649187882204122,0.00026685581055615505,0.004869346216254044,South Korea,KOR,gyeonggi-do,,,Windows-10-10.0.19045-SP0,3.9.18,2.2.3,20,12th Gen Intel(R) Core(TM) i7-12700,1,1 x NVIDIA GeForce RTX 3060,127.1377,37.4331,31.747173309326172,machine,N,1.0
 
1
+ timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2
+ 2024-05-27T10:05:04,codecarbon,ed651fc3-2a98-4e67-a9e0-f744ae19a45d,84.14403414726257,0.00230752610858073,2.7423526004734586e-05,42.5,165.118,11.905189990997316,0.000993247193263637,0.0037661414692739645,0.00027798706049114166,0.005037375723028744,South Korea,KOR,seoul,,,Windows-10-10.0.19045-SP0,3.9.18,2.2.3,20,12th Gen Intel(R) Core(TM) i7-12700,1,1 x NVIDIA GeForce RTX 3060,126.9369,37.5551,31.747173309326172,machine,N,1.0
generation_config.json CHANGED
@@ -1,6 +1,6 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 50256,
4
- "eos_token_id": 50256,
5
- "transformers_version": "4.36.2"
6
- }
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.36.2"
6
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a977be68fd035fc625240d68804d673d72a84f0f8feac793e130e08666cc01f
3
  size 500811336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86ec265247c7b2fb974b84898105bae31ec9225f9430908b4c0e022ea452005
3
  size 500811336
runs/May27_10-03-37_kbs/events.out.tfevents.1716771820.kbs.34172.12 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e031b63fcd6a985cfbd25cbd4a987f6b82b6c8cb6752a3f782e761b44a62b0c
3
+ size 5969
runs/May27_10-03-37_kbs/events.out.tfevents.1716771906.kbs.34172.13 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9e174d5e97b07f28873155890cfd546362878a01c0446310ecfffd648a8c9c9
3
+ size 311
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a3fdd8ff27ffd955cdbbe52f597988d6502c27d1a881d4bba1746a908ba512b
3
  size 4283
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b4207a7c74ba885255a02310683bf297a00927d1231369c1f9377045a0d2814
3
  size 4283