Commit 9c4c6ae
GuyCalledMav committed
Parent(s): 188078c
Upload 12 files
Browse files
- added_tokens.json +6 -0
- config.json +26 -0
- eval_results.txt +2 -0
- merges.txt +0 -0
- model_args.json +1 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +21 -0
- tokenizer_config.json +68 -0
- training_args.bin +3 -0
- training_progress_scores.csv +105 -0
- vocab.json +0 -0
- vocab.txt +0 -0
added_tokens.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "[CLS]": 30002,
+  "[MASK]": 30003,
+  "[PAD]": 30001,
+  "[SEP]": 30000
+}
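These four BERT-style tokens are appended to the tokenizer's base vocabulary of 30,000 entries (ids 30000-30003), which is why config.json below declares "vocab_size": 30004. A minimal sketch of checking the mapping, assuming the uploaded files sit in a local directory "./RoBERTa-ceb" (the path is hypothetical):

from transformers import AutoTokenizer

# Load the uploaded tokenizer files; "./RoBERTa-ceb" is an assumed path.
tokenizer = AutoTokenizer.from_pretrained("./RoBERTa-ceb")
for token in ["[SEP]", "[PAD]", "[CLS]", "[MASK]"]:
    # Each lookup should return the id recorded in added_tokens.json.
    print(token, tokenizer.convert_tokens_to_ids(token))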
config.json
ADDED
@@ -0,0 +1,26 @@
+{
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.24.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30004
+}
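This is a standard RoBERTa-base shape (12 layers, 12 attention heads, hidden size 768) with the extended 30,004-entry vocabulary. A minimal sketch, under the same hypothetical local path, of rebuilding the architecture from this config:

from transformers import RobertaConfig, RobertaForMaskedLM

config = RobertaConfig.from_pretrained("./RoBERTa-ceb")  # reads config.json
model = RobertaForMaskedLM(config)  # random init; pytorch_model.bin holds the trained weights
print(config.num_hidden_layers, config.num_attention_heads, config.vocab_size)  # 12 12 30004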
eval_results.txt
ADDED
@@ -0,0 +1,2 @@
+eval_loss = 3.955014076278108
+perplexity = tensor(52.1964)
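The two numbers are consistent: the reported perplexity is just exp(eval_loss).

import math

eval_loss = 3.955014076278108
print(math.exp(eval_loss))  # ~52.1964, matching perplexity = tensor(52.1964)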
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
model_args.json
ADDED
@@ -0,0 +1 @@
+{"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "RoBERTa-ceb/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 160, "evaluate_during_training": true, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 20000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": null, "model_type": "roberta", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 100, "optimizer": "AdamW", "output_dir": "BERT-ceb", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 38, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 5000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": "BERT-ceb", "tokenizer_type": null, "train_batch_size": 160, "train_custom_parameters_only": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 5040, "weight_decay": 0.0, "model_class": "LanguageModelingModel", "block_size": 128, "config_name": null, "dataset_class": null, "dataset_type": "simple", "discriminator_config": {}, "discriminator_loss_weight": 50.0, "generator_config": {}, "max_steps": -1, "min_frequency": 2, "mlm": true, "mlm_probability": 0.15, "sliding_window": false, "special_tokens": ["<s>", "<pad>", "</s>", "<unk>", "<mask>"], "stride": 0.8, "tie_generator_and_discriminator_embeddings": true, "vocab_size": 30000, "clean_text": true, "handle_chinese_chars": true, "special_tokens_list": [], "strip_accents": true}
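The scheduler settings are internally consistent: training_progress_scores.csv below logs 840 optimizer steps per epoch, so num_train_epochs = 100 gives 84,000 total steps, and warmup_ratio = 0.06 reproduces the stored warmup_steps of 5,040. A quick check (the 840 steps/epoch figure is inferred from the CSV, not stated in the args):

steps_per_epoch = 840                   # inferred from the evaluation log below
total_steps = steps_per_epoch * 100     # num_train_epochs = 100 -> 84000
warmup_steps = int(0.06 * total_steps)  # warmup_ratio = 0.06
print(total_steps, warmup_steps)        # 84000 5040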
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4cd7710a78d7e0382d1fa8328b2d917b051725cf3566e916c9bfe39ef344de4
+size 436537529
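This is a Git LFS pointer stub, not the weights themselves; the actual binary (~416 MiB) lives in LFS storage, addressed by the sha256 oid. An illustrative parse of the pointer format:

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:d4cd7710a78d7e0382d1fa8328b2d917b051725cf3566e916c9bfe39ef344de4
size 436537529"""

# Each pointer line is "key value"; size is in bytes.
fields = dict(line.split(" ", 1) for line in pointer.splitlines())
print(fields["oid"], round(int(fields["size"]) / 2**20, 1), "MiB")  # ~416.3 MiB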
special_tokens_map.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": "[CLS]",
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
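Note the mixed conventions here: BERT-style [CLS]/[SEP]/[PAD]/[MASK]/[UNK] sit alongside RoBERTa-style <s>/</s> for bos/eos. A sketch, under the same hypothetical local path, of confirming which string each role resolves to:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./RoBERTa-ceb")
print(tok.cls_token, tok.sep_token, tok.bos_token, tok.eos_token)
# expected from this map: [CLS] [SEP] <s> </s>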
tokenizer_config.json
ADDED
@@ -0,0 +1,68 @@
+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "__type": "AddedToken",
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "mask_token": {
+    "__type": "AddedToken",
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "name_or_path": "BERT-ceb",
+  "never_split": null,
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "__type": "AddedToken",
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "special_tokens_map_file": null,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "RobertaTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
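With tokenizer_class set to RobertaTokenizer (BPE over the uploaded vocab.json/merges.txt), end-to-end masked-LM inference would look roughly like the sketch below; note the mask string is [MASK] per this config, not RoBERTa's usual <mask>. The checkpoint path and the Cebuano example sentence are both assumptions:

from transformers import pipeline

fill_mask = pipeline("fill-mask", model="./RoBERTa-ceb")  # hypothetical path
# "Maayong [MASK] ..." -- an illustrative Cebuano prompt, not from the repo.
for pred in fill_mask("Maayong [MASK] kaninyong tanan."):
    print(pred["token_str"], round(pred["score"], 4))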
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d36f08f3d1d6f259142f076ba71f24ef1883c5f06bb4f50e40b965b7e9476161
+size 3387
training_progress_scores.csv
ADDED
@@ -0,0 +1,105 @@
+global_step,perplexity,eval_loss,train_loss
+840,tensor(5513.1270),8.614887404780816,8.615015983581543
+1680,tensor(2866.9822),7.961015423327261,7.8731489181518555
+2520,tensor(2383.4500),7.776304355729812,7.76918888092041
+3360,tensor(2167.2241),7.681202540465441,7.873549938201904
+4200,tensor(2011.2488),7.606511034671729,7.7114129066467285
+5040,tensor(1925.7876),7.563090380899149,7.557165145874023
+5880,tensor(1873.8759),7.535764445625775,7.542567253112793
+6720,tensor(1828.3774),7.511184253963814,7.672656536102295
+7560,tensor(1783.2251),7.4861786467204166,7.458953857421875
+8400,tensor(1500.5310),7.313574164964576,7.289700508117676
+9240,tensor(1025.8199),6.933247593341846,7.138363361358643
+10080,tensor(850.7896),6.74616496370867,6.698551177978516
+10920,tensor(718.7275),6.577482442720242,6.610429763793945
+11760,tensor(591.0831),6.381956342272284,6.376274585723877
+12600,tensor(477.7185),6.16902169458109,6.171719551086426
+13440,tensor(357.5952),5.879401738044775,6.041259765625
+14280,tensor(300.5546),5.705629161183868,5.919500827789307
+15120,tensor(259.2738),5.557884844558499,5.610713958740234
+15960,tensor(236.5213),5.46603816380433,5.657792091369629
+16800,tensor(216.0584),5.375549020360431,5.34259033203125
+17640,tensor(199.4774),5.295700963639535,5.249312400817871
+18480,tensor(186.8399),5.230252159715263,5.466444492340088
+19320,tensor(174.4325),5.161537850637571,5.218142032623291
+20000,tensor(165.4119),5.108438900861695,5.233940601348877
+20160,tensor(164.5391),5.103147904454814,5.230566024780273
+21000,tensor(154.7863),5.042045109644885,5.188614845275879
+21840,tensor(146.6164),4.9878196422522665,5.2135701179504395
+22680,tensor(140.0771),4.942193148825406,4.814875602722168
+23520,tensor(134.7099),4.903123423951497,4.779435157775879
+24360,tensor(127.4303),4.8475694950158,4.873952388763428
+25200,tensor(122.9376),4.811676481888758,4.701533317565918
+26040,tensor(118.4770),4.77471930381811,4.734566688537598
+26880,tensor(113.0559),4.727882242880726,4.733781814575195
+27720,tensor(108.0131),4.682252737018169,4.784864902496338
+28560,tensor(105.1070),4.654978862870926,4.570435523986816
+29400,tensor(102.5748),4.6305917653992275,4.797003746032715
+30240,tensor(98.3004),4.588028168791278,4.692539691925049
+31080,tensor(95.5100),4.559231265461276,4.472110271453857
+31920,tensor(92.8367),4.530842494060643,4.457094192504883
+32760,tensor(91.0269),4.511155435824281,4.4847493171691895
+33600,tensor(89.1943),4.490817592042317,4.467089653015137
+34440,tensor(85.9484),4.453746852151591,4.714705467224121
+35280,tensor(84.4949),4.436690606212164,4.459835529327393
+36120,tensor(83.1834),4.4210476332931155,4.568218231201172
+36960,tensor(81.0199),4.394695270682963,4.207172870635986
+37800,tensor(78.6906),4.3655232479222015,4.555455207824707
+38640,tensor(77.7475),4.353466031675655,4.264974594116211
+39480,tensor(75.5957),4.32539894456547,4.3341593742370605
+40000,tensor(76.1601),4.3328377375670515,4.343103408813477
+40320,tensor(75.1305),4.3192271779498785,4.320957183837891
+41160,tensor(73.8844),4.302502227620491,4.186777591705322
+42000,tensor(72.7903),4.287583556785402,4.321813583374023
+42840,tensor(71.3143),4.2670973850087535,4.38191556930542
+43680,tensor(70.2743),4.2524054344231486,4.150886535644531
+44520,tensor(69.5957),4.242702348537355,4.198707580566406
+45360,tensor(67.9366),4.218575100198176,4.0208282470703125
+46200,tensor(68.2866),4.223713122272944,4.201685428619385
+47040,tensor(66.6248),4.1990775677830126,4.237384796142578
+47880,tensor(66.2193),4.192972441985144,4.080409049987793
+48720,tensor(65.3626),4.179950529930151,4.410434722900391
+49560,tensor(64.7271),4.170180269892182,4.0652947425842285
+50400,tensor(64.2023),4.162038403099747,4.079613208770752
+51240,tensor(62.7907),4.139807715800136,4.082955360412598
+52080,tensor(62.3912),4.133424125011499,4.0272297859191895
+52920,tensor(62.1102),4.12891009633575,4.157703399658203
+53760,tensor(61.3228),4.116151399522031,4.108161926269531
+54600,tensor(60.7504),4.106774397935912,4.0041704177856445
+55440,tensor(60.3719),4.1005239068614365,4.1067047119140625
+56280,tensor(59.6579),4.08862609094918,4.0335893630981445
+57120,tensor(59.8366),4.091618014737893,4.1296000480651855
+57960,tensor(59.0609),4.078569598672514,4.050683498382568
+58800,tensor(58.5802),4.070397336336109,4.029040813446045
+59640,tensor(58.5353),4.0696296974381,4.057146072387695
+60000,tensor(58.2795),4.065249836275363,4.020354747772217
+60480,tensor(58.3460),4.066390033016837,3.941168785095215
+61320,tensor(57.4409),4.050756845429046,4.060215950012207
+62160,tensor(56.9148),4.041554759463993,3.8870997428894043
+63000,tensor(56.7257),4.038228142318002,4.010282039642334
+63840,tensor(56.4911),4.034082424019186,3.8918683528900146
+64680,tensor(55.8974),4.023517806383106,3.913522481918335
+65520,tensor(55.8921),4.023423032173048,3.9048819541931152
+66360,tensor(55.6332),4.018779180626169,4.0070977210998535
+67200,tensor(55.3677),4.013996507319229,3.8241348266601562
+68040,tensor(55.1016),4.009178212468658,3.8613393306732178
+68880,tensor(54.4008),3.9963787630271006,3.984696388244629
+69720,tensor(54.6007),4.000046700662911,3.971323251724243
+70560,tensor(54.7101),4.002047452881438,3.8017985820770264
+71400,tensor(54.2510),3.993622047641266,4.045064926147461
+72240,tensor(54.2700),3.9939723986584994,3.7902510166168213
+73080,tensor(53.8975),3.987083983082342,4.033565998077393
+73920,tensor(53.4055),3.97791349718356,3.8635339736938477
+74760,tensor(53.2877),3.975705491423042,4.0212907791137695
+75600,tensor(53.2083),3.9742152747384747,3.9361703395843506
+76440,tensor(53.2604),3.975193798824509,3.7788941860198975
+77280,tensor(53.3057),3.9760427565371255,3.9743189811706543
+78120,tensor(52.6205),3.9631065192380786,3.8015661239624023
+78960,tensor(52.6849),3.964329612198599,3.915081262588501
+79800,tensor(53.0415),3.971074112218703,4.116347312927246
+80000,tensor(52.3625),3.9581908987596703,3.8000710010528564
+80640,tensor(52.4477),3.959817202735286,3.951565742492676
+81480,tensor(52.9591),3.969520092010498,3.83418345451355
+82320,tensor(52.5453),3.961675506067502,3.841287851333618
+83160,tensor(52.6411),3.963497509888563,4.05776309967041
+84000,tensor(52.3799),3.9585225909807105,3.8181488513946533
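Read as a curve, eval_loss falls from 8.61 to about 3.96 over the 100 epochs and is nearly flat for the last ~10,000 steps; the perplexity column is exp(eval_loss) throughout. A small analysis sketch (pandas assumed available):

import math
import pandas as pd

df = pd.read_csv("training_progress_scores.csv")
# Strip the "tensor(...)" wrapper the logger left around perplexity.
df["perplexity"] = df["perplexity"].str.extract(r"tensor\(([\d.]+)\)", expand=False).astype(float)
df["ppl_from_loss"] = df["eval_loss"].apply(math.exp)  # should match the perplexity column
print(df[["global_step", "perplexity", "ppl_from_loss", "eval_loss"]].tail())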
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff