Commit 9c4c6ae
GuyCalledMav committed
Parent(s): 188078c
Upload 12 files
Browse files
- added_tokens.json +6 -0
- config.json +26 -0
- eval_results.txt +2 -0
- merges.txt +0 -0
- model_args.json +1 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +21 -0
- tokenizer_config.json +68 -0
- training_args.bin +3 -0
- training_progress_scores.csv +105 -0
- vocab.json +0 -0
- vocab.txt +0 -0
added_tokens.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "[CLS]": 30002,
+  "[MASK]": 30003,
+  "[PAD]": 30001,
+  "[SEP]": 30000
+}
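These four BERT-style tokens are appended to the tokenizer's base vocabulary of 30,000 entries (ids 30000-30003), which is why config.json below declares "vocab_size": 30004. A minimal sketch of checking the mapping, assuming the uploaded files sit in a local directory "./RoBERTa-ceb" (the path is hypothetical):

from transformers import AutoTokenizer

# Load the uploaded tokenizer files; "./RoBERTa-ceb" is an assumed path.
tokenizer = AutoTokenizer.from_pretrained("./RoBERTa-ceb")
for token in ["[SEP]", "[PAD]", "[CLS]", "[MASK]"]:
    # Each lookup should return the id recorded in added_tokens.json.
    print(token, tokenizer.convert_tokens_to_ids(token))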
config.json
ADDED
@@ -0,0 +1,26 @@
+{
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.24.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30004
+}
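This is a standard RoBERTa-base shape (12 layers, 12 attention heads, hidden size 768) with the extended 30,004-entry vocabulary. A minimal sketch, under the same hypothetical local path, of rebuilding the architecture from this config:

from transformers import RobertaConfig, RobertaForMaskedLM

config = RobertaConfig.from_pretrained("./RoBERTa-ceb")  # reads config.json
model = RobertaForMaskedLM(config)  # random init; pytorch_model.bin holds the trained weights
print(config.num_hidden_layers, config.num_attention_heads, config.vocab_size)  # 12 12 30004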
eval_results.txt
ADDED
@@ -0,0 +1,2 @@
+eval_loss = 3.955014076278108
+perplexity = tensor(52.1964)
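The two numbers are consistent: the reported perplexity is just exp(eval_loss).

import math

eval_loss = 3.955014076278108
print(math.exp(eval_loss))  # ~52.1964, matching perplexity = tensor(52.1964)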
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
model_args.json
ADDED
@@ -0,0 +1 @@
+{"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "RoBERTa-ceb/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 160, "evaluate_during_training": true, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 20000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": null, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": null, "model_type": "roberta", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 100, "optimizer": "AdamW", "output_dir": "BERT-ceb", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 38, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 5000, "scheduler": "linear_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": "BERT-ceb", "tokenizer_type": null, "train_batch_size": 160, "train_custom_parameters_only": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 5040, "weight_decay": 0.0, "model_class": "LanguageModelingModel", "block_size": 128, "config_name": null, "dataset_class": null, "dataset_type": "simple", "discriminator_config": {}, "discriminator_loss_weight": 50.0, "generator_config": {}, "max_steps": -1, "min_frequency": 2, "mlm": true, "mlm_probability": 0.15, "sliding_window": false, "special_tokens": ["<s>", "<pad>", "</s>", "<unk>", "<mask>"], "stride": 0.8, "tie_generator_and_discriminator_embeddings": true, "vocab_size": 30000, "clean_text": true, "handle_chinese_chars": true, "special_tokens_list": [], "strip_accents": true}
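The scheduler settings are internally consistent: training_progress_scores.csv below logs 840 optimizer steps per epoch, so num_train_epochs = 100 gives 84,000 total steps, and warmup_ratio = 0.06 reproduces the stored warmup_steps of 5,040. A quick check (the 840 steps/epoch figure is inferred from the CSV, not stated in the args):

steps_per_epoch = 840                   # inferred from the evaluation log below
total_steps = steps_per_epoch * 100     # num_train_epochs = 100 -> 84000
warmup_steps = int(0.06 * total_steps)  # warmup_ratio = 0.06
print(total_steps, warmup_steps)        # 84000 5040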
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4cd7710a78d7e0382d1fa8328b2d917b051725cf3566e916c9bfe39ef344de4
+size 436537529
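This is a Git LFS pointer stub, not the weights themselves; the actual binary (~416 MiB) lives in LFS storage, addressed by the sha256 oid. An illustrative parse of the pointer format:

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:d4cd7710a78d7e0382d1fa8328b2d917b051725cf3566e916c9bfe39ef344de4
size 436537529"""

# Each pointer line is "key value"; size is in bytes.
fields = dict(line.split(" ", 1) for line in pointer.splitlines())
print(fields["oid"], round(int(fields["size"]) / 2**20, 1), "MiB")  # ~416.3 MiB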
special_tokens_map.json
ADDED
@@ -0,0 +1,21 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": "[CLS]",
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
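Note the mixed conventions here: BERT-style [CLS]/[SEP]/[PAD]/[MASK]/[UNK] sit alongside RoBERTa-style <s>/</s> for bos/eos. A sketch, under the same hypothetical local path, of confirming which string each role resolves to:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./RoBERTa-ceb")
print(tok.cls_token, tok.sep_token, tok.bos_token, tok.eos_token)
# expected from this map: [CLS] [SEP] <s> </s>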
tokenizer_config.json
ADDED
@@ -0,0 +1,68 @@
+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "__type": "AddedToken",
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "mask_token": {
+    "__type": "AddedToken",
+    "content": "[MASK]",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "name_or_path": "BERT-ceb",
+  "never_split": null,
+  "pad_token": {
+    "__type": "AddedToken",
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "__type": "AddedToken",
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "special_tokens_map_file": null,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "RobertaTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}
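With tokenizer_class set to RobertaTokenizer (BPE over the uploaded vocab.json/merges.txt), end-to-end masked-LM inference would look roughly like the sketch below; note the mask string is [MASK] per this config, not RoBERTa's usual <mask>. The checkpoint path and the Cebuano example sentence are both assumptions:

from transformers import pipeline

fill_mask = pipeline("fill-mask", model="./RoBERTa-ceb")  # hypothetical path
# "Maayong [MASK] ..." -- an illustrative Cebuano prompt, not from the repo.
for pred in fill_mask("Maayong [MASK] kaninyong tanan."):
    print(pred["token_str"], round(pred["score"], 4))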
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d36f08f3d1d6f259142f076ba71f24ef1883c5f06bb4f50e40b965b7e9476161
+size 3387
training_progress_scores.csv
ADDED
@@ -0,0 +1,105 @@
+global_step,perplexity,eval_loss,train_loss
+840,tensor(5513.1270),8.614887404780816,8.615015983581543
+1680,tensor(2866.9822),7.961015423327261,7.8731489181518555
+2520,tensor(2383.4500),7.776304355729812,7.76918888092041
+3360,tensor(2167.2241),7.681202540465441,7.873549938201904
+4200,tensor(2011.2488),7.606511034671729,7.7114129066467285
+5040,tensor(1925.7876),7.563090380899149,7.557165145874023
+5880,tensor(1873.8759),7.535764445625775,7.542567253112793
+6720,tensor(1828.3774),7.511184253963814,7.672656536102295
+7560,tensor(1783.2251),7.4861786467204166,7.458953857421875
+8400,tensor(1500.5310),7.313574164964576,7.289700508117676
+9240,tensor(1025.8199),6.933247593341846,7.138363361358643
+10080,tensor(850.7896),6.74616496370867,6.698551177978516
+10920,tensor(718.7275),6.577482442720242,6.610429763793945
+11760,tensor(591.0831),6.381956342272284,6.376274585723877
+12600,tensor(477.7185),6.16902169458109,6.171719551086426
+13440,tensor(357.5952),5.879401738044775,6.041259765625
+14280,tensor(300.5546),5.705629161183868,5.919500827789307
+15120,tensor(259.2738),5.557884844558499,5.610713958740234
+15960,tensor(236.5213),5.46603816380433,5.657792091369629
+16800,tensor(216.0584),5.375549020360431,5.34259033203125
+17640,tensor(199.4774),5.295700963639535,5.249312400817871
+18480,tensor(186.8399),5.230252159715263,5.466444492340088
+19320,tensor(174.4325),5.161537850637571,5.218142032623291
+20000,tensor(165.4119),5.108438900861695,5.233940601348877
+20160,tensor(164.5391),5.103147904454814,5.230566024780273
+21000,tensor(154.7863),5.042045109644885,5.188614845275879
+21840,tensor(146.6164),4.9878196422522665,5.2135701179504395
+22680,tensor(140.0771),4.942193148825406,4.814875602722168
+23520,tensor(134.7099),4.903123423951497,4.779435157775879
+24360,tensor(127.4303),4.8475694950158,4.873952388763428
+25200,tensor(122.9376),4.811676481888758,4.701533317565918
+26040,tensor(118.4770),4.77471930381811,4.734566688537598
+26880,tensor(113.0559),4.727882242880726,4.733781814575195
+27720,tensor(108.0131),4.682252737018169,4.784864902496338
+28560,tensor(105.1070),4.654978862870926,4.570435523986816
+29400,tensor(102.5748),4.6305917653992275,4.797003746032715
+30240,tensor(98.3004),4.588028168791278,4.692539691925049
+31080,tensor(95.5100),4.559231265461276,4.472110271453857
+31920,tensor(92.8367),4.530842494060643,4.457094192504883
+32760,tensor(91.0269),4.511155435824281,4.4847493171691895
+33600,tensor(89.1943),4.490817592042317,4.467089653015137
+34440,tensor(85.9484),4.453746852151591,4.714705467224121
+35280,tensor(84.4949),4.436690606212164,4.459835529327393
+36120,tensor(83.1834),4.4210476332931155,4.568218231201172
+36960,tensor(81.0199),4.394695270682963,4.207172870635986
+37800,tensor(78.6906),4.3655232479222015,4.555455207824707
+38640,tensor(77.7475),4.353466031675655,4.264974594116211
+39480,tensor(75.5957),4.32539894456547,4.3341593742370605
+40000,tensor(76.1601),4.3328377375670515,4.343103408813477
+40320,tensor(75.1305),4.3192271779498785,4.320957183837891
+41160,tensor(73.8844),4.302502227620491,4.186777591705322
+42000,tensor(72.7903),4.287583556785402,4.321813583374023
+42840,tensor(71.3143),4.2670973850087535,4.38191556930542
+43680,tensor(70.2743),4.2524054344231486,4.150886535644531
+44520,tensor(69.5957),4.242702348537355,4.198707580566406
+45360,tensor(67.9366),4.218575100198176,4.0208282470703125
+46200,tensor(68.2866),4.223713122272944,4.201685428619385
+47040,tensor(66.6248),4.1990775677830126,4.237384796142578
+47880,tensor(66.2193),4.192972441985144,4.080409049987793
+48720,tensor(65.3626),4.179950529930151,4.410434722900391
+49560,tensor(64.7271),4.170180269892182,4.0652947425842285
+50400,tensor(64.2023),4.162038403099747,4.079613208770752
+51240,tensor(62.7907),4.139807715800136,4.082955360412598
+52080,tensor(62.3912),4.133424125011499,4.0272297859191895
+52920,tensor(62.1102),4.12891009633575,4.157703399658203
+53760,tensor(61.3228),4.116151399522031,4.108161926269531
+54600,tensor(60.7504),4.106774397935912,4.0041704177856445
+55440,tensor(60.3719),4.1005239068614365,4.1067047119140625
+56280,tensor(59.6579),4.08862609094918,4.0335893630981445
+57120,tensor(59.8366),4.091618014737893,4.1296000480651855
+57960,tensor(59.0609),4.078569598672514,4.050683498382568
+58800,tensor(58.5802),4.070397336336109,4.029040813446045
+59640,tensor(58.5353),4.0696296974381,4.057146072387695
+60000,tensor(58.2795),4.065249836275363,4.020354747772217
+60480,tensor(58.3460),4.066390033016837,3.941168785095215
+61320,tensor(57.4409),4.050756845429046,4.060215950012207
+62160,tensor(56.9148),4.041554759463993,3.8870997428894043
+63000,tensor(56.7257),4.038228142318002,4.010282039642334
+63840,tensor(56.4911),4.034082424019186,3.8918683528900146
+64680,tensor(55.8974),4.023517806383106,3.913522481918335
+65520,tensor(55.8921),4.023423032173048,3.9048819541931152
+66360,tensor(55.6332),4.018779180626169,4.0070977210998535
+67200,tensor(55.3677),4.013996507319229,3.8241348266601562
+68040,tensor(55.1016),4.009178212468658,3.8613393306732178
+68880,tensor(54.4008),3.9963787630271006,3.984696388244629
+69720,tensor(54.6007),4.000046700662911,3.971323251724243
+70560,tensor(54.7101),4.002047452881438,3.8017985820770264
+71400,tensor(54.2510),3.993622047641266,4.045064926147461
+72240,tensor(54.2700),3.9939723986584994,3.7902510166168213
+73080,tensor(53.8975),3.987083983082342,4.033565998077393
+73920,tensor(53.4055),3.97791349718356,3.8635339736938477
+74760,tensor(53.2877),3.975705491423042,4.0212907791137695
+75600,tensor(53.2083),3.9742152747384747,3.9361703395843506
+76440,tensor(53.2604),3.975193798824509,3.7788941860198975
+77280,tensor(53.3057),3.9760427565371255,3.9743189811706543
+78120,tensor(52.6205),3.9631065192380786,3.8015661239624023
+78960,tensor(52.6849),3.964329612198599,3.915081262588501
+79800,tensor(53.0415),3.971074112218703,4.116347312927246
+80000,tensor(52.3625),3.9581908987596703,3.8000710010528564
+80640,tensor(52.4477),3.959817202735286,3.951565742492676
+81480,tensor(52.9591),3.969520092010498,3.83418345451355
+82320,tensor(52.5453),3.961675506067502,3.841287851333618
+83160,tensor(52.6411),3.963497509888563,4.05776309967041
+84000,tensor(52.3799),3.9585225909807105,3.8181488513946533
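Read as a curve, eval_loss falls from 8.61 to about 3.96 over the 100 epochs and is nearly flat for the last ~10,000 steps; the perplexity column is exp(eval_loss) throughout. A small analysis sketch (pandas assumed available):

import math
import pandas as pd

df = pd.read_csv("training_progress_scores.csv")
# Strip the "tensor(...)" wrapper the logger left around perplexity.
df["perplexity"] = df["perplexity"].str.extract(r"tensor\(([\d.]+)\)", expand=False).astype(float)
df["ppl_from_loss"] = df["eval_loss"].apply(math.exp)  # should match the perplexity column
print(df[["global_step", "perplexity", "ppl_from_loss", "eval_loss"]].tail())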
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff