Upload model files

Browse files

Files changed (9) hide show

.gitattributes +5 -35
README.md +202 -0
adapter_config.json +28 -0
adapter_model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +579 -0
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,5 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+adapter_model.safetensors filter=lfs diff=lfs merge=lfs -text
+optimizer.pt filter=lfs diff=lfs merge=lfs -text
+rng_state.pth filter=lfs diff=lfs merge=lfs -text
+scheduler.pt filter=lfs diff=lfs merge=lfs -text
+training_args.bin filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: beomi/KoAlpaca-Polyglot-5.8B
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** LoRA adapter (PEFT) for a causal language model
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** beomi/KoAlpaca-Polyglot-5.8B
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly. -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.11.2.dev0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e0d94c41e26febd6e84b52eb5f1895344b1a1579cea18039b9df7879759eb5b7
+size 14689152

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:929122f2a02f0d1eecdf045f7fdd70c8ef8450a893a412663116ea11141b38da
+size 7422330

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4a4f0d569579ceac3c5854941e7194a47a81cfcf301e32f9f271d358820557
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae0ac8fb7f390e1fbaf23ee211815016a80881fc02c77c959c75e92c660b9685
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,579 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 7815,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06397952655150352,
+      "grad_norm": 1.344713568687439,
+      "learning_rate": 0.00029616122840690973,
+      "loss": 2.6677,
+      "step": 100
+    },
+    {
+      "epoch": 0.12795905310300704,
+      "grad_norm": 1.3358043432235718,
+      "learning_rate": 0.00029232245681381954,
+      "loss": 2.2932,
+      "step": 200
+    },
+    {
+      "epoch": 0.19193857965451055,
+      "grad_norm": 1.3450862169265747,
+      "learning_rate": 0.0002885220729366602,
+      "loss": 2.2158,
+      "step": 300
+    },
+    {
+      "epoch": 0.2559181062060141,
+      "grad_norm": 1.1904783248901367,
+      "learning_rate": 0.00028468330134357004,
+      "loss": 2.1907,
+      "step": 400
+    },
+    {
+      "epoch": 0.3198976327575176,
+      "grad_norm": 1.3194555044174194,
+      "learning_rate": 0.00028084452975047985,
+      "loss": 2.1725,
+      "step": 500
+    },
+    {
+      "epoch": 0.3838771593090211,
+      "grad_norm": 1.3454678058624268,
+      "learning_rate": 0.0002770057581573896,
+      "loss": 2.1586,
+      "step": 600
+    },
+    {
+      "epoch": 0.44785668586052463,
+      "grad_norm": 1.318642497062683,
+      "learning_rate": 0.0002731669865642994,
+      "loss": 2.1429,
+      "step": 700
+    },
+    {
+      "epoch": 0.5118362124120281,
+      "grad_norm": 1.1936390399932861,
+      "learning_rate": 0.0002693282149712092,
+      "loss": 2.0988,
+      "step": 800
+    },
+    {
+      "epoch": 0.5758157389635317,
+      "grad_norm": 1.1406426429748535,
+      "learning_rate": 0.000265489443378119,
+      "loss": 2.0834,
+      "step": 900
+    },
+    {
+      "epoch": 0.6397952655150352,
+      "grad_norm": 1.2195415496826172,
+      "learning_rate": 0.0002616506717850288,
+      "loss": 2.1004,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7037747920665387,
+      "grad_norm": 1.2042839527130127,
+      "learning_rate": 0.00025781190019193856,
+      "loss": 2.0883,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7677543186180422,
+      "grad_norm": 1.159279465675354,
+      "learning_rate": 0.0002539731285988483,
+      "loss": 2.0852,
+      "step": 1200
+    },
+    {
+      "epoch": 0.8317338451695457,
+      "grad_norm": 1.2210711240768433,
+      "learning_rate": 0.00025013435700575813,
+      "loss": 2.0449,
+      "step": 1300
+    },
+    {
+      "epoch": 0.8957133717210493,
+      "grad_norm": 1.2941797971725464,
+      "learning_rate": 0.00024629558541266794,
+      "loss": 2.0455,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9596928982725528,
+      "grad_norm": 1.1550540924072266,
+      "learning_rate": 0.00024245681381957772,
+      "loss": 2.0293,
+      "step": 1500
+    },
+    {
+      "epoch": 1.0236724248240563,
+      "grad_norm": 1.3520652055740356,
+      "learning_rate": 0.0002386564299424184,
+      "loss": 1.9799,
+      "step": 1600
+    },
+    {
+      "epoch": 1.0876519513755598,
+      "grad_norm": 1.375148892402649,
+      "learning_rate": 0.0002348176583493282,
+      "loss": 1.9032,
+      "step": 1700
+    },
+    {
+      "epoch": 1.1516314779270633,
+      "grad_norm": 1.4116652011871338,
+      "learning_rate": 0.00023097888675623797,
+      "loss": 1.9398,
+      "step": 1800
+    },
+    {
+      "epoch": 1.2156110044785668,
+      "grad_norm": 1.2254273891448975,
+      "learning_rate": 0.00022714011516314776,
+      "loss": 1.9097,
+      "step": 1900
+    },
+    {
+      "epoch": 1.2795905310300704,
+      "grad_norm": 1.3888587951660156,
+      "learning_rate": 0.00022330134357005757,
+      "loss": 1.9039,
+      "step": 2000
+    },
+    {
+      "epoch": 1.3435700575815739,
+      "grad_norm": 1.4431171417236328,
+      "learning_rate": 0.00021946257197696736,
+      "loss": 1.9095,
+      "step": 2100
+    },
+    {
+      "epoch": 1.4075495841330774,
+      "grad_norm": 1.2467221021652222,
+      "learning_rate": 0.00021562380038387714,
+      "loss": 1.9103,
+      "step": 2200
+    },
+    {
+      "epoch": 1.471529110684581,
+      "grad_norm": 1.41363525390625,
+      "learning_rate": 0.00021178502879078693,
+      "loss": 1.9132,
+      "step": 2300
+    },
+    {
+      "epoch": 1.5355086372360844,
+      "grad_norm": 1.4501458406448364,
+      "learning_rate": 0.0002079462571976967,
+      "loss": 1.9095,
+      "step": 2400
+    },
+    {
+      "epoch": 1.599488163787588,
+      "grad_norm": 1.2889657020568848,
+      "learning_rate": 0.00020410748560460652,
+      "loss": 1.9308,
+      "step": 2500
+    },
+    {
+      "epoch": 1.6634676903390915,
+      "grad_norm": 1.4488581418991089,
+      "learning_rate": 0.0002002687140115163,
+      "loss": 1.9161,
+      "step": 2600
+    },
+    {
+      "epoch": 1.727447216890595,
+      "grad_norm": 1.3905428647994995,
+      "learning_rate": 0.0001964299424184261,
+      "loss": 1.8958,
+      "step": 2700
+    },
+    {
+      "epoch": 1.7914267434420985,
+      "grad_norm": 1.3509632349014282,
+      "learning_rate": 0.00019259117082533588,
+      "loss": 1.8933,
+      "step": 2800
+    },
+    {
+      "epoch": 1.855406269993602,
+      "grad_norm": 1.3293097019195557,
+      "learning_rate": 0.00018875239923224566,
+      "loss": 1.8902,
+      "step": 2900
+    },
+    {
+      "epoch": 1.9193857965451055,
+      "grad_norm": 1.3575371503829956,
+      "learning_rate": 0.00018491362763915547,
+      "loss": 1.9107,
+      "step": 3000
+    },
+    {
+      "epoch": 1.983365323096609,
+      "grad_norm": 1.4029614925384521,
+      "learning_rate": 0.00018107485604606526,
+      "loss": 1.9122,
+      "step": 3100
+    },
+    {
+      "epoch": 2.0473448496481126,
+      "grad_norm": 1.6483345031738281,
+      "learning_rate": 0.00017723608445297504,
+      "loss": 1.7797,
+      "step": 3200
+    },
+    {
+      "epoch": 2.111324376199616,
+      "grad_norm": 1.7276026010513306,
+      "learning_rate": 0.00017339731285988483,
+      "loss": 1.7411,
+      "step": 3300
+    },
+    {
+      "epoch": 2.1753039027511196,
+      "grad_norm": 1.6449826955795288,
+      "learning_rate": 0.00016955854126679461,
+      "loss": 1.7355,
+      "step": 3400
+    },
+    {
+      "epoch": 2.239283429302623,
+      "grad_norm": 1.606766939163208,
+      "learning_rate": 0.00016571976967370443,
+      "loss": 1.7479,
+      "step": 3500
+    },
+    {
+      "epoch": 2.3032629558541267,
+      "grad_norm": 1.6743805408477783,
+      "learning_rate": 0.0001618809980806142,
+      "loss": 1.7376,
+      "step": 3600
+    },
+    {
+      "epoch": 2.36724248240563,
+      "grad_norm": 1.58048415184021,
+      "learning_rate": 0.00015804222648752397,
+      "loss": 1.7499,
+      "step": 3700
+    },
+    {
+      "epoch": 2.4312220089571337,
+      "grad_norm": 1.7509996891021729,
+      "learning_rate": 0.00015420345489443375,
+      "loss": 1.741,
+      "step": 3800
+    },
+    {
+      "epoch": 2.495201535508637,
+      "grad_norm": 1.6279881000518799,
+      "learning_rate": 0.00015036468330134354,
+      "loss": 1.7584,
+      "step": 3900
+    },
+    {
+      "epoch": 2.5591810620601407,
+      "grad_norm": 1.6708228588104248,
+      "learning_rate": 0.00014652591170825335,
+      "loss": 1.7505,
+      "step": 4000
+    },
+    {
+      "epoch": 2.6231605886116443,
+      "grad_norm": 1.628318428993225,
+      "learning_rate": 0.00014268714011516314,
+      "loss": 1.7535,
+      "step": 4100
+    },
+    {
+      "epoch": 2.6871401151631478,
+      "grad_norm": 1.66116464138031,
+      "learning_rate": 0.00013884836852207292,
+      "loss": 1.7534,
+      "step": 4200
+    },
+    {
+      "epoch": 2.7511196417146513,
+      "grad_norm": 1.7303767204284668,
+      "learning_rate": 0.0001350095969289827,
+      "loss": 1.7605,
+      "step": 4300
+    },
+    {
+      "epoch": 2.815099168266155,
+      "grad_norm": 1.6892797946929932,
+      "learning_rate": 0.00013117082533589252,
+      "loss": 1.7343,
+      "step": 4400
+    },
+    {
+      "epoch": 2.8790786948176583,
+      "grad_norm": 1.700649380683899,
+      "learning_rate": 0.0001273320537428023,
+      "loss": 1.7545,
+      "step": 4500
+    },
+    {
+      "epoch": 2.943058221369162,
+      "grad_norm": 1.7158896923065186,
+      "learning_rate": 0.0001234932821497121,
+      "loss": 1.7472,
+      "step": 4600
+    },
+    {
+      "epoch": 3.0070377479206654,
+      "grad_norm": 1.5952404737472534,
+      "learning_rate": 0.00011965451055662187,
+      "loss": 1.709,
+      "step": 4700
+    },
+    {
+      "epoch": 3.071017274472169,
+      "grad_norm": 1.8965271711349487,
+      "learning_rate": 0.00011581573896353166,
+      "loss": 1.5308,
+      "step": 4800
+    },
+    {
+      "epoch": 3.1349968010236724,
+      "grad_norm": 1.9957573413848877,
+      "learning_rate": 0.00011197696737044146,
+      "loss": 1.564,
+      "step": 4900
+    },
+    {
+      "epoch": 3.198976327575176,
+      "grad_norm": 2.0544333457946777,
+      "learning_rate": 0.00010813819577735124,
+      "loss": 1.5668,
+      "step": 5000
+    },
+    {
+      "epoch": 3.2629558541266794,
+      "grad_norm": 2.041703462600708,
+      "learning_rate": 0.00010429942418426103,
+      "loss": 1.5766,
+      "step": 5100
+    },
+    {
+      "epoch": 3.326935380678183,
+      "grad_norm": 2.300631284713745,
+      "learning_rate": 0.00010046065259117082,
+      "loss": 1.5701,
+      "step": 5200
+    },
+    {
+      "epoch": 3.3909149072296865,
+      "grad_norm": 1.9454134702682495,
+      "learning_rate": 9.662188099808061e-05,
+      "loss": 1.5701,
+      "step": 5300
+    },
+    {
+      "epoch": 3.45489443378119,
+      "grad_norm": 2.113377571105957,
+      "learning_rate": 9.278310940499041e-05,
+      "loss": 1.5882,
+      "step": 5400
+    },
+    {
+      "epoch": 3.5188739603326935,
+      "grad_norm": 2.2492353916168213,
+      "learning_rate": 8.894433781190018e-05,
+      "loss": 1.5778,
+      "step": 5500
+    },
+    {
+      "epoch": 3.582853486884197,
+      "grad_norm": 2.1024489402770996,
+      "learning_rate": 8.510556621880996e-05,
+      "loss": 1.5926,
+      "step": 5600
+    },
+    {
+      "epoch": 3.6468330134357005,
+      "grad_norm": 2.1116743087768555,
+      "learning_rate": 8.126679462571976e-05,
+      "loss": 1.5937,
+      "step": 5700
+    },
+    {
+      "epoch": 3.710812539987204,
+      "grad_norm": 2.013080596923828,
+      "learning_rate": 7.742802303262955e-05,
+      "loss": 1.5913,
+      "step": 5800
+    },
+    {
+      "epoch": 3.7747920665387076,
+      "grad_norm": 2.1557400226593018,
+      "learning_rate": 7.358925143953934e-05,
+      "loss": 1.6041,
+      "step": 5900
+    },
+    {
+      "epoch": 3.838771593090211,
+      "grad_norm": 2.10186767578125,
+      "learning_rate": 6.975047984644913e-05,
+      "loss": 1.5799,
+      "step": 6000
+    },
+    {
+      "epoch": 3.9027511196417146,
+      "grad_norm": 2.129519462585449,
+      "learning_rate": 6.591170825335893e-05,
+      "loss": 1.5946,
+      "step": 6100
+    },
+    {
+      "epoch": 3.966730646193218,
+      "grad_norm": 2.045646905899048,
+      "learning_rate": 6.20729366602687e-05,
+      "loss": 1.5882,
+      "step": 6200
+    },
+    {
+      "epoch": 4.030710172744722,
+      "grad_norm": 2.2427146434783936,
+      "learning_rate": 5.82341650671785e-05,
+      "loss": 1.5005,
+      "step": 6300
+    },
+    {
+      "epoch": 4.094689699296225,
+      "grad_norm": 2.2632296085357666,
+      "learning_rate": 5.439539347408829e-05,
+      "loss": 1.3888,
+      "step": 6400
+    },
+    {
+      "epoch": 4.158669225847729,
+      "grad_norm": 2.541220188140869,
+      "learning_rate": 5.0556621880998075e-05,
+      "loss": 1.4124,
+      "step": 6500
+    },
+    {
+      "epoch": 4.222648752399232,
+      "grad_norm": 2.566311836242676,
+      "learning_rate": 4.6717850287907866e-05,
+      "loss": 1.409,
+      "step": 6600
+    },
+    {
+      "epoch": 4.286628278950736,
+      "grad_norm": 2.425945281982422,
+      "learning_rate": 4.287907869481765e-05,
+      "loss": 1.4134,
+      "step": 6700
+    },
+    {
+      "epoch": 4.350607805502239,
+      "grad_norm": 2.4377615451812744,
+      "learning_rate": 3.904030710172744e-05,
+      "loss": 1.4257,
+      "step": 6800
+    },
+    {
+      "epoch": 4.414587332053743,
+      "grad_norm": 2.6660194396972656,
+      "learning_rate": 3.5201535508637234e-05,
+      "loss": 1.4288,
+      "step": 6900
+    },
+    {
+      "epoch": 4.478566858605246,
+      "grad_norm": 2.393036365509033,
+      "learning_rate": 3.1362763915547026e-05,
+      "loss": 1.4182,
+      "step": 7000
+    },
+    {
+      "epoch": 4.54254638515675,
+      "grad_norm": 2.6361422538757324,
+      "learning_rate": 2.752399232245681e-05,
+      "loss": 1.4149,
+      "step": 7100
+    },
+    {
+      "epoch": 4.606525911708253,
+      "grad_norm": 2.6104772090911865,
+      "learning_rate": 2.3685220729366603e-05,
+      "loss": 1.407,
+      "step": 7200
+    },
+    {
+      "epoch": 4.670505438259757,
+      "grad_norm": 2.4266579151153564,
+      "learning_rate": 1.9846449136276387e-05,
+      "loss": 1.4113,
+      "step": 7300
+    },
+    {
+      "epoch": 4.73448496481126,
+      "grad_norm": 2.5348973274230957,
+      "learning_rate": 1.600767754318618e-05,
+      "loss": 1.3936,
+      "step": 7400
+    },
+    {
+      "epoch": 4.798464491362764,
+      "grad_norm": 2.3764045238494873,
+      "learning_rate": 1.2168905950095967e-05,
+      "loss": 1.4097,
+      "step": 7500
+    },
+    {
+      "epoch": 4.862444017914267,
+      "grad_norm": 2.4267590045928955,
+      "learning_rate": 8.330134357005757e-06,
+      "loss": 1.4209,
+      "step": 7600
+    },
+    {
+      "epoch": 4.926423544465771,
+      "grad_norm": 2.7150962352752686,
+      "learning_rate": 4.4913627639155465e-06,
+      "loss": 1.4148,
+      "step": 7700
+    },
+    {
+      "epoch": 4.990403071017274,
+      "grad_norm": 2.550471067428589,
+      "learning_rate": 6.525911708253358e-07,
+      "loss": 1.4254,
+      "step": 7800
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 7815,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 2500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.2180160877992346e+17,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e050d73f689ff2dfa70a49bc2a70b8327562c6588ce1897c8a51d8827a7743c
+size 5112