File size: 3,595 Bytes
e501d43 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
# %%
import pandas as pd
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig
from peft import prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import DPOTrainer, DPOConfig
# %%
# Checkpoint to fine-tune and the first preference dataset (train split only).
model_name = "Undi95/Meta-Llama-3.1-8B-Claude-bf16"
dataset = load_dataset("Undi95/Weyaxi-humanish-dpo-project-noemoji")["train"]

# The tokenizer pads on the right and reuses the EOS token as its pad token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
# %%
# Chat template override. Every message is rendered as
#   <|start_header_id|>{role}<|end_header_id|>\n\n{trimmed content}<|eot_id|>
# with bos_token prepended to the first message only. An assistant header is
# ALWAYS appended after the last message: the template never inspects
# `add_generation_prompt`, so callers that do not want the header must strip it.
# The adjacent literals below concatenate to a single template string.
tokenizer.chat_template = (
    "{% set loop_messages = messages %}"
    "{% for message in loop_messages %}"
    "{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}"
    "{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}"
    "{{ content }}"
    "{% endfor %}"
    "{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}"
)
# %%
dataset2 = load_dataset("ResplendentAI/NSFW_RP_Format_DPO")['train']
# %%
# Mix up to 400 rows sampled from the first dataset with the whole second
# dataset, then shuffle the combined rows.
SAMPLE_SEED = 42  # fixed seed so the sampled subset and shuffle order are reproducible

dataset = dataset.to_pandas()
dataset2 = dataset2.to_pandas()

mixed_frame = pd.concat(
    [
        # Cap at the frame length: DataFrame.sample raises if n exceeds the
        # number of rows (and replace=False).
        dataset.sample(n=min(400, len(dataset)), random_state=SAMPLE_SEED),
        dataset2,
    ]
).sample(frac=1, random_state=SAMPLE_SEED)

# After concat + sample the index is no longer a plain RangeIndex, so
# Dataset.from_pandas would otherwise store it as an extra
# `__index_level_0__` column. Drop it explicitly.
dataset = Dataset.from_pandas(mixed_frame, preserve_index=False)
# %%
def template_prompt(system, prompt):
    """Render a user prompt (and optional system message) as chat-template text.

    Args:
        system: System message content, or ``None`` to omit the system turn.
        prompt: User message content.

    Returns:
        Whatever the module-level ``tokenizer.apply_chat_template`` returns
        for the conversation with ``tokenize=False`` (i.e. text, not token ids).
    """
    conversation = [{"role": "user", "content": prompt}]
    if system is not None:
        # The system turn, when present, goes before the user turn.
        conversation.insert(0, {"role": "system", "content": system})

    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=False,
    )
def template_answer(answer):
    """Render a single assistant message as chat-template text.

    Args:
        answer: Assistant message content.

    Returns:
        Whatever the module-level ``tokenizer.apply_chat_template`` returns
        for a one-message conversation with ``tokenize=False``.
    """
    assistant_turn = {"role": "assistant", "content": answer}
    rendered = tokenizer.apply_chat_template(
        [assistant_turn],
        tokenize=False,
        add_generation_prompt=False,
    )
    return rendered
# %%
# Rewrite the prompt / chosen / rejected columns as chat-template text.
def _render_prompt(text):
    """Render a user prompt and remove every assistant header from the result."""
    # No system message is passed here -- change this according to the dataset!!!
    rendered = template_prompt(None, text)
    return rendered.replace("<|start_header_id|>assistant<|end_header_id|>\n\n", "")


def _render_answer(text):
    """Render an assistant answer, then strip BOS and any header that follows a '>'."""
    rendered = template_answer(text).replace('<|begin_of_text|>', '')
    # Only an assistant header directly preceded by '>' is removed. Presumably
    # that is the one the chat template appends after '<|eot_id|>', while the
    # header that opens the answer is kept -- verify against the active template.
    return rendered.replace('><|start_header_id|>assistant<|end_header_id|>\n\n', '>')


# Each column is derived only from its own original value, so all three can be
# rewritten in a single pass over the dataset.
dataset = dataset.map(
    lambda x: {
        "prompt": _render_prompt(x["prompt"]),
        "chosen": _render_answer(x["chosen"]),
        "rejected": _render_answer(x["rejected"]),
    },
)
# %%
# Notebook-style inspection of the first formatted row.
dataset[0]
# %%
# LoRA configuration: rank 16, alpha 32, dropout 0.05, no bias training,
# applied to the seven projection modules listed below.
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "k_proj",
        "gate_proj",
        "v_proj",
        "up_proj",
        "q_proj",
        "o_proj",
        "down_proj",
    ],
)

# Model to fine-tune, loaded with 4-bit quantization.
# The quantization request is passed as a BitsAndBytesConfig object because
# the bare `load_in_4bit=True` keyword to from_pretrained is deprecated;
# BitsAndBytesConfig(load_in_4bit=True) keeps all other quantization defaults.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)
# The generation cache is switched off for training.
# NOTE(review): transformers warns that use_cache is incompatible with
# gradient checkpointing -- presumably the reason for this line.
model.config.use_cache = False
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
# %%
# Directory used both for trainer checkpoints and for the final saved model.
# model_name is interpolated verbatim, so any "/" in it becomes a nested
# sub-directory under checkpoints/.
output_name = f"checkpoints/exp_human_{model_name}"

# Training is bounded by max_steps only. The previous `num_train_epochs=1`
# was removed because a positive max_steps overrides it (see the
# TrainingArguments documentation), which made the epoch setting misleading.
# 1 sample per device x 4 accumulation steps = 4 samples per optimizer step.
training_args = DPOConfig(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    output_dir=output_name,
    logging_steps=1,
    max_steps=50,
)

# No explicit reference model is supplied.
# NOTE(review): with a peft_config, TRL is expected to use the base weights
# (adapters disabled) as the reference policy -- confirm for the installed
# TRL version.
trainer = DPOTrainer(
    model,
    ref_model=None,
    train_dataset=dataset,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=peft_config,
)
trainer.train()
trainer.save_model(output_name)
|