Can you share your test code? I cannot reproduce it
#3 by heegyu
I can't reproduce it. This is my test notebook:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset

# inference only, no gradients needed
torch.set_grad_enabled(False)
device = "cuda:0"

model_id = "sileod/deberta-v3-large-tasksource-rlhf-reward-model"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id).eval().to(device)

dataset = load_dataset("heegyu/hh-rlhf-vicuna-format", split="test")
def join_conv(convs):
    # flatten a list of turns into "Human: ..." / "Assistant: ..." lines
    lines = []
    for conv in convs:
        if conv["from"] == "human":
            lines.append("Human: " + conv["value"])
        else:
            lines.append("Assistant: " + conv["value"])
    return "\n\n".join(lines)
def rank(context, response):
    # reward score for a (context, response) pair; the model has a single output logit
    inputs = tokenizer(context, response, truncation=True, return_tensors='pt').to(device)
    return model(**inputs).logits[0].cpu().detach().tolist()[0]
def map_item(item):
    # with context
    # context = join_conv(item["context"]) + "\n\n" + join_conv([item["instruction"]])
    # context = context.strip()
    # without context
    context = item["instruction"]["value"]
    chosen = rank(context, item["chosen"]["value"])
    rejected = rank(context, item["rejected"]["value"])
    return {
        "chosen_score": chosen,
        "rejected_score": rejected,
        "predict": 1 if chosen > rejected else 0
    }
# keep the most recent part of long inputs when truncating
tokenizer.truncation_side = "left"
classified = dataset.map(map_item, load_from_cache_file=False)

# fraction of pairs where the chosen response scores higher than the rejected one
print(classified.to_pandas().mean()["predict"])
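For the "with context" number, I switch to the commented-out lines in map_item instead of the single-turn instruction. Spelled out as its own function (map_item_with_context is just an illustrative name; it uses the same join_conv and rank helpers as above), that variant looks roughly like this:

def map_item_with_context(item):
    # prepend the earlier conversation turns, then the instruction itself
    context = join_conv(item["context"]) + "\n\n" + join_conv([item["instruction"]])
    context = context.strip()
    chosen = rank(context, item["chosen"]["value"])
    rejected = rank(context, item["rejected"]["value"])
    return {
        "chosen_score": chosen,
        "rejected_score": rejected,
        "predict": 1 if chosen > rejected else 0
    }

classified_ctx = dataset.map(map_item_with_context, load_from_cache_file=False)
print(classified_ctx.to_pandas().mean()["predict"])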
And the results (mean of the predict column, i.e. how often the chosen response scores higher than the rejected one) are:
- with context: 0.6792563143124415
- without context: 0.6627689429373246