Model usage

The example below defines a BERT encoder with a custom pooling head, loads the tokenizer from this repository, encodes a query and a document, and computes their cosine similarity.
from transformers import AutoConfig, AutoTokenizer, BertModel
from torch import nn
import torch.nn.functional as F
import torch


class HFCustomBertModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.bert = BertModel(config)
        # Pooling head: dense layer + tanh on top of BERT's pooled output
        self.pooler = nn.Sequential(
            nn.Linear(config.hidden_size, config.hidden_size),
            nn.Tanh()
        )

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        outputs = self.bert(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        pooled_output = self.pooler(outputs.pooler_output)
        return pooled_output
def load_custom_model_and_tokenizer(model_path):
    config = AutoConfig.from_pretrained(model_path)
    model = HFCustomBertModel(config)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    return model, tokenizer


model_path = "Imran1/embadding"
model, tokenizer = load_custom_model_and_tokenizer(model_path)
queries = ["how much protein should a female eat"]
documents = ["As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day."]
model.eval()
with torch.no_grad():
    # Tokenize and encode the query and the document with the same model
    query_inputs = tokenizer(queries, padding=True, truncation=True, return_tensors="pt")
    document_inputs = tokenizer(documents, padding=True, truncation=True, return_tensors="pt")
    query_embeddings = model(**query_inputs)
    document_embeddings = model(**document_inputs)

# L2-normalize so the dot product equals cosine similarity
query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
document_embeddings = F.normalize(document_embeddings, p=2, dim=1)

scores = torch.matmul(query_embeddings, document_embeddings.transpose(0, 1))
print(f"Similarity score: {scores.item():.4f}")
Similarity score: 0.9605
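The same encode-normalize-dot-product pipeline extends to ranking several candidate documents against one query. The sketch below reuses the `model` and `tokenizer` loaded above; the helper name `rank_documents` and the candidate documents are illustrative assumptions, not part of this repository.

# A minimal ranking sketch, assuming the `model` and `tokenizer` objects loaded above.
# The candidate documents are made-up examples for illustration.
def rank_documents(query, candidate_docs):
    with torch.no_grad():
        q_inputs = tokenizer([query], padding=True, truncation=True, return_tensors="pt")
        d_inputs = tokenizer(candidate_docs, padding=True, truncation=True, return_tensors="pt")
        # Encode and L2-normalize so dot products are cosine similarities
        q_emb = F.normalize(model(**q_inputs), p=2, dim=1)
        d_emb = F.normalize(model(**d_inputs), p=2, dim=1)
    scores = torch.matmul(q_emb, d_emb.transpose(0, 1)).squeeze(0)
    ranked = scores.argsort(descending=True).tolist()
    return [(candidate_docs[i], scores[i].item()) for i in ranked]

for doc, score in rank_documents(
    "how much protein should a female eat",
    [
        "The CDC's average protein requirement for women ages 19 to 70 is 46 grams per day.",
        "Python is a popular programming language for machine learning.",
    ],
):
    print(f"{score:.4f}  {doc}")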