File size: 2,333 Bytes
1ad8e33 d717661 1ad8e33 d717661 1ad8e33 fa4b600 1ad8e33 595a507 1ad8e33 595a507 1ad8e33 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import time
import gradio as gr
# Hub id of the base checkpoint, and the local directory that holds both the
# fine-tuned PEFT adapter weights and the tokenizer files.
BASE_MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
ADAPTER_DIR = './finetunedPEFTModel'

# The base weights are loaded in plain bfloat16. A 4-bit NF4 quantization
# config (BitsAndBytesConfig with load_in_4bit=True, bnb_4bit_quant_type="nf4",
# bnb_4bit_compute_dtype=torch.bfloat16, passed as quantization_config=...) is
# left disabled here.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
)

# Attach the fine-tuned adapter on top of the base model.
model.load_adapter(ADAPTER_DIR)

# The tokenizer comes from the adapter directory rather than from the base
# checkpoint. Disabled alternatives kept for reference: loading it from
# BASE_MODEL_ID instead, and setting tokenizer.pad_token = tokenizer.unk_token.
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR, trust_remote_code=True)
# Lazily created text-generation pipeline shared by every request.
_text_generator = None


def _get_text_generator():
    """Return the shared text-generation pipeline, creating it on first use."""
    global _text_generator
    if _text_generator is None:
        # Wrapping the already-loaded model and tokenizer does not depend on
        # the request, so build the pipeline once instead of on every call.
        _text_generator = pipeline(
            task="text-generation", model=model, tokenizer=tokenizer
        )
    return _text_generator


def generateText(inputText="What is QLora finetuning?", num_tokens=200):
    """Generate text for ``inputText`` with the fine-tuned model.

    Args:
        inputText: Prompt handed to the model as-is; no instruction markers
            or chat template are added around it.
        num_tokens: Upper bound on the number of newly generated tokens.
            The value is coerced to ``int`` because it may arrive from a UI
            slider as a float.

    Returns:
        The ``generated_text`` field of the first pipeline result.
    """
    generator = _get_text_generator()
    # Use max_new_tokens rather than max_length: max_length also counts the
    # prompt tokens, so a long question would leave little or no room for the
    # answer, whereas the caller asks for a number of *output* tokens.
    result = generator(str(inputText), max_new_tokens=int(num_tokens))
    return result[0]['generated_text']
# Header text displayed at the top of the demo page.
title = "Fine tuned Phi3.5 instruct model on OpenAssist dataset using QLora"
description = "Fine tuned Phi3.5 instruct model on OpenAssist dataset using QLora. Running on CPU and thus a bit slow. So please be patient on submitting a request as it might take 15 to 20 minutes for a response."

# Clickable sample rows: each one is [question, token count], matching the
# order of the two input components below.
_EXAMPLE_TOKENS = 200
examples = [
    [question, _EXAMPLE_TOKENS]
    for question in (
        "How can I optimize my web page for online search so that it is on top?",
        "Can you give me an example of python script for Fibonacci series?",
        "Can you explain what is Contrastive Loss in Deep Learning?",
        "How are Sentence Transformers different from Huggingface Transformers?",
    )
]

# Input widgets, in the same order as generateText's parameters.
question_box = gr.Textbox(label="Question that you want to ask")
token_slider = gr.Slider(
    100,
    500,
    value=200,
    step=100,
    label="Number of tokens that you want in your output",
)

# Single-function interface: two inputs in, one text box out. Examples are
# not cached, so they are not run through the model when the app starts.
demo = gr.Interface(
    fn=generateText,
    inputs=[question_box, token_slider],
    outputs=[gr.Text()],
    title=title,
    description=description,
    examples=examples,
    cache_examples=False,
)

# Start the web server for the demo.
demo.launch()
|