Spaces:
Running
on
Zero
Running
on
Zero
macadeliccc
committed on
Commit
•
a9a8422
1
Parent(s):
b1f30ac
test
Browse files
app.py
CHANGED
@@ -8,13 +8,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
8 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
9 |
|
10 |
# Load the tokenizer and model
|
11 |
-
tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
|
12 |
model = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
|
|
|
13 |
|
14 |
@spaces.GPU
|
15 |
def generate_response(user_input, chat_history):
|
16 |
try:
|
17 |
-
prompt = "GPT4 Correct User: " + user_input + "
|
18 |
if chat_history:
|
19 |
prompt = chat_history[-1024:] + prompt # Keep last 1024 tokens of history
|
20 |
|
@@ -49,4 +50,4 @@ with gr.Blocks(gr.themes.Soft()) as app:
|
|
49 |
send.click(generate_response, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
|
50 |
clear.click(clear_chat, outputs=[chatbot, chat_history])
|
51 |
|
52 |
-
app.launch()
|
|
|
8 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
9 |
|
10 |
# Load the tokenizer and model
|
11 |
+
tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
|
12 |
model = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
|
13 |
+
model.eval() # Set the model to evaluation mode
|
14 |
|
15 |
@spaces.GPU
|
16 |
def generate_response(user_input, chat_history):
|
17 |
try:
|
18 |
+
prompt = "GPT4 Correct User: " + user_input + "GPT4 Correct Assistant: "
|
19 |
if chat_history:
|
20 |
prompt = chat_history[-1024:] + prompt # Keep last 1024 tokens of history
|
21 |
|
|
|
50 |
send.click(generate_response, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
|
51 |
clear.click(clear_chat, outputs=[chatbot, chat_history])
|
52 |
|
53 |
+
app.launch()
|