macadeliccc committed on
Commit a9a8422
1 Parent(s): b1f30ac
Files changed (1)
  1. app.py +4 -3
app.py CHANGED
@@ -8,13 +8,14 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Load the tokenizer and model
-tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
+tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
 model = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
+model.eval()  # Set the model to evaluation mode
 
 @spaces.GPU
 def generate_response(user_input, chat_history):
     try:
-        prompt = "GPT4 Correct User: " + user_input + "<|end_of_turn|>" + "GPT4 Correct Assistant: "
+        prompt = "GPT4 Correct User: " + user_input + "GPT4 Correct Assistant: "
         if chat_history:
             prompt = chat_history[-1024:] + prompt  # Keep last 1024 tokens of history
 
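Two of the changes in this hunk fix real bugs: a Hugging Face tokenizer is a plain Python object, not an nn.Module, so the old .to(device) call on it raises AttributeError (device placement belongs on the model and on the tensors the tokenizer returns), and model.eval() switches off dropout for inference. A minimal sketch of how the corrected pieces fit together; the tokenize/generate/decode steps and the sample prompt are assumptions, since the rest of generate_response sits outside this hunk:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"

# The tokenizer stays on the CPU; only the model and the input tensors move to the device
tokenizer = AutoTokenizer.from_pretrained("berkeley-nest/Starling-LM-7B-alpha")
model = AutoModelForCausalLM.from_pretrained("berkeley-nest/Starling-LM-7B-alpha").to(device)
model.eval()  # disable dropout and other training-only behavior

prompt = "GPT4 Correct User: Hello!GPT4 Correct Assistant: "  # assumed sample input
inputs = tokenizer(prompt, return_tensors="pt").to(device)  # BatchEncoding.to() moves the tensors
with torch.no_grad():  # assumption: no gradients are needed at inference time
    output_ids = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))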
@@ -49,4 +50,4 @@ with gr.Blocks(gr.themes.Soft()) as app:
     send.click(generate_response, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
     clear.click(clear_chat, outputs=[chatbot, chat_history])
 
-app.launch()
+app.launch()
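One kept line in the first hunk also deserves a caveat: chat_history[-1024:] slices a string, so it keeps the last 1024 characters, not tokens as the comment says. If token-level truncation is actually intended, a sketch of one way to do it; truncate_history is a hypothetical helper, not part of app.py:

def truncate_history(history: str, tokenizer, max_tokens: int = 1024) -> str:
    # Encode the history, keep only the trailing max_tokens ids, and decode back to text
    token_ids = tokenizer.encode(history, add_special_tokens=False)
    return tokenizer.decode(token_ids[-max_tokens:])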
 
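For context, the second hunk's event handlers presuppose a Blocks layout roughly like the sketch below. The component definitions, clear_chat, and the generate_response stand-in are all assumptions here; only the two .click(...) calls and app.launch() appear in the diff:

import gradio as gr

def generate_response(user_input, chat_history):
    # stand-in for the real function from the first hunk
    return [[user_input, "(model reply)"]], chat_history + user_input

def clear_chat():
    return [], ""  # assumed: empty chatbot, empty history string

with gr.Blocks(gr.themes.Soft()) as app:
    chatbot = gr.Chatbot()
    user_input = gr.Textbox(label="Your message")
    chat_history = gr.State("")
    send = gr.Button("Send")
    clear = gr.Button("Clear")
    send.click(generate_response, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
    clear.click(clear_chat, outputs=[chatbot, chat_history])

app.launch()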