derek-thomas HF staff committed on
Commit
f687064
1 Parent(s): 21f4f83

Updating to use env var model

Files changed (1)
backend/query_llm.py +7 -6
backend/query_llm.py CHANGED
@@ -5,14 +5,15 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
 
-tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
+MODEL = getenv("MODEL")
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
 
 temperature = 0.9
 top_p = 0.6
 repetition_penalty = 1.2
 
 text_client = InferenceClient(
-    "mistralai/Mistral-7B-Instruct-v0.1",
+    MODEL,
     token=getenv("HUGGING_FACE_HUB_TOKEN")
 )
 
@@ -38,7 +39,7 @@ def format_prompt(message: str) -> str:
 def generate(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 256,
              top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
     """
-    Generate a sequence of tokens based on a given prompt and history using Mistral client.
+    Generate a sequence of tokens based on a given prompt and history using MODEL client.
 
     Args:
         prompt (str): The initial prompt for the text generation.
@@ -77,12 +78,12 @@ def generate(prompt: str, history: str, temperature: float = 0.9, max_new_tokens
 
     except Exception as e:
         if "Too Many Requests" in str(e):
-            print("ERROR: Too many requests on Mistral client")
-            gr.Warning("Unfortunately Mistral is unable to process")
+            print(f"ERROR: Too many requests on {MODEL} client")
+            gr.Warning(f"Unfortunately {MODEL} is unable to process")
             return "Unfortunately, I am not able to process your request now."
         else:
             print("Unhandled Exception:", str(e))
-            gr.Warning("Unfortunately Mistral is unable to process")
+            gr.Warning(f"Unfortunately {MODEL} is unable to process")
             return "I do not know what happened, but I couldn't understand you."
 
     return output
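
With this change the model id is read from the MODEL environment variable instead of being hard-coded, so both MODEL and HUGGING_FACE_HUB_TOKEN must be set before the module is imported. A minimal usage sketch follows; the example model id, the token placeholder, and the backend.query_llm import path are assumptions for illustration, not part of this commit.

import os

# Set the configuration before importing the module, since MODEL is read at import time.
os.environ["MODEL"] = "mistralai/Mistral-7B-Instruct-v0.1"  # hypothetical model id
os.environ["HUGGING_FACE_HUB_TOKEN"] = "hf_..."             # hypothetical token placeholder

from backend.query_llm import generate

# generate() is annotated as a Generator, so stream the partial outputs as they arrive.
for chunk in generate("Hello, who are you?", history=""):
    print(chunk, end="", flush=True)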