Commit f687064 (parent: 21f4f83)
Updating to use env var model

backend/query_llm.py · +7 -6 · CHANGED
@@ -5,14 +5,15 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from transformers import AutoTokenizer
 
-…
+MODEL = getenv("MODEL")
+tokenizer = AutoTokenizer.from_pretrained(MODEL)
 
 temperature = 0.9
 top_p = 0.6
 repetition_penalty = 1.2
 
 text_client = InferenceClient(
-…
+    MODEL,
     token=getenv("HUGGING_FACE_HUB_TOKEN")
 )
 
@@ -38,7 +39,7 @@ def format_prompt(message: str) -> str:
 def generate(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 256,
              top_p: float = 0.95, repetition_penalty: float = 1.0) -> Generator[str, None, str]:
     """
-    Generate a sequence of tokens based on a given prompt and history using…
+    Generate a sequence of tokens based on a given prompt and history using MODEL client.
 
     Args:
         prompt (str): The initial prompt for the text generation.
@@ -77,12 +78,12 @@ def generate(prompt: str, history: str, temperature: float = 0.9, max_new_tokens
 
     except Exception as e:
         if "Too Many Requests" in str(e):
-            print("ERROR: Too many requests on…
-            gr.Warning("Unfortunately…
+            print(f"ERROR: Too many requests on {MODEL} client")
+            gr.Warning(f"Unfortunately {MODEL} is unable to process")
             return "Unfortunately, I am not able to process your request now."
         else:
             print("Unhandled Exception:", str(e))
-            gr.Warning("Unfortunately…
+            gr.Warning(f"Unfortunately {MODEL} is unable to process")
             return "I do not know what happened, but I couldn't understand you."
 
     return output
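The first hunk makes both the tokenizer and the inference client configurable through environment variables. A minimal runnable sketch of the resulting pattern (the example model id and the explicit None check are illustrative additions, not part of the commit):

# Sketch of the configuration pattern this commit introduces: the model id
# and the API token come from the environment instead of being hardcoded.
from os import getenv

from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

# Hypothetical example value; the Space would set this in its settings:
#   MODEL=mistralai/Mistral-7B-Instruct-v0.1
MODEL = getenv("MODEL")
if MODEL is None:
    # getenv returns None when the variable is unset; failing fast here is
    # clearer than the error AutoTokenizer.from_pretrained(None) raises later.
    raise RuntimeError("Set the MODEL environment variable to a model id")

tokenizer = AutoTokenizer.from_pretrained(MODEL)
text_client = InferenceClient(MODEL, token=getenv("HUGGING_FACE_HUB_TOKEN"))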
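The other two hunks only touch messages, interpolating MODEL via f-strings where the removed lines, truncated in this view, appear to have hardcoded a model name. The diff does not show the body of generate, so the following is a hedged sketch of how the patched except block typically sits inside a streaming call; text_generation with stream=True is a common huggingface_hub pattern, not necessarily this app's exact code:

import gradio as gr

# Continues from the configuration sketch above (MODEL, text_client).
def generate(prompt: str, history: str, temperature: float = 0.9, max_new_tokens: int = 256,
             top_p: float = 0.95, repetition_penalty: float = 1.0):
    # history is unused in this sketch; the real app likely folds it into the prompt.
    output = ""
    try:
        # stream=True yields tokens incrementally; details=True wraps each
        # chunk so the generated text is available as response.token.text.
        stream = text_client.text_generation(
            prompt,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            stream=True,
            details=True,
        )
        for response in stream:
            output += response.token.text
            yield output
    except Exception as e:
        if "Too Many Requests" in str(e):
            print(f"ERROR: Too many requests on {MODEL} client")
            gr.Warning(f"Unfortunately {MODEL} is unable to process")
            return "Unfortunately, I am not able to process your request now."
        else:
            print("Unhandled Exception:", str(e))
            gr.Warning(f"Unfortunately {MODEL} is unable to process")
            return "I do not know what happened, but I couldn't understand you."
    return output

The return statements inside the generator are consistent with the Generator[str, None, str] annotation in the signature: in Python 3 a generator's return value rides on StopIteration rather than being yielded, and gr.Warning surfaces the message as a toast in the Gradio UI.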