Spaces: Running on Zero

DongfuJiang committed • Commit 742d7b5
1 Parent(s): 525d2e5

update

Browse files:
- app.py +26 -12
- requirements.txt +3 -1
app.py
CHANGED
@@ -3,16 +3,22 @@ import os
 import gradio as gr
 import sys
 import copy
+import spaces
 from datasets import load_dataset
 from typing import List
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 from string import Template
+from tigerscore import TIGERScorer
 
 DESCRIPTIONS = """
 We present ***TIGERScore***, a **T**rained metric that follows **I**nstruction **G**uidance to perform **E**xplainable, and **R**eference-free evaluation over a wide spectrum of text generation tasks. Different from other automatic evaluation methods that only provide arcane scores, TIGERScore is guided by the natural language instruction to provide error analysis to pinpoint the mistakes in the generated text.
 
-
+[**Website**](https://tiger-ai-lab.github.io/TIGERScore/) |
+[**Paper**](https://arxiv.org/abs/2310.00752) |
+[**Code**](https://github.com/TIGER-AI-Lab/TIGERScore) |
+[**TIGERScore-7B**](https://huggingface.co/TIGER-Lab/TIGERScore-7B) |
+[**TIGERScore-13B**](https://huggingface.co/TIGER-Lab/TIGERScore-13B)
 
 """
 
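The new `import spaces` pulls in Hugging Face's ZeroGPU helper package (the Space runs on Zero, per the header above). For orientation, a minimal sketch of how ZeroGPU Spaces commonly request GPU time with the `@spaces.GPU` decorator; this decorator usage is shown for illustration only and is not part of this commit:

```python
# Minimal ZeroGPU sketch (assumed usage of the `spaces` package, not code
# from this commit): functions that need a GPU are wrapped in @spaces.GPU
# so hardware is allocated only for the duration of the call.
import spaces
import torch

@spaces.GPU  # request a GPU slice when this function is invoked
def describe_device(prompt: str) -> str:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"({device}) {prompt}"
```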
@@ -44,16 +50,25 @@ For each error you give in the response, please also elaborate the following inf
 Your evaluation output:
 """
 
-llm = Llama(
-    model_path=hf_hub_download(
-        repo_id=os.environ.get("REPO_ID", "TIGER-Lab/TIGERScore-13B-GGUF"),
-        filename=os.environ.get("MODEL_FILE", "ggml-model-q4_0.gguf"),
-    ),
-    n_ctx=2048,
-    n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
-)
+# llm = Llama(
+#     model_path=hf_hub_download(
+#         repo_id=os.environ.get("REPO_ID", "TIGER-Lab/TIGERScore-13B-GGUF"),
+#         filename=os.environ.get("MODEL_FILE", "ggml-model-q4_0.gguf"),
+#     ),
+#     n_ctx=2048,
+#     n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
+# )
+scorer = TIGERScorer(model_name="TIGER-Lab/TIGERScore-13B")
+
+def generate_text_hf(input_context, generation_instruction, hypo_output, max_new_tokens=1024, temperature=0.7, top_p=1.0):
+    global scorer
+    scorer.model = scorer.model.to("cuda")
+
+    for output in scorer.generate_stream(generation_instruction, hypo_output, input_context, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p):
+        yield output
 
+def generate_text_llamacpp(input_context, generation_instruction, hypo_output, max_new_tokens=1024, temperature=0.7, top_p=1.0):
+    global llm
     prompt_template = Template(TEMPLATE)
     prompt = prompt_template.substitute(
         generation_instruction=generation_instruction,
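The hunk above replaces the in-process llama.cpp model with a `TIGERScorer` and streams its output through `generate_text_hf`. A minimal sketch of driving the same streaming call outside Gradio, assuming (as the Gradio wiring implies) that each yielded value is the cumulative evaluation text so far; the example instruction, context, and hypothesis are invented:

```python
# Sketch of driving the new streaming evaluator directly. The constructor
# and generate_stream arguments mirror generate_text_hf in the diff above;
# the example inputs are invented.
from tigerscore import TIGERScorer

scorer = TIGERScorer(model_name="TIGER-Lab/TIGERScore-13B")

instruction = "Summarize the article in one sentence."       # hypothetical
input_context = "The city council approved the new budget."  # hypothetical
hypo_output = "The budget was rejected."                     # hypothetical

evaluation = ""
for partial in scorer.generate_stream(
    instruction, hypo_output, input_context,
    max_new_tokens=1024, temperature=0.7, top_p=1.0,
):
    evaluation = partial  # each yield replaces the previous (streaming) value
print(evaluation)
```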
@@ -76,7 +91,6 @@ def generate_text(input_context, generation_instruction, hypo_output, max_new_to
         stream = copy.deepcopy(out)
         temp += stream["choices"][0]["text"]
         yield temp
-
 
 def get_examples(inst_textbox, input_textbox, hypo_output_textbox):
     return inst_textbox, input_textbox, hypo_output_textbox
@@ -128,7 +142,7 @@ with gr.Blocks(theme='gradio/soft') as demo:
 
 
     submit_button.click(
-        fn=generate_text,
+        fn=generate_text_hf,
         inputs=[input_textbox, inst_textbox, hypo_output_textbox, max_new_tokens, temperature, top_p],
         outputs=evaluation_output_textbox,
     )
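The `fn=generate_text_hf` hookup relies on Gradio's generator support: when `fn` is a generator, each `yield` overwrites the bound output component, which is what streams the evaluation into the textbox. A self-contained sketch of that pattern, with all component names invented:

```python
# Standalone sketch of the streaming pattern used by the Space: a generator
# fn wired to a button streams successive values into a Textbox.
import time
import gradio as gr

def stream_demo(text: str):
    acc = ""
    for word in text.split():
        acc += word + " "
        time.sleep(0.1)  # simulate token-by-token generation
        yield acc        # each yield replaces the textbox contents

with gr.Blocks(theme="gradio/soft") as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Streamed output")
    btn = gr.Button("Submit")
    btn.click(fn=stream_demo, inputs=inp, outputs=out)

demo.launch()
```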
requirements.txt
CHANGED
@@ -1,4 +1,6 @@
 datasets==2.14.5
 torch
 transformers
-
+git+https://github.com/TIGER-AI-Lab/TIGERScore.git
+gradio==4.24.0
+spaces
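With the package installed from `git+https://github.com/TIGER-AI-Lab/TIGERScore.git`, TIGERScore can also be used for one-shot scoring rather than streaming. A rough sketch, assuming the batch `score()` entry point described in the TIGERScore repository; the inputs and the shape of the printed result are illustrative:

```python
# Rough batch-scoring sketch (assumes the score() API from the TIGERScore
# repo; inputs and the printed fields are illustrative only).
from tigerscore import TIGERScorer

scorer = TIGERScorer(model_name="TIGER-Lab/TIGERScore-13B")

instructions = ["Summarize the article in one sentence."]       # hypothetical
input_contexts = ["The city council approved the new budget."]  # hypothetical
hypo_outputs = ["The budget was rejected."]                     # hypothetical

results = scorer.score(instructions, hypo_outputs, input_contexts)
print(results[0])  # expected to contain the penalty score and error analysis
```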