Spaces: Running on Zero

DongfuJiang committed • Commit 742d7b5
1 Parent(s): 525d2e5

update

Browse files:
- app.py +26 -12
- requirements.txt +3 -1
app.py
CHANGED
@@ -3,16 +3,22 @@ import os
 import gradio as gr
 import sys
 import copy
+import spaces
 from datasets import load_dataset
 from typing import List
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 from string import Template
+from tigerscore import TIGERScorer
 
 DESCRIPTIONS = """
 We present ***TIGERScore***, a **T**rained metric that follows **I**nstruction **G**uidance to perform **E**xplainable, and **R**eference-free evaluation over a wide spectrum of text generation tasks. Different from other automatic evaluation methods that only provide arcane scores, TIGERScore is guided by the natural language instruction to provide error analysis to pinpoint the mistakes in the generated text.
 
-
+[**Website**](https://tiger-ai-lab.github.io/TIGERScore/) |
+[**Paper**](https://arxiv.org/abs/2310.00752) |
+[**Code**](https://github.com/TIGER-AI-Lab/TIGERScore) |
+[**TIGERScore-7B**](https://huggingface.co/TIGER-Lab/TIGERScore-7B) |
+[**TIGERScore-13B**](https://huggingface.co/TIGER-Lab/TIGERScore-13B)
 
 """
 
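The new `import spaces` pulls in Hugging Face's ZeroGPU helper package (the Space runs on Zero, per the header above). For orientation, a minimal sketch of how ZeroGPU Spaces commonly request GPU time with the `@spaces.GPU` decorator; this decorator usage is shown for illustration only and is not part of this commit:

```python
# Minimal ZeroGPU sketch (assumed usage of the `spaces` package, not code
# from this commit): functions that need a GPU are wrapped in @spaces.GPU
# so hardware is allocated only for the duration of the call.
import spaces
import torch

@spaces.GPU  # request a GPU slice when this function is invoked
def describe_device(prompt: str) -> str:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"({device}) {prompt}"
```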
@@ -44,16 +50,25 @@ For each error you give in the response, please also elaborate the following inf
 Your evaluation output:
 """
 
-llm = Llama(
-    model_path=hf_hub_download(
-        repo_id=os.environ.get("REPO_ID", "TIGER-Lab/TIGERScore-13B-GGUF"),
-        filename=os.environ.get("MODEL_FILE", "ggml-model-q4_0.gguf"),
-    ),
-    n_ctx=2048,
-    n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
-)
+# llm = Llama(
+#     model_path=hf_hub_download(
+#         repo_id=os.environ.get("REPO_ID", "TIGER-Lab/TIGERScore-13B-GGUF"),
+#         filename=os.environ.get("MODEL_FILE", "ggml-model-q4_0.gguf"),
+#     ),
+#     n_ctx=2048,
+#     n_gpu_layers=50, # change n_gpu_layers if you have more or less VRAM
+# )
+scorer = TIGERScorer(model_name="TIGER-Lab/TIGERScore-13B")
+
+def generate_text_hf(input_context, generation_instruction, hypo_output, max_new_tokens=1024, temperature=0.7, top_p=1.0):
+    global scorer
+    scorer.model = scorer.model.to("cuda")
+
+    for output in scorer.generate_stream(generation_instruction, hypo_output, input_context, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p):
+        yield output
 
+def generate_text_llamacpp(input_context, generation_instruction, hypo_output, max_new_tokens=1024, temperature=0.7, top_p=1.0):
+    global llm
     prompt_template = Template(TEMPLATE)
     prompt = prompt_template.substitute(
         generation_instruction=generation_instruction,
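The hunk above replaces the in-process llama.cpp model with a `TIGERScorer` and streams its output through `generate_text_hf`. A minimal sketch of driving the same streaming call outside Gradio, assuming (as the Gradio wiring implies) that each yielded value is the cumulative evaluation text so far; the example instruction, context, and hypothesis are invented:

```python
# Sketch of driving the new streaming evaluator directly. The constructor
# and generate_stream arguments mirror generate_text_hf in the diff above;
# the example inputs are invented.
from tigerscore import TIGERScorer

scorer = TIGERScorer(model_name="TIGER-Lab/TIGERScore-13B")

instruction = "Summarize the article in one sentence."       # hypothetical
input_context = "The city council approved the new budget."  # hypothetical
hypo_output = "The budget was rejected."                     # hypothetical

evaluation = ""
for partial in scorer.generate_stream(
    instruction, hypo_output, input_context,
    max_new_tokens=1024, temperature=0.7, top_p=1.0,
):
    evaluation = partial  # each yield replaces the previous (streaming) value
print(evaluation)
```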
@@ -76,7 +91,6 @@ def generate_text(input_context, generation_instruction, hypo_output, max_new_to
         stream = copy.deepcopy(out)
         temp += stream["choices"][0]["text"]
         yield temp
-
 
 def get_examples(inst_textbox, input_textbox, hypo_output_textbox):
     return inst_textbox, input_textbox, hypo_output_textbox
@@ -128,7 +142,7 @@ with gr.Blocks(theme='gradio/soft') as demo:
 
 
     submit_button.click(
-        fn=generate_text,
+        fn=generate_text_hf,
         inputs=[input_textbox, inst_textbox, hypo_output_textbox, max_new_tokens, temperature, top_p],
         outputs=evaluation_output_textbox,
     )
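The `fn=generate_text_hf` hookup relies on Gradio's generator support: when `fn` is a generator, each `yield` overwrites the bound output component, which is what streams the evaluation into the textbox. A self-contained sketch of that pattern, with all component names invented:

```python
# Standalone sketch of the streaming pattern used by the Space: a generator
# fn wired to a button streams successive values into a Textbox.
import time
import gradio as gr

def stream_demo(text: str):
    acc = ""
    for word in text.split():
        acc += word + " "
        time.sleep(0.1)  # simulate token-by-token generation
        yield acc        # each yield replaces the textbox contents

with gr.Blocks(theme="gradio/soft") as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Streamed output")
    btn = gr.Button("Submit")
    btn.click(fn=stream_demo, inputs=inp, outputs=out)

demo.launch()
```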
requirements.txt
CHANGED
@@ -1,4 +1,6 @@
 datasets==2.14.5
 torch
 transformers
-
+git+https://github.com/TIGER-AI-Lab/TIGERScore.git
+gradio==4.24.0
+spaces
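With the package installed from `git+https://github.com/TIGER-AI-Lab/TIGERScore.git`, TIGERScore can also be used for one-shot scoring rather than streaming. A rough sketch, assuming the batch `score()` entry point described in the TIGERScore repository; the inputs and the shape of the printed result are illustrative:

```python
# Rough batch-scoring sketch (assumes the score() API from the TIGERScore
# repo; inputs and the printed fields are illustrative only).
from tigerscore import TIGERScorer

scorer = TIGERScorer(model_name="TIGER-Lab/TIGERScore-13B")

instructions = ["Summarize the article in one sentence."]       # hypothetical
input_contexts = ["The city council approved the new budget."]  # hypothetical
hypo_outputs = ["The budget was rejected."]                     # hypothetical

results = scorer.score(instructions, hypo_outputs, input_contexts)
print(results[0])  # expected to contain the penalty score and error analysis
```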