Spaces:

TIGER-Lab
/

TIGERScore

Running on Zero

App Files Files Community

TIGERScore / app.py

DongfuJiang

update

04efd2c 11 months ago

raw

history blame

4.77 kB

	import gradio as gr
	import sys
	import os
	from datasets import load_dataset
	from typing import List
	from tigerscore import TIGERScorer


	# Markdown blurb shown at the top of the demo page (rendered via gr.Markdown).
	# The bold letters spell out the T-I-G-E-R acronym; every emphasis marker must
	# be balanced, otherwise Markdown italicises most of the sentence.
	DESCRIPTIONS = """
	We present ***TIGERScore***, a **T**rained metric that follows **I**nstruction **G**uidance to perform **E**xplainable, and **R**eference-free evaluation over a wide spectrum of text generation tasks. Different from other automatic evaluation methods that only provide arcane scores, TIGERScore is guided by the natural language instruction to provide error analysis to pinpoint the mistakes in the generated text.

	### [Website](https://tiger-ai-lab.github.io/TIGERScore/) [Paper](https://arxiv.org/abs/2310.00752) [Code](https://github.com/TIGER-AI-Lab/TIGERScore) [TIGERScore-7B](https://huggingface.co/TIGER-Lab/TIGERScore-7B-V1.0) [TIGERScore-13B](https://huggingface.co/TIGER-Lab/TIGERScore-13B-V1.0)

	"""

	# Build the pool of clickable demo examples from a deterministically
	# shuffled (seed=42) copy of the MetricInstruct training split.
	EXAMPLES_DATASET = load_dataset("TIGER-Lab/MetricInstruct", split="train")
	SHUFFLED_EXAMPLES_DATASET = EXAMPLES_DATASET.shuffle(seed=42)
	EXAMPLES = []
	fields = ["instruction", "input_context", "hypo_output"]
	print("Loading examples...")
	for i, ex in enumerate(SHUFFLED_EXAMPLES_DATASET):
	    row = [ex[field] for field in fields]
	    # Rows with any empty/missing field are skipped entirely; note that the
	    # cut-off is only evaluated for rows that were actually kept.
	    if all(row):
	        EXAMPLES.append(row)
	        if i >= 100:
	            break

	# Single module-level scorer shared by every request handled by submit_fn.
	# NOTE(review): use_llamacpp=True presumably selects the llama.cpp backend
	# for the GGUF checkpoint — confirm against the tigerscore package docs.
	scorer = TIGERScorer("TIGER-Lab/TIGERScore-7B-GGUF", use_llamacpp=True)

	def submit_fn(input_context, generation_instruction, hypo_output, max_new_tokens=512, temperature=0.7, top_p=1.0):
	    """Score one hypothesis output and return the scorer's raw text.

	    The three text arguments are wrapped into single-element lists and
	    passed to the module-level ``scorer`` together with the sampling
	    settings. The ``'raw_output'`` entry of the first (only) result is
	    returned with surrounding whitespace stripped.
	    """
	    score_kwargs = dict(
	        insts=[generation_instruction],
	        hypo_outputs=[hypo_output],
	        input_contexts=[input_context],
	        max_new_tokens=max_new_tokens,
	        temperature=temperature,
	        top_p=top_p,
	    )
	    results = scorer.score(**score_kwargs)
	    first_result = results[0]
	    return first_result['raw_output'].strip()



	def get_examples(*values):
	    """Echo an example's fields back into the three input textboxes.

	    ``gr.Examples`` wires this callback with three inputs (instruction,
	    input context, hypothesis output) and three outputs, so exactly three
	    values must be returned. The previous four-parameter signature (with a
	    leading ``task``) raised ``TypeError`` when called with three arguments
	    and returned one value too many; it also relied on
	    ``gr.Dropdown.update``, which no longer exists in Gradio 4.

	    A legacy four-argument call ``(task, inst, input, hypo)`` is still
	    accepted: only the last three positional values are used.

	    Returns:
	        A 3-tuple ``(instruction, input_context, hypo_output)``.
	    """
	    return tuple(values[-3:])

	def clear_all(*_values):
	    """Reset the three input textboxes to empty strings.

	    The Clear button wires this callback with three inputs and three
	    outputs. The previous four-parameter signature raised ``TypeError``
	    when called with three arguments, returned four values for three
	    outputs, and used ``gr.Dropdown.update``, which no longer exists in
	    Gradio 4. The current textbox contents are irrelevant to clearing, so
	    any positional arguments are accepted and ignored.

	    Returns:
	        A 3-tuple of empty strings, one per textbox.
	    """
	    return "", "", ""

	# Page layout: header + description, the three input textboxes, action
	# buttons, sampling controls, the output textbox, examples and citation.
	with gr.Blocks(theme='gradio/soft') as demo:

	    gr.Markdown("# 🐯 TIGERScore Demo")
	    with gr.Row():
	        gr.Markdown(DESCRIPTIONS)
	        gr.Image("https://jdf-prog.github.io/assets/img/publication_preview/tigerscore_preview.png")

	    gr.Markdown("## TIGERScore Inputs")
	    inst_textbox = gr.Textbox(lines=1, label="Instruction", placeholder="Enter instruction here", show_label=True)
	    input_textbox = gr.Textbox(lines=4, label="Input Context", placeholder="Enter input context here", show_label=True)
	    hypo_output_textbox = gr.Textbox(lines=4, label="Hypothesis Output", placeholder="Enter hypothesis output to be evaluated here", show_label=True)

	    with gr.Row():
	        clear_button = gr.Button('Clear', variant='primary')
	        submit_button = gr.Button('Submit', variant='primary')

	    # Sampling settings forwarded to submit_fn. The labels name TIGERScore:
	    # this app has no "fuser" component, so the old wording was misleading.
	    with gr.Accordion(label='Advanced options', open=False):
	        max_new_tokens = gr.Slider(
	            label='Max new tokens TIGERScore can generate',
	            minimum=256,
	            maximum=1024,
	            step=1,
	            value=512,
	        )
	        temperature = gr.Slider(
	            label='Temperature of TIGERScore generation',
	            minimum=0.1,
	            maximum=2.0,
	            step=0.1,
	            value=0.7,
	        )
	        top_p = gr.Slider(
	            label='Top-p of TIGERScore generation',
	            minimum=0.05,
	            maximum=1.0,
	            step=0.05,
	            value=1.0,
	        )

	    gr.Markdown("## TIGERScore Outputs")
	    evaluation_output_textbox = gr.Textbox(lines=4, label="Evaluation Output", placeholder="Evaluation output", show_label=True)


	    # Input order must match submit_fn's positional parameters:
	    # (input_context, generation_instruction, hypo_output, ...).
	    submit_button.click(
	        fn=submit_fn,
	        inputs=[input_textbox, inst_textbox, hypo_output_textbox, max_new_tokens, temperature, top_p],
	        outputs=evaluation_output_textbox,
	    )

	    clear_button.click(
	        fn=clear_all,
	        inputs=[inst_textbox, input_textbox, hypo_output_textbox],
	        outputs=[inst_textbox, input_textbox, hypo_output_textbox],
	    )

	    # Each EXAMPLES row is [instruction, input_context, hypo_output], in the
	    # same order as the inputs/outputs listed here.
	    batch_examples = gr.Examples(
	        examples=EXAMPLES,
	        fn=get_examples,
	        cache_examples=True,
	        examples_per_page=5,
	        inputs=[inst_textbox, input_textbox, hypo_output_textbox],
	        outputs=[inst_textbox, input_textbox, hypo_output_textbox],
	    )

	    # BibTeX separates author names with " and "; a comma-separated list is
	    # parsed as a single malformed name.
	    citations = gr.Markdown("""## Citation
	```txt
	@article{jiang2023TIGERScore,
	title={TIGERScore: Towards Building Explainable Metric for All Text Generation Tasks},
	author={Dongfu Jiang and Yishan Li and Ge Zhang and Wenhao Huang and Bill Yuchen Lin and Wenhu Chen},
	journal={arXiv preprint arXiv:2310.00752},
	year={2023}
	}
	```""")

	# Cap the number of queued requests at 20, then start the server.
	demo.queue(max_size=20).launch()