File size: 2,764 Bytes
3d8b295
 
814e23a
3d8b295
 
 
434878e
95dfd9b
3d8b295
434878e
 
814e23a
3d8b295
69460b6
fcf6d89
69460b6
 
fcf6d89
69460b6
 
357b114
d35cae7
3d8b295
434878e
fcf6d89
 
3d8b295
9512d4d
434878e
aeb84b0
21346d9
aeb84b0
 
ea3b994
aeb84b0
 
 
e878c76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import gradio as gr
from transformers import AutoTokenizer
import json

# Base tokenizer used only to render templates; its built-in chat_template is
# overwritten on every submit, so the specific checkpoint choice is cosmetic.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")

# Example conversation shown in the "Conversation" textbox; must stay valid JSON
# because apply_chat_template() parses it with json.loads.
demo_conversation = """[
    {"role": "system", "content": "You are a helpful chatbot."},
    {"role": "user", "content": "Hi there!"},
    {"role": "assistant", "content": "Hello, human!"},
    {"role": "user", "content": "Can I ask a question?"}
]"""

# Starting Jinja chat template (ChatML-style). The backslash-escaped "\\n"
# renders as a literal \n in the textbox, which Jinja then emits as a newline.
default_template = """{% for message in messages %}
    {{ "<|im_start|>" + message["role"] + "\\n" + message["content"] + "<|im_end|>\\n" }}
{% endfor %}
{% if add_generation_prompt %}
    {{ "<|im_start|>assistant\\n" }}
{% endif %}"""

# Markdown header rendered at the top of the app.
description_text = """### This space is a helper app for writing [Chat Templates](https://huggingface.co/docs/transformers/main/en/chat_templating).
### When you're happy with the outputs from your template, you can use the code block at the end to add it to a PR!"""

def apply_chat_template(template, test_conversation, add_generation_prompt, cleanup_whitespace):
    """Render a JSON-encoded conversation with a Jinja chat template.

    Args:
        template: Jinja chat-template string; assigned to the shared tokenizer's
            ``chat_template`` before rendering.
        test_conversation: JSON string encoding a list of
            ``{"role": ..., "content": ...}`` messages.
        add_generation_prompt: Forwarded to ``tokenizer.apply_chat_template`` to
            append the assistant-turn prompt tokens.
        cleanup_whitespace: If True, strip leading/trailing whitespace from each
            template line and join them. Templates are usually indented for
            readability; without this step the indentation and newlines would
            leak into the rendered output.

    Returns:
        The formatted conversation as a single string (no tokenization).

    Raises:
        json.JSONDecodeError: If ``test_conversation`` is not valid JSON.
    """
    if cleanup_whitespace:
        template = "".join(line.strip() for line in template.split('\n'))
    tokenizer.chat_template = template
    conversation = json.loads(test_conversation)
    # NOTE(review): the original also assembled an unused "PR snippet" string
    # and an empty `outputs` list; both were dead code and have been removed.
    return tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=add_generation_prompt
    )

def open_pr(template, model_repo):
    """Open a Hub pull request that updates a repo's tokenizer chat template.

    Args:
        template: Jinja chat-template string to attach to the tokenizer.
        model_repo: Hub repo id (e.g. ``"org/model"``) to load and push to.

    Note:
        Pure network I/O; requires Hub write credentials in the environment.
    """
    # Use a local name instead of shadowing the module-level `tokenizer`.
    repo_tokenizer = AutoTokenizer.from_pretrained(model_repo)
    repo_tokenizer.chat_template = template
    repo_tokenizer.push_to_hub(model_repo, create_pr=True)


# --- UI layout ----------------------------------------------------------
# (Indentation normalized to 4 spaces; the original mixed 2- and 4-space.)
with gr.Blocks() as demo:
    gr.Markdown(description_text)

    with gr.Row():
        with gr.Column():
            # Left column: template editor and rendering options.
            template = gr.TextArea(value=default_template, lines=10, max_lines=30, label="Chat Template")
            example = gr.TextArea(value=demo_conversation, lines=6, label="Conversation")
            generate_prompt = gr.Checkbox(value=False, label="Add generation prompt")
            clean_whitespace = gr.Checkbox(value=True, label="Cleanup template whitespace")
            btn = gr.Button("Submit")
        with gr.Column():
            # Right column: rendered output and PR controls.
            output = gr.TextArea(label="Formatted conversation")
            model_repo = gr.Textbox(label='Model repo to open a PR')
            btn_pr = gr.Button("Open a PR with template update")
    btn.click(fn=apply_chat_template, inputs=[template, example, generate_prompt, clean_whitespace], outputs=[output])
    # open_pr returns nothing, so no `outputs` are wired here.
    btn_pr.click(fn=open_pr, inputs=[template, model_repo])

demo.launch()