import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from globe import title, description, customtool, presentation1, presentation2, joinus
import spaces

model_path = "nvidia/Mistral-NeMo-Minitron-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Extract config info from the model's configuration
config_info = model.config

# Build a Markdown string listing the complete model configuration
model_info_md = "### Model Configuration: Mistral-NeMo-Minitron-8B-Instruct\n\n"
for key, value in config_info.to_dict().items():
    model_info_md += f"- **{key.replace('_', ' ').capitalize()}**: {value}\n"

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
# pipe.tokenizer = tokenizer


def create_prompt(system_message, user_message, tool_definition="", context=""):
    # Turn markers follow the Mistral-NeMo-Minitron-8B-Instruct prompt format
    # (<extra_id_0>/<extra_id_1>); these markers are an assumed restoration,
    # as the original strings were lost in transcription.
    if tool_definition:
        return f"""<extra_id_0>System
{system_message}

{tool_definition}

{context}
<extra_id_1>User
{user_message}
<extra_id_1>Assistant
"""
    else:
        return f"<extra_id_0>System\n{system_message}\n\n<extra_id_1>User\n{user_message}\n<extra_id_1>Assistant\n"


@spaces.GPU
def generate_response(message, history, system_message, max_tokens, temperature, top_p, use_pipeline=False, tool_definition="", context=""):
    # Kept for reference; both generation paths below build their own prompts.
    full_prompt = create_prompt(system_message, message, tool_definition, context)

    if use_pipeline:
        prompt = [
            {"role": "system", "content": system_message},
            {"role": "user", "content": message},
        ]
        # With chat-style input the pipeline returns the full conversation, so
        # keep only the content of the newly generated assistant message. The
        # "<extra_id_1>" stop string matches the model's turn marker (assumed
        # restoration, as the original string was lost in transcription).
        output = pipe(
            prompt,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            stop_strings=["<extra_id_1>"],
            tokenizer=tokenizer,
        )
        response = output[0]['generated_text'][-1]['content']
    else:
        # apply_chat_template with tokenize=True and return_tensors="pt"
        # returns the input-id tensor directly.
        tokenized_chat = tokenizer.apply_chat_template(
            [
                {"role": "system", "content": system_message},
                {"role": "user", "content": message},
            ],
            tokenize=True,
            add_generation_prompt=True,
            return_tensors="pt"
        )
        with torch.no_grad():
            output_ids = model.generate(
                tokenized_chat,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True
            )
        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    assistant_response = response.split("Assistant\n")[-1].strip()

    # "<toolcall>"/"</toolcall>" are assumed delimiters for the model's tool
    # calls (the original tag strings were lost in transcription).
    if tool_definition and "<toolcall>" in assistant_response:
        tool_call = assistant_response.split("<toolcall>")[1].split("</toolcall>")[0]
        assistant_response += f"\n\nTool Call: {tool_call}\n\nNote: This is a simulated tool call. In a real scenario, the tool would be executed and its output would be used to generate a final response."

    return assistant_response


with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown(title)
    with gr.Row():
        gr.Markdown(description)
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown(presentation1)
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown(model_info_md)
    with gr.Row():
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(label="🤖 Mistral-NeMo", height=400)
            msg = gr.Textbox(label="User Input", placeholder="Ask a question or request a task...")
            with gr.Accordion(label="🧪Advanced Settings", open=False):
                system_message = gr.Textbox(
                    label="System Message",
                    value="You are a helpful AI assistant.",
                    lines=2,
                    placeholder="Set the AI's behavior and context..."
                )
                context = gr.Textbox(
                    label="Context",
                    lines=2,
                    placeholder="Enter additional context information..."
                )
                max_tokens = gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max Tokens")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
                use_pipeline = gr.Checkbox(label="Use Pipeline", value=False)
                use_tool = gr.Checkbox(label="Use Function Calling", value=False)
                with gr.Column(visible=False) as tool_options:
                    tool_definition = gr.Code(
                        label="Tool Definition (JSON)",
                        value="{}",
                        lines=15,
                        language="json"
                    )
            with gr.Row():
                clear = gr.Button("Clear")
                send = gr.Button("Send")

    def user(user_message, history):
        # Append the new user turn to the chat history and clear the textbox.
        return "", history + [[user_message, None]]

    def bot(history, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition, context):
        user_message = history[-1][0]
        bot_message = generate_response(user_message, history, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition, context)
        history[-1][1] = bot_message
        return history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition, context], chatbot
    )
    send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, system_message, max_tokens, temperature, top_p, use_pipeline, tool_definition, context], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

    # Show the tool definition editor only when function calling is enabled.
    use_tool.change(
        fn=lambda x: gr.update(visible=x),
        inputs=[use_tool],
        outputs=[tool_options]
    )

if __name__ == "__main__":
    demo.queue()
    demo.launch()