# Hugging Face Space (status when captured: Running)
import gradio as gr | |
import torch | |
from transformers import AutoModel, AutoTokenizer | |
# Hub checkpoint shared by the tokenizer and the model.
MODEL_ID = "openbmb/MiniCPM-V-2"

# The checkpoint ships its own modelling code, hence trust_remote_code=True.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModel.from_pretrained(MODEL_ID, trust_remote_code=True)

# The app only runs inference, so switch the model to evaluation mode.
model.eval()
# Page heading and introductory text shown above the form.
title = "Sudoku Solver by FG"
description = (
    "Sudoku Solver using MiniCPM-V-2 model by FG. "
    "Upload an image of a sudoku puzzle and ask a question to solve it."
)

# Inputs: the uploaded picture (handed to the callback as a PIL image) and
# the free-text question about it.
image = gr.Image(label="Image", type="pil")
question = gr.Textbox(label="Question")

# Output: a labelled text box with a copy button for the model's reply.
answer = gr.Textbox(
    label="Predicted answer",
    show_copy_button=True,
    show_label=True,
)
# Instruction text sent to the model with every request.
_SYSTEM_PROMPT = (
    "You are an AI assistant specialized in visual content analysis. "
    "Given an image and a related question, analyze the image thoroughly "
    "and provide a precise and informative answer based on the visible "
    "content. Ensure your response is clear, accurate, and directly "
    "addresses the question."
)


def solve_sudoku(image, question):
    """Ask the loaded model ``question`` about ``image`` and return its answer.

    Args:
        image: Picture supplied by the Gradio image input, or ``None`` when
            the user submitted the form without uploading anything.
        question: Text typed into the question box.

    Returns:
        The model's reply as a single string.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Gradio passes None for an empty image input; report that clearly
        # instead of letting the call fail somewhere inside the model code.
        raise gr.Error("Please upload an image of the sudoku puzzle first.")

    msgs = [{"role": "user", "content": question}]
    res = model.chat(
        image=image,
        msgs=msgs,
        context=None,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.7,
        stream=True,
        system_prompt=_SYSTEM_PROMPT,
    )

    # NOTE(review): the return shape of chat() is defined by the checkpoint's
    # remote code, not by this file. Depending on the revision it is presumably
    # a plain string, an (answer, context, generation_config) tuple, or an
    # iterable of streamed text chunks -- confirm against the model card.
    # Joining a tuple that holds non-string items would raise TypeError, so
    # each shape is handled explicitly.
    if isinstance(res, str):
        return res
    if (
        isinstance(res, tuple)
        and res
        and isinstance(res[0], str)
        and not all(isinstance(part, str) for part in res)
    ):
        # Tuple-style result: the answer text is the first element.
        return res[0]
    # Streamed (or otherwise chunked) output: concatenate the pieces.
    return "".join(res)
# Wire the callback to its two inputs and single output, then decorate the
# page with the heading, blurb and theme defined for this app.
demo = gr.Interface(
    solve_sudoku,
    [image, question],
    answer,
    theme="compact",
    title=title,
    description=description,
)

# Start the web server, requesting a public share link.
demo.launch(share=True)