Spaces:

linm1
/

vlm

Sleeping

App Files Files Community

linm1 committed on Oct 3

Commit

302615c

•

1 Parent(s): 1e3ede4

Update app.py

Browse files

Files changed (1) hide show

app.py +161 -162

app.py CHANGED Viewed

@@ -1,163 +1,162 @@
-import gradio as gr
-import os
-from dotenv import load_dotenv
-import base64
-from io import BytesIO
-from mistralai import Mistral
-from pydantic import BaseModel, Field
-from datasets import load_dataset
-from PIL import Image
-import json
-import sqlite3
-from datetime import datetime
-# Load the dataset
-ds = load_dataset("svjack/pokemon-blip-captions-en-zh")
-ds = ds["train"]
-# Load environment variables
-api_key = os.environ.get('MISTRAL_API_KEY')
-if not api_key:
-    raise ValueError("MISTRAL_API_KEY is not set in the environment variables.")
-# Create sample history
-hist = [str({"en": ds[i]["en_text"], "zh": ds[i]["zh_text"]}) for i in range(8)]
-hist_str = "\n".join(hist)
-# Define the Caption model
-class Caption(BaseModel):
-    en: str = Field(...,
-        description="English caption of image",
-        max_length=84)
-    zh: str = Field(...,
-        description="Chinese caption of image",
-        max_length=64)
-# Initialize the Mistral client
-client = Mistral(api_key=api_key)
-def generate_caption(image):
-    # Convert image to base64
-    buffered = BytesIO()
-    image.save(buffered, format="JPEG")
-    base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')
-    messages = [
-        {
-            "role": "system",
-            "content": f'''
-            You are a highly accurate image to caption transformer.
-            Describe the image content in English and Chinese respectively. Make sure to FOCUS on item CATEGORY and COLOR!
-            Do NOT provide NAMES! KEEP it SHORT!
-            While adhering to the following JSON schema: {Caption.model_json_schema()}
-            Following are some samples you should adhere to for style and tone:
-            {hist_str}
-            '''
-        },
-        {
-            "role": "user",
-            "content": [
-                {
-                    "type": "text",
-                    "text": "Describe the image in English and Chinese"
-                },
-                {
-                    "type": "image_url",
-                    "image_url": f"data:image/jpeg;base64,{base64_image}"
-                }
-            ]
-        }
-    ]
-    chat_response = client.chat.complete(
-        model="pixtral-12b-2409",
-        messages=messages,
-        response_format = {
-          "type": "json_object",
-        }
-    )
-    response_content = chat_response.choices[0].message.content
-    try:
-        caption_dict = json.loads(response_content)
-        return Caption(**caption_dict)
-    except json.JSONDecodeError as e:
-        print(f"Error decoding JSON: {e}")
-        return None
-# Initialize SQLite database
-def init_db():
-    conn = sqlite3.connect('feedback.db')
-    c = conn.cursor()
-    c.execute('''CREATE TABLE IF NOT EXISTS thumbs_up
-                 (id INTEGER PRIMARY KEY AUTOINCREMENT,
-                  timestamp TEXT,
-                  input_data TEXT,
-                  output_data TEXT)''')
-    conn.commit()
-    conn.close()
-init_db()
-def process_image(image):
-    if image is None:
-        return "Please upload an image first."
-    result = generate_caption(image)
-    if result:
-        return f"English caption: {result.en}\nChinese caption: {result.zh}"
-    else:
-        return "Failed to generate caption. Please check the API call or network connectivity."
-def thumbs_up(image, caption):
-    # Convert image to base64 string for storage
-    buffered = BytesIO()
-    image.save(buffered, format="JPEG")
-    img_str = base64.b64encode(buffered.getvalue()).decode()
-    conn = sqlite3.connect('feedback.db')
-    c = conn.cursor()
-    c.execute("INSERT INTO thumbs_up (timestamp, input_data, output_data) VALUES (?, ?, ?)",
-              (datetime.now().isoformat(), img_str, caption))
-    conn.commit()
-    conn.close()
-    print(f"Thumbs up data saved to database.")
-    return gr.Notification("Thank you for your feedback!", type="success")
-# Create Gradio interface
-custom_css = """
-    .highlight-btn {
-        background-color: #3498db !important;
-        border-color: #3498db !important;
-        color: white !important;
-    }
-    .highlight-btn:hover {
-        background-color: #2980b9 !important;
-        border-color: #2980b9 !important;
-    }
-"""
-with gr.Blocks() as iface:
-    gr.Markdown("# Image Captioner")
-    gr.Markdown("Upload an image to generate captions in English and Chinese. Use the 'Thumbs Up' button if you like the result!")
-    with gr.Row():
-        with gr.Column(scale=1):
-            input_image = gr.Image(type="pil")
-            with gr.Row():
-                clear_btn = gr.Button("Clear")
-                submit_btn = gr.Button("Submit", elem_classes=["highlight-btn"])
-        with gr.Column(scale=1):
-            output_text = gr.Textbox()
-            thumbs_up_btn = gr.Button("Thumbs Up")
-    clear_btn.click(fn=lambda: None, inputs=None, outputs=input_image)
-    submit_btn.click(fn=process_image, inputs=input_image, outputs=output_text)
-    thumbs_up_btn.click(fn=thumbs_up, inputs=[input_image, output_text], outputs=None)
-# Launch the interface
 iface.launch(share=True)

+import gradio as gr
+import os
+import base64
+from io import BytesIO
+from mistralai import Mistral
+from pydantic import BaseModel, Field
+from datasets import load_dataset
+from PIL import Image
+import json
+import sqlite3
+from datetime import datetime
+# Load the dataset
+ds = load_dataset("svjack/pokemon-blip-captions-en-zh")
+ds = ds["train"]
+# Load environment variables
+api_key = os.environ.get('MISTRAL_API_KEY')
+if not api_key:
+    raise ValueError("MISTRAL_API_KEY is not set in the environment variables.")
+# Create sample history
+hist = [str({"en": ds[i]["en_text"], "zh": ds[i]["zh_text"]}) for i in range(8)]
+hist_str = "\n".join(hist)
+# Define the Caption model: the structured result returned by
+# generate_caption. Its JSON schema (model_json_schema()) is interpolated
+# into the system prompt, so the field descriptions and limits below are
+# part of what the model is told.
+class Caption(BaseModel):
+    # Required English caption; the Field caps it at max_length=84.
+    en: str = Field(...,
+        description="English caption of image",
+        max_length=84)
+    # Required Chinese caption; the Field caps it at max_length=64.
+    zh: str = Field(...,
+        description="Chinese caption of image",
+        max_length=64)
+# Initialize the Mistral client once at import time with the API key read
+# from the environment; generate_caption uses it for every request.
+client = Mistral(api_key=api_key)
+def generate_caption(image):
+    # Convert image to base64
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    base64_image = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    messages = [
+        {
+            "role": "system",
+            "content": f'''
+            You are a highly accurate image to caption transformer.
+            Describe the image content in English and Chinese respectively. Make sure to FOCUS on item CATEGORY and COLOR!
+            Do NOT provide NAMES! KEEP it SHORT!
+            While adhering to the following JSON schema: {Caption.model_json_schema()}
+            Following are some samples you should adhere to for style and tone:
+            {hist_str}
+            '''
+        },
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": "Describe the image in English and Chinese"
+                },
+                {
+                    "type": "image_url",
+                    "image_url": f"data:image/jpeg;base64,{base64_image}"
+                }
+            ]
+        }
+    ]
+    chat_response = client.chat.complete(
+        model="pixtral-12b-2409",
+        messages=messages,
+        response_format = {
+          "type": "json_object",
+        }
+    )
+    response_content = chat_response.choices[0].message.content
+    try:
+        caption_dict = json.loads(response_content)
+        return Caption(**caption_dict)
+    except json.JSONDecodeError as e:
+        print(f"Error decoding JSON: {e}")
+        return None
+# Initialize SQLite database
+def init_db():
+    conn = sqlite3.connect('feedback.db')
+    c = conn.cursor()
+    c.execute('''CREATE TABLE IF NOT EXISTS thumbs_up
+                 (id INTEGER PRIMARY KEY AUTOINCREMENT,
+                  timestamp TEXT,
+                  input_data TEXT,
+                  output_data TEXT)''')
+    conn.commit()
+    conn.close()
+init_db()
+def process_image(image):
+    if image is None:
+        return "Please upload an image first."
+    result = generate_caption(image)
+    if result:
+        return f"English caption: {result.en}\nChinese caption: {result.zh}"
+    else:
+        return "Failed to generate caption. Please check the API call or network connectivity."
+def thumbs_up(image, caption):
+    # Convert image to base64 string for storage
+    buffered = BytesIO()
+    image.save(buffered, format="JPEG")
+    img_str = base64.b64encode(buffered.getvalue()).decode()
+    conn = sqlite3.connect('feedback.db')
+    c = conn.cursor()
+    c.execute("INSERT INTO thumbs_up (timestamp, input_data, output_data) VALUES (?, ?, ?)",
+              (datetime.now().isoformat(), img_str, caption))
+    conn.commit()
+    conn.close()
+    print(f"Thumbs up data saved to database.")
+    return gr.Notification("Thank you for your feedback!", type="success")
+# Create Gradio interface
+custom_css = """
+    .highlight-btn {
+        background-color: #3498db !important;
+        border-color: #3498db !important;
+        color: white !important;
+    }
+    .highlight-btn:hover {
+        background-color: #2980b9 !important;
+        border-color: #2980b9 !important;
+    }
+"""
+with gr.Blocks() as iface:
+    gr.Markdown("# Image Captioner")
+    gr.Markdown("Upload an image to generate captions in English and Chinese. Use the 'Thumbs Up' button if you like the result!")
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_image = gr.Image(type="pil")
+            with gr.Row():
+                clear_btn = gr.Button("Clear")
+                submit_btn = gr.Button("Submit", elem_classes=["highlight-btn"])
+        with gr.Column(scale=1):
+            output_text = gr.Textbox()
+            thumbs_up_btn = gr.Button("Thumbs Up")
+    clear_btn.click(fn=lambda: None, inputs=None, outputs=input_image)
+    submit_btn.click(fn=process_image, inputs=input_image, outputs=output_text)
+    thumbs_up_btn.click(fn=thumbs_up, inputs=[input_image, output_text], outputs=None)
+# Launch the interface
 iface.launch(share=True)