# NOTE: scraped Hugging Face Spaces page header ("Spaces: Runtime error"); not part of the program.
import gradio as gr | |
import argparse | |
import datetime | |
import json | |
import os | |
import time | |
import gradio as gr | |
import requests | |
from PIL import Image | |
from q_align.model.builder import load_pretrained_model | |
from q_align.conversation import (default_conversation, conv_templates, | |
SeparatorStyle) | |
from q_align.constants import LOGDIR | |
from q_align.utils import (build_logger, server_error_msg, | |
violates_moderation, moderation_msg) | |
from q_align.evaluate.scorer import QAlignScorer, QAlignAestheticScorer, QAlignVideoScorer | |
import gradio as gr | |
def load_video(video_file):
    """Decode a video and return roughly one frame per second as PIL images.

    Args:
        video_file: Video source handed straight to ``decord.VideoReader``
            (the UI passes the uploaded file's path).

    Returns:
        A list of ``PIL.Image.Image`` objects, one for each whole second of
        video (frame index ``int(fps * i)``). Clips shorter than one second,
        or clips whose reported frame rate is not positive, yield a single
        image built from the first frame instead of an empty list.

    Raises:
        ValueError: If the reader reports zero frames.
    """
    # Imported inside the function so decord is only needed when a video is
    # actually decoded.
    from decord import VideoReader

    vr = VideoReader(video_file)
    num_frames = len(vr)
    if num_frames == 0:
        raise ValueError("The video contains no decodable frames.")

    # Average frame rate reported by the reader.
    fps = vr.get_avg_fps()

    # Number of whole seconds covered by the clip; a non-positive (or NaN)
    # frame rate is treated as "unknown duration".
    num_seconds = int(num_frames / fps) if fps > 0 else 0

    if num_seconds == 0:
        # Sub-second clip: sample the first frame so the caller always gets
        # at least one image to score.
        frame_indices = [0]
    else:
        # One frame per second; the clamp is a safety net against
        # floating-point rounding pushing an index past the last frame.
        frame_indices = [
            min(int(fps * i), num_frames - 1) for i in range(num_seconds)
        ]

    frames = vr.get_batch(frame_indices).asnumpy()
    return [Image.fromarray(frame) for frame in frames]
# Checkpoint identifier and target device for the one model instance that all
# three scorers below share.
pretrained = "q-future/one-align"
device = "cuda:0"

tokenizer, model, image_processor, _ = load_pretrained_model(
    pretrained,
    None,
    "mplug_owl2",
    device=device,
)

# Every scorer is built from the same tokenizer / model / image processor.
_shared_components = {
    "tokenizer": tokenizer,
    "model": model,
    "image_processor": image_processor,
}
iqa_scorer = QAlignScorer(**_shared_components)
iaa_scorer = QAlignAestheticScorer(**_shared_components)
vqa_scorer = QAlignVideoScorer(**_shared_components)

# Task name shown in the UI -> scorer used for that task.
scorers = {
    "Image Aesthetics (IAA)": iaa_scorer,
    "Image Quality (IQA)": iqa_scorer,
    "Video Quality (VQA)": vqa_scorer,
}

# Rating-level labels and the numeric value of each level, in matching order.
LEVELS = [
    "excellent (5)",
    "good (4)",
    "fair (3)",
    "poor (2)",
    "bad (1)",
]
scores = list(range(5, 0, -1))
def image_classifier(input_img, input_vid, scorer_type):
    """Score an uploaded image or video with the selected scorer.

    Args:
        input_img: The uploaded image, or ``None`` when no image was given.
        input_vid: The uploaded video (passed on to ``load_video``), or
            ``None``. When present it takes precedence over ``input_img``.
        scorer_type: Key into ``scorers``; ``None`` falls back to
            ``"Image Quality (IQA)"``.

    Returns:
        A ``(prob_dict, score)`` tuple. ``prob_dict`` maps each entry of
        ``LEVELS`` to its probability and ``score`` is the
        probability-weighted sum of ``scores``.

    Raises:
        ValueError: If neither an image nor a video was supplied, or if no
            frames could be read from the video.
    """
    # Validate first: without this check ``input_`` would never be bound and
    # the call would fail later with an opaque UnboundLocalError.
    if input_img is None and input_vid is None:
        raise ValueError("Please upload an image or a video before submitting.")

    if scorer_type is None:
        scorer_type = "Image Quality (IQA)"
    this_scorer = scorers[scorer_type]

    if input_vid is not None:
        # A video wins over an image when both are uploaded.
        input_ = load_video(input_vid)
        if not input_:
            raise ValueError("No frames could be read from the uploaded video.")
    else:
        input_ = [input_img]

    if "Video" in scorer_type:
        # The video scorer is given a list of clips, each clip being a list
        # of frames, so the frame list is wrapped once more.
        input_ = [input_]

    # Average the scorer output over its first axis
    # (presumably one row per image/clip -- confirm against the scorer).
    probs = this_scorer(input_).mean(0).tolist()
    prob_dict = dict(zip(LEVELS, probs))
    score = sum(prob * level_score for level_score, prob in zip(scores, probs))
    return prob_dict, score
# HTML header rendered above the demo (passed to ``gr.Interface`` as
# ``description``). The markup is deliberately kept at column 0: the text is
# rendered as Markdown, where indented lines would turn into a code block.
# ``justify-content: center`` is required because ``align="center"`` alone
# does not centre the children of a flex container.
title_markdown = ("""
<div style="width: 100%; text-align: center; margin:auto;">
<img style="width: 100%" src="https://raw.githubusercontent.com/Q-Future/Q-Align/main/fig/onescorer.png">
</div>
<h4 align="center"> If you like the OneScorer, please give us a star ✨ on <a href='https://github.com/Q-Future/Q-Align'>[GitHub]</a> for latest update. </h4>
<h5 align="center">
<div style="display:flex; gap: 0.25rem; justify-content: center;" align="center">
<a href='https://q-align.github.io'><img src='https://img.shields.io/badge/Homepage-green'></a>
<a href='https://github.com/Q-Future/Q-Align'><img src='https://img.shields.io/badge/Github-Code-blue'></a>
<a href="https://Q-Future.github.io/Q-Align/fig/Q_Align_v0_1_preview.pdf"><img src="https://img.shields.io/badge/Technical-Report-red"></a>
<a href='https://github.com/Q-Future/Q-Align/stargazers'><img src='https://img.shields.io/github/stars/Q-Future/Q-Align.svg?style=social'></a>
</div>
</h5>
""")
# --- UI components -------------------------------------------------------
# Inputs, in the same order as the parameters of ``image_classifier``.
input_img = gr.Image(type='pil', label="Upload an Image")
input_vid = gr.Video(
    label="Upload a Video (will IGNORE the image if a video is uploaded)",
    sources=["upload"],
)
radio = gr.Radio(
    ["Image Aesthetics (IAA)", "Image Quality (IQA)", "Video Quality (VQA)"],
    label="Task",
    info="Which Scorer will you need?",
)

# Outputs, matching the ``(prob_dict, score)`` tuple returned by
# ``image_classifier``.
labels = gr.Label(label="Probabilities of rating levels:")
number = gr.Number(
    label="Output score:",
    info="Range in [1,5]. Higher is better.",
    precision=4,
)

# Each example row is [image, video, task], mirroring ``inputs``.
demo = gr.Interface(
    fn=image_classifier,
    inputs=[input_img, input_vid, radio],
    outputs=[labels, number],
    description=title_markdown,
    examples=[
        ["fig/eiffel_a.jpg", None, "Image Aesthetics (IAA)"],
        ["fig/singapore_flyer_2.jpg", None, "Image Quality (IQA)"],
        ["fig/none.png", "fig/10244479353.mp4", "Video Quality (VQA)"],
    ],
    article=(
        "This is the Scorer Demo as Proposed by Paper: 'Q-Align: Teaching LMMs "
        "for Visual Scoring via Discrete Text-Defined Levels'. The proposed "
        "Q-Align achieves state-of-the-art performance on image quality "
        "assessment (IQA), image aesthetic assessment (IAA), as well as video "
        "quality assessment (VQA) tasks under the original LMM structure. With "
        "the syllabus, we further unify the three tasks into one model, termed "
        "the **OneAlign**, to which the demo corresponds."
    ),
)
demo.launch()