teowu committed on
Commit
2d5475b
1 Parent(s): 746a281

add examples

Browse files
README.md CHANGED
@@ -10,4 +10,6 @@ pinned: false
10
  license: mit
11
  ---
12
 
 
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
10
  license: mit
11
  ---
12
 
13
+ arxiv.org/abs/2312.17090
14
+
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -64,39 +64,11 @@ def image_classifier(input_img, input_vid, scorer_type):
64
 
65
  title_markdown = ("""
66
 
67
- <h3 align="center">Q-Align: Teaching LMMs for Visual Scoring via Discrete Text-Defined Levels</h3>
 
 
68
 
69
- <h3 align="center"> One Unified Model for Visual scoring. </h3>
70
-
71
- <h5 align="center">
72
- <a href="https://teowu.github.io/" target="_blank">Haoning Wu</a><sup>1</sup><sup>*</sup><sup>+</sup>,
73
- <a href="https://github.com/zzc-1998" target="_blank">Zicheng Zhang</a><sup>2</sup><sup>*</sup>,
74
- <a href="https://sites.google.com/view/r-panda" target="_blank">Weixia Zhang</a><sup>2</sup>,
75
- <a href="https://chaofengc.github.io" target="_blank">Chaofeng Chen</a><sup>1</sup>,
76
- <a href="https://liaoliang92.github.io" target="_blank">Liang Liao</a><sup>1</sup>,
77
- <a href="https://github.com/lcysyzxdxc" target="_blank">Chunyi Li</a><sup>2</sup>,
78
- </h5>
79
-
80
-
81
- <h5 align="center">
82
- <a href="https://github.com/YixuanGao98" target="_blank">Yixuan Gao</a><sup>2</sup>,
83
- <a href="https://github.com/AnnanWangDaniel" target="_blank">Annan Wang</a><sup>1</sup>,
84
- <a href="https://github.com/ZhangErliCarl/" target="_blank">Erli Zhang</a><sup>1</sup>,
85
- <a href="https://wenxiusun.com" target="_blank">Wenxiu Sun</a><sup>3</sup>,
86
- <a href="https://scholar.google.com/citations?user=uT9CtPYAAAAJ&hl=en" target="_blank">Qiong Yan</a><sup>3</sup>,
87
- <a href="https://sites.google.com/site/minxiongkuo/" target="_blank">Xiongkuo Min</a><sup>2</sup>,
88
- <a href="https://ee.sjtu.edu.cn/en/FacultyDetail.aspx?id=24&infoid=153&flag=153" target="_blank">Guangtao Zhai</a><sup>2</sup><sup>#</sup>,
89
- <a href="https://personal.ntu.edu.sg/wslin/Home.html" target="_blank">Weisi Lin</a><sup>1</sup><sup>#</sup>
90
- </h5>
91
-
92
- <h5 align="center">
93
- <sup>1</sup>Nanyang Technological University, <sup>2</sup>Shanghai Jiao Tong University, <sup>3</sup>Sensetime Research
94
- </h5>
95
- <h5 align="center">
96
- <sup>*</sup>Equal contribution. <sup>+</sup>Project Lead. <sup>#</sup>Corresponding author(s).
97
- </h5>
98
-
99
- <h4 align="center"> If you like the OneScorer, please give us a star ✨ on <a href='https://github.com/Q-Future/Q-Align'>GitHub</a> for latest update. </h4>
100
 
101
  <h5 align="center">
102
  <div style="display:flex; gap: 0.25rem;" align="center">
@@ -111,9 +83,13 @@ title_markdown = ("""
111
 
112
 
113
  input_img = gr.Image(type='pil', label="Upload an Image")
114
- input_vid = gr.Video(label="Upload a Video (will INGORE the image if a video is uploaded)")
 
 
 
115
 
116
  labels = gr.Label(label="Probabilities of rating levels:")
117
- number = gr.Number(label="Output score:", info="Range in [1,5]. Higher is better.")
118
- demo = gr.Interface(fn=image_classifier, inputs=[input_img, input_vid, gr.Radio(["Image Aesthetics (IAA)", "Image Quality (IQA)", "Video Quality (VQA)"], label="Task", info="Which Scorer will you need?"),], outputs=[labels, number], title="OneScorer", description=title_markdown)
119
- demo.launch(share=True)
 
 
64
 
65
  title_markdown = ("""
66
 
67
+ <div style="width: 100%; text-align: center; margin:auto;">
68
+ <img style="width: 100%" src="https://raw.githubusercontent.com/Q-Future/Q-Align/main/fig/onescorer.png">
69
+ </div>
70
 
71
+ <h4 align="center"> If you like the OneScorer, please give us a star ✨ on <a href='https://github.com/Q-Future/Q-Align'>[GitHub]</a> for latest update. </h4>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  <h5 align="center">
74
  <div style="display:flex; gap: 0.25rem;" align="center">
 
83
 
84
 
85
  input_img = gr.Image(type='pil', label="Upload an Image")
86
+ input_vid = gr.Video(label="Upload a Video (will INGORE the image if a video is uploaded)",sources=["upload"])
87
+ radio = gr.Radio(["Image Aesthetics (IAA)", "Image Quality (IQA)", "Video Quality (VQA)"], label="Task", info="Which Scorer will you need?")
88
+
89
+ input_img = gr.Image(type='pil', label="Upload an Image")
90
 
91
  labels = gr.Label(label="Probabilities of rating levels:")
92
+ number = gr.Number(label="Output score:", info="Range in [1,5]. Higher is better.", precision=4)
93
+ demo = gr.Interface(fn=image_classifier, inputs=[input_img, input_vid, radio], outputs=[labels, number], description=title_markdown, examples=[["fig/eiffel_a.jpg", None, "Image Aesthetics (IAA)"], ["fig/singapore_flyer_2.jpg", None, "Image Quality (IQA)"], ["fig/none.png", "fig/10244479353.mp4", "Video Quality (VQA)"]], article="This is the Scorer Demo as Proposed by Paper: 'Q-Align: Teaching LMMs for Visual Scoring via Discrete Text-Defined Levels'. The proposed Q-Align achieves state-of-the-art performance on image quality assessment (IQA), image aesthetic assessment (IAA), as well as video quality assessment (VQA) tasks under the original LMM structure. With the syllabus, we further unify the three tasks into one model, termed the **OneAlign**, to which the demo corresponds.")
94
+ demo.launch()
95
+
fig/10102107193.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0139566e1e3ed9f452472cd444236f745e3dcce99b0648fb01d9a8835af25ef9
3
+ size 1072646
fig/10244479353.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:154e74795d12bf904091cdc7479f997865dd6339c85040012e7cab28cd863e30
3
+ size 1523613
fig/eiffel_a.jpg ADDED

Git LFS Details

  • SHA256: 3622a90a5996034e3bfdde33cd39f429f8a97ed0a3c1226c9f94eda154d0a968
  • Pointer size: 132 Bytes
  • Size of remote file: 3.16 MB
fig/longgang_nightblur.png ADDED

Git LFS Details

  • SHA256: 9f6abc40a22cc4cdd3b5a926566c901981e357be58324284b656872abf5964c3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.67 MB
fig/none.png ADDED

Git LFS Details

  • SHA256: 5df7644187513e706f6231d3a893a87db1931946c016eaa36c328d3e863af385
  • Pointer size: 130 Bytes
  • Size of remote file: 25.9 kB
fig/onescorer.png ADDED

Git LFS Details

  • SHA256: 7f926247ac9815f541673a37233915d079193d1ab306b3a9d4ea21d6d553e6a4
  • Pointer size: 131 Bytes
  • Size of remote file: 284 kB
fig/singapore_flyer_2.jpg ADDED

Git LFS Details

  • SHA256: 09f86ecd97a2a16a79a8ccbc3acc8d8fa435e53e34da6d9fe144083446d2c644
  • Pointer size: 131 Bytes
  • Size of remote file: 144 kB