Spaces: Running on Zero
add r2 support
Files changed:
- app.py (+67 -38)
- requirements.txt (+2 -1)
- utils/__pycache__/__init__.cpython-310.pyc (binary)
- utils/__pycache__/florence.cpython-310.pyc (binary)
- utils/__pycache__/sam.cpython-310.pyc (binary)
app.py
CHANGED

@@ -7,15 +7,20 @@ import torch
 from PIL import Image
 from io import BytesIO
 import PIL.Image
-import requests
 import cv2
 import json
 import time
 import os
-
+from diffusers.utils import load_image
+import json
 from utils.florence import load_florence_model, run_florence_inference, \
     FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
 from utils.sam import load_sam_image_model, run_sam_inference
+import copy
+import random
+import time
+import boto3
+from datetime import datetime
 
 DEVICE = torch.device("cuda")
 # DEVICE = torch.device("cpu")
@@ -29,14 +34,6 @@ if torch.cuda.get_device_properties(0).major >= 8:
 FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
 SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
 
-def fetch_image_from_url(image_url):
-    try:
-        response = requests.get(image_url)
-        response.raise_for_status()
-        img = Image.open(BytesIO(response.content))
-        return img
-    except Exception as e:
-        return None
 
 class calculateDuration:
     def __init__(self, activity_name=""):
@@ -60,28 +57,45 @@ class calculateDuration:
 
         print(f"Activity: {self.activity_name}, End time: {self.start_time_formatted}")
 
+def upload_image_to_r2(image, account_id, access_key, secret_key, bucket_name):
+    print("upload_image_to_r2", account_id, access_key, secret_key, bucket_name)
+    connectionUrl = f"https://{account_id}.r2.cloudflarestorage.com"
+
+    s3 = boto3.client(
+        's3',
+        endpoint_url=connectionUrl,
+        region_name='auto',
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key
+    )
+
+    current_time = datetime.now().strftime("%Y/%m/%d/%H%M%S")
+    image_file = f"generated_images/{current_time}_{random.randint(0, MAX_SEED)}.png"
+    buffer = BytesIO()
+    image.save(buffer, "PNG")
+    buffer.seek(0)
+    s3.upload_fileobj(buffer, bucket_name, image_file)
+    print("upload finish", image_file)
+    return image_file
 
 @spaces.GPU()
 @torch.inference_mode()
 @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
-def process_image(image_input, image_url, task_prompt, text_prompt=None, dilate=0, merge_masks=False, return_rectangles=False, invert_mask=False) -> Optional[Image.Image]:
+def process_image(image_url, task_prompt, text_prompt=None, dilate=0, merge_masks=False, return_rectangles=False, invert_mask=False, upload_to_r2=False, account_id="", bucket="", access_key="", secret_key="") -> Optional[Image.Image]:
 
-    if not image_input:
-        gr.Info("Please upload an image.")
-        return None
-
-    if not task_prompt:
-        gr.Info("Please enter a task prompt.")
-        return None
+    if not task_prompt or not image_url:
+        gr.Info("Please enter an image URL or task prompt.")
+        return None, json.dumps({"status": "failed", "message": "invalid parameters"})
 
-    …
+    with calculateDuration("Download Image"):
+        print("start to fetch image from url", image_url)
+        image_input = load_image(image_url)
+        if not image_input:
+            return None, json.dumps({"status": "failed", "message": "invalid image"})
+
 
     # start to parse prompt
-    with calculateDuration("…"):
+    with calculateDuration("run_florence_inference"):
         print(task_prompt, text_prompt)
         _, result = run_florence_inference(
             model=FLORENCE_MODEL,
@@ -91,7 +105,7 @@ def process_image(image_input, image_url, task_prompt, text_prompt=None, dilate=
             task=task_prompt,
             text=text_prompt
         )
-    with calculateDuration("…"):
+    with calculateDuration("run_detections"):
         # start to detect
         detections = sv.Detections.from_lmm(
             lmm=sv.LMM.FLORENCE_2,
@@ -123,7 +137,7 @@ def process_image(image_input, image_url, task_prompt, text_prompt=None, dilate=
     detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
     if len(detections) == 0:
         gr.Info("No objects detected.")
-        return None
+        return None, json.dumps({"status": "failed", "message": "no object detected"})
     print("mask generated:", len(detections.mask))
     kernel_size = dilate
     kernel = np.ones((kernel_size, kernel_size), np.uint8)
@@ -143,7 +157,20 @@ def process_image(image_input, image_url, task_prompt, text_prompt=None, dilate=
     with calculateDuration("invert mask colors"):
         images = [cv2.bitwise_not(mask) for mask in images]
 
-    return …
+    # return results
+    json_result = {"status": "success", "message": "", "image_urls": []}
+    if upload_to_r2:
+        with calculateDuration("upload to r2"):
+            image_urls = []
+            for image in images:
+                url = upload_image_to_r2(image, account_id, access_key, secret_key, bucket)
+                image_urls.append(url)
+            json_result["image_urls"] = image_urls
+            json_result["message"] = "upload to r2 success"
+    else:
+        json_result["message"] = "not uploaded"
+
+    return images, json.dumps(json_result)
 
 
 def update_task_info(task_prompt):
@@ -166,7 +193,6 @@ def update_task_info(task_prompt):
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            image = gr.Image(type='pil', label='Upload image')
            image_url = gr.Textbox(label='Image url', placeholder='Enter image URL (optional)', info="The image_url parameter allows you to input a URL pointing to an image.")
            task_prompt = gr.Dropdown(['<OD>', '<CAPTION_TO_PHRASE_GROUNDING>', '<DENSE_REGION_CAPTION>', '<REGION_PROPOSAL>', '<OCR_WITH_REGION>', '<REFERRING_EXPRESSION_SEGMENTATION>', '<REGION_TO_SEGMENTATION>', '<OPEN_VOCABULARY_DETECTION>', '<REGION_TO_CATEGORY>', '<REGION_TO_DESCRIPTION>'], value="<CAPTION_TO_PHRASE_GROUNDING>", label="Task Prompt", info="check doc at [Florence](https://huggingface.co/microsoft/Florence-2-large)")
            text_prompt = gr.Textbox(label='Text prompt', placeholder='Enter text prompts')
@@ -178,21 +204,24 @@ with gr.Blocks() as demo:
             return_rectangles = gr.Checkbox(label="Return Rectangles", value=False, info="The return_rectangles parameter, when enabled, generates masks as filled white rectangles corresponding to the bounding boxes of detected objects, rather than detailed contours or segments. This option is useful for simpler, box-based visualizations.")
             invert_mask = gr.Checkbox(label="invert mask", value=False, info="The invert_mask option allows you to reverse the colors of the generated mask, changing black areas to white and white areas to black. This can be useful for visualizing or processing the mask in a different context.")
 
+            with gr.Accordion("R2 Settings", open=False):
+                upload_to_r2 = gr.Checkbox(label="Upload to R2", value=False)
+                with gr.Row():
+                    account_id = gr.Textbox(label="Account Id", placeholder="Enter R2 account id")
+                    bucket = gr.Textbox(label="Bucket Name", placeholder="Enter R2 bucket name here")
+
+                with gr.Row():
+                    access_key = gr.Textbox(label="Access Key", placeholder="Enter R2 access key here")
+                    secret_key = gr.Textbox(label="Secret Key", placeholder="Enter R2 secret key here")
+
         with gr.Column():
             image_gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery", columns=[3], rows=[1], object_fit="contain", height="auto")
-
-    image_url.change(
-        fn=fetch_image_from_url,
-        inputs=[image_url],
-        outputs=[image]
-    )
+            json_result = gr.Code(label="JSON Result", language="json")
 
     submit_button.click(
         fn=process_image,
-        inputs=[…],
-        outputs=[image_gallery],
+        inputs=[image_url, task_prompt, text_prompt, dilate, merge_masks, return_rectangles, invert_mask, upload_to_r2, account_id, bucket, access_key, secret_key],
+        outputs=[image_gallery, json_result],
         show_api=False
     )
 
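Notes on the app.py changes:

upload_image_to_r2 returns the R2 object key (e.g. generated_images/2024/…), not a browsable URL, so the image_urls field in the JSON result holds keys. It also relies on a MAX_SEED constant defined elsewhere in app.py, and the new import block duplicates json and time, which are already imported a few lines up. For a private bucket, a caller would still need to mint a download URL for each returned key. A minimal sketch, assuming the same credentials as the diff (get_r2_url is a hypothetical helper, not part of this commit):

import boto3

def get_r2_url(account_id, access_key, secret_key, bucket_name, object_key, expires=3600):
    # Hypothetical helper: mint a time-limited download URL for an object
    # that upload_image_to_r2 stored in R2 (S3-compatible API).
    s3 = boto3.client(
        's3',
        endpoint_url=f"https://{account_id}.r2.cloudflarestorage.com",
        region_name='auto',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key
    )
    # generate_presigned_url is standard boto3 and works against R2's S3 API.
    return s3.generate_presigned_url(
        'get_object',
        Params={'Bucket': bucket_name, 'Key': object_key},
        ExpiresIn=expires
    )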
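One thing to double-check before enabling the upload path: cv2.bitwise_not returns numpy arrays, while upload_image_to_r2 calls image.save(buffer, "PNG"), a PIL method. If the masks in images are still numpy arrays when the upload loop runs, a conversion along these lines would be needed first (a sketch, assuming single-channel uint8 masks):

from PIL import Image
import numpy as np

def to_pil(mask):
    # Wrap a numpy mask as a PIL image so image.save(...) inside
    # upload_image_to_r2 works on it; pass PIL images through unchanged.
    if isinstance(mask, np.ndarray):
        return Image.fromarray(mask.astype(np.uint8))
    return mask

i.e. url = upload_image_to_r2(to_pil(image), account_id, access_key, secret_key, bucket) inside the loop.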
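Also note that submit_button.click keeps show_api=False, so the new JSON result is only surfaced in the gr.Code panel; code running inside the Space would call process_image directly, e.g. (hypothetical URL and prompt):

images, result_json = process_image(
    image_url="https://example.com/cat.png",  # hypothetical test image
    task_prompt="<CAPTION_TO_PHRASE_GROUNDING>",
    text_prompt="cat",
    upload_to_r2=False
)
print(result_json)  # e.g. {"status": "success", "message": "not uploaded", "image_urls": []}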
requirements.txt
CHANGED

@@ -8,4 +8,5 @@ gradio
 supervision
 opencv-python
 pytest
-
+diffusers
+boto3
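diffusers is a heavy dependency to pull in solely for load_image, the only diffusers symbol the new code imports. A lighter route would be to keep the removed fetch_image_from_url helper, which needs only requests and PIL (requests was dropped from the imports in this commit, so it would have to come back):

import requests
from io import BytesIO
from PIL import Image

def fetch_image_from_url(image_url, timeout=10):
    # Lightweight stand-in for diffusers.utils.load_image, mirroring the
    # helper this commit removed; returns None on any failure.
    try:
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()
        return Image.open(BytesIO(response.content)).convert("RGB")
    except Exception:
        return None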
utils/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/__init__.cpython-310.pyc and b/utils/__pycache__/__init__.cpython-310.pyc differ

utils/__pycache__/florence.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/florence.cpython-310.pyc and b/utils/__pycache__/florence.cpython-310.pyc differ

utils/__pycache__/sam.cpython-310.pyc
CHANGED
Binary files a/utils/__pycache__/sam.cpython-310.pyc and b/utils/__pycache__/sam.cpython-310.pyc differ