jiuface committed on
Commit
ffe5aff
1 Parent(s): 5197257

add r2 support

Browse files
app.py CHANGED
@@ -7,15 +7,20 @@ import torch
7
  from PIL import Image
8
  from io import BytesIO
9
  import PIL.Image
10
- import requests
11
  import cv2
12
  import json
13
  import time
14
  import os
15
-
 
16
  from utils.florence import load_florence_model, run_florence_inference, \
17
  FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
18
  from utils.sam import load_sam_image_model, run_sam_inference
 
 
 
 
 
19
 
20
  DEVICE = torch.device("cuda")
21
  # DEVICE = torch.device("cpu")
@@ -29,14 +34,6 @@ if torch.cuda.get_device_properties(0).major >= 8:
29
  FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
30
  SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
31
 
32
- def fetch_image_from_url(image_url):
33
- try:
34
- response = requests.get(image_url)
35
- response.raise_for_status()
36
- img = Image.open(BytesIO(response.content))
37
- return img
38
- except Exception as e:
39
- return None
40
 
41
  class calculateDuration:
42
  def __init__(self, activity_name=""):
@@ -60,28 +57,45 @@ class calculateDuration:
60
 
61
  print(f"Activity: {self.activity_name}, End time: {self.start_time_formatted}")
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  @spaces.GPU()
65
  @torch.inference_mode()
66
  @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
67
- def process_image(image_input, image_url, task_prompt, text_prompt=None, dilate=0, merge_masks=False, return_rectangles=False, invert_mask=False) -> Optional[Image.Image]:
68
-
69
- if not image_input:
70
- gr.Info("Please upload an image.")
71
- return None
72
 
73
- if not task_prompt:
74
- gr.Info("Please enter a task prompt.")
75
- return None
76
 
77
- if image_url:
78
- with calculateDuration("Download Image"):
79
- print("start to fetch image from url", image_url)
80
- image_input = fetch_image_from_url(image_url)
81
- print("fetch image success")
 
82
 
83
  # start to parse prompt
84
- with calculateDuration("FLORENCE"):
85
  print(task_prompt, text_prompt)
86
  _, result = run_florence_inference(
87
  model=FLORENCE_MODEL,
@@ -91,7 +105,7 @@ def process_image(image_input, image_url, task_prompt, text_prompt=None, dilate=
91
  task=task_prompt,
92
  text=text_prompt
93
  )
94
- with calculateDuration("sv.Detections"):
95
  # start to detect
96
  detections = sv.Detections.from_lmm(
97
  lmm=sv.LMM.FLORENCE_2,
@@ -123,7 +137,7 @@ def process_image(image_input, image_url, task_prompt, text_prompt=None, dilate=
123
  detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
124
  if len(detections) == 0:
125
  gr.Info("No objects detected.")
126
- return None
127
  print("mask generated:", len(detections.mask))
128
  kernel_size = dilate
129
  kernel = np.ones((kernel_size, kernel_size), np.uint8)
@@ -143,7 +157,20 @@ def process_image(image_input, image_url, task_prompt, text_prompt=None, dilate=
143
  with calculateDuration("invert mask colors"):
144
  images = [cv2.bitwise_not(mask) for mask in images]
145
 
146
- return images
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
 
149
  def update_task_info(task_prompt):
@@ -166,7 +193,6 @@ def update_task_info(task_prompt):
166
  with gr.Blocks() as demo:
167
  with gr.Row():
168
  with gr.Column():
169
- image = gr.Image(type='pil', label='Upload image')
170
  image_url = gr.Textbox(label='Image url', placeholder='Enter text prompts (Optional)', info="The image_url parameter allows you to input a URL pointing to an image.")
171
  task_prompt = gr.Dropdown(['<OD>', '<CAPTION_TO_PHRASE_GROUNDING>', '<DENSE_REGION_CAPTION>', '<REGION_PROPOSAL>', '<OCR_WITH_REGION>', '<REFERRING_EXPRESSION_SEGMENTATION>', '<REGION_TO_SEGMENTATION>', '<OPEN_VOCABULARY_DETECTION>', '<REGION_TO_CATEGORY>', '<REGION_TO_DESCRIPTION>'], value="<CAPTION_TO_PHRASE_GROUNDING>", label="Task Prompt", info="check doc at [Florence](https://huggingface.co/microsoft/Florence-2-large)")
172
  text_prompt = gr.Textbox(label='Text prompt', placeholder='Enter text prompts')
@@ -178,21 +204,24 @@ with gr.Blocks() as demo:
178
  return_rectangles = gr.Checkbox(label="Return Rectangles", value=False, info="The return_rectangles parameter, when enabled, generates masks as filled white rectangles corresponding to the bounding boxes of detected objects, rather than detailed contours or segments. This option is useful for simpler, box-based visualizations.")
179
  invert_mask = gr.Checkbox(label="invert mask", value=False, info="The invert_mask option allows you to reverse the colors of the generated mask, changing black areas to white and white areas to black. This can be useful for visualizing or processing the mask in a different context.")
180
 
 
 
 
 
 
 
 
 
 
 
181
  with gr.Column():
182
  image_gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery", columns=[3], rows=[1], object_fit="contain", height="auto")
183
- # json_result = gr.Code(label="JSON Result", language="json")
184
-
185
-
186
- image_url.change(
187
- fn=fetch_image_from_url,
188
- inputs=[image_url],
189
- outputs=[image]
190
- )
191
 
192
  submit_button.click(
193
  fn=process_image,
194
- inputs=[image, image_url, task_prompt, text_prompt, dilate, merge_masks, return_rectangles, invert_mask],
195
- outputs=[image_gallery],
196
  show_api=False
197
  )
198
 
 
7
  from PIL import Image
8
  from io import BytesIO
9
  import PIL.Image
 
10
  import cv2
11
  import json
12
  import time
13
  import os
14
+ from diffusers.utils import load_image
15
+ import json
16
  from utils.florence import load_florence_model, run_florence_inference, \
17
  FLORENCE_OPEN_VOCABULARY_DETECTION_TASK
18
  from utils.sam import load_sam_image_model, run_sam_inference
19
+ import copy
20
+ import random
21
+ import time
22
+ import boto3
23
+ from datetime import datetime
24
 
25
  DEVICE = torch.device("cuda")
26
  # DEVICE = torch.device("cpu")
 
34
  FLORENCE_MODEL, FLORENCE_PROCESSOR = load_florence_model(device=DEVICE)
35
  SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
36
 
 
 
 
 
 
 
 
 
37
 
38
  class calculateDuration:
39
  def __init__(self, activity_name=""):
 
57
 
58
  print(f"Activity: {self.activity_name}, End time: {self.start_time_formatted}")
59
 
60
def upload_image_to_r2(image, account_id, access_key, secret_key, bucket_name):
    """Upload an image to a Cloudflare R2 bucket as a PNG and return its key.

    Args:
        image: The image to upload. Either an object exposing ``save``
            (e.g. a PIL image) or an array that ``PIL.Image.fromarray``
            accepts. The caller builds its list with ``cv2.bitwise_not``,
            which yields NumPy arrays rather than PIL images.
        account_id: R2 account id, used to build the endpoint URL.
        access_key: R2 access key id passed to the S3 client.
        secret_key: R2 secret access key passed to the S3 client.
        bucket_name: Name of the destination bucket.

    Returns:
        The object key (path inside the bucket) the image was stored under,
        of the form ``generated_images/<Y/m/d/HMS>_<random int>.png``.
    """
    # Do NOT log access_key / secret_key: stdout ends up in shared logs and
    # printing them leaks the caller's credentials.
    print("upload_image_to_r2", account_id, bucket_name)
    connection_url = f"https://{account_id}.r2.cloudflarestorage.com"

    s3 = boto3.client(
        's3',
        endpoint_url=connection_url,
        region_name='auto',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )

    # Masks coming out of OpenCV are arrays and have no ``save`` method;
    # convert them so both PIL images and arrays can be uploaded.
    # NOTE(review): assumes the array dtype/shape is one PIL can interpret
    # (e.g. uint8 masks) - confirm against the mask-building code.
    if not hasattr(image, "save"):
        image = Image.fromarray(image)

    # The original referenced MAX_SEED, which is not defined in the visible
    # part of the file; a fixed int32 bound keeps this function
    # self-contained while preserving the key format.
    max_suffix = 2**31 - 1
    current_time = datetime.now().strftime("%Y/%m/%d/%H%M%S")
    image_file = f"generated_images/{current_time}_{random.randint(0, max_suffix)}.png"

    # Serialise to an in-memory PNG and rewind before streaming it to R2.
    buffer = BytesIO()
    image.save(buffer, "PNG")
    buffer.seek(0)
    s3.upload_fileobj(buffer, bucket_name, image_file)
    print("upload finish", image_file)
    return image_file
80
 
81
  @spaces.GPU()
82
  @torch.inference_mode()
83
  @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
84
+ def process_image(image_url, task_prompt, text_prompt=None, dilate=0, merge_masks=False, return_rectangles=False, invert_mask=False, upload_to_r2=False, account_id="", bucket="", access_key="", secret_key="") -> Optional[Image.Image]:
 
 
 
 
85
 
86
+ if not task_prompt or not image_url:
87
+ gr.Info("Please enter a image url or task prompt.")
88
+ return None, json.dumps({"status": "failed", "message": "invalid parameters"})
89
 
90
+ with calculateDuration("Download Image"):
91
+ print("start to fetch image from url", image_url)
92
+ image_input = load_image(image_url)
93
+ if not image_input:
94
+ return None, json.dumps({"status": "failed", "message": "invalid image"})
95
+
96
 
97
  # start to parse prompt
98
+ with calculateDuration("run_florence_inference"):
99
  print(task_prompt, text_prompt)
100
  _, result = run_florence_inference(
101
  model=FLORENCE_MODEL,
 
105
  task=task_prompt,
106
  text=text_prompt
107
  )
108
+ with calculateDuration("run_detections"):
109
  # start to detect
110
  detections = sv.Detections.from_lmm(
111
  lmm=sv.LMM.FLORENCE_2,
 
137
  detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
138
  if len(detections) == 0:
139
  gr.Info("No objects detected.")
140
+ return None, json.dumps({"status": "failed", "message": "no object tetected"})
141
  print("mask generated:", len(detections.mask))
142
  kernel_size = dilate
143
  kernel = np.ones((kernel_size, kernel_size), np.uint8)
 
157
  with calculateDuration("invert mask colors"):
158
  images = [cv2.bitwise_not(mask) for mask in images]
159
 
160
+ # return results
161
+ json_result = {"status": "success", "message": "", "image_urls": []}
162
+ if upload_to_r2:
163
+ with calculateDuration("upload to r2"):
164
+ image_urls = []
165
+ for image in images:
166
+ url = upload_image_to_r2(image, account_id, access_key, secret_key, bucket)
167
+ image_urls.append(url)
168
+ json_result["image_urls"] = image_urls
169
+ json_result["message"] = "upload to r2 success"
170
+ else:
171
+ json_result["message"] = "not upload"
172
+
173
+ return images, json.dumps(json_result)
174
 
175
 
176
  def update_task_info(task_prompt):
 
193
  with gr.Blocks() as demo:
194
  with gr.Row():
195
  with gr.Column():
 
196
  image_url = gr.Textbox(label='Image url', placeholder='Enter text prompts (Optional)', info="The image_url parameter allows you to input a URL pointing to an image.")
197
  task_prompt = gr.Dropdown(['<OD>', '<CAPTION_TO_PHRASE_GROUNDING>', '<DENSE_REGION_CAPTION>', '<REGION_PROPOSAL>', '<OCR_WITH_REGION>', '<REFERRING_EXPRESSION_SEGMENTATION>', '<REGION_TO_SEGMENTATION>', '<OPEN_VOCABULARY_DETECTION>', '<REGION_TO_CATEGORY>', '<REGION_TO_DESCRIPTION>'], value="<CAPTION_TO_PHRASE_GROUNDING>", label="Task Prompt", info="check doc at [Florence](https://huggingface.co/microsoft/Florence-2-large)")
198
  text_prompt = gr.Textbox(label='Text prompt', placeholder='Enter text prompts')
 
204
  return_rectangles = gr.Checkbox(label="Return Rectangles", value=False, info="The return_rectangles parameter, when enabled, generates masks as filled white rectangles corresponding to the bounding boxes of detected objects, rather than detailed contours or segments. This option is useful for simpler, box-based visualizations.")
205
  invert_mask = gr.Checkbox(label="invert mask", value=False, info="The invert_mask option allows you to reverse the colors of the generated mask, changing black areas to white and white areas to black. This can be useful for visualizing or processing the mask in a different context.")
206
 
207
+ with gr.Accordion("R2 Settings", open=False):
208
+ upload_to_r2 = gr.Checkbox(label="Upload to R2", value=False)
209
+ with gr.Row():
210
+ account_id = gr.Textbox(label="Account Id", placeholder="Enter R2 account id")
211
+ bucket = gr.Textbox(label="Bucket Name", placeholder="Enter R2 bucket name here")
212
+
213
+ with gr.Row():
214
+ access_key = gr.Textbox(label="Access Key", placeholder="Enter R2 access key here")
215
+ secret_key = gr.Textbox(label="Secret Key", placeholder="Enter R2 secret key here")
216
+
217
  with gr.Column():
218
  image_gallery = gr.Gallery(label="Generated images", show_label=False, elem_id="gallery", columns=[3], rows=[1], object_fit="contain", height="auto")
219
+ json_result = gr.Code(label="JSON Result", language="json")
 
 
 
 
 
 
 
220
 
221
  submit_button.click(
222
  fn=process_image,
223
+ inputs=[image_url, task_prompt, text_prompt, dilate, merge_masks, return_rectangles, invert_mask, upload_to_r2, account_id, bucket, access_key, secret_key],
224
+ outputs=[image_gallery, json_result],
225
  show_api=False
226
  )
227
 
requirements.txt CHANGED
@@ -8,4 +8,5 @@ gradio
8
  supervision
9
  opencv-python
10
  pytest
11
- requests
 
 
8
  supervision
9
  opencv-python
10
  pytest
11
+ diffusers
12
+ boto3
utils/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/__init__.cpython-310.pyc and b/utils/__pycache__/__init__.cpython-310.pyc differ
 
utils/__pycache__/florence.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/florence.cpython-310.pyc and b/utils/__pycache__/florence.cpython-310.pyc differ
 
utils/__pycache__/sam.cpython-310.pyc CHANGED
Binary files a/utils/__pycache__/sam.cpython-310.pyc and b/utils/__pycache__/sam.cpython-310.pyc differ