mattraj committed on
Commit
753797a
1 Parent(s): b103f0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -5
app.py CHANGED
@@ -34,7 +34,7 @@ def infer(
34
  result = processor.decode(generated_ids[0], skip_special_tokens=True)
35
 
36
  # Placeholder to extract bounding box info from the result (you should replace this with actual bounding box extraction)
37
- bounding_boxes = extract_bounding_boxes(result)
38
 
39
  # Draw bounding boxes on the image
40
  annotated_image = image.copy()
@@ -48,17 +48,26 @@ def infer(
48
 
49
  return result, annotated_image
50
 
51
- def extract_bounding_boxes(result):
 
52
  """
53
  Extract bounding boxes and labels from the model result.
54
 
55
- Extracts two x,y coordinate pairs from <loc> tags and associates them with the corresponding labels.
56
 
57
- Example return: [((x1, y1, x2, y2), "Label")]
 
 
 
 
 
58
  """
59
  # Regular expression to capture the <loc> tags and their associated labels
60
  loc_pattern = re.compile(r"<loc(\d{4})><loc(\d{4})><loc(\d{4})><loc(\d{4})>\s*([a-zA-Z\-]+)")
61
 
 
 
 
62
  # Find all matches of bounding box coordinates and labels in the result string
63
  matches = loc_pattern.findall(result)
64
 
@@ -70,7 +79,13 @@ def extract_bounding_boxes(result):
70
  # Convert coordinates from string to integer
71
  x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
72
 
73
- # Append the bounding box and label as a tuple
 
 
 
 
 
 
74
  bounding_boxes.append(((x1, y1, x2, y2), label))
75
 
76
  return bounding_boxes
 
34
  result = processor.decode(generated_ids[0], skip_special_tokens=True)
35
 
36
  # Placeholder to extract bounding box info from the result (you should replace this with actual bounding box extraction)
37
+ bounding_boxes = extract_bounding_boxes(result, image)
38
 
39
  # Draw bounding boxes on the image
40
  annotated_image = image.copy()
 
48
 
49
  return result, annotated_image
50
 
51
+
52
+ def extract_bounding_boxes(result, image):
53
  """
54
  Extract bounding boxes and labels from the model result.
55
 
56
+ Coordinates are scaled by dividing by 1024 and then multiplying by the image dimensions.
57
 
58
+ Args:
59
+ result (str): The model's output string containing bounding box data.
60
+ image (PIL.Image.Image): The image to use for scaling the bounding boxes.
61
+
62
+ Returns:
63
+ List[Tuple[Tuple[int, int, int, int], str]]: A list of bounding boxes and labels.
64
  """
65
  # Regular expression to capture the <loc> tags and their associated labels
66
  loc_pattern = re.compile(r"<loc(\d{4})><loc(\d{4})><loc(\d{4})><loc(\d{4})>\s*([a-zA-Z\-]+)")
67
 
68
+ # Get image dimensions
69
+ width, height = image.size
70
+
71
  # Find all matches of bounding box coordinates and labels in the result string
72
  matches = loc_pattern.findall(result)
73
 
 
79
  # Convert coordinates from string to integer
80
  x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
81
 
82
+ # Scale coordinates
83
+ x1 = int((x1 / 1024) * width)
84
+ y1 = int((y1 / 1024) * height)
85
+ x2 = int((x2 / 1024) * width)
86
+ y2 = int((y2 / 1024) * height)
87
+
88
+ # Append the scaled bounding box and label as a tuple
89
  bounding_boxes.append(((x1, y1, x2, y2), label))
90
 
91
  return bounding_boxes