georgescutelnicu committed
Commit: 6add590
Parent(s): ec64f33
Upload 13 files
- .gitattributes +1 -0
- add_text.py +54 -0
- app.py +65 -0
- detect_bubbles.py +19 -0
- examples/0.png +3 -0
- examples/ex0.png +0 -0
- fonts/animeace_i.ttf +0 -0
- fonts/ariali.ttf +0 -0
- fonts/mangati.ttf +0 -0
- model.pt +3 -0
- packages.txt +1 -0
- process_bubble.py +27 -0
- requirements +7 -0
- translator.py +41 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+examples/0.png filter=lfs diff=lfs merge=lfs -text
add_text.py
ADDED
@@ -0,0 +1,54 @@
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import textwrap
import cv2


def add_text(image, text, font_path, bubble_contour):
    """
    Add text inside a speech bubble contour.

    Args:
        image (numpy.ndarray): Processed bubble image (cv2 format - BGR).
        text (str): Text to be placed inside the speech bubble.
        font_path (str): Font path.
        bubble_contour (numpy.ndarray): Contour of the detected speech bubble.

    Returns:
        numpy.ndarray: Image with text placed inside the speech bubble.
    """
    pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(pil_image)

    x, y, w, h = cv2.boundingRect(bubble_contour)

    # Wrap to roughly one character per 10 px of bubble width.
    wrapped_text = textwrap.fill(text, width=int(w * 0.1), break_long_words=True)

    line_height = 12
    font_size = 10
    font = ImageFont.truetype(font_path, size=font_size)

    lines = wrapped_text.split('\n')
    total_text_height = len(lines) * line_height

    # If the text block overflows the bubble, shrink the font and tighten the
    # leading. (The font must be reloaded for the new size to take effect.)
    if total_text_height > h:
        font_size = max(1, int(font_size * h / total_text_height))
        font = ImageFont.truetype(font_path, size=font_size)
        line_height = 10
        total_text_height = len(lines) * line_height

    # Vertical centering
    text_y = y + (h - total_text_height) // 2

    for line in lines:
        text_length = draw.textlength(line, font=font)

        # Horizontal centering
        text_x = x + (w - text_length) // 2

        draw.text((text_x, text_y), line, font=font, fill=(0, 0, 0))

        text_y += line_height

    # Write the rendered result back into the original BGR array in place.
    image[:, :, :] = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)

    return image
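A quick way to exercise add_text outside the app is to hand it a blank page and a rectangular contour. This is a hypothetical smoke test, not part of the commit; the font path is the one shipped in fonts/, everything else is made up:

import numpy as np
import cv2
from add_text import add_text

# A blank white 300x200 "bubble" and a rectangle contour just inside its border.
page = np.full((200, 300, 3), 255, dtype=np.uint8)
contour = np.array([[[10, 10]], [[290, 10]], [[290, 190]], [[10, 190]]])

out = add_text(page, "It works on a plain rectangle too!", "fonts/animeace_i.ttf", contour)
cv2.imwrite("bubble_with_text.png", out)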
app.py
ADDED
@@ -0,0 +1,65 @@
from add_text import add_text
from detect_bubbles import detect_bubbles
from process_bubble import process_bubble
from translator import MangaTranslator
from manga_ocr import MangaOcr
from PIL import Image
import gradio as gr
import numpy as np


MODEL = "model.pt"
EXAMPLE_LIST = [["examples/0.png"],
                ["examples/ex0.png"]]
TITLE = "Manga Translator"
DESCRIPTION = "Translate text in manga bubbles!"


def predict(img, translation_method="google", font="fonts/animeace_i.ttf"):
    results = detect_bubbles(MODEL, img)

    manga_translator = MangaTranslator()
    mocr = MangaOcr()

    image = np.array(img)

    for result in results:
        x1, y1, x2, y2, score, class_id = result

        # A NumPy view into the page, so edits to the crop update `image` too.
        detected_image = image[int(y1):int(y2), int(x1):int(x2)]

        # Gradio's image input already yields a uint8 RGB array, so the crop
        # can be handed to manga-ocr directly (no rescaling needed).
        im = Image.fromarray(detected_image)
        text = mocr(im)

        detected_image, cont = process_bubble(detected_image)

        text_translated = manga_translator.translate(text,
                                                     method=translation_method)

        # add_text draws into the crop in place; no reassignment is needed.
        add_text(detected_image, text_translated, font, cont)

    return image


demo = gr.Interface(fn=predict,
                    inputs=["image",
                            gr.Dropdown([("Google", "google"),
                                         ("Helsinki-NLP's opus-mt-ja-en model",
                                          "hf")],
                                        label="Translation Method",
                                        value="google"),
                            gr.Dropdown([("animeace_i", "fonts/animeace_i.ttf"),
                                         ("mangati", "fonts/mangati.ttf"),
                                         ("ariali", "fonts/ariali.ttf")],
                                        label="Text Font",
                                        value="fonts/animeace_i.ttf")],
                    outputs=[gr.Image()],
                    examples=EXAMPLE_LIST,
                    title=TITLE,
                    description=DESCRIPTION)


demo.launch(debug=False,
            share=False)
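predict leans on NumPy view semantics: basic slicing such as image[int(y1):int(y2), int(x1):int(x2)] returns a view, not a copy, so when add_text writes into the crop, the full page is updated too and the function can simply return image. A minimal standalone sketch of that behavior:

import numpy as np

page = np.zeros((4, 4, 3), dtype=np.uint8)
crop = page[1:3, 1:3]        # a view into `page`, not a copy
crop[:, :, :] = 255          # writing through the view...
assert page[1, 1, 0] == 255  # ...changes the original page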
detect_bubbles.py
ADDED
@@ -0,0 +1,19 @@
from ultralytics import YOLO


def detect_bubbles(model_path, image_path):
    """
    Detects bubbles in an image using a YOLOv8 model.

    Args:
        model_path (str): The file path to the YOLO model.
        image_path (str): The file path to the input image.

    Returns:
        list: A list containing the coordinates, score and class_id of
              the detected bubbles.
    """
    model = YOLO(model_path)
    bubbles = model(image_path)[0]

    return bubbles.boxes.data.tolist()
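Each returned row is [x1, y1, x2, y2, score, class_id], which is exactly what app.py unpacks. (Despite the image_path name, ultralytics also accepts in-memory arrays, which is how app.py calls it.) A minimal sketch, assuming model.pt and the bundled example page are present:

from detect_bubbles import detect_bubbles

for x1, y1, x2, y2, score, class_id in detect_bubbles("model.pt", "examples/0.png"):
    print(f"bubble ({x1:.0f}, {y1:.0f})-({x2:.0f}, {y2:.0f}) conf={score:.2f}")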
examples/0.png
ADDED
Git LFS Details
examples/ex0.png
ADDED
fonts/animeace_i.ttf
ADDED
Binary file (28.8 kB)
fonts/ariali.ttf
ADDED
Binary file (717 kB)
fonts/mangati.ttf
ADDED
Binary file (30.4 kB)
model.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2f1a64e4e4c0dd30b361eb332866dea0f52eab9acb288b9ffdcb2622cb5d1cdb
size 6234585
packages.txt
ADDED
@@ -0,0 +1 @@
python3-opencv
process_bubble.py
ADDED
@@ -0,0 +1,27 @@
import cv2
import numpy as np


def process_bubble(image):
    """
    Processes the speech bubble in the given image, making its contents white.

    Parameters:
    - image (numpy.ndarray): Input image.

    Returns:
    - image (numpy.ndarray): Image with the speech bubble content set to white.
    - largest_contour (numpy.ndarray): Contour of the detected speech bubble.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Near-white pixels (intensity > 240) are treated as bubble interior.
    _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Assume the largest white region is the speech bubble itself.
    largest_contour = max(contours, key=cv2.contourArea)

    mask = np.zeros_like(gray)
    cv2.drawContours(mask, [largest_contour], -1, 255, cv2.FILLED)

    # Blank out everything inside the bubble so translated text can be drawn.
    image[mask == 255] = (255, 255, 255)

    return image, largest_contour
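A sketch of running process_bubble on its own; bubble_crop.png and the output name are assumptions for illustration:

import cv2
from process_bubble import process_bubble

bubble = cv2.imread("bubble_crop.png")      # BGR crop containing one bubble
cleared, contour = process_bubble(bubble)   # bubble interior is now white
cv2.drawContours(cleared, [contour], -1, (0, 255, 0), 2)  # visualize the detected outline
cv2.imwrite("bubble_cleared.png", cleared)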
requirements
ADDED
@@ -0,0 +1,7 @@
deep-translator==1.11.4
huggingface-hub==0.22.2
manga-ocr==0.1.11
numpy==1.24.2
opencv-python==4.9.0.80
pillow==10.3.0
ultralytics==8.1.43
translator.py
ADDED
@@ -0,0 +1,41 @@
from deep_translator import GoogleTranslator
from transformers import pipeline


class MangaTranslator:
    def __init__(self):
        self.target = "en"
        self.source = "ja"

    def translate(self, text, method="google"):
        """
        Translates the given text to the target language using the specified method.

        Args:
            text (str): The text to be translated.
            method (str): 'google' for Google Translator,
                          'hf' for Helsinki-NLP's opus-mt-ja-en model (HF pipeline)

        Returns:
            str: The translated text.
        """
        if method == "hf":
            return self._translate_with_hf(self._preprocess_text(text))
        elif method == "google":
            return self._translate_with_google(self._preprocess_text(text))
        else:
            raise ValueError("Invalid translation method.")

    def _translate_with_google(self, text):
        translator = GoogleTranslator(source=self.source, target=self.target)
        translated_text = translator.translate(text)
        return translated_text

    def _translate_with_hf(self, text):
        # Note: this builds the pipeline on every call; caching it would be faster.
        pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-ja-en")
        translated_text = pipe(text)[0]["translation_text"]
        return translated_text

    def _preprocess_text(self, text):
        # Replace the ideographic full stop so sentence boundaries survive translation.
        preprocessed_text = text.replace("。", ".")
        return preprocessed_text
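Usage sketch (the sample sentence is arbitrary; the "hf" method downloads Helsinki-NLP/opus-mt-ja-en on first use, and transformers is available because manga-ocr depends on it):

from translator import MangaTranslator

translator = MangaTranslator()
print(translator.translate("こんにちは。", method="google"))  # Google Translate backend
print(translator.translate("こんにちは。", method="hf"))      # local opus-mt-ja-en pipeline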