Spaces: Running on Zero

Revert "test 600m"

This reverts commit 893330bad43179b8a93788b03a11e16e5e39f1ed.
- Rodin.py +34 -44
- app.py +79 -229
- constant.py +0 -3
- openclay/models/__init__.py +0 -3
- openclay/models/condition.py +0 -102
- openclay/models/ldm.py +0 -83
- openclay/models/vae.py +0 -124
- openclay/modules/attention.py +0 -73
- openclay/modules/control_volume.py +0 -52
- openclay/modules/diag_gaussian.py +0 -42
- openclay/modules/drop_path.py +0 -34
- openclay/modules/embedding.py +0 -36
- openclay/modules/transformer.py +0 -116
- openclay/pipeline_openclay.py +0 -195
- openclay/utils.py +0 -80
- requirements.txt +1 -13
Rodin.py
CHANGED
@@ -13,27 +13,30 @@ import gradio as gr
 from requests_toolbelt.multipart.encoder import MultipartEncoder
 from constant import *
 
+@spaces.GPU
+def foo():
+    pass
+
 def login(email, password):
     payload = {'password': password}
     if email:
         payload['email'] = email
-
+
     response = requests.post(f"{BASE_URL}/user/login", json=payload)
     try:
         response_data = response.json()
     except json.JSONDecodeError as e:
         log("ERROR", f"Error in login: {response}")
         raise e
-
+
     if 'error' in response_data and response_data['error']:
         raise Exception(response_data['error'])
     log("INFO", f"Logged successfully")
     user_uuid = response_data['user_uuid']
     token = response_data['token']
-
+
     return user_uuid, token
 
-
 def rodin_history(task_uuid, token):
     headers = {
         'Authorization': f'Bearer {token}'
@@ -41,7 +44,6 @@ def rodin_history(task_uuid, token):
     response = requests.post(f"{BASE_URL}/task/rodin_history", data={"uuid": task_uuid}, headers=headers)
     return response.json()
 
-
 def rodin_preprocess_image(generate_prompt, image, name, token):
     m = MultipartEncoder(
         fields={
@@ -56,7 +58,6 @@ def rodin_preprocess_image(generate_prompt, image, name, token):
     response = requests.post(f"{BASE_URL}/task/rodin_mesh_image_process", data=m, headers=headers)
     return response
 
-
 def crop_image(image, type):
     if image == None:
         raise gr.Error("Please generate the object first")
@@ -77,7 +78,7 @@ def crop_image(image, type):
 # Perform Rodin mesh operation
 def rodin_mesh(prompt, group_uuid, settings, images, name, token):
     images = [convert_base64_to_binary(img) for img in images]
-
+
     m = MultipartEncoder(
         fields={
             'prompt': prompt,
@@ -99,13 +100,12 @@ def rodin_mesh(prompt, group_uuid, settings, images, name, token):
 def convert_base64_to_binary(base64_string):
     if ',' in base64_string:
         base64_string = base64_string.split(',')[1]
-
+
     image_data = base64.b64decode(base64_string)
     image_buffer = io.BytesIO(image_data)
-
+
     return image_buffer
 
-
 def rodin_update(prompt, task_uuid, token, settings):
     headers = {
         'Authorization': f'Bearer {token}'
@@ -113,7 +113,6 @@ def rodin_update(prompt, task_uuid, token, settings):
     response = requests.post(f"{BASE_URL}/task/rodin_update", data={"uuid": task_uuid, "prompt": prompt, "settings": settings}, headers=headers)
     return response
 
-
 def load_image(img_path):
     try:
         image = Image.open(img_path)
@@ -136,11 +135,9 @@ def load_image(img_path):
     image_bytes = byte_io.getvalue()
     return image_bytes
 
-
 def log(level, info_text):
     print(f"[ {level} ] - {time.strftime('%Y%m%d_%H:%M:%S', time.localtime())} - {info_text}")
 
-
 class Generator:
     def __init__(self, user_id, password, token) -> None:
         # _, self.token = login(user_id, password)
@@ -149,11 +146,11 @@ class Generator:
         self.password = password
         self.task_uuid = None
         self.processed_image = None
-
-    def preprocess(self, prompt, image_path, processed_image, task_uuid=""):
-        if image_path
+
+    def preprocess(self, prompt, image_path, processed_image , task_uuid=""):
+        if image_path == None:
             raise gr.Error("Please upload an image first")
-
+
         if processed_image and prompt and (not task_uuid):
             log("INFO", "Using cached image and prompt...")
             return prompt, processed_image
@@ -163,10 +160,10 @@ class Generator:
         while not success:
             if try_times > 3:
                 raise gr.Error("Failed to preprocess image")
-            try_times += 1
+            try_times += 1
             image_file = load_image(image_path)
             log("INFO", "Image loaded, processing...")
-
+
             try:
                 if prompt and task_uuid:
                     res = rodin_preprocess_image(generate_prompt=False, image=image_file, name=os.path.basename(image_path), token=self.token)
@@ -203,13 +200,13 @@ class Generator:
 
         log("INFO", "Image preprocessed successfully")
         return prompt, processed_image
-
+
     def generate_mesh(self, prompt, processed_image, task_uuid=""):
         log("INFO", "Generating mesh...")
         if task_uuid == "":
             settings = {'view_weights': [1]} # Define weights as per your requirements, for multiple images, use multiple values, e,g [0.5, 0.5]
             images = [processed_image] # List of images, all the images should be processed first
-
+
             res = rodin_mesh(prompt=prompt, group_uuid=None, settings=settings, images=images, name="images.jpeg", token=self.token)
             try:
                 mesh_response = res.json()
@@ -218,14 +215,14 @@ class Generator:
             except Exception as e:
                 log("ERROR", f"Error in generating mesh: {e} and response: {res}")
                 raise gr.Error("Error in generating mesh, please try again later.")
-
-            task_uuid = mesh_response['uuid']
+
+            task_uuid = mesh_response['uuid'] # The task_uuid should be same during whole generation process
         else:
             new_prompt = prompt
             settings = {
                 "view_weights": [1],
-                "seed": random.randint(0, 10000),
-                "escore": 5.5,
+                "seed": random.randint(0, 10000), # Customize your seed here
+                "escore": 5.5, # Temprature
             }
             res = rodin_update(new_prompt, task_uuid, self.token, settings)
             try:
@@ -243,7 +240,7 @@ class Generator:
             except Exception as e:
                 log("ERROR", f"Error in generating mesh: {history}")
                 raise gr.Error("Busy connection, please try again later.")
-
+
             response = requests.get(preview_image, stream=True)
             if response.status_code == 200:
                 image = Image.open(response.raw)
@@ -260,32 +257,25 @@ class JobStatusChecker:
         self.subscription_key = subscription_key
         self.sio = socketio.Client(logger=True, engineio_logger=True)
 
-        @self.sio.
-        def connect(
-            print("
+        @self.sio.event
+        def connect():
+            print("Connected to the server.")
 
-        @self.sio.
-        def disconnect(
-            print("
+        @self.sio.event
+        def disconnect():
+            print("Disconnected from server.")
 
         @self.sio.on('message', namespace='*')
         def message(*args, **kwargs):
-            print(f"""[ JobStatusChecker.message ] args = {args}""")
-            safe_to_disconnect = False
             if len(args) > 2:
                 data = args[2]
                 if data.get('jobStatus') == 'Succeeded':
-
-
-                    safe_to_disconnect = True
-
-            if safe_to_disconnect:
-                print("[ JobStatusChecker.message ] Job Succeeded! Please find the SDF image in history")
-                self.sio.disconnect()
+                    print("Job Succeeded! Please find the SDF image in history")
+                    self.sio.disconnect()
             else:
-                print(
+                print("Received event with insufficient arguments.")
 
     def start(self):
-        self.sio.connect(f"{self.base_url}/scheduler_socket?subscription={self.subscription_key}",
+        self.sio.connect(f"{self.base_url}/scheduler_socket?subscription={self.subscription_key}",
                          namespaces=['/api/scheduler_socket'], transports='websocket')
-        self.sio.wait()
+        self.sio.wait()
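A note on the re-added no-op above: the revert restores `@spaces.GPU def foo(): pass` at the top of Rodin.py even though generation now happens through the remote Rodin API. On a Space "Running on Zero" (ZeroGPU), the `spaces` package expects at least one `@spaces.GPU`-decorated function at startup, so the dummy appears to exist only to keep the ZeroGPU runtime satisfied. A minimal sketch of the usual pattern (the name `heavy_step` and the tensor math are illustrative, not from this repo):

import spaces
import torch

@spaces.GPU(duration=60)  # ZeroGPU attaches a GPU only while a decorated call runs
def heavy_step(x: torch.Tensor) -> torch.Tensor:
    # CUDA is available inside the decorated call on a ZeroGPU Space
    return (x.to("cuda") * 2).cpu()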
app.py
CHANGED
@@ -1,21 +1,8 @@
 import os
-os.system('
-os.system('
-os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
-os.environ["TORCH_LINEAR_FLATTEN_3D"] = "1"
-
-import cv2
-import time
-import numpy as np
-import torch
-
-from openclay.pipeline_openclay import OpenClayPipeline
-from openclay.models import ClayVAE, ClayLDM, ClayConditionNet
-from openclay.utils import process_image_square
-from transformers import Dinov2Model, BitImageProcessor, CLIPTextModel, CLIPTokenizer
+os.system('pip uninstall -y gradio_fake3d')
+os.system('pip install gradio_fake3d-0.0.3-py3-none-any.whl')
 
 import gradio as gr
-import spaces
 import re
 from gradio_fake3d import Fake3D
 from PIL import Image
@@ -23,57 +10,6 @@ from Rodin import Generator, crop_image, log, convert_base64_to_binary
 from constant import *
 
 generator = Generator(USER, PASSWORD, TOKEN)
-os.makedirs(FOLDER_TEMP_MESH, exist_ok=True)
-
-device = torch.device("cuda")
-
-tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
-text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", torch_dtype=torch.float16).eval().requires_grad_(False).to(device)
-
-image_processor = BitImageProcessor.from_pretrained("facebook/dinov2-giant", torch_dtype=torch.float16)
-image_encoder = Dinov2Model.from_pretrained("facebook/dinov2-giant", torch_dtype=torch.float16).eval().requires_grad_(False).to(device)
-
-vae = ClayVAE.from_pretrained("DEEMOSTECH/CLAYV1_VAE", token=ACCESS_TOKEN, torch_dtype=torch.float16).eval().requires_grad_(False).to(device)
-ldm = ClayLDM.from_pretrained("DEEMOSTECH/CLAYV1_LDM_MEDIUM", token=ACCESS_TOKEN, torch_dtype=torch.float16).eval().requires_grad_(False).to(device)
-condition_net_image = ClayConditionNet.from_pretrained("DEEMOSTECH/CLAYV1_LDM_MEDIUM_CONDITION_IMAGE", token=ACCESS_TOKEN, torch_dtype=torch.float16).eval().requires_grad_(False).to(device)
-ldm.register_condition_net([condition_net_image])
-
-pipe = OpenClayPipeline(
-    vae=vae,
-    text_encoder=text_encoder,
-    tokenizer=tokenizer,
-    ldm=ldm,
-)
-
-
-@spaces.GPU
-def read_image(image, image_processor, image_encoder, size=224):
-    render = process_image_square(image)
-    render = cv2.resize(render, (size, size))
-    image_pixel_values = image_processor(render, return_tensors="pt", do_rescale=False,
-                                         do_resize=False, do_center_crop=False)["pixel_values"][0]
-    image_embeds_patch = image_encoder(image_pixel_values.half().to(image_encoder.device)[None])["last_hidden_state"]
-    return render, image_embeds_patch
-
-@spaces.GPU
-def local_inference(block_prompt, image_pil):
-    image = np.array(image_pil)
-    _, image_embeds_patch = read_image(image, image_processor, image_encoder)
-    mesh = pipe(
-        prompt=block_prompt, negative_prompt='fragmentation.',
-        res=256,
-        num_inference_steps=50,
-        mini_batch=65**3,
-        seed=42, num=1,
-        condition_seq=[(image_embeds_patch, [1, 0])],
-    )
-
-    mesh_path = f"{FOLDER_TEMP_MESH}/{int(time.time())}.glb"
-    os.makedirs(os.path.dirname(mesh_path), exist_ok=True)
-    mesh.export(mesh_path)
-
-    return mesh_path
-
 
 change_button_name = """
 function updateButton(input) {
@@ -83,14 +19,6 @@ function updateButton(input) {
 }
 """
 
-change_button_name_600 = """
-function updateButton(input) {
-    var buttonGenerate = document.getElementById('button_generate_600');
-    buttonGenerate.innerText = 'Redo';
-    return '';
-}
-"""
-
 change_button_name_to_generating = """
 function updateButton(input) {
     var buttonGenerate = document.getElementById('button_generate');
@@ -99,15 +27,6 @@ function updateButton(input) {
 }
 """
 
-change_button_name_to_generating_600 = """
-function updateButton(input) {
-    var buttonGenerate = document.getElementById('button_generate_600');
-    buttonGenerate.innerText = 'Generating...';
-    return '';
-}
-"""
-
-
 reset_button_name = """
 function updateButton(input) {
     var buttonGenerate = document.getElementById('button_generate');
@@ -116,15 +35,6 @@ function updateButton(input) {
 }
 """
 
-reset_button_name_600 = """
-function updateButton(input) {
-    var buttonGenerate = document.getElementById('button_generate_600');
-    buttonGenerate.innerText = 'Generate';
-    return '';
-}
-"""
-
-
 jump_to_rodin = """
 function redirectToGithub(input) {
     if (input.includes('OpenClay')) {
@@ -188,23 +98,18 @@ example = [
     ["assets/46.png"]
 ]
 
-
 def do_nothing(text):
     return ""
 
-
 def handle_selection(selection):
     return "Rodin Gen-1(0525)"
 
-
 def hint_in_prompt(hint, prompt):
     return re.search(fr"{hint[:-1]}", prompt) is not None
 
-
 def prompt_remove_hint(prompt, hint):
    return re.sub(fr"\s*{hint[:-1]}[\.,]*", "", prompt)
 
-
 def handle_hint_change(prompt: str, prompt_hint):
     prompt = prompt.strip()
     if prompt != "" and not prompt.endswith("."):
@@ -218,7 +123,6 @@ def handle_hint_change(prompt: str, prompt_hint):
     prompt = prompt.strip()
     return prompt
 
-
 def handle_prompt_change(prompt):
     hint_list = []
     for _, hint in PROMPT_HINT_LIST:
@@ -227,15 +131,6 @@ def handle_prompt_change(prompt):
 
     return hint_list
 
-def preprocessing(prompt, image_path, processed_image, task_uuid=""):
-    prompt, image_base64 = generator.preprocess(prompt, image_path, processed_image, task_uuid)
-    image_rgb = convert_base64_to_binary(image_base64)
-    image_rgb = cv2.imdecode(np.frombuffer(image_rgb.getvalue(), np.uint8), -1)[...,[2,1,0,3]]
-    image_rgb = Image.fromarray(image_rgb, 'RGBA')
-    # image_rgb = cv2.resize(image_rgb, (256, 256), cv2.INTER_AREA)
-
-    return prompt, image_base64, image_rgb
-
 def clear_task(task_input=None):
     """_summary_
     [cache_task_uuid, block_prompt, block_prompt_hint, fake3d, block_3d]
@@ -243,33 +138,26 @@ def clear_task(task_input=None):
     log("INFO", "Clearing task...")
     return "", "", "", [], "assets/white_image.png"
 
-
 def clear_task_id():
     return ""
 
-
 def return_render(image):
     image = Image.fromarray(image)
     return image, crop_image(image, DEFAULT)
 
-
 def crop_image_default(image):
     return crop_image(image, DEFAULT)
 
-
 def crop_image_metal(image):
     return crop_image(image, METAL)
 
-
 def crop_image_contrast(image):
     return crop_image(image, CONTRAST)
 
-
 def crop_image_normal(image):
     return crop_image(image, NORMAL)
 
-
-with gr.Blocks(css=css) as demo:
+with gr.Blocks() as demo:
     gr.HTML(html_content)
 
     cache_task_uuid = gr.Text(value="", visible=False)
@@ -279,86 +167,66 @@ with gr.Blocks(css=css) as demo:
 
     with gr.Row():
         with gr.Column():
+            block_image = gr.Image(height=256, image_mode="RGB", sources="upload", elem_classes="elem_imageupload", type="filepath")
+            block_model_card = gr.Dropdown(choices=options, label="Model Card", value="Rodin Gen-1(0525)", interactive=True)
             with gr.Group():
-
-
-
-
-
-
-
-
-                    type="filepath"
-                )
-                block_image_masked = gr.Image(
-                    label='Preprocessed',
-                    height=max_height,
-                    elem_id="elem_block_image_crop",
-                    elem_classes="elem_imagebox",
-                    interactive=False,
-                )
-
-
-                block_prompt = gr.Textbox(
-                    value="",
-                    placeholder="Auto generated description of Image",
-                    lines=1,
-                    show_label=True,
-                    label="Prompt",
-                )
-
-                block_prompt_hint = gr.CheckboxGroup(value="Labels", choices=PROMPT_HINT_LIST, show_label=False)
-
-        with gr.Column(elem_id="right_col"):
-            with gr.Group(elem_id="right_col_group"):
-                with gr.Row(elem_id="right_col_group_row"):
-                    with gr.Group(elem_id="right_col_group_row_gleft"):
-                        block_3d = gr.Model3D(
-                            value='./empty.obj',
-                            height=320,
-                            camera_position=(90 + 30, 90 - 15, 3),
-                            zoom_speed=0.2,
-                            pan_speed=0.3,
-                            label="3D Preview (OpenCLAY(600M))",
-                            elem_id="block_3d"
-                        )
-
-                        button_generate_600 = gr.Button(value="Generate", variant="primary", elem_id="button_generate_600")
-
-                    with gr.Group(elem_id="right_col_group_row_gright"):
-                        fake3d = Fake3D(interactive=False,
-                                        # height=320,
-                                        # width=320,
-                                        label="3D Preview (Rodin Gen-1(0525))",
-                                        elem_id="fake3d"
-                                        )
-
-            with gr.Row():
-                button_generate = gr.Button(value="Generate", variant="primary", elem_id="button_generate")
-                button_more = gr.Button(value="Download", variant="primary", link=rodin_url)
+                block_prompt = gr.Textbox(
+                    value="",
+                    placeholder="Auto generated description of Image",
+                    lines=1,
+                    show_label=True,
+                    label="Prompt",
+                )
+                block_prompt_hint = gr.CheckboxGroup(value="Labels", choices=PROMPT_HINT_LIST, show_label=False)
 
+        with gr.Column():
+            with gr.Group():
+                fake3d = Fake3D(interactive=False, label="3D Preview")
+                with gr.Row():
+                    button_generate = gr.Button(value="Generate", variant="primary", elem_id="button_generate")
 
-
-
-
-
-
-
-
-    )
-
+                    with gr.Column(min_width=200, scale=20):
+                        with gr.Row():
+                            block_default = gr.Button("Default", min_width=0)
+                            block_metal = gr.Button("Metal", min_width=0)
+                        with gr.Row():
+                            block_contrast = gr.Button("Contrast", min_width=0)
+                            block_normal = gr.Button("Normal", min_width=0)
+
+            button_more = gr.Button(value="Download from Rodin", variant="primary", link=rodin_url)
+            gr.Markdown("""
+                        **TIPS**:
+                        1. Upload an image to generate 3D geometry.
+                        2. Click Redo to regenerate the model.
+                        3. 4 buttons to switch the view.
+                        4. Swipe to rotate the model.
+                        """)
+    cache_task_uuid = gr.Text(value="", visible=False)
+
+
+    cache_raw_image = gr.Image(visible=False, type="pil")
+    cacha_empty = gr.Text(visible=False)
+    cache_image_base64 = gr.Text(visible=False)
+    block_example = gr.Examples(
+        examples=example,
+        fn=clear_task,
+        inputs=[block_image],
+        outputs=[cache_image_base64, cache_task_uuid, block_prompt, block_prompt_hint, fake3d],
+        run_on_click=True,
+        cache_examples=True,
+        label="Examples"
+    )
 
     block_image.upload(
-        fn=do_nothing,
-        js=change_button_name_to_generating,
+        fn=do_nothing,
+        js=change_button_name_to_generating,
         inputs=[cacha_empty],
         outputs=[cacha_empty],
         queue=False
     ).success(
-        fn=
-        inputs=[block_prompt, block_image, cache_image_base64, cache_task_uuid],
-        outputs=[block_prompt, cache_image_base64
+        fn=generator.preprocess,
+        inputs=[block_prompt, block_image, cache_image_base64, cache_task_uuid],
+        outputs=[block_prompt, cache_image_base64],
         show_progress="minimal",
        queue=True
     ).success(
@@ -367,36 +235,36 @@ with gr.Blocks(css=css) as demo:
         outputs=[cache_raw_image, cache_task_uuid, fake3d],
         queue=True
     ).success(
-        fn=do_nothing,
-        js=change_button_name,
+        fn=do_nothing,
+        js=change_button_name,
         inputs=[cacha_empty],
         outputs=[cacha_empty],
         queue=False
     )
-
+
     block_image.clear(
-        fn=do_nothing,
-        js=reset_button_name,
+        fn=do_nothing,
+        js=reset_button_name,
         inputs=[cacha_empty],
         outputs=[cacha_empty],
         queue=False
     ).then(
-        fn=clear_task,
-        outputs=[cache_image_base64, cache_task_uuid, block_prompt, block_prompt_hint, fake3d],
+        fn=clear_task,
+        outputs=[cache_image_base64, cache_task_uuid, block_prompt, block_prompt_hint, fake3d],
         show_progress="hidden",
         queue=False
     )
-
+
     button_generate.click(
-        fn=do_nothing,
-        js=change_button_name_to_generating,
+        fn=do_nothing,
+        js=change_button_name_to_generating,
         inputs=[cacha_empty],
         outputs=[cacha_empty],
         queue=False
    ).success(
-        fn=
-        inputs=[block_prompt, block_image, cache_image_base64, cache_task_uuid],
-        outputs=[block_prompt, cache_image_base64
+        fn=generator.preprocess,
+        inputs=[block_prompt, block_image, cache_image_base64, cache_task_uuid],
+        outputs=[block_prompt, cache_image_base64],
         show_progress="minimal",
         queue=True
     ).success(
@@ -405,46 +273,26 @@ with gr.Blocks(css=css) as demo:
         outputs=[cache_raw_image, cache_task_uuid, fake3d],
         queue=True
     ).then(
-        fn=do_nothing,
-        js=change_button_name,
-        inputs=[cacha_empty],
-        outputs=[cacha_empty],
-        queue=False
-    )
-
-    button_generate_600.click(
-        fn=do_nothing,
-        js=change_button_name_to_generating_600,
-        inputs=[cacha_empty],
-        outputs=[cacha_empty],
-        queue=False
-    ).success(
-        fn=preprocessing,
-        inputs=[block_prompt, block_image, cache_image_base64, cache_task_uuid],
-        outputs=[block_prompt, cache_image_base64, block_image_masked],
-        show_progress="minimal",
-        queue=True
-    ).success(
-        fn=local_inference,
-        inputs=[block_prompt, block_image_masked],
-        outputs=[block_3d],
-        queue=True
-    ).then(
-        fn=do_nothing,
-        js=change_button_name_600,
+        fn=do_nothing,
+        js=change_button_name,
         inputs=[cacha_empty],
         outputs=[cacha_empty],
         queue=False
     )
-
+
+    block_default.click(fn=crop_image_default, inputs=[cache_raw_image], outputs=fake3d, show_progress="minimal")
+    block_metal.click(fn=crop_image_metal, inputs=[cache_raw_image], outputs=fake3d, show_progress="minimal")
+    block_contrast.click(fn=crop_image_contrast, inputs=[cache_raw_image], outputs=fake3d, show_progress="minimal")
+    block_normal.click(fn=crop_image_normal, inputs=[cache_raw_image], outputs=fake3d, show_progress="minimal")
+
     button_more.click()
-
+
     block_prompt_hint.input(
         fn=handle_hint_change, inputs=[block_prompt, block_prompt_hint], outputs=[block_prompt],
         show_progress="hidden",
         queue=False,
     )
-
+
     block_prompt.change(
         fn=handle_prompt_change,
         inputs=[block_prompt],
@@ -452,6 +300,8 @@ with gr.Blocks(css=css) as demo:
         trigger_mode="always_last",
         show_progress="hidden",
     )
+
+    block_model_card.change(fn=handle_selection, inputs=[block_model_card], outputs=[block_model_card], show_progress="hidden", js=jump_to_rodin)
 
 
 if __name__ == "__main__":
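app.py drives the whole UI through chained Gradio events: an event listener such as `.upload()` or `.click()` returns a dependency whose `.success()` step runs only if the previous function finished without raising, while `.then()` runs unconditionally. That is why the `do_nothing` + `js=` button-renaming steps bracket the real work, and why the button label is restored even after a failed generation. A minimal sketch of the same chaining (component names here are illustrative, not from app.py):

import gradio as gr

def preprocess(text):
    if not text:
        raise gr.Error("empty input")  # a raised gr.Error aborts the .success() chain
    return text.upper()

with gr.Blocks() as demo:
    box = gr.Textbox()
    btn = gr.Button("Generate")
    btn.click(fn=preprocess, inputs=box, outputs=box
    ).success(fn=preprocess, inputs=box, outputs=box   # only runs if the step above succeeded
    ).then(fn=lambda: print("always runs"))            # runs regardless, like the button-reset step

demo.launch()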
constant.py
CHANGED
@@ -8,9 +8,6 @@ USER = os.getenv("USER")
 PASSWORD = os.getenv("PASSWORD")
 TOKEN = os.getenv("TOKEN")
 
-ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
-FOLDER_TEMP_MESH = './tmp_mesh'
-
 DEFAULT = [0, 0]
 CONTRAST = [360, 0]
 METAL = [0, 360]
openclay/models/__init__.py
DELETED
@@ -1,3 +0,0 @@
-from .condition import ClayConditionNet
-from .ldm import ClayLDM
-from .vae import ClayVAE
openclay/models/condition.py
DELETED
@@ -1,102 +0,0 @@
-import copy
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from diffusers import ModelMixin, ConfigMixin
-
-from diffusers.configuration_utils import register_to_config
-from ..modules.embedding import PointEmbed
-from ..modules.control_volume import ControlVolume
-from ..utils import get_center_position
-
-class ClayConditionNet(ModelMixin, ConfigMixin):
-    @register_to_config
-    def __init__(
-        self,
-        kv_dim,
-        ldm_dim, ldm_heads, ldm_depth,
-        on_volume=False, volume_input_dim=1, volume_block_dim=256, aggregation_method=None,
-        on_point=False, point_number=8, point_leading_embed=False, additional_token_length=None,
-        stage="parallel",
-    ):
-        super().__init__()
-        self.multihead_attn_condition_list = nn.ModuleList([copy.deepcopy(
-            nn.MultiheadAttention(ldm_dim, ldm_heads, dropout=0, batch_first=True, kdim=kv_dim, vdim=kv_dim)
-        ) for i in range(ldm_depth)])
-
-        if on_volume:
-            self.condition_volume_point_embed = PointEmbed(dim=kv_dim)
-            self.condition_volume_conv = ControlVolume(volume_dim=volume_input_dim,
-                                                       block_dim=volume_block_dim,
-                                                       condition_dim=kv_dim,
-                                                       time_embed_dim=ldm_dim,
-                                                       downsample_times=1,
-                                                       aggregation_method=aggregation_method)
-
-        if on_point:
-            self.condition_point_point_embed = PointEmbed(dim=kv_dim)
-            if point_number == 0:
-                self.condition_point_token = None
-            else:
-                self.condition_point_token = nn.Parameter(torch.randn(point_number, kv_dim))
-
-        self.on_volume = on_volume
-        self.on_point = on_point
-
-        assert stage in {"parallel", "postfix"}
-        self.stage = stage
-
-    def preprocess_condition(self, condition, additional_dict, time_embed) -> torch.Tensor:
-
-        if self.on_volume:
-            condition_volume = condition
-            Bor1, volume_dim, X, Y, Z = condition_volume.shape
-            assert X == Y == Z == 16
-            condition_volume = self.condition_volume_conv(condition_volume, time_embed)  # [Bor1, condition_dim, X, Y, Z]
-            condition_volume = condition_volume.reshape(-1, condition_volume.shape[1], 8**3).permute(0, 2, 1)  # [Bor1, X*Y*Z, condition_dim]
-
-            center_position = get_center_position(8)[None].to(time_embed).reshape(1, 8**3, 3)  # [1, X*Y*Z, 3]
-            condition_volume = condition_volume + self.condition_volume_point_embed(center_position)  # [Bor1, X*Y*Z, condition_dim]
-            condition = condition_volume
-
-        if self.on_point:
-            point = condition
-            Bor1, M, _ = point.shape
-
-            condition_point = self.condition_point_point_embed(point)
-            if self.condition_point_token is not None:
-                if self.config.point_leading_embed:
-                    condition_point = torch.cat([
-                        condition_point[:, :self.condition_point_token.shape[0]] + self.condition_point_token,
-                        condition_point[:, self.condition_point_token.shape[0]:]
-                    ], dim=1)
-                else:
-                    condition_point = condition_point + self.condition_point_token * additional_dict.get("condition_point_token_scale", 1)
-
-            condition = condition_point
-
-        return condition
-
-    def process(self, index, x, condition, key_padding_mask=None):
-        """
-        x: [B, N, dim]
-        condition: [B, M, condition_dim]
-        """
-
-        if self.stage == "parallel":
-            residual = self.multihead_attn_condition_list[index](x,
-                                                                 condition, condition, need_weights=False,
-                                                                 key_padding_mask=key_padding_mask
-                                                                 )[0]
-        else:
-            x = x + self.multihead_attn_condition_list[index](self.norm2_condition_list[index](x),
-                                                              condition, condition, need_weights=False,
-                                                              key_padding_mask=key_padding_mask
-                                                              )[0]
-
-            residual = self.linear2_condition_list[index](F.gelu(self.linear1_condition_list[index]
-                                                                 (self.norm3_condition_list[index](x)))
-                                                          )
-
-        return residual
openclay/models/ldm.py
DELETED
@@ -1,83 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from diffusers import ModelMixin, ConfigMixin
-from diffusers.configuration_utils import register_to_config
-from diffusers.models.embeddings import TimestepEmbedding, Timesteps
-
-from ..modules.transformer import ClayTransformerDecoderLayer
-
-class ClayLDM(ModelMixin, ConfigMixin):
-    @register_to_config
-    def __init__(
-        self,
-        depth=24,
-        dim=512,
-        latent_dim=64,
-        heads=8,
-    ):
-        super().__init__()
-
-        timestep_input_dim = dim // 2
-        time_embed_dim = dim
-        self.time_proj = Timesteps(timestep_input_dim, True, 0)
-        self.time_embedding = TimestepEmbedding(
-            timestep_input_dim,
-            time_embed_dim,
-            act_fn="silu",
-            post_act_fn="silu",
-        )
-        self.time_embed_dim = time_embed_dim
-
-        self.proj_in = nn.Linear(latent_dim, dim, bias=False)
-
-        self.layers = nn.TransformerDecoder(
-            ClayTransformerDecoderLayer(dim, heads, dim_feedforward=dim * 4, dropout=0, activation=F.gelu, batch_first=True, norm_first=True, kdim=768, vdim=768, layer_norm_eps=1e-4),
-            depth
-        )
-        for i in range(depth):
-            self.layers.layers[i].index = i
-
-        self.proj_out = nn.Linear(dim, latent_dim, bias=False)
-
-    def register_condition_net(self, condition_net_list):
-        self.condition_net_list = nn.ModuleList(condition_net_list)
-        for layer in self.layers.layers:
-            layer.register_condition_net(condition_net_list)
-
-    def forward(self, sample, t, condition_text, condition_seq=None):
-        """
-        sample: [B, L, C]
-        t: [N]
-        condition: [B, 77, 768]
-        condition_seq: [(condition_1, condition_1_scale), ...]
-        return: [B, L, C]
-        """
-
-        B, L, C = sample.shape
-
-        x = self.proj_in(sample)
-        time_encoding = self.time_proj(t).to(sample)
-        time_embed = self.time_embedding(time_encoding)
-
-        condition_text = condition_text.expand(B, -1, -1)
-
-        if condition_seq is None:
-            condition_seq = []
-        condition_seq = list(condition_seq)
-        for i in range(len(condition_seq)):
-            condition, condition_scale = condition_seq[i][:2]
-            additional_dict = {}
-            if len(condition_seq[i]) > 2:
-                additional_dict = condition_seq[i][2]
-                assert isinstance(additional_dict, dict)
-            condition = self.condition_net_list[i].preprocess_condition(condition, additional_dict, time_embed)
-            condition = condition.expand(B, -1, -1)
-
-            condition_seq[i] = (condition, condition_scale, additional_dict)
-
-        x_aug = torch.cat([x, time_embed[:, None]], dim=1)
-        y = self.layers(x_aug, (condition_text, condition_seq))
-        eps = self.proj_out(y[:, :-1])
-
-        return eps
openclay/models/vae.py
DELETED
@@ -1,124 +0,0 @@
-import tqdm
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-from diffusers import ModelMixin, ConfigMixin
-from diffusers.configuration_utils import register_to_config
-# from torch_cluster import fps
-
-from ..modules.embedding import PointEmbed
-from ..modules.attention import CrossAttentionLayer
-from ..modules.drop_path import DropPathWrapper
-from ..modules.diag_gaussian import DiagonalGaussianDistribution
-
-class ClayVAE(ModelMixin, ConfigMixin):
-    @register_to_config
-    def __init__(
-        self,
-        depth=24,
-        dim=512,
-        latent_dim=64,
-        heads=8,
-        output_dim=1,
-        ratio=0.25,
-    ):
-        super().__init__()
-
-        self.ratio = ratio
-
-        self.point_embed = PointEmbed(dim=dim)
-
-        self.encoder_cross_attention = CrossAttentionLayer(dim, 1, dim_feedforward=dim * 4, dropout=0, activation=F.gelu, batch_first=True, norm_first=True, layer_norm_eps=1e-4)
-        self.encoder_out = nn.Linear(dim, latent_dim * 2)
-
-        self.decoder_in = nn.Linear(latent_dim, dim)
-        self.decoder_layers = nn.TransformerEncoder(
-            DropPathWrapper(
-                nn.TransformerEncoderLayer(dim,
-                                           heads,
-                                           dim_feedforward=dim * 4,
-                                           dropout=0,
-                                           activation=F.gelu,
-                                           batch_first=True,
-                                           norm_first=True,
-                                           layer_norm_eps=1e-4)
-            ),
-            depth)
-
-        self.decoder_cross_attention = CrossAttentionLayer(dim, 1, dim_feedforward=dim * 4, dropout=0, activation=F.gelu, batch_first=True, norm_first=True, layer_norm_eps=1e-4)
-        self.decoder_out = nn.Linear(dim, output_dim)
-
-    def encode(self, pc_data, output_dict=None, attn_mask=None, no_cast=False, **kwargs):
-
-        pc = pc_data[:, :, :3]
-
-        # pc: B x N x 3
-        B, N, D = pc.shape
-
-        pc_flat = pc.reshape(B * N, D)
-
-        batch = torch.arange(B).to(pc.device)
-        batch = torch.repeat_interleave(batch, N)
-
-        ratio = self.ratio
-        idx = fps(pc_flat, batch, ratio=ratio)
-
-        while idx.max() >= pc_flat.shape[0]:
-            idx = fps(pc_flat, batch, ratio=ratio)
-
-        sampled_pc = pc_flat[idx].reshape(B, -1, 3)
-
-        pc_embeddings = self.point_embed(pc)
-        sampled_pc_embeddings = self.point_embed(sampled_pc)
-
-        x, attn_output_weights = self.encoder_cross_attention(sampled_pc_embeddings, pc_embeddings, attn_mask, need_weights=output_dict is not None, no_cast=no_cast)
-        mean, logvar = self.encoder_out(x).chunk(2, dim=-1)
-
-        posterior = DiagonalGaussianDistribution(mean, logvar)
-        x = posterior.sample()
-        kl = posterior.kl()
-
-        if output_dict is not None:
-            output_dict["fps_idx"] = idx
-            output_dict["mean"] = mean
-            output_dict["logvar"] = logvar
-            output_dict["x"] = x
-            output_dict["attn_output_weights"] = attn_output_weights
-
-        return kl, x
-
-    def decode(self, x, pc, mini_batch=None, no_cast=False, show_progress=True, cpu=True, **kwargs):
-
-        x = self.decode_first(x)
-
-        if mini_batch is None:
-            y = self.decode_second(x, pc, no_cast)
-
-        else:
-            ys = []
-            for mini_batch_start in tqdm.tqdm(range(0, pc.shape[1], mini_batch), "[ ClayVAE.decode ]", disable=not (show_progress and pc.shape[1] > mini_batch)):
-                mini_pc = pc[:, mini_batch_start:mini_batch_start + mini_batch].to(x.device)
-                y = self.decode_second(x, mini_pc, no_cast)
-                if cpu:
-                    y = y.cpu()
-                ys.append(y)
-            y = torch.cat(ys, dim=1)
-
-        return y
-
-    def decode_first(self, x):
-        x = self.decoder_in(x)
-        x = self.decoder_layers(x)
-        return x
-
-    def decode_second(self, x, mini_pc, no_cast=False):
-        pc_embeddings = self.point_embed(mini_pc)
-        y, _ = self.decoder_cross_attention(pc_embeddings, x, no_cast=no_cast)
-        y = self.decoder_out(y)
-        return y
-
-    def forward(self, surface, points, no_cast=False, **kwargs):
-        kl, x = self.encode(surface, no_cast=no_cast, **kwargs)
-        x = self.decode(x, points, no_cast=no_cast, **kwargs)[:, :, 0]
-        return {"logits": x, "kl": kl}
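One thing worth flagging in the deleted vae.py: ClayVAE.encode calls fps(...) for farthest-point sampling, but the matching import is commented out ("# from torch_cluster import fps"), so the file as committed could only have run if fps was supplied from elsewhere. The intended usage, assuming torch_cluster, is roughly:

import torch
from torch_cluster import fps  # pip install torch-cluster

pc = torch.rand(2, 1024, 3)                         # B x N x 3 surface points
batch = torch.repeat_interleave(torch.arange(2), 1024)
idx = fps(pc.reshape(-1, 3), batch, ratio=0.25)     # indices of ~25% of points, farthest-first
sampled = pc.reshape(-1, 3)[idx].reshape(2, -1, 3)  # B x (N * ratio) x 3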
openclay/modules/attention.py
DELETED
@@ -1,73 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch import Tensor
-from typing import Optional, Union, Callable
-
-
-class CrossAttentionLayer(nn.Module):
-    __constants__ = ["batch_first", "norm_first", "context_norm"]
-
-    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
-                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
-                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False, context_norm=True,
-                 device=None, dtype=None) -> None:
-        factory_kwargs = {"device": device, "dtype": dtype}
-        super(CrossAttentionLayer, self).__init__()
-        self.multihead_attn = nn.modules.activation.MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first, **factory_kwargs)
-        # Implementation of Feedforward model
-        self.linear1 = nn.Linear(d_model, dim_feedforward, **factory_kwargs)
-        self.dropout = nn.Dropout(dropout)
-        self.linear2 = nn.Linear(dim_feedforward, d_model, **factory_kwargs)
-
-        self.norm_first = norm_first
-        self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
-        self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
-
-        self.dropout2 = nn.Dropout(dropout)
-        self.dropout3 = nn.Dropout(dropout)
-
-        self.context_norm = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs) if context_norm else None
-        # Legacy string support for activation function.
-        self.activation = activation
-
-    def forward(self, tgt: Tensor, memory: Tensor, memory_mask: Optional[Tensor] = None, memory_key_padding_mask: Optional[Tensor] = None, need_weights=False, no_cast=False) -> Tensor:
-
-        assert self.norm_first
-
-        if no_cast:
-            tgt = tgt.float()
-            memory = memory.float()
-
-            with torch.autocast("cuda", enabled=False):
-                x = tgt
-                memory = self.context_norm(memory) if self.context_norm is not None else memory
-                y, attn_output_weights = self._mha_block(self.norm2(x), memory, memory_mask, memory_key_padding_mask, need_weights=need_weights)
-                x = x + y
-                x = x + self._ff_block(self.norm3(x))
-
-                return x, attn_output_weights
-
-        x = tgt
-        memory = self.context_norm(memory) if self.context_norm is not None else memory
-        y, attn_output_weights = self._mha_block(self.norm2(x), memory, memory_mask, memory_key_padding_mask, need_weights=need_weights)
-        x = x + y
-        x = x + self._ff_block(self.norm3(x))
-
-        return x, attn_output_weights
-
-    # multihead attention block
-    def _mha_block(self, x: Tensor, mem: Tensor,
-                   attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor], need_weights) -> Tensor:
-        x, attn_output_weights = self.multihead_attn(x, mem, mem,
-                                                     attn_mask=attn_mask,
-                                                     key_padding_mask=key_padding_mask,
-                                                     need_weights=need_weights)
-        return self.dropout2(x), attn_output_weights
-
-    # feed forward block
-    def _ff_block(self, x: Tensor) -> Tensor:
-        x = self.linear2(self.dropout(self.activation(self.linear1(x))))
-        return self.dropout3(x)
-
-
openclay/modules/control_volume.py
DELETED
@@ -1,52 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-class ControlVolume(nn.Module):
-    def __init__(self, volume_dim=1, block_dim=384, condition_dim=768, time_embed_dim=2048, downsample_times=1, aggregation_method=None):
-        super().__init__()
-
-        if aggregation_method is None:
-            pass
-        elif aggregation_method == "maxpool":
-            self.proj_maxpool = nn.Linear(volume_dim, volume_dim)
-
-        self.conv_in = nn.Conv3d(volume_dim, block_dim, kernel_size=3, stride=1, padding=1)
-        self.proj_t = nn.Linear(time_embed_dim, block_dim)
-        self.norm = nn.GroupNorm(8, block_dim)
-
-        self.blocks = nn.ModuleList([])
-        for i in range(downsample_times):
-            self.blocks.append(nn.Conv3d(block_dim, block_dim, kernel_size=3, padding=1))
-            self.blocks.append(nn.Conv3d(block_dim, block_dim, kernel_size=3, padding=1, stride=2))
-
-        self.conv_out = nn.Conv3d(block_dim, condition_dim, kernel_size=3, stride=1, padding=1)
-
-        self.volume_dim = volume_dim
-        self.aggregation_method = aggregation_method
-
-    def forward(self, volume, time_embed):
-        """
-        volume: [B, volume_dim, X, Y, Z]
-        time_embed: [B, block_dim]
-        return:
-            [B, condition_dim, X, Y, Z]
-        """
-        B, _, X, Y, Z = volume.shape
-        if self.aggregation_method == "maxpool":
-            volume = self.proj_maxpool(volume.reshape(B, 4, self.volume_dim, X, Y, Z).permute(0, 1, 3, 4, 5, 2)).permute(0, 5, 2, 3, 4, 1)
-            volume = F.max_pool1d(volume.reshape(B, self.volume_dim * X * Y * Z, 4), 4).reshape(B, self.volume_dim, X, Y, Z)
-
-        x = F.silu(self.conv_in(volume))  # [B, block_dim, X, Y, Z]
-
-        time_embed = self.proj_t(time_embed)  # [B, block_dim]
-        x = x + time_embed[:, :, None, None, None]
-
-        x = self.norm(x)
-
-        for block in self.blocks:
-            x = block(x)
-            x = F.silu(x)
-
-        x = self.conv_out(x)  # [B, condition_dim, X, Y, Z]
-        return x
openclay/modules/diag_gaussian.py
DELETED
@@ -1,42 +0,0 @@
import numpy as np
import torch

class DiagonalGaussianDistribution(object):
    def __init__(self, mean, logvar, deterministic=False):
        self.mean = mean
        self.logvar = logvar
        self.logvar = torch.clamp(self.logvar, -30.0, 20.0)
        self.deterministic = deterministic
        self.std = torch.exp(0.5 * self.logvar)
        self.var = torch.exp(self.logvar)
        if self.deterministic:
            self.var = self.std = torch.zeros_like(self.mean).to(device=self.mean.device)

    def sample(self):
        x = self.mean + self.std * torch.randn(self.mean.shape).to(self.mean)
        return x

    def kl(self, other=None):
        if self.deterministic:
            return torch.Tensor([0.])
        else:
            if other is None:
                return 0.5 * torch.mean(torch.pow(self.mean, 2)
                                        + self.var - 1.0 - self.logvar,
                                        dim=[1, 2])
            else:
                return 0.5 * torch.mean(
                    torch.pow(self.mean - other.mean, 2) / other.var
                    + self.var / other.var - 1.0 - self.logvar + other.logvar,
                    dim=[1, 2, 3])

    def nll(self, sample, dims=[1, 2, 3]):
        if self.deterministic:
            return torch.Tensor([0.])
        logtwopi = np.log(2.0 * np.pi)
        return 0.5 * torch.sum(
            logtwopi + self.logvar + torch.pow(sample - self.mean, 2) / self.var,
            dim=dims)

    def mode(self):
        return self.mean
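This is the standard reparameterized diagonal Gaussian used for VAE latents. A minimal sketch of its use, illustrative only:

import torch

mean = torch.zeros(2, 16, 8)                  # [B, N, C] latent statistics
logvar = torch.zeros(2, 16, 8)
dist = DiagonalGaussianDistribution(mean, logvar)
z = dist.sample()                             # mean + std * eps, same shape as mean
kl = dist.kl()                                # [B] KL against a standard normal, reduced over dims 1 and 2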
openclay/modules/drop_path.py
DELETED
@@ -1,34 +0,0 @@
import torch
import torch.nn as nn
from torch import Tensor
from timm.models.layers import DropPath
from typing import Optional

class Struct:
    pass

class DropPathWrapper(nn.Module):
    def __init__(self, layer):
        super().__init__()
        self.layer = layer
        self.drop_path = DropPath(drop_prob=0.1)

        self_attn_dummy = Struct()
        self_attn_dummy.batch_first = True
        self.self_attn = self_attn_dummy

    def forward(
        self,
        src: Tensor,
        src_mask: Optional[Tensor] = None,
        src_key_padding_mask: Optional[Tensor] = None,
        is_causal: bool = False
    ) -> Tensor:

        x = src
        x_p = self.layer(src, src_mask, src_key_padding_mask, is_causal)
        p = x_p - x

        y = x + self.drop_path(p)

        return y
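DropPathWrapper recovers the residual branch by subtracting the input from the wrapped layer's output, then re-adds it through stochastic depth; the dummy self_attn struct presumably exists only so nn.TransformerEncoder's batch_first introspection does not fail. A minimal sketch, assuming a stock encoder layer:

import torch
import torch.nn as nn

layer = nn.TransformerEncoderLayer(d_model=64, nhead=4, batch_first=True, norm_first=True)
wrapped = DropPathWrapper(layer)              # residual delta dropped with p=0.1 in training mode
x = torch.randn(2, 10, 64)                    # [B, L, C]
y = wrapped(x)                                # equals layer(x) exactly in eval mode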
openclay/modules/embedding.py
DELETED
@@ -1,36 +0,0 @@
# TODO: add reference to 3dshape2vecset

import numpy as np
import torch
import torch.nn as nn

class PointEmbed(nn.Module):
    def __init__(self, hidden_dim=48, dim=128):
        super().__init__()

        assert hidden_dim % 6 == 0

        self.embedding_dim = hidden_dim
        e = torch.pow(2, torch.arange(self.embedding_dim // 6)).float() * np.pi
        e = torch.stack([
            torch.cat([e, torch.zeros(self.embedding_dim // 6),
                       torch.zeros(self.embedding_dim // 6)]),
            torch.cat([torch.zeros(self.embedding_dim // 6), e,
                       torch.zeros(self.embedding_dim // 6)]),
            torch.cat([torch.zeros(self.embedding_dim // 6),
                       torch.zeros(self.embedding_dim // 6), e]),
        ])
        self.register_buffer("basis", e, persistent=False)  # 3 x 24

        self.mlp = nn.Linear(self.embedding_dim + 3, dim)

    @staticmethod
    def embed(input, basis):
        projections = torch.einsum("bnd,de->bne", input, basis)
        embeddings = torch.cat([projections.sin(), projections.cos()], dim=2)
        return embeddings

    def forward(self, input):
        # input: B x N x 3
        embed = self.mlp(torch.cat([self.embed(input, self.basis), input], dim=2))  # B x N x C
        return embed
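PointEmbed is a Fourier-feature positional embedding for 3D points (per the TODO, in the style of 3DShape2VecSet): each axis is projected onto a bank of power-of-two frequencies, and the sin/cos features are concatenated with the raw coordinates before a linear projection. A minimal sketch:

import torch

embed = PointEmbed(hidden_dim=48, dim=128)
pts = torch.rand(2, 1024, 3) * 2 - 1          # B x N x 3 points in [-1, 1]^3
feat = embed(pts)                             # B x N x 128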
openclay/modules/transformer.py
DELETED
@@ -1,116 +0,0 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.nn.modules.transformer import _get_activation_fn
from typing import Optional, Union, Callable, List

class ClayTransformerDecoderLayer(nn.TransformerDecoderLayer):
    __constants__ = ["batch_first", "norm_first"]

    def __init__(self, d_model: int, nhead: int, dim_feedforward: int = 2048, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 layer_norm_eps: float = 1e-5, batch_first: bool = False, norm_first: bool = False,
                 device=None, dtype=None, kdim=None, vdim=None) -> None:
        factory_kwargs = {"device": device, "dtype": dtype}
        nn.Module.__init__(self)
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first, **factory_kwargs)
        self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first, kdim=kdim, vdim=vdim, **factory_kwargs)
        # Implementation of Feedforward model
        self.linear1 = nn.Linear(d_model, dim_feedforward, **factory_kwargs)
        self.dropout = lambda x: x
        self.linear2 = nn.Linear(dim_feedforward, d_model, **factory_kwargs)

        self.norm_first = norm_first
        self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.dropout1 = lambda x: x
        self.dropout2 = lambda x: x
        self.dropout3 = lambda x: x

        # Legacy string support for activation function.
        if isinstance(activation, str):
            self.activation = _get_activation_fn(activation)
        else:
            self.activation = activation

        self.d_model = d_model
        self.nhead = nhead
        self.dropout_ = dropout
        self.batch_first = batch_first
        self.kdim = kdim
        self.vdim = vdim
        self.factory_kwargs = factory_kwargs
        self._mha_block_second = None
        self.condition_net_list = []

    def register_condition_net(self, condition_net_list):
        self.condition_net_list = condition_net_list

    def process_condition_net(self, x_norm, condition_seq, stage):
        """
        condition_seq: [(condition_1, condition_1_scale), ...]
        """

        assert len(condition_seq) == 0 or len(condition_seq) == len(self.condition_net_list), f"len(self.condition_net_list)={len(self.condition_net_list)}, len(condition_seq)={len(condition_seq)}"

        residual_all = 0

        for i in range(len(condition_seq)):
            condition_net = self.condition_net_list[i]
            condition, condition_scale = condition_seq[i][:2]

            if condition_scale == 0:
                continue

            key_padding_mask = None
            if len(condition_seq[i]) > 2:
                additional_dict = condition_seq[i][2]
                assert isinstance(additional_dict, dict)
                key_padding_mask = additional_dict.get("key_padding_mask", None)

            if stage == condition_net.stage:
                residual = condition_net.process(self.index, x_norm, condition, key_padding_mask=key_padding_mask)
                residual_all = residual_all + residual * condition_scale

        return residual_all

    def forward(
        self,
        tgt: Tensor,
        memory_list: List[Tensor],
        tgt_mask: Optional[Tensor] = None,
        memory_mask: Optional[Tensor] = None,
        tgt_key_padding_mask: Optional[Tensor] = None,
        memory_key_padding_mask: Optional[Tensor] = None,
        tgt_is_causal: bool = False,
        memory_is_causal: bool = False,
    ) -> Tensor:
        """
        memory_list = ( condition_text, [(condition_1, condition_1_scale), ...] )
        """

        x = tgt
        assert self.norm_first

        memory, condition_seq = memory_list

        x = x + self._sa_block(self.norm1(x), tgt_mask, tgt_key_padding_mask, tgt_is_causal)
        x_norm = self.norm2(x)
        x = x + self._mha_block(x_norm, memory, memory_mask, memory_key_padding_mask, memory_is_causal) + self.process_condition_net(x_norm, condition_seq, stage="parallel")
        x = x + self._ff_block(self.norm3(x))

        x = x + self.process_condition_net(x_norm, condition_seq, stage="postfix")
        return x

    def _mha_block(self, x: Tensor, mem: Tensor,
                   attn_mask: Optional[Tensor], key_padding_mask: Optional[Tensor], is_causal: bool = False, multihead_attn=None) -> Tensor:
        if multihead_attn is None:
            multihead_attn = self.multihead_attn
        x = multihead_attn(x, mem, mem,
                           attn_mask=attn_mask,
                           key_padding_mask=key_padding_mask,
                           is_causal=is_causal,
                           need_weights=False)[0]
        return self.dropout2(x)
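The layer keeps PyTorch's pre-norm decoder structure but replaces every dropout with an identity lambda and threads extra condition residuals through registered condition nets: "parallel" residuals are added alongside cross-attention, "postfix" residuals after the feed-forward block. With an empty condition_seq it reduces to a plain pre-norm decoder layer, which this illustrative sketch exercises:

import torch

layer = ClayTransformerDecoderLayer(d_model=64, nhead=4, batch_first=True, norm_first=True)
tgt = torch.randn(2, 32, 64)                  # latent tokens [B, L, C]
memory = torch.randn(2, 77, 64)               # e.g. text-encoder hidden states
out = layer(tgt, (memory, []))                # empty condition_seq -> no conditioning residuals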
openclay/pipeline_openclay.py
DELETED
@@ -1,195 +0,0 @@
import numpy as np
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
import torch.utils.data
import tqdm
import mcubes
import inspect
import trimesh
import gc
from diffusers import UniPCMultistepScheduler

from .utils import get_grid_tensor
from .models import ClayVAE, ClayLDM, ClayConditionNet

from transformers import Dinov2Model, BitImageProcessor, CLIPTextModel, CLIPTokenizer
from diffusers import DiffusionPipeline

class OpenClayPipeline(DiffusionPipeline):
    def __init__(self,
                 vae: ClayVAE,
                 text_encoder: CLIPTextModel,
                 tokenizer: CLIPTokenizer,
                 ldm: ClayLDM,
                 scheduler=None,
                 ):
        super().__init__()


        if scheduler is None:
            scheduler = self.get_unipc_scheduler()

        self.register_modules(
            vae=vae,
            text_encoder=text_encoder,
            tokenizer=tokenizer,
            ldm=ldm,
            scheduler=scheduler,
        )

    def get_unipc_scheduler(self):
        scheduler = UniPCMultistepScheduler(
            num_train_timesteps=1000,
            beta_schedule="squaredcos_cap_v2",
            prediction_type="v_prediction",
            timestep_spacing="linspace",
            rescale_betas_zero_snr=True
        )
        return scheduler

    def get_timesteps(self, num_inference_steps, strength):
        # get the original timestep using init_timestep
        init_timestep = min(int(round(num_inference_steps * strength)), num_inference_steps)
        t_start = max(num_inference_steps - init_timestep, 0)
        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order:]

        return timesteps, num_inference_steps - t_start


    def vae_decode_latent(self, latent, res=128, mini_batch=129 * 129 * 129, show_progress=True):
        assert torch.isfinite(latent).all()

        gap = 2 / res
        grid = get_grid_tensor(res).to(latent)
        logits = self.vae.decode(latent, grid, mini_batch=mini_batch, show_progress=show_progress)[:, :, 0].cpu()

        logits = logits.view(res + 1, res + 1, res + 1)

        if isinstance(logits, torch.Tensor):
            logits = logits.cpu().numpy().astype(np.float32)
        assert isinstance(logits, np.ndarray)
        verts, faces = mcubes.marching_cubes(logits, 0)
        verts *= gap
        verts -= 1

        m = trimesh.Trimesh(verts, faces)
        return m

    def __call__(self, prompt="", negative_prompt="", sample=None, strength=None,
                 res=128, rescale_phi=0.7, cfg=7.5, resacle_cfg=True,
                 num_latents=1024, num_inference_steps=100, timesteps=None,
                 mini_batch=129**3, seed=42, num=1,
                 condition_seq=None, show_progress=True,
                 ):

        """
        condition_seq: [(condition_1, condition_1_scale), ...]
        sample_lock_index: [m]
        """

        device = self.text_encoder.device
        generator = torch.Generator(device).manual_seed(seed)

        prompt: list = [prompt] if isinstance(prompt, str) else prompt
        negative_prompt: list = [negative_prompt] if isinstance(negative_prompt, str) else negative_prompt

        assert len(prompt) == len(negative_prompt) == num or len(prompt) == 1 or len(negative_prompt) == 1
        if len(prompt) == 1:
            prompt = prompt * num
        if len(negative_prompt) == 1:
            negative_prompt = negative_prompt * num

        token = self.tokenizer(prompt + negative_prompt, max_length=self.tokenizer.model_max_length, padding="max_length", truncation=True, return_tensors="pt").input_ids
        encoder_hidden_states = self.text_encoder(token.to(device))[0].to(self.dtype)
        encoder_hidden_states = encoder_hidden_states.reshape(2, num, encoder_hidden_states.shape[1], encoder_hidden_states.shape[2]).permute(1, 0, 2, 3).reshape(num * 2, encoder_hidden_states.shape[1], encoder_hidden_states.shape[2])
        assert encoder_hidden_states.shape[0] == num * 2

        # set step values
        extra_set_kwargs = {}
        if "timesteps" in set(inspect.signature(self.scheduler.set_timesteps).parameters.keys()):
            extra_set_kwargs["timesteps"] = timesteps
        self.scheduler.set_timesteps(num_inference_steps, **extra_set_kwargs)
        timesteps = self.scheduler.timesteps

        noise = torch.randn(num, num_latents, 64, dtype=self.dtype, device=device, generator=generator)
        if sample is not None:
            assert torch.isfinite(sample).all()
            timesteps, num_inference_steps = self.get_timesteps(num_inference_steps, strength)
            t_start = timesteps[0]
            sample_noisy = self.scheduler.add_noise(sample, noise, torch.tensor(t_start).to(sample.device))
            sample = sample_noisy
        else:
            sample = noise

        condition_dict_cond = {
            "condition_seq": [
                (
                    condition_scale_dict[0], condition_scale_dict[1][0],
                    ((condition_scale_dict[2][0] if not isinstance(condition_scale_dict[2], dict) else condition_scale_dict[2]) if len(condition_scale_dict) > 2 else {})
                )
                for condition_scale_dict in condition_seq
            ],
        }
        condition_dict_uncond = {
            "condition_seq": [
                (
                    condition_scale_dict[0], condition_scale_dict[1][1],
                    ((condition_scale_dict[2][1] if not isinstance(condition_scale_dict[2], dict) else condition_scale_dict[2]) if len(condition_scale_dict) > 2 else {})
                )
                for condition_scale_dict in condition_seq
            ],
        }

        extra_step_kwargs = {}
        if "generator" in set(inspect.signature(self.scheduler.step).parameters.keys()):
            extra_step_kwargs["generator"] = generator

        for t in tqdm.tqdm(timesteps, "[ ClayLDMPipeline.__call__ ]", disable=not show_progress):
            # 1. predict noise model_output
            if isinstance(t, torch.Tensor):
                t = t.item()
            t_tensor = torch.tensor([t], dtype=torch.long, device=device)

            model_output_cond = self.ldm(
                sample,
                t_tensor.expand(num),
                encoder_hidden_states[0::2],
                **condition_dict_cond,
            )
            model_output_uncond = self.ldm(
                sample,
                t_tensor.expand(num),
                encoder_hidden_states[1::2],
                **condition_dict_uncond,
            )

            model_output_cfg = (model_output_cond - model_output_uncond) * cfg + model_output_uncond
            if resacle_cfg:
                model_output_rescaled = model_output_cfg / model_output_cfg.std(dim=(1, 2), keepdim=True) * model_output_cond.std(dim=(1, 2), keepdim=True)
                model_output = rescale_phi * model_output_rescaled + (1 - rescale_phi) * model_output_cfg
            else:
                model_output = model_output_cfg

            # 2. compute previous image: x_t -> x_t-1
            sample = self.scheduler.step(
                model_output[:, None, :, :].permute(0, 3, 1, 2),
                t,
                sample[:, None, :, :].permute(0, 3, 1, 2),
                **extra_step_kwargs
            ).prev_sample.permute(0, 2, 3, 1)[:, 0, :, :]

            assert torch.isfinite(sample).all(), sample

        gc.collect()
        torch.cuda.empty_cache()

        mesh_list = []
        for i in tqdm.tqdm(range(sample.shape[0])):
            mesh = self.vae_decode_latent(sample[i:i + 1], res=res, mini_batch=mini_batch)
            mesh.vertices[:, 0] += i % 4 * 2
            mesh.vertices[:, 2] += i // 4 * 4
            mesh_list.append(mesh)

        mesh_combined = trimesh.util.concatenate(mesh_list)
        return mesh_combined
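End to end, __call__ encodes positive and negative prompts, runs classifier-free guidance with an optional std-matching rescale, then decodes each latent to a mesh via marching cubes. A sketch of driving the removed pipeline; vae, text_encoder, tokenizer, and ldm are assumed to be constructed and loaded elsewhere, and condition_seq must be a list (the empty list disables extra conditioning, whereas the None default would break the list comprehensions above):

# Assumes vae, text_encoder, tokenizer, ldm are already built and loaded.
pipe = OpenClayPipeline(vae=vae, text_encoder=text_encoder, tokenizer=tokenizer, ldm=ldm)
mesh = pipe(prompt="a wooden chair", negative_prompt="low quality",
            num_inference_steps=50, cfg=7.5, res=128, seed=42,
            condition_seq=[])                 # pass [] rather than relying on the None default
mesh.export("chair.glb")                      # result is a trimesh.Trimesh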
openclay/utils.py
DELETED
@@ -1,80 +0,0 @@
import numpy as np
import torch
import cv2

def get_grid_tensor(res=128):

    gap = 2. / res
    x = torch.linspace(-1, 1, res + 1)
    y = torch.linspace(-1, 1, res + 1)
    z = torch.linspace(-1, 1, res + 1)
    grid = torch.stack(torch.meshgrid(x, y, z)).view(3, -1).T[None]
    return grid


def pad_to_square(image, pad_color=None):
    H, W, C = image.shape
    max_side = max(H, W)
    padded_image = np.ones((max_side, max_side, C))
    if pad_color is None:
        pad_color = image[0, 0]
    padded_image[:] = pad_color
    vertical_offset = (max_side - H) // 2
    horizontal_offset = (max_side - W) // 2
    padded_image[vertical_offset:vertical_offset + H, horizontal_offset:horizontal_offset + W, :] = image
    return padded_image


def read_image_square(path):
    image = cv2.imread(path, -1)
    return process_image_square(image)

def process_image_square(image):
    image = image.astype(np.float32) / 255

    if image.shape[2] == 4:  # background
        fg = image[:, :, 3] > 0.5
        fg_coord = np.stack(np.where(fg))
        rc_min = fg_coord.min(axis=1)
        rc_max = fg_coord.max(axis=1)
        rc_range = rc_max - rc_min
        rc_min -= (rc_range * 0.1).astype(int)
        rc_max += (rc_range * 0.1).astype(int)
        rc_min = rc_min.clip(0, None)
        rc_max = rc_max.clip(0, None)
        image = image[rc_min[0]:rc_max[0], rc_min[1]:rc_max[1]]
        image = image[:, :, :3] * image[:, :, 3:] + 1 * (1 - image[:, :, 3:])
    render = image[:, :, ::-1]
    render = pad_to_square(render)

    return render

def get_center_position(num_voxel):
    """
    num_voxel: int
    return:
        [X, Y, Z, 3]
    """
    center_position = (torch.stack(torch.meshgrid([torch.arange(num_voxel, dtype=torch.float32)] * 3, indexing="ij"), dim=3) + 0.5) / num_voxel * 2 - 1
    return center_position


def geometry_get_voxel(geometry):
    import pysdf

    res_large = 128
    voxel_large_or = np.zeros((res_large, res_large, res_large), dtype=bool)
    center_position_large = get_center_position(res_large).numpy()

    voxel_large = pysdf.SDF(geometry.vertices, geometry.faces).contains(center_position_large.reshape(-1, 3)).reshape(res_large, res_large, res_large)

    voxel_large_or |= voxel_large

    res = 16
    voxel_or = np.zeros((res, res, res), dtype=bool)

    loc = np.mgrid[:res, :res, :res].transpose(1, 2, 3, 0).reshape(-1, 3)
    for l in loc:
        voxel_or[l[0], l[1], l[2]] = voxel_large_or[l[0] * 8:l[0] * 8 + 8, l[1] * 8:l[1] * 8 + 8, l[2] * 8:l[2] * 8 + 8].sum() > 0

    return voxel_or
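Two of these helpers are enough to show the coordinate conventions: both query grids and voxel centers live in [-1, 1]^3. A quick illustrative sketch:

import torch

grid = get_grid_tensor(res=64)                # [1, 65**3, 3] corner-aligned query points in [-1, 1]^3
centers = get_center_position(16)             # [16, 16, 16, 3] voxel-center coordinates in [-1, 1]^3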
requirements.txt
CHANGED
@@ -3,16 +3,4 @@ requests
 pillow
 gradio==4.31.2
 requests-toolbelt
-websocket-client
-
-numpy==1.24.1
-
-PyMCubes
-pysdf==0.1.9
-opencv-python==4.9.0.80
-tqdm==4.66.1
-trimesh==4.0.5
-timm==0.9.12
-diffusers==0.29.0
-transformers
-accelerate
+websocket-client