Spaces: Running on Zero

cavargas10 committed
Commit 3b7142f • 1 Parent(s): 90df5e0
Update app.py

app.py CHANGED
@@ -1,6 +1,9 @@
 import spaces
 from spaces.zero.decorator import GPU
 
+import os
+import tyro
+import imageio
 import numpy as np
 import tqdm
 import torch
@@ -10,6 +13,7 @@ import torchvision.transforms.functional as TF
 from safetensors.torch import load_file
 import rembg
 import gradio as gr
+
 import kiui
 from kiui.op import recenter
 from kiui.cam import orbit_camera
@@ -25,6 +29,9 @@ from huggingface_hub import hf_hub_download
 import spaces
 
 IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
+IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
+GRADIO_VIDEO_PATH = 'gradio_output.mp4'
+GRADIO_OBJ_PATH = 'gradio_output_rgb.obj'
 GRADIO_OBJ_ALBEDO_PATH = 'gradio_output_albedo.obj'
 GRADIO_OBJ_SHADING_PATH = 'gradio_output_shading.obj'
 
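Note: the added IMAGENET_DEFAULT_STD pairs with the existing IMAGENET_DEFAULT_MEAN for per-channel input normalization. A minimal sketch of how such constants are typically consumed via torchvision; the actual call site is outside this hunk, so the usage below is illustrative:

    import torch
    import torchvision.transforms.functional as TF

    IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
    IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)

    # x: float RGB tensor in [0, 1], shape (3, H, W)
    x = torch.rand(3, 256, 256)
    # Per-channel normalization: (x - mean) / std
    x_norm = TF.normalize(x, mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD)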
@@ -79,6 +86,13 @@ if opt.resume is not None:
 
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 model = model.half().to(device)
+model.eval()
+
+tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
+proj_matrix = torch.zeros(4, 4, dtype=torch.float32).to(device)
+proj_matrix[0, 0] = 1 / tan_half_fov
+proj_matrix[1, 1] = 1 / tan_half_fov
+proj_matrix[2, 2] = (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
 proj_matrix[2, 3] = 1
 
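Note: model.eval() disables training-time behaviors such as dropout, and the added block rebuilds the perspective projection matrix from the vertical field of view and the near/far planes. The matrix is laid out for row-vector multiplication: the homogeneous w comes from proj_matrix[2, 3] = 1, so points transform as p @ proj rather than proj @ p. A minimal sketch with placeholder camera values standing in for opt.fovy/opt.znear/opt.zfar:

    import numpy as np
    import torch

    fovy, znear, zfar = 49.1, 0.5, 2.5  # placeholders for opt.*

    tan_half_fov = np.tan(0.5 * np.deg2rad(fovy))
    proj = torch.zeros(4, 4)
    proj[0, 0] = 1 / tan_half_fov
    proj[1, 1] = 1 / tan_half_fov
    proj[2, 2] = (zfar + znear) / (zfar - znear)
    proj[3, 2] = -(zfar * znear) / (zfar - znear)
    proj[2, 3] = 1

    # A camera-space point on the frustum edge (x = z * tan(fovy / 2))
    # lands on the NDC border after the perspective divide by w = z.
    z = 2.0
    p = torch.tensor([z * tan_half_fov, 0.0, z, 1.0]) @ proj
    assert abs(p[0] / p[3] - 1.0) < 1e-5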
@@ -120,7 +134,6 @@ if os.path.exists(unet_path):
     unet_ckpt_path = unet_path
 else:
     unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model")
-
 state_dict = torch.load(unet_ckpt_path, map_location='cpu')
 pipe_image_plus.unet.load_state_dict(state_dict, strict=True)
 pipe_image_plus = pipe_image_plus.to(device)
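Note: the removed line here is only a blank; the surrounding context shows the local-first checkpoint pattern (use a bundled file if present, otherwise pull from the Hugging Face Hub cache). A hedged restatement of that pattern, with a hypothetical helper name and local path:

    import os
    import torch
    from huggingface_hub import hf_hub_download

    def resolve_checkpoint(local_path, repo_id, filename):
        # Prefer a checkpoint shipped with the Space; fall back to the Hub.
        if os.path.exists(local_path):
            return local_path
        return hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")

    ckpt = resolve_checkpoint("ckpts/diffusion_pytorch_model.bin",  # hypothetical path
                              "TencentARC/InstantMesh",
                              "diffusion_pytorch_model.bin")
    state_dict = torch.load(ckpt, map_location="cpu")  # stay on CPU until moved to device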
@@ -169,7 +182,6 @@ def generate_mv(condition_input_image, prompt, prompt_neg='', input_elevation=0,
 
         mv_image_grid = np.concatenate([mv_image[1], mv_image[2],mv_image[3], mv_image[0]],axis=1)
         input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)
-
     else:
         from PIL import Image
         from einops import rearrange, repeat
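Note: the two kept lines differ only in how they combine the four views. np.concatenate(..., axis=1) tiles them into one wide preview strip, while np.stack(..., axis=0) adds a new leading axis to form a batch for the reconstruction model; both reorder the views so that view 0 comes last. A small shape check:

    import numpy as np

    mv_image = np.zeros((4, 256, 256, 3), dtype=np.uint8)  # four H x W x 3 views

    grid = np.concatenate([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=1)
    batch = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)

    assert grid.shape == (256, 1024, 3)     # widths add up along axis 1
    assert batch.shape == (4, 256, 256, 3)  # a new batch axis at position 0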
@@ -184,7 +196,6 @@ def generate_mv(condition_input_image, prompt, prompt_neg='', input_elevation=0,
         input_image = mv_image
     return mv_image_grid, processed_image, input_image
 
-
 @spaces.GPU
 def generate_3d(input_image, condition_input_image, mv_moedl_option=None, input_seed=42):
     kiui.seed_everything(input_seed)
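Note: @spaces.GPU is what "Running on Zero" refers to: on ZeroGPU hardware a GPU is attached only while a decorated function executes, so CUDA work belongs inside such functions. A minimal sketch of the pattern:

    import spaces
    import torch

    @spaces.GPU  # ZeroGPU grants a GPU for the duration of this call
    def run_on_gpu(x: torch.Tensor) -> torch.Tensor:
        return (x.to('cuda') * 2).cpu()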
@@ -303,46 +314,14 @@ def generate_3d(input_image, condition_input_image, mv_moedl_option=None, input_
 _TITLE = '''LDM: Large Tensorial SDF Model for Textured Mesh Generation'''
 
 _DESCRIPTION = '''
-
-
-
 * Input can be text prompt, image.
 * The currently supported multi-view diffusion models include the image-conditioned MVdream and Zero123plus, as well as the text-conditioned Imagedream.
 * If you find the output unsatisfying, try using different multi-view diffusion models or seeds!
+* The project code is available at [https://github.com/rgxie/LDM](https://github.com/rgxie/LDM).
 '''
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
 block = gr.Blocks(title=_TITLE).queue()
 with block:
-
-
-
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown('# ' + _TITLE)
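Note: this final hunk mostly deletes stray blank lines inside _DESCRIPTION and around the UI setup, and adds a link to the project code. The kept lines follow the standard Gradio Blocks layout pattern; a self-contained sketch:

    import gradio as gr

    _TITLE = 'LDM: Large Tensorial SDF Model for Textured Mesh Generation'

    block = gr.Blocks(title=_TITLE).queue()  # queue() serializes incoming jobs
    with block:
        with gr.Row():                 # horizontal container
            with gr.Column(scale=1):   # relative width within the row
                gr.Markdown('# ' + _TITLE)

    block.launch()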