Spaces:

wyysf
/

CraftsMan

Runtime error

App Files Files Community

CraftsMan / gradio_app.py

wyysf

Update gradio_app.py

f5810c5 verified 5 months ago

raw

history blame

19.5 kB

	import spaces
	import argparse
	import os
	import json
	import torch
	import sys
	import time
	import importlib
	import numpy as np
	from omegaconf import OmegaConf
	from huggingface_hub import hf_hub_download

	from collections import OrderedDict
	import trimesh
	import gradio as gr
	from typing import Any
	from einops import rearrange

	proj_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
	sys.path.append(os.path.join(proj_dir))

	import tempfile

	from apps.utils import *

	# Page title: passed to gr.Blocks(title=...) and rendered as the top-level Markdown heading.
	_TITLE = '''CraftsMan: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner'''
	# HTML/Markdown blurb shown under the title via gr.Markdown(_DESCRIPTION).
	_DESCRIPTION = '''
	<div>
	<span style="color: red;">Important: The ckpt models released have been primarily trained on character data, hence they are likely to exhibit superior performance in this category. We are also planning to release more advanced pretrained models in the future.</span>
	<br>
	By mimicking the artist/craftsman modeling workflow, we propose CraftsMan (aka 匠心) which uses 3D Latent Set Diffusion Model that directly generates coarse meshes,
	then a multi-view normal enhanced image generation model is used to refine the mesh.
	We provide the coarse 3D diffusion part here.
	<br>
	If you found CraftsMan is helpful, please help to ⭐ the <a href='https://github.com/wyysf-98/CraftsMan/' target='_blank'>Github Repo</a>. Thanks!
	<a style="display:inline-block; margin-left: .5em" href='https://github.com/wyysf-98/CraftsMan/'><img src='https://img.shields.io/github/stars/wyysf-98/CraftsMan?style=social' /></a>
	<br>
	*If you have your own multi-view images, you can directly upload it.
	</div>
	'''
	# Footer Markdown (citation, acknowledgements, license, contact) rendered via gr.Markdown(_CITE_).
	# Raw string so that backslashes, if any, are kept verbatim.
	_CITE_ = r"""
	---
	📝 Citation
	If you find our work useful for your research or applications, please cite using this bibtex:
	```bibtex
	@article{li2024craftsman,
	author = {Weiyu Li and Jiarui Liu and Rui Chen and Yixun Liang and Xuelin Chen and Ping Tan and Xiaoxiao Long},
	title = {CraftsMan: High-fidelity Mesh Generation with 3D Native Generation and Interactive Geometry Refiner},
	journal = {arXiv preprint arXiv:2405.14979},
	year = {2024},
	}
	```
	🤗 Acknowledgements
	We use <a href='https://github.com/wjakob/instant-meshes' target='_blank'>Instant Meshes</a> to remesh the generated mesh to a lower face count, thanks to the authors for the great work.
	📋 License
	CraftsMan is under [AGPL-3.0](https://www.gnu.org/licenses/agpl-3.0.en.html), so any downstream solution and products (including cloud services) that include CraftsMan code or a trained model (both pretrained or custom trained) inside it should be open-sourced to comply with the AGPL conditions. If you have any questions about the usage of CraftsMan, please contact us first.
	📧 Contact
	If you have any questions, feel free to open a discussion or contact us at <b>[email protected]</b>.
	"""
	from apps.third_party.CRM.pipelines import TwoStagePipeline
	from apps.third_party.LGM.pipeline_mvdream import MVDreamPipeline
	from apps.third_party.Era3D.pipelines.pipeline_mvdiffusion_unclip import StableUnCLIPImg2ImgPipeline
	from apps.third_party.Era3D.data.single_image_dataset import SingleImageDataset

	import re
	import os
	import stat

	# Bit values of one octal permission digit: read, write, execute.
	RD, WD, XD = 4, 2, 1
	BNS = [RD, WD, XD]
	# stat flags indexed as MDS[permission][who]:
	# rows follow BNS (read, write, execute), columns are user, group, other.
	MDS = [
	    [stat.S_IRUSR, stat.S_IRGRP, stat.S_IROTH],
	    [stat.S_IWUSR, stat.S_IWGRP, stat.S_IWOTH],
	    [stat.S_IXUSR, stat.S_IXGRP, stat.S_IXOTH],
	]


	def chmod(path, mode):
	    """Set the permission bits of ``path`` from an octal-digit mode such as ``"755"``.

	    Args:
	        path: File or directory passed to ``os.chmod``.
	        mode: One to three octal digits given as ``str`` or ``int``. An ``int``
	            is read by its decimal digits (``755`` means ``"755"``, not ``0o755``).
	            Shorter modes are left-padded with zeros, so ``"7"`` means ``"007"``.

	    Raises:
	        ValueError: If ``mode`` is not made of one to three digits in 0-7.
	    """
	    if isinstance(mode, int):
	        mode = str(mode)
	    # fullmatch (unlike match + "$") also rejects a trailing newline, which
	    # would otherwise slip through and index past the three "who" columns.
	    if not re.fullmatch(r"[0-7]{1,3}", mode):
	        raise ValueError("mode does not conform to ^[0-7]{1,3}$ pattern")
	    digits = mode.rjust(3, "0")
	    mode_num = 0
	    for who, digit in enumerate(digits):
	        value = int(digit)
	        for bit, flags in zip(BNS, MDS):
	            if value & bit:
	                mode_num |= flags[who]
	    os.chmod(path, mode_num)

	# Grant rwx to user, group and other ("777") on the InstantMeshes path at import time;
	# presumably so the remeshing binary invoked by image2mesh can be executed.
	# NOTE(review): `parent_dir` is not defined anywhere in this file. It presumably comes
	# from `from apps.utils import *` -- confirm, otherwise this line raises NameError.
	chmod(f"{parent_dir}/apps/third_party/InstantMeshes", "777")

	# Module-level singletons. They start as None and are filled in by the
	# `__main__` section; the GPU-decorated functions read them as globals.
	model = None
	cached_dir = None
	generator = None

	# Each third-party package directory is appended to sys.path.
	sys.path.append("apps/third_party/CRM")
	crm_pipeline = None

	sys.path.append("apps/third_party/LGM")
	# gen_mvimg reads the global `imagedream_pipeline`, so that is the name that must exist.
	imagedream_pipeline = None
	# Misspelled name used by earlier revisions, kept as an alias for compatibility.
	imgaedream_pipeline = imagedream_pipeline

	sys.path.append("apps/third_party/Era3D")
	era3d_pipeline = None

	@spaces.GPU
	def gen_mvimg(
	    mvimg_model, image, seed, guidance_scale, step, text, neg_text, elevation, backgroud_color
	):
	    """Generate four views of ``image`` with the selected multi-view backend.

	    Args:
	        mvimg_model: Backend name: ``"CRM"``, ``"ImageDream"`` or ``"Era3D"``.
	        image: Input image. The CRM and ImageDream branches alpha-composite it
	            over a background; the Era3D branch hands it to ``SingleImageDataset``.
	        seed: Random seed. A falsy value (0 or None) draws a random seed from
	            ``[1, 65535)``.
	        guidance_scale: Guidance scale forwarded to the selected pipeline.
	        step: Number of sampling steps forwarded to the selected pipeline.
	        text: Prompt (ImageDream only).
	        neg_text: Negative prompt (ImageDream only).
	        elevation: Elevation forwarded to the ImageDream pipeline.
	        backgroud_color: Background colour composited behind the image (ImageDream only).

	    Returns:
	        A 4-tuple of images selected from the pipeline output. The UI wiring
	        binds them, in order, to the front, right, back and left view widgets.

	    Raises:
	        ValueError: If ``mvimg_model`` is not a supported backend name.
	    """
	    global generator, crm_pipeline, imagedream_pipeline, era3d_pipeline

	    # The UI feeds these from gr.Number, which may deliver floats, while
	    # torch.Generator.manual_seed and the step counts need integers.
	    seed = int(seed) if seed else int(np.random.randint(1, 65535))
	    step = int(step)
	    generator.manual_seed(seed)

	    if mvimg_model == "CRM":
	        crm_pipeline.set_seed(seed)
	        # This branch always uses a mid-grey background, ignoring `backgroud_color`.
	        background = Image.new("RGBA", image.size, (127, 127, 127))
	        image = Image.alpha_composite(background, image)
	        mv_imgs = crm_pipeline(
	            image,
	            scale=guidance_scale,
	            step=step,
	        )["stage1_images"]
	        return mv_imgs[5], mv_imgs[3], mv_imgs[2], mv_imgs[0]

	    if mvimg_model == "ImageDream":
	        background = Image.new("RGBA", image.size, backgroud_color)
	        image = Image.alpha_composite(background, image)
	        image = np.array(image).astype(np.float32) / 255.0
	        # Blend RGB over white using the alpha channel, dropping alpha.
	        image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
	        mv_imgs = imagedream_pipeline(
	            text,
	            image,
	            negative_prompt=neg_text,
	            guidance_scale=guidance_scale,
	            num_inference_steps=step,
	            elevation=elevation,
	            generator=generator,
	        )
	        return mv_imgs[1], mv_imgs[2], mv_imgs[3], mv_imgs[0]

	    if mvimg_model == "Era3D":
	        crop_size = 420
	        batch = SingleImageDataset(
	            root_dir='', num_views=6, img_wh=[512, 512], bg_color='white',
	            crop_size=crop_size, single_image=image,
	            prompt_embeds_path='apps/third_party/Era3D/data/fixed_prompt_embeds_6view',
	        )[0]
	        # The input views are duplicated to pair with the two prompt sets
	        # (normal, then colour) concatenated below.
	        imgs_in = torch.cat([batch['imgs_in']] * 2, dim=0)
	        imgs_in = rearrange(imgs_in, "B Nv C H W -> (B Nv) C H W")  # (B*Nv, 3, H, W)

	        normal_prompt_embeddings = batch['normal_prompt_embeddings']
	        clr_prompt_embeddings = batch['color_prompt_embeddings']
	        prompt_embeddings = torch.cat([normal_prompt_embeddings, clr_prompt_embeddings], dim=0)
	        prompt_embeddings = rearrange(prompt_embeddings, "B Nv N C -> (B Nv) N C")

	        # `device` is the module global assigned in the __main__ section.
	        imgs_in = imgs_in.to(device=device, dtype=torch.float16)
	        prompt_embeddings = prompt_embeddings.to(device=device, dtype=torch.float16)

	        mv_imgs = era3d_pipeline(
	            imgs_in,
	            None,
	            prompt_embeds=prompt_embeddings,
	            generator=generator,
	            guidance_scale=guidance_scale,
	            num_inference_steps=step,
	            num_images_per_prompt=1,
	            eta=1.0,
	        ).images
	        # Indices 6+ fall in the second half of the batch -- presumably the
	        # colour renders given the concatenation order above; TODO confirm.
	        return mv_imgs[6], mv_imgs[8], mv_imgs[9], mv_imgs[10]

	    # Fail loudly instead of returning None to a handler with four outputs.
	    raise ValueError(f"Unsupported multi-view model: {mvimg_model!r}")

	@spaces.GPU
	def image2mesh(
	    view_front: np.ndarray,
	    view_right: np.ndarray,
	    view_back: np.ndarray,
	    view_left: np.ndarray,
	    more: Any = False,
	    scheluder_name: str = "DDIMScheduler",
	    guidance_scale: float = 7.5,
	    steps: int = 50,
	    seed: int = 4,
	    octree_depth: int = 7,
	):
	    """Turn four view images into a mesh file and return its path.

	    Args:
	        view_front, view_right, view_back, view_left: View images as arrays,
	            converted with ``Image.fromarray``.
	        more: Collection of selected extra options; ``"Remesh"`` enables
	            remeshing with Instant Meshes. A bool or None means no options.
	        scheluder_name: Currently unused; kept so existing callers keep working.
	        guidance_scale: Guidance scale forwarded to ``model.sample``.
	        steps: Number of sampling steps forwarded to ``model.sample``.
	        seed: Seed forwarded to ``model.sample``.
	        octree_depth: Octree depth forwarded to ``extract_geometry``.

	    Returns:
	        Path of a temporary ``.obj`` file: the remeshed mesh when remeshing
	        was requested and succeeded, otherwise the directly exported mesh.

	    Raises:
	        gr.Error: If any of the four views is missing.
	    """
	    # Function-scope import so the block brings its own dependency into scope.
	    import subprocess

	    global model

	    views = (view_front, view_right, view_back, view_left)
	    if any(view is None for view in views):
	        raise gr.Error("Please generate or upload all four views first.")

	    # The signature default is a bool, which does not support `in`.
	    options = () if more is None or isinstance(more, bool) else more

	    sample_inputs = {"mvimages": [[Image.fromarray(view) for view in views]]}

	    # The UI feeds these from gr.Number, which may deliver floats.
	    latents = model.sample(
	        sample_inputs,
	        sample_times=1,
	        guidance_scale=guidance_scale,
	        return_intermediates=False,
	        steps=int(steps),
	        seed=int(seed),
	    )[0]

	    # decode the latents to mesh
	    box_v = 1.1
	    mesh_outputs, _ = model.shape_model.extract_geometry(
	        latents,
	        bounds=[-box_v, -box_v, -box_v, box_v, box_v, box_v],
	        octree_depth=int(octree_depth),
	    )
	    assert len(mesh_outputs) == 1, "Only support single mesh output for gradio demo"
	    mesh = trimesh.Trimesh(mesh_outputs[0][0], mesh_outputs[0][1])

	    # Reserve a persistent temp path and close the handle right away
	    # (delete=False keeps the file on disk after closing).
	    with tempfile.NamedTemporaryFile(suffix=".obj", delete=False) as tmp:
	        filepath = tmp.name
	    mesh.export(filepath, include_normals=True)

	    if 'Remesh' in options:
	        with tempfile.NamedTemporaryFile(suffix="_remeshed.obj", delete=False) as tmp:
	            remeshed_filepath = tmp.name
	        print("Remeshing with Instant Meshes...")
	        target_face_count = 2000
	        # Argument list instead of a shell string: no quoting problems when
	        # a path contains spaces or shell metacharacters.
	        command = [
	            f"{proj_dir}/apps/third_party/InstantMeshes",
	            filepath,
	            "-f", str(target_face_count),
	            "-o", remeshed_filepath,
	        ]
	        try:
	            returncode = subprocess.run(command, check=False).returncode
	        except OSError as err:
	            print(f"Instant Meshes could not be started: {err}")
	            returncode = -1
	        if returncode == 0 and os.path.getsize(remeshed_filepath) > 0:
	            filepath = remeshed_filepath
	        else:
	            # Do not hand an empty file to the viewer when remeshing failed.
	            print("Remeshing failed; returning the original mesh instead.")

	    return filepath

	# Script entry point: parse CLI options, create the cache directory and pick the torch device.
	if __name__=="__main__":
	parser = argparse.ArgumentParser()
	# parser.add_argument("--model_path", type=str, required=True, help="Path to the object file",)
	# Directory created below and passed to demo.launch(allowed_paths=...) at the end of the script.
	parser.add_argument("--cached_dir", type=str, default="./gradio_cached_dir")
	# CUDA device index; only used when CUDA is available.
	parser.add_argument("--device", type=int, default=0)
	args = parser.parse_args()

	cached_dir = args.cached_dir
	os.makedirs(args.cached_dir, exist_ok=True)
	# Fall back to the CPU when no CUDA device is available.
	device = torch.device(f"cuda:{args.device}" if torch.cuda.is_available() else "cpu")
	print(f"using device: {device}")

	# for multi-view images generation
	# Background handling modes; the keys become the choices of the "Backgroud Choice" dropdown.
	background_choice = OrderedDict({
	"Alpha as Mask": "Alpha as Mask",
	"Auto Remove Background": "Auto Remove Background",
	"Original Image": "Original Image",
	})
	# Enabled multi-view backends; also used as the choices of the "MV Image Model" dropdown.
	mvimg_model_config_list = [
	"Era3D",
	# "CRM",
	# "ImageDream"
	]
	# NOTE(review): this is an if/elif chain, so only the first enabled backend is loaded,
	# while every entry of mvimg_model_config_list is offered in the UI -- confirm that at
	# most one backend is ever enabled at a time.
	if "Era3D" in mvimg_model_config_list:
	# cfg = load_config("apps/third_party/Era3D/configs/test_unclip-512-6view.yaml")
	# schema = OmegaConf.structured(TestConfig)
	# cfg = OmegaConf.merge(schema, cfg)
	era3d_pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
	'pengHTYX/MacLab-Era3D-512-6view',
	torch_dtype=torch.float16
	)
	# enable xformers
	era3d_pipeline.unet.enable_xformers_memory_efficient_attention()
	era3d_pipeline.to(device)
	elif "CRM" in mvimg_model_config_list:
	stage1_config = OmegaConf.load(f"apps/third_party/CRM/configs/nf7_v3_SNR_rd_size_stroke.yaml").config
	stage1_sampler_config = stage1_config.sampler
	stage1_model_config = stage1_config.models
	# Point the stage-1 model at the checkpoint fetched from the Hub and at its repo-relative config.
	stage1_model_config.resume = hf_hub_download(repo_id="Zhengyi/CRM", filename="pixel-diffusion.pth", repo_type="model")
	stage1_model_config.config = f"apps/third_party/CRM/" + stage1_model_config.config
	crm_pipeline = TwoStagePipeline(
	stage1_model_config,
	stage1_sampler_config,
	device=device,
	dtype=torch.float16
	)
	elif "ImageDream" in mvimg_model_config_list:
	# NOTE(review): unlike the Era3D branch, this pipeline is not moved to `device` here -- confirm.
	imagedream_pipeline = MVDreamPipeline.from_pretrained(
	"ashawkey/imagedream-ipmv-diffusers", # remote weights
	torch_dtype=torch.float16,
	trust_remote_code=True,
	)

	# Shared RNG; gen_mvimg reseeds it on every call via generator.manual_seed(seed).
	generator = torch.Generator(device)

	# for 3D latent set diffusion
	# Fetch the checkpoint and its config from the "wyysf/CraftsMan" model repo on the Hub.
	ckpt_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-aligned-vae/model.ckpt", repo_type="model")
	config_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6-aligned-vae/config.yaml", repo_type="model")
	# ckpt_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/model-300k.ckpt", repo_type="model")
	# config_path = hf_hub_download(repo_id="wyysf/CraftsMan", filename="image-to-shape-diffusion/clip-mvrgb-modln-l256-e64-ne8-nd16-nl6/config.yaml", repo_type="model")
	# Scheduler display name -> dotted class path; only the keys are used in this file (scheduler dropdown choices).
	scheluder_dict = OrderedDict({
	"DDIMScheduler": 'diffusers.schedulers.DDIMScheduler',
	# "DPMSolverMultistepScheduler": 'diffusers.schedulers.DPMSolverMultistepScheduler', # not support yet
	# "UniPCMultistepScheduler": 'diffusers.schedulers.UniPCMultistepScheduler', # not support yet
	})

	# main GUI
	# Soft theme with a blue primary hue and overridden secondary-button fill colours.
	custom_theme = gr.themes.Soft(primary_hue="blue").set(
	button_secondary_background_fill="*neutral_100",
	button_secondary_background_fill_hover="*neutral_200")
	# Extra CSS handed to gr.Blocks: centres the content of the element with id "disp_image".
	custom_css = '''#disp_image {
	text-align: center; /* Horizontally center the content */
	}'''

	# Build the page: header, input/settings column, output column with advanced options, footer.
	with gr.Blocks(title=_TITLE, theme=custom_theme, css=custom_css) as demo:
	with gr.Row():
	with gr.Column(scale=1):
	gr.Markdown('# ' + _TITLE)
	gr.Markdown(_DESCRIPTION)

	with gr.Row():
	with gr.Column(scale=2):
	with gr.Column():
	# input image
	with gr.Row():
	# RGBA PIL image; gen_mvimg alpha-composites it in the CRM / ImageDream branches.
	image_input = gr.Image(
	label="Image Input",
	image_mode="RGBA",
	sources="upload",
	type="pil",
	)
	run_btn = gr.Button('Generate', variant='primary', interactive=True)

	with gr.Row():
	gr.Markdown('''Try a different <b>seed and MV Model</b> for better results. Good Luck :)''')
	with gr.Row():
	# A seed of 0 makes gen_mvimg draw a random seed.
	seed = gr.Number(0, label='Seed', show_label=True)
	mvimg_model = gr.Dropdown(value="Era3D", label="MV Image Model", choices=list(mvimg_model_config_list))
	# image2mesh only checks for "Remesh" in this selection.
	more = gr.CheckboxGroup(["Remesh", "Symmetry(TBD)"], label="More", show_label=False)
	with gr.Row():
	# input prompt
	text = gr.Textbox(label="Prompt (Opt.)", info="only works for ImageDream")

	with gr.Accordion('Advanced options', open=False):
	# negative prompt
	neg_text = gr.Textbox(label="Negative Prompt", value='ugly, blurry, pixelated obscure, unnatural colors, poor lighting, dull, unclear, cropped, lowres, low quality, artifacts, duplicate')
	# elevation
	elevation = gr.Slider(label="elevation", minimum=-90, maximum=90, step=1, value=0)

	with gr.Row():
	# Every file found in ./apps/examples is offered as a clickable example input.
	gr.Examples(
	examples=[os.path.join("./apps/examples", i) for i in os.listdir("./apps/examples")],
	inputs=[image_input],
	examples_per_page=8
	)

	with gr.Column(scale=4):
	with gr.Row():
	# Viewer for the mesh file path returned by image2mesh.
	output_model_obj = gr.Model3D(
	label="Output Model (OBJ Format)",
	camera_position=(90.0, 90.0, 3.5),
	interactive=False,
	)
	with gr.Row():
	gr.Markdown('''*please note that the model is fliped due to the gradio viewer, please download the obj file and you will get the correct orientation.''')

	with gr.Row():
	# The four views: outputs of gen_mvimg and inputs of image2mesh; interactive, so they can also be set by hand.
	view_front = gr.Image(label="Front", interactive=True, show_label=True)
	view_right = gr.Image(label="Right", interactive=True, show_label=True)
	view_back = gr.Image(label="Back", interactive=True, show_label=True)
	view_left = gr.Image(label="Left", interactive=True, show_label=True)

	with gr.Accordion('Advanced options', open=False):
	with gr.Row(equal_height=True):
	# Buttons that run only one half of the pipeline.
	run_mv_btn = gr.Button('Only Generate 2D', interactive=True)
	run_3d_btn = gr.Button('Only Generate 3D', interactive=True)

	with gr.Accordion('Advanced options (2D)', open=False):
	with gr.Row():
	foreground_ratio = gr.Slider(
	label="Foreground Ratio",
	minimum=0.5,
	maximum=1.0,
	value=1.0,
	step=0.05,
	)

	with gr.Row():
	# Rebinds `background_choice` from the OrderedDict to the dropdown; the keys are read before the rebinding.
	background_choice = gr.Dropdown(label="Backgroud Choice", value="Auto Remove Background",choices=list(background_choice.keys()))
	rmbg_type = gr.Dropdown(label="Backgroud Remove Type", value="rembg",choices=['sam', "rembg"])
	backgroud_color = gr.ColorPicker(label="Background Color", value="#FFFFFF", interactive=True)
	# backgroud_color = gr.ColorPicker(label="Background Color", value="#7F7F7F", interactive=True)

	with gr.Row():
	mvimg_guidance_scale = gr.Number(value=3.0, minimum=1, maximum=10, label="2D Guidance Scale")
	mvimg_steps = gr.Number(value=30, minimum=20, maximum=100, label="2D Sample Steps")

	with gr.Accordion('Advanced options (3D)', open=False):
	with gr.Row():
	guidance_scale = gr.Number(label="3D Guidance Scale", value=3.0, minimum=1.0, maximum=10.0)
	steps = gr.Number(value=50, minimum=20, maximum=100, label="3D Sample Steps")

	with gr.Row():
	# Passed to image2mesh as `scheluder_name`, which that function does not use.
	scheduler = gr.Dropdown(label="scheluder", value="DDIMScheduler",choices=list(scheluder_dict.keys()))
	octree_depth = gr.Slider(label="Octree Depth", value=7, minimum=4, maximum=8, step=1)

	gr.Markdown(_CITE_)

	outputs = [output_model_obj]
	# RMBG, load_model and check_input_image are not defined in this file;
	# presumably they come from `from apps.utils import *` -- confirm.
	rmbg = RMBG(device)

	# Assigns the module-level `model` that image2mesh reads as a global.
	model = load_model(ckpt_path, config_path, device)

	# Full pipeline: validate input -> background removal -> multi-view generation -> mesh.
	# Each .success() stage runs only when the previous stage finished without error.
	run_btn.click(fn=check_input_image, inputs=[image_input]
	).success(
	fn=rmbg.run,
	inputs=[rmbg_type, image_input, foreground_ratio, background_choice, backgroud_color],
	outputs=[image_input]
	).success(
	fn=gen_mvimg,
	inputs=[mvimg_model, image_input, seed, mvimg_guidance_scale, mvimg_steps, text, neg_text, elevation, backgroud_color],
	outputs=[view_front, view_right, view_back, view_left]
	).success(
	fn=image2mesh,
	inputs=[view_front, view_right, view_back, view_left, more, scheduler, guidance_scale, steps, seed, octree_depth],
	outputs=outputs,
	api_name="generate_img2obj")
	# 2D only: fill the four view widgets from the current input image.
	run_mv_btn.click(fn=gen_mvimg,
	inputs=[mvimg_model, image_input, seed, mvimg_guidance_scale, mvimg_steps, text, neg_text, elevation, backgroud_color],
	outputs=[view_front, view_right, view_back, view_left]
	)
	# 3D only: build the mesh from whatever is currently in the four view widgets.
	# NOTE(review): this api_name duplicates the one used in the run_btn chain above --
	# confirm how Gradio resolves the clash and whether a distinct name is wanted.
	run_3d_btn.click(fn=image2mesh,
	inputs=[view_front, view_right, view_back, view_left, more, scheduler, guidance_scale, steps, seed, octree_depth],
	outputs=outputs,
	api_name="generate_img2obj")

	# Enable request queuing and start the server with a public share link.
	demo.queue().launch(share=True, allowed_paths=[args.cached_dir])