"""Gradio demo: depth-guided image generation with Kandinsky 2.2 ControlNet on CPU.

A depth map is estimated from the uploaded image and passed as the ControlNet
hint, while the text prompt is turned into image embeddings by the Kandinsky
prior pipeline.
"""
import random

import cv2
import gradio as gr
import numpy as np
import PIL.Image
import torch
from accelerate import Accelerator
from diffusers import KandinskyV22ControlnetPipeline, KandinskyV22PriorPipeline
from diffusers.utils import load_image
from PIL import Image
from transformers import pipeline

# Side length (pixels) used for both the depth hint and the generated image.
IMAGE_SIZE = 512
# Number of denoising steps requested from the ControlNet pipeline.
NUM_INFERENCE_STEPS = 10
# Negative prompt handed to the prior pipeline.
NEGATIVE_PRIOR_PROMPT = "lowres,text,bad quality,jpeg artifacts,ugly,bad face,extra fingers,blurry,bad anatomy,extra limbs,fused fingers,long neck,watermark,signature"

accelerator = Accelerator()

depth_estimator = accelerator.prepare(
    pipeline("depth-estimation", model="vinvino02/glpn-nyu")
)
depth_estimator.to("cpu")

pipe_prior = accelerator.prepare(
    KandinskyV22PriorPipeline.from_pretrained(
        "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float32
    )
)
pipe_prior.to("cpu")

pipe = accelerator.prepare(
    KandinskyV22ControlnetPipeline.from_pretrained(
        "kandinsky-community/kandinsky-2-2-controlnet-depth",
        torch_dtype=torch.float32,
    )
)
pipe.to("cpu")
# Disabled in the original script:
# pipe.unet.to(memory_format=torch.channels_last)

# Created once and shared by every request, so its state advances from call
# to call rather than being re-seeded per generation.
generator = torch.Generator(device="cpu").manual_seed(4096)


def make_hint(image):
    """Build the ControlNet depth hint for *image*.

    Runs the depth estimator, replicates the resulting depth map into three
    identical channels, scales it by 1/255 and moves the channel axis first.

    Args:
        image: Input accepted by the depth-estimation pipeline (``plex``
            passes a PIL RGB image).

    Returns:
        A float ``torch.Tensor`` with the channel axis first (3 channels).
        Assumes the estimator's ``"depth"`` output converts to a 2-D array --
        TODO confirm for other estimator models.
    """
    depth = np.array(depth_estimator(image)["depth"])
    depth = depth[:, :, None]
    stacked = np.concatenate([depth, depth, depth], axis=2)
    detected_map = torch.from_numpy(stacked).float() / 255.0
    return detected_map.permute(2, 0, 1)


def plex(prompt, goof):
    """Generate an image from *prompt*, guided by the depth of image *goof*.

    Args:
        prompt: Text prompt forwarded to the prior pipeline.
        goof: Path (or anything ``load_image`` accepts) of the guide image.

    Returns:
        The first image produced by the ControlNet pipeline.

    Raises:
        gr.Error: If no guide image was supplied.
    """
    if goof is None:
        raise gr.Error("Please upload an image to use as the depth guide.")

    guide = load_image(goof).convert("RGB")
    # The pipeline is asked for an IMAGE_SIZE x IMAGE_SIZE output, so the hint
    # must have exactly that size. ``thumbnail`` keeps the aspect ratio and
    # never enlarges, which produced mismatched hints for non-square or small
    # uploads; ``resize`` always yields the requested dimensions.
    guide = guide.resize((IMAGE_SIZE, IMAGE_SIZE))
    hint = make_hint(guide).unsqueeze(0).to("cpu")

    image_emb, zero_image_emb = pipe_prior(
        prompt=prompt,
        negative_prompt=NEGATIVE_PRIOR_PROMPT,
        generator=generator,
    ).to_tuple()

    images = pipe(
        image_embeds=image_emb,
        negative_image_embeds=zero_image_emb,
        hint=hint,
        num_inference_steps=NUM_INFERENCE_STEPS,
        generator=generator,
        height=IMAGE_SIZE,
        width=IMAGE_SIZE,
    ).images[0]
    return images


iface = gr.Interface(
    fn=plex,
    inputs=[gr.Textbox(), gr.Image(type="filepath")],
    outputs=gr.Image(),
    title="Img2Img_SkyV22CntrlNet_CPU",
    description="Running on CPU, very slow!",
)
iface.launch()