Spaces:

JoPmt
/

Img2Img_SkyV22CntrlNet

Runtime error

App Files Community

JoPmt committed on Dec 3, 2023

Commit

102fb88

•

1 Parent(s): a7a36ba

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -47

app.py CHANGED Viewed

@@ -5,77 +5,37 @@ import random, os, gc, base64, io
 import cv2
 import torch
 from accelerate import Accelerator
-from transformers import pipeline, DPTImageProcessor, DPTForDepthEstimation, DPTFeatureExtractor
 from diffusers.utils import load_image
 from diffusers import KandinskyV22PriorPipeline, KandinskyV22ControlnetPipeline
 from gradio_client import Client
 accelerator = Accelerator(cpu=True)
 pipe_prior = accelerator.prepare(KandinskyV22PriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float32))
 pipe_prior = accelerator.prepare(pipe_prior.to("cpu"))
 pipe = accelerator.prepare(KandinskyV22ControlnetPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float32))
 pipe = accelerator.prepare(pipe.to("cpu"))
 generator = torch.Generator("cpu").manual_seed(random.randint(1, 867346))
-processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
-feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
-model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
-def process_image(image):
-    # prepare image for the model
-    encoding = feature_extractor(image, return_tensors="pt")
-    # forward pass
-    with torch.no_grad():
-       outputs = model(**encoding)
-       predicted_depth = outputs.predicted_depth
-    # interpolate to original size
-    prediction = torch.nn.functional.interpolate(
-                        predicted_depth.unsqueeze(1),
-                        size=image.size[::-1],
-                        mode="bicubic",
-                        align_corners=False,
-                 ).squeeze()
-    output = prediction.cpu().numpy()
-    formatted = (output * 255 / np.max(output)).astype('uint8')
-    img = Image.fromarray(formatted)
-    return img
-def make_hint(note):
-    ##client = Client("https://adpro-dpt-depth06.hf.space/")
-    ##imoge = client.predict(note, api_name="/predict")
-    ##image_dota = base64.b64decode(imoge)
-    ##image = Image.open(io.BytesIO(image_dota))
-    ##in_mage = processor(images=note, return_tensors="pt")
-    ##with torch.no_grad():
-    ##in_dep = tell(**in_mage)
-    prod_depth = process_image(note)
-    ##prediction = torch.nn.functional.interpolate(predicted_depth.unsqueeze(1),size=note.size[::-1],mode="bicubic",align_corners=False,)
-    ##in_dep = prediction.squeeze().cpu().numpy()
-    ##formatted = (in_dep * 255 / np.max(in_dep)).astype("uint8")
-    ##depth = Image.fromarray(formatted)
-    image = load_image(prod_depth)
     image = np.array(image)
     image = image[:, :, None]
     image = np.concatenate([image, image, image], axis=2)
     detected_map = torch.from_numpy(image).float() / 255.0
-    ##hint = detected_map.permute(2, 0, 1)
-    hint = detected_map.permute(3, 0, 1, 2)
     return hint
 def plex(goof,prompt):
     gc.collect()
     goof = load_image(goof)
-    goof = goof.convert("RGB")
     goof.save('./gf.png', 'PNG')
     ##base64_string = ''
     ##with open('./gf.png', 'rb') as image_file:
     ##    base64_string = base64.b64encode(image_file.read()).decode('utf-8')
-    hint = make_hint(goof).unsqueeze(0).to("cpu")
     negative_prior_prompt = "lowres,text,bad quality,jpeg artifacts,ugly,bad face,extra fingers,blurry,bad anatomy,extra limbs,fused fingers,long neck,watermark,signature"
     image_emb, zero_image_emb = pipe_prior(prompt=prompt, negative_prompt=negative_prior_prompt, num_inference_steps=5,generator=generator).to_tuple()

 import cv2
 import torch
 from accelerate import Accelerator
+from transformers import pipeline
 from diffusers.utils import load_image
 from diffusers import KandinskyV22PriorPipeline, KandinskyV22ControlnetPipeline
 from gradio_client import Client
 accelerator = Accelerator(cpu=True)
+depth_estimator = accelerator.prepare(pipeline("depth-estimation", model="Intel/dpt-hybrid-midas"))
 pipe_prior = accelerator.prepare(KandinskyV22PriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float32))
 pipe_prior = accelerator.prepare(pipe_prior.to("cpu"))
 pipe = accelerator.prepare(KandinskyV22ControlnetPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float32))
 pipe = accelerator.prepare(pipe.to("cpu"))
 generator = torch.Generator("cpu").manual_seed(random.randint(1, 867346))
+def make_hint(image, depth_estimator):
+    image = depth_estimator(image)["depth"]
     image = np.array(image)
     image = image[:, :, None]
     image = np.concatenate([image, image, image], axis=2)
     detected_map = torch.from_numpy(image).float() / 255.0
+    hint = detected_map.permute(2, 0, 1)
     return hint
 def plex(goof,prompt):
     gc.collect()
     goof = load_image(goof)
     goof.save('./gf.png', 'PNG')
+    goof = goof.convert("RGB")
     ##base64_string = ''
     ##with open('./gf.png', 'rb') as image_file:
     ##    base64_string = base64.b64encode(image_file.read()).decode('utf-8')
+    hint = make_hint(goof, depth_estimator).unsqueeze(0).to("cpu")
     negative_prior_prompt = "lowres,text,bad quality,jpeg artifacts,ugly,bad face,extra fingers,blurry,bad anatomy,extra limbs,fused fingers,long neck,watermark,signature"
     image_emb, zero_image_emb = pipe_prior(prompt=prompt, negative_prompt=negative_prior_prompt, num_inference_steps=5,generator=generator).to_tuple()