Real-Time-Latent-Consistency-Model

Runtime error

App Files Community

radames committed on Nov 18, 2023

Commit

c01188e

•

1 Parent(s): 43462a5

sdxl loras

Browse files

Files changed (5) hide show

frontend/src/lib/components/Selectlist.svelte +1 -1
frontend/src/lib/components/TextArea.svelte +12 -7
frontend/src/routes/+page.svelte +23 -16
pipelines/controlnetLoraSDXL.py +261 -0
pipelines/{txt2imglora.py → txt2imgLoRA.py} +0 -0

frontend/src/lib/components/Selectlist.svelte CHANGED Viewed

@@ -9,7 +9,7 @@
 </script>
 <div class="grid max-w-md grid-cols-4 items-center justify-items-start gap-3">
-  <label for="model-list" class="font-medium">{params?.title} </label>
   {#if params?.values}
     <select
       bind:value

 </script>
 <div class="grid max-w-md grid-cols-4 items-center justify-items-start gap-3">
+  <label for="model-list" class="text-sm font-medium">{params?.title} </label>
   {#if params?.values}
     <select
       bind:value

frontend/src/lib/components/TextArea.svelte CHANGED Viewed

@@ -8,11 +8,16 @@
   });
 </script>
-<div class="text-normal flex items-center rounded-md border border-gray-700 px-1 py-1">
-  <textarea
-    class="mx-1 w-full px-3 py-2 font-light outline-none dark:text-black"
-    title={params?.title}
-    placeholder="Add your prompt here..."
-    bind:value
-  ></textarea>
 </div>

   });
 </script>
+<!-- Labelled prompt box: the label text comes from params.title and the textarea is bound to `value`. -->
+<div class="px-1 py-1">
+  <label class="text-sm font-medium" for={params?.title}>
+    {params?.title}
+  </label>
+  <div class="text-normal flex items-center rounded-md border border-gray-700">
+    <!-- The id must match the label's `for`, otherwise the label is not associated with the control. -->
+    <textarea
+      id={params?.title}
+      class="mx-1 w-full px-3 py-2 font-light outline-none dark:text-black"
+      title={params?.title}
+      placeholder="Add your prompt here..."
+      bind:value
+    ></textarea>
+  </div>
 </div>

frontend/src/routes/+page.svelte CHANGED Viewed

@@ -16,7 +16,7 @@
   let pipelineInfo: PipelineInfo;
   let isImageMode: boolean = false;
   let maxQueueSize: number = 0;
   onMount(() => {
     getSettings();
   });
@@ -28,6 +28,16 @@
     isImageMode = pipelineInfo.input_mode.default === PipelineMode.IMAGE;
     maxQueueSize = settings.max_queue_size;
     pipelineParams = pipelineParams.filter((e) => e?.disabled !== true);
   }
   function getSreamdata() {
@@ -59,14 +69,14 @@
   }
 </script>
-<div class="fixed right-2 top-2 max-w-xs rounded-lg p-4 text-center text-sm font-bold" id="error" />
 <main class="container mx-auto flex max-w-4xl flex-col gap-3 px-4 py-4">
-  <article class="flex- mx-auto max-w-xl text-center">
     <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
     {#if pipelineInfo?.title?.default}
       <h3 class="text-xl font-bold">{pipelineInfo?.title?.default}</h3>
     {/if}
-    <p class="py-2 text-sm">
       This demo showcases
       <a
         href="https://huggingface.co/blog/lcm_lora"
@@ -80,10 +90,17 @@
         class="text-blue-500 underline hover:no-underline">Diffusers</a
       > with a MJPEG stream server.
     </p>
     {#if maxQueueSize > 0}
       <p class="text-sm">
-        There are <span id="queue_size" class="font-bold">0</span> user(s) sharing the same GPU,
-        affecting real-time performance. Maximum queue size is {maxQueueSize}.
         <a
           href="https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model?duplicate=true"
           target="_blank"
@@ -93,16 +110,6 @@
     {/if}
   </article>
   {#if pipelineParams}
-    <header>
-      <h2 class="font-medium">Prompt</h2>
-      <p class="text-sm text-gray-500">
-        Change the prompt to generate different images, accepts <a
-          href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
-          target="_blank"
-          class="text-blue-500 underline hover:no-underline">Compel</a
-        > syntax.
-      </p>
-    </header>
     <PipelineOptions {pipelineParams}></PipelineOptions>
     <div class="flex gap-3">
       <Button on:click={toggleLcmLive} {disabled}>

   let pipelineInfo: PipelineInfo;
   let isImageMode: boolean = false;
   let maxQueueSize: number = 0;
+  let currentQueueSize: number = 0;
   onMount(() => {
     getSettings();
   });
     isImageMode = pipelineInfo.input_mode.default === PipelineMode.IMAGE;
     maxQueueSize = settings.max_queue_size;
     pipelineParams = pipelineParams.filter((e) => e?.disabled !== true);
+    if (maxQueueSize > 0) {
+      getQueueSize();
+      setInterval(() => {
+        getQueueSize();
+      }, 2000);
+    }
+  }
+  /**
+   * Fetches `${PUBLIC_BASE_URL}/queue_size` and stores the reported
+   * `queue_size` in `currentQueueSize`.
+   *
+   * This is called on a 2-second interval, so failures are logged and
+   * swallowed here: a transient network/HTTP/JSON error must not surface as
+   * an unhandled promise rejection on every tick. On failure the previous
+   * value of `currentQueueSize` is kept.
+   */
+  async function getQueueSize() {
+    try {
+      const response = await fetch(`${PUBLIC_BASE_URL}/queue_size`);
+      if (!response.ok) {
+        throw new Error(`queue_size request failed with status ${response.status}`);
+      }
+      const data = await response.json();
+      // Only accept a numeric payload so a malformed response cannot blank the counter.
+      if (typeof data?.queue_size === 'number') {
+        currentQueueSize = data.queue_size;
+      }
+    } catch (err) {
+      console.error(err);
+    }
   }
   function getSreamdata() {
   }
 </script>
+<div class="fixed right-2 top-2 max-w-xs rounded-lg p-4 text-sm font-bold" id="error" />
 <main class="container mx-auto flex max-w-4xl flex-col gap-3 px-4 py-4">
+  <article class="text-center">
     <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
     {#if pipelineInfo?.title?.default}
       <h3 class="text-xl font-bold">{pipelineInfo?.title?.default}</h3>
     {/if}
+    <p class="text-sm">
       This demo showcases
       <a
         href="https://huggingface.co/blog/lcm_lora"
         class="text-blue-500 underline hover:no-underline">Diffusers</a
       > with a MJPEG stream server.
     </p>
+    <p class="text-sm text-gray-500">
+      Change the prompt to generate different images, accepts <a
+        href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
+        target="_blank"
+        class="text-blue-500 underline hover:no-underline">Compel</a
+      > syntax.
+    </p>
     {#if maxQueueSize > 0}
       <p class="text-sm">
+        There are <span id="queue_size" class="font-bold">{currentQueueSize}</span>
+        user(s) sharing the same GPU, affecting real-time performance. Maximum queue size is {maxQueueSize}.
         <a
           href="https://huggingface.co/spaces/radames/Real-Time-Latent-Consistency-Model?duplicate=true"
           target="_blank"
     {/if}
   </article>
   {#if pipelineParams}
     <PipelineOptions {pipelineParams}></PipelineOptions>
     <div class="flex gap-3">
       <Button on:click={toggleLcmLive} {disabled}>

pipelines/controlnetLoraSDXL.py ADDED Viewed

	@@ -0,0 +1,261 @@

+from diffusers import (
+    StableDiffusionXLControlNetImg2ImgPipeline,
+    ControlNetModel,
+    LCMScheduler,
+    AutoencoderKL,
+)
+from compel import Compel, ReturnedEmbeddingsType
+import torch
+from pipelines.utils.canny_gpu import SobelOperator
+try:
+    import intel_extension_for_pytorch as ipex  # type: ignore
+except:
+    pass
+import psutil
+from config import Args
+from pydantic import BaseModel, Field
+from PIL import Image
+# Hub id of the ControlNet checkpoint loaded in Pipeline.__init__.
+controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
+# Hub id of the base checkpoint passed to the SDXL ControlNet img2img pipeline.
+model_id = "stabilityai/stable-diffusion-xl-base-1.0"
+# Hub id of the LCM LoRA, loaded under the adapter name "lcm".
+lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
+# Base models mapped to their activation tokens. Only the keys are used in this
+# file (as the choices of the "Base Model" select); the code that would prepend
+# the activation token to the prompt is commented out in Pipeline.predict.
+# NOTE(review): these ids do not look like SDXL checkpoints - confirm they belong here.
+base_models = {
+    "plasmo/woolitize": "woolitize",
+    "nitrosocke/Ghibli-Diffusion": "ghibli style",
+    "nitrosocke/mo-di-diffusion": "modern disney style",
+}
+# Disabled alternative LCM LoRA id, kept for reference:
+# lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
+# Default value of the "Prompt" textarea.
+# NOTE(review): "with , glare pose" looks like a leftover from an edited prompt - confirm the intended wording.
+default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
+# Default value of the "Negative Prompt" textarea.
+default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
+class Pipeline:
+    class Info(BaseModel):
+        name: str = "controlnet+loras+sdxl"
+        title: str = "SDXL + LCM + LoRA + Controlnet "
+        description: str = "Generates an image from a text prompt"
+        input_mode: str = "image"
+    class InputParams(BaseModel):
+        prompt: str = Field(
+            default_prompt,
+            title="Prompt",
+            field="textarea",
+            id="prompt",
+        )
+        model_id: str = Field(
+            "plasmo/woolitize",
+            title="Base Model",
+            values=list(base_models.keys()),
+            field="select",
+            id="model_id",
+        )
+        negative_prompt: str = Field(
+            default_negative_prompt,
+            title="Negative Prompt",
+            field="textarea",
+            id="negative_prompt",
+            hide=True,
+        )
+        seed: int = Field(
+            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
+        )
+        steps: int = Field(
+            4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
+        )
+        width: int = Field(
+            512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
+        )
+        height: int = Field(
+            512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
+        )
+        guidance_scale: float = Field(
+            1.0,
+            min=0,
+            max=20,
+            step=0.001,
+            title="Guidance Scale",
+            field="range",
+            hide=True,
+            id="guidance_scale",
+        )
+        strength: float = Field(
+            0.5,
+            min=0.25,
+            max=1.0,
+            step=0.001,
+            title="Strength",
+            field="range",
+            hide=True,
+            id="strength",
+        )
+        controlnet_scale: float = Field(
+            0.5,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet Scale",
+            field="range",
+            hide=True,
+            id="controlnet_scale",
+        )
+        controlnet_start: float = Field(
+            0.0,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet Start",
+            field="range",
+            hide=True,
+            id="controlnet_start",
+        )
+        controlnet_end: float = Field(
+            1.0,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet End",
+            field="range",
+            hide=True,
+            id="controlnet_end",
+        )
+        canny_low_threshold: float = Field(
+            0.31,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Canny Low Threshold",
+            field="range",
+            hide=True,
+            id="canny_low_threshold",
+        )
+        canny_high_threshold: float = Field(
+            0.125,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Canny High Threshold",
+            field="range",
+            hide=True,
+            id="canny_high_threshold",
+        )
+        debug_canny: bool = Field(
+            False,
+            title="Debug Canny",
+            field="checkbox",
+            hide=True,
+            id="debug_canny",
+        )
+    def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
+        controlnet_canny = ControlNetModel.from_pretrained(
+            controlnet_model, torch_dtype=torch_dtype
+        ).to(device)
+        vae = AutoencoderKL.from_pretrained(
+            "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
+        )
+        if args.safety_checker:
+            self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
+                model_id,
+                controlnet=controlnet_canny,
+                vae=vae,
+            )
+        else:
+            self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
+                model_id,
+                safety_checker=None,
+                controlnet=controlnet_canny,
+                vae=vae,
+            )
+        self.canny_torch = SobelOperator(device=device)
+        # Load LCM LoRA
+        self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
+        self.pipe.load_lora_weights(
+            "CiroN2022/toy-face",
+            weight_name="toy_face_sdxl.safetensors",
+            adapter_name="toy",
+        )
+        self.pipe.set_adapters(["lcm", "toy"], adapter_weights=[1.0, 0.8])
+        self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
+        self.pipe.set_progress_bar_config(disable=True)
+        self.pipe.to(device=device, dtype=torch_dtype).to(device)
+        if psutil.virtual_memory().total < 64 * 1024**3:
+            self.pipe.enable_attention_slicing()
+        self.pipe.compel_proc = Compel(
+            tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
+            text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
+            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+            requires_pooled=[False, True],
+        )
+        if args.torch_compile:
+            self.pipe.unet = torch.compile(
+                self.pipe.unet, mode="reduce-overhead", fullgraph=True
+            )
+            self.pipe.vae = torch.compile(
+                self.pipe.vae, mode="reduce-overhead", fullgraph=True
+            )
+            self.pipe(
+                prompt="warmup",
+                image=[Image.new("RGB", (768, 768))],
+                control_image=[Image.new("RGB", (768, 768))],
+            )
+    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
+        generator = torch.manual_seed(params.seed)
+        print(f"Using model: {params.model_id}")
+        # pipe = self.pipes[params.model_id]
+        # activation_token = base_models[params.model_id]
+        # prompt = f"{activation_token} {params.prompt}"
+        prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
+            [params.prompt, params.negative_prompt]
+        )
+        control_image = self.canny_torch(
+            params.image, params.canny_low_threshold, params.canny_high_threshold
+        )
+        results = self.pipe(
+            image=params.image,
+            control_image=control_image,
+            prompt_embeds=prompt_embeds[0:1],
+            pooled_prompt_embeds=pooled_prompt_embeds[0:1],
+            negative_prompt_embeds=prompt_embeds[1:2],
+            negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
+            generator=generator,
+            strength=params.strength,
+            num_inference_steps=params.steps,
+            guidance_scale=params.guidance_scale,
+            width=params.width,
+            height=params.height,
+            output_type="pil",
+            controlnet_conditioning_scale=params.controlnet_scale,
+            control_guidance_start=params.controlnet_start,
+            control_guidance_end=params.controlnet_end,
+        )
+        nsfw_content_detected = (
+            results.nsfw_content_detected[0]
+            if "nsfw_content_detected" in results
+            else False
+        )
+        if nsfw_content_detected:
+            return None
+        result_image = results.images[0]
+        if params.debug_canny:
+            # paste control_image on top of result_image
+            w0, h0 = (200, 200)
+            control_image = control_image.resize((w0, h0))
+            w1, h1 = result_image.size
+            result_image.paste(control_image, (w1 - w0, h1 - h0))
+        return result_image

pipelines/{txt2imglora.py → txt2imgLoRA.py} RENAMED Viewed

File without changes