Spaces:

instruction-tuning-sd
/

instruction-tuned-sd

Runtime error

File size: 4,592 Bytes

0acc836
 
d46e1fe
 
 
 
 
 
0acc836
f00ed6c
0acc836
 
d46e1fe
0acc836
 
 
2355a89
 
0acc836
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f00ed6c
6ba54df
0acc836
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a163bc7
0acc836
 
 
 
de87909
 
0acc836
 
 
 
 
 
 
 
 
45414f1
f7ac112
0acc836
 
f7ac112
0acc836
2495f4f
0acc836
abe2443
0acc836
 
 
 
 
 
2495f4f
 
 
 
 
 
 
 
 
0acc836
 
 
45414f1
0acc836
 
 
 
 
2495f4f
0acc836
99cab67
0acc836
 
 
 
 
2495f4f
 
 
 
 
 
 
 
 
d46e1fe
2495f4f

import gradio as gr
import PIL
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline

# Hugging Face Hub ids of the two instruction-tuned checkpoints served by this demo.
cartoonization_id = "instruction-tuning-sd/cartoonizer"
image_proc_id = "instruction-tuning-sd/low-level-img-proc"

# Heading and introductory Markdown rendered at the top of the page.
title = "Instruction-tuned Stable Diffusion"
description = (
    "This Space demonstrates the instruction-tuning on Stable Diffusion. "
    "To know more, please check out the "
    "[corresponding blog post](https://hf.co/blog/instruction-tuning-sd). "
    "Some experimentation tips on the hyperparameters are available from "
    "[the original InstructPix2Pix Space](https://huggingface.co/spaces/timbrooks/instruct-pix2pix). "
    "You can quickly try out the samples provided at the bottom of this demo."
)


# Pipelines already built in this process, keyed by checkpoint id. Loading a
# checkpoint is by far the most expensive step of a request, so each pipeline
# is constructed once and then reused.
_PIPELINE_CACHE = {}


def load_pipeline(id: str):
    """Return a ready-to-run InstructPix2Pix pipeline for the checkpoint ``id``.

    The first call for a given ``id`` loads the weights and moves them to the
    GPU when CUDA is available (half precision), or keeps them on the CPU in
    full precision otherwise. Later calls return the same cached object.

    Args:
        id: Identifier passed to ``from_pretrained`` (a Hub repo id or a
            local path).

    Returns:
        The configured ``StableDiffusionInstructPix2PixPipeline`` with its
        progress bar disabled.
    """
    cached = _PIPELINE_CACHE.get(id)
    if cached is not None:
        return cached

    use_cuda = torch.cuda.is_available()
    pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
        # float16 is only requested on GPU; CPU inference stays in float32.
        id, torch_dtype=torch.float16 if use_cuda else torch.float32
    ).to("cuda" if use_cuda else "cpu")

    if use_cuda:
        # xformers attention is an optional memory optimisation: if the package
        # is missing or unusable, keep serving with the default attention
        # instead of failing the whole request.
        try:
            pipeline.enable_xformers_memory_efficient_attention()
        except (ModuleNotFoundError, ValueError) as err:
            import warnings

            warnings.warn(
                f"xformers memory-efficient attention not enabled: {err}",
                RuntimeWarning,
            )

    pipeline.set_progress_bar_config(disable=True)
    _PIPELINE_CACHE[id] = pipeline
    return pipeline


def infer_cartoonization(
    prompt: str,
    negative_prompt: str,
    image: PIL.Image.Image,
    steps: int,
    img_cfg: float,
    text_cfg: float,
    seed: int,
):
    """Run the cartoonization checkpoint on ``image`` and return four results.

    Args:
        prompt: Edit instruction given to the pipeline.
        negative_prompt: Text passed as the pipeline's ``negative_prompt``.
        image: Input image to edit.
        steps: Number of inference steps (coerced to ``int``).
        img_cfg: Value forwarded as ``image_guidance_scale``.
        text_cfg: Value forwarded as ``guidance_scale``.
        seed: Seed for the random generator (coerced to ``int``).

    Returns:
        The ``images`` attribute of the pipeline output; four images are
        requested per prompt.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if image is None:
        # An empty image component arrives as None; fail with a readable
        # message instead of an opaque error from inside the pipeline.
        raise gr.Error("Please provide an input image.")

    pipeline = load_pipeline(cartoonization_id)
    # Seeding makes repeated runs with the same inputs reproducible.
    generator = torch.manual_seed(int(seed))
    output = pipeline(
        prompt,
        image,
        negative_prompt=negative_prompt,
        num_inference_steps=int(steps),
        image_guidance_scale=img_cfg,
        guidance_scale=text_cfg,
        generator=generator,
        num_images_per_prompt=4,
    )
    return output.images


def infer_img_proc(
    prompt: str,
    negative_prompt: str,
    image: PIL.Image.Image,
    steps: int,
    img_cfg: float,
    text_cfg: float,
    seed: int,
):
    """Run the low-level image-processing checkpoint on ``image``.

    Args:
        prompt: Edit instruction given to the pipeline (e.g. a deraining
            request, as in the bundled example).
        negative_prompt: Text passed as the pipeline's ``negative_prompt``.
        image: Input image to edit.
        steps: Number of inference steps (coerced to ``int``).
        img_cfg: Value forwarded as ``image_guidance_scale``.
        text_cfg: Value forwarded as ``guidance_scale``.
        seed: Seed for the random generator (coerced to ``int``).

    Returns:
        The ``images`` attribute of the pipeline output. The number of images
        per prompt is left at the pipeline's default.

    Raises:
        gr.Error: If no input image was supplied.
    """
    if image is None:
        # An empty image component arrives as None; fail with a readable
        # message instead of an opaque error from inside the pipeline.
        raise gr.Error("Please provide an input image.")

    pipeline = load_pipeline(image_proc_id)
    # Seeding makes repeated runs with the same inputs reproducible.
    generator = torch.manual_seed(int(seed))
    output = pipeline(
        prompt,
        image,
        negative_prompt=negative_prompt,
        num_inference_steps=int(steps),
        image_guidance_scale=img_cfg,
        guidance_scale=text_cfg,
        generator=generator,
    )
    return output.images


# Sample rows for the two tabs. Column order matches the input lists used by
# the UI: prompt, negative prompt, image path, steps, image CFG, text CFG, seed.
examples = [
    [
        "cartoonize this image",
        "low quality",
        "examples/mountain.png",
        20,
        1.5,
        7.5,
        0,
    ],
    [
        "derain this image",
        "low quality",
        "examples/duck.png",
        20,
        1.5,
        7.5,
        0,
    ],
]

# Build the page: a heading, the description, and one tab per checkpoint. Each
# tab owns its own set of input widgets, an output gallery, a submit button and
# a cached example.
with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    with gr.Tab("Cartoonization"):
        prompt = gr.Textbox(label="Prompt")
        neg_prompt = gr.Textbox(label="Negative Prompt")
        input_image = gr.Image(label="Input Image", type="pil")
        steps = gr.Slider(minimum=5, maximum=100, step=1, label="Steps")
        img_cfg = gr.Number(value=1.5, label="Image CFG", interactive=True)
        text_cfg = gr.Number(value=7.5, label="Text CFG", interactive=True)
        seed = gr.Slider(minimum=0, maximum=100000, step=1, label="Seed")

        car_output_gallery = gr.Gallery().style(grid=[2], height="auto")
        submit_btn = gr.Button(value="Submit")
        # Order must match the positional parameters of infer_cartoonization.
        all_car_inputs = [prompt, neg_prompt, input_image, steps, img_cfg, text_cfg, seed]
        submit_btn.click(
            fn=infer_cartoonization,
            inputs=all_car_inputs,
            outputs=[car_output_gallery],
        )

        gr.Markdown("### Cartoonization example")
        gr.Examples(
            [examples[0]],
            inputs=all_car_inputs,
            outputs=car_output_gallery,
            fn=infer_cartoonization,
            cache_examples=True,
        )

    with gr.Tab("Low-level image processing"):
        # These names are rebound to this tab's own widgets. The prompt box in
        # particular must be rebound here, otherwise the input list below would
        # capture the Cartoonization tab's textbox.
        prompt = gr.Textbox(label="Prompt")
        neg_prompt = gr.Textbox(label="Negative Prompt")
        input_image = gr.Image(label="Input Image", type="pil")
        steps = gr.Slider(minimum=5, maximum=100, step=1, label="Steps")
        img_cfg = gr.Number(value=1.5, label="Image CFG", interactive=True)
        text_cfg = gr.Number(value=7.5, label="Text CFG", interactive=True)
        seed = gr.Slider(minimum=0, maximum=100000, step=1, label="Seed")

        img_proc_output_gallery = gr.Gallery().style(grid=[2], height="auto")
        submit_btn = gr.Button(value="Submit")
        # Order must match the positional parameters of infer_img_proc.
        all_img_proc_inputs = [prompt, neg_prompt, input_image, steps, img_cfg, text_cfg, seed]
        submit_btn.click(
            fn=infer_img_proc,
            inputs=all_img_proc_inputs,
            outputs=[img_proc_output_gallery],
        )

        gr.Markdown("### Low-level image processing example")
        gr.Examples(
            [examples[1]],
            inputs=all_img_proc_inputs,
            outputs=img_proc_output_gallery,
            fn=infer_img_proc,
            cache_examples=True,
        )

# Requests go through the queue so long-running inference does not time out;
# .queue() is the supported replacement for the deprecated
# launch(enable_queue=True) argument.
demo.queue().launch()