from tuneavideo.pipelines.pipeline_tuneavideo import TuneAVideoPipeline
from tuneavideo.models.unet import UNet3DConditionModel
from tuneavideo.util import save_videos_grid
import torch
import gradio as gr
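
# Text-to-image diffusion checkpoints selectable as the base model in the UI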
model_list = [
    "runwayml/stable-diffusion-v1-5",
    "CompVis/stable-diffusion-v1-4",
    "prompthero/openjourney",
    "dreamlike-art/dreamlike-photoreal-2.0",
    "dreamlike-art/dreamlike-diffusion-1.0"
]
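
# Build a Tune-A-Video pipeline from the selected base model and the fine-tuned
# "a-man-is-surfing" UNet, then generate and save a short video for the prompt.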
def tune_video_predict(
    pipe_id: str,
    prompt: str,
    video_length: int,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
):
    # Load the UNet fine-tuned by Tune-A-Video on the "a man is surfing" clip
    unet = UNet3DConditionModel.from_pretrained("Tune-A-Video-library/a-man-is-surfing", subfolder='unet', torch_dtype=torch.float16).to('cuda')
    # Assemble the video pipeline around the selected base text-to-image model
    pipe = TuneAVideoPipeline.from_pretrained(pipe_id, unet=unet, torch_dtype=torch.float16).to("cuda")
    # Generate the requested number of frames for the prompt
    video = pipe(prompt, video_length=video_length, height=height, width=width, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale).videos
    # Save the frames as a GIF and return its path for Gradio to display
    output_path = save_videos_grid(video, save_path='output', path=f"{prompt}.gif")
    return output_path
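
# Gradio input widgets, in the same order as tune_video_predict's arguments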
demo_inputs = [
    gr.Dropdown(
        label="Model",
        choices=model_list,
        value="CompVis/stable-diffusion-v1-4",
    ),
    gr.Textbox(
        label="Prompt",
        value='a flower blooming'
    ),
    gr.Slider(
        label="Video Length",
        minimum=1,
        maximum=50,
        value=8,
        step=1,
    ),
    gr.Slider(
        label="Height",
        minimum=128,
        maximum=1280,
        value=416,
        step=32,
    ),
    gr.Slider(
        label="Width",
        minimum=128,
        maximum=1280,
        value=416,
        step=32,
    ),
    gr.Slider(
        label="Num Inference Steps",
        minimum=1,
        maximum=100,
        value=50,
        step=1,
    ),
    gr.Slider(
        label="Guidance Scale",
        minimum=0.0,
        maximum=100,
        value=7.5,
        step=0.5,
    )
]
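
# GIF output component (gr.outputs is the legacy Gradio API; newer releases expose gr.Video directly)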
demo_outputs = gr.outputs.Video(type="gif", label="Output")
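
# Example inputs shown below the interface; each entry matches the order of demo_inputs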
examples = [
    ["CompVis/stable-diffusion-v1-4", "a panda is surfing", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/disco-diffusion-style", "ddfusion style on the church", 5, 416, 416, 50, 7.5],
    # ["sd-dreambooth-library/nasa-space-v2-768", "nasa style galaxy moving", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/mr-potato-head", "sks mr potato head, wearing a pink hat, is surfing.", 5, 416, 416, 50, 7.5],
    ["sd-dreambooth-library/mr-potato-head", "sks mr potato head is surfing in the forest.", 5, 416, 416, 50, 7.5],
]
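
# Title and description shown at the top of the demo page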
description = "This application generates a video from a text prompt. To get started, simply enter a prompt. The default model in the dropdown is a generic model with which you can generate anything. Alternatively, for more photorealistic generations, you can use the other models in the dropdown. These are DreamBooth models trained on a specific object name, so make sure you know what the object is called. You can find example prompts for DreamBooth models in the Examples section right below the interface."
title = "Tune-A-Video: One-Shot Tuning of Image Diffusion Models for Text-to-Video Generation"
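
# Wire the components, examples, and copy into a Gradio Interface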
demo_app = gr.Interface(
    fn=tune_video_predict,
    inputs=demo_inputs,
    outputs=demo_outputs,
    examples=examples,
    cache_examples=False,
    title=title,
    theme="huggingface",
    description=description,
)
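
# Queue requests so long-running generations aren't dropped; debug=True surfaces errors in the logs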
demo_app.launch(debug=True, enable_queue=True)