Spaces:
Running
on
L40S
Running
on
L40S
Commit
•
fff6dc8
1
Parent(s):
799d1da
Update app.py
Browse files
app.py
CHANGED
@@ -44,18 +44,9 @@ snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
|
|
44 |
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16).to("cpu")
|
45 |
pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
|
46 |
|
47 |
-
|
48 |
-
"THUDM/CogVideoX-5b-I2V",
|
49 |
-
|
50 |
-
"THUDM/CogVideoX-5b-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
|
51 |
-
),
|
52 |
-
vae=pipe.vae,
|
53 |
-
scheduler=pipe.scheduler,
|
54 |
-
tokenizer=pipe.tokenizer,
|
55 |
-
text_encoder=pipe.text_encoder,
|
56 |
-
torch_dtype=torch.bfloat16,
|
57 |
-
).to("cpu")
|
58 |
-
|
59 |
|
60 |
# pipe.transformer.to(memory_format=torch.channels_last)
|
61 |
# pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
@@ -241,11 +232,20 @@ def infer(
|
|
241 |
guidance_scale=guidance_scale,
|
242 |
generator=torch.Generator(device="cpu").manual_seed(seed),
|
243 |
).frames
|
|
|
244 |
del pipe_video
|
245 |
gc.collect()
|
246 |
torch.cuda.empty_cache()
|
247 |
elif image_input is not None:
|
248 |
-
pipe_image.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
image_input = Image.fromarray(image_input).resize(size=(720, 480)) # Convert to PIL
|
250 |
image = load_image(image_input)
|
251 |
video_pt = pipe_image(
|
@@ -259,7 +259,9 @@ def infer(
|
|
259 |
generator=torch.Generator(device="cpu").manual_seed(seed),
|
260 |
).frames
|
261 |
pipe_image.to("cpu")
|
|
|
262 |
gc.collect()
|
|
|
263 |
else:
|
264 |
pipe.to(device)
|
265 |
video_pt = pipe(
|
|
|
44 |
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16).to("cpu")
|
45 |
pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
|
46 |
|
47 |
+
i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
|
48 |
+
"THUDM/CogVideoX-5b-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
|
49 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
# pipe.transformer.to(memory_format=torch.channels_last)
|
52 |
# pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
|
|
|
232 |
guidance_scale=guidance_scale,
|
233 |
generator=torch.Generator(device="cpu").manual_seed(seed),
|
234 |
).frames
|
235 |
+
pipe_video.to("cpu")
|
236 |
del pipe_video
|
237 |
gc.collect()
|
238 |
torch.cuda.empty_cache()
|
239 |
elif image_input is not None:
|
240 |
+
pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
|
241 |
+
"THUDM/CogVideoX-5b-I2V",
|
242 |
+
transformer=i2v_transformer,
|
243 |
+
vae=pipe.vae,
|
244 |
+
scheduler=pipe.scheduler,
|
245 |
+
tokenizer=pipe.tokenizer,
|
246 |
+
text_encoder=pipe.text_encoder,
|
247 |
+
torch_dtype=torch.bfloat16,
|
248 |
+
).to(device)
|
249 |
image_input = Image.fromarray(image_input).resize(size=(720, 480)) # Convert to PIL
|
250 |
image = load_image(image_input)
|
251 |
video_pt = pipe_image(
|
|
|
259 |
generator=torch.Generator(device="cpu").manual_seed(seed),
|
260 |
).frames
|
261 |
pipe_image.to("cpu")
|
262 |
+
del pipe_image
|
263 |
gc.collect()
|
264 |
+
torch.cuda.empty_cache()
|
265 |
else:
|
266 |
pipe.to(device)
|
267 |
video_pt = pipe(
|