fffiloni committed on
Commit
929d7f2
1 Parent(s): c65aada

Handling mismatch tensor size

Browse files
animatediff/pipelines/pipeline_animation.py CHANGED
@@ -307,43 +307,48 @@ class AnimationPipeline(DiffusionPipeline):
307
  else:
308
  init_latents = None
309
 
310
-
311
  if isinstance(generator, list) and len(generator) != batch_size:
312
  raise ValueError(
313
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
314
  f" size of {batch_size}. Make sure the batch size matches the length of the generators."
315
  )
 
316
  if latents is None:
317
  rand_device = "cpu" if device.type == "mps" else device
318
 
319
  if isinstance(generator, list):
320
- shape = shape
321
- # shape = (1,) + shape[1:]
322
- # ignore init latents for batch model
323
- latents = [
324
- torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype)
325
- for i in range(batch_size)
326
- ]
327
- latents = torch.cat(latents, dim=0).to(device)
 
 
328
  else:
329
- latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
330
  if init_latents is not None:
 
331
  for i in range(video_length):
332
- # I just feel dividing by 30 yield stable result but I don't know why
333
- # gradully reduce init alpha along video frames (loosen restriction)
334
- init_alpha = (video_length - float(i)) / video_length / 30
335
  latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
 
 
 
336
  else:
337
  if latents.shape != shape:
338
  raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
339
  latents = latents.to(device)
340
 
341
- # scale the initial noise by the standard deviation required by the scheduler
342
- #latents = latents * self.scheduler.init_noise_sigma
343
  if init_latents is None:
344
  latents = latents * self.scheduler.init_noise_sigma
 
345
  return latents
346
 
 
347
  @torch.no_grad()
348
  def __call__(
349
  self,
 
307
  else:
308
  init_latents = None
309
 
 
310
  if isinstance(generator, list) and len(generator) != batch_size:
311
  raise ValueError(
312
  f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
313
  f" size of {batch_size}. Make sure the batch size matches the length of the generators."
314
  )
315
+
316
  if latents is None:
317
  rand_device = "cpu" if device.type == "mps" else device
318
 
319
  if isinstance(generator, list):
320
+ # Initialize latents as a random tensor
321
+ latents = torch.randn(shape, device=rand_device, dtype=dtype)
322
+
323
+ # If init_latents is not None, copy the values for each video frame
324
+ if init_latents is not None:
325
+ for i in range(video_length):
326
+ init_alpha = (video_length - float(i)) / video_length / 30
327
+ latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
328
+
329
+ latents = latents.to(device)
330
  else:
331
+ # If init_latents is not None, repeat it for the entire batch
332
  if init_latents is not None:
333
+ init_latents = init_latents.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
334
  for i in range(video_length):
335
+ init_alpha = (video_length - float(i)) / video_length / 30
 
 
336
  latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
337
+ else:
338
+ latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
339
+
340
  else:
341
  if latents.shape != shape:
342
  raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
343
  latents = latents.to(device)
344
 
345
+ # Scale the initial noise by the standard deviation required by the scheduler
 
346
  if init_latents is None:
347
  latents = latents * self.scheduler.init_noise_sigma
348
+
349
  return latents
350
 
351
+
352
  @torch.no_grad()
353
  def __call__(
354
  self,