Spaces:

AIGC-Audio
/

AudioGPT

Build error

App Files Files Community

lmzjms commited on Apr 3, 2023

Commit

e154cd6

•

1 Parent(s): fda2ed9

Upload img2audio_args.yaml

Browse files

Files changed (1) hide show

text_to_audio/Make_An_Audio/configs/img_to_audio/img2audio_args.yaml +77 -0

text_to_audio/Make_An_Audio/configs/img_to_audio/img2audio_args.yaml ADDED Viewed

	@@ -0,0 +1,77 @@

+model:
+  base_learning_rate: 1.0e-05
+  target: ldm.models.diffusion.ddpm_audio.LatentDiffusion_audio
+  params:
+    linear_start: 0.00085
+    linear_end: 0.0120
+    num_timesteps_cond: 1
+    log_every_t: 200
+    timesteps: 1000
+    first_stage_key: image
+    cond_stage_key: caption
+    image_size: 32      # unused
+    mel_dim: 10         # 80 // 2^3
+    mel_length: 78     # 624 // 2^3
+    channels: 4
+    cond_stage_trainable: false
+    conditioning_key: crossattn
+    monitor: val/loss_simple_ema
+    scale_by_std: True
+    use_ema: False
+    scheduler_config: # 10000 warmup steps
+      target: ldm.lr_scheduler.LambdaLinearScheduler
+      params:
+        warm_up_steps: [10000]
+        cycle_lengths: [10000000000000]
+        f_start: [1.e-6]
+        f_max: [1.]
+        f_min: [ 1.]
+    unet_config:
+      target: ldm.modules.diffusionmodules.custom_openaimodel.UNetModel
+      params:
+        image_size: 32 # ununsed
+        in_channels: 4
+        out_channels: 4
+        model_channels: 256
+        attention_resolutions:
+        - 1
+        - 2
+        num_res_blocks: 2
+        channel_mult:  # num_down = len(ch_mult)-1
+        - 1
+        - 2
+        num_head_channels: 32
+        use_spatial_transformer: true
+        transformer_depth: 1
+        context_dim: 1024
+        use_context_project: false
+    first_stage_config:
+      target: ldm.models.autoencoder.AutoencoderKL
+      params:
+        embed_dim: 4
+        monitor: val/rec_loss
+        ddconfig:
+          double_z: true
+          z_channels: 4
+          resolution: 848
+          in_channels: 1
+          out_ch: 1
+          ch: 128
+          ch_mult: [ 1, 2, 2, 4 ]  # num_down = len(ch_mult)-1
+          num_res_blocks: 2
+          attn_resolutions: [106, 212]
+          dropout: 0.0
+        lossconfig:
+          target: torch.nn.Identity
+    cond_stage_config:
+      target: ldm.modules.encoders.modules.FrozenGlobalNormOpenCLIPEmbedder
+      params:
+        freeze: True
+        delvisual: False