First version (100k steps)

Browse files

Files changed (16) hide show

.DS_Store +0 -0
README.md +33 -0
images_0.png +0 -0
images_1.png +0 -0
model_index.json +32 -0
scheduler/scheduler_config.json +21 -0
text_encoder/config.json +25 -0
text_encoder/flax_model.msgpack +3 -0
tokenizer/merges.txt +0 -0
tokenizer/special_tokens_map.json +24 -0
tokenizer/tokenizer_config.json +0 -0
tokenizer/vocab.json +0 -0
unet/config.json +40 -0
unet/diffusion_flax_model.msgpack +3 -0
vae/config.json +30 -0
vae/diffusion_flax_model.msgpack +3 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

README.md ADDED Viewed

	@@ -0,0 +1,33 @@

+---
+license: creativeml-openrail-m
+base_model: runwayml/stable-diffusion-v1-5
+tags:
+- stable-diffusion
+- stable-diffusion-diffusers
+- text-to-image
+- diffusers
+- jax-diffusers-event
+inference: true
+---
+# Stable Diffusion Nano
+prompt: A watercolor painting of an otter
+![images_0](./images_0.png)
+prompt: Marvel MCU deadpool, red mask, red shirt, red gloves, black shoulders, black elbow pads, black legs, gold buckle, black belt, black mask, white eyes, black boots, fuji low light color 35mm film, downtown Osaka alley at night out of focus in background, neon lights
+![images_1](./images_1.png)
+## Training details
+All parameters were initialized from the runwayml/stable-diffusion-v1-5 model. The UNet was fine-tuned as follows:
+- 100,000 steps training the full unet, learning rate = 1e-5, batch size = 512 (128 per TPU).
+- Trained on [LAION Improved Aesthetics 6plus](https://huggingface.co/datasets/ChristophSchuhmann/improved_aesthetics_6plus).
+## License
+This model is open access and available to all, with a CreativeML OpenRAIL-M license further specifying rights and usage. The CreativeML OpenRAIL License specifies:
+- You can't use the model to deliberately produce nor share illegal or harmful outputs or content.
+- The authors claim no rights on the outputs you generate; you are free to use them and are accountable for their use, which must not go against the provisions set in the license.
+- You may re-distribute the weights and use the model commercially and/or as a service. If you do, please be aware you have to include the same use restrictions as the ones in the license and share a copy of the CreativeML OpenRAIL-M to all your users (please read the license entirely and carefully). [Please read the full license here](https://huggingface.co/spaces/CompVis/stable-diffusion-license).

images_0.png ADDED Viewed

images_1.png ADDED Viewed

model_index.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "_class_name": "FlaxStableDiffusionPipeline",
+  "_diffusers_version": "0.16.0.dev0",
+  "feature_extractor": [
+    null,
+    null
+  ],
+  "safety_checker": [
+    null,
+    null
+  ],
+  "scheduler": [
+    "diffusers",
+    "FlaxDPMSolverMultistepScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "FlaxCLIPTextModel"
+  ],
+  "tokenizer": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "FlaxUNet2DConditionModel"
+  ],
+  "vae": [
+    "diffusers",
+    "FlaxAutoencoderKL"
+  ]
+}

scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "_class_name": "FlaxDPMSolverMultistepScheduler",
+  "_diffusers_version": "0.16.0.dev0",
+  "algorithm_type": "dpmsolver++",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "dynamic_thresholding_ratio": 0.995,
+  "lower_order_final": true,
+  "num_train_timesteps": 1000,
+  "prediction_type": "epsilon",
+  "sample_max_value": 1.0,
+  "set_alpha_to_one": false,
+  "skip_prk_steps": true,
+  "solver_order": 2,
+  "solver_type": "midpoint",
+  "steps_offset": 1,
+  "thresholding": false,
+  "trained_betas": null
+}

text_encoder/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "openai/clip-vit-large-patch14",
+  "architectures": [
+    "CLIPTextModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 768,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "projection_dim": 768,
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.1",
+  "vocab_size": 49408
+}

text_encoder/flax_model.msgpack ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:41eba2abe3a73b328cd047c899066febb92a8e12b1e6cfe0cb4d5d5ac6b5c978
+size 492248682

tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer/tokenizer_config.json ADDED Viewed

File without changes

tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

unet/config.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "_class_name": "FlaxUNet2DConditionModel",
+  "_diffusers_version": "0.16.0.dev0",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ],
+  "use_linear_projection": false,
+  "use_memory_efficient_attention": false
+}

unet/diffusion_flax_model.msgpack ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:caa0d87db3b72ac7180d8aa7ac72d6f3153266fc4527117b35955fa13e65a67b
+size 3438108367

vae/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_class_name": "FlaxAutoencoderKL",
+  "_diffusers_version": "0.16.0.dev0",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 512,
+  "scaling_factor": 0.18215,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

vae/diffusion_flax_model.msgpack ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed39fc57b0224dec2c0cc6f9a532633ccef89815e40b544a6b8f38f422023d8a
+size 334623853