initial commit

Files changed (5) hide show

.gitattributes +35 -0
README.md +97 -0
evo_ukiyoe_v1.py +176 -0
pytorch_lora_weights.safetensors +3 -0
requirements.txt +8 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,97 @@

+---
+library_name: diffusers
+license: apache-2.0
+language:
+- ja
+pipeline_tag: text-to-image
+tags:
+- stable-diffusion
+---
+# 🐟 Evo-Ukiyoe-v1
+🤗 [Models](https://huggingface.co/SakanaAI/Evo-Ukiyoe-v1/) | 📝 [Blog](https://sakana.ai/evo-ukiyoe/) | 🐦 [Twitter](https://twitter.com/SakanaAILabs)
+**Evo-Ukiyoe-v1** is an experimental, education-purpose image generation model for the Japanese woodblock print (Ukiyoe) style. The model was trained based on Sakana AI's [Evo-SDXL-JP](https://huggingface.co/SakanaAI/EvoSDXL-JP-v1).
+All the data used to train Evo-Ukiyoe comes from Ukiyoe images belonging to [Ritsumeikan University, Art Research Center](https://www.arc.ritsumei.ac.jp/).
+Please refer to our [blog](https://sakana.ai/evo-ukiyoe/) for more details.
+## Usage
+Use the code below to get started with the model.
+<details>
+<summary> Click to expand </summary>
+1. Git clone this model card
+   ```
+   git clone https://huggingface.co/SakanaAI/Evo-Ukiyoe-v1
+   ```
+2. Install git-lfs if you don't have it yet.
+   ```
+   sudo apt install git-lfs
+   git lfs install
+   ```
+3. Create conda env
+   ```
+   conda create -n evo-ukiyoe python=3.11
+   conda activate evo-ukiyoe
+   ```
+4. Install packages
+   ```
+   cd Evo-Ukiyoe-v1
+   pip install -r requirements.txt
+   ```
+5. Run
+   ```python
+   from evo_ukiyoe_v1 import load_evo_ukiyoe
+   prompt = "着物を着ている猫が庭でお茶を飲んでいる。"
+   pipe = load_evo_ukiyoe(device="cuda")
+   images = pipe(prompt + "輻の浮世絵。超詳細。", negative_prompt='', guidance_scale=8.0, num_inference_steps=40).images
+   images[0].save("image.png")
+   ```
+</details>
+## Model Details
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [Sakana AI](https://sakana.ai/)
+- **Model type:** Diffusion-based text-to-image generative model
+- **Language(s):** Japanese
+- **Blog:** https://sakana.ai/evo-ukiyoe/
+## License
+The Python script included in this repository and the LoRA weights are licensed under the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0).
+Please note that the license for the model/pipeline generated by this script is inherited from the source models.
+## Uses
+This model is provided for research and development purposes only and should be considered as an experimental prototype.
+It is not intended for commercial use or deployment in mission-critical environments.
+Use of this model is at the user's own risk, and its performance and outcomes are not guaranteed.
+Sakana AI shall not be liable for any direct, indirect, special, incidental, or consequential damages, or any loss arising from the use of this model, regardless of the results obtained.
+Users must fully understand the risks associated with the use of this model and use it at their own discretion.
+## Acknowledgement
+Evo-Ukiyoe was trained based on Evo-SDXL-JP. We would like to thank the developers of Evo-SDXL-JP source models for their contributions and for making their work available.
+- [SDXL](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
+- [Juggernaut-XL-v9](https://huggingface.co/RunDiffusion/Juggernaut-XL-v9)
+- [SDXL-DPO](https://huggingface.co/mhdang/dpo-sdxl-text2image-v1)
+- [JSDXL](https://huggingface.co/stabilityai/japanese-stable-diffusion-xl)
+## Citation
+    @misc{Evo-Ukiyoe,
+    url    = {https://huggingface.co/SakanaAI/Evo-Ukiyoe-v1},
+    title  = {Evo-Ukiyoe},
+    author = {Clanuwat, Tarin and Shing, Makoto and Imajuku, Yuki and Kitamoto, Asanobu and Akama, Ryo}
+    }

evo_ukiyoe_v1.py ADDED Viewed

	@@ -0,0 +1,176 @@

+import gc
+import os
+from typing import Dict, List, Union
+from diffusers import (
+    StableDiffusionXLPipeline,
+    UNet2DConditionModel,
+)
+from huggingface_hub import hf_hub_download
+import safetensors
+import torch
+from tqdm import tqdm
+from transformers import AutoTokenizer, CLIPTextModelWithProjection
+# Base models: Hugging Face Hub repo ids of the four checkpoints whose UNet
+# weights are merged in load_evo_ukiyoe(). JSDXL_REPO is also the source of the
+# text encoder and tokenizer loaded there.
+SDXL_REPO = "stabilityai/stable-diffusion-xl-base-1.0"
+DPO_REPO = "mhdang/dpo-sdxl-text2image-v1"
+JN_REPO = "RunDiffusion/Juggernaut-XL-v9"
+JSDXL_REPO = "stabilityai/japanese-stable-diffusion-xl"
+# Evo-Ukiyoe: repo id passed to pipe.load_lora_weights() in load_evo_ukiyoe().
+UKIYOE_REPO = "SakanaAI/Evo-Ukiyoe-v1"
+def load_state_dict(checkpoint_file: Union[str, os.PathLike], device: str = "cpu"):
+    """
+    Load a checkpoint file onto `device`.
+    Args:
+        checkpoint_file (`str` or `os.PathLike`): Path of the checkpoint. A file whose
+            extension is `.safetensors` is read with safetensors; any other file is
+            read with `torch.load`.
+        device (`str`): Device the tensors are loaded onto. Defaults to `"cpu"`.
+    Returns:
+        The content of the checkpoint (for safetensors files, the mapping returned
+        by `safetensors.torch.load_file`).
+    """
+    extension = os.path.splitext(os.fspath(checkpoint_file))[1].lower()
+    if extension == ".safetensors":
+        # A bare `import safetensors` does not load the `torch` submodule; import it
+        # explicitly instead of relying on another package having imported it first.
+        from safetensors.torch import load_file
+        return load_file(checkpoint_file, device=device)
+    # NOTE(security): `torch.load` unpickles the file, so it must only be used on
+    # checkpoints that come from a trusted source.
+    return torch.load(checkpoint_file, map_location=device)
+def load_from_pretrained(
+    repo_id,
+    filename="diffusion_pytorch_model.fp16.safetensors",
+    subfolder="unet",
+    device="cuda",
+) -> Dict[str, torch.Tensor]:
+    """
+    Fetch one checkpoint file from the Hugging Face Hub and load it as a state dict.
+    Args:
+        repo_id (`str`): Hub repository that hosts the checkpoint.
+        filename (`str`): Name of the checkpoint file inside `subfolder`.
+        subfolder (`str`): Folder of the repository that contains the file.
+        device (`str`): Device handed to `load_state_dict`.
+    Returns:
+        `Dict[str, torch.Tensor]`: The loaded state dict.
+    """
+    # Resolve the file through the Hub first, then hand the result to the loader.
+    checkpoint_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=filename,
+        subfolder=subfolder,
+    )
+    return load_state_dict(checkpoint_path, device=device)
+def reshape_weight_task_tensors(task_tensors, weights):
+    """
+    Give `weights` as many dimensions as `task_tensors` by appending size-1 dimensions.
+    Args:
+        task_tensors (`torch.Tensor`): Tensor whose number of dimensions is the target.
+        weights (`torch.Tensor`): Tensor to reshape.
+    Returns:
+        `torch.Tensor`: A view of `weights` with trailing singleton dimensions added.
+    """
+    # Number of singleton dimensions to append; a non-positive count appends none.
+    trailing_dims = task_tensors.dim() - weights.dim()
+    target_shape = (*weights.shape, *((1,) * trailing_dims))
+    return weights.view(target_shape)
+def linear(task_tensors: List[torch.Tensor], weights: torch.Tensor) -> torch.Tensor:
+    """
+    Combine the task tensors into one tensor as a weighted sum.
+    Args:
+        task_tensors (`List[torch.Tensor]`): The tensors to merge; they are stacked
+            along a new leading dimension.
+        weights (`torch.Tensor`): The coefficients applied along that leading dimension.
+    Returns:
+        `torch.Tensor`: The weighted sum of the task tensors.
+    """
+    stacked = torch.stack(task_tensors, dim=0)
+    # Expand the coefficients so they broadcast over every non-leading dimension.
+    coefficients = reshape_weight_task_tensors(stacked, weights)
+    return (stacked * coefficients).sum(dim=0)
+def merge_models(task_tensors, weights):
+    """
+    Merge several state dicts key by key with `linear`.
+    Args:
+        task_tensors: Sequence of state dicts to merge. The keys are taken from the
+            first one, and every merged entry is popped from each input dict.
+        weights: Coefficients, one per state dict; converted with `torch.tensor`.
+    Returns:
+        A dict mapping each key to the merged tensor.
+    """
+    keys = list(task_tensors[0].keys())
+    # Create the coefficients on the same device as the tensors being merged.
+    weights = torch.tensor(weights, device=task_tensors[0][keys[0]].device)
+    return {
+        key: linear(
+            # pop() removes the entry from its source dict, presumably so the
+            # original tensor can be released once it has been merged.
+            task_tensors=[source.pop(key) for source in task_tensors],
+            weights=weights,
+        )
+        for key in tqdm(keys, desc="Merging")
+    }
+def split_conv_attn(weights):
+    """
+    Partition a state dict into attention-projection entries and all other entries.
+    Args:
+        weights: Mapping to split. Every entry is popped, so it is empty afterwards.
+    Returns:
+        A dict with two keys: `"conv"` (entries whose key matches none of the
+        attention markers) and `"attn"` (entries whose key contains `to_k`,
+        `to_q`, `to_v` or `to_out.0`).
+    """
+    attn_markers = ("to_k", "to_q", "to_v", "to_out.0")
+    attn_part = {}
+    conv_part = {}
+    # Iterate over a snapshot of the keys because the dict is mutated in the loop.
+    for name in list(weights):
+        is_attn = any(marker in name for marker in attn_markers)
+        destination = attn_part if is_attn else conv_part
+        destination[name] = weights.pop(name)
+    return {"conv": conv_part, "attn": attn_part}
+def load_evo_ukiyoe(device="cuda") -> StableDiffusionXLPipeline:
+    """
+    Build the Evo-Ukiyoe pipeline.
+    The UNet weights of the four base models are merged with fixed coefficients
+    (one set for the attention projection weights, one set for everything else),
+    the text encoder and tokenizer are taken from JSDXL, and the Evo-Ukiyoe LoRA
+    weights are loaded and fused into the pipeline.
+    Args:
+        device (`str`): Device used for loading, merging and the returned pipeline.
+    Returns:
+        `StableDiffusionXLPipeline`: The pipeline, moved to `device` in float16.
+    """
+    # Merge coefficients, ordered like the checkpoints below: SDXL, DPO, JN, JSDXL.
+    conv_coefficients = [
+        0.15928833971605916,
+        0.1032449268871776,
+        0.6503217149752791,
+        0.08714501842148402,
+    ]
+    attn_coefficients = [
+        0.1877279276437178,
+        0.20014114603909822,
+        0.3922685507065275,
+        0.2198623756106564,
+    ]
+    # Load base models; only the DPO repo uses a non-default checkpoint filename.
+    checkpoints = [
+        (SDXL_REPO, {}),
+        (DPO_REPO, {"filename": "diffusion_pytorch_model.safetensors"}),
+        (JN_REPO, {}),
+        (JSDXL_REPO, {}),
+    ]
+    sources = [
+        split_conv_attn(load_from_pretrained(repo, device=device, **extra))
+        for repo, extra in checkpoints
+    ]
+    # Merge base models
+    new_conv = merge_models([parts["conv"] for parts in sources], conv_coefficients)
+    new_attn = merge_models([parts["attn"] for parts in sources], attn_coefficients)
+    # Drop the source state dicts before the UNet is instantiated
+    del sources
+    gc.collect()
+    if "cuda" in device:
+        torch.cuda.empty_cache()
+    # Instantiate UNet from the SDXL config and fill it with the merged weights
+    unet = UNet2DConditionModel.from_config(
+        UNet2DConditionModel.load_config(SDXL_REPO, subfolder="unet")
+    ).to(device=device)
+    unet.load_state_dict({**new_conv, **new_attn})
+    # Load other modules
+    text_encoder = CLIPTextModelWithProjection.from_pretrained(
+        JSDXL_REPO,
+        subfolder="text_encoder",
+        torch_dtype=torch.float16,
+        variant="fp16",
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        JSDXL_REPO,
+        subfolder="tokenizer",
+        use_fast=False,
+    )
+    # Load pipeline
+    pipe = StableDiffusionXLPipeline.from_pretrained(
+        SDXL_REPO,
+        unet=unet,
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        torch_dtype=torch.float16,
+        variant="fp16",
+    )
+    # Load Evo-Ukiyoe weights and fuse them into the pipeline
+    pipe.load_lora_weights(UKIYOE_REPO)
+    pipe.fuse_lora(lora_scale=1.0)
+    return pipe.to(device=torch.device(device), dtype=torch.float16)

pytorch_lora_weights.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02ea124249f2bc80db556f9b81d3c98ec3a256b00885b17b5450c0d7a7d0e9c0
+size 59519264

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+torch
+torchvision
+accelerate==0.32.0
+diffusers==0.29.2
+sentencepiece==0.2.0
+transformers==4.42.3
+peft==0.11.1