"""FastAPI service that samples images from minDALL-E and re-ranks them with CLIP.

Importing this module has side effects: it fetches the checkpoint archive if
the extracted directory is missing, loads both models onto the selected
device, and creates the ``app`` object that serves the two routes below.
"""
import os
import sys
import base64
from io import BytesIO

# Put the parent of this file's directory on sys.path *before* the `dalle`
# imports below (presumably so the package resolves when the service is
# launched from its own folder). The ordering matters; do not hoist the
# later imports above this line.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import torch
from fastapi import FastAPI
import numpy as np
from PIL import Image
import clip

from dalle.models import Dalle
from dalle.utils.utils import clip_score, download

print("Loading models...")
app = FastAPI()

# Location of the checkpoint archive and of its local cache directory.
url = "https://arena.kakaocdn.net/brainrepo/models/minDALL-E/57b008f02ceaa02b779c8b7463143315/1.3B.tar.gz"
root = os.path.expanduser("~/.cache/minDALLE")
filename = os.path.basename(url)
pathname = filename[: -len(".tar.gz")]  # archive name without its extension
download_target = os.path.join(root, filename)  # not read anywhere in this file
result_path = os.path.join(root, pathname)
if not os.path.exists(result_path):
    # Only hit the network when the expected directory is not present yet;
    # `download` returns the path that is then handed to the model loader.
    result_path = download(url, root)

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the checkpoint from the path resolved above (either the existing
# cache directory or whatever `download` returned).
model = Dalle.from_pretrained(result_path)
model.to(device=device)

model_clip, preprocess_clip = clip.load("ViT-B/32", device=device)
# NOTE(review): `clip.load` was already given `device`; this extra move looks
# redundant but is kept to preserve the original start-up sequence.
model_clip.to(device=device)

print("Models loaded !")


@app.get("/")
def read_root():
    """Return a fixed banner value for the root route."""
    # NOTE(review): this is a *set* literal, not a dict or a string. It is
    # kept unchanged so the response clients currently receive is unaffected;
    # confirm whether a plain string or a dict was intended.
    return {"minDALL-E!"}


@app.get("/{generate}")
def generate(prompt: str):
    """Generate images for ``prompt`` and return them base64-encoded.

    Args:
        prompt: Text description passed to the sampler. It is not part of the
            route path, so it is presumably supplied as a query parameter
            (verify against the clients).

    Returns:
        A dict with a single key ``"images"`` holding one base64 payload per
        generated image, in the order produced by ``sample``.
    """
    # NOTE(review): the path placeholder is named `generate`, which does not
    # match any argument of this handler. The route string is left untouched
    # so existing client URLs keep working.
    return {"images": [to_base64(image) for image in sample(prompt)]}


def sample(prompt):
    """Sample candidate images for ``prompt`` and order them by CLIP rank.

    Args:
        prompt: Text description forwarded to both the sampler and
            ``clip_score``.

    Returns:
        A list of ``PIL.Image.Image`` objects, ordered by the index that
        ``clip_score`` returns (presumably best match first - confirm
        against ``clip_score``).
    """
    # Sampling: request 9 candidates and bring the result to host memory.
    candidates = model.sampling(
        prompt=prompt,
        top_k=96,
        top_p=None,
        softmax_temperature=1.0,
        num_candidates=9,
        device=device,
    )
    images = candidates.cpu().numpy()
    # Move axis 1 to the end (presumably channels-first -> channels-last, the
    # layout `Image.fromarray` needs for colour images - TODO confirm).
    images = np.transpose(images, (0, 2, 3, 1))

    # CLIP re-ranking
    rank = clip_score(
        prompt=prompt,
        images=images,
        model_clip=model_clip,
        preprocess_clip=preprocess_clip,
        device=device,
    )
    images = images[rank]

    # Scale to 8-bit. Clipping first means a value marginally outside
    # [0, 255] saturates instead of wrapping around in the uint8 cast;
    # in-range values are unaffected.
    return [
        Image.fromarray(np.clip(image * 255, 0, 255).astype(np.uint8))
        for image in images
    ]


def to_base64(pil_image) -> bytes:
    """Encode ``pil_image`` as JPEG and return the base64 form of those bytes.

    Args:
        pil_image: Image object exposing ``save(fp, format=...)``.

    Returns:
        The base64-encoded JPEG data as ``bytes``.
    """
    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")
    # NOTE(review): the result is bytes, not str; the JSON response relies on
    # the framework's encoder to turn it into text - confirm if the framework
    # version changes.
    return base64.b64encode(buffered.getvalue())