import os
import json

import numpy as np
from PIL import Image, ImageFilter
import soundfile


def save_html(logdir, x, zs, labels, alignments, hps):
    """Write ``{logdir}/index.html`` and one sub-directory per batch item.

    For each item in the batch a dict describing the sample is assembled and
    passed to ``_save_item_html``, which writes ``{logdir}/item_{i}/``.

    Args:
        logdir: Output directory. It must already exist: the index file is
            opened inside it before anything is created.
        x: Batch of audio. ``x[i].cpu().numpy()`` is called for every item, so
            presumably a torch tensor -- TODO confirm against the caller.
        zs: Sequence of per-level arrays. Only ``zs[hps.levels - 1]`` is read,
            and only its first two shape entries (batch size, total length).
        labels: Mapping whose ``labels['info'][i]`` holds the per-item
            metadata dict (``'full_tokens'`` is read here; ``'lyrics'``,
            ``'genre'`` and ``'artist'`` are read by ``_save_item_html``).
        alignments: Per-item alignment matrices, or ``None`` when there are
            none.
        hps: Hyperparameter object; ``levels`` and ``sr`` are read.

    NOTE(review): the literals printed to the index contain no HTML markup in
    this copy of the file (several are empty). That looks like tags lost
    during extraction -- confirm against the upstream source before relying
    on the generated page.
    """
    level = hps.levels - 1  # Top level used
    z = zs[level]
    bs, total_length = z.shape[0], z.shape[1]

    # Explicit UTF-8 so non-ASCII text cannot fail on a non-UTF-8 locale.
    with open(f'{logdir}/index.html', 'w', encoding='utf-8') as html:
        print(f"{logdir}", file=html)
        print("", file=html)
        for item in range(bs):
            info = labels['info'][item]
            data = dict(
                wav=x[item].cpu().numpy(),
                sr=hps.sr,
                info=info,
                total_length=total_length,
                total_tokens=len(info['full_tokens']),
                alignment=alignments[item] if alignments is not None else None,
            )
            item_dir = f'{logdir}/item_{item}'
            _save_item_html(item_dir, item, item, data)
            print("", file=html)
        print("", file=html)


def _save_item_html(item_dir, item_id, item_name, data):
    """Write the page and assets for one sample into ``item_dir``.

    Files written: ``index.html``, ``audio.wav`` and ``lyrics.json``; when an
    alignment with at least one token is supplied, also ``align.png`` and
    ``align.json``.

    Args:
        item_dir: Directory for this sample; created if missing.
        item_id: Unused here; kept so the call signature stays unchanged.
        item_name: Printed as the first line of the item page.
        data: Dict with keys ``'wav'``, ``'sr'``, ``'info'`` (itself holding
            ``'lyrics'``, ``'genre'``, ``'artist'``), ``'total_length'``,
            ``'total_tokens'`` and ``'alignment'`` (array of shape
            ``(total_length, total_tokens)`` or ``None``).

    Raises:
        AssertionError: If the alignment shape or the lyrics length does not
            match ``total_length`` / ``total_tokens``.

    NOTE(review): as in ``save_html``, the printed literals carry no markup
    in this copy of the file -- confirm against the upstream source.
    """
    # Author notes carried over from the original:
    #   replace gs:// with /root/samples/
    #   an html for each sample. Main html has a selector to get us id of this?

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(item_dir, exist_ok=True)

    # Explicit UTF-8 so non-ASCII lyrics cannot fail on a non-UTF-8 locale.
    with open(f'{item_dir}/index.html', 'w', encoding='utf-8') as html:
        print(f"{item_name}", file=html)
        print("", file=html)

        total_length = data['total_length']
        total_tokens = data['total_tokens']
        alignment = data['alignment']
        lyrics = data["info"]["lyrics"]
        wav, sr = data['wav'], data['sr']
        genre, artist = data["info"]["genre"], data["info"]["artist"]

        has_alignment = alignment is not None
        if has_alignment:
            assert alignment.shape == (total_length, total_tokens)
            assert len(lyrics) == total_tokens, (
                f'Total_tokens: {total_tokens}, Lyrics Len: {len(lyrics)}. \n'
                f'Lyrics: {lyrics}'
            )
            # With zero tokens there is nothing to draw; the original loop
            # left its index unbound in that case and raised NameError.
            has_alignment = total_tokens > 0

        if has_alignment:
            # Strip unused columns: keep tokens up to and including the last
            # one that received any attention (a single column if none did).
            max_attn_at_token = np.max(alignment, axis=0)
            assert len(max_attn_at_token) == total_tokens
            attended = np.flatnonzero(max_attn_at_token > 0)
            last_token = int(attended[-1]) if attended.size else 0
            alignment = alignment[:, :last_token + 1]
            lyrics = lyrics[:last_token + 1]
            total_tokens = last_token + 1

            # Both images below start from the same 8-bit scaling.
            alignment_u8 = np.uint8(alignment * 255)

            # Small alignment image
            im = Image.fromarray(alignment_u8).resize((512, 1024))
            im = im.transpose(Image.ROTATE_90)
            img_src = 'align.png'
            im.save(f'{item_dir}/{img_src}')
            print("", file=html)

            # Smaller alignment json for animation
            total_alignment_length = total_length // 16
            small = Image.fromarray(alignment_u8).resize(
                (total_tokens, total_alignment_length))
            small = small.filter(ImageFilter.GaussianBlur(radius=1.5))
            align_src = 'align.json'
            with open(f'{item_dir}/{align_src}', 'w') as f:
                json.dump(np.asarray(small).tolist(), f)

        # Audio
        wav_src = 'audio.wav'
        soundfile.write(f'{item_dir}/{wav_src}', wav, samplerate=sr, format='wav')
        print("", file=html)

        # Labels and Lyrics
        print("\n", end="", file=html)
        print(f"\nArtist {artist}, Genre {genre}\n", file=html)
        lyrics = list(lyrics)  # already characters actually
        lyrics = [''] + lyrics[:-1]  # input lyrics are shifted by 1
        for c in lyrics:
            print(f"{c}", end="", file=html)
        print("\n", file=html)
        with open(f'{item_dir}/lyrics.json', 'w') as f:
            json.dump(lyrics, f)

        if has_alignment:
            # JS for alignment animation
            print("", file=html)
        print("", file=html)