{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "vctk-prep-intro",
   "metadata": {},
   "source": [
    "# VCTK data preparation\n",
    "\n",
    "Concatenates the recordings of each selected VCTK speaker, cuts them at pauses into clips of at least five seconds (the last clip of a speaker may be shorter), exports the clips as 24 kHz mono wav files, and writes shuffled `train_list.txt` / `val_list.txt` files (90 % / 10 %) with one `path|speaker index` entry per line."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "347ace04",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import random\n",
    "import re\n",
    "\n",
    "import pandas as pd\n",
    "from pydub import AudioSegment\n",
    "from pydub.silence import split_on_silence\n",
    "from scipy.io import wavfile\n",
    "\n",
    "# VCTK Corpus Path\n",
    "__CORPUSPATH__ = os.path.expanduser(\"~/data/VCTK-Corpus\")\n",
    "\n",
    "# output path\n",
    "__OUTPATH__ = \"./Data\"\n",
    "\n",
    "# seed for the train/validation shuffle, so that a re-run reproduces the same split\n",
    "__SEED__ = 1234"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ce9eb2e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def split(sound):\n",
    "    \"\"\"Cut `sound` into chunks at its pauses.\n",
    "\n",
    "    A pause is at least 100 ms of audio that is more than 16 dB quieter than\n",
    "    the loudness of the whole segment (`sound.dBFS`); up to 100 ms of silence\n",
    "    is kept around every chunk.\n",
    "\n",
    "    Returns the list of chunks produced by pydub's `split_on_silence`.\n",
    "    \"\"\"\n",
    "    return split_on_silence(\n",
    "        sound,\n",
    "        min_silence_len=100,\n",
    "        silence_thresh=sound.dBFS - 16,\n",
    "        keep_silence=100,\n",
    "    )\n",
    "\n",
    "\n",
    "def combine(_src):\n",
    "    \"\"\"Concatenate every `.wav` file directly inside `_src` into one segment.\n",
    "\n",
    "    Files are appended in sorted filename order, so the result does not depend\n",
    "    on the arbitrary order in which `os.listdir` returns them. An empty\n",
    "    `AudioSegment` is returned when the directory holds no `.wav` file.\n",
    "    \"\"\"\n",
    "    audio = AudioSegment.empty()\n",
    "    for filename in sorted(os.listdir(_src)):\n",
    "        if filename.endswith('.wav'):\n",
    "            audio += AudioSegment.from_wav(os.path.join(_src, filename))\n",
    "    return audio\n",
    "\n",
    "\n",
    "def save_chunks(chunks, directory, target_length=5 * 1000, frame_rate=24000):\n",
    "    \"\"\"Merge `chunks` into longer clips and export them as numbered wav files.\n",
    "\n",
    "    Chunks are appended to the current clip until it is at least\n",
    "    `target_length` milliseconds long; the following chunk then starts a new\n",
    "    clip, so only the last clip can end up shorter. Each clip is converted to\n",
    "    mono at `frame_rate` Hz and written to `directory` as 1.wav, 2.wav, ...\n",
    "\n",
    "    Nothing is written, and `directory` is not created, when `chunks` is empty.\n",
    "    \"\"\"\n",
    "    if not chunks:\n",
    "        # chunks[0] below would raise IndexError; leaving the directory absent\n",
    "        # also lets a later run retry this speaker\n",
    "        return\n",
    "\n",
    "    os.makedirs(directory, exist_ok=True)\n",
    "\n",
    "    output_chunks = [chunks[0]]\n",
    "    for chunk in chunks[1:]:\n",
    "        if len(output_chunks[-1]) < target_length:\n",
    "            output_chunks[-1] += chunk\n",
    "        else:\n",
    "            # the current clip has reached the target length, start a new one\n",
    "            output_chunks.append(chunk)\n",
    "\n",
    "    for counter, chunk in enumerate(output_chunks, start=1):\n",
    "        chunk = chunk.set_frame_rate(frame_rate).set_channels(1)\n",
    "        chunk.export(os.path.join(directory, str(counter) + '.wav'), format=\"wav\")\n",
    "\n",
    "\n",
    "def write_list(data, path):\n",
    "    \"\"\"Write one `<Path>|<Speaker - 1>` line per row of `data` to the file `path`.\n",
    "\n",
    "    `data` needs the columns `Path` and `Speaker`; `Speaker` holds one-based\n",
    "    labels, which are written zero-based.\n",
    "    \"\"\"\n",
    "    lines = [\n",
    "        clip_path + \"|\" + str(speaker - 1) + '\\n'\n",
    "        for clip_path, speaker in zip(data['Path'], data['Speaker'])\n",
    "    ]\n",
    "    # the context manager closes the file even if writing fails\n",
    "    with open(path, \"w\") as text_file:\n",
    "        text_file.writelines(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "769a7f62",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Source: http://speech.ee.ntu.edu.tw/~jjery2243542/resource/model/is18/en_speaker_used.txt\n",
    "# Source: https://github.com/jjery2243542/voice_conversion\n",
    "\n",
    "speakers = [225,228,229,230,231,233,236,239,240,244,226,227,232,243,254,256,258,259,270,273]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9302fb6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# cut every speaker's recordings into clips and downsample to 24 kHz\n",
    "\n",
    "for p in speakers:\n",
    "    directory = os.path.join(__OUTPATH__, 'p' + str(p))\n",
    "    # an existing output folder means this speaker was already processed\n",
    "    if not os.path.exists(directory):\n",
    "        audio = combine(os.path.join(__CORPUSPATH__, 'wav48', 'p' + str(p)))\n",
    "        chunks = split(audio)\n",
    "        save_chunks(chunks, directory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4b0ca022",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get all speakers\n",
    "\n",
    "records = []\n",
    "for path, subdirs, files in os.walk(__OUTPATH__):\n",
    "    # clips live in folders named p<speaker id>; any other folder is skipped\n",
    "    # instead of failing in int()\n",
    "    match = re.fullmatch(r'p(\\d+)', os.path.basename(path))\n",
    "    if match is None:\n",
    "        continue\n",
    "    speaker = int(match.group(1))\n",
    "    if speaker not in speakers:\n",
    "        continue\n",
    "    for name in files:\n",
    "        if name.endswith(\".wav\"):\n",
    "            records.append({\"Path\": os.path.join(path, name), \"Speaker\": speakers.index(speaker) + 1})\n",
    "\n",
    "# explicit columns keep the frame usable even when no clip was found\n",
    "data_list = pd.DataFrame(records, columns=[\"Path\", \"Speaker\"])\n",
    "# sort first so that the seeded shuffle does not depend on the file system's listing order\n",
    "data_list = data_list.sort_values(\"Path\").sample(frac=1, random_state=__SEED__)\n",
    "\n",
    "# the first 10 % of the shuffled clips are held out for validation\n",
    "split_idx = round(len(data_list) * 0.1)\n",
    "\n",
    "test_data = data_list.iloc[:split_idx]\n",
    "train_data = data_list.iloc[split_idx:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88df2a45",
   "metadata": {},
   "outputs": [],
   "source": [
    "# write to file\n",
    "\n",
    "# the output folder does not exist yet when no clip has been exported\n",
    "os.makedirs(__OUTPATH__, exist_ok=True)\n",
    "\n",
    "write_list(train_data, os.path.join(__OUTPATH__, \"train_list.txt\"))\n",
    "write_list(test_data, os.path.join(__OUTPATH__, \"val_list.txt\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}