"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Reading metadata...: 5187it [00:00, 30765.68it/s]\n",
"Reading metadata...: 3703it [00:00, 26251.44it/s]\n",
"Reading metadata...: 19267it [00:00, 32817.93it/s]\n",
"The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [1001/5000 1:50:49 < 7:23:38, 0.15 it/s, Epoch 5.03/9223372036854775807]\n",
"
\n",
" \n",
" \n",
" \n",
" Step | \n",
" Training Loss | \n",
" Validation Loss | \n",
" Wer | \n",
"
\n",
" \n",
" \n",
" \n",
" 1000 | \n",
" 0.013700 | \n",
" 0.206843 | \n",
" 15.703064 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Reading metadata...: 5187it [00:00, 20496.49it/s]\n",
"Reading metadata...: 3703it [00:00, 22242.62it/s]\n",
"Reading metadata...: 19267it [00:00, 40619.61it/s]\n",
"Reading metadata...: 5187it [00:00, 26427.59it/s]\n",
"Reading metadata...: 3703it [00:00, 24175.02it/s]\n",
"Reading metadata...: 19267it [00:00, 37108.24it/s]\n",
"Reading metadata...: 5187it [00:00, 23505.99it/s]\n",
"Reading metadata...: 3703it [00:00, 24004.94it/s]\n",
"Reading metadata...: 19267it [00:00, 48305.49it/s]\n",
"Reading metadata...: 5187it [00:00, 61068.55it/s]\n",
"Reading metadata...: 3703it [00:00, 62782.48it/s]\n",
"Reading metadata...: 19267it [00:00, 83700.58it/s]\n",
"Reading metadata...: 5187it [00:00, 68870.74it/s]\n",
"Reading metadata...: 3703it [00:00, 50878.77it/s]\n",
"Reading metadata...: 19267it [00:00, 83165.23it/s]\n",
"***** Running Evaluation *****\n",
" Num examples: Unknown\n",
" Batch size = 8\n",
"Reading metadata...: 3859it [00:00, 15916.98it/s]\n",
"The following columns in the evaluation set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.\n",
"Saving model checkpoint to ./checkpoint-1000\n",
"Configuration saved in ./checkpoint-1000/config.json\n",
"Model weights saved in ./checkpoint-1000/pytorch_model.bin\n",
"Feature extractor saved in ./checkpoint-1000/preprocessor_config.json\n",
"tokenizer config file saved in ./checkpoint-1000/tokenizer_config.json\n",
"Special tokens file saved in ./checkpoint-1000/special_tokens_map.json\n",
"added tokens file saved in ./checkpoint-1000/added_tokens.json\n",
"Feature extractor saved in ./preprocessor_config.json\n",
"tokenizer config file saved in ./tokenizer_config.json\n",
"Special tokens file saved in ./special_tokens_map.json\n",
"added tokens file saved in ./added_tokens.json\n"
]
},
{
"ename": "IsADirectoryError",
"evalue": "[Errno 21] Is a directory: '/home/ubuntu/whisper-small-ro/./wandb/latest-run'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mIsADirectoryError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[35], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1536\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1531\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_wrapped \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\n\u001b[1;32m 1533\u001b[0m inner_training_loop \u001b[38;5;241m=\u001b[39m find_executable_batch_size(\n\u001b[1;32m 1534\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_inner_training_loop, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_train_batch_size, args\u001b[38;5;241m.\u001b[39mauto_find_batch_size\n\u001b[1;32m 1535\u001b[0m )\n\u001b[0;32m-> 1536\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1537\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1538\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1539\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:1861\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1858\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m=\u001b[39m epoch \u001b[38;5;241m+\u001b[39m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m steps_in_epoch\n\u001b[1;32m 1859\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[0;32m-> 1861\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_maybe_log_save_evaluate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mepoch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1862\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1863\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_substep_end(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:2128\u001b[0m, in \u001b[0;36mTrainer._maybe_log_save_evaluate\u001b[0;34m(self, tr_loss, model, trial, epoch, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 2125\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_report_to_hp_search(trial, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step, metrics)\n\u001b[1;32m 2127\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol\u001b[38;5;241m.\u001b[39mshould_save:\n\u001b[0;32m-> 2128\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_save_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetrics\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2129\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_save(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:2272\u001b[0m, in \u001b[0;36mTrainer._save_checkpoint\u001b[0;34m(self, model, trial, metrics)\u001b[0m\n\u001b[1;32m 2269\u001b[0m torch\u001b[38;5;241m.\u001b[39msave(rng_states, os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(output_dir, \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrng_state_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mprocess_index\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.pth\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 2271\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpush_to_hub:\n\u001b[0;32m-> 2272\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_push_from_checkpoint\u001b[49m\u001b[43m(\u001b[49m\u001b[43moutput_dir\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2274\u001b[0m \u001b[38;5;66;03m# Maybe delete some older checkpoints.\u001b[39;00m\n\u001b[1;32m 2275\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mshould_save:\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/transformers/trainer.py:3444\u001b[0m, in \u001b[0;36mTrainer._push_from_checkpoint\u001b[0;34m(self, checkpoint_folder)\u001b[0m\n\u001b[1;32m 3442\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 3443\u001b[0m commit_message \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTraining in progress, epoch \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mint\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mepoch)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 3444\u001b[0m _, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpush_in_progress \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrepo\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpush_to_hub\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3445\u001b[0m \u001b[43m \u001b[49m\u001b[43mcommit_message\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcommit_message\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_lfs_prune\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 3446\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3447\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 3448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mhub_strategy \u001b[38;5;241m==\u001b[39m HubStrategy\u001b[38;5;241m.\u001b[39mCHECKPOINT:\n\u001b[1;32m 3449\u001b[0m \u001b[38;5;66;03m# Move back the checkpoint to its place\u001b[39;00m\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1430\u001b[0m, in \u001b[0;36mRepository.push_to_hub\u001b[0;34m(self, commit_message, blocking, clean_ok, auto_lfs_prune)\u001b[0m\n\u001b[1;32m 1428\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRepo currently clean. Ignoring push_to_hub\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 1429\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1430\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgit_add\u001b[49m\u001b[43m(\u001b[49m\u001b[43mauto_lfs_track\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 1431\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_commit(commit_message)\n\u001b[1;32m 1432\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgit_push(\n\u001b[1;32m 1433\u001b[0m upstream\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124morigin \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcurrent_branch\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1434\u001b[0m blocking\u001b[38;5;241m=\u001b[39mblocking,\n\u001b[1;32m 1435\u001b[0m auto_lfs_prune\u001b[38;5;241m=\u001b[39mauto_lfs_prune,\n\u001b[1;32m 1436\u001b[0m )\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1113\u001b[0m, in \u001b[0;36mRepository.git_add\u001b[0;34m(self, pattern, auto_lfs_track)\u001b[0m\n\u001b[1;32m 1110\u001b[0m tracked_files \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mauto_track_large_files(pattern)\n\u001b[1;32m 1112\u001b[0m \u001b[38;5;66;03m# Read the remaining files and track them if they're binary\u001b[39;00m\n\u001b[0;32m-> 1113\u001b[0m tracked_files\u001b[38;5;241m.\u001b[39mextend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mauto_track_binary_files\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tracked_files:\n\u001b[1;32m 1116\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 1117\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAdding files tracked by Git LFS: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtracked_files\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m. This may take a\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1118\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m bit of time if the files are large.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1119\u001b[0m )\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:1001\u001b[0m, in \u001b[0;36mRepository.auto_track_binary_files\u001b[0;34m(self, pattern)\u001b[0m\n\u001b[1;32m 994\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m size_in_mb \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10\u001b[39m:\n\u001b[1;32m 995\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m 996\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mParsing a large file to check if binary or not. Tracking large\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 997\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m files using `repository.auto_track_large_files` is\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 998\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m recommended so as to not load the full file in memory.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 999\u001b[0m )\n\u001b[0;32m-> 1001\u001b[0m is_binary \u001b[38;5;241m=\u001b[39m \u001b[43mis_binary_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpath_to_file\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1003\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_binary:\n\u001b[1;32m 1004\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlfs_track(filename)\n",
"File \u001b[0;32m~/whisper-ft/lib/python3.8/site-packages/huggingface_hub/repository.py:235\u001b[0m, in \u001b[0;36mis_binary_file\u001b[0;34m(filename)\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;124;03mCheck if file is a binary file.\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;124;03m `bool`: `True` if the file passed is a binary file, `False` otherwise.\u001b[39;00m\n\u001b[1;32m 233\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 235\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 236\u001b[0m content \u001b[38;5;241m=\u001b[39m f\u001b[38;5;241m.\u001b[39mread(\u001b[38;5;241m10\u001b[39m \u001b[38;5;241m*\u001b[39m (\u001b[38;5;241m1024\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m2\u001b[39m)) \u001b[38;5;66;03m# Read a maximum of 10MB\u001b[39;00m\n\u001b[1;32m 238\u001b[0m \u001b[38;5;66;03m# Code sample taken from the following stack overflow thread\u001b[39;00m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;66;03m# https://stackoverflow.com/questions/898669/how-can-i-detect-if-a-file-is-binary-non-text-in-python/7392391#7392391\u001b[39;00m\n",
"\u001b[0;31mIsADirectoryError\u001b[0m: [Errno 21] Is a directory: '/home/ubuntu/whisper-small-ro/./wandb/latest-run'"
]
}
],
"source": [
"trainer.train()"
]
},
{
"cell_type": "markdown",
"id": "747c6a6e",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"(note that training may take some time to commence as we load the first training data samples with streaming mode)"
]
},
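{
   "cell_type": "markdown",
   "id": "b3f1a2c9",
   "metadata": {},
   "source": [
    "The run above crashed at the first checkpoint push: `huggingface_hub`'s `is_binary_file` helper tried to `open()` `wandb/latest-run`, which is a symlink to a directory inside the repository, raising `IsADirectoryError`. A minimal workaround sketch (assuming, as in the traceback, that the `wandb` directory lives inside the output repo; git-ignoring it is one possible fix, not an official one) is to exclude it from the push and resume from the saved checkpoint:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4d2b3e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the wandb run files out of the git repo so the next checkpoint push\n",
    "# does not try to open the `wandb/latest-run` directory symlink as a file.\n",
    "# (Workaround sketch; the path assumes the output_dir used above.)\n",
    "with open(\"./.gitignore\", \"a\") as f:\n",
    "    f.write(\"\\nwandb/\\n\")\n",
    "\n",
    "# Resume training from the last saved checkpoint instead of restarting.\n",
    "trainer.train(resume_from_checkpoint=\"./checkpoint-1000\")"
   ]
  },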
{
"cell_type": "markdown",
"id": "810ced54-7187-4a06-b2fe-ba6dcca94dc3",
"metadata": {},
"source": [
"We can label our checkpoint with the `whisper-event` tag on push by setting the appropriate key-word arguments (kwargs):"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6dd0e310-9b07-4133-ac14-2ed2d7524e22",
"metadata": {},
"outputs": [],
"source": [
"kwargs = {\n",
" \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
" \"dataset\": \"Common Voice 11.0\", # a 'pretty' name for the training dataset\n",
" \"language\": \"es\",\n",
" \"model_name\": \"Whisper Small Es - Sanchit Gandhi\", # a 'pretty' name for your model\n",
" \"finetuned_from\": \"openai/whisper-small\",\n",
" \"tasks\": \"automatic-speech-recognition\",\n",
" \"tags\": \"whisper-event\",\n",
"}"
]
},
{
"cell_type": "markdown",
"id": "090d676a-f944-4297-a938-a40eda0b2b68",
"metadata": {},
"source": [
"The training results can now be uploaded to the Hub. To do so, execute the `push_to_hub` command:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95737cda-c5dd-4887-a4d0-dfcb0d61d977",
"metadata": {},
"outputs": [],
"source": [
"trainer.push_to_hub(**kwargs)"
]
}
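,
  {
   "cell_type": "markdown",
   "id": "e8f0a1b2",
   "metadata": {},
   "source": [
    "Once the upload completes, the fine-tuned model can be loaded straight from the Hub for inference with the `pipeline` API. A usage sketch (`your-username/whisper-small-ro` and `sample.wav` are placeholders for your Hub repo id and a local audio file):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1a2b3c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from transformers import pipeline\n",
    "\n",
    "# Load the fine-tuned checkpoint from the Hub (placeholder repo id).\n",
    "pipe = pipeline(\"automatic-speech-recognition\", model=\"your-username/whisper-small-ro\")\n",
    "\n",
    "# Transcribe a local audio file (placeholder path).\n",
    "print(pipe(\"sample.wav\")[\"text\"])"
   ]
  }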
],
"metadata": {
"kernelspec": {
"display_name": "whisper-ft",
"language": "python",
"name": "whisper-ft"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}