import os
import sys

import numpy as np
import piano_transcription_inference
import torch

sys.path.append('../../')
from src.music.utils import get_out_path, load_audio
from src.music.config import CHKPT_PATH_TRANSCRIPTION, FPS, MIN_LEN, CROP_LEN

device = 'cuda' if torch.cuda.is_available() else 'cpu'
TRANSCRIPTOR = piano_transcription_inference.PianoTranscription(device=device,
                                                                checkpoint_path=CHKPT_PATH_TRANSCRIPTION)


def audio2midi(audio_path, midi_path=None, crop=CROP_LEN, random_crop=True, verbose=False, level=0):
    """Transcribe an mp3 file to MIDI with the piano_transcription_inference model.

    Returns (midi_path, '') on success, or (None, error_msg) on failure.
    """
    if verbose and crop < MIN_LEN + 2:
        print('crop is shorter than the minimal length of a tune')
    assert audio_path.endswith('.mp3'), 'audio2midi expects an .mp3 input'
    if midi_path is None:
        midi_path, _, _ = get_out_path(in_path=audio_path, in_word='audio', out_word='midi', out_extension='.mid')

    if verbose:
        print(' ' * level + f'Transcribing {audio_path}.')
    if os.path.exists(midi_path):
        if verbose:
            print(' ' * (level + 2) + 'Midi file already exists.')
        return midi_path, ''

    error_msg = 'Error in transcription. '
    try:
        error_msg += 'Maybe in audio loading?'
        audio, _ = load_audio(audio_path, sr=FPS, mono=True)

        error_msg += ' Nope. Cropping?'
        # Crop long audio to `crop` seconds, either from a random offset or from the start.
        if isinstance(crop, int) and len(audio) > FPS * crop:
            rc_str = ' (random crop)' if random_crop else ' (start crop)'
            if verbose:
                print(' ' * (level + 2) + f'Cropping the song to {crop}s before transcription{rc_str}.')
            size_crop = FPS * crop
            if random_crop:
                index_beginning = np.random.randint(len(audio) - size_crop - 1)
            else:
                index_beginning = 0
            audio = audio[index_beginning: index_beginning + size_crop]

        error_msg += ' Nope. Transcription?'
        TRANSCRIPTOR.transcribe(audio, midi_path)
        error_msg += ' Nope.'

        extra = f' Saved to {midi_path}' if midi_path else ''
        if verbose:
            print(' ' * (level + 2) + f'Success!{extra}')
        return midi_path, ''
    except Exception:
        if verbose:
            print(' ' * (level + 2) + 'Transcription failed.')
        # Remove any partial MIDI file left behind by the failed transcription.
        if os.path.exists(midi_path):
            os.remove(midi_path)
        return None, error_msg + ' Yes.'
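

# Minimal usage sketch (assumption: 'data/audio/sample.mp3' is a hypothetical
# path used only for illustration; point it at a real mp3 from your audio folder).
if __name__ == '__main__':
    midi_path, error = audio2midi('data/audio/sample.mp3', verbose=True)
    if midi_path is None:
        print(error)
    else:
        print(f'MIDI written to {midi_path}')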