import json
import os

import numpy as np
import pretty_midi as pm
from music21 import converter, environment, stream
from music21.features import native, jSymbolic, DataSet

from src.music.utils import get_out_path
from src.music.utilities.handcoded_rep_utilities.tht import tactus_hypothesis_tracker, tracker_analysis
from src.music.utilities.handcoded_rep_utilities.loudness import get_loudness, compute_total_loudness, amplitude2db, velocity2amplitude, get_db_of_equivalent_loudness_at_440hz, pitch2freq

environment.set('musicxmlPath', '/home/cedric/Desktop/test/')
midi_path = "/home/cedric/Documents/pianocktail/data/music/processed/doug_mckenzie_processed/allthethings_reharmonized_processed.mid"
FEATURES_DICT_SCORE = dict(
    # strongest pulse: measures how fast the melody is
    # stronger_pulse=jSymbolic.StrongestRhythmicPulseFeature,
    # combined weight of the two strongest pulses, measures rhythmic consistency: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#combinedstrengthoftwostrongestrhythmicpulsesfeature
    pulse_strength_two=jSymbolic.CombinedStrengthOfTwoStrongestRhythmicPulsesFeature,
    # weight of the strongest pulse, measures rhythmic consistency: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#strengthofstrongestrhythmicpulsefeature
    pulse_strength=jSymbolic.StrengthOfStrongestRhythmicPulseFeature,
    # variability of attacks: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#variabilityoftimebetweenattacksfeature
)
FEATURES_DICT = dict(
    # bass register importance: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#importanceofbassregisterfeature
    # bass_register=jSymbolic.ImportanceOfBassRegisterFeature,
    # high register importance: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#importanceofhighregisterfeature
    # high_register=jSymbolic.ImportanceOfHighRegisterFeature,
    # middle register importance: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#importanceofmiddleregisterfeature
    # medium_register=jSymbolic.ImportanceOfMiddleRegisterFeature,
    # number of common pitches (each accounting for at least 9% of all notes): https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#numberofcommonpitchesfeature
    # common_pitches=jSymbolic.NumberOfCommonPitchesFeature,
    # pitch variety (pitches used at least once): https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#pitchvarietyfeature
    # pitch_variety=jSymbolic.PitchVarietyFeature,
    # attack_variability=jSymbolic.VariabilityOfTimeBetweenAttacksFeature,
    # staccato fraction: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#staccatoincidencefeature
    # staccato_score=jSymbolic.StaccatoIncidenceFeature,
    # mode analysis: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesNative.html
    av_melodic_interval=jSymbolic.AverageMelodicIntervalFeature,
    # chromatic motion: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#chromaticmotionfeature
    chromatic_motion=jSymbolic.ChromaticMotionFeature,
    # direction of motion (fraction of rising intervals): https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#directionofmotionfeature
    motion_direction=jSymbolic.DirectionOfMotionFeature,
    # duration of melodic arcs: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#durationofmelodicarcsfeature
    melodic_arcs_duration=jSymbolic.DurationOfMelodicArcsFeature,
    # size of melodic arcs: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#sizeofmelodicarcsfeature
    melodic_arcs_size=jSymbolic.SizeOfMelodicArcsFeature,
    # number of common melodic intervals (each at least 9% of all intervals): https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#numberofcommonmelodicintervalsfeature
    # common_melodic_intervals=jSymbolic.NumberOfCommonMelodicIntervalsFeature,
    # amount of arpeggiation: https://web.mit.edu/music21/doc/moduleReference/moduleFeaturesJSymbolic.html#amountofarpeggiationfeature
    # arpeggiato=jSymbolic.AmountOfArpeggiationFeature,
)
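
# A minimal sketch of how one of these extractors can be used on its own, outside
# the DataSet pipeline used further down. The extractor class is a real jSymbolic
# feature; this helper is illustrative and not called anywhere in this module.
def example_single_feature(score_stream):
    """Extract one jSymbolic feature from a parsed music21 stream,
    e.g. score_stream = converter.parse(midi_path)."""
    extractor = jSymbolic.ChromaticMotionFeature(score_stream)
    return extractor.extract().vector  # list holding the feature value(s)
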
def compute_beat_info(onsets):
    onsets_in_ms = np.array(onsets) * 1000
    tht = tactus_hypothesis_tracker.default_tht()
    trackers = tht(onsets_in_ms)
    top_hts = tracker_analysis.top_hypothesis(trackers, len(onsets_in_ms))
    beats = tracker_analysis.produce_beats_information(onsets_in_ms, top_hts, adapt_period=True,
                                                       adapt_phase=tht.eval_f, max_delta_bpm=250, avoid_quickturns=None)
    tempo = 1 / (np.mean(np.diff(beats)) / 1000) * 60  # mean inter-beat interval in s, converted to bpm
    conf_values = tracker_analysis.tht_tracking_confs(trackers, len(onsets_in_ms))
    pulse_clarity = np.mean(np.array(conf_values), axis=0)[1]
    return tempo, pulse_clarity
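
# Illustrative usage of compute_beat_info (a sketch; it relies on the in-house
# tht modules imported above behaving as used in the function). An isochronous
# onset train with two onsets per second should come out near 120 bpm.
def example_beat_info():
    onsets = np.arange(0, 10, 0.5)  # onset times in seconds
    tempo, pulse_clarity = compute_beat_info(onsets)
    return tempo, pulse_clarity  # tempo expected near 120 bpm
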
def dissonance_score(A):
    """
    Given a piano-roll indicator matrix representation of a musical work (128 pitches x beats),
    return the dissonance as a function of beats.
    Input:
        A - 128 x beats indicator matrix of MIDI pitch numbers
    """
    freq_rats = np.arange(1, 7)  # harmonic series ratios
    amps = np.exp(-.5 * freq_rats)  # partial amplitudes
    F0 = 8.1757989156  # base frequency for MIDI (note 0)
    diss = []  # list of dissonance values
    thresh = 1e-3
    for beat in A.T:
        idx = np.where(beat > thresh)[0]
        if len(idx):
            freqs, mags = [], []  # lists of partial frequencies and magnitudes
            for i in idx:
                freqs.extend(F0 * 2 ** (i / 12.0) * freq_rats)
                mags.extend(amps)
            freqs = np.array(freqs)
            mags = np.array(mags)
            sort_idx = freqs.argsort()
            d = compute_dissonance(freqs[sort_idx], mags[sort_idx])
            diss.append(d)
        else:
            diss.append(-1)  # null value
    diss = np.array(diss)
    return diss[np.where(diss != -1)]
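
# Illustrative usage of dissonance_score (a sketch): a two-beat indicator
# matrix with a C major triad on the first beat and a minor second on the
# second; the minor second should receive the larger dissonance value.
def example_dissonance_score():
    A = np.zeros((128, 2))
    A[[60, 64, 67], 0] = 1  # beat 0: C major triad (C4, E4, G4)
    A[[60, 61], 1] = 1      # beat 1: minor second (C4, C#4)
    return dissonance_score(A)  # expect the second value > the first
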
def compute_dissonance(freqs, amps):
    """
    From https://notebook.community/soundspotter/consonance/week1_consonance
    Compute dissonance between partials with center frequencies in freqs and amplitudes in amps,
    using a model of critical bandwidth. Based on Sethares, "Tuning, Timbre, Spectrum, Scale" (1998),
    after Plomp and Levelt (1965).
    Inputs:
        freqs - list of partial frequencies
        amps - list of corresponding amplitudes [default, uniformly 1]
    """
    b1, b2, s1, s2, c1, c2, Dstar = (-3.51, -5.75, 0.0207, 19.96, 5, -5, 0.24)
    f = np.array(freqs)
    a = np.array(amps)
    idx = np.argsort(f)
    f = f[idx]
    a = a[idx]
    N = f.size
    D = 0
    for i in range(1, N):
        Fmin = f[0:N - i]
        S = Dstar / (s1 * Fmin + s2)
        Fdif = f[i:N] - f[0:N - i]
        am = a[i:N] * a[0:N - i]
        Dnew = am * (c1 * np.exp(b1 * S * Fdif) + c2 * np.exp(b2 * S * Fdif))
        D += Dnew.sum()
    return D
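
# Illustrative comparison (a sketch): two pure tones a perfect fifth apart
# (440 and 660 Hz) versus roughly a minor second apart (440 and 466 Hz).
# The minor second falls inside the critical band, so its dissonance is larger.
def example_compute_dissonance():
    fifth = compute_dissonance([440.0, 660.0], [1.0, 1.0])
    minor_second = compute_dissonance([440.0, 466.16], [1.0, 1.0])
    return fifth, minor_second  # expect minor_second > fifth
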
def store_new_midi(notes, out_path):
    midi = pm.PrettyMIDI()
    midi.instruments.append(pm.Instrument(program=0, is_drum=False))
    midi.instruments[0].notes = notes
    midi.write(out_path)
    return midi
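
# Illustrative usage of store_new_midi (a sketch; the output path is a
# hypothetical temporary location): write two pretty_midi notes to disk.
def example_store_new_midi():
    notes = [pm.Note(velocity=90, pitch=60, start=0.0, end=0.5),
             pm.Note(velocity=90, pitch=64, start=0.5, end=1.0)]
    return store_new_midi(notes, '/tmp/example_two_notes.mid')
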
def processed2handcodedrep(midi_path, handcoded_rep_path=None, crop=30, verbose=False, save=True, return_rep=False, level=0):
    try:
        if not handcoded_rep_path:
            handcoded_rep_path, _, _ = get_out_path(in_path=midi_path, in_word='processed', out_word='handcoded_reps', out_extension='.mid')
        features = dict()
        if verbose: print(' ' * level + 'Computing handcoded representations')
        json_path = handcoded_rep_path.replace('.mid', '.json')  # features are cached here; the temporary .mid is removed after a successful run
        if os.path.exists(json_path):
            with open(json_path, 'r') as f:
                features = json.load(f)
            rep = np.array([features[k] for k in sorted(features.keys())])
            if rep.size == 49:  # stale representation size: discard the cache and recompute
                os.remove(json_path)
                features = dict()
            else:
                if verbose: print(' ' * (level + 2) + 'Already computed.')
                if return_rep:
                    return handcoded_rep_path, rep, ''
                else:
                    return handcoded_rep_path, ''
        midi = pm.PrettyMIDI(midi_path)  # load the midi with pretty_midi
        notes = midi.instruments[0].notes  # get the notes
        notes.sort(key=lambda x: (x.start, x.pitch))  # sort notes by start time, then pitch
        onsets, offsets, pitches, durations, velocities = [], [], [], [], []
        n_notes_cropped = len(notes)
        for i_n, n in enumerate(notes):
            if crop is not None and n.start > crop:  # keep only the notes starting within the first `crop` seconds
                n_notes_cropped = i_n
                break
            onsets.append(n.start)
            offsets.append(n.end)
            durations.append(n.end - n.start)
            pitches.append(n.pitch)
            velocities.append(n.velocity)
        notes = notes[:n_notes_cropped]
        midi = store_new_midi(notes, handcoded_rep_path)
        # pianoroll = midi.get_piano_roll()  # extract the piano-roll representation
        # compute loudness
        amplitudes = velocity2amplitude(np.array(velocities))
        power_dbs = amplitude2db(amplitudes)
        frequencies = pitch2freq(np.array(pitches))
        loudness_values = get_loudness(power_dbs, frequencies)
        # compute the average perceived loudness:
        # for each power, compute the loudness, then the power such that the loudness at 440 Hz would be equivalent,
        # equivalent_powers_dbs = get_db_of_equivalent_loudness_at_440hz(frequencies, power_dbs)
        # then get the corresponding amplitudes,
        # equivalent_amplitudes = 10 ** (equivalent_powers_dbs / 20)
        # and now use an amplitude model across the sample to compute the instantaneous amplitude, turn it back
        # to dBs, then to perceived loudness at the single frequency 440 Hz:
        # av_total_loudness, std_total_loudness = compute_total_loudness(equivalent_amplitudes, onsets, offsets)
        end_time = np.max(offsets)
        start_time = notes[0].start
        score = converter.parse(handcoded_rep_path)
        # note: score.chordify() returns a new stream; its return value was unused here, so the call is omitted
        # and the un-chordified score is what gets analyzed below
        notes_without_chords = stream.Stream(score.flatten().getElementsByClass('Note'))
        velocities_wo_chords, pitches_wo_chords = [], []
        onsets_wo_chords, offsets_wo_chords, durations_wo_chords = [], [], []
        for n in notes_without_chords:
            velocities_wo_chords.append(n.volume.velocity)
            pitches_wo_chords.append(n.pitch.midi)
            onsets_wo_chords.append(n.offset)  # music21 offsets are in quarter lengths
            offsets_wo_chords.append(onsets_wo_chords[-1] + n.seconds)  # n.seconds is clock time
            durations_wo_chords.append(n.seconds)
        amplitudes_wo_chords = velocity2amplitude(np.array(velocities_wo_chords))
        power_dbs_wo_chords = amplitude2db(amplitudes_wo_chords)
        frequencies_wo_chords = pitch2freq(np.array(pitches_wo_chords))
        loudness_values_wo_chords = get_loudness(power_dbs_wo_chords, frequencies_wo_chords)
        # compute the average perceived loudness on the chord-free notes (same commented-out pipeline as above):
        # equivalent_powers_dbs_wo_chords = get_db_of_equivalent_loudness_at_440hz(frequencies_wo_chords, power_dbs_wo_chords)
        # equivalent_amplitudes_wo_chords = 10 ** (equivalent_powers_dbs_wo_chords / 20)
        # av_total_loudness_wo_chords, std_total_loudness_wo_chords = compute_total_loudness(equivalent_amplitudes_wo_chords, onsets_wo_chords, offsets_wo_chords)
        ds = DataSet(classLabel='test')
        ds.addFeatureExtractors(list(FEATURES_DICT.values()))
        ds.addData(notes_without_chords)
        ds.process()
        # each row returned by getFeaturesAsList() starts with an identifier and ends with the class label,
        # hence the [1:-1] slice to keep the feature values only
        for k, f in zip(FEATURES_DICT.keys(), ds.getFeaturesAsList()[0][1:-1]):
            features[k] = f
        ds = DataSet(classLabel='test')
        ds.addFeatureExtractors(list(FEATURES_DICT_SCORE.values()))
        ds.addData(score)
        ds.process()
        for k, f in zip(FEATURES_DICT_SCORE.keys(), ds.getFeaturesAsList()[0][1:-1]):
            features[k] = f
        # # # # #
        # Register features
        # # # # #
        # features['av_pitch'] = np.mean(pitches)
        # features['std_pitch'] = np.std(pitches)
        # features['range_pitch'] = np.max(pitches) - np.min(pitches)  # a.k.a. ambitus
        # # # # #
        # Rhythmic features
        # # # # #
        # tempo, pulse_clarity = compute_beat_info(onsets[:n_notes_cropped])
        # features['pulse_clarity'] = pulse_clarity
        # features['tempo'] = tempo
        features['tempo_pm'] = midi.estimate_tempo()
        # # # # #
        # Temporal features
        # # # # #
        features['av_duration'] = np.mean(durations)
        # features['std_duration'] = np.std(durations)
        features['note_density'] = len(notes) / (end_time - start_time)
        # intervals_wo_chords = np.diff(onsets_wo_chords)
        # articulations = [max((i - d) / i, 0) for d, i in zip(durations_wo_chords, intervals_wo_chords) if i != 0]
        # features['articulation'] = np.mean(articulations)
        # features['av_duration_wo_chords'] = np.mean(durations_wo_chords)
        # features['std_duration_wo_chords'] = np.std(durations_wo_chords)
        # # # # #
        # Dynamics features
        # # # # #
        features['av_velocity'] = np.mean(velocities)
        features['std_velocity'] = np.std(velocities)
        features['av_loudness'] = np.mean(loudness_values)
        # features['std_loudness'] = np.std(loudness_values)
        features['range_loudness'] = np.max(loudness_values) - np.min(loudness_values)
        # features['av_integrated_loudness'] = av_total_loudness
        # features['std_integrated_loudness'] = std_total_loudness
        # features['av_velocity_wo_chords'] = np.mean(velocities_wo_chords)
        # features['std_velocity_wo_chords'] = np.std(velocities_wo_chords)
        # features['av_loudness_wo_chords'] = np.mean(loudness_values_wo_chords)
        # features['std_loudness_wo_chords'] = np.std(loudness_values_wo_chords)
        features['range_loudness_wo_chords'] = np.max(loudness_values_wo_chords) - np.min(loudness_values_wo_chords)
        # features['av_integrated_loudness_wo_chords'] = av_total_loudness_wo_chords
        # features['std_integrated_loudness_wo_chords'] = std_total_loudness_wo_chords
        # indices_with_intervals = np.where(intervals_wo_chords > 0.01)
        # features['av_loudness_change'] = np.mean(np.abs(np.diff(np.array(loudness_values_wo_chords)[indices_with_intervals])))  # accentuation
        # features['av_velocity_change'] = np.mean(np.abs(np.diff(np.array(velocities_wo_chords)[indices_with_intervals])))  # accentuation
        # # # # #
        # Harmony features
        # # # # #
        # get the major/minor score: https://web.mit.edu/music21/doc/moduleReference/moduleAnalysisDiscrete.html
        music_analysis = score.analyze('key')
        major_score = None
        minor_score = None
        for a in [music_analysis] + music_analysis.alternateInterpretations:
            if 'major' in str(a) and a.correlationCoefficient > 0:
                major_score = a.correlationCoefficient
            elif 'minor' in str(a) and a.correlationCoefficient > 0:
                minor_score = a.correlationCoefficient
            if major_score is not None and minor_score is not None:
                break
        features['major_minor'] = major_score / (major_score + minor_score)
        features['tonal_certainty'] = music_analysis.tonalCertainty()
        # features['av_sensory_dissonance'] = np.mean(dissonance_score(pianoroll))
        # TODO: only works for chords; do something with melodic intervals, e.g. the proportion that is not a third, fifth or seventh?
        # # # # #
        # Interval features
        # # # # #
        # https://web.mit.edu/music21/doc/moduleReference/moduleAnalysisPatel.html
        # features['melodic_interval_variability'] = analysis.patel.melodicIntervalVariability(notes_without_chords)
        # # # # #
        # Surprise features
        # # # # #
        # https://web.mit.edu/music21/doc/moduleReference/moduleAnalysisMetrical.html
        # analysis.metrical.thomassenMelodicAccent(notes_without_chords)
        # melodic_accents = [n.melodicAccent for n in notes_without_chords]
        # features['melodic_accent'] = np.mean(melodic_accents)
        if save:
            for k, v in features.items():
                features[k] = float(v)  # ensure JSON-serializable scalars
            with open(handcoded_rep_path.replace('.mid', '.json'), 'w') as f:
                json.dump(features, f)
        else:
            print(features)
        if os.path.exists(handcoded_rep_path):  # the temporary .mid is no longer needed
            os.remove(handcoded_rep_path)
        if verbose: print(' ' * (level + 2) + 'Success.')
        if return_rep:
            return handcoded_rep_path, np.array([features[k] for k in sorted(features.keys())]), ''
        else:
            return handcoded_rep_path, ''
    except Exception:
        if verbose: print(' ' * (level + 2) + 'Failed.')
        if return_rep:
            return None, None, 'error'
        else:
            return None, 'error'
if __name__ == '__main__':
    processed2handcodedrep(midi_path, '/home/cedric/Desktop/test.mid', save=False)