rhythmic feature extraction code

ronggong · Apr 20, 2018 · ed05b81 · ed05b81
1 parent fb0b862
commit ed05b81
Show file tree

Hide file tree

Showing 4 changed files with 162 additions and 6 deletions.
diff --git a/distribute_proposed_method.py b/distribute_proposed_method.py
@@ -17,6 +17,7 @@
 from general.parameters import varin
 from general.utilFunctions import smooth_obs
 from general.utilFunctions import parse_score
+from general.utilFunctions import get_onset_time_syllable_duration_ref
 
 from plot_code import figure_plot_joint
 
@@ -35,7 +36,7 @@
 score_file = './data/score_exercise_03.txt'
 score_png = './data/exercise_03.png'
 
-syllable_durations, syllable_labels = parse_score(filename_score=score_file)
+tempo, syllable_durations, syllable_labels, beats = parse_score(filename_score=score_file)
 
 print('syllable durations (second):')
 print(syllable_durations)
@@ -45,10 +46,18 @@
 print(syllable_labels)
 print('\n')
 
+print(beats)
+
 # get wav duration
 data_wav, fs_wav = sf.read(wav_file)
 time_wav = len(data_wav)/float(fs_wav)
 
+onset_time_ref, syllable_durations_ref = get_onset_time_syllable_duration_ref(syllable_durations=syllable_durations,
+                                                                              len_audio=time_wav)
+
+print(onset_time_ref)
+print(syllable_durations_ref)
+
 results_vad = VAD(wav_file=wav_file, hopsize_t=hopsize_t)
 
 # calculate log mel feature
@@ -79,13 +88,17 @@
 # syllable boundaries
 boundaries_syllable_start_time = np.array(boundaries_syllable[:-1])*hopsize_t
 boundaries_syllable_end_time = np.array(boundaries_syllable[1:])*hopsize_t
+syllable_durations_detected = boundaries_syllable_end_time - boundaries_syllable_start_time
 
 print('Detected syllable onset times (second):')
 print(boundaries_syllable_start_time)
 print('\n')
 
+print(syllable_durations_detected)
+
 figure_plot_joint(score_png=score_png,
                   mfcc_line=log_mel_old,
+                  onset_time_ref=onset_time_ref,
                   vad=results_vad,
                   obs_syllable=obs_syllable,
                   boundaries_syllable_start_time=boundaries_syllable_start_time,

diff --git a/feature_extraction.py b/feature_extraction.py
@@ -0,0 +1,102 @@
+import numpy as np
+from scipy.stats import skew
+from scipy.stats import kurtosis
+
+
+class FeatureExtraction(object):
+    """
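+    extract rhythmic deviation features: compares reference and detected onset times and syllable durations, with optional selection of the deviations by beat label ("on", "off" or None)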
+    """
+    def __init__(self,
+                 onset_time_ref,
+                 syllable_durations_ref,
+                 onset_time_detected,
+                 syllable_durations_detected,
+                 beats):
+        self.onset_time_ref = onset_time_ref
+        self.syllable_durations_ref = syllable_durations_ref
+        self.onset_time_detected = onset_time_detected
+        self.syllable_durations_detected = syllable_durations_detected
+        self.beats = beats
+
+    def onset_deviation(self):
+        return np.abs(self.onset_time_ref - self.onset_time_detected)
+
+    def syllable_durations_weighted_onset_deviation(self, od):
+        return od/self.syllable_durations_ref
+
+    def duration_deviation(self):
+        return np.abs(self.syllable_durations_ref - self.syllable_durations_detected)
+
+    def syllable_durations_weighted_duration_deviation(self, dd):
+        return dd/self.syllable_durations_ref
+
+    def on_beat_deviation(self, deviation):
+        indices = [i for i, x in enumerate(self.beats) if x == "on"]
+        return deviation[indices]
+
+    def off_beat_deviation(self, deviation):
+        indices = [i for i, x in enumerate(self.beats) if x == "off"]
+        return deviation[indices]
+
+    def other_beat_deviation(self, deviation):
+        indices = [i for i, x in enumerate(self.beats) if x is None]
+        return deviation[indices]
+
+    @staticmethod
+    def statistics_deviation(deviation):
+        return [np.min(deviation), np.max(deviation), np.median(deviation),
+                np.mean(deviation), np.std(deviation), skew(deviation), kurtosis(deviation)]
+
+
+if __name__ == '__main__':
+    # test variables
+    onset_time_ref = np.array([0.,         2.72727891, 3.06818878, 3.40909864, 3.7500085,  4.09091837,
+                              4.43182823, 4.7727381,  5.11364796, 5.45455782, 6.13637755, 6.81819728,
+                              7.50001701, 8.18183673, 9.54547619])
+    syllable_durations_ref = np.array([2.72727891, 0.34090986, 0.34090986, 0.34090986, 0.34090986, 0.34090986,
+                                      0.34090986, 0.34090986, 0.34090986, 0.68181973, 0.68181973, 0.68181973,
+                                      0.68181973, 1.36363946, 1.36363946])
+    onset_time_detected = np.array([0.,   2.59, 3.02, 3.3,  3.69, 4.,
+                                    4.35, 4.71, 5.04, 5.39, 6.07, 6.54, 7.3,  7.91, 9.56])
+    syllable_durations_detected = np.array([2.59, 0.43, 0.28, 0.39,
+                                            0.31, 0.35, 0.36, 0.33,
+                                            0.35, 0.68, 0.47, 0.76,
+                                            0.61, 1.65, 1.34])
+    beats = [None, 'on', None, 'off', None, 'on', None, 'off', None, 'on', 'off', 'on', 'off', 'on', 'on']
+
+    fe = FeatureExtraction(onset_time_ref=onset_time_ref[1:],
+                           syllable_durations_ref=syllable_durations_ref[1:],
+                           onset_time_detected=onset_time_detected[1:],
+                           syllable_durations_detected=syllable_durations_detected[1:],
+                           beats=beats[1:])
+
+    # general features
+    od = fe.onset_deviation()
+    sdwod = fe.syllable_durations_weighted_onset_deviation(od)
+    dd = fe.duration_deviation()
+    sdwdd = fe.syllable_durations_weighted_duration_deviation(dd)
+
+    # on beat features
+    od_on = fe.on_beat_deviation(od)
+    sdwod_on = fe.on_beat_deviation(sdwod)
+    dd_on = fe.on_beat_deviation(dd)
+    sdwdd_on = fe.on_beat_deviation(sdwdd)
+
+    # off beat features
+    od_off = fe.off_beat_deviation(od)
+    sdwod_off = fe.off_beat_deviation(sdwod)
+    dd_off = fe.off_beat_deviation(dd)
+    sdwdd_off = fe.off_beat_deviation(sdwdd)
+
+    # other beats features
+    od_other = fe.other_beat_deviation(od)
+    sdwod_other = fe.other_beat_deviation(sdwod)
+    dd_other = fe.other_beat_deviation(dd)
+    sdwdd_other = fe.other_beat_deviation(sdwdd)
+
+    # calculate feature statistics
+    feature_set = fe.statistics_deviation(od) + fe.statistics_deviation(sdwod) + \
+                  fe.statistics_deviation(dd) + fe.statistics_deviation(sdwdd)
+
+    print(feature_set)
diff --git a/general/utilFunctions.py b/general/utilFunctions.py
@@ -23,9 +23,47 @@ def parse_score(filename_score):
     """
     with open(filename_score, 'r') as scorefile:
         data = scorefile.readlines()
-        syllable_durations, syllable_labels = [], []
-        for line in data:
-            syllable_labels.append(line.split()[0])
-            syllable_durations.append(float(line.split()[1]))
+        syllable_durations, syllable_labels, beats = [], [], []
+        tempo = float(data[0].split()[1])
+        for line in data[1:]:
+            list_line = line.split()
+            if len(list_line) == 3:
+                beats.append(list_line[2])
+            else:
+                beats.append(None)
+            syllable_labels.append(list_line[0])
+            syllable_durations.append(float(list_line[1]))
     syllable_durations = np.array(syllable_durations)
-    return syllable_durations, syllable_labels
+    return tempo, syllable_durations, syllable_labels, beats
+
+
+def get_onset_time_syllable_duration_ref(syllable_durations, len_audio):
+    """
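+    get onset time positions from the syllable durations
+    :param syllable_durations: array of syllable durations; they are normalized by their sum, so only their relative proportions matter
+    :param len_audio: total length the normalized durations are scaled to (distribute_proposed_method.py passes the wav duration in seconds)
+    :return: tuple (onset times, syllable durations), both rescaled so that the durations sum to len_audio and the first onset is 0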
+    """
+    # normalize the syllable durations
+    sd_norm = syllable_durations / np.sum(syllable_durations)
+
+    onset_time_norm = np.cumsum(sd_norm)
+
+    # drop the last cumulative value and insert a 0 at the beginning, so the first onset is at the start of the excerpt
+    onset_time_norm = np.insert(onset_time_norm[:-1], 0, 0.0)
+
+    onset_time = onset_time_norm * len_audio
+
+    return onset_time, sd_norm * len_audio
+
+
+if __name__ == '__main__':
+    filename_score = '../data/score_exercise_01.txt'
+    tempo, syllable_durations, syllable_lists, beats = parse_score(filename_score=filename_score)
+    print(tempo)
+    print(syllable_durations)
+    print(syllable_lists)
+    print(beats)
+
+    get_onset_time_syllable_duration_ref(syllable_durations=syllable_durations,
+                                         len_audio=1.0)
diff --git a/plot_code.py b/plot_code.py
@@ -12,6 +12,7 @@
 
 def figure_plot_joint(score_png,
                       mfcc_line,
+                      onset_time_ref,
                       vad,
                       obs_syllable,
                       boundaries_syllable_start_time,
@@ -27,6 +28,8 @@ def figure_plot_joint(score_png,
     y = np.arange(0, 80)
     x = np.arange(0, mfcc_line.shape[0]) * hopsize_t
     plt.pcolormesh(x, y, np.transpose(mfcc_line[:, 80 * 7:80 * 8]))
+    for otr in onset_time_ref:
+        plt.axvline(otr, color='r', linewidth=2)
     ax2.set_ylabel('Mel bands', fontsize=12)
     ax2.get_xaxis().set_visible(False)
     ax2.axis('tight')