Skip to content

Commit

Permalink
rhythmic feature extraction code
Browse files Browse the repository at this point in the history
  • Loading branch information
ronggong committed Apr 20, 2018
1 parent fb0b862 commit ed05b81
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 6 deletions.
15 changes: 14 additions & 1 deletion distribute_proposed_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from general.parameters import varin
from general.utilFunctions import smooth_obs
from general.utilFunctions import parse_score
from general.utilFunctions import get_onset_time_syllable_duration_ref

from plot_code import figure_plot_joint

Expand All @@ -35,7 +36,7 @@
score_file = './data/score_exercise_03.txt'
score_png = './data/exercise_03.png'

syllable_durations, syllable_labels = parse_score(filename_score=score_file)
tempo, syllable_durations, syllable_labels, beats = parse_score(filename_score=score_file)

print('syllable durations (second):')
print(syllable_durations)
Expand All @@ -45,10 +46,18 @@
print(syllable_labels)
print('\n')

print(beats)

# get wav duration
data_wav, fs_wav = sf.read(wav_file)
time_wav = len(data_wav)/float(fs_wav)

onset_time_ref, syllable_durations_ref = get_onset_time_syllable_duration_ref(syllable_durations=syllable_durations,
len_audio=time_wav)

print(onset_time_ref)
print(syllable_durations_ref)

results_vad = VAD(wav_file=wav_file, hopsize_t=hopsize_t)

# calculate log mel feature
Expand Down Expand Up @@ -79,13 +88,17 @@
# syllable boundaries
boundaries_syllable_start_time = np.array(boundaries_syllable[:-1])*hopsize_t
boundaries_syllable_end_time = np.array(boundaries_syllable[1:])*hopsize_t
syllable_durations_detected = boundaries_syllable_end_time - boundaries_syllable_start_time

print('Detected syllable onset times (second):')
print(boundaries_syllable_start_time)
print('\n')

print(syllable_durations_detected)

figure_plot_joint(score_png=score_png,
mfcc_line=log_mel_old,
onset_time_ref=onset_time_ref,
vad=results_vad,
obs_syllable=obs_syllable,
boundaries_syllable_start_time=boundaries_syllable_start_time,
Expand Down
102 changes: 102 additions & 0 deletions feature_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import numpy as np
from scipy.stats import skew
from scipy.stats import kurtosis


class FeatureExtraction(object):
    """
    Extract rhythmic deviation features by comparing a reference syllable
    segmentation with a detected one.

    The four numeric sequences and ``beats`` are indexed in parallel:
    element ``i`` of each one describes the same syllable.

    :param onset_time_ref: reference onset time of each syllable
    :param syllable_durations_ref: reference duration of each syllable
    :param onset_time_detected: detected onset time of each syllable
    :param syllable_durations_detected: detected duration of each syllable
    :param beats: per-syllable beat label, one of ``"on"``, ``"off"`` or None
    """
    def __init__(self,
                 onset_time_ref,
                 syllable_durations_ref,
                 onset_time_detected,
                 syllable_durations_detected,
                 beats):
        # Coerce to float arrays so that plain Python lists are accepted and
        # the element-wise arithmetic below is always true (float) division.
        self.onset_time_ref = np.asarray(onset_time_ref, dtype=float)
        self.syllable_durations_ref = np.asarray(syllable_durations_ref, dtype=float)
        self.onset_time_detected = np.asarray(onset_time_detected, dtype=float)
        self.syllable_durations_detected = np.asarray(syllable_durations_detected, dtype=float)
        self.beats = beats

    def onset_deviation(self):
        """
        :return: element-wise absolute difference between the reference and
                 the detected onset times
        """
        return np.abs(self.onset_time_ref - self.onset_time_detected)

    def syllable_durations_weighted_onset_deviation(self, od):
        """
        :param od: onset deviation, e.g. the output of ``onset_deviation``
        :return: ``od`` divided element-wise by the reference syllable durations
        """
        return np.asarray(od) / self.syllable_durations_ref

    def duration_deviation(self):
        """
        :return: element-wise absolute difference between the reference and
                 the detected syllable durations
        """
        return np.abs(self.syllable_durations_ref - self.syllable_durations_detected)

    def syllable_durations_weighted_duration_deviation(self, dd):
        """
        :param dd: duration deviation, e.g. the output of ``duration_deviation``
        :return: ``dd`` divided element-wise by the reference syllable durations
        """
        return np.asarray(dd) / self.syllable_durations_ref

    def _select_by_beat(self, deviation, matches):
        """
        Pick the entries of ``deviation`` whose beat label satisfies ``matches``.

        :param deviation: per-syllable values, indexed like ``self.beats``
        :param matches: predicate called with each beat label
        :return: array holding the selected values (empty when nothing matches)
        """
        indices = [i for i, label in enumerate(self.beats) if matches(label)]
        # An explicit integer dtype keeps an empty selection a valid index.
        return np.asarray(deviation)[np.array(indices, dtype=int)]

    def on_beat_deviation(self, deviation):
        """
        :param deviation: per-syllable values, indexed like ``self.beats``
        :return: the values of the syllables labelled ``"on"``
        """
        return self._select_by_beat(deviation, lambda label: label == "on")

    def off_beat_deviation(self, deviation):
        """
        :param deviation: per-syllable values, indexed like ``self.beats``
        :return: the values of the syllables labelled ``"off"``
        """
        return self._select_by_beat(deviation, lambda label: label == "off")

    def other_beat_deviation(self, deviation):
        """
        :param deviation: per-syllable values, indexed like ``self.beats``
        :return: the values of the syllables whose beat label is None
        """
        return self._select_by_beat(deviation, lambda label: label is None)

    @staticmethod
    def statistics_deviation(deviation):
        """
        Summarize a deviation vector with seven statistics.

        :param deviation: values to summarize
        :return: list ``[min, max, median, mean, std, skewness, kurtosis]``
        """
        return [np.min(deviation), np.max(deviation), np.median(deviation),
                np.mean(deviation), np.std(deviation), skew(deviation), kurtosis(deviation)]


if __name__ == '__main__':
    # Hand-written example data. The first entry of every sequence is dropped
    # (via [1:]) before the data is handed to the extractor.
    ref_onsets = np.array([0., 2.72727891, 3.06818878, 3.40909864, 3.7500085, 4.09091837,
                           4.43182823, 4.7727381, 5.11364796, 5.45455782, 6.13637755, 6.81819728,
                           7.50001701, 8.18183673, 9.54547619])
    ref_durations = np.array([2.72727891, 0.34090986, 0.34090986, 0.34090986, 0.34090986, 0.34090986,
                              0.34090986, 0.34090986, 0.34090986, 0.68181973, 0.68181973, 0.68181973,
                              0.68181973, 1.36363946, 1.36363946])
    detected_onsets = np.array([0., 2.59, 3.02, 3.3, 3.69, 4.,
                                4.35, 4.71, 5.04, 5.39, 6.07, 6.54, 7.3, 7.91, 9.56])
    detected_durations = np.array([2.59, 0.43, 0.28, 0.39,
                                   0.31, 0.35, 0.36, 0.33,
                                   0.35, 0.68, 0.47, 0.76,
                                   0.61, 1.65, 1.34])
    beat_labels = [None, 'on', None, 'off', None, 'on', None, 'off', None, 'on', 'off', 'on', 'off', 'on', 'on']

    fe = FeatureExtraction(onset_time_ref=ref_onsets[1:],
                           syllable_durations_ref=ref_durations[1:],
                           onset_time_detected=detected_onsets[1:],
                           syllable_durations_detected=detected_durations[1:],
                           beats=beat_labels[1:])

    # The four general deviation vectors: onset deviation, its duration-weighted
    # variant, duration deviation and its duration-weighted variant.
    od = fe.onset_deviation()
    sdwod = fe.syllable_durations_weighted_onset_deviation(od)
    dd = fe.duration_deviation()
    sdwdd = fe.syllable_durations_weighted_duration_deviation(dd)
    general_deviations = (od, sdwod, dd, sdwdd)

    # Per-beat-class subsets of every general vector; they are computed here
    # but do not feed into the printed feature set.
    on_beat_features = [fe.on_beat_deviation(dev) for dev in general_deviations]
    off_beat_features = [fe.off_beat_deviation(dev) for dev in general_deviations]
    other_beat_features = [fe.other_beat_deviation(dev) for dev in general_deviations]

    # Concatenate the statistics of the general vectors, in order.
    feature_set = []
    for dev in general_deviations:
        feature_set = feature_set + fe.statistics_deviation(dev)

    print(feature_set)
48 changes: 43 additions & 5 deletions general/utilFunctions.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,47 @@ def parse_score(filename_score):
"""
with open(filename_score, 'r') as scorefile:
data = scorefile.readlines()
syllable_durations, syllable_labels = [], []
for line in data:
syllable_labels.append(line.split()[0])
syllable_durations.append(float(line.split()[1]))
syllable_durations, syllable_labels, beats = [], [], []
tempo = float(data[0].split()[1])
for line in data[1:]:
list_line = line.split()
if len(list_line) == 3:
beats.append(list_line[2])
else:
beats.append(None)
syllable_labels.append(list_line[0])
syllable_durations.append(float(list_line[1]))
syllable_durations = np.array(syllable_durations)
return syllable_durations, syllable_labels
return tempo, syllable_durations, syllable_labels, beats


def get_onset_time_syllable_duration_ref(syllable_durations, len_audio):
    """
    Derive reference onset times and durations by stretching the score
    durations so that together they span the whole audio.

    The durations are normalized to sum to 1 and then multiplied by
    ``len_audio``; each onset is the cumulative sum of the scaled durations
    that precede it, so the first onset is always 0. No check is made for a
    zero total duration.

    :param syllable_durations: sequence of syllable durations from the score
    :param len_audio: length of the audio, in the unit wanted for the output
    :return: tuple ``(onset_time, durations)``, two arrays with one entry per
             syllable; both are empty when ``syllable_durations`` is empty
    """
    durations = np.asarray(syllable_durations)

    # Without this guard an empty score would yield one onset (the inserted 0)
    # but no durations, i.e. two outputs of different lengths.
    if durations.size == 0:
        return np.array([], dtype=float), np.array([], dtype=float)

    # normalize the syllable durations
    sd_norm = durations / np.sum(durations)

    # the cumulative sum gives syllable end positions; drop the last one and
    # insert 0 at the beginning of the excerpt to obtain start positions
    onset_time_norm = np.insert(np.cumsum(sd_norm)[:-1], 0, 0.0)

    return onset_time_norm * len_audio, sd_norm * len_audio


if __name__ == '__main__':
    # Manual check: parse one exercise score and dump every parsed field.
    score_path = '../data/score_exercise_01.txt'
    parsed = parse_score(filename_score=score_path)
    tempo, syllable_durations, syllable_lists, beats = parsed
    for field in (tempo, syllable_durations, syllable_lists, beats):
        print(field)

    # Exercise the onset computation on the parsed durations; the result is
    # discarded.
    get_onset_time_syllable_duration_ref(len_audio=1.0,
                                         syllable_durations=syllable_durations)
3 changes: 3 additions & 0 deletions plot_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

def figure_plot_joint(score_png,
mfcc_line,
onset_time_ref,
vad,
obs_syllable,
boundaries_syllable_start_time,
Expand All @@ -27,6 +28,8 @@ def figure_plot_joint(score_png,
y = np.arange(0, 80)
x = np.arange(0, mfcc_line.shape[0]) * hopsize_t
plt.pcolormesh(x, y, np.transpose(mfcc_line[:, 80 * 7:80 * 8]))
for otr in onset_time_ref:
plt.axvline(otr, color='r', linewidth=2)
ax2.set_ylabel('Mel bands', fontsize=12)
ax2.get_xaxis().set_visible(False)
ax2.axis('tight')
Expand Down

0 comments on commit ed05b81

Please sign in to comment.