Skip to content

Commit

Permalink
Audio: Share output buffers between instruments to decrease cache mis…
Browse files Browse the repository at this point in the history
…ses. Even though I mostly did this as a preparation to make internal blocksize configurable (which should improve CPU usage drastically), this commit alone seems to significantly decrease CPU usage when there are many instruments.
  • Loading branch information
kmatheussen committed Mar 25, 2019
1 parent 1e619d0 commit 9e63ab4
Show file tree
Hide file tree
Showing 9 changed files with 667 additions and 164 deletions.
20 changes: 15 additions & 5 deletions Makefile.Qt
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ endif

# -fsanitize=thread sometimes requires: "export __GL_THREADED_OPTIMIZATIONS=0"
# "-Og" very often destroys debugging (although it's not supposed to), but without it things are too slow (well, it's already too slow, but -Og helps a lot).
#THREAD_SANITIZE_OPTS=-Og -fsanitize=thread -DUSE_TSAN #-DDISABLE_BDWGC
#THREAD_SANITIZE_OPTS=-Og -fsanitize=thread -DRADIUM_USE_TSAN #-DDISABLE_BDWGC
THREAD_SANITIZE_OPTS=

ADDRESS_SANITIZE_OPTS=-fsanitize=address -fno-common
Expand Down Expand Up @@ -414,7 +414,7 @@ OBJ14=disk_midi_fx.o disk_midi_instrument.o midi_fx.o midi_instrument.o midi_pla
OBJ_FAUST=zita_rev.o stk_flute.o stk_bowed.o stk_blow_bottle.o stk_bass.o stk_blow_hole.o stk_brass.o stk_clarinet.o stk_flute_stk.o stk_glass_harmonica.o stk_harpsi.o stk_modal_bar.o stk_NLF_eks.o stk_NLF_fm.o stk_piano.o stk_saxophony.o stk_sitar.o stk_tibetan_bowl.o stk_tuned_bar.o stk_uni_bar.o stk_voice_form.o faust_tapiir.o faust_system_eq.o faust_system_tremolo.o faust_system_lowpass.o faust_system_highpass.o faust_system_lowshelf.o faust_system_highshelf.o faust_multibandcomp.o

#AUDIO
OBJ_AUDIO=audio_instrument.o SoundProducer.o Jack_plugin.o find_jack_library.o Faust_plugins.o Bus_plugins.o Timeskew_plugin.o Patchbay_plugin.o MidiMessages_plugin.o VST_plugins.o Ladspa_plugins.o Sampler_plugin.o SampleRecorder.o SampleReader.o Seqtrack_plugin.o FluidSynth_plugin.o Pd_plugin.o Modulator_plugin.o SoundPluginRegistry.o SoundPlugin.o AudioMeterPeaks.o Peaks.o Mixer.o get_windows_commandlines.o Envelope.o Resampler.o SoundFonts.o Smooth.o Presets.o undo_audio_effect.o undo_connection_enabled.o undo_audio_connection_gain.o undo_pd_controllers.o undo_plugin_state.o SoundfileSaver.o system_compressor_wrapper.o KillJackd.o
OBJ_AUDIO=audio_instrument.o SoundProducer.o AudioBuffer.o Jack_plugin.o find_jack_library.o Faust_plugins.o Bus_plugins.o Timeskew_plugin.o Patchbay_plugin.o MidiMessages_plugin.o VST_plugins.o Ladspa_plugins.o Sampler_plugin.o SampleRecorder.o SampleReader.o Seqtrack_plugin.o FluidSynth_plugin.o Pd_plugin.o Modulator_plugin.o SoundPluginRegistry.o SoundPlugin.o AudioMeterPeaks.o Peaks.o Mixer.o get_windows_commandlines.o Envelope.o Resampler.o SoundFonts.o Smooth.o Presets.o undo_audio_effect.o undo_connection_enabled.o undo_audio_connection_gain.o undo_pd_controllers.o undo_plugin_state.o SoundfileSaver.o system_compressor_wrapper.o KillJackd.o

#MultiCore.o

Expand Down Expand Up @@ -546,7 +546,7 @@ profile:
check: test
test: dotesting

dotesting: test_vector test_placement test_notes test_path_resolver
dotesting: test_vector test_placement test_notes test_path_resolver test_seqautomation test_audiobuffer
$(CCC) common/threading.cpp -DTEST_THREADING -Wall -lpthread && ./a.out
clang common/threading.cpp -DTEST_THREADING -Wall -lpthread && ./a.out
i686-w64-mingw32-g++ common/threading.cpp -DTEST_THREADING -Wall -lpthread -o test/a.exe && cd test && wine a.exe
Expand All @@ -569,12 +569,19 @@ test_path_resolver:
$(CCC) -g -Wall -Werror -DTEST_PATH_RESOLVER -DUSE_OPENGL -DUSE_QT_REQTYPE=1 -DUSE_QT4 Qt/Qt_path_resolver.cpp -lm -IQt $(QT_CFLAGS) $(QT_LDFLAGS) $(GCDIR)/.libs/libgc.a -lpthread && valgrind --suppressions=valgrind-python.supp --suppressions=Qt47supp.txt --suppressions=libgc.supp --tool=memcheck --leak-check=no --read-var-info=yes --track-origins=yes --fair-sched=yes ./a.out

test_seqautomation:
$(CCC) -g -Wall -Werror test/test_seqautomation.cpp -lm $(QTOPT) -UNDEBUG -fsanitize=address -fsanitize=undefined
$(CCC) -g -Wall -Wextra -Werror test/test_seqautomation.cpp -lm $(QTOPT) -UNDEBUG -fsanitize=address -fsanitize=undefined
$(CCC) test_seqautomation.o $(QT_LDFLAGS) $(QTLINKFLAGS) $(OS_LDFLAGS2) -fsanitize=address -fsanitize=undefined -o test_seqautomation
./test_seqautomation
@echo "=== success: test_seqautomation ==="
@rm test_seqautomation

# Builds and runs the AudioBuffer test.
#   1. Compile test/test_audiobuffer.cpp with asserts enabled (-UNDEBUG) and
#      AddressSanitizer + UndefinedBehaviorSanitizer instrumentation.
#   2. Link test_audiobuffer.o into the test_audiobuffer executable.
#   3. Run the executable under gdb (-ex=run, then -ex=quit) with LeakSanitizer
#      leak detection switched off through LSAN_OPTIONS.
#   4. Print the success banner and remove the executable.
# NOTE(review): presumably "set confirm on" is there so that gdb stops at the
# quit confirmation (leaving the session open) when the test crashed - confirm.
# NOTE(review): gdb's exit status probably does not reflect a failing test, in
# which case the success banner is printed regardless - confirm.
test_audiobuffer:
$(CCC) -g -Wall -Werror test/test_audiobuffer.cpp -lm $(QTOPT) -UNDEBUG -fsanitize=address -fsanitize=undefined -fPIC -DFOR_LINUX
$(CCC) test_audiobuffer.o $(QT_LDFLAGS) $(QTLINKFLAGS) $(OS_LDFLAGS2) -fsanitize=address -fsanitize=undefined -o test_audiobuffer
LSAN_OPTIONS=detect_leaks=0 gdb -ex='set confirm on' -ex=run -ex=quit ./test_audiobuffer
@echo "=== success: test_audiobuffer ==="
@rm test_audiobuffer

packages: bin/packages/deletemetorebuild

bin/packages/deletemetorebuild: bin/packages/build.sh
Expand Down Expand Up @@ -2195,7 +2202,7 @@ audio/faust_system_highshelf.cpp: audio/system_highshelf.dsp $(FAUST_DEPENDENCIE
#audio/faust-examples/faust-stk/flute.cpp: audio/faust-examples/faust-stk/flute.dsp
# cd audio/faust-examples/faust-stk && $(FAUST) -cn my_dsp flute.dsp -o flute.cpp

SoundProducer.o: audio/SoundProducer.cpp audio/SoundProducer_proc.h audio/fade_envelopes.h audio/MultiCore.cpp audio/monotonic_timer.c common/sema.h common/Semaphores.hpp common/Queue.hpp audio/SmoothDelay.hpp common/Vector.hpp common/spinlock.h audio/CpuUsage.hpp
SoundProducer.o: audio/SoundProducer.cpp audio/SoundProducer_proc.h audio/fade_envelopes.h audio/MultiCore.cpp audio/monotonic_timer.c common/sema.h common/Semaphores.hpp common/Queue.hpp audio/SmoothDelay.hpp common/Vector.hpp common/spinlock.h audio/CpuUsage.hpp audio/AudioBuffer.hpp
$(MOC) audio/MultiCore.cpp >audio/mMultiCore.cpp
$(CCC) audio/SoundProducer.cpp $(QTOPT) $(JACKOPT) # -O3 # $(CPUOPT)
#-fno-tree-vectorize # tree-vectorize makes radium crash on windows 8 with gcc 4.7.2
Expand All @@ -2207,6 +2214,9 @@ SoundProducer.o: audio/SoundProducer.cpp audio/SoundProducer_proc.h audio/fade_e
# $(CCC) audio/MultiCore.cpp $(QTOPT)


# Object file for the shared audio buffer pool. Rebuilt whenever
# audio/AudioBuffer.cpp or audio/AudioBuffer.hpp changes.
# Compiled with $(CPPOPT), not the $(QTOPT) used by the neighbouring audio rules.
AudioBuffer.o: audio/AudioBuffer.cpp audio/AudioBuffer.hpp
$(CCC) audio/AudioBuffer.cpp $(CPPOPT)

SoundPluginRegistry.o: audio/SoundPluginRegistry.cpp audio/SoundPluginRegistry_proc.h common/Vector.hpp
$(CCC) audio/SoundPluginRegistry.cpp $(QTOPT)

Expand Down
226 changes: 226 additions & 0 deletions audio/AudioBuffer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,226 @@

#include "../common/nsmtracker.h"
#include "../common/spinlock.h"
#include "../common/OS_visual_input.h"

#include "AudioBuffer.hpp"

/*
Test by running make test_audiobuffer
*/


// Head of the singly linked list of free channels (linked through AudioBufferChannel::next).
// RT_AUDIOBUFFER_get_channel pops from it, RT_AUDIOBUFFER_release_channel pushes onto it.
static radium::AudioBufferChannel *g_audio_channels = NULL;
// Passed to radium::ScopedSpinlock by the get/release functions together with the caller's NeedsLock value.
static radium::Spinlock g_audio_channels_spinlock;


#if !defined(RELEASE)
#include "../common/LockAsserter.hpp"
// Debug builds only: used with LOCKASSERTER_EXCLUSIVE in init/get/release.
static radium::LockAsserter lockAsserter;
#endif

static constexpr int g_num_elements = 1024; // Size of the pool. We normally don't need much more than the maximum number of parallel running channels, so this is probably plenty.
// The pool itself: an array of g_num_elements channels allocated in AUDIOBUFFERS_init.
static radium::AudioBufferChannel *g_channels;
// Highest-addressed pool channel handed out since the last RT_AUDIOBUFFERS_optimize call (NULL if none).
static radium::AudioBufferChannel *g_highest_used_channel = NULL;

// Returned by RT_AUDIOBUFFER_get_channel when the free list is empty. Not part of the pool.
static radium::AudioBufferChannel g_error_audio_channel;

#if !defined(RELEASE)
// Debug builds only: number of pool channels currently on the free list.
static int g_num_free_elements = 0;
#endif


void AUDIOBUFFERS_init(void){
#if !defined(RELEASE)
LOCKASSERTER_EXCLUSIVE(&lockAsserter);
#endif

static bool has_inited = false;
R_ASSERT_RETURN_IF_FALSE(has_inited==false);

g_channels = (radium::AudioBufferChannel*)V_calloc(sizeof(radium::AudioBufferChannel), g_num_elements);

for(int i=g_num_elements-1;i>=0;i--){
radium::AudioBufferChannel *channel = &g_channels[i];

channel->next = g_audio_channels;
g_audio_channels = channel;
}

#if !defined(RELEASE)
g_num_free_elements = g_num_elements;
#endif

has_inited = true;
}

// Called between each audio block. We do this to keep cache warm by always starting to use the start of the channel and to lower the chance of channels not to be aligned sequentially in memory.
void RT_AUDIOBUFFERS_optimize(void){
#if !defined(RELEASE)
if (g_num_free_elements != g_num_elements)
abort();
#endif

if (g_highest_used_channel==NULL)
return;

bool has_hit_highest = false;

radium::AudioBufferChannel *next_channel = &g_channels[0];

for(int i=0 ; i<g_num_elements ; i++){
radium::AudioBufferChannel *channel = next_channel;
if (i==g_num_elements-1)
next_channel = NULL;
else
next_channel = &g_channels[i+1];

if (channel==g_highest_used_channel)
has_hit_highest = true;

if (has_hit_highest && channel->next==next_channel)
break;

channel->next = next_channel;
}

g_highest_used_channel = NULL;
g_audio_channels = g_channels;
}

// Gives 'channel' back to the pool by pushing it onto the front of the free list.
//
// channel:    a channel previously obtained from RT_AUDIOBUFFER_get_channel.
//             The shared error channel is accepted and silently ignored.
// needs_lock: compared against radium::NeedsLock::YES and forwarded to
//             radium::ScopedSpinlock (presumably deciding whether the spinlock is taken).
void RT_AUDIOBUFFER_release_channel(radium::AudioBufferChannel *channel, radium::NeedsLock needs_lock){
  R_ASSERT_NON_RELEASE(THREADING_is_player_or_runner_thread());
  R_ASSERT_NON_RELEASE(channel!=NULL);

  const bool is_pool_channel = channel != &g_error_audio_channel;

  if (is_pool_channel){
    const bool take_lock = needs_lock==radium::NeedsLock::YES;
    radium::ScopedSpinlock lock(g_audio_channels_spinlock, take_lock);

#if !defined(RELEASE)
    LOCKASSERTER_EXCLUSIVE(&lockAsserter);
    g_num_free_elements++;
#endif

    channel->next = g_audio_channels;
    g_audio_channels = channel;
  }
}

// Takes the first channel off the free list and hands it to the caller.
//
// needs_lock: compared against radium::NeedsLock::YES and forwarded to
//             radium::ScopedSpinlock (presumably deciding whether the spinlock is taken).
//
// Returns the channel that was at the head of the free list. When the list is
// empty, an RT_message is posted and the address of the shared error channel is
// returned instead, so the result is never NULL.
radium::AudioBufferChannel *RT_AUDIOBUFFER_get_channel(radium::NeedsLock needs_lock){
  R_ASSERT_NON_RELEASE(THREADING_is_player_or_runner_thread());

  radium::ScopedSpinlock lock(g_audio_channels_spinlock, needs_lock==radium::NeedsLock::YES);

#if !defined(RELEASE)
  LOCKASSERTER_EXCLUSIVE(&lockAsserter);
#endif

  if (g_audio_channels==NULL){

    RT_message("No more free audio buffers. The audio will not be correct. If you have more than %d parallell audio channels, please request the program to allocate a higher number of audio buffers. If not, please file a bug report.", g_num_elements);

    return &g_error_audio_channel; // Sound will probably be garbled, but this way we don't have to take care of handling NULL pointers.
  }

  auto *ret = g_audio_channels;
  g_audio_channels = ret->next;

#if !defined(RELEASE)
  // Only count channels that actually left the pool; the error channel is not part of it.
  g_num_free_elements--;
#endif

  // Test for NULL explicitly: an ordering comparison between a pointer into the
  // pool and a null pointer has an unspecified result in C++. Once the tracker is
  // non-NULL both operands point into g_channels, so '>' is well defined.
  if (g_highest_used_channel==NULL || ret > g_highest_used_channel)
    g_highest_used_channel = ret;

  return ret;
}


#if TEST_AUDIOBUFFER

// Test-build stand-in for the message function: accepts the format string and
// its arguments and deliberately does nothing, so pool exhaustion can be
// exercised by the tests without side effects.
void RT_message_internal(const char *fmt,...){
  (void)fmt;
}

static void validate_channels(void){

radium::AudioBufferChannel *it = g_audio_channels;

for(int i=0 ; i<g_num_elements ; i++){
radium::AudioBufferChannel *channel = &g_channels[i];

assert(it==channel);

it = it->next;
}

assert(it==NULL);
}

static void testrun1(int num_elements){
QVector<radium::AudioBufferChannel*> stuff;

for(int i = 0 ; i < num_elements ; i++)
stuff.push_back(RT_AUDIOBUFFER_get_channel(radium::NeedsLock::NO));

if(num_elements==g_num_elements-1)
assert(stuff.last()->next == &g_channels[g_num_elements-1]);

if(num_elements==g_num_elements)
assert(stuff.last()->next == NULL);

for(auto *channel : stuff)
RT_AUDIOBUFFER_release_channel(channel, radium::NeedsLock::NO);

RT_AUDIOBUFFERS_optimize();
validate_channels();

RT_AUDIOBUFFERS_optimize();
validate_channels();
}

// Release in a random order.
static void testrun2(int num_elements){
QVector<radium::AudioBufferChannel*> stuff;

for(int i = 0 ; i < num_elements ; i++){
stuff.insert(stuff.size()==0 ? 0 : (qrand() % stuff.size()), RT_AUDIOBUFFER_get_channel(radium::NeedsLock::NO));
}

for(auto *channel : stuff)
RT_AUDIOBUFFER_release_channel(channel, radium::NeedsLock::NO);

RT_AUDIOBUFFERS_optimize();
validate_channels();

RT_AUDIOBUFFERS_optimize();
validate_channels();
}

// Runs the in-order scenario once, followed by 500 rounds of the random-order
// scenario, all with the same number of channels.
static void testrun(int num_elements){
  testrun1(num_elements);

  int rounds_left = 500;
  while (rounds_left-- > 0)
    testrun2(num_elements);
}


// Test entry point: initializes the pool and runs the scenarios for a range of
// channel counts, from an empty request up to twice the pool size (the last two
// counts exhaust the pool).
static void AUDIOBUFFERS_test(void){
  AUDIOBUFFERS_init();

  const int channel_counts[] = {
    0,
    1,
    2,
    g_num_elements/2-1,
    g_num_elements/2+1,
    g_num_elements-1,
    g_num_elements,

    g_num_elements+1,
    g_num_elements*2
  };

  for(int count : channel_counts)
    testrun(count);

}
#endif
Loading

0 comments on commit 9e63ab4

Please sign in to comment.