Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

How to support both mp3 and wav? I not found it in example (AUD-6007) #1353

Open
yizi15 opened this issue Jan 20, 2025 · 9 comments
Open

How to support both mp3 and wav? I not found it in example (AUD-6007) #1353

yizi15 opened this issue Jan 20, 2025 · 9 comments
Labels
good first issue Good for newcomers

Comments

@yizi15
Copy link

yizi15 commented Jan 20, 2025

No description provided.

@github-actions github-actions bot changed the title How to support both mp3 and wav? I not found it in example How to support both mp3 and wav? I not found it in example (AUD-6007) Jan 20, 2025
@TempoTian
Copy link
Contributor

You can use esp_audio to play back both file types; see the example code at:
https://github.com/espressif/esp-adf/blob/master/examples/cli/main/console_example.c#L585

If you want to use a self-constructed pipeline to fulfill this request, it is better to add a wrapper codec that supports both WAV and MP3.
The following code shows how to add a wrapper codec that plays both AAC and MP3, including when they are carried in TS format.

#include "audio_element.h"
#include "audio_pipeline.h"
#include "audio_event_iface.h"
#include "audio_mem.h"
#include "audio_common.h"
#include "i2s_stream.h"
#include "audio_sys.h"
#include "aac_decoder.h"
#include "mp3_decoder.h"
#include "ringbuf.h"
#include "esp_log.h"
#include "string.h"

#define TAG "Wrapper_Dec"

/* Read a big-endian 32-bit value from the four bytes starting at d.
 * Every byte is widened to uint32_t before it is shifted: with plain integer
 * promotion a leading byte >= 0x80 would be shifted into the sign bit of int,
 * which is undefined behaviour. */
#define GET32(d) (((uint32_t)(d)[0] << 24) | ((uint32_t)(d)[1] << 16) | ((uint32_t)(d)[2] << 8) | ((uint32_t)(d)[3]))

/*
 * Entry points of the TS (transport stream) parser. Only the declarations
 * appear in this file; no definition is visible here.
 * NOTE(review): the per-function notes below describe how this file uses the
 * results, not the parser's documented contract — confirm against the parser.
 */

/* This file treats a result >= 0 as "TS stream found" and uses it as a byte
 * offset into buf. */
int ts_parser_search_stream(unsigned char *buf, int len);
/* Called here with a pointer to an mpeg_buffer_t; a non-zero result is treated
 * as failure. */
int ts_parser_dec_init(void *user_data);
/* Called here with the wrapper element as codec_handle; a negative result is
 * treated as failure, otherwise as the number of bytes in data to forward. */
int ts_parser_dewrapper_prefilled_data(void *codec_handle, int datanum, void *ts_buf, unsigned char *data);
/* Called here with the wrapper element as codec_handle; a positive result is
 * treated as the number of bytes placed in buf. */
int ts_parser_read_data(void *codec_handle, int bytes, void *ts_buf, unsigned char *buf);
/* Presumably the counterpart of ts_parser_dec_init() — TODO confirm the
 * expected argument. */
void ts_parser_dec_deinit(void *user_data);

/*
 * State block whose address is handed to ts_parser_dec_init().
 * NOTE(review): the code in this file only reads ts_buf; the meaning of the
 * other fields is presumably defined by the TS parser — confirm there before
 * changing the layout.
 */
typedef struct
{
    int buffer_size;
    unsigned char *buffer;
    int at_eof;
    int total_bytes_read;
    int bytes_into_buffer;
    int bytes_consumed;
    void *ts_buf; /* passed to ts_parser_dewrapper_prefilled_data() and ts_parser_read_data() */
} mpeg_buffer_t;

/* Per-instance state of the wrapper decoder element (zero-initialised by
 * wrapper_dec_init() via audio_calloc). */
typedef struct {
    audio_element_handle_t dec_element;    /* MP3 or AAC decoder created by verify_and_start_dec(); NULL until then */
    bool                   format_checked; /* set by check_format() once a decoder has been started */
    bool                   is_ts;          /* set by check_format() when ts_parser_search_stream() finds a stream */
    mpeg_buffer_t          parser;         /* state handed to ts_parser_dec_init() */
    ringbuf_handle_t       ts_out;         /* created in wrapper_dec_open(); installed as dec_element's input ring buffer */
} wrapper_dec_t;

/* Input ring buffer of the wrapper element, refreshed on every open. */
ringbuf_handle_t wrapper_fifo;

/*
 * Open callback of the wrapper element.
 *
 * Creates the 1024-byte ring buffer that feeds the inner decoder (only if it
 * does not exist yet) and records the element's input ring buffer in
 * wrapper_fifo.
 *
 * Returns 0 on success, -1 when the ring buffer could not be created.
 */
static esp_err_t wrapper_dec_open(audio_element_handle_t self)
{
    wrapper_dec_t *ctx = (wrapper_dec_t *)audio_element_getdata(self);

    if (!ctx->ts_out) {
        ctx->ts_out = rb_create(1024, 1);
    }
    /* Recorded even when the allocation above failed. */
    wrapper_fifo = audio_element_get_input_ringbuf(self);

    return ctx->ts_out ? 0 : -1;
}
/*
 * Log the size of a buffer and dump at most its first 32 bytes as hex.
 *
 * tag  - label inserted into the log line
 * data - bytes to dump
 * size - number of valid bytes in data (logged as-is; the dump is capped at 32)
 */
static void print_hex(char* tag, char* data, int size) {
    int s = size > 32 ? 32 : size;
    ESP_LOGI(TAG, "Data for %s size %d is:", tag, size);
    printf("    ");
    for (int i = 0; i < s; i++) {
        /* Cast first: where plain char is signed, a byte >= 0x80 would be
         * sign-extended by the promotion and print as "ffffffxx". */
        printf("%02x ", (unsigned char)data[i]);
    }
    printf("\n");
}

/*
 * Event callback installed on the inner decoder element.
 *
 * Copies the event into the event queue of the wrapper element passed as ctx,
 * without waiting. Returns 0 when the event was queued and -1 otherwise.
 */
static esp_err_t actual_dec_cb(audio_element_handle_t el, audio_event_iface_msg_t *event, void *ctx)
{
    audio_element_handle_t wrapper = (audio_element_handle_t)ctx;

    return (xQueueSend(audio_element_get_event_queue(wrapper), (void *)event, 0) == pdPASS) ? 0 : -1;
}

/*
 * Detect the audio format from the first bytes of the stream, create the
 * matching decoder element, wire it between dec->ts_out and the wrapper's
 * output ring buffer, start it, and hand it the bytes already read.
 *
 * el   - the wrapper element
 * dec  - wrapper state; dec->dec_element is set on success
 * data - first bytes of the elementary stream
 * size - number of valid bytes in data
 *
 * Returns 0 on success, -1 if the data is too short, the format is not
 * recognised, the decoder cannot be created, or the bytes cannot be queued.
 */
static int verify_and_start_dec(audio_element_handle_t el, wrapper_dec_t* dec, uint8_t* data, int size)
{
    /* The detection below reads data[0..3]. */
    if (size < 4) {
        ESP_LOGE(TAG, "Need at least 4 bytes to detect format, got %d", size);
        return -1;
    }
    uint32_t v = GET32(data);
    print_hex("After TS", (char*)data, size);
    if ((data[0] == 'I' && data[1] == 'D' && data[2] == '3') ||
        ((v & 0xffe00000) == 0xffe00000 && (data[1] & 0x06) > 0)) {
        // Init MP3: ID3 tag, or an 11-bit sync word with a non-zero layer field
        mp3_decoder_cfg_t mp3_cfg = DEFAULT_MP3_DECODER_CONFIG();
        dec->dec_element = mp3_decoder_init(&mp3_cfg);
    }  else if ((v & 0xfff00000) == 0xfff00000) {
        // Init AAC: 12-bit sync word
        aac_decoder_cfg_t aac_cfg = DEFAULT_AAC_DECODER_CONFIG();
        dec->dec_element = aac_decoder_init(&aac_cfg);
    }else {
        ESP_LOGE(TAG, "Not support format only aupport mp3 and aac");
        return -1;
    }
    if (dec->dec_element == NULL) {
        ESP_LOGE(TAG, "Fail to init element");
        return -1;
    }
    audio_element_set_input_ringbuf(dec->dec_element, dec->ts_out);
    audio_element_set_output_ringbuf(dec->dec_element, audio_element_get_output_ringbuf(el));
    /* Install the forwarding callback before the decoder task starts, so that
     * events raised right after start-up reach the wrapper too. */
    audio_element_set_event_callback(dec->dec_element, actual_dec_cb, el);
    audio_element_run(dec->dec_element);
    audio_element_resume(dec->dec_element, 0, 0);
    /* Same convention as wrapper_dec_process(): a negative result is an error. */
    int written = rb_write(dec->ts_out, (char*)data, size, 120000);
    if (written < 0) {
        ESP_LOGE(TAG, "Fail to queue first %d bytes, ret %d", size, written);
        return -1;
    }
    return 0;
}

/*
 * Pull data through the TS parser and queue the result for the inner decoder.
 *
 * el        - the wrapper element (passed to the parser as its codec handle)
 * dec       - wrapper state
 * in_buffer - scratch buffer the parser fills
 * in_len    - capacity of in_buffer
 *
 * Returns the parser's result when it is <= 0, the negative rb_write() result
 * when queuing fails, and otherwise the number of bytes the parser produced.
 */
static int ts_send_data(audio_element_handle_t el, wrapper_dec_t *dec, char *in_buffer, int in_len)
{
    int ret = ts_parser_read_data(el, in_len, dec->parser.ts_buf, (unsigned char*)in_buffer);
    if (ret <= 0) {
        return ret;
    }
    /* Propagate a failed write instead of reporting the bytes as delivered;
     * the non-TS path in wrapper_dec_process() does the same. */
    int written = rb_write(dec->ts_out, in_buffer, ret, 120000);
    if (written < 0) {
        return written;
    }
    return ret;
}

/*
 * Inspect the first input buffer: unwrap it if it is a TS stream, then detect
 * the audio format and start the matching decoder.
 *
 * el   - the wrapper element
 * dec  - wrapper state; is_ts and format_checked are updated here
 * data - first bytes read from the input (may be rewritten in place)
 * size - number of valid bytes in data
 *
 * Returns 0 on success and a negative value on failure.
 */
static int check_format(audio_element_handle_t el, wrapper_dec_t* dec, uint8_t* data, int size)
{
    print_hex("Before check format", (char*)data, size);
    /* Search once and reuse the result for both the test and the offset. */
    int ofst = ts_parser_search_stream(data, size);
    int ret = 0;
    int filled = size;
    if (ofst >= 0) {
        ret = ts_parser_dec_init(&dec->parser);
        if (ret != 0) {
            ESP_LOGE(TAG, "Fail to init ts parser %d", ret);
            return -1;
        }
        dec->is_ts = true;
        int left = size - ofst;
        if (ofst) {
            /* Drop the bytes in front of the stream start. */
            memmove(data, data+ofst, left);
        }
        ret = ts_parser_dewrapper_prefilled_data(el, left, dec->parser.ts_buf, data);
        if (ret < 0) {
            ESP_LOGE(TAG, "Fail to prefill data ret %d", ret);
            return -1;
        }
        filled = ret;
    }
    // Try to parse audio data to get format
    ret = verify_and_start_dec(el, dec, data, filled);
    if (ret < 0) {
        return ret;
    }
    dec->format_checked = true;
    return 0;
}

/*
 * Process callback of the wrapper element.
 *
 * Once the stream is known to be TS, everything is delegated to
 * ts_send_data(). Otherwise one buffer is read from the input: the first
 * buffer goes through check_format(), later buffers are written straight to
 * the inner decoder's ring buffer.
 *
 * Returns the number of bytes read, the non-positive result of the read, or
 * the negative result of check_format() / rb_write().
 */
static int wrapper_dec_process(audio_element_handle_t self, char *in_buffer, int in_len)
{
    wrapper_dec_t *ctx = (wrapper_dec_t *)audio_element_getdata(self);

    if (ctx->is_ts) {
        return ts_send_data(self, ctx, in_buffer, in_len);
    }

    int got = audio_element_input(self, in_buffer, in_len);
    if (got <= 0) {
        return got;
    }

    int status = ctx->format_checked
        ? rb_write(ctx->ts_out, in_buffer, got, 120000)
        : check_format(self, ctx, (uint8_t*)in_buffer, got);

    return (status < 0) ? status : got;
}

/*
 * Close callback of the wrapper element.
 *
 * Releases everything that belongs to a single playback (inner decoder, TS
 * parser state, hand-off ring buffer) and clears the format flags, so that
 * the next open/process cycle detects the format again. Without this reset
 * format_checked stays true and the element cannot be played a second time.
 * Every released handle is cleared, so wrapper_dec_destroy() does not release
 * it again.
 *
 * Always returns ESP_OK.
 */
static esp_err_t wrapper_dec_close(audio_element_handle_t self)
{
    wrapper_dec_t *my = (wrapper_dec_t *)audio_element_getdata(self);
    /* Tear down the decoder before the ring buffer it reads from. */
    if (my->dec_element) {
        audio_element_deinit(my->dec_element);
        my->dec_element = NULL;
    }
    if (my->is_ts) {
        ts_parser_dec_deinit(&my->parser);
        my->is_ts = false;
    }
    my->format_checked = false;
    if (my->ts_out) {
        /* wrapper_dec_open() creates a fresh buffer when ts_out is NULL. */
        rb_destroy(my->ts_out);
        my->ts_out = NULL;
    }
    return ESP_OK;
}

/*
 * Destroy callback of the wrapper element: releases the inner decoder, the TS
 * parser state, the hand-off ring buffer and the wrapper state itself.
 *
 * Always returns ESP_OK.
 */
static esp_err_t wrapper_dec_destroy(audio_element_handle_t self)
{
    wrapper_dec_t *my = (wrapper_dec_t *)audio_element_getdata(self);
    if (my->dec_element) {
        audio_element_deinit(my->dec_element);
    }
    if (my->is_ts) {
        /* Tear the parser down with the same pointer that was given to
         * ts_parser_dec_init() in check_format(). The previous code called
         * ts_parser_dec_init() again here, with &my->parser.ts_buf. */
        ts_parser_dec_deinit(&my->parser);
    }
    if (my->ts_out) {
        rb_destroy(my->ts_out);
    }
    audio_free(my);
    return ESP_OK;
}

/*
 * Create the wrapper decoder element.
 *
 * config - supplies task priority, task core and output ring buffer size;
 *          all other element settings are fixed here.
 *
 * Returns the new element, or NULL when the wrapper state or the element
 * could not be allocated.
 */
audio_element_handle_t wrapper_dec_init(aac_decoder_cfg_t *config)
{
    wrapper_dec_t *ctx = audio_calloc(1, sizeof(*ctx));
    AUDIO_MEM_CHECK(TAG, ctx, return NULL);

    audio_element_cfg_t el_cfg = DEFAULT_AUDIO_ELEMENT_CONFIG();

    /* Callbacks */
    el_cfg.open = wrapper_dec_open;
    el_cfg.process = wrapper_dec_process;
    el_cfg.close = wrapper_dec_close;
    el_cfg.destroy = wrapper_dec_destroy;

    /* Task and buffer settings */
    el_cfg.task_stack = 4096;
    el_cfg.task_prio = config->task_prio;
    el_cfg.task_core = config->task_core;
    el_cfg.out_rb_size = config->out_rb_size;
    if (el_cfg.buffer_len == 0) {
        el_cfg.buffer_len = 1024;
    }

    audio_element_handle_t el = audio_element_init(&el_cfg);
    AUDIO_MEM_CHECK(TAG, el, goto _my_init_exit);
    audio_element_setdata(el, ctx);
    return el;

_my_init_exit:
    /* Element creation failed: release the state allocated above. */
    audio_free(ctx);
    return NULL;
}

@yizi15
Copy link
Author

yizi15 commented Jan 21, 2025

@TempoTian, at line 673 the example calls i2s_stream_set_clk(i2s_h, 48000, 16, 2). Other examples call this function on the AEL_MSG_CMD_REPORT_MUSIC_INFO event. Is there any difference?

@TempoTian
Copy link
Contributor

The example calls i2s_stream_set_clk only as an initial setting. esp_audio calls it again internally when it receives AEL_MSG_CMD_REPORT_MUSIC_INFO, so the logic is similar.

@yizi15
Copy link
Author

yizi15 commented Jan 21, 2025

@TempoTian W (17708) AUDIO_ELEMENT: [i2s-0x3c125e5c] RESUME timeout
My program is based on
https://github.com/espressif/esp-adf/blob/master/examples/get-started/play_mp3_control/main/play_mp3_control_example.c

I (699889) AUDIO_THREAD: The wav task allocate stack on external memory
I (699889) AUDIO_ELEMENT: [wav-0x3c125b64] Element task created
I (699892) AUDIO_THREAD: The i2s task allocate stack on internal memory
I (699899) AUDIO_ELEMENT: [i2s-0x3c125e5c] Element task created
I (699906) AUDIO_PIPELINE: Func:audio_pipeline_run, Line:359, MEM Total:8498232 Bytes, Inter:180827 Bytes, Dram:180827 Bytes

I (699918) AUDIO_ELEMENT: [wav] AEL_MSG_CMD_RESUME,state:1
I (699925) AUDIO_ELEMENT: [i2s] AEL_MSG_CMD_RESUME,state:1
I (699930) I2S_STREAM: AUDIO_STREAM_WRITER
I (699935) AUDIO_PIPELINE: Pipeline started
(699940) audio_play: [Play]
I (699936) CODEC_ELEMENT_HELPER: The element is 0x3c125b64. The reserve data 2 is 0x0.
I (699951) WAV_DECODER: a new song playing
(699957) audio_task_entry: [ * ] Receive music info from  decoder, sample_rates=44100, bits=16, ch=2
I (699987) AUDIO_ELEMENT: [i2s] AEL_MSG_CMD_PAUSE
W (699989) AUDIO_ELEMENT: [i2s-0x3c125e5c] RESUME timeout
I (699989) AUDIO_ELEMENT: [i2s] AEL_MSG_CMD_RESUME,state:4
I (699994) I2S_STREAM: AUDIO_STREAM_WRITER

A RESUME timeout warning is always printed, but the music plays fine.

@yizi15
Copy link
Author

yizi15 commented Jan 21, 2025

I modified your example as follows:

/*
 * Close callback of the wrapper element.
 *
 * Releases the inner decoder, the TS parser state and the hand-off ring
 * buffer, then returns the wrapper state to its initial values.
 *
 * Always returns ESP_OK.
 */
static esp_err_t wrapper_dec_close(audio_element_handle_t self)
{
    wrapper_dec_t *ctx = (wrapper_dec_t *)audio_element_getdata(self);

    /* Release per-playback resources. */
    if (ctx->dec_element != NULL) {
        audio_element_deinit(ctx->dec_element);
    }
    if (ctx->is_ts) {
        ts_parser_dec_deinit(&ctx->parser);
    }
    if (ctx->ts_out != NULL) {
        rb_destroy(ctx->ts_out);
    }

    /* Back to the pristine state. */
    ctx->dec_element = NULL;
    ctx->ts_out = NULL;
    ctx->is_ts = false;
    ctx->format_checked = false;

    return ESP_OK;
}

/*
 * Destroy callback of the wrapper element: releases whatever is still held
 * (inner decoder, TS parser state, hand-off ring buffer) and then the wrapper
 * state itself.
 *
 * Always returns ESP_OK.
 */
static esp_err_t wrapper_dec_destroy(audio_element_handle_t self)
{
    wrapper_dec_t *ctx = (wrapper_dec_t *)audio_element_getdata(self);

    if (ctx->dec_element != NULL) {
        audio_element_deinit(ctx->dec_element);
    }
    if (ctx->is_ts) {
        ts_parser_dec_deinit(&ctx->parser);
    }
    if (ctx->ts_out != NULL) {
        rb_destroy(ctx->ts_out);
    }

    audio_free(ctx);
    return ESP_OK;
}

I also changed every timeout from 120000 to 120; otherwise my thread would block.

@TempoTian
Copy link
Contributor

This is GPT-generated code. I have reviewed it and it seems to have no big issues; you can debug it and give it a try.

#include "audio_element.h"
#include "audio_pipeline.h"
#include "audio_event_iface.h"
#include "audio_mem.h"
#include "audio_common.h"
#include "i2s_stream.h"
#include "audio_sys.h"
#include "mp3_decoder.h"
#include "ringbuf.h"
#include "esp_log.h"
#include "string.h"

#define TAG "Wrapper_Dec"
/* Read a big-endian 32-bit value from the four bytes starting at d.
 * Every byte is widened to uint32_t before it is shifted: with plain integer
 * promotion a leading byte >= 0x80 would be shifted into the sign bit of int,
 * which is undefined behaviour. */
#define GET32(d) (((uint32_t)(d)[0] << 24) | ((uint32_t)(d)[1] << 16) | ((uint32_t)(d)[2] << 8) | ((uint32_t)(d)[3]))

/* Per-instance state of the wrapper decoder element (zero-initialised by
 * wrapper_dec_init() via audio_calloc). */
typedef struct {
    audio_element_handle_t dec_element; /* MP3 decoder created by verify_and_start_dec(); left NULL on the WAV path */
    bool format_checked;                /* set by check_format() once the first buffer has been accepted */
    ringbuf_handle_t ts_out;            /* created in wrapper_dec_open(); installed as dec_element's input ring buffer */
} wrapper_dec_t;

/* Input ring buffer of the wrapper element, refreshed on every open. */
ringbuf_handle_t wrapper_fifo;

/*
 * Open callback of the wrapper element.
 *
 * Creates the 1024-byte hand-off ring buffer (only if it does not exist yet)
 * and records the element's input ring buffer in wrapper_fifo.
 *
 * Returns 0 on success, -1 when the ring buffer could not be created.
 */
static esp_err_t wrapper_dec_open(audio_element_handle_t self) {
    wrapper_dec_t *ctx = (wrapper_dec_t *)audio_element_getdata(self);

    if (!ctx->ts_out) {
        ctx->ts_out = rb_create(1024, 1);
    }
    /* Recorded even when the allocation above failed. */
    wrapper_fifo = audio_element_get_input_ringbuf(self);

    return ctx->ts_out ? 0 : -1;
}

/*
 * Log the size of a buffer and dump at most its first 32 bytes as hex.
 *
 * tag  - label inserted into the log line
 * data - bytes to dump
 * size - number of valid bytes in data (logged as-is; the dump is capped at 32)
 */
static void print_hex(char *tag, char *data, int size) {
    int s = size > 32 ? 32 : size;
    ESP_LOGI(TAG, "Data for %s size %d is:", tag, size);
    printf("    ");
    for (int i = 0; i < s; i++) {
        /* Cast first: where plain char is signed, a byte >= 0x80 would be
         * sign-extended by the promotion and print as "ffffffxx". */
        printf("%02x ", (unsigned char)data[i]);
    }
    printf("\n");
}

/*
 * Event callback installed on the inner decoder element.
 *
 * Copies the event into the event queue of the wrapper element passed as ctx,
 * without waiting. Returns 0 when the event was queued and -1 otherwise.
 */
static esp_err_t actual_dec_cb(audio_element_handle_t el, audio_event_iface_msg_t *event, void *ctx) {
    audio_element_handle_t wrapper = (audio_element_handle_t)ctx;

    return (xQueueSend(audio_element_get_event_queue(wrapper), (void *)event, 0) == pdPASS) ? 0 : -1;
}

/*
 * Find the payload of the 'data' chunk in a RIFF/WAVE header.
 *
 * data - buffer that starts with the 12-byte "RIFF....WAVE" preamble
 * size - number of valid bytes in data
 *
 * Returns the offset of the first payload byte, or -1 when no complete 'data'
 * chunk header lies inside the buffer.
 */
static int wav_find_data_offset(const uint8_t *data, int size) {
    int pos = 12; /* first chunk header follows the RIFF/WAVE preamble */

    /* Only look at headers that are fully inside the buffer. */
    while (size - pos >= 8) {
        const uint8_t *hdr = data + pos;
        if (memcmp(hdr, "data", 4) == 0) {
            return pos + 8;
        }
        /* RIFF chunk sizes are little-endian, so GET32 cannot be used here. */
        uint32_t chunk_size = (uint32_t)hdr[4] | ((uint32_t)hdr[5] << 8) |
                              ((uint32_t)hdr[6] << 16) | ((uint32_t)hdr[7] << 24);
        if (chunk_size > (uint32_t)(size - pos - 8)) {
            return -1; /* chunk runs past the buffer */
        }
        /* Chunks are padded to an even number of bytes. */
        pos += 8 + (int)chunk_size + (int)(chunk_size & 1u);
    }
    return -1;
}

/*
 * Detect the audio format from the first bytes of the stream.
 *
 * MP3: creates the MP3 decoder, wires it between dec->ts_out and the wrapper's
 * output ring buffer, starts it and queues the bytes already read.
 * WAV: skips the RIFF header and queues the PCM payload that follows it.
 *
 * el   - the wrapper element
 * dec  - wrapper state; dec->dec_element is set on the MP3 path
 * data - first bytes of the stream
 * size - number of valid bytes in data
 *
 * Returns 0 on success, -1 if the data is too short, the format is not
 * supported, the header is invalid, the decoder cannot be created, or the
 * bytes cannot be queued.
 */
static int verify_and_start_dec(audio_element_handle_t el, wrapper_dec_t *dec, uint8_t *data, int size) {
    /* The detection below reads data[0..3]. */
    if (size < 4) {
        ESP_LOGE(TAG, "Need at least 4 bytes to detect format, got %d", size);
        return -1;
    }
    uint32_t v = GET32(data);
    print_hex("Format Detection", (char *)data, size);

    if ((data[0] == 'I' && data[1] == 'D' && data[2] == '3') ||
        ((v & 0xffe00000) == 0xffe00000 && (data[1] & 0x06) > 0)) {
        // Init MP3
        mp3_decoder_cfg_t mp3_cfg = DEFAULT_MP3_DECODER_CONFIG();
        dec->dec_element = mp3_decoder_init(&mp3_cfg);
    } else if (size >= 12 && memcmp(data, "RIFF", 4) == 0 && memcmp(data + 8, "WAVE", 4) == 0) {
        /* The 'data' chunk header must lie inside this first buffer; a file
         * with large chunks in front of it is rejected. */
        int pcm_ofst = wav_find_data_offset(data, size);
        if (pcm_ofst < 0) {
            ESP_LOGE(TAG, "Invalid WAV file, 'data' chunk not found");
            return -1;
        }

        ESP_LOGI(TAG, "WAV format detected, skipping header");
        /* NOTE(review): no decoder element is created on this path, so nothing
         * in this file is attached to read ts_out; the PCM queued here still
         * needs a consumer — confirm the intended design. */
        if (size > pcm_ofst &&
            rb_write(dec->ts_out, (char *)(data + pcm_ofst), size - pcm_ofst, 120000) < 0) {
            ESP_LOGE(TAG, "Failed to queue WAV payload");
            return -1;
        }
        return 0;
    } else {
        ESP_LOGE(TAG, "Unsupported format, only MP3 and WAV (PCM) are supported");
        return -1;
    }

    if (dec->dec_element == NULL) {
        ESP_LOGE(TAG, "Failed to initialize decoder element");
        return -1;
    }

    audio_element_set_input_ringbuf(dec->dec_element, dec->ts_out);
    audio_element_set_output_ringbuf(dec->dec_element, audio_element_get_output_ringbuf(el));
    /* Install the forwarding callback before the decoder task starts, so that
     * events raised right after start-up reach the wrapper too. */
    audio_element_set_event_callback(dec->dec_element, actual_dec_cb, el);
    audio_element_run(dec->dec_element);
    audio_element_resume(dec->dec_element, 0, 0);
    if (rb_write(dec->ts_out, (char *)data, size, 120000) < 0) {
        ESP_LOGE(TAG, "Failed to queue first %d bytes", size);
        return -1;
    }
    return 0;
}

/*
 * Dump the first input buffer and hand it to verify_and_start_dec().
 *
 * Marks the format as checked when that succeeds. Returns 0 on success, or
 * the negative result of verify_and_start_dec().
 */
static int check_format(audio_element_handle_t el, wrapper_dec_t *dec, uint8_t *data, int size) {
    print_hex("Before format check", (char *)data, size);

    int status = verify_and_start_dec(el, dec, data, size);
    if (status >= 0) {
        dec->format_checked = true;
        status = 0;
    }
    return status;
}

/*
 * Process callback of the wrapper element.
 *
 * Reads one buffer from the input. The first buffer goes through
 * check_format(); later buffers are written to the hand-off ring buffer.
 *
 * Returns the number of bytes read, the non-positive result of the read, or
 * the negative result of check_format() / rb_write().
 */
static int wrapper_dec_process(audio_element_handle_t self, char *in_buffer, int in_len) {
    wrapper_dec_t *my = (wrapper_dec_t *)audio_element_getdata(self);
    int r_size = audio_element_input(self, in_buffer, in_len);
    if (r_size <= 0) {
        return r_size;
    }
    int ret;
    if (!my->format_checked) {
        ret = check_format(self, my, (uint8_t *)in_buffer, r_size);
    } else {
        /* Previously this result was discarded, so a failed write was still
         * reported as r_size bytes processed. */
        ret = rb_write(my->ts_out, in_buffer, r_size, 120000);
    }
    if (ret < 0) {
        return ret;
    }
    return r_size;
}

/*
 * Close callback of the wrapper element.
 *
 * Releases everything that belongs to a single playback (inner decoder and
 * hand-off ring buffer) and clears format_checked, so that the next
 * open/process cycle detects the format again. Without this reset
 * format_checked stays true and the element cannot be played a second time.
 * Every released handle is cleared, so wrapper_dec_destroy() does not release
 * it again.
 *
 * Always returns ESP_OK.
 */
static esp_err_t wrapper_dec_close(audio_element_handle_t self) {
    wrapper_dec_t *my = (wrapper_dec_t *)audio_element_getdata(self);
    /* Tear down the decoder before the ring buffer it reads from. */
    if (my->dec_element) {
        audio_element_deinit(my->dec_element);
        my->dec_element = NULL;
    }
    my->format_checked = false;
    if (my->ts_out) {
        /* wrapper_dec_open() creates a fresh buffer when ts_out is NULL. */
        rb_destroy(my->ts_out);
        my->ts_out = NULL;
    }
    return ESP_OK;
}

/*
 * Destroy callback of the wrapper element: releases the inner decoder and the
 * hand-off ring buffer if they exist, then the wrapper state itself.
 *
 * Always returns ESP_OK.
 */
static esp_err_t wrapper_dec_destroy(audio_element_handle_t self) {
    wrapper_dec_t *ctx = (wrapper_dec_t *)audio_element_getdata(self);

    if (ctx->dec_element != NULL) {
        audio_element_deinit(ctx->dec_element);
    }
    if (ctx->ts_out != NULL) {
        rb_destroy(ctx->ts_out);
    }

    audio_free(ctx);
    return ESP_OK;
}

/*
 * Create the wrapper decoder element.
 *
 * config - supplies task priority, task core and output ring buffer size;
 *          all other element settings are fixed here.
 *
 * Returns the new element, or NULL when the wrapper state or the element
 * could not be allocated.
 */
audio_element_handle_t wrapper_dec_init(audio_element_cfg_t *config) {
    wrapper_dec_t *ctx = audio_calloc(1, sizeof(*ctx));
    AUDIO_MEM_CHECK(TAG, ctx, return NULL);

    audio_element_cfg_t el_cfg = DEFAULT_AUDIO_ELEMENT_CONFIG();

    /* Callbacks */
    el_cfg.open = wrapper_dec_open;
    el_cfg.process = wrapper_dec_process;
    el_cfg.close = wrapper_dec_close;
    el_cfg.destroy = wrapper_dec_destroy;

    /* Task and buffer settings */
    el_cfg.task_stack = 4096;
    el_cfg.task_prio = config->task_prio;
    el_cfg.task_core = config->task_core;
    el_cfg.out_rb_size = config->out_rb_size;
    if (el_cfg.buffer_len == 0) {
        el_cfg.buffer_len = 1024;
    }

    audio_element_handle_t el = audio_element_init(&el_cfg);
    AUDIO_MEM_CHECK(TAG, el, goto _my_init_exit);
    audio_element_setdata(el, ctx);
    return el;

_my_init_exit:
    /* Element creation failed: release the state allocated above. */
    audio_free(ctx);
    return NULL;
}

@yizi15
Copy link
Author

yizi15 commented Jan 21, 2025

@TempoTian the RESUME timeout is not related to the wrapper code; the plain WAV decoder times out too. I found this problem several months ago, but it is acceptable for my project: the music starts and stops normally.

@jason-mao
Copy link
Collaborator

@yizi15 I've noticed that the title and content of this issue don't quite match. Could you help me improve it and update the progress at the same time?

@yizi15
Copy link
Author

yizi15 commented Feb 14, 2025

@jason-mao, the code TempoTian showed cannot be played repeatedly. This may be a bug in the code; after I modified the close function it works normally. In any case, I think this feature should be included in the SDK examples. As for the RESUME timeout, I don't know what causes it and have no time to investigate. When I get more information about it, I will update the progress.

@jason-mao jason-mao added the good first issue Good for newcomers label Feb 18, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
good first issue Good for newcomers
Projects
None yet
Development

No branches or pull requests

3 participants