From: ljsebald <ljs...@us...> - 2023-11-10 02:31:36
|
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "A pseudo Operating System for the Dreamcast.". The branch, master has been updated via 2be02ef1bc763f11f3551d0e336f17cd94124f03 (commit) from e3529c47c1e35fb3a488285e911d4cfdf06a48c0 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit 2be02ef1bc763f11f3551d0e336f17cd94124f03 Author: Ruslan Rostovtsev <sw...@21...> Date: Fri Nov 10 09:31:10 2023 +0700 Sound stream fixes and improvements. (#346) * Sound stream fixes and improvements. - Fixed multistream support. - Fixed non-16-bit stream support. - Fixed stream prefill. - Used single channel for mono. - Clean playback. - Some cleanup and refactoring. * Apply suggestions from code review * Minor fixes ----------------------------------------------------------------------- Summary of changes: kernel/arch/dreamcast/sound/snd_stream.c | 424 ++++++++++++++++++------------- 1 file changed, 253 insertions(+), 171 deletions(-) diff --git a/kernel/arch/dreamcast/sound/snd_stream.c b/kernel/arch/dreamcast/sound/snd_stream.c index fcbc9c2..f75f3aa 100644 --- a/kernel/arch/dreamcast/sound/snd_stream.c +++ b/kernel/arch/dreamcast/sound/snd_stream.c @@ -16,6 +16,7 @@ #include <malloc.h> #include <sys/queue.h> +#include <kos/mutex.h> #include <arch/cache.h> #include <arch/timer.h> #include <dc/g2bus.h> @@ -44,7 +45,7 @@ being available CPU time and channels. typedef struct filter { TAILQ_ENTRY(filter) lent; snd_stream_filter_t func; - void * data; + void *data; } filter_t; /* Each of these represents an active streaming channel */ @@ -53,14 +54,13 @@ typedef struct strchan { int ch[2]; // The last write position in the playing buffer - uint32 last_write_pos; // = 0 - int curbuffer; // = 0 + uint32_t last_write_pos; // The buffer size allocated for this stream. - uint32 buffer_size; // = 0x10000 + size_t buffer_size; // Stream data location in AICA RAM - uint32 spu_ram_sch[2]; + uint32_t spu_ram_sch[2]; // "Get data" callback; we'll call this any time we want to get // another buffer of output data. @@ -72,18 +72,21 @@ typedef struct strchan { // Sample type int type; + // Sample size + int bitsize; + // Stereo/mono flag - int stereo; + int channels; // Playback frequency int frequency; /* Stream queueing is where we get everything ready to go but don't actually start it playing until the signal (for music sync, etc) */ - int queueing; + int queueing; /* Have we been initialized yet? (and reserved a buffer, etc) */ - volatile int initted; + volatile int initted; /* User data. */ void *user_data; @@ -93,10 +96,14 @@ typedef struct strchan { static strchan_t streams[SND_STREAM_MAX]; // Separation buffers (for stereo) -static uint32 *sep_buffer[2] = {NULL, NULL}; +static uint32_t *sep_buffer[2] = {NULL, NULL}; + +static mutex_t stream_mutex = MUTEX_INITIALIZER; /* the address of the sound ram from the SH4 side */ -#define SPU_RAM_BASE 0xa0800000 +#define SPU_RAM_BASE 0xa0800000 + +#define LOCK_TIMEOUT_MS 1000 // Check an incoming handle #define CHECK_HND(x) do { \ @@ -106,6 +113,30 @@ static uint32 *sep_buffer[2] = {NULL, NULL}; void snd_pcm16_split_sq_start(uint32_t *data, uintptr_t left, uintptr_t right, size_t size); +static inline size_t samples_to_bytes(snd_stream_hnd_t hnd, size_t samples) { + switch(streams[hnd].bitsize) { + case 4: + return samples >> 1; + case 8: + return samples; + case 16: + default: + return samples << 1; + } +} + +static inline size_t bytes_to_samples(snd_stream_hnd_t hnd, size_t bytes) { + switch(streams[hnd].bitsize) { + case 4: + return bytes << 1; + case 8: + return bytes; + case 16: + default: + return bytes >> 1; + } +} + /* Set "get data" callback */ void snd_stream_set_callback(snd_stream_hnd_t hnd, snd_stream_callback_t cb) { CHECK_HND(hnd); @@ -151,50 +182,42 @@ static void process_filters(snd_stream_hnd_t hnd, void **buffer, int *samplecnt) filter_t * f; TAILQ_FOREACH(f, &streams[hnd].filters, lent) { - f->func(hnd, f->data, streams[hnd].frequency, streams[hnd].stereo ? 2 : 1, buffer, samplecnt); + f->func(hnd, f->data, streams[hnd].frequency, streams[hnd].channels, buffer, samplecnt); } } -static void sep_data(void *buffer, int len, int stereo) { +static void snd_pcm16_split_unaligned(void *buffer, void *left, void *right, size_t len) { uint32_t *buf = (uint32_t *)buffer; - uint32_t *left_ptr = (uint32_t *)sep_buffer[1]; - uint32_t *right_ptr = (uint32_t *)sep_buffer[0]; + uint32_t *left_ptr = (uint32_t *)left; + uint32_t *right_ptr = (uint32_t *)right; uint32_t data; uint32_t left_val; uint32_t right_val; - if(stereo) { - len <<= 1; + for(; len > 8; len -= 8) { + dcache_pref_block(buf + 8); - for(; len > 8; len -= 8) { - dcache_pref_block(buf + 8); + data = *buf++; + left_val = (data >> 16); + right_val = (data & 0xffff); - data = *buf++; - left_val = (data >> 16); - right_val = (data & 0xffff); + data = *buf++; + left_val |= (data & 0xffff0000); + right_val |= (data & 0xffff) << 16; - data = *buf++; - left_val |= (data & 0xffff0000); - right_val |= (data & 0xffff) << 16; - - if(((uintptr_t)left_ptr & 31) == 0) { - dcache_alloc_block(left_ptr++, left_val); - dcache_alloc_block(right_ptr++, right_val); - } - else { - *left_ptr++ = left_val; - *right_ptr++ = right_val; - } + if(((uintptr_t)left_ptr & 31) == 0) { + dcache_alloc_block(left_ptr++, left_val); + dcache_alloc_block(right_ptr++, right_val); } - if(len) { - data = *buf++; - *(uint16_t *)left_ptr = (data >> 16); - *(uint16_t *)right_ptr = (data & 0xffff); + else { + *left_ptr++ = left_val; + *right_ptr++ = right_val; } } - else { - memcpy(sep_buffer[0], buffer, len); - sep_buffer[1] = sep_buffer[0]; + if(len) { + data = *buf++; + *(uint16_t *)left_ptr = (data >> 16); + *(uint16_t *)right_ptr = (data & 0xffff); } } @@ -227,47 +250,49 @@ void snd_pcm16_split_sq(uint32_t *data, uintptr_t left, uintptr_t right, size_t } static void snd_stream_prefill_part(snd_stream_hnd_t hnd, uint32_t offset) { - - const uint32_t buffer_size = streams[hnd].buffer_size; - uintptr_t left = streams[hnd].spu_ram_sch[0] + offset; - uintptr_t right = streams[hnd].spu_ram_sch[1] + offset; - int got = buffer_size; - void *buf; - - if(streams[hnd].stereo) { - buf = streams[hnd].get_data(hnd, buffer_size, &got); - } - else { - buf = streams[hnd].get_data(hnd, buffer_size / 2, &got); - } + const size_t buffer_size = streams[hnd].buffer_size; + const int chans = streams[hnd].channels; + const uintptr_t left = streams[hnd].spu_ram_sch[0] + offset; + const uintptr_t right = streams[hnd].spu_ram_sch[1] + offset; + const int max_got = (buffer_size / 2) * chans; + int got = max_got; + void *buf = streams[hnd].get_data(hnd, max_got, &got); if(buf == NULL) { dbglog(DBG_ERROR, "snd_stream_prefill_part(): get_data() failed\n"); return; } + if(got > max_got) { + got = max_got; + } + process_filters(hnd, &buf, &got); - if(streams[hnd].stereo == 0) { + if(chans == 1) { spu_memload_sq(left, buf, got); spu_memload_sq(right, buf, got); + return; } - else if(((uintptr_t)buf & 31) || streams[hnd].type != AICA_SM_16BIT) { - if(streams[hnd].type == AICA_SM_8BIT) { - snd_pcm8_split(buf, sep_buffer[0], sep_buffer[1], got); - } - else if(streams[hnd].type == AICA_SM_ADPCM_LS) { - snd_adpcm_split(buf, sep_buffer[0], sep_buffer[1], got); + + if(streams[hnd].bitsize == 16) { + if((uintptr_t)buf & 31) { + snd_pcm16_split_unaligned(buf, sep_buffer[0], sep_buffer[1], got); } else { - sep_data(buf, got / 2, streams[hnd].stereo); + snd_pcm16_split_sq((uint32_t *)buf, left, right, got); + return; } - spu_memload_sq(left, sep_buffer[0], got); - spu_memload_sq(right, sep_buffer[1], got); } - else { - snd_pcm16_split_sq((uint32_t *)buf, left, right, got); + else if(streams[hnd].bitsize == 8) { + snd_pcm8_split(buf, sep_buffer[0], sep_buffer[1], got); + } + else if(streams[hnd].bitsize == 4) { + snd_adpcm_split(buf, sep_buffer[0], sep_buffer[1], got); } + + spu_memload_sq(left, sep_buffer[0], got / 2); + spu_memload_sq(right, sep_buffer[1], got / 2); } /* Prefill buffers -- do this before calling start() */ @@ -278,12 +303,13 @@ void snd_stream_prefill(snd_stream_hnd_t hnd) { return; } + mutex_lock_timed(&stream_mutex, LOCK_TIMEOUT_MS); snd_stream_prefill_part(hnd, 0); snd_stream_prefill_part(hnd, streams[hnd].buffer_size / 2); - /* Start with playing on buffer 0 */ + /* Start playing from the beginning */ streams[hnd].last_write_pos = 0; - streams[hnd].curbuffer = 0; + mutex_unlock(&stream_mutex); } /* Initialize stream system */ @@ -339,8 +365,8 @@ snd_stream_hnd_t snd_stream_alloc(snd_stream_callback_t cb, int bufsize) { TAILQ_INIT(&streams[hnd].filters); // Allocate stream buffers - streams[hnd].spu_ram_sch[0] = snd_mem_malloc(streams[hnd].buffer_size); - streams[hnd].spu_ram_sch[1] = snd_mem_malloc(streams[hnd].buffer_size); + streams[hnd].spu_ram_sch[0] = snd_mem_malloc(streams[hnd].buffer_size * 2); + streams[hnd].spu_ram_sch[1] = streams[hnd].spu_ram_sch[0] + streams[hnd].buffer_size; // And channels streams[hnd].ch[0] = snd_sfx_chn_alloc(); @@ -419,7 +445,7 @@ void snd_stream_queue_disable(snd_stream_hnd_t hnd) { } /* Start streaming (or if queueing is enabled, just get ready) */ -static void snd_stream_start_type(snd_stream_hnd_t hnd, uint32 type, uint32 freq, int st) { +static void snd_stream_start_type(snd_stream_hnd_t hnd, uint32_t type, uint32_t freq, int st) { AICA_CMDSTR_CHANNEL(tmp, cmd, chan); CHECK_HND(hnd); @@ -427,9 +453,25 @@ static void snd_stream_start_type(snd_stream_hnd_t hnd, uint32 type, uint32 freq if(!streams[hnd].get_data) return; streams[hnd].type = type; - streams[hnd].stereo = st; + streams[hnd].channels = st ? 2 : 1; streams[hnd].frequency = freq; + if(streams[hnd].type == AICA_SM_16BIT) { + streams[hnd].bitsize = 16; + } + else if(streams[hnd].type == AICA_SM_8BIT) { + streams[hnd].bitsize = 8; + } + else if(streams[hnd].type == AICA_SM_ADPCM_LS) { + streams[hnd].bitsize = 4; + + if(streams[hnd].buffer_size > (32 << 10)) { + /* The channel position data size is 16-bits. + Need to make sure that we don't go beyond these limits */ + streams[hnd].buffer_size = (32 << 10); + } + } + /* Make sure these are sync'd (and/or delayed) */ snd_sh4_to_aica_stop(); @@ -444,24 +486,31 @@ static void snd_stream_start_type(snd_stream_hnd_t hnd, uint32 type, uint32 freq chan->cmd = AICA_CH_CMD_START | AICA_CH_START_DELAY; chan->base = streams[hnd].spu_ram_sch[0]; chan->type = type; - chan->length = (streams[hnd].buffer_size / 2); + chan->length = bytes_to_samples(hnd, streams[hnd].buffer_size); chan->loop = 1; chan->loopstart = 0; - chan->loopend = (streams[hnd].buffer_size / 2); + chan->loopend = chan->length - 1; chan->freq = freq; chan->vol = 255; - chan->pan = 0; + chan->pan = streams[hnd].channels == 2 ? 0 : 128; snd_sh4_to_aica(tmp, cmd->size); - /* Channel 1 */ - cmd->cmd_id = streams[hnd].ch[1]; - chan->base = streams[hnd].spu_ram_sch[1]; - chan->pan = 255; - snd_sh4_to_aica(tmp, cmd->size); + if(streams[hnd].channels == 2) { + /* Channel 1 */ + cmd->cmd_id = streams[hnd].ch[1]; + chan->base = streams[hnd].spu_ram_sch[1]; + chan->pan = 255; + snd_sh4_to_aica(tmp, cmd->size); + + /* Start both channels simultaneously */ + cmd->cmd_id = (1 << streams[hnd].ch[0]) | + (1 << streams[hnd].ch[1]); + } + else { + /* Start one channel */ + cmd->cmd_id = (1 << streams[hnd].ch[0]); + } - /* Start both channels simultaneously */ - cmd->cmd_id = (1 << streams[hnd].ch[0]) | - (1 << streams[hnd].ch[1]); chan->cmd = AICA_CH_CMD_START | AICA_CH_START_SYNC; snd_sh4_to_aica(tmp, cmd->size); @@ -470,15 +519,15 @@ static void snd_stream_start_type(snd_stream_hnd_t hnd, uint32 type, uint32 freq snd_sh4_to_aica_start(); } -void snd_stream_start(snd_stream_hnd_t hnd, uint32 freq, int st) { +void snd_stream_start(snd_stream_hnd_t hnd, uint32_t freq, int st) { snd_stream_start_type(hnd, AICA_SM_16BIT, freq, st); } -void snd_stream_start_pcm8(snd_stream_hnd_t hnd, uint32 freq, int st) { +void snd_stream_start_pcm8(snd_stream_hnd_t hnd, uint32_t freq, int st) { snd_stream_start_type(hnd, AICA_SM_8BIT, freq, st); } -void snd_stream_start_adpcm(snd_stream_hnd_t hnd, uint32 freq, int st) { +void snd_stream_start_adpcm(snd_stream_hnd_t hnd, uint32_t freq, int st) { snd_stream_start_type(hnd, AICA_SM_ADPCM_LS, freq, st); } @@ -505,124 +554,157 @@ void snd_stream_stop(snd_stream_hnd_t hnd) { chan->cmd = AICA_CH_CMD_STOP; snd_sh4_to_aica(tmp, cmd->size); - /* Channel 1 */ - cmd->cmd_id = streams[hnd].ch[1]; - snd_sh4_to_aica(tmp, cmd->size); + if(streams[hnd].channels == 2) { + /* Channel 1 */ + cmd->cmd_id = streams[hnd].ch[1]; + snd_sh4_to_aica(tmp, cmd->size); + } } /* The DMA will chain to this to start the second DMA. */ -static uint32 dmadest, dmacnt; -static void dma_chain(ptr_t data) { +static uint32_t dmacnt; +static uintptr_t dmadest; +static inline void dma_done(uint32_t data) { + (void)data; + mutex_unlock(&stream_mutex); +} + +static inline void dma_chain(uint32_t data) { (void)data; - spu_dma_transfer(sep_buffer[1], dmadest, dmacnt, 0, NULL, 0); + spu_dma_transfer(sep_buffer[1], dmadest, dmacnt, 0, dma_done, 0); } /* Poll streamer to load more data if neccessary */ int snd_stream_poll(snd_stream_hnd_t hnd) { - uint32 ch0pos, ch1pos; - //int realbuffer; - uint32 current_play_pos; - int needed_samples; - int got_samples; - void *data; - void *first_dma_buf = sep_buffer[0]; + uint32_t ch0pos, ch1pos, write_pos; + uint16_t current_play_pos; + int needed_samples = 0; + int needed_bytes = 0; + int got_bytes = 0; + void *data; + void *first_dma_buf = sep_buffer[0]; + strchan_t *stream; CHECK_HND(hnd); + stream = &streams[hnd]; - if(!streams[hnd].get_data) return -1; + if(!stream->get_data) { + return -1; + } - /* Get "real" buffer */ - ch0pos = g2_read_32(SPU_RAM_BASE + AICA_CHANNEL(streams[hnd].ch[0]) + offsetof(aica_channel_t, pos)); - ch1pos = g2_read_32(SPU_RAM_BASE + AICA_CHANNEL(streams[hnd].ch[1]) + offsetof(aica_channel_t, pos)); + /* Get channels position */ + ch0pos = g2_read_32(SPU_RAM_BASE + + AICA_CHANNEL(stream->ch[0]) + + offsetof(aica_channel_t, pos)); + + if(stream->channels == 2) { + ch1pos = g2_read_32(SPU_RAM_BASE + + AICA_CHANNEL(stream->ch[1]) + + offsetof(aica_channel_t, pos)); + /* The channel position register is 16-bit on AICA side, keep it in mind */ + current_play_pos = (ch0pos < ch1pos ? ch0pos : ch1pos) & 0xffff; + } + else { + current_play_pos = (ch0pos & 0xffff); + } - if(ch0pos >= (streams[hnd].buffer_size / 2)) { - dbglog(DBG_ERROR, "snd_stream_poll: chan0(%d).pos = %ld (%08lx)\n", streams[hnd].ch[0], ch0pos, ch0pos); + if(samples_to_bytes(hnd, current_play_pos) >= stream->buffer_size) { + dbglog(DBG_ERROR, "snd_stream_poll: chan0(%d).pos = %ld\n", stream->ch[0], ch0pos); return -1; } - //realbuffer = !((ch0pos < (streams[hnd].buffer_size / 4)) && (ch1pos < (streams[hnd].buffer_size / 4))); + /* Count just till the end of the buffer, so we don't have to + handle buffer wraps */ + if(stream->last_write_pos <= current_play_pos) { + needed_samples = current_play_pos - stream->last_write_pos - 1; + /* Round it to max sector size of supported storage devices */ + needed_samples &= ~(bytes_to_samples(hnd, 2048 / stream->channels) - 1); + } + else { + needed_samples = bytes_to_samples(hnd, stream->buffer_size); + needed_samples -= stream->last_write_pos; + } - current_play_pos = (ch0pos < ch1pos) ? (ch0pos) : (ch1pos); + if(needed_samples <= 0) { + return 0; + } - /* count just till the end of the buffer, so we don't have to - handle buffer wraps */ - if(streams[hnd].last_write_pos <= current_play_pos) - needed_samples = current_play_pos - streams[hnd].last_write_pos; - else - needed_samples = (streams[hnd].buffer_size / 2) - streams[hnd].last_write_pos; - - /* round it a little bit */ - needed_samples &= ~0x7ff; - /* printf("last_write_pos %6u, current_play_pos %6u, needed_samples %6i\n",streams[hnd].last_write_pos,current_play_pos,needed_samples); */ - - if(needed_samples > 0) { - if(streams[hnd].stereo) { - needed_samples = ((unsigned)needed_samples > streams[hnd].buffer_size/4) ? (int)streams[hnd].buffer_size/4 : needed_samples; - data = streams[hnd].get_data(hnd, needed_samples * 4, &got_samples); - process_filters(hnd, &data, &got_samples); - - if(got_samples < needed_samples * 4) { - needed_samples = got_samples / 4; - - if(needed_samples & 3) - needed_samples = (needed_samples + 4) & ~3; - } - } ...<truncated>... hooks/post-receive -- A pseudo Operating System for the Dreamcast. |