From: ljsebald <ljs...@us...> - 2023-10-25 03:16:55
|
This is an automated email from the git hooks/post-receive script. It was generated because a ref change was pushed to the repository containing the project "A pseudo Operating System for the Dreamcast.". The branch, master has been updated via b1ddc3dcdd776ee4ce7d38d2cd68f161ee493e3e (commit) from 56564f79b67478b7c5b68692f1200768cec6c983 (commit) Those revisions listed above that are new to this repository have not appeared on any other notification email; so we list those revisions in full, below. - Log ----------------------------------------------------------------- commit b1ddc3dcdd776ee4ce7d38d2cd68f161ee493e3e Author: Ruslan Rostovtsev <sw...@21...> Date: Wed Oct 25 10:16:28 2023 +0700 Optimized functions for separating stereo PCM 16-bit for streams (#326) * Optimized separating stereo PCM 16-bit for streams ----------------------------------------------------------------------- Summary of changes: kernel/arch/dreamcast/exports-naomi.txt | 19 ++++ kernel/arch/dreamcast/exports-pristine.txt | 19 ++++ kernel/arch/dreamcast/include/dc/sound/sound.h | 42 +++++++++ kernel/arch/dreamcast/include/dc/sound/stream.h | 5 +- kernel/arch/dreamcast/sound/Makefile | 9 +- kernel/arch/dreamcast/sound/snd_pcm_split.s | 114 ++++++++++++++++++++++++ kernel/arch/dreamcast/sound/snd_stream.c | 112 ++++++++++++++--------- 7 files changed, 278 insertions(+), 42 deletions(-) create mode 100644 kernel/arch/dreamcast/sound/snd_pcm_split.s diff --git a/kernel/arch/dreamcast/exports-naomi.txt b/kernel/arch/dreamcast/exports-naomi.txt index ed14140..1744f16 100644 --- a/kernel/arch/dreamcast/exports-naomi.txt +++ b/kernel/arch/dreamcast/exports-naomi.txt @@ -91,14 +91,27 @@ sq_set16 sq_set32 # Sound +snd_mem_init +snd_mem_shutdown snd_mem_malloc snd_mem_free snd_mem_available +snd_init +snd_shutdown +snd_sh4_to_aica +snd_sh4_to_aica_start +snd_sh4_to_aica_stop +snd_aica_to_sh4 +snd_poll_resp snd_sfx_unload_all snd_sfx_unload snd_sfx_load snd_sfx_play snd_sfx_stop_all +snd_sfx_play_chn 
+snd_sfx_stop +snd_sfx_chn_alloc +snd_sfx_chn_free snd_stream_set_callback snd_stream_filter_add snd_stream_filter_remove @@ -111,6 +124,12 @@ snd_stream_queue_go snd_stream_stop snd_stream_poll snd_stream_volume +snd_stream_alloc +snd_stream_destroy +snd_stream_reinit +snd_stream_prefill +snd_pcm16_split +snd_pcm16_split_sq # Video vid_check_cable diff --git a/kernel/arch/dreamcast/exports-pristine.txt b/kernel/arch/dreamcast/exports-pristine.txt index 8ce4182..361b525 100644 --- a/kernel/arch/dreamcast/exports-pristine.txt +++ b/kernel/arch/dreamcast/exports-pristine.txt @@ -134,14 +134,27 @@ sq_set16 sq_set32 # Sound +snd_mem_init +snd_mem_shutdown snd_mem_malloc snd_mem_free snd_mem_available +snd_init +snd_shutdown +snd_sh4_to_aica +snd_sh4_to_aica_start +snd_sh4_to_aica_stop +snd_aica_to_sh4 +snd_poll_resp snd_sfx_unload_all snd_sfx_unload snd_sfx_load snd_sfx_play snd_sfx_stop_all +snd_sfx_play_chn +snd_sfx_stop +snd_sfx_chn_alloc +snd_sfx_chn_free snd_stream_set_callback snd_stream_filter_add snd_stream_filter_remove @@ -154,6 +167,12 @@ snd_stream_queue_go snd_stream_stop snd_stream_poll snd_stream_volume +snd_stream_alloc +snd_stream_destroy +snd_stream_reinit +snd_stream_prefill +snd_pcm16_split +snd_pcm16_split_sq # Video vid_check_cable diff --git a/kernel/arch/dreamcast/include/dc/sound/sound.h b/kernel/arch/dreamcast/include/dc/sound/sound.h index abe9dd4..86ed253 100644 --- a/kernel/arch/dreamcast/include/dc/sound/sound.h +++ b/kernel/arch/dreamcast/include/dc/sound/sound.h @@ -2,6 +2,7 @@ dc/sound/sound.h Copyright (C) 2002 Megan Potter + Copyright (C) 2023 Ruslan Rostovtsev */ @@ -23,6 +24,7 @@ __BEGIN_DECLS #include <arch/types.h> +#include <stdint.h> /** \brief Allocate memory in the SPU RAM pool @@ -138,6 +140,46 @@ int snd_aica_to_sh4(void *packetout); */ void snd_poll_resp(void); +/** \brief Separates stereo PCM samples into 2 mono channels. 
+ + Splits a buffer containing 2 interleaved channels of 16-bit PCM samples + into 2 separate buffers of 16-bit PCM samples. + + \warning + All arguments must be 32-byte aligned. + + \param data Source buffer of interleaved stereo samples + \param left Destination buffer for left mono samples + \param right Destination buffer for right mono samples + \param size Size of the source buffer in bytes (must be divisible by 32) + + \sa snd_pcm16_split_sq() +*/ +void snd_pcm16_split(uint32_t *data, uint32_t *left, uint32_t *right, size_t size); + +/** \brief Separates stereo PCM samples into 2 mono channels with SQ transfer. + + Splits a buffer containing 2 interleaved channels of 16-bit PCM samples + into 2 separate buffers of 16-bit PCM samples by using the store queues + for data transfer. + + \warning + All arguments must be 32-byte aligned. + + \warning + The store queues must be configured for transferring to the left and right + destination buffers beforehand (QACR0 <= left, QACR1 <= right). + + \param data Source buffer of interleaved stereo samples + \param left SQ-masked left destination buffer address + \param right SQ-masked right destination buffer address + \param size Size of the source buffer in bytes (must be divisible by 32) + + \sa snd_pcm16_split() + Store queues must be prepared before. 
+*/ +void snd_pcm16_split_sq(uint32_t *data, uintptr_t left, uintptr_t right, size_t size); + __END_DECLS #endif /* __DC_SOUND_SOUND_H */ diff --git a/kernel/arch/dreamcast/include/dc/sound/stream.h b/kernel/arch/dreamcast/include/dc/sound/stream.h index 98371ef..80c8204 100644 --- a/kernel/arch/dreamcast/include/dc/sound/stream.h +++ b/kernel/arch/dreamcast/include/dc/sound/stream.h @@ -3,6 +3,7 @@ dc/sound/stream.h Copyright (C) 2002, 2004 Megan Potter Copyright (C) 2020 Lawrence Sebald + Copyright (C) 2023 Ruslan Rostovtsev */ @@ -17,6 +18,7 @@ \author Megan Potter \author Florian Schulze \author Lawrence Sebald + \author Ruslan Rostovtsev */ #ifndef __DC_SOUND_STREAM_H @@ -56,7 +58,8 @@ typedef int snd_stream_hnd_t; \param smp_req The number of samples requested. \param smp_recv Used to return the number of samples available. \return A pointer to the buffer of samples. If stereo, the - samples should be interleaved. + samples should be interleaved. For best performance + use 32-byte aligned pointer. */ typedef void *(*snd_stream_callback_t)(snd_stream_hnd_t hnd, int smp_req, int *smp_recv); diff --git a/kernel/arch/dreamcast/sound/Makefile b/kernel/arch/dreamcast/sound/Makefile index 5a3f98b..51db029 100644 --- a/kernel/arch/dreamcast/sound/Makefile +++ b/kernel/arch/dreamcast/sound/Makefile @@ -2,9 +2,16 @@ # # libmp3/Makefile # (c)2001 Megan Potter +# (c)2023 Ruslan Rostovtsev # -OBJS = snd_iface.o snd_sfxmgr.o snd_stream.o snd_stream_drv.o snd_mem.o +OBJS = snd_iface.o \ + snd_sfxmgr.o \ + snd_stream.o \ + snd_stream_drv.o \ + snd_mem.o \ + snd_pcm_split.o + KOS_CFLAGS += -I $(KOS_BASE)/kernel/arch/dreamcast/include/dc/sound SUBDIRS = arm diff --git a/kernel/arch/dreamcast/sound/snd_pcm_split.s b/kernel/arch/dreamcast/sound/snd_pcm_split.s new file mode 100644 index 0000000..276507a --- /dev/null +++ b/kernel/arch/dreamcast/sound/snd_pcm_split.s @@ -0,0 +1,114 @@ +! KallistiOS ##version## +! +! arch/dreamcast/sound/snd_pcm_split.s +! 
Copyright (C) 2023 Ruslan Rostovtsev +! +! Optimized assembler code for separating stereo PCM 16-bit to single channels +! + +.section .text +.globl _snd_pcm16_split +.globl _snd_pcm16_split_sq + +.align 2 + +! +! void snd_pcm16_split(uint32_t *data, uint32_t *left, uint32_t *right, uint32_t size); +! +_snd_pcm16_split: + mov #-5, r3 + shld r3, r7 + mov.l r8, @-r15 + mov.l r11, @-r15 + mov.l r12, @-r15 + mov r4, r8 + add #32, r8 + mov #31, r3 + mov #0, r0 +.pcm16_pref: + pref @r8 +.pcm16_load: + tst r3, r0 + mov.l @r4+, r1 + mov.l @r4+, r2 + swap.w r1, r11 + mov r2, r12 + xtrct r11, r12 + swap.w r2, r11 + bt/s .pcm16_store_alloc + xtrct r1, r11 +.pcm16_store: + mov.l r11, @(r0,r5) + mov.l r12, @(r0,r6) +.pcm16_loops: + tst r3, r4 + bf/s .pcm16_load + add #4, r0 + dt r7 + bf/s .pcm16_pref + add #32, r8 +.pcm16_exit: + mov.l @r15+, r12 + mov.l @r15+, r11 + mov.l @r15+, r8 + rts + nop +.pcm16_store_alloc: + add r0, r5 + add r0, r6 + mov r11, r0 + movca.l r0, @r5 + mov r12, r0 + movca.l r0, @r6 + bra .pcm16_loops + mov #0, r0 + +! +! void snd_pcm16_split_sq(uint32_t *data, uint32_t left, uint32_t right, uint32_t size); +! 
+_snd_pcm16_split_sq: + mov #-5, r3 + shld r3, r7 + mov.l r8, @-r15 + mov.l r11, @-r15 + mov.l r12, @-r15 + mov r4, r8 + add #32, r8 + mov #31, r3 + mov #0, r0 +.pcm16_sq_pref: + pref @r8 +.pcm16_sq_load: + mov.l @r4+, r1 + mov.l @r4+, r2 + swap.w r1, r11 + mov r2, r12 + xtrct r11, r12 + swap.w r2, r11 + xtrct r1, r11 + mov.l r11, @(r0,r5) + mov.l r12, @(r0,r6) + tst r3, r4 + bf/s .pcm16_sq_load + add #4, r0 + tst r3, r0 + bf .pcm16_sq_count +.pcm16_sq_flush: + mov r5, r1 + add r0, r1 + add #-32, r1 + pref @r1 + mov r6, r2 + add r0, r2 + add #-32, r2 + pref @r2 +.pcm16_sq_count: + dt r7 + bf/s .pcm16_sq_pref + add #32, r8 +.pcm16_sq_exit: + mov.l @r15+, r12 + mov.l @r15+, r11 + mov.l @r15+, r8 + rts + nop diff --git a/kernel/arch/dreamcast/sound/snd_stream.c b/kernel/arch/dreamcast/sound/snd_stream.c index e1063a1..14efad6 100644 --- a/kernel/arch/dreamcast/sound/snd_stream.c +++ b/kernel/arch/dreamcast/sound/snd_stream.c @@ -4,6 +4,7 @@ Copyright (C) 2000, 2001, 2002, 2003, 2004 Megan Potter Copyright (C) 2002 Florian Schulze Copyright (C) 2020 Lawrence Sebald + Copyright (C) 2023 Ruslan Rostovtsev SH-4 support routines for SPU streaming sound driver */ @@ -18,6 +19,7 @@ #include <arch/cache.h> #include <arch/timer.h> #include <dc/g2bus.h> +#include <dc/sq.h> #include <dc/spu.h> #include <dc/sound/sound.h> #include <dc/sound/stream.h> @@ -88,7 +90,7 @@ typedef struct strchan { static strchan_t streams[SND_STREAM_MAX]; // Separation buffers (for stereo) -int16 * sep_buffer[2] = { NULL, NULL }; +static uint32 *sep_buffer[2] = {NULL, NULL}; /* the address of the sound ram from the SH4 side */ #define SPU_RAM_BASE 0xa0800000 @@ -148,16 +150,13 @@ static void process_filters(snd_stream_hnd_t hnd, void **buffer, int *samplecnt) } } - -/* Performs stereo seperation for the two channels; this routine - has been optimized for the SH-4. 
*/ static void sep_data(void *buffer, int len, int stereo) { register int16 *bufsrc, *bufdst; register int x, y, cnt; if(stereo) { bufsrc = (int16*)buffer; - bufdst = sep_buffer[0]; + bufdst = (int16 *)sep_buffer[0]; x = 0; y = 0; cnt = len / 2; @@ -172,7 +171,7 @@ static void sep_data(void *buffer, int len, int stereo) { bufsrc = (int16*)buffer; bufsrc++; - bufdst = sep_buffer[1]; + bufdst = (int16 *)sep_buffer[1]; x = 1; y = 0; cnt = len / 2; @@ -189,10 +188,29 @@ static void sep_data(void *buffer, int len, int stereo) { } else { memcpy(sep_buffer[0], buffer, len); - memcpy(sep_buffer[1], buffer, len); + sep_buffer[1] = sep_buffer[0]; } } +static void stereo_pcm16_split_sq(uint32 *data, uint32 aica_left, uint32 aica_right, uint32 size) { + + /* Wait for both store queues to complete if they are already used */ + uint32 *d = (uint32 *)0xe0000000; + d[0] = d[8] = 0; + + uint32 masked_left = (0xe0000000 | (aica_left & 0x03ffffe0)); + uint32 masked_right = (0xe0000000 | (aica_right & 0x03ffffe0)); + + /* Set store queue memory area as desired */ + QACR0 = (aica_left >> 24) & 0x1c; + QACR1 = (aica_right >> 24) & 0x1c; + + g2_fifo_wait(); + + /* Separating channels and do fill/write queues as many times necessary. */ + snd_pcm16_split_sq(data, masked_left, masked_right, size); +} + /* Prefill buffers -- do this before calling start() */ void snd_stream_prefill(snd_stream_hnd_t hnd) { void *buf; @@ -202,37 +220,34 @@ void snd_stream_prefill(snd_stream_hnd_t hnd) { if(!streams[hnd].get_data) return; - /* Load first buffer */ - /* XXX Note: This will not work if the full data size is less than - buffer_size or buffer_size/2. 
*/ - if(streams[hnd].stereo) - buf = streams[hnd].get_data(hnd, streams[hnd].buffer_size, &got); - else - buf = streams[hnd].get_data(hnd, streams[hnd].buffer_size / 2, &got); + const uint32 buffer_size = streams[hnd].buffer_size; - process_filters(hnd, &buf, &got); - sep_data(buf, (streams[hnd].buffer_size / 2), streams[hnd].stereo); - spu_memload( - streams[hnd].spu_ram_sch[0] + (streams[hnd].buffer_size / 2) * 0, - (uint8*)sep_buffer[0], streams[hnd].buffer_size / 2); - spu_memload( - streams[hnd].spu_ram_sch[1] + (streams[hnd].buffer_size / 2) * 0, - (uint8*)sep_buffer[1], streams[hnd].buffer_size / 2); - - /* Load second buffer */ if(streams[hnd].stereo) - buf = streams[hnd].get_data(hnd, streams[hnd].buffer_size, &got); + buf = streams[hnd].get_data(hnd, buffer_size * 2, &got); else - buf = streams[hnd].get_data(hnd, streams[hnd].buffer_size / 2, &got); + buf = streams[hnd].get_data(hnd, buffer_size, &got); process_filters(hnd, &buf, &got); - sep_data(buf, (streams[hnd].buffer_size / 2), streams[hnd].stereo); - spu_memload( - streams[hnd].spu_ram_sch[0] + (streams[hnd].buffer_size / 2) * 1, - (uint8*)sep_buffer[0], streams[hnd].buffer_size / 2); - spu_memload( - streams[hnd].spu_ram_sch[1] + (streams[hnd].buffer_size / 2) * 1, - (uint8*)sep_buffer[1], streams[hnd].buffer_size / 2); + + if ((uintptr_t)buf & 31) { + sep_data(buf, got, streams[hnd].stereo); + spu_memload(streams[hnd].spu_ram_sch[0], (uint8*)sep_buffer[0], got); + spu_memload(streams[hnd].spu_ram_sch[1], (uint8*)sep_buffer[1], got); + } + else { + if (streams[hnd].stereo) { + stereo_pcm16_split_sq((uint32 *)buf, + streams[hnd].spu_ram_sch[0], + streams[hnd].spu_ram_sch[1], + got); + } + else { + g2_fifo_wait(); + sq_cpy((uint32 *)streams[hnd].spu_ram_sch[0], buf, got); + g2_fifo_wait(); + sq_cpy((uint32 *)streams[hnd].spu_ram_sch[1], buf, got); + } + } /* Start with playing on buffer 0 */ streams[hnd].last_write_pos = 0; @@ -243,8 +258,8 @@ void snd_stream_prefill(snd_stream_hnd_t hnd) { int 
snd_stream_init(void) { /* Create stereo seperation buffers */ if(!sep_buffer[0]) { - sep_buffer[0] = memalign(32, (SND_STREAM_BUFFER_MAX / 2)); - sep_buffer[1] = memalign(32, (SND_STREAM_BUFFER_MAX / 2)); + sep_buffer[0] = memalign(32, SND_STREAM_BUFFER_MAX); + sep_buffer[1] = sep_buffer[0] + (SND_STREAM_BUFFER_MAX / 8); } /* Finish loading the stream driver */ @@ -355,7 +370,6 @@ void snd_stream_shutdown(void) { if(sep_buffer[0]) { free(sep_buffer[0]); sep_buffer[0] = NULL; - free(sep_buffer[1]); sep_buffer[1] = NULL; } } @@ -465,6 +479,7 @@ int snd_stream_poll(snd_stream_hnd_t hnd) { int needed_samples; int got_samples; void *data; + void *first_dma_buf = sep_buffer[0]; ...<truncated>... hooks/post-receive -- A pseudo Operating System for the Dreamcast. |