From: GitLab M. <git...@ke...> - 2021-09-20 19:12:05
|
data/amdgpu.ids | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) New commits: commit a97f265c7dc1924a38a899082caf97519f8c532e Author: Alex Deucher <ale...@am...> Date: Fri Sep 17 09:16:57 2021 -0400 amdgpu: add new marketing name Acked-by: Andrey Grodzovsky <and...@am...> Signed-off-by: Alex Deucher <ale...@am...> diff --git a/data/amdgpu.ids b/data/amdgpu.ids index 0933acba..ac5213b0 100644 --- a/data/amdgpu.ids +++ b/data/amdgpu.ids @@ -65,6 +65,7 @@ 15D8, 92, AMD Ryzen Embedded R1505G with Radeon Vega Gfx 15D8, CF, AMD Ryzen Embedded R1305G with Radeon Vega Gfx 15D8, E4, AMD Ryzen Embedded R1102G with Radeon Vega Gfx +163F, AE, AMD Custom GPU 0405 6600, 0, AMD Radeon HD 8600/8700M 6600, 81, AMD Radeon (TM) R7 M370 6601, 0, AMD Radeon (TM) HD 8500M/8700M commit 4529056e4fc427987eaad388f9dd6b9ca5b7148f Author: Alex Deucher <ale...@am...> Date: Fri Sep 17 09:14:13 2021 -0400 amdgpu: add marketing names from 21.30 Add new marketing names Acked-by: Andrey Grodzovsky <and...@am...> Signed-off-by: Alex Deucher <ale...@am...> diff --git a/data/amdgpu.ids b/data/amdgpu.ids index cd6fa67e..0933acba 100644 --- a/data/amdgpu.ids +++ b/data/amdgpu.ids @@ -137,7 +137,7 @@ 67C4, 00, AMD Radeon (TM) Pro WX 7100 Graphics 67C4, 80, AMD Radeon (TM) E9560/E9565 Graphics 67C7, 00, AMD Radeon (TM) Pro WX 5100 Graphics -67C7, 80, AMD Radeon (TM) Pro E9390 Graphics +67C7, 80, AMD Radeon (TM) E9390 Graphics 67C0, 00, AMD Radeon (TM) Pro WX 7100 Graphics 67D0, 01, AMD Radeon (TM) Pro V7350x2 67D0, 02, AMD Radeon (TM) Pro V7300X @@ -279,9 +279,13 @@ 73BF, C1, AMD Radeon RX 6800 XT 73BF, C3, AMD Radeon RX 6800 73DF, C1, AMD Radeon RX 6700 XT +73DF, C3, AMD Radeon RX 6800M 73DF, C5, AMD Radeon RX 6700 XT +73DF, CF, AMD Radeon RX 6700M 73E1, 00, AMD Radeon PRO W6600M 73E3, 00, AMD Radeon PRO W6600 +73FF, C1, AMD Radeon RX 6600 XT +73FF, C3, AMD Radeon RX 6600M 9874, C4, AMD Radeon R7 Graphics 9874, C5, AMD Radeon R6 Graphics 9874, C6, AMD Radeon R6 Graphics |
From: GitLab M. <git...@ke...> - 2021-10-04 10:44:30
|
gen_table_fourcc.py | 2 include/drm/drm_fourcc.h | 109 +++++++++++++++++++++++++++++++++++++++++++++-- xf86drm.c | 104 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 191 insertions(+), 24 deletions(-) New commits: commit bd26b61cff33517840b3f1900bbe1849b8672f65 Author: Dennis Tsiang <den...@ar...> Date: Wed Sep 22 14:53:50 2021 +0100 xf86drm: Update drmGetFormatModifierNameFromArm to handle AFRC Update drmGetFormatModifierNameFromArm function to handle AFRC modifiers. Signed-off-by: Dennis Tsiang <den...@ar...> diff --git a/gen_table_fourcc.py b/gen_table_fourcc.py index 4e8bc409..4236fd79 100644 --- a/gen_table_fourcc.py +++ b/gen_table_fourcc.py @@ -64,7 +64,7 @@ static const struct drmFormatModifierInfo drm_format_modifier_table[] = { for entry in fm_re['others']: (vendor, mod) = entry.split('_', 1) - if vendor == 'ARM' and (mod == 'TYPE_AFBC' or mod == 'TYPE_MISC'): + if vendor == 'ARM' and (mod == 'TYPE_AFBC' or mod == 'TYPE_MISC' or mod == 'TYPE_AFRC'): continue print_fm(f, vendor, mod, mod) diff --git a/xf86drm.c b/xf86drm.c index 7561039f..2abc744e 100644 --- a/xf86drm.c +++ b/xf86drm.c @@ -210,30 +210,16 @@ static bool is_x_t_amd_gfx9_tile(uint64_t tile) return false; } -static char * -drmGetFormatModifierNameFromArm(uint64_t modifier) +static bool +drmGetAfbcFormatModifierNameFromArm(uint64_t modifier, FILE *fp) { - uint64_t type = (modifier >> 52) & 0xf; uint64_t mode_value = modifier & AFBC_FORMAT_MOD_MODE_VALUE_MASK; uint64_t block_size = mode_value & AFBC_FORMAT_MOD_BLOCK_SIZE_MASK; - FILE *fp; - char *modifier_name = NULL; - size_t size = 0; - unsigned int i; - const char *block = NULL; const char *mode = NULL; bool did_print_mode = false; - /* misc type is already handled by the static table */ - if (type != DRM_FORMAT_MOD_ARM_TYPE_AFBC) - return NULL; - - fp = open_memstream(&modifier_name, &size); - if (!fp) - return NULL; - /* add block, can only have a (single) block */ switch (block_size) { case AFBC_FORMAT_MOD_BLOCK_SIZE_16x16: 
@@ -251,15 +237,13 @@ drmGetFormatModifierNameFromArm(uint64_t modifier) } if (!block) { - fclose(fp); - free(modifier_name); - return NULL; + return false; } fprintf(fp, "BLOCK_SIZE=%s,", block); /* add mode */ - for (i = 0; i < ARRAY_SIZE(arm_mode_value_table); i++) { + for (unsigned int i = 0; i < ARRAY_SIZE(arm_mode_value_table); i++) { if (arm_mode_value_table[i].modifier & mode_value) { mode = arm_mode_value_table[i].modifier_name; if (!did_print_mode) { @@ -271,7 +255,87 @@ drmGetFormatModifierNameFromArm(uint64_t modifier) } } + return true; +} + +static bool +drmGetAfrcFormatModifierNameFromArm(uint64_t modifier, FILE *fp) +{ + for (unsigned int i = 0; i < 2; ++i) { + uint64_t coding_unit_block = + (modifier >> (i * 4)) & AFRC_FORMAT_MOD_CU_SIZE_MASK; + const char *coding_unit_size = NULL; + + switch (coding_unit_block) { + case AFRC_FORMAT_MOD_CU_SIZE_16: + coding_unit_size = "CU_16"; + break; + case AFRC_FORMAT_MOD_CU_SIZE_24: + coding_unit_size = "CU_24"; + break; + case AFRC_FORMAT_MOD_CU_SIZE_32: + coding_unit_size = "CU_32"; + break; + } + + if (!coding_unit_size) { + if (i == 0) { + return false; + } + break; + } + + if (i == 0) { + fprintf(fp, "P0=%s,", coding_unit_size); + } else { + fprintf(fp, "P12=%s,", coding_unit_size); + } + } + + bool scan_layout = + (modifier & AFRC_FORMAT_MOD_LAYOUT_SCAN) == AFRC_FORMAT_MOD_LAYOUT_SCAN; + if (scan_layout) { + fprintf(fp, "SCAN"); + } else { + fprintf(fp, "ROT"); + } + return true; +} + +static char * +drmGetFormatModifierNameFromArm(uint64_t modifier) +{ + uint64_t type = (modifier >> 52) & 0xf; + + FILE *fp; + size_t size = 0; + char *modifier_name = NULL; + bool result = false; + + fp = open_memstream(&modifier_name, &size); + if (!fp) + return NULL; + + switch (type) { + case DRM_FORMAT_MOD_ARM_TYPE_AFBC: + result = drmGetAfbcFormatModifierNameFromArm(modifier, fp); + break; + case DRM_FORMAT_MOD_ARM_TYPE_AFRC: + result = drmGetAfrcFormatModifierNameFromArm(modifier, fp); + break; + /* misc type is 
already handled by the static table */ + case DRM_FORMAT_MOD_ARM_TYPE_MISC: + default: + result = false; + break; + } + fclose(fp); + if (!result) { + free(modifier_name); + return NULL; + } + return modifier_name; } commit d2875fe008663d08269107ca3e6364a14d6ccc5d Author: Dennis Tsiang <den...@ar...> Date: Mon Sep 20 11:14:52 2021 +0100 drm_fourcc: sync drm_fourcc with latest drm-next kernel Update drm_fourcc.h to include latest changes from drm-next branch. This brings in AFRC (Arm Fixed-Rate Compression) modifiers. Generated using make headers_install. Generated from drm-next branch commit 6880fa6 Signed-off-by: Dennis Tsiang <den...@ar...> diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h index cd3ce8a8..957c7be2 100644 --- a/include/drm/drm_fourcc.h +++ b/include/drm/drm_fourcc.h @@ -900,9 +900,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) /* * The top 4 bits (out of the 56 bits alloted for specifying vendor specific - * modifiers) denote the category for modifiers. Currently we have only two - * categories of modifiers ie AFBC and MISC. We can have a maximum of sixteen - * different categories. + * modifiers) denote the category for modifiers. Currently we have three + * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of + * sixteen different categories. */ #define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \ fourcc_mod_code(ARM, ((__u64)(__type) << 52) | ((__val) & 0x000fffffffffffffULL)) @@ -1017,6 +1017,109 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) */ #define AFBC_FORMAT_MOD_USM (1ULL << 12) +/* + * Arm Fixed-Rate Compression (AFRC) modifiers + * + * AFRC is a proprietary fixed rate image compression protocol and format, + * designed to provide guaranteed bandwidth and memory footprint + * reductions in graphics and media use-cases. + * + * AFRC buffers consist of one or more planes, with the same components + * and meaning as an uncompressed buffer using the same pixel format. 
+ * + * Within each plane, the pixel/luma/chroma values are grouped into + * "coding unit" blocks which are individually compressed to a + * fixed size (in bytes). All coding units within a given plane of a buffer + * store the same number of values, and have the same compressed size. + * + * The coding unit size is configurable, allowing different rates of compression. + * + * The start of each AFRC buffer plane must be aligned to an alignment granule which + * depends on the coding unit size. + * + * Coding Unit Size Plane Alignment + * ---------------- --------------- + * 16 bytes 1024 bytes + * 24 bytes 512 bytes + * 32 bytes 2048 bytes + * + * Coding units are grouped into paging tiles. AFRC buffer dimensions must be aligned + * to a multiple of the paging tile dimensions. + * The dimensions of each paging tile depend on whether the buffer is optimised for + * scanline (SCAN layout) or rotated (ROT layout) access. + * + * Layout Paging Tile Width Paging Tile Height + * ------ ----------------- ------------------ + * SCAN 16 coding units 4 coding units + * ROT 8 coding units 8 coding units + * + * The dimensions of each coding unit depend on the number of components + * in the compressed plane and whether the buffer is optimised for + * scanline (SCAN layout) or rotated (ROT layout) access. 
+ * + * Number of Components in Plane Layout Coding Unit Width Coding Unit Height + * ----------------------------- --------- ----------------- ------------------ + * 1 SCAN 16 samples 4 samples + * Example: 16x4 luma samples in a 'Y' plane + * 16x4 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 1 ROT 8 samples 8 samples + * Example: 8x8 luma samples in a 'Y' plane + * 8x8 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 2 DONT CARE 8 samples 4 samples + * Example: 8x4 chroma pairs in the 'UV' plane of a semi-planar YUV buffer + * ----------------------------- --------- ----------------- ------------------ + * 3 DONT CARE 4 samples 4 samples + * Example: 4x4 pixels in an RGB buffer without alpha + * ----------------------------- --------- ----------------- ------------------ + * 4 DONT CARE 4 samples 4 samples + * Example: 4x4 pixels in an RGB buffer with alpha + */ + +#define DRM_FORMAT_MOD_ARM_TYPE_AFRC 0x02 + +#define DRM_FORMAT_MOD_ARM_AFRC(__afrc_mode) \ + DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFRC, __afrc_mode) + +/* + * AFRC coding unit size modifier. + * + * Indicates the number of bytes used to store each compressed coding unit for + * one or more planes in an AFRC encoded buffer. The coding unit size for chrominance + * is the same for both Cb and Cr, which may be stored in separate planes. + * + * AFRC_FORMAT_MOD_CU_SIZE_P0 indicates the number of bytes used to store + * each compressed coding unit in the first plane of the buffer. For RGBA buffers + * this is the only plane, while for semi-planar and fully-planar YUV buffers, + * this corresponds to the luma plane. + * + * AFRC_FORMAT_MOD_CU_SIZE_P12 indicates the number of bytes used to store + * each compressed coding unit in the second and third planes in the buffer. 
+ * For semi-planar and fully-planar YUV buffers, this corresponds to the chroma plane(s). + * + * For single-plane buffers, AFRC_FORMAT_MOD_CU_SIZE_P0 must be specified + * and AFRC_FORMAT_MOD_CU_SIZE_P12 must be zero. + * For semi-planar and fully-planar buffers, both AFRC_FORMAT_MOD_CU_SIZE_P0 and + * AFRC_FORMAT_MOD_CU_SIZE_P12 must be specified. + */ +#define AFRC_FORMAT_MOD_CU_SIZE_MASK 0xf +#define AFRC_FORMAT_MOD_CU_SIZE_16 (1ULL) +#define AFRC_FORMAT_MOD_CU_SIZE_24 (2ULL) +#define AFRC_FORMAT_MOD_CU_SIZE_32 (3ULL) + +#define AFRC_FORMAT_MOD_CU_SIZE_P0(__afrc_cu_size) (__afrc_cu_size) +#define AFRC_FORMAT_MOD_CU_SIZE_P12(__afrc_cu_size) ((__afrc_cu_size) << 4) + +/* + * AFRC scanline memory layout. + * + * Indicates if the buffer uses the scanline-optimised layout + * for an AFRC encoded buffer, otherwise, it uses the rotation-optimised layout. + * The memory layout is the same for all planes. + */ +#define AFRC_FORMAT_MOD_LAYOUT_SCAN (1ULL << 8) + /* * Arm 16x16 Block U-Interleaved modifier * |
From: GitLab M. <git...@ke...> - 2021-11-23 08:54:44
|
.gitlab-ci.yml | 255 +++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 203 insertions(+), 52 deletions(-) New commits: commit b40d0a7d6c247347f777c417e92f84ec67ea0b39 Author: Emmanuel Vadot <ma...@Fr...> Date: Wed Nov 10 17:49:55 2021 +0100 ci: Add FreeBSD support Use qemu to do CI on FreeBSD. Not everything is compiled as all arm aren't supported on FreeBSD. Same thing for Nouveau. The tests aren't enable for now as they are all failing. Signed-off-by: Emmanuel Vadot <ma...@Fr...> diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b63559ba..876be951 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,6 +19,7 @@ include: ref: *template_sha file: - '/templates/debian.yml' + - '/templates/freebsd.yml' - '/templates/ci-fairy.yml' variables: @@ -63,6 +64,21 @@ stages: variables: BUILD_ARCH: "armv7" +.os-freebsd: + variables: + BUILD_OS: freebsd + FDO_DISTRIBUTION_VERSION: "13.0" + FDO_DISTRIBUTION_PACKAGES: 'meson ninja pkgconf libpciaccess libpthread-stubs py38-docutils cairo' + # bump this tag every time you change something which requires rebuilding the + # base image + FDO_DISTRIBUTION_TAG: "2021-11-10.1" + +.freebsd-x86_64: + extends: + - .os-freebsd + variables: + BUILD_ARCH: "x86_64" + # Build our base container image, which contains the core distribution, the # toolchain, and all our build dependencies. This will be reused in the build # stage. @@ -98,6 +114,15 @@ armv7-debian-container_prep: GIT_STRATEGY: none FDO_BASE_IMAGE: "arm32v7/debian:$FDO_DISTRIBUTION_VERSION" +x86_64-freebsd-container_prep: + extends: + - .ci-rules + - .freebsd-x86_64 + - .fdo.qemu-build@freebsd@x86_64 + stage: "Base container" + variables: + GIT_STRATEGY: none + # Core build environment. 
.build-env: variables: @@ -142,6 +167,20 @@ armv7-debian-container_prep: - job: armv7-debian-container_prep artifacts: false +.build-env-freebsd-x86_64: + variables: + # Compiling with ASan+UBSan appears to trigger an infinite loop in the + # compiler shipped with FreeBSD 13.0, so we only use UBSan here. + # Additionally, sanitizers can't be used with b_lundef on FreeBSD. + MESON_BUILD_TYPE: "-Dbuildtype=debug -Db_sanitize=undefined -Db_lundef=false" + extends: + - .fdo.suffixed-image@freebsd + - .freebsd-x86_64 + - .build-env + needs: + - job: x86_64-freebsd-container_prep + artifacts: false + # BUILD .do-build: @@ -177,6 +216,30 @@ armv7-debian-container_prep: paths: - build/meson-logs/* +.do-build-qemu: + extends: + - .ci-rules + stage: "Build" + script: + # Start the VM and copy our workspace to the VM + - /app/vmctl start + - scp -r $PWD "vm:" + # The `set +e is needed to ensure that we always copy the meson logs back to + # the workspace to see details about the failed tests. + - | + set +e + /app/vmctl exec "pkg info; cd $CI_PROJECT_NAME ; meson build -D amdgpu=true -D cairo-tests=true -D intel=true -D libkms=true -D man-pages=true -D nouveau=false -D radeon=true -D valgrind=auto && ninja -C build" + set -ex + scp -r vm:$CI_PROJECT_NAME/build/meson-logs . + /app/vmctl exec "ninja -C $CI_PROJECT_NAME/build install" + mkdir -p $PREFIX && scp -r vm:$PREFIX/ $PREFIX/ + # Finally, shut down the VM. + - /app/vmctl stop + artifacts: + when: on_failure + paths: + - build/meson-logs/* + # Full build and test. x86_64-debian-build: extends: @@ -210,3 +273,8 @@ meson-arch-daily: valgrind python-docutils extends: .do-build + +x86_64-freebsd-build: + extends: + - .build-env-freebsd-x86_64 + - .do-build-qemu commit e722ba9f675e29f3cc7998c2aeab7dd2451cc458 Author: Emmanuel Vadot <ma...@Fr...> Date: Tue Nov 9 21:26:24 2021 +0100 ci: Switch freedesktop/ci-templates This switch to the latest ci-templates. Most of the file is taken from the one in wayland. 
Signed-off-by: Emmanuel Vadot <ma...@Fr...> diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 26eaf83b..b63559ba 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,47 +12,142 @@ # main repository, it's recommended to remove the image from the source # repository's container registry, so that the image from the main # repository's registry will be used there as well. -variables: - UPSTREAM_REPO: mesa/drm - DEBIAN_TAG: "2021-02-11" - DEBIAN_VERSION: buster-slim - DEBIAN_IMAGE: "$CI_REGISTRY_IMAGE/debian/$DEBIAN_VERSION:$DEBIAN_TAG" +.templates_sha: &template_sha 567700e483aabed992d0a4fea84994a0472deff6 # see https://docs.gitlab.com/ee/ci/yaml/#includefile include: - - project: 'wayland/ci-templates' - ref: 0a9bdd33a98f05af6761ab118b5074952242aab0 - file: '/templates/debian.yml' + - project: 'freedesktop/ci-templates' + ref: *template_sha + file: + - '/templates/debian.yml' + - '/templates/ci-fairy.yml' -stages: - - containers - - build +variables: + FDO_UPSTREAM_REPO: mesa/drm + FDO_REPO_SUFFIX: "$BUILD_OS/$BUILD_ARCH" +stages: + - "Base container" + - "Build" -# When & how to run the CI -.ci-run-policy: - except: - - schedules - retry: - max: 2 - when: - - runner_system_failure +.ci-rules: + rules: + - when: on_success # CONTAINERS -debian: - stage: containers +.os-debian: + variables: + BUILD_OS: debian + FDO_DISTRIBUTION_VERSION: buster + FDO_DISTRIBUTION_PACKAGES: 'build-essential docbook-xsl libatomic-ops-dev libcairo2-dev libcunit1-dev libpciaccess-dev meson ninja-build pkg-config python3 python3-pip python3-wheel python3-setuptools python3-docutils valgrind' + FDO_DISTRIBUTION_EXEC: 'pip3 install meson==0.52.1' + # bump this tag every time you change something which requires rebuilding the + # base image + FDO_DISTRIBUTION_TAG: "2021-08-03.0" + +.debian-x86_64: + extends: + - .os-debian + variables: + BUILD_ARCH: "x86-64" + +.debian-aarch64: + extends: + - .os-debian + variables: + BUILD_ARCH: "aarch64" + +.debian-armv7: extends: - - .ci-run-policy - - 
.debian@container-ifnot-exists + - .os-debian variables: - GIT_STRATEGY: none # no need to pull the whole tree for rebuilding the image - DEBIAN_EXEC: 'bash .gitlab-ci/debian-install.sh' + BUILD_ARCH: "armv7" +# Build our base container image, which contains the core distribution, the +# toolchain, and all our build dependencies. This will be reused in the build +# stage. +x86_64-debian-container_prep: + extends: + - .ci-rules + - .debian-x86_64 + - .fdo.container-build@debian + stage: "Base container" + variables: + GIT_STRATEGY: none + +aarch64-debian-container_prep: + extends: + - .ci-rules + - .debian-aarch64 + - .fdo.container-build@debian + tags: + - aarch64 + stage: "Base container" + variables: + GIT_STRATEGY: none + +armv7-debian-container_prep: + extends: + - .ci-rules + - .debian-armv7 + - .fdo.container-build@debian + tags: + - aarch64 + stage: "Base container" + variables: + GIT_STRATEGY: none + FDO_BASE_IMAGE: "arm32v7/debian:$FDO_DISTRIBUTION_VERSION" + +# Core build environment. +.build-env: + variables: + MESON_BUILD_TYPE: "-Dbuildtype=debug -Doptimization=0 -Db_sanitize=address,undefined" + +# OS/architecture-specific variants +.build-env-debian-x86_64: + extends: + - .fdo.suffixed-image@debian + - .debian-x86_64 + - .build-env + needs: + - job: x86_64-debian-container_prep + artifacts: false + +.build-env-debian-aarch64: + extends: + - .fdo.suffixed-image@debian + - .debian-aarch64 + - .build-env + variables: + # At least with the versions we have, the LSan runtime makes fork unusably + # slow on AArch64, which is bad news since the test suite decides to fork + # for every single subtest. For now, in order to get AArch64 builds and + # tests into CI, just assume that we're not going to leak any more on + # AArch64 than we would on ARMv7 or x86-64. 
+ ASAN_OPTIONS: "detect_leaks=0" + tags: + - aarch64 + needs: + - job: aarch64-debian-container_prep + artifacts: false + +.build-env-debian-armv7: + extends: + - .fdo.suffixed-image@debian + - .debian-armv7 + - .build-env + tags: + - aarch64 + needs: + - job: armv7-debian-container_prep + artifacts: false # BUILD -.meson-build: - stage: build +.do-build: + extends: + - .ci-rules + stage: "Build" variables: GIT_DEPTH: 10 script: @@ -74,7 +169,6 @@ debian: -D valgrind=auto -D vc4=true -D vmwgfx=true - ${CROSS+--cross /cross_file-$CROSS.txt} - ninja -C build - ninja -C build test - DESTDIR=$PWD/install ninja -C build install @@ -83,34 +177,23 @@ debian: paths: - build/meson-logs/* -meson-x86_64: +# Full build and test. +x86_64-debian-build: extends: - - .ci-run-policy - - .meson-build - image: $DEBIAN_IMAGE - needs: - - debian - -meson-i386: - extends: meson-x86_64 - variables: - CROSS: i386 - -meson-aarch64: - extends: meson-x86_64 - variables: - CROSS: arm64 + - .build-env-debian-x86_64 + - .do-build -meson-armhf: - extends: meson-x86_64 - variables: - CROSS: armhf +aarch64-debian-build: + extends: + - .build-env-debian-aarch64 + - .do-build -meson-ppc64el: - extends: meson-x86_64 - variables: - CROSS: ppc64el +armv7-debian-build: + extends: + - .build-env-debian-armv7 + - .do-build +# Daily build meson-arch-daily: rules: - if: '$SCHEDULE == "arch-daily"' @@ -126,4 +209,4 @@ meson-arch-daily: meson valgrind python-docutils - extends: .meson-build + extends: .do-build |
From: GitLab M. <git...@ke...> - 2021-12-14 00:24:13
|
amdgpu/meson.build | 4 ++++ etnaviv/meson.build | 4 ++++ exynos/meson.build | 4 ++++ freedreno/meson.build | 4 ++++ intel/meson.build | 4 ++++ libkms/meson.build | 4 ++++ meson.build | 30 +++++++++++++++++------------- nouveau/meson.build | 4 ++++ omap/meson.build | 4 ++++ radeon/meson.build | 4 ++++ tegra/meson.build | 4 ++++ 11 files changed, 57 insertions(+), 13 deletions(-) New commits: commit d9188a7750c99ce9f3d9e0e2aea93f86491e0cb6 Author: Dylan Baker <dy...@pn...> Date: Wed Nov 17 12:57:54 2021 -0800 meson: add override_dependency when possible This allows consumers of libdrm as a subproject to use the simpler `dependency('libdrm', fallback : 'libdrm')` syntax, as the libdrm build files already tell meson that they override a dependency called "libdrm". Signed-off-by: Dylan Baker <dy...@pn...> Reviewed-by: Simon Ser <co...@em...> diff --git a/amdgpu/meson.build b/amdgpu/meson.build index 3301a10e..b2d19875 100644 --- a/amdgpu/meson.build +++ b/amdgpu/meson.build @@ -57,6 +57,10 @@ ext_libdrm_amdgpu = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_amdgpu', ext_libdrm_amdgpu) +endif + test( 'amdgpu-symbols-check', symbols_check, diff --git a/etnaviv/meson.build b/etnaviv/meson.build index 8b82ed07..ffc0a673 100644 --- a/etnaviv/meson.build +++ b/etnaviv/meson.build @@ -52,6 +52,10 @@ ext_libdrm_etnaviv = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_etnaviv', ext_libdrm_etnaviv) +endif + test( 'etnaviv-symbols-check', symbols_check, diff --git a/exynos/meson.build b/exynos/meson.build index 7d1edfea..bd55a500 100644 --- a/exynos/meson.build +++ b/exynos/meson.build @@ -37,6 +37,10 @@ ext_libdrm_exynos = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 
0.54.0') + meson.override_dependency('libdrm_exynos', ext_libdrm_exynos) +endif + pkg.generate( name : 'libdrm_exynos', libraries : libdrm_exynos, diff --git a/freedreno/meson.build b/freedreno/meson.build index 49e66593..4fdc8143 100644 --- a/freedreno/meson.build +++ b/freedreno/meson.build @@ -55,6 +55,10 @@ ext_libdrm_freedreno = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_freedreno', ext_libdrm_freedreno) +endif + install_headers( 'freedreno_drmif.h', 'freedreno_ringbuffer.h', subdir : 'freedreno' diff --git a/intel/meson.build b/intel/meson.build index 5fa06c28..75e6bc3e 100644 --- a/intel/meson.build +++ b/intel/meson.build @@ -40,6 +40,10 @@ ext_libdrm_intel = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_intel', ext_libdrm_intel) +endif + install_headers( 'intel_bufmgr.h', 'intel_aub.h', 'intel_debug.h', subdir : 'libdrm', diff --git a/libkms/meson.build b/libkms/meson.build index 8d17bb2e..cc487227 100644 --- a/libkms/meson.build +++ b/libkms/meson.build @@ -56,6 +56,10 @@ ext_libkms = declare_dependency( include_directories : [libkms_include], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('kms', ext_libkms) +endif + install_headers('libkms.h', subdir : 'libkms') pkg.generate( diff --git a/meson.build b/meson.build index 62dec8dd..f5704cf1 100644 --- a/meson.build +++ b/meson.build @@ -340,6 +340,10 @@ ext_libdrm = declare_dependency( include_directories : [inc_root, inc_drm], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm', ext_libdrm) +endif + install_headers('libsync.h', 'xf86drm.h', 'xf86drmMode.h') install_headers( 'include/drm/drm.h', 'include/drm/drm_fourcc.h', 'include/drm/drm_mode.h', diff --git a/nouveau/meson.build 
b/nouveau/meson.build index af45336c..82947951 100644 --- a/nouveau/meson.build +++ b/nouveau/meson.build @@ -35,6 +35,10 @@ ext_libdrm_nouveau = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_nouveau', ext_libdrm_nouveau) +endif + install_headers('nouveau.h', subdir : 'libdrm/nouveau') install_headers( 'nvif/class.h', 'nvif/cl0080.h', 'nvif/cl9097.h', 'nvif/if0002.h', diff --git a/omap/meson.build b/omap/meson.build index bfd59f05..bcf5b622 100644 --- a/omap/meson.build +++ b/omap/meson.build @@ -34,6 +34,10 @@ ext_libdrm_omap = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_omap', ext_libdrm_omap) +endif + install_headers('omap_drmif.h', subdir : 'libdrm') install_headers('omap_drm.h', subdir : 'omap') diff --git a/radeon/meson.build b/radeon/meson.build index 31fe9cd0..abc9be83 100644 --- a/radeon/meson.build +++ b/radeon/meson.build @@ -41,6 +41,10 @@ ext_libdrm_radeon = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_radeon', ext_libdrm_radeon) +endif + install_headers( 'radeon_bo.h', 'radeon_cs.h', 'radeon_surface.h', 'radeon_bo_gem.h', 'radeon_cs_gem.h', 'radeon_bo_int.h', 'radeon_cs_int.h', 'r600_pci_ids.h', diff --git a/tegra/meson.build b/tegra/meson.build index edddf72b..51caf67a 100644 --- a/tegra/meson.build +++ b/tegra/meson.build @@ -34,6 +34,10 @@ ext_libdrm_tegra = declare_dependency( include_directories : [inc_drm, include_directories('.')], ) +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_tegra', ext_libdrm_tegra) +endif + install_headers('tegra.h', subdir : 'libdrm') pkg.generate( commit 9324e4f054fa4d7c213400545e8fa6b42392c5a5 Author: Dylan Baker 
<dy...@pn...> Date: Wed Nov 17 12:50:01 2021 -0800 meson: use dictionary kwargs So we don't have to duplicate the libdrm library call just to not set the version keyword for android Reviewed-by: Simon Ser <co...@em...> Signed-off-by: Dylan Baker <dy...@pn...> diff --git a/meson.build b/meson.build index 5824da8a..62dec8dd 100644 --- a/meson.build +++ b/meson.build @@ -308,23 +308,23 @@ libdrm_files = [files( config_file, format_mod_static_table ] +# Build an unversioned so on android if android - libdrm = library('drm', libdrm_files, - c_args : libdrm_c_args, - dependencies : [dep_valgrind, dep_rt, dep_m], - include_directories : inc_drm, - install : true, - ) + libdrm_kw = {} else - libdrm = library('drm', libdrm_files, - c_args : libdrm_c_args, - dependencies : [dep_valgrind, dep_rt, dep_m], - include_directories : inc_drm, - install : true, - version: '2.4.0' - ) + libdrm_kw = {'version' : '2.4.0'} endif +libdrm = library( + 'drm', + libdrm_files, + c_args : libdrm_c_args, + dependencies : [dep_valgrind, dep_rt, dep_m], + include_directories : inc_drm, + install : true, + kwargs : libdrm_kw, +) + test( 'core-symbols-check', symbols_check, |
From: GitLab M. <git...@ke...> - 2022-02-19 23:21:31
|
tests/amdgpu/jpeg_tests.c | 9 +---- tests/amdgpu/vcn_tests.c | 81 +++++++++++++++------------------------------- 2 files changed, 31 insertions(+), 59 deletions(-) New commits: commit d13ab997f5664db24f1d4e324374c1d5b19895eb Author: Sathishkumar S <sat...@am...> Date: Thu Feb 10 19:19:50 2022 +0530 tests/amdgpu: enable vcn test based on ip query family_id checks can be removed and instead use ip major/minor version Signed-off-by: Sathishkumar S <sat...@am...> Reviewed-by: Veerabadhran Gopalakrishnan <vee...@am...> Reviewed-by: Leo Liu <le...@am...> diff --git a/tests/amdgpu/vcn_tests.c b/tests/amdgpu/vcn_tests.c index 15d573d3..ff97f344 100644 --- a/tests/amdgpu/vcn_tests.c +++ b/tests/amdgpu/vcn_tests.c @@ -70,7 +70,13 @@ static uint32_t *ib_cpu; static amdgpu_bo_handle resources[MAX_RESOURCES]; static unsigned num_resources; -static struct amdgpu_vcn_reg reg; + +static uint8_t vcn_reg_index; +static struct amdgpu_vcn_reg reg[] = { + {0x81c4, 0x81c5, 0x81c3, 0x81ff, 0x81c6}, + {0x504, 0x505, 0x503, 0x53f, 0x506}, + {0x10, 0x11, 0xf, 0x29, 0x26d}, +}; static void amdgpu_cs_vcn_dec_create(void); static void amdgpu_cs_vcn_dec_decode(void); @@ -125,45 +131,14 @@ CU_BOOL suite_vcn_tests_enable(void) amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", CU_FALSE); } - if (family_id == AMDGPU_FAMILY_RV) { - if (chip_id >= (chip_rev + 0x91)) { - reg.data0 = 0x504; - reg.data1 = 0x505; - reg.cmd = 0x503; - reg.nop = 0x53f; - reg.cntl = 0x506; - } else { - reg.data0 = 0x81c4; - reg.data1 = 0x81c5; - reg.cmd = 0x81c3; - reg.nop = 0x81ff; - reg.cntl = 0x81c6; - } - } else if (family_id == AMDGPU_FAMILY_NV) { - if (chip_id == (chip_rev + 0x28) || - chip_id == (chip_rev + 0x32) || - chip_id == (chip_rev + 0x3c) || - chip_id == (chip_rev + 0x46)) { - reg.data0 = 0x10; - reg.data1 = 0x11; - reg.cmd = 0xf; - reg.nop = 0x29; - reg.cntl = 0x26d; - } - else { - reg.data0 = 0x504; - reg.data1 = 0x505; - reg.cmd = 0x503; - reg.nop = 0x53f; - reg.cntl = 0x506; - } - } else if 
(family_id == AMDGPU_FAMILY_AI) { - reg.data0 = 0x10; - reg.data1 = 0x11; - reg.cmd = 0xf; - reg.nop = 0x29; - reg.cntl = 0x26d; - } else + if (info.hw_ip_version_major == 1) + vcn_reg_index = 0; + else if (info.hw_ip_version_major == 2) + vcn_reg_index = 1; + else if ((info.hw_ip_version_major == 2 && info.hw_ip_version_minor >= 5) || + info.hw_ip_version_major == 3) + vcn_reg_index = 2; + else return CU_FALSE; return CU_TRUE; @@ -307,11 +282,11 @@ static void free_resource(struct amdgpu_vcn_bo *vcn_bo) static void vcn_dec_cmd(uint64_t addr, unsigned cmd, int *idx) { - ib_cpu[(*idx)++] = reg.data0; + ib_cpu[(*idx)++] = reg[vcn_reg_index].data0; ib_cpu[(*idx)++] = addr; - ib_cpu[(*idx)++] = reg.data1; + ib_cpu[(*idx)++] = reg[vcn_reg_index].data1; ib_cpu[(*idx)++] = addr >> 32; - ib_cpu[(*idx)++] = reg.cmd; + ib_cpu[(*idx)++] = reg[vcn_reg_index].cmd; ib_cpu[(*idx)++] = cmd << 1; } @@ -332,14 +307,14 @@ static void amdgpu_cs_vcn_dec_create(void) memcpy(msg_buf.ptr, vcn_dec_create_msg, sizeof(vcn_dec_create_msg)); len = 0; - ib_cpu[len++] = reg.data0; + ib_cpu[len++] = reg[vcn_reg_index].data0; ib_cpu[len++] = msg_buf.addr; - ib_cpu[len++] = reg.data1; + ib_cpu[len++] = reg[vcn_reg_index].data1; ib_cpu[len++] = msg_buf.addr >> 32; - ib_cpu[len++] = reg.cmd; + ib_cpu[len++] = reg[vcn_reg_index].cmd; ib_cpu[len++] = 0; for (; len % 16; ) { - ib_cpu[len++] = reg.nop; + ib_cpu[len++] = reg[vcn_reg_index].nop; ib_cpu[len++] = 0; } @@ -407,10 +382,10 @@ static void amdgpu_cs_vcn_dec_decode(void) vcn_dec_cmd(it_addr, 0x204, &len); vcn_dec_cmd(ctx_addr, 0x206, &len); - ib_cpu[len++] = reg.cntl; + ib_cpu[len++] = reg[vcn_reg_index].cntl; ib_cpu[len++] = 0x1; for (; len % 16; ) { - ib_cpu[len++] = reg.nop; + ib_cpu[len++] = reg[vcn_reg_index].nop; ib_cpu[len++] = 0; } @@ -442,14 +417,14 @@ static void amdgpu_cs_vcn_dec_destroy(void) memcpy(msg_buf.ptr, vcn_dec_destroy_msg, sizeof(vcn_dec_destroy_msg)); len = 0; - ib_cpu[len++] = reg.data0; + ib_cpu[len++] = 
reg[vcn_reg_index].data0; ib_cpu[len++] = msg_buf.addr; - ib_cpu[len++] = reg.data1; + ib_cpu[len++] = reg[vcn_reg_index].data1; ib_cpu[len++] = msg_buf.addr >> 32; - ib_cpu[len++] = reg.cmd; + ib_cpu[len++] = reg[vcn_reg_index].cmd; ib_cpu[len++] = 0; for (; len % 16; ) { - ib_cpu[len++] = reg.nop; + ib_cpu[len++] = reg[vcn_reg_index].nop; ib_cpu[len++] = 0; } commit 1d92f327411308dbc98502b8e64a5f3f3bc42f2a Author: Sathishkumar S <sat...@am...> Date: Thu Feb 10 17:50:57 2022 +0530 tests/amdgpu: enable jpeg test based on ip query enable jpeg test if ip query is successful and avoid family_id based checks, instead use ip major/minor version Signed-off-by: Sathishkumar S <sat...@am...> Reviewed-by: Veerabadhran Gopalakrishnan <vee...@am...> Reviewed-by: Leo Liu <le...@am...> diff --git a/tests/amdgpu/jpeg_tests.c b/tests/amdgpu/jpeg_tests.c index 5e50bef6..772a4fec 100644 --- a/tests/amdgpu/jpeg_tests.c +++ b/tests/amdgpu/jpeg_tests.c @@ -179,12 +179,9 @@ CU_BOOL suite_jpeg_tests_enable(void) return CU_FALSE; } - if (family_id == AMDGPU_FAMILY_RV) { - if (chip_id >= (chip_rev + 0x91)) - jpeg_direct_reg = true; - else - jpeg_direct_reg = false; - } else if (family_id == AMDGPU_FAMILY_NV) + if (info.hw_ip_version_major == 1) + jpeg_direct_reg = false; + else if (info.hw_ip_version_major > 1 && info.hw_ip_version_major <= 3) jpeg_direct_reg = true; else return CU_FALSE; |
From: GitLab M. <git...@ke...> - 2022-03-31 13:48:47
|
tests/amdgpu/amdgpu_test.h | 4 tests/amdgpu/basic_tests.c | 770 ++++++++++++++++++++++++++++++++---------- tests/amdgpu/deadlock_tests.c | 28 + 3 files changed, 614 insertions(+), 188 deletions(-) New commits: commit 85393adb12ad6277b21b885f11a3b94ef2d531db Author: Flora Cui <flo...@am...> Date: Wed Nov 13 16:16:55 2019 +0800 tests/amdgpu: add dispatch test for gfx10 Signed-off-by: Flora Cui <flo...@am...> diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index 43b80c88..688260d9 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -353,6 +353,12 @@ static const uint32_t bufferclear_cs_shader_gfx9[] = { 0xbf810000 }; +static const uint32_t bufferclear_cs_shader_gfx10[] = { + 0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205, + 0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004, + 0xBF810000 +}; + static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = { {0x2e12, 0x000C0041}, //{ mmCOMPUTE_PGM_RSRC1, 0x000C0041 }, {0x2e13, 0x00000090}, //{ mmCOMPUTE_PGM_RSRC2, 0x00000090 }, @@ -369,6 +375,11 @@ static const uint32_t buffercopy_cs_shader_gfx9[] = { 0xe01c2000, 0x80010200, 0xbf810000 }; +static const uint32_t buffercopy_cs_shader_gfx10[] = { + 0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201, + 0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000 +}; + static const uint32_t preamblecache_gfx9[] = { 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, @@ -657,6 +668,18 @@ struct amdgpu_test_shader memcpy_cs_hang_slow_rv = { 1 }; +unsigned int memcpy_cs_hang_slow_nv_codes[] = { + 0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100, + 0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000 +}; + +struct amdgpu_test_shader memcpy_cs_hang_slow_nv = { + memcpy_cs_hang_slow_nv_codes, + 4, + 3, + 1 +}; + unsigned int memcpy_ps_hang_slow_ai_codes[] = { 0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000, 0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00, 
@@ -2446,6 +2469,9 @@ static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) case AMDGPU_FAMILY_RV: shader = &memcpy_cs_hang_slow_rv; break; + case AMDGPU_FAMILY_NV: + shader = &memcpy_cs_hang_slow_nv; + break; default: return -1; break; @@ -2466,19 +2492,30 @@ static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) } static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, - int cs_type) + int cs_type, + uint32_t version) { uint32_t shader_size; const uint32_t *shader; switch (cs_type) { case CS_BUFFERCLEAR: - shader = bufferclear_cs_shader_gfx9; - shader_size = sizeof(bufferclear_cs_shader_gfx9); + if (version == 9) { + shader = bufferclear_cs_shader_gfx9; + shader_size = sizeof(bufferclear_cs_shader_gfx9); + } else if (version == 10) { + shader = bufferclear_cs_shader_gfx10; + shader_size = sizeof(bufferclear_cs_shader_gfx10); + } break; case CS_BUFFERCOPY: - shader = buffercopy_cs_shader_gfx9; - shader_size = sizeof(buffercopy_cs_shader_gfx9); + if (version == 9) { + shader = buffercopy_cs_shader_gfx9; + shader_size = sizeof(buffercopy_cs_shader_gfx9); + } else if (version == 10) { + shader = buffercopy_cs_shader_gfx10; + shader_size = sizeof(buffercopy_cs_shader_gfx10); + } break; case CS_HANG: shader = memcpy_ps_hang; @@ -2493,7 +2530,7 @@ static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, return 0; } -static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) +static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type, uint32_t version) { int i = 0; @@ -2515,29 +2552,57 @@ static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type) ptr[i++] = 0x218; ptr[i++] = 0; + /* Set new sh registers in GFX10 to 0 */ + if (version == 10) { + /* mmCOMPUTE_SHADER_CHKSUM */ + ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); + ptr[i++] = 0x22a; + ptr[i++] = 0; + /* mmCOMPUTE_REQ_CTRL */ + ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 6); + ptr[i++] = 0x222; + i += 6; + /* mmCP_COHER_START_DELAY */ + ptr[i++] = 
PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x7b; + ptr[i++] = 0x20; + } return i; } -static int amdgpu_dispatch_write_cumask(uint32_t *ptr) +static int amdgpu_dispatch_write_cumask(uint32_t *ptr, uint32_t version) { int i = 0; /* Issue commands to set cu mask used in current dispatch */ - /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); - ptr[i++] = 0x216; - ptr[i++] = 0xffffffff; - ptr[i++] = 0xffffffff; - /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ - ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); - ptr[i++] = 0x219; - ptr[i++] = 0xffffffff; - ptr[i++] = 0xffffffff; + if (version == 9) { + /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ + ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); + ptr[i++] = 0x216; + ptr[i++] = 0xffffffff; + ptr[i++] = 0xffffffff; + /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ + ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); + ptr[i++] = 0x219; + ptr[i++] = 0xffffffff; + ptr[i++] = 0xffffffff; + } else if (version == 10) { + /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ + ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2); + ptr[i++] = 0x30000216; + ptr[i++] = 0xffffffff; + ptr[i++] = 0xffffffff; + /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ + ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2); + ptr[i++] = 0x30000219; + ptr[i++] = 0xffffffff; + ptr[i++] = 0xffffffff; + } return i; } -static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) +static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr, uint32_t version) { int i, j; @@ -2557,12 +2622,20 @@ static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr) ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; } + if (version == 10) { + /* mmCOMPUTE_PGM_RSRC3 */ + ptr[i++] = 
PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); + ptr[i++] = 0x228; + ptr[i++] = 0; + } + return i; } static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, - uint32_t ring) + uint32_t ring, + uint32_t version) { amdgpu_context_handle context_handle; amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; @@ -2598,7 +2671,7 @@ static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, CU_ASSERT_EQUAL(r, 0); memset(ptr_shader, 0, bo_shader_size); - r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR); + r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR, version); CU_ASSERT_EQUAL(r, 0); r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, @@ -2608,13 +2681,13 @@ static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, CU_ASSERT_EQUAL(r, 0); i = 0; - i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); + i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); /* Issue commands to set cu mask used in current dispatch */ - i += amdgpu_dispatch_write_cumask(ptr_cmd + i); + i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); /* Writes shader state to HW */ - i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); + i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); /* Write constant data */ /* Writes the UAV constant data to the SGPRs. 
*/ @@ -2623,7 +2696,10 @@ static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, ptr_cmd[i++] = mc_address_dst; ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; ptr_cmd[i++] = 0x400; - ptr_cmd[i++] = 0x74fac; + if (version == 9) + ptr_cmd[i++] = 0x74fac; + else if (version == 10) + ptr_cmd[i++] = 0x1104bfac; /* Sets a range of pixel shader constants */ ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); @@ -2705,6 +2781,7 @@ static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, uint32_t ip_type, uint32_t ring, + uint32_t version, int hang) { amdgpu_context_handle context_handle; @@ -2744,7 +2821,7 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, memset(ptr_shader, 0, bo_shader_size); cs_type = hang ? CS_HANG : CS_BUFFERCOPY; - r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type); + r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version); CU_ASSERT_EQUAL(r, 0); r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, @@ -2762,13 +2839,13 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, memset(ptr_src, 0x55, bo_dst_size); i = 0; - i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); + i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); /* Issue commands to set cu mask used in current dispatch */ - i += amdgpu_dispatch_write_cumask(ptr_cmd + i); + i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); /* Writes shader state to HW */ - i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); + i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); /* Write constant data */ /* Writes the texture resource constants data to the SGPRs */ @@ -2777,7 +2854,10 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, ptr_cmd[i++] = mc_address_src; ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; ptr_cmd[i++] = 0x400; - ptr_cmd[i++] = 
0x74fac; + if (version == 9) + ptr_cmd[i++] = 0x74fac; + else if (version == 10) + ptr_cmd[i++] = 0x1104bfac; /* Writes the UAV constant data to the SGPRs. */ ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); @@ -2785,7 +2865,10 @@ static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, ptr_cmd[i++] = mc_address_dst; ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; ptr_cmd[i++] = 0x400; - ptr_cmd[i++] = 0x74fac; + if (version == 9) + ptr_cmd[i++] = 0x74fac; + else if (version == 10) + ptr_cmd[i++] = 0x1104bfac; /* clear mmCOMPUTE_RESOURCE_LIMITS */ ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); @@ -2869,16 +2952,22 @@ static void amdgpu_compute_dispatch_test(void) { int r; struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id; + uint32_t ring_id, version; r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); CU_ASSERT_EQUAL(r, 0); if (!info.available_rings) printf("SKIP ... as there's no compute ring\n"); + version = info.hw_ip_version_major; + if (version != 9 && version != 10) { + printf("SKIP ... unsupported gfx version %d\n", version); + return; + } + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id); - amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, 0); + amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version); + amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version, 0); } } @@ -2886,16 +2975,22 @@ static void amdgpu_gfx_dispatch_test(void) { int r; struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id; + uint32_t ring_id, version; r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); CU_ASSERT_EQUAL(r, 0); if (!info.available_rings) printf("SKIP ... as there's no graphics ring\n"); + version = info.hw_ip_version_major; + if (version != 9 && version != 10) { + printf("SKIP ... 
unsupported gfx version %d\n", version); + return; + } + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id); - amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, 0); + amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version); + amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version, 0); } } @@ -2903,22 +2998,28 @@ void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip { int r; struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id; + uint32_t ring_id, version; r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); CU_ASSERT_EQUAL(r, 0); if (!info.available_rings) printf("SKIP ... as there's no ring for ip %d\n", ip_type); + version = info.hw_ip_version_major; + if (version != 9 && version != 10) { + printf("SKIP ... unsupported gfx version %d\n", version); + return; + } + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 1); - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); + amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); + amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 1); + amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); } } static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, - uint32_t ip_type, uint32_t ring) + uint32_t ip_type, uint32_t ring, int version) { amdgpu_context_handle context_handle; amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; @@ -2977,13 +3078,13 @@ static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_ha memset(ptr_src, 0x55, bo_dst_size); i = 0; - i += amdgpu_dispatch_init(ptr_cmd + i, ip_type); + i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, 
version); /* Issue commands to set cu mask used in current dispatch */ - i += amdgpu_dispatch_write_cumask(ptr_cmd + i); + i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); /* Writes shader state to HW */ - i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader); + i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); /* Write constant data */ /* Writes the texture resource constants data to the SGPRs */ @@ -2992,7 +3093,10 @@ static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_ha ptr_cmd[i++] = mc_address_src; ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; ptr_cmd[i++] = 0x400000; - ptr_cmd[i++] = 0x74fac; + if (version == 9) + ptr_cmd[i++] = 0x74fac; + else if (version == 10) + ptr_cmd[i++] = 0x1104bfac; /* Writes the UAV constant data to the SGPRs. */ ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); @@ -3000,7 +3104,10 @@ static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_ha ptr_cmd[i++] = mc_address_dst; ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; ptr_cmd[i++] = 0x400000; - ptr_cmd[i++] = 0x74fac; + if (version == 9) + ptr_cmd[i++] = 0x74fac; + else if (version == 10) + ptr_cmd[i++] = 0x1104bfac; /* clear mmCOMPUTE_RESOURCE_LIMITS */ ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); @@ -3072,17 +3179,23 @@ void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32 { int r; struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id; + uint32_t ring_id, version; r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); CU_ASSERT_EQUAL(r, 0); if (!info.available_rings) printf("SKIP ... as there's no ring for ip %d\n", ip_type); + version = info.hw_ip_version_major; + if (version != 9 && version != 10) { + printf("SKIP ... 
unsupported gfx version %d\n", version); + return; + } + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); - amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id); - amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, 0); + amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); + amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id, version); + amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); } } commit f2314a487158cde5f4eccf72dd77de30bb437c63 Author: Flora Cui <flo...@am...> Date: Wed Nov 13 13:54:54 2019 +0800 tests/amdgpu: add draw test for gfx10 Signed-off-by: Flora Cui <flo...@am...> diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h index 068840c9..9f4453db 100644 --- a/tests/amdgpu/amdgpu_test.h +++ b/tests/amdgpu/amdgpu_test.h @@ -285,8 +285,8 @@ extern CU_TestInfo cp_dma_tests[]; void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type); void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type); void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, - int hang); -void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring); + int version, int hang); +void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version); /** * Initialize security test suite diff --git a/tests/amdgpu/basic_tests.c b/tests/amdgpu/basic_tests.c index 8afd05c1..43b80c88 100644 --- a/tests/amdgpu/basic_tests.c +++ b/tests/amdgpu/basic_tests.c @@ -292,6 +292,8 @@ CU_TestInfo basic_tests[] = { #define PKT3_SET_SH_REG 0x76 #define PACKET3_SET_SH_REG_START 0x00002c00 +#define PKT3_SET_SH_REG_INDEX 0x9B + #define PACKET3_DISPATCH_DIRECT 0x15 #define PACKET3_EVENT_WRITE 0x46 #define PACKET3_ACQUIRE_MEM 0x58 @@ -390,6 +392,32 @@ static const uint32_t 
preamblecache_gfx9[] = { 0xc0017900, 0x24b, 0x0 }; +static const uint32_t preamblecache_gfx10[] = { + 0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0, + 0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000, + 0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0, + 0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0, + 0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0, + 0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0, + 0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0, + 0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, + 0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20, + 0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0, + 0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0, + 0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0, + 0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff, + 0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0, + 0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2, + 0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0, + 0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff, + 0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0, + 0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0, + 0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0 +}; + enum ps_type { PS_CONST, PS_TEX, @@ -442,6 +470,39 @@ static const uint32_t ps_const_context_reg_gfx9[][2] = { {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } }; +static const uint32_t ps_const_shader_gfx10[] = { + 0x7E000200, 0x7E020201, 0x7E040202, 
0x7E060203, + 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, + 0xF8001C0F, 0x00000100, 0xBF810000 +}; + +static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6; + +static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = { + {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 }, + { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 }, + { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 }, + { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 }, + { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 }, + { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 } + } +}; + +static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = { + 0x00000004 +}; + +static const uint32_t ps_num_sh_registers_gfx10 = 2; + +static const uint32_t ps_const_sh_registers_gfx10[][2] = { + {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 }, + {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 } +}; + static const uint32_t ps_tex_shader_gfx9[] = { 0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000, 0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00, @@ -485,6 +546,34 @@ static const uint32_t ps_tex_context_reg_gfx9[][2] = { {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT, 0x00000004 } }; +static const uint32_t ps_tex_shader_gfx10[] = { + 0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000, + 0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A, + 0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70, + 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, + 0xF8001C0F, 0x00000100, 0xBF810000 +}; + +static const uint32_t 
ps_tex_shader_patchinfo_offset_gfx10[] = { + 0x0000000C +}; + +static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6; + +static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = { + {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 }, + { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 }, + { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 }, + { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 }, + { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 }, + { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 }, + { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 } + } +}; + static const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100, 0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206, @@ -496,6 +585,17 @@ static const uint32_t vs_RectPosTexFast_shader_gfx9[] = { 0xC400020F, 0x05060403, 0xBF810000 }; +static const uint32_t vs_RectPosTexFast_shader_gfx10[] = { + 0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206, + 0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200, + 0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207, + 0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001, + 0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002, + 0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209, + 0xF80008CF, 0x05030100, 0xF800020F, 0x05060402, + 0xBF810000 +}; + static const uint32_t cached_cmd_gfx9[] = { 0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, @@ -507,6 +607,17 @@ static const uint32_t cached_cmd_gfx9[] = { 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 }; +static const uint32_t cached_cmd_gfx10[] = { + 
0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0, + 0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020, + 0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf, + 0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18, + 0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0, + 0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011, + 0xc0026900, 0x292, 0x20, 0x6020000, + 0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0 +}; + unsigned int memcpy_ps_hang[] = { 0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100, 0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001, @@ -3004,7 +3115,7 @@ static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) return 0; } -static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) +static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type, uint32_t version) { int i; uint32_t shader_offset= 256; @@ -3016,18 +3127,34 @@ static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) switch (ps_type) { case PS_CONST: - shader = ps_const_shader_gfx9; - shader_size = sizeof(ps_const_shader_gfx9); - patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; - patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; - patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; + if (version == 9) { + shader = ps_const_shader_gfx9; + shader_size = sizeof(ps_const_shader_gfx9); + patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; + patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; + patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; + } else if (version == 10){ + shader = ps_const_shader_gfx10; + shader_size = sizeof(ps_const_shader_gfx10); + patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx10; + patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx10; + patchcode_offset = ps_const_shader_patchinfo_offset_gfx10; + } break; case PS_TEX: - shader = ps_tex_shader_gfx9; - shader_size = sizeof(ps_tex_shader_gfx9); - patchinfo_code = (const uint32_t 
*)ps_tex_shader_patchinfo_code_gfx9; - patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; - patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; + if (version == 9) { + shader = ps_tex_shader_gfx9; + shader_size = sizeof(ps_tex_shader_gfx9); + patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; + patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; + patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; + } else if (version == 10) { + shader = ps_tex_shader_gfx10; + shader_size = sizeof(ps_tex_shader_gfx10); + patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx10; + patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx10; + patchcode_offset = ps_tex_shader_patchinfo_offset_gfx10; + } break; case PS_HANG: shader = memcpy_ps_hang; @@ -3059,20 +3186,25 @@ static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type) } /* load RectPosTexFast_VS */ -static int amdgpu_draw_load_vs_shader(uint8_t *ptr) +static int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version) { const uint32_t *shader; uint32_t shader_size; - shader = vs_RectPosTexFast_shader_gfx9; - shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); + if (version == 9) { + shader = vs_RectPosTexFast_shader_gfx9; + shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); + } else if (version == 10) { + shader = vs_RectPosTexFast_shader_gfx10; + shader_size = sizeof(vs_RectPosTexFast_shader_gfx10); + } memcpy(ptr, shader, shader_size); return 0; } -static int amdgpu_draw_init(uint32_t *ptr) +static int amdgpu_draw_init(uint32_t *ptr, uint32_t version) { int i = 0; const uint32_t *preamblecache_ptr; @@ -3083,8 +3215,13 @@ static int amdgpu_draw_init(uint32_t *ptr) ptr[i++] = 0x80000000; ptr[i++] = 0x80000000; - preamblecache_ptr = preamblecache_gfx9; - preamblecache_size = sizeof(preamblecache_gfx9); + if (version == 9) { + preamblecache_ptr = preamblecache_gfx9; + preamblecache_size = sizeof(preamblecache_gfx9); + } else if (version == 10) { + 
preamblecache_ptr = preamblecache_gfx10; + preamblecache_size = sizeof(preamblecache_gfx10); + } memcpy(ptr + i, preamblecache_ptr, preamblecache_size); return i + preamblecache_size/sizeof(uint32_t); @@ -3092,41 +3229,95 @@ static int amdgpu_draw_init(uint32_t *ptr) static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, uint64_t dst_addr, + uint32_t version, int hang_slow) { int i = 0; /* setup color buffer */ - /* offset reg - 0xA318 CB_COLOR0_BASE - 0xA319 CB_COLOR0_BASE_EXT - 0xA31A CB_COLOR0_ATTRIB2 - 0xA31B CB_COLOR0_VIEW - 0xA31C CB_COLOR0_INFO - 0xA31D CB_COLOR0_ATTRIB - 0xA31E CB_COLOR0_DCC_CONTROL - 0xA31F CB_COLOR0_CMASK - 0xA320 CB_COLOR0_CMASK_BASE_EXT - 0xA321 CB_COLOR0_FMASK - 0xA322 CB_COLOR0_FMASK_BASE_EXT - 0xA323 CB_COLOR0_CLEAR_WORD0 - 0xA324 CB_COLOR0_CLEAR_WORD1 - 0xA325 CB_COLOR0_DCC_BASE - 0xA326 CB_COLOR0_DCC_BASE_EXT */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); - ptr[i++] = 0x318; - ptr[i++] = dst_addr >> 8; - ptr[i++] = dst_addr >> 40; - ptr[i++] = hang_slow ? 0x1ffc7ff : 0x7c01f; - ptr[i++] = 0; - ptr[i++] = 0x50438; - ptr[i++] = 0x10140000; - i += 9; + if (version == 9) { + /* offset reg + 0xA318 CB_COLOR0_BASE + 0xA319 CB_COLOR0_BASE_EXT + 0xA31A CB_COLOR0_ATTRIB2 + 0xA31B CB_COLOR0_VIEW + 0xA31C CB_COLOR0_INFO + 0xA31D CB_COLOR0_ATTRIB + 0xA31E CB_COLOR0_DCC_CONTROL + 0xA31F CB_COLOR0_CMASK + 0xA320 CB_COLOR0_CMASK_BASE_EXT + 0xA321 CB_COLOR0_FMASK + 0xA322 CB_COLOR0_FMASK_BASE_EXT + 0xA323 CB_COLOR0_CLEAR_WORD0 + 0xA324 CB_COLOR0_CLEAR_WORD1 + 0xA325 CB_COLOR0_DCC_BASE + 0xA326 CB_COLOR0_DCC_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); + ptr[i++] = 0x318; + ptr[i++] = dst_addr >> 8; + ptr[i++] = dst_addr >> 40; + ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f; + ptr[i++] = 0; + ptr[i++] = 0x50438; + ptr[i++] = 0x10140000; + i += 9; + + /* mmCB_MRT0_EPITCH */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1e8; + ptr[i++] = hang_slow ? 
0xfff : 0x1f; + } else if (version == 10) { + /* 0xA318 CB_COLOR0_BASE + 0xA319 CB_COLOR0_PITCH + 0xA31A CB_COLOR0_SLICE + 0xA31B CB_COLOR0_VIEW + 0xA31C CB_COLOR0_INFO + 0xA31D CB_COLOR0_ATTRIB + 0xA31E CB_COLOR0_DCC_CONTROL + 0xA31F CB_COLOR0_CMASK + 0xA320 CB_COLOR0_CMASK_SLICE + 0xA321 CB_COLOR0_FMASK + 0xA322 CB_COLOR0_FMASK_SLICE + 0xA323 CB_COLOR0_CLEAR_WORD0 + 0xA324 CB_COLOR0_CLEAR_WORD1 + 0xA325 CB_COLOR0_DCC_BASE */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14); + ptr[i++] = 0x318; + ptr[i++] = dst_addr >> 8; + i += 3; + ptr[i++] = 0x50438; + i += 9; + + /* 0xA390 CB_COLOR0_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x390; + ptr[i++] = dst_addr >> 40; + + /* 0xA398 CB_COLOR0_CMASK_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x398; + ptr[i++] = 0; - /* mmCB_MRT0_EPITCH */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); - ptr[i++] = 0x1e8; - ptr[i++] = hang_slow ? 0x7ff : 0x1f; + /* 0xA3A0 CB_COLOR0_FMASK_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3a0; + ptr[i++] = 0; + + /* 0xA3A8 CB_COLOR0_DCC_BASE_EXT */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3a8; + ptr[i++] = 0; + + /* 0xA3B0 CB_COLOR0_ATTRIB2 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3b0; + ptr[i++] = hang_slow ? 
0x3ffc7ff : 0x7c01f; + + /* 0xA3B8 CB_COLOR0_ATTRIB3 */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x3b8; + ptr[i++] = 0x9014000; + } /* 0xA32B CB_COLOR1_BASE */ ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); @@ -3144,15 +3335,24 @@ static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, ptr[i++] = 9; /* Setup depth buffer */ - /* mmDB_Z_INFO */ - ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); - ptr[i++] = 0xe; - i += 2; + if (version == 9) { + /* mmDB_Z_INFO */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + ptr[i++] = 0xe; + i += 2; + } else if (version == 10) { + /* mmDB_Z_INFO */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); + ptr[i++] = 0x10; + i += 2; + } return i; } -static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slow) +static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, + uint32_t version, + int hang_slow) { int i = 0; const uint32_t *cached_cmd_ptr; @@ -3168,7 +3368,10 @@ static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slo ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); ptr[i++] = 0xd7; - ptr[i++] = 1; + if (version == 9) + ptr[i++] = 1; + else if (version == 10) + ptr[i++] = 0; /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); @@ -3180,20 +3383,37 @@ static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, int hang_slo ptr[i++] = 0x2f5; i += 2; - cached_cmd_ptr = cached_cmd_gfx9; - cached_cmd_size = sizeof(cached_cmd_gfx9); + if (version == 9) { + cached_cmd_ptr = cached_cmd_gfx9; + cached_cmd_size = sizeof(cached_cmd_gfx9); + } else if (version == 10) { + cached_cmd_ptr = cached_cmd_gfx10; + cached_cmd_size = sizeof(cached_cmd_gfx10); + } memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); if (hang_slow) *(ptr + i + 12) = 0x8000800; i += cached_cmd_size/sizeof(uint32_t); + if (version == 10) { + /* mmCB_RMI_GL2_CACHE_CONTROL */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + 
ptr[i++] = 0x104; + ptr[i++] = 0x40aa0055; + /* mmDB_RMI_L2_CACHE_CONTROL */ + ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); + ptr[i++] = 0x1f; + ptr[i++] = 0x2a0055; + } + return i; } static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, int ps_type, uint64_t shader_addr, + uint32_t version, int hang_slow) { int i = 0; @@ -3203,10 +3423,21 @@ static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, ptr[i++] = 0x207; ptr[i++] = 0; - /* mmSPI_SHADER_PGM_RSRC3_VS */ - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); - ptr[i++] = 0x46; - ptr[i++] = 0xffff; + if (version == 9) { + /* mmSPI_SHADER_PGM_RSRC3_VS */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); + ptr[i++] = 0x46; + ptr[i++] = 0xffff; + } else if (version == 10) { + /* mmSPI_SHADER_PGM_RSRC3_VS */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000046; + ptr[i++] = 0xffff; + /* mmSPI_SHADER_PGM_RSRC4_VS */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000041; + ptr[i++] = 0xffff; + } /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); @@ -3217,7 +3448,10 @@ static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, /* mmSPI_SHADER_PGM_RSRC1_VS */ ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); ptr[i++] = 0x4a; - ptr[i++] = 0xc0081; + if (version == 9) + ptr[i++] = 0xc0081; + else if (version == 10) + ptr[i++] = 0xc0041; /* mmSPI_SHADER_PGM_RSRC2_VS */ ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); ptr[i++] = 0x4b; @@ -3258,7 +3492,8 @@ static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, static int amdgpu_draw_ps_write2hw(uint32_t *ptr, int ps_type, - uint64_t shader_addr) + uint64_t shader_addr, + uint32_t version) { int i, j; const uint32_t *sh_registers; @@ -3266,9 +3501,14 @@ static int amdgpu_draw_ps_write2hw(uint32_t *ptr, uint32_t num_sh_reg, num_context_reg; if (ps_type == PS_CONST) { - sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9; + if (version == 9) { + sh_registers = (const uint32_t 
*)ps_const_sh_registers_gfx9; + num_sh_reg = ps_num_sh_registers_gfx9; + } else if (version == 10) { + sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10; + num_sh_reg = ps_num_sh_registers_gfx10; + } context_registers = (const uint32_t *)ps_const_context_reg_gfx9; - num_sh_reg = ps_num_sh_registers_gfx9; num_context_reg = ps_num_context_registers_gfx9; } else if (ps_type == PS_TEX) { sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9; @@ -3279,15 +3519,35 @@ static int amdgpu_draw_ps_write2hw(uint32_t *ptr, i = 0; - /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS - 0x2c08 SPI_SHADER_PGM_LO_PS - 0x2c09 SPI_SHADER_PGM_HI_PS */ - shader_addr += 256 * 9; - ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); - ptr[i++] = 0x7; - ptr[i++] = 0xffff; - ptr[i++] = shader_addr >> 8; - ptr[i++] = shader_addr >> 40; + if (version == 9) { + /* 0x2c07 SPI_SHADER_PGM_RSRC3_PS + 0x2c08 SPI_SHADER_PGM_LO_PS + 0x2c09 SPI_SHADER_PGM_HI_PS */ + /* multiplicator 9 is from SPI_SHADER_COL_FORMAT */ + shader_addr += 256 * 9; + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); + ptr[i++] = 0x7; + ptr[i++] = 0xffff; + ptr[i++] = shader_addr >> 8; + ptr[i++] = shader_addr >> 40; + } else if (version == 10) { + shader_addr += 256 * 9; + /* 0x2c08 SPI_SHADER_PGM_LO_PS + 0x2c09 SPI_SHADER_PGM_HI_PS */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); + ptr[i++] = 0x8; + ptr[i++] = shader_addr >> 8; + ptr[i++] = shader_addr >> 40; + + /* mmSPI_SHADER_PGM_RSRC3_PS */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000007; + ptr[i++] = 0xffff; + /* mmSPI_SHADER_PGM_RSRC4_PS */ + ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); + ptr[i++] = 0x30000001; + ptr[i++] = 0xffff; + } for (j = 0; j < num_sh_reg; j++) { ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); @@ -3312,19 +3572,29 @@ static int amdgpu_draw_ps_write2hw(uint32_t *ptr, return i; } -static int amdgpu_draw_draw(uint32_t *ptr) +static int amdgpu_draw_draw(uint32_t *ptr, uint32_t version) { int i = 0; - /* mmIA_MULTI_VGT_PARAM */ - ptr[i++] = 
PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ptr[i++] = 0x40000258; - ptr[i++] = 0xd00ff; - - /* mmVGT_PRIMITIVE_TYPE */ - ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); - ptr[i++] = 0x10000242; - ptr[i++] = 0x11; + if (version == 9) { + /* mmIA_MULTI_VGT_PARAM */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x40000258; + ptr[i++] = 0xd00ff; + /* mmVGT_PRIMITIVE_TYPE */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x10000242; + ptr[i++] = 0x11; + } else if (version == 10) { + /* mmGE_CNTL */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x25b; + ptr[i++] = 0xff; + /* mmVGT_PRIMITIVE_TYPE */ + ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); + ptr[i++] = 0x242; + ptr[i++] = 0x11; + } ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1); ptr[i++] = 3; @@ -3338,7 +3608,7 @@ void amdgpu_memset_draw(amdgpu_device_handle device_handle, amdgpu_bo_handle bo_shader_vs, uint64_t mc_address_shader_ps, uint64_t mc_address_shader_vs, - uint32_t ring_id) + uint32_t ring_id, uint32_t version) { amdgpu_context_handle context_handle; amdgpu_bo_handle bo_dst, bo_cmd, resources[4]; @@ -3372,15 +3642,16 @@ void amdgpu_memset_draw(amdgpu_device_handle device_handle, CU_ASSERT_EQUAL(r, 0); i = 0; - i += amdgpu_draw_init(ptr_cmd + i); + i += amdgpu_draw_init(ptr_cmd + i, version); - i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 0); + i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0); - i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); + i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0); - i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, 0); + i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs, + version, 0); - i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps); + i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps, 
version); ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); ptr_cmd[i++] = 0xc; @@ -3389,7 +3660,7 @@ void amdgpu_memset_draw(amdgpu_device_handle device_handle, ptr_cmd[i++] = 0x33333333; ptr_cmd[i++] = 0x33333333; - i += amdgpu_draw_draw(ptr_cmd + i); + i += amdgpu_draw_draw(ptr_cmd + i, version); while (i & 7) ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ @@ -3447,7 +3718,7 @@ void amdgpu_memset_draw(amdgpu_device_handle device_handle, } static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, - uint32_t ring) + uint32_t ring, int version) { amdgpu_bo_handle bo_shader_ps, bo_shader_vs; void *ptr_shader_ps; @@ -3471,14 +3742,15 @@ static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle, CU_ASSERT_EQUAL(r, 0); memset(ptr_shader_vs, 0, bo_shader_size); - r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST); + r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST, version); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_draw_load_vs_shader(ptr_shader_vs); + r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); CU_ASSERT_EQUAL(r, 0); amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs, - mc_address_shader_ps, mc_address_shader_vs, ring); + mc_address_shader_ps, mc_address_shader_vs, + ring, version); r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); CU_ASSERT_EQUAL(r, 0); @@ -3492,7 +3764,7 @@ static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, amdgpu_bo_handle bo_shader_vs, uint64_t mc_address_shader_ps, uint64_t mc_address_shader_vs, - uint32_t ring, int hang) + uint32_t ring, int version, int hang) { amdgpu_context_handle context_handle; amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5]; @@ -3536,24 +3808,36 @@ static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, memset(ptr_src, 0x55, bo_size); i = 0; - i += amdgpu_draw_init(ptr_cmd + i); + i += amdgpu_draw_init(ptr_cmd + i, version); - i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, 
mc_address_dst, 0); + i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0); - i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 0); + i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0); - i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, 0); + i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs, + version, 0); - i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); + i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version); ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); - ptr_cmd[i++] = 0xc; - ptr_cmd[i++] = mc_address_src >> 8; - ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; - ptr_cmd[i++] = 0x7c01f; - ptr_cmd[i++] = 0x90500fac; - ptr_cmd[i++] = 0x3e000; - i += 3; + if (version == 9) { + ptr_cmd[i++] = 0xc; + ptr_cmd[i++] = mc_address_src >> 8; + ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; + ptr_cmd[i++] = 0x7c01f; + ptr_cmd[i++] = 0x90500fac; + ptr_cmd[i++] = 0x3e000; + i += 3; + } else if (version == 10) { + ptr_cmd[i++] = 0xc; + ptr_cmd[i++] = mc_address_src >> 8; + ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000; + ptr_cmd[i++] = 0x8007c007; + ptr_cmd[i++] = 0x90500fac; + i += 2; + ptr_cmd[i++] = 0x400; + i++; + } ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); ptr_cmd[i++] = 0x14; @@ -3564,7 +3848,7 @@ static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, ptr_cmd[i++] = 0x191; ptr_cmd[i++] = 0; - i += amdgpu_draw_draw(ptr_cmd + i); + i += amdgpu_draw_draw(ptr_cmd + i, version); while (i & 7) ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ @@ -3630,7 +3914,7 @@ static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle, } void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, - int hang) + int version, int hang) { amdgpu_bo_handle bo_shader_ps, bo_shader_vs; void *ptr_shader_ps; @@ -3655,14 +3939,15 @@ void 
amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring, CU_ASSERT_EQUAL(r, 0); memset(ptr_shader_vs, 0, bo_shader_size); - r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type); + r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type, version); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_draw_load_vs_shader(ptr_shader_vs); + r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); CU_ASSERT_EQUAL(r, 0); amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs, - mc_address_shader_ps, mc_address_shader_vs, ring, hang); + mc_address_shader_ps, mc_address_shader_vs, + ring, version, hang); r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size); CU_ASSERT_EQUAL(r, 0); @@ -3675,20 +3960,26 @@ static void amdgpu_draw_test(void) { int r; struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id; + uint32_t ring_id, version; r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); CU_ASSERT_EQUAL(r, 0); if (!info.available_rings) printf("SKIP ... as there's no graphics ring\n"); + version = info.hw_ip_version_major; + if (version != 9 && version != 10) { + printf("SKIP ... 
unsupported gfx version %d\n", version); + return; + } + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memset_draw_test(device_handle, ring_id); - amdgpu_memcpy_draw_test(device_handle, ring_id, 0); + amdgpu_memset_draw_test(device_handle, ring_id, version); + amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); } } -void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring) +void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version) { amdgpu_context_handle context_handle; amdgpu_bo_handle bo_shader_ps, bo_shader_vs; @@ -3744,7 +4035,7 @@ void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint3 r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id); CU_ASSERT_EQUAL(r, 0); - r = amdgpu_draw_load_vs_shader(ptr_shader_vs); + r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version); CU_ASSERT_EQUAL(r, 0); r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096, @@ -3762,25 +4053,35 @@ void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint3 memset(ptr_src, 0x55, bo_size); i = 0; - i += amdgpu_draw_init(ptr_cmd + i); + i += amdgpu_draw_init(ptr_cmd + i, version); - i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, 1); + i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 1); - i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, 1); + i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 1); i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, - mc_address_shader_vs, 1); + mc_address_shader_vs, version, 1); - i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps); + i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version); ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8); - ptr_cmd[i++] = 0xc; - ptr_cmd[i++] = mc_address_src >> 8; - ptr_cmd[i++] = 
mc_address_src >> 40 | 0x10e00000; - ptr_cmd[i++] = 0x1ffc7ff; - ptr_cmd[i++] = 0x90500fac; - ptr_cmd[i++] = 0xffe000; - i += 3; + + if (version == 9) { + ptr_cmd[i++] = 0xc; + ptr_cmd[i++] = mc_address_src >> 8; + ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000; + ptr_cmd[i++] = 0x1ffcfff; + ptr_cmd[i++] = 0x90500fac; + ptr_cmd[i++] = 0x1ffe000; + i += 3; + } else if (version == 10) { + ptr_cmd[i++] = 0xc; + ptr_cmd[i++] = mc_address_src >> 8; + ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000; + ptr_cmd[i++] = 0x81ffc1ff; + ptr_cmd[i++] = 0x90500fac; + i += 4; + } ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4); ptr_cmd[i++] = 0x14; @@ -3791,7 +4092,7 @@ void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint3 ptr_cmd[i++] = 0x191; ptr_cmd[i++] = 0; - i += amdgpu_draw_draw(ptr_cmd + i); + i += amdgpu_draw_draw(ptr_cmd + i, version); while (i & 7) ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ diff --git a/tests/amdgpu/deadlock_tests.c b/tests/amdgpu/deadlock_tests.c index 0f780d48..f29a83ab 100644 --- a/tests/amdgpu/deadlock_tests.c +++ b/tests/amdgpu/deadlock_tests.c @@ -533,32 +533,44 @@ static void amdgpu_draw_hang_gfx(void) { int r; struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id; + uint32_t ring_id, version; r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); CU_ASSERT_EQUAL(r, 0); if (!info.available_rings) printf("SKIP ... as there's no graphic ring\n"); + version = info.hw_ip_version_major; + if (version != 9 && version != 10) { + printf("SKIP ... 
unsupported gfx version %d\n", version); + return; + } + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memcpy_draw_test(device_handle, ring_id, 0); - amdgpu_memcpy_draw_test(device_handle, ring_id, 1); - amdgpu_memcpy_draw_test(device_handle, ring_id, 0); + amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); + amdgpu_memcpy_draw_test(device_handle, ring_id, version, 1); + amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); } } static void amdgpu_draw_hang_slow_gfx(void) { struct drm_amdgpu_info_hw_ip info; - uint32_t ring_id; + uint32_t ring_id, version; int r; r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); CU_ASSERT_EQUAL(r, 0); + version = info.hw_ip_version_major; + if (version != 9 && version != 10) { + printf("SKIP ... unsupported gfx version %d\n", version); + return; + } + for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { - amdgpu_memcpy_draw_test(device_handle, ring_id, 0); - amdgpu_memcpy_draw_hang_slow_test(device_handle, ring_id); - amdgpu_memcpy_draw_test(device_handle, ring_id, 0); + amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); + amdgpu_memcpy_draw_hang_slow_test(device_handle, ring_id, version); + amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0); } } |
From: GitLab M. <git...@ke...> - 2022-04-19 20:16:01
|
man/drm-kms.7.rst | 4 ++-- man/drm.7.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) New commits: commit 62e25c8baac80d6af1287c964d5d83eb255ef339 Author: Matt Turner <mat...@gm...> Date: Tue Apr 19 13:10:57 2022 -0700 man: Add formatting to drmModeGetConnector reference Signed-off-by: Matt Turner <mat...@gm...> diff --git a/man/drm-kms.7.rst b/man/drm-kms.7.rst index 7a4151ae..587d7eb7 100644 --- a/man/drm-kms.7.rst +++ b/man/drm-kms.7.rst @@ -102,7 +102,7 @@ for the selected connector. See the *Examples* section below for more information. All valid modes for a connector can be retrieved with a call to -drmModeGetConnector3 You need to select the mode you want to use and save it. +**drmModeGetConnector**\ (3) You need to select the mode you want to use and save it. The first mode in the list is the default mode with the highest resolution possible and often a suitable choice. commit 37d50e1cbfdb48a70e360c2c412568d3ed33a8aa Author: Matt Turner <mat...@gm...> Date: Tue Apr 19 11:53:55 2022 -0700 man: Fix some typos Signed-off-by: Matt Turner <mat...@gm...> diff --git a/man/drm-kms.7.rst b/man/drm-kms.7.rst index e91fbe27..7a4151ae 100644 --- a/man/drm-kms.7.rst +++ b/man/drm-kms.7.rst @@ -108,7 +108,7 @@ possible and often a suitable choice. After you have a working connector+CRTC+mode combination, you need to create a framebuffer that is used for scanout. Memory buffer allocation is -driver-depedent and described in **drm-memory**\ (7). You need to create a +driver-dependent and described in **drm-memory**\ (7). You need to create a buffer big enough for your selected mode. Now you can create a framebuffer object that uses your memory-buffer as scanout buffer. You can do this with **drmModeAddFB**\ (3) and **drmModeAddFB2**\ (3). diff --git a/man/drm.7.rst b/man/drm.7.rst index c9b5696f..df2c1c74 100644 --- a/man/drm.7.rst +++ b/man/drm.7.rst @@ -73,7 +73,7 @@ driver dependent. 
However, two generic frameworks are available that are used by most DRM drivers. These are the *Translation Table Manager* (TTM) and the *Graphics Execution Manager* (GEM). They provide generic APIs to create, destroy and access buffers from user-space. However, -there are still many differences between the drivers so driver-depedent +there are still many differences between the drivers so driver-dependent code is still needed. Many helpers are provided in *libgbm* (Graphics Buffer Manager) from the *Mesa* project. For more information on DRM memory management, see **drm-memory**\ (7). |
From: GitLab M. <git...@ke...> - 2022-06-11 16:15:36
|
tests/amdgpu/vce_tests.c | 3 tests/amdgpu/vcn_tests.c | 950 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 939 insertions(+), 14 deletions(-) New commits: commit f7828dc180ba3427b5e405fab25e9846c07322bc Author: Saleemkhan Jamadar <sal...@am...> Date: Tue Oct 19 14:30:11 2021 +0530 tests/amdgpu/vcn:vcn encoder unit test Add support for vcn encoder unit test Reviewed-by: Ruijing Dong <rui...@am...> Signed-off-by: Saleemkhan Jamadar <sal...@am...> Signed-off-by: Satyajit Sahu <sat...@am...> diff --git a/tests/amdgpu/vce_tests.c b/tests/amdgpu/vce_tests.c index 4e925cae..9aa0a8eb 100644 --- a/tests/amdgpu/vce_tests.c +++ b/tests/amdgpu/vce_tests.c @@ -96,7 +96,7 @@ CU_TestInfo vce_tests[] = { CU_BOOL suite_vce_tests_enable(void) { - uint32_t version, feature, asic_id; + uint32_t version, feature; CU_BOOL ret_mv = CU_FALSE; if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, @@ -107,7 +107,6 @@ CU_BOOL suite_vce_tests_enable(void) chip_rev = device_handle->info.chip_rev; chip_id = device_handle->info.chip_external_rev; ids_flags = device_handle->info.ids_flags; - asic_id = device_handle->info.asic_id; amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 0, &version, &feature); diff --git a/tests/amdgpu/vcn_tests.c b/tests/amdgpu/vcn_tests.c index a924fec6..add73ce5 100644 --- a/tests/amdgpu/vcn_tests.c +++ b/tests/amdgpu/vcn_tests.c @@ -22,7 +22,9 @@ */ #include <stdio.h> +#include <string.h> #include <inttypes.h> +#include <unistd.h> #include "CUnit/Basic.h" @@ -32,10 +34,27 @@ #include "amdgpu_drm.h" #include "amdgpu_internal.h" #include "decode_messages.h" +#include "frame.h" #define IB_SIZE 4096 #define MAX_RESOURCES 16 +#define H264_NAL_TYPE_NON_IDR_SLICE 1 +#define H264_NAL_TYPE_DP_A_SLICE 2 +#define H264_NAL_TYPE_DP_B_SLICE 3 +#define H264_NAL_TYPE_DP_C_SLICE 0x4 +#define H264_NAL_TYPE_IDR_SLICE 0x5 +#define H264_NAL_TYPE_SEI 0x6 +#define H264_NAL_TYPE_SEQ_PARAM 0x7 +#define H264_NAL_TYPE_PIC_PARAM 0x8 +#define 
H264_NAL_TYPE_ACCESS_UNIT 0x9 +#define H264_NAL_TYPE_END_OF_SEQ 0xa +#define H264_NAL_TYPE_END_OF_STREAM 0xb +#define H264_NAL_TYPE_FILLER_DATA 0xc +#define H264_NAL_TYPE_SEQ_EXTENSION 0xd + +#define H264_START_CODE 0x000001 + struct amdgpu_vcn_bo { amdgpu_bo_handle handle; amdgpu_va_handle va_handle; @@ -52,6 +71,23 @@ struct amdgpu_vcn_reg { uint32_t cntl; }; +typedef struct BufferInfo_t { + uint32_t numOfBitsInBuffer; + const uint8_t *decBuffer; + uint8_t decData; + uint32_t decBufferSize; + const uint8_t *end; +} bufferInfo; + +typedef struct h264_decode_t { + uint8_t profile; + uint8_t level_idc; + uint8_t nal_ref_idc; + uint8_t nal_unit_type; + uint32_t pic_width, pic_height; + uint32_t slice_type; +} h264_decode; + static amdgpu_device_handle device_handle; static uint32_t major_version; static uint32_t minor_version; @@ -60,7 +96,9 @@ static uint32_t chip_rev; static uint32_t chip_id; static uint32_t asic_id; static uint32_t chip_rev; -static uint32_t chip_id; +static struct amdgpu_vcn_bo enc_buf; +static struct amdgpu_vcn_bo cpb_buf; +static uint32_t enc_task_id; static amdgpu_context_handle context_handle; static amdgpu_bo_handle ib_handle; @@ -78,6 +116,9 @@ static struct amdgpu_vcn_reg reg[] = { {0x10, 0x11, 0xf, 0x29, 0x26d}, }; +uint32_t gWidth, gHeight, gSliceType; +struct drm_amdgpu_info_hw_ip einfo; + static void amdgpu_cs_vcn_dec_create(void); static void amdgpu_cs_vcn_dec_decode(void); static void amdgpu_cs_vcn_dec_destroy(void); @@ -86,6 +127,20 @@ static void amdgpu_cs_vcn_enc_create(void); static void amdgpu_cs_vcn_enc_encode(void); static void amdgpu_cs_vcn_enc_destroy(void); +static void h264_check_0s (bufferInfo * bufInfo, int count); +static int32_t h264_se (bufferInfo * bufInfo); +static inline uint32_t bs_read_u1(bufferInfo *bufinfo); +static inline int bs_eof(bufferInfo *bufinfo); +static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n); +static inline uint32_t bs_read_ue(bufferInfo* bufinfo); +static uint32_t remove_03 (uint8_t 
*bptr, uint32_t len); +static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo *bufInfo); +static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo *bufInfo); +static void h264_slice_header (h264_decode *dec, bufferInfo *bufInfo); +static uint8_t h264_parse_nal (h264_decode *dec, bufferInfo *bufInfo); +static uint32_t h264_find_next_start_code (uint8_t *pBuf, uint32_t bufLen); +static int verify_checksum(uint8_t *buffer, uint32_t buffer_size); + CU_TestInfo vcn_tests[] = { { "VCN DEC create", amdgpu_cs_vcn_dec_create }, @@ -93,7 +148,7 @@ CU_TestInfo vcn_tests[] = { { "VCN DEC destroy", amdgpu_cs_vcn_dec_destroy }, { "VCN ENC create", amdgpu_cs_vcn_enc_create }, - { "VCN ENC decode", amdgpu_cs_vcn_enc_encode }, + { "VCN ENC encode", amdgpu_cs_vcn_enc_encode }, { "VCN ENC destroy", amdgpu_cs_vcn_enc_destroy }, CU_TEST_INFO_NULL, }; @@ -101,7 +156,7 @@ CU_TestInfo vcn_tests[] = { CU_BOOL suite_vcn_tests_enable(void) { struct drm_amdgpu_info_hw_ip info; - int r; + int r, ret; if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, &minor_version, &device_handle)) @@ -113,9 +168,10 @@ CU_BOOL suite_vcn_tests_enable(void) chip_id = device_handle->info.chip_external_rev; r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_DEC, 0, &info); + ret = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_VCN_ENC, 0, &einfo); if (amdgpu_device_deinitialize(device_handle)) - return CU_FALSE; + return CU_FALSE; if (r != 0 || !info.available_rings || (family_id < AMDGPU_FAMILY_RV && @@ -125,9 +181,10 @@ CU_BOOL suite_vcn_tests_enable(void) return CU_FALSE; } - if (family_id == AMDGPU_FAMILY_AI) { + if (family_id == AMDGPU_FAMILY_AI || (ret != 0) || + (!einfo.available_rings)) { amdgpu_set_test_active("VCN Tests", "VCN ENC create", CU_FALSE); - amdgpu_set_test_active("VCN Tests", "VCN ENC decode", CU_FALSE); + amdgpu_set_test_active("VCN Tests", "VCN ENC encode", CU_FALSE); amdgpu_set_test_active("VCN Tests", "VCN ENC destroy", 
CU_FALSE); } @@ -339,7 +396,7 @@ static void amdgpu_cs_vcn_dec_decode(void) size += ALIGN(dpb_size, 4*1024); size += ALIGN(dt_size, 4*1024); - num_resources = 0; + num_resources = 0; alloc_resource(&dec_buf, size, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = dec_buf.handle; resources[num_resources++] = ib_handle; @@ -405,7 +462,7 @@ static void amdgpu_cs_vcn_dec_destroy(void) struct amdgpu_vcn_bo msg_buf; int len, r; - num_resources = 0; + num_resources = 0; alloc_resource(&msg_buf, 1024, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = msg_buf.handle; resources[num_resources++] = ib_handle; @@ -436,15 +493,884 @@ static void amdgpu_cs_vcn_dec_destroy(void) static void amdgpu_cs_vcn_enc_create(void) { - /* TODO */ + int len, r; + uint32_t *p_task_size = NULL; + uint32_t task_offset = 0, st_offset; + uint32_t *st_size = NULL; + unsigned width = 160, height = 128, buf_size; + uint32_t fw_maj = 1, fw_min = 9; + + if (einfo.hw_ip_version_major == 2) { + fw_maj = 1; + fw_min = 1; + } else if (einfo.hw_ip_version_major == 3) { + fw_maj = 1; + fw_min = 0; + } + + gWidth = width; + gHeight = height; + buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; + enc_task_id = 1; + + num_resources = 0; + alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); + alloc_resource(&cpb_buf, buf_size * 2, AMDGPU_GEM_DOMAIN_GTT); + resources[num_resources++] = enc_buf.handle; + resources[num_resources++] = cpb_buf.handle; + resources[num_resources++] = ib_handle; + + r = amdgpu_bo_cpu_map(enc_buf.handle, (void**)&enc_buf.ptr); + memset(enc_buf.ptr, 0, 128 * 1024); + r = amdgpu_bo_cpu_unmap(enc_buf.handle); + + r = amdgpu_bo_cpu_map(cpb_buf.handle, (void**)&enc_buf.ptr); + memset(enc_buf.ptr, 0, buf_size * 2); + r = amdgpu_bo_cpu_unmap(cpb_buf.handle); + + len = 0; + /* session info */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ + ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); + 
ib_cpu[len++] = enc_buf.addr >> 32; + ib_cpu[len++] = enc_buf.addr; + ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */ + *st_size = (len - st_offset) * 4; + + /* task info */ + task_offset = len; + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ + p_task_size = &ib_cpu[len++]; + ib_cpu[len++] = enc_task_id++; /* task_id */ + ib_cpu[len++] = 0; /* feedback */ + *st_size = (len - st_offset) * 4; + + /* op init */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x01000001; /* RENCODE_IB_OP_INITIALIZE */ + *st_size = (len - st_offset) * 4; + + /* session_init */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000003; /* RENCODE_IB_PARAM_SESSION_INIT */ + ib_cpu[len++] = 1; /* RENCODE_ENCODE_STANDARD_H264 */ + ib_cpu[len++] = width; + ib_cpu[len++] = height; + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; /* pre encode mode */ + ib_cpu[len++] = 0; /* chroma enabled : false */ + *st_size = (len - st_offset) * 4; + + /* slice control */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00200001; /* RENCODE_H264_IB_PARAM_SLICE_CONTROL */ + ib_cpu[len++] = 0; /* RENCODE_H264_SLICE_CONTROL_MODE_FIXED_MBS */ + ib_cpu[len++] = ALIGN(width, 16) / 16 * ALIGN(height, 16) / 16; + *st_size = (len - st_offset) * 4; + + /* enc spec misc */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00200002; /* RENCODE_H264_IB_PARAM_SPEC_MISC */ + ib_cpu[len++] = 0; /* constrained intra pred flag */ + ib_cpu[len++] = 0; /* cabac enable */ + ib_cpu[len++] = 0; /* cabac init idc */ + ib_cpu[len++] = 1; /* half pel enabled */ + ib_cpu[len++] = 1; /* quarter pel enabled */ + ib_cpu[len++] = 100; /* BASELINE profile */ + ib_cpu[len++] = 11; /* level */ + if (einfo.hw_ip_version_major == 3) { + ib_cpu[len++] = 0; /* b_picture_enabled */ + ib_cpu[len++] = 0; /* weighted_bipred_idc */ + } + *st_size 
= (len - st_offset) * 4; + + /* deblocking filter */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00200004; /* RENCODE_H264_IB_PARAM_DEBLOCKING_FILTER */ + ib_cpu[len++] = 0; /* disable deblocking filter idc */ + ib_cpu[len++] = 0; /* alpha c0 offset */ + ib_cpu[len++] = 0; /* tc offset */ + ib_cpu[len++] = 0; /* cb offset */ + ib_cpu[len++] = 0; /* cr offset */ + *st_size = (len - st_offset) * 4; + + /* layer control */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000004; /* RENCODE_IB_PARAM_LAYER_CONTROL */ + ib_cpu[len++] = 1; /* max temporal layer */ + ib_cpu[len++] = 1; /* no of temporal layer */ + *st_size = (len - st_offset) * 4; + + /* rc_session init */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000006; /* RENCODE_IB_PARAM_RATE_CONTROL_SESSION_INIT */ + ib_cpu[len++] = 0; /* rate control */ + ib_cpu[len++] = 48; /* vbv buffer level */ + *st_size = (len - st_offset) * 4; + + /* quality params */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000009; /* RENCODE_IB_PARAM_QUALITY_PARAMS */ + ib_cpu[len++] = 0; /* vbaq mode */ + ib_cpu[len++] = 0; /* scene change sensitivity */ + ib_cpu[len++] = 0; /* scene change min idr interval */ + ib_cpu[len++] = 0; + if (einfo.hw_ip_version_major == 3) + ib_cpu[len++] = 0; + *st_size = (len - st_offset) * 4; + + /* layer select */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */ + ib_cpu[len++] = 0; /* temporal layer */ + *st_size = (len - st_offset) * 4; + + /* rc layer init */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000007; /* RENCODE_IB_PARAM_RATE_CONTROL_LAYER_INIT */ + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; + ib_cpu[len++] = 25; + ib_cpu[len++] = 1; + ib_cpu[len++] = 0x01312d00; + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; + *st_size = (len - 
st_offset) * 4; + + /* layer select */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000005; /* RENCODE_IB_PARAM_LAYER_SELECT */ + ib_cpu[len++] = 0; /* temporal layer */ + *st_size = (len - st_offset) * 4; + + /* rc per pic */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000008; /* RENCODE_IB_PARAM_RATE_CONTROL_PER_PICTURE */ + ib_cpu[len++] = 20; + ib_cpu[len++] = 0; + ib_cpu[len++] = 51; + ib_cpu[len++] = 0; + ib_cpu[len++] = 1; + ib_cpu[len++] = 0; + ib_cpu[len++] = 1; + *st_size = (len - st_offset) * 4; + + /* op init rc */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x01000004; /* RENCODE_IB_OP_INIT_RC */ + *st_size = (len - st_offset) * 4; + + /* op init rc vbv */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x01000005; /* RENCODE_IB_OP_INIT_RC_VBV_BUFFER_LEVEL */ + *st_size = (len - st_offset) * 4; + + *p_task_size = (len - task_offset) * 4; + + r = submit(len, AMDGPU_HW_IP_VCN_ENC); + CU_ASSERT_EQUAL(r, 0); +} + +static int32_t h264_se (bufferInfo * bufInfo) +{ + uint32_t ret; + + ret = bs_read_ue (bufInfo); + if ((ret & 0x1) == 0) { + ret >>= 1; + int32_t temp = 0 - ret; + return temp; + } + + return (ret + 1) >> 1; +} + +static void h264_check_0s (bufferInfo * bufInfo, int count) +{ + uint32_t val; + + val = bs_read_u (bufInfo, count); + if (val != 0) { + printf ("field error - %d bits should be 0 is %x\n", count, val); + } +} + +static inline int bs_eof(bufferInfo * bufinfo) +{ + if (bufinfo->decBuffer >= bufinfo->end) + return 1; + else + return 0; +} + +static inline uint32_t bs_read_u1(bufferInfo *bufinfo) +{ + uint32_t r = 0; + uint32_t temp = 0; + + bufinfo->numOfBitsInBuffer--; + if (! 
bs_eof(bufinfo)) { + temp = (((bufinfo->decData)) >> bufinfo->numOfBitsInBuffer); + r = temp & 0x01; + } + + if (bufinfo->numOfBitsInBuffer == 0) { + bufinfo->decBuffer++; + bufinfo->decData = *bufinfo->decBuffer; + bufinfo->numOfBitsInBuffer = 8; + } + + return r; +} + +static inline uint32_t bs_read_u(bufferInfo* bufinfo, int n) +{ + uint32_t r = 0; + int i; + + for (i = 0; i < n; i++) { + r |= ( bs_read_u1(bufinfo) << ( n - i - 1 ) ); + } + + return r; +} + +static inline uint32_t bs_read_ue(bufferInfo* bufinfo) +{ + int32_t r = 0; + int i = 0; + + while( (bs_read_u1(bufinfo) == 0) && (i < 32) && (!bs_eof(bufinfo))) { + i++; + } + r = bs_read_u(bufinfo, i); + r += (1 << i) - 1; + return r; +} + +static uint32_t remove_03 (uint8_t * bptr, uint32_t len) +{ + uint32_t nal_len = 0; + while (nal_len + 2 < len) { + if (bptr[0] == 0 && bptr[1] == 0 && bptr[2] == 3) { + bptr += 2; + nal_len += 2; + len--; + memmove (bptr, bptr + 1, len - nal_len); + } else { + bptr++; + nal_len++; + } + } + return len; +} + +static void scaling_list (uint32_t ix, uint32_t sizeOfScalingList, bufferInfo * bufInfo) +{ + uint32_t lastScale = 8, nextScale = 8; + uint32_t jx; + int deltaScale; + + for (jx = 0; jx < sizeOfScalingList; jx++) { + if (nextScale != 0) { + deltaScale = h264_se (bufInfo); + nextScale = (lastScale + deltaScale + 256) % 256; + } + if (nextScale == 0) { + lastScale = lastScale; + } else { + lastScale = nextScale; + } + } +} + +static void h264_parse_sequence_parameter_set (h264_decode * dec, bufferInfo * bufInfo) +{ + uint32_t temp; + + dec->profile = bs_read_u (bufInfo, 8); + bs_read_u (bufInfo, 1); /* constaint_set0_flag */ + bs_read_u (bufInfo, 1); /* constaint_set1_flag */ + bs_read_u (bufInfo, 1); /* constaint_set2_flag */ + bs_read_u (bufInfo, 1); /* constaint_set3_flag */ + bs_read_u (bufInfo, 1); /* constaint_set4_flag */ + bs_read_u (bufInfo, 1); /* constaint_set5_flag */ + + + h264_check_0s (bufInfo, 2); + dec->level_idc = bs_read_u (bufInfo, 8); + bs_read_ue 
(bufInfo); /* SPS id*/ + + if (dec->profile == 100 || dec->profile == 110 || + dec->profile == 122 || dec->profile == 144) { + uint32_t chroma_format_idc = bs_read_ue (bufInfo); + if (chroma_format_idc == 3) { + bs_read_u (bufInfo, 1); /* residual_colour_transform_flag */ + } + bs_read_ue (bufInfo); /* bit_depth_luma_minus8 */ + bs_read_ue (bufInfo); /* bit_depth_chroma_minus8 */ + bs_read_u (bufInfo, 1); /* qpprime_y_zero_transform_bypass_flag */ + uint32_t seq_scaling_matrix_present_flag = bs_read_u (bufInfo, 1); + + if (seq_scaling_matrix_present_flag) { + for (uint32_t ix = 0; ix < 8; ix++) { + temp = bs_read_u (bufInfo, 1); + if (temp) { + scaling_list (ix, ix < 6 ? 16 : 64, bufInfo); + } + } + } + } + + bs_read_ue (bufInfo); /* log2_max_frame_num_minus4 */ + uint32_t pic_order_cnt_type = bs_read_ue (bufInfo); + + if (pic_order_cnt_type == 0) { + bs_read_ue (bufInfo); /* log2_max_pic_order_cnt_lsb_minus4 */ + } else if (pic_order_cnt_type == 1) { + bs_read_u (bufInfo, 1); /* delta_pic_order_always_zero_flag */ + h264_se (bufInfo); /* offset_for_non_ref_pic */ + h264_se (bufInfo); /* offset_for_top_to_bottom_field */ + temp = bs_read_ue (bufInfo); + for (uint32_t ix = 0; ix < temp; ix++) { + h264_se (bufInfo); /* offset_for_ref_frame[index] */ + } + } + bs_read_ue (bufInfo); /* num_ref_frames */ + bs_read_u (bufInfo, 1); /* gaps_in_frame_num_flag */ + uint32_t PicWidthInMbs = bs_read_ue (bufInfo) + 1; + + dec->pic_width = PicWidthInMbs * 16; + uint32_t PicHeightInMapUnits = bs_read_ue (bufInfo) + 1; + + dec->pic_height = PicHeightInMapUnits * 16; + uint32_t frame_mbs_only_flag = bs_read_u (bufInfo, 1); + if (!frame_mbs_only_flag) { + bs_read_u (bufInfo, 1); /* mb_adaptive_frame_field_flag */ + } + bs_read_u (bufInfo, 1); /* direct_8x8_inference_flag */ + temp = bs_read_u (bufInfo, 1); + if (temp) { + bs_read_ue (bufInfo); /* frame_crop_left_offset */ + bs_read_ue (bufInfo); /* frame_crop_right_offset */ + bs_read_ue (bufInfo); /* frame_crop_top_offset */ + 
bs_read_ue (bufInfo); /* frame_crop_bottom_offset */ + } + temp = bs_read_u (bufInfo, 1); /* VUI Parameters */ +} + +static void h264_slice_header (h264_decode * dec, bufferInfo * bufInfo) +{ + uint32_t temp; + + bs_read_ue (bufInfo); /* first_mb_in_slice */ + temp = bs_read_ue (bufInfo); + dec->slice_type = ((temp > 5) ? (temp - 5) : temp); +} + +static uint8_t h264_parse_nal (h264_decode * dec, bufferInfo * bufInfo) +{ + uint8_t type = 0; + + h264_check_0s (bufInfo, 1); + dec->nal_ref_idc = bs_read_u (bufInfo, 2); + dec->nal_unit_type = type = bs_read_u (bufInfo, 5); + switch (type) + { + case H264_NAL_TYPE_NON_IDR_SLICE: + case H264_NAL_TYPE_IDR_SLICE: + h264_slice_header (dec, bufInfo); + break; + case H264_NAL_TYPE_SEQ_PARAM: + h264_parse_sequence_parameter_set (dec, bufInfo); + break; + case H264_NAL_TYPE_PIC_PARAM: + case H264_NAL_TYPE_SEI: + case H264_NAL_TYPE_ACCESS_UNIT: + case H264_NAL_TYPE_SEQ_EXTENSION: + /* NOP */ + break; + default: + printf ("Nal type unknown %d \n ", type); + break; + } + return type; +} + +static uint32_t h264_find_next_start_code (uint8_t * pBuf, uint32_t bufLen) +{ + uint32_t val; + uint32_t offset, startBytes; + + offset = startBytes = 0; + if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 0 && pBuf[3] == 1) { + pBuf += 4; + offset = 4; + startBytes = 1; + } else if (pBuf[0] == 0 && pBuf[1] == 0 && pBuf[2] == 1) { + pBuf += 3; + offset = 3; + startBytes = 1; + } + val = 0xffffffff; + while (offset < bufLen - 3) { + val <<= 8; + val |= *pBuf++; + offset++; + if (val == H264_START_CODE) + return offset - 4; + + if ((val & 0x00ffffff) == H264_START_CODE) + return offset - 3; + } + if (bufLen - offset <= 3 && startBytes == 0) { + startBytes = 0; + return 0; + } + + return offset; +} + +static int verify_checksum(uint8_t *buffer, uint32_t buffer_size) +{ + uint32_t buffer_pos = 0; + int done = 0; + h264_decode dec; + + memset(&dec, 0, sizeof(h264_decode)); + do { + uint32_t ret; + + ret = h264_find_next_start_code (buffer + buffer_pos, 
+ buffer_size - buffer_pos); + if (ret == 0) { + done = 1; + if (buffer_pos == 0) { + fprintf (stderr, + "couldn't find start code in buffer from 0\n"); + } + } else { + /* have a complete NAL from buffer_pos to end */ + if (ret > 3) { + uint32_t nal_len; + bufferInfo bufinfo; + + nal_len = remove_03 (buffer + buffer_pos, ret); + bufinfo.decBuffer = buffer + buffer_pos + (buffer[buffer_pos + 2] == 1 ? 3 : 4); + bufinfo.decBufferSize = (nal_len - (buffer[buffer_pos + 2] == 1 ? 3 : 4)) * 8; + bufinfo.end = buffer + buffer_pos + nal_len; + bufinfo.numOfBitsInBuffer = 8; + bufinfo.decData = *bufinfo.decBuffer; + h264_parse_nal (&dec, &bufinfo); + } + buffer_pos += ret; /* buffer_pos points to next code */ + } + } while (done == 0); + + if ((dec.pic_width == gWidth) && + (dec.pic_height == gHeight) && + (dec.slice_type == gSliceType)) + return 0; + else + return -1; +} + +static void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_buf, int frame_type) +{ + uint32_t *fb_ptr; + uint8_t *bs_ptr; + uint32_t size; + int r; +/* uint64_t s[3] = {0, 1121279001727, 1059312481445}; */ + + r = amdgpu_bo_cpu_map(fb_buf.handle, (void **)&fb_buf.ptr); + CU_ASSERT_EQUAL(r, 0); + fb_ptr = (uint32_t*)fb_buf.ptr; + size = fb_ptr[6]; + r = amdgpu_bo_cpu_unmap(fb_buf.handle); + CU_ASSERT_EQUAL(r, 0); + r = amdgpu_bo_cpu_map(bs_buf.handle, (void **)&bs_buf.ptr); + CU_ASSERT_EQUAL(r, 0); + + bs_ptr = (uint8_t*)bs_buf.ptr; + r = verify_checksum(bs_ptr, size); + CU_ASSERT_EQUAL(r, 0); + r = amdgpu_bo_cpu_unmap(bs_buf.handle); + + CU_ASSERT_EQUAL(r, 0); +} + +static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) +{ + struct amdgpu_vcn_bo bs_buf, fb_buf, vbv_buf; + int len, r, i; + unsigned width = 160, height = 128, buf_size; + uint32_t *p_task_size = NULL; + uint32_t task_offset = 0, st_offset; + uint32_t *st_size = NULL; + uint32_t fw_maj = 1, fw_min = 9; + + if (einfo.hw_ip_version_major == 2) { + fw_maj = 1; + fw_min = 1; + } else if (einfo.hw_ip_version_major == 3) { 
+ fw_maj = 1; + fw_min = 0; + } + gSliceType = frame_type; + buf_size = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; + + num_resources = 0; + alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); + alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); + alloc_resource(&vbv_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT); + resources[num_resources++] = enc_buf.handle; + resources[num_resources++] = cpb_buf.handle; + resources[num_resources++] = bs_buf.handle; + resources[num_resources++] = fb_buf.handle; + resources[num_resources++] = vbv_buf.handle; + resources[num_resources++] = ib_handle; + + + r = amdgpu_bo_cpu_map(bs_buf.handle, (void**)&bs_buf.ptr); + memset(bs_buf.ptr, 0, 4096); + r = amdgpu_bo_cpu_unmap(bs_buf.handle); + + r = amdgpu_bo_cpu_map(fb_buf.handle, (void**)&fb_buf.ptr); + memset(fb_buf.ptr, 0, 4096); + r = amdgpu_bo_cpu_unmap(fb_buf.handle); + + r = amdgpu_bo_cpu_map(vbv_buf.handle, (void **)&vbv_buf.ptr); + CU_ASSERT_EQUAL(r, 0); + + for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++) + memcpy(vbv_buf.ptr + i * ALIGN(width, 256), frame + i * width, width); + + r = amdgpu_bo_cpu_unmap(vbv_buf.handle); + CU_ASSERT_EQUAL(r, 0); + + len = 0; + /* session info */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ + ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); + ib_cpu[len++] = enc_buf.addr >> 32; + ib_cpu[len++] = enc_buf.addr; + ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE */; + *st_size = (len - st_offset) * 4; + + /* task info */ + task_offset = len; + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ + p_task_size = &ib_cpu[len++]; + ib_cpu[len++] = enc_task_id++; /* task_id */ + ib_cpu[len++] = 1; /* feedback */ + *st_size = (len - st_offset) * 4; + + if (frame_type == 2) { + /* sps */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(einfo.hw_ip_version_major == 1) + ib_cpu[len++] = 
0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */ + else + ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3 */ + ib_cpu[len++] = 0x00000002; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */ + ib_cpu[len++] = 0x00000011; /* sps len */ + ib_cpu[len++] = 0x00000001; /* start code */ + ib_cpu[len++] = 0x6764440b; + ib_cpu[len++] = 0xac54c284; + ib_cpu[len++] = 0x68078442; + ib_cpu[len++] = 0x37000000; + *st_size = (len - st_offset) * 4; + + /* pps */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(einfo.hw_ip_version_major == 1) + ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/ + else + ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3*/ + ib_cpu[len++] = 0x00000003; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */ + ib_cpu[len++] = 0x00000008; /* pps len */ + ib_cpu[len++] = 0x00000001; /* start code */ + ib_cpu[len++] = 0x68ce3c80; + *st_size = (len - st_offset) * 4; + } + + /* slice header */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(einfo.hw_ip_version_major == 1) + ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */ + else + ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 2,3 */ + if (frame_type == 2) { + ib_cpu[len++] = 0x65000000; + ib_cpu[len++] = 0x11040000; + } else { + ib_cpu[len++] = 0x41000000; + ib_cpu[len++] = 0x34210000; + } + ib_cpu[len++] = 0xe0000000; + for(i = 0; i < 13; i++) + ib_cpu[len++] = 0x00000000; + + ib_cpu[len++] = 0x00000001; + ib_cpu[len++] = 0x00000008; + ib_cpu[len++] = 0x00020000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000001; + ib_cpu[len++] = 0x00000015; + ib_cpu[len++] = 0x00020001; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000001; + ib_cpu[len++] = 0x00000003; + for(i = 0; i < 22; i++) + ib_cpu[len++] = 0x00000000; + + *st_size = (len - st_offset) * 4; + + /* encode params */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(einfo.hw_ip_version_major == 
1) + ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1*/ + else + ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 2,3*/ + ib_cpu[len++] = frame_type; + ib_cpu[len++] = 0x0001f000; + ib_cpu[len++] = vbv_buf.addr >> 32; + ib_cpu[len++] = vbv_buf.addr; + ib_cpu[len++] = (vbv_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32; + ib_cpu[len++] = vbv_buf.addr + ALIGN(width, 256) * ALIGN(height, 32); + ib_cpu[len++] = 0x00000100; + ib_cpu[len++] = 0x00000080; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0xffffffff; + ib_cpu[len++] = 0x00000000; + *st_size = (len - st_offset) * 4; + + /* encode params h264 */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00200003; /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */ + if (einfo.hw_ip_version_major != 3) { + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0xffffffff; + } else { + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0xffffffff; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0xffffffff; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + } + *st_size = (len - st_offset) * 4; + + /* encode context */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(einfo.hw_ip_version_major == 1) + ib_cpu[len++] = 0x0000000d; /* ENCODE_CONTEXT_BUFFER vcn 1 */ + else + ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER vcn 2,3 */ + ib_cpu[len++] = cpb_buf.addr >> 32; + ib_cpu[len++] = cpb_buf.addr; + ib_cpu[len++] = 0x00000000; /* swizzle mode */ + ib_cpu[len++] = 0x00000100; /* luma pitch */ + ib_cpu[len++] = 0x00000100; /* chroma pitch */ + ib_cpu[len++] = 
0x00000003; /* no reconstructed picture */ + ib_cpu[len++] = 0x00000000; /* reconstructed pic 1 luma offset */ + ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32); /* pic1 chroma offset */ + ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; /* pic2 luma offset */ + ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2; /* pic2 chroma offset */ + + for (int i = 0; i < 136; i++) + ib_cpu[len++] = 0x00000000; + *st_size = (len - st_offset) * 4; + + /* bitstream buffer */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(einfo.hw_ip_version_major == 1) + ib_cpu[len++] = 0x0000000e; /* VIDEO_BITSTREAM_BUFFER vcn 1 */ + else + ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER vcn 2,3 */ + ib_cpu[len++] = 0x00000000; /* mode */ + ib_cpu[len++] = bs_buf.addr >> 32; + ib_cpu[len++] = bs_buf.addr; + ib_cpu[len++] = 0x0001f000; + ib_cpu[len++] = 0x00000000; + *st_size = (len - st_offset) * 4; + + /* feedback */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + if(einfo.hw_ip_version_major == 1) + ib_cpu[len++] = 0x00000010; /* FEEDBACK_BUFFER vcn 1 */ + else + ib_cpu[len++] = 0x00000015; /* FEEDBACK_BUFFER vcn 2,3 */ + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = fb_buf.addr >> 32; + ib_cpu[len++] = fb_buf.addr; + ib_cpu[len++] = 0x00000010; + ib_cpu[len++] = 0x00000028; + *st_size = (len - st_offset) * 4; + + /* intra refresh */ + st_offset = len; + st_size = &ib_cpu[len++]; + if(einfo.hw_ip_version_major == 1) + ib_cpu[len++] = 0x0000000c; /* INTRA_REFRESH vcn 1 */ + else + ib_cpu[len++] = 0x00000010; /* INTRA_REFRESH vcn 2,3 */ + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + *st_size = (len - st_offset) * 4; + + if(einfo.hw_ip_version_major != 1) { + /* Input Format */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x0000000c; + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */ + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + 
ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */ + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_PACKING_FORMAT_NV12 */ + *st_size = (len - st_offset) * 4; + + /* Output Format */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x0000000d; + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_VOLUME_G22_BT709 */ + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000000; /* RENCODE_COLOR_BIT_DEPTH_8_BIT */ + *st_size = (len - st_offset) * 4; + } + /* op_speed */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x01000006; /* SPEED_ENCODING_MODE */ + *st_size = (len - st_offset) * 4; + + /* op_enc */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x01000003; + *st_size = (len - st_offset) * 4; + + *p_task_size = (len - task_offset) * 4; + r = submit(len, AMDGPU_HW_IP_VCN_ENC); + CU_ASSERT_EQUAL(r, 0); + + /* check result */ + check_result(fb_buf, bs_buf, frame_type); + + free_resource(&fb_buf); + free_resource(&bs_buf); + free_resource(&vbv_buf); } static void amdgpu_cs_vcn_enc_encode(void) { - /* TODO */ + amdgpu_cs_vcn_enc_encode_frame(2); /* IDR frame */ } static void amdgpu_cs_vcn_enc_destroy(void) { - /* TODO */ + int len = 0, r; + uint32_t *p_task_size = NULL; + uint32_t task_offset = 0, st_offset; + uint32_t *st_size = NULL; + uint32_t fw_maj = 1, fw_min = 9; + + if (einfo.hw_ip_version_major == 2) { + fw_maj = 1; + fw_min = 1; + } else if (einfo.hw_ip_version_major == 3) { + fw_maj = 1; + fw_min = 0; + } + + num_resources = 0; +/* alloc_resource(&enc_buf, 128 * 1024, AMDGPU_GEM_DOMAIN_GTT); */ + resources[num_resources++] = enc_buf.handle; + resources[num_resources++] = ib_handle; + + /* session info */ + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000001; /* RENCODE_IB_PARAM_SESSION_INFO */ + ib_cpu[len++] = ((fw_maj << 16) | (fw_min << 0)); + ib_cpu[len++] = enc_buf.addr >> 32; 
+ ib_cpu[len++] = enc_buf.addr; + ib_cpu[len++] = 1; /* RENCODE_ENGINE_TYPE_ENCODE; */ + *st_size = (len - st_offset) * 4; + + /* task info */ + task_offset = len; + st_offset = len; + st_size = &ib_cpu[len++]; /* size */ + ib_cpu[len++] = 0x00000002; /* RENCODE_IB_PARAM_TASK_INFO */ + p_task_size = &ib_cpu[len++]; + ib_cpu[len++] = enc_task_id++; /* task_id */ + ib_cpu[len++] = 0; /* feedback */ + *st_size = (len - st_offset) * 4; + + /* op close */ + st_offset = len; + st_size = &ib_cpu[len++]; + ib_cpu[len++] = 0x01000002; /* RENCODE_IB_OP_CLOSE_SESSION */ + *st_size = (len - st_offset) * 4; + + *p_task_size = (len - task_offset) * 4; + + r = submit(len, AMDGPU_HW_IP_VCN_ENC); + CU_ASSERT_EQUAL(r, 0); + + free_resource(&cpb_buf); + free_resource(&enc_buf); } commit 0b21fcb214c46d2d3ff03d7c2abe5f50470ad83c Author: Sathishkumar S <sat...@am...> Date: Wed May 25 14:45:08 2022 +0530 tests/amdgpu: fix decode test failure on VCN2.5 For VCN2.5 wrong index was chosen, fix it. Signed-off-by: Sathishkumar S <sat...@am...> Reviewed-by: Leo Liu <le...@am...> diff --git a/tests/amdgpu/vcn_tests.c b/tests/amdgpu/vcn_tests.c index ff97f344..a924fec6 100644 --- a/tests/amdgpu/vcn_tests.c +++ b/tests/amdgpu/vcn_tests.c @@ -133,7 +133,7 @@ CU_BOOL suite_vcn_tests_enable(void) if (info.hw_ip_version_major == 1) vcn_reg_index = 0; - else if (info.hw_ip_version_major == 2) + else if (info.hw_ip_version_major == 2 && info.hw_ip_version_minor == 0) vcn_reg_index = 1; else if ((info.hw_ip_version_major == 2 && info.hw_ip_version_minor >= 5) || info.hw_ip_version_major == 3) |
From: GitLab M. <git...@ke...> - 2022-07-02 20:16:57
|
core-symbols.txt | 1 tests/modeprint/modeprint.c | 2 - tests/modetest/modetest.c | 2 - tests/proptest/proptest.c | 2 - tests/util/kms.c | 27 ---------------------- tests/util/kms.h | 1 xf86drmMode.c | 53 ++++++++++++++++++++++++++++++++++++++++++++ xf86drmMode.h | 9 +++++++ 8 files changed, 66 insertions(+), 31 deletions(-) New commits: commit 0427c1f669ea9b71a9c417f558fcff1666a045e3 Author: Simon Ser <co...@em...> Date: Tue Feb 1 14:36:52 2022 +0100 tests: use drmModeGetConnectorTypeName Drop util_lookup_connector_type_name and use drmModeGetConnectorTypeName instead. Signed-off-by: Simon Ser <co...@em...> diff --git a/tests/modeprint/modeprint.c b/tests/modeprint/modeprint.c index 9372ad92..0f644c74 100644 --- a/tests/modeprint/modeprint.c +++ b/tests/modeprint/modeprint.c @@ -135,7 +135,7 @@ static int printConnector(int fd, drmModeResPtr res, drmModeConnectorPtr connect drmModePropertyPtr props; const char *connector_type_name = NULL; - connector_type_name = util_lookup_connector_type_name(connector->connector_type); + connector_type_name = drmModeGetConnectorTypeName(connector->connector_type); if (connector_type_name) printf("Connector: %s-%d\n", connector_type_name, diff --git a/tests/modetest/modetest.c b/tests/modetest/modetest.c index d6ab9dc8..8ff6c80d 100644 --- a/tests/modetest/modetest.c +++ b/tests/modetest/modetest.c @@ -656,7 +656,7 @@ static struct resources *get_resources(struct device *dev) int num; num = asprintf(&connector->name, "%s-%u", - util_lookup_connector_type_name(conn->connector_type), + drmModeGetConnectorTypeName(conn->connector_type), conn->connector_type_id); if (num < 0) goto error; diff --git a/tests/proptest/proptest.c b/tests/proptest/proptest.c index 88bed10b..97cb01fa 100644 --- a/tests/proptest/proptest.c +++ b/tests/proptest/proptest.c @@ -192,7 +192,7 @@ static void listConnectorProperties(void) } printf("Connector %u (%s-%u)\n", c->connector_id, - util_lookup_connector_type_name(c->connector_type), + 
drmModeGetConnectorTypeName(c->connector_type), c->connector_type_id); listObjectProperties(c->connector_id, diff --git a/tests/util/kms.c b/tests/util/kms.c index 39a93866..680e21ac 100644 --- a/tests/util/kms.c +++ b/tests/util/kms.c @@ -96,33 +96,6 @@ const char *util_lookup_connector_status_name(unsigned int status) ARRAY_SIZE(connector_status_names)); } -static const struct type_name connector_type_names[] = { - { DRM_MODE_CONNECTOR_Unknown, "unknown" }, - { DRM_MODE_CONNECTOR_VGA, "VGA" }, - { DRM_MODE_CONNECTOR_DVII, "DVI-I" }, - { DRM_MODE_CONNECTOR_DVID, "DVI-D" }, - { DRM_MODE_CONNECTOR_DVIA, "DVI-A" }, - { DRM_MODE_CONNECTOR_Composite, "composite" }, - { DRM_MODE_CONNECTOR_SVIDEO, "s-video" }, - { DRM_MODE_CONNECTOR_LVDS, "LVDS" }, - { DRM_MODE_CONNECTOR_Component, "component" }, - { DRM_MODE_CONNECTOR_9PinDIN, "9-pin DIN" }, - { DRM_MODE_CONNECTOR_DisplayPort, "DP" }, - { DRM_MODE_CONNECTOR_HDMIA, "HDMI-A" }, - { DRM_MODE_CONNECTOR_HDMIB, "HDMI-B" }, - { DRM_MODE_CONNECTOR_TV, "TV" }, - { DRM_MODE_CONNECTOR_eDP, "eDP" }, - { DRM_MODE_CONNECTOR_VIRTUAL, "Virtual" }, - { DRM_MODE_CONNECTOR_DSI, "DSI" }, - { DRM_MODE_CONNECTOR_DPI, "DPI" }, -}; - -const char *util_lookup_connector_type_name(unsigned int type) -{ - return util_lookup_type_name(type, connector_type_names, - ARRAY_SIZE(connector_type_names)); -} - static const char * const modules[] = { "i915", "amdgpu", diff --git a/tests/util/kms.h b/tests/util/kms.h index dde2ed2c..f79f4617 100644 --- a/tests/util/kms.h +++ b/tests/util/kms.h @@ -28,7 +28,6 @@ const char *util_lookup_encoder_type_name(unsigned int type); const char *util_lookup_connector_status_name(unsigned int type); -const char *util_lookup_connector_type_name(unsigned int type); int util_open(const char *device, const char *module); commit 50f8d517733d24fce6693ffae552f9833e2e6aa9 Author: Simon Ser <co...@em...> Date: Tue Feb 1 14:33:34 2022 +0100 xf86drmMode: introduce drmModeGetConnectorTypeName User-space often needs to print the 
name of a connector type. When a new connector type is added, all user-space programs need to be updated to support the new connector type. Expose a function to get a connector type name in libdrm. The names are taken from the kernel [1]. [1]: https://cgit.freedesktop.org/drm/drm/tree/drivers/gpu/drm/drm_connector.c?h=4fc8cb47fcfdc93e274a1291757e478df4f9c39b#n83 Signed-off-by: Simon Ser <co...@em...> diff --git a/core-symbols.txt b/core-symbols.txt index 31bbcf8f..dcf90018 100644 --- a/core-symbols.txt +++ b/core-symbols.txt @@ -126,6 +126,7 @@ drmModeFreePropertyBlob drmModeFreeResources drmModeGetConnector drmModeGetConnectorCurrent +drmModeGetConnectorTypeName drmModeGetCrtc drmModeGetEncoder drmModeGetFB diff --git a/xf86drmMode.c b/xf86drmMode.c index 6b79d768..6d636dcc 100644 --- a/xf86drmMode.c +++ b/xf86drmMode.c @@ -1747,3 +1747,56 @@ drm_public void drmModeFreeFB2(drmModeFB2Ptr ptr) { drmFree(ptr); } + +drm_public const char * +drmModeGetConnectorTypeName(uint32_t connector_type) +{ + /* Keep the strings in sync with the kernel's drm_connector_enum_list in + * drm_connector.c. 
*/ + switch (connector_type) { + case DRM_MODE_CONNECTOR_Unknown: + return "Unknown"; + case DRM_MODE_CONNECTOR_VGA: + return "VGA"; + case DRM_MODE_CONNECTOR_DVII: + return "DVI-I"; + case DRM_MODE_CONNECTOR_DVID: + return "DVI-D"; + case DRM_MODE_CONNECTOR_DVIA: + return "DVI-A"; + case DRM_MODE_CONNECTOR_Composite: + return "Composite"; + case DRM_MODE_CONNECTOR_SVIDEO: + return "SVIDEO"; + case DRM_MODE_CONNECTOR_LVDS: + return "LVDS"; + case DRM_MODE_CONNECTOR_Component: + return "Component"; + case DRM_MODE_CONNECTOR_9PinDIN: + return "DIN"; + case DRM_MODE_CONNECTOR_DisplayPort: + return "DP"; + case DRM_MODE_CONNECTOR_HDMIA: + return "HDMI-A"; + case DRM_MODE_CONNECTOR_HDMIB: + return "HDMI-B"; + case DRM_MODE_CONNECTOR_TV: + return "TV"; + case DRM_MODE_CONNECTOR_eDP: + return "eDP"; + case DRM_MODE_CONNECTOR_VIRTUAL: + return "Virtual"; + case DRM_MODE_CONNECTOR_DSI: + return "DSI"; + case DRM_MODE_CONNECTOR_DPI: + return "DPI"; + case DRM_MODE_CONNECTOR_WRITEBACK: + return "Writeback"; + case DRM_MODE_CONNECTOR_SPI: + return "SPI"; + case DRM_MODE_CONNECTOR_USB: + return "USB"; + default: + return NULL; + } +} diff --git a/xf86drmMode.h b/xf86drmMode.h index 08557642..46dc80a2 100644 --- a/xf86drmMode.h +++ b/xf86drmMode.h @@ -475,6 +475,15 @@ extern drmModeObjectListPtr drmModeGetLease(int fd); extern int drmModeRevokeLease(int fd, uint32_t lessee_id); +/** + * Get a string describing a connector type. + * + * NULL is returned if the connector type is unsupported. Callers should handle + * this gracefully, e.g. by falling back to "Unknown" or printing the raw value. + */ +extern const char * +drmModeGetConnectorTypeName(uint32_t connector_type); + #if defined(__cplusplus) } #endif |
From: GitLab M. <git...@ke...> - 2022-08-22 17:14:30
|
.gitlab-ci/debian-install.sh | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) New commits: commit 1637d8b02088fff62aed5a9796eddcb9f0f60656 Author: Eric Engestrom <er...@en...> Date: Mon Aug 22 17:41:18 2022 +0100 ci: fix the tested meson version Reviewed-by: Simon Ser <co...@em...> diff --git a/.gitlab-ci/debian-install.sh b/.gitlab-ci/debian-install.sh index d50dc613..6f7a212f 100644 --- a/.gitlab-ci/debian-install.sh +++ b/.gitlab-ci/debian-install.sh @@ -68,4 +68,4 @@ done apt-get purge -y $EPHEMERAL # Test that the oldest Meson version we claim to support is still supported -pip3 install meson==0.46 +pip3 install meson==0.53 commit 24163f40d3b4c7790c843e99f85143e551201608 Author: Eric Engestrom <er...@en...> Date: Mon Aug 22 17:40:09 2022 +0100 ci: remove system meson before installing the pip one Reviewed-by: Simon Ser <co...@em...> diff --git a/.gitlab-ci/debian-install.sh b/.gitlab-ci/debian-install.sh index ab901360..d50dc613 100644 --- a/.gitlab-ci/debian-install.sh +++ b/.gitlab-ci/debian-install.sh @@ -26,14 +26,18 @@ EOF apt-get dist-upgrade -y +EPHEMERAL=" + meson \ + " + apt-get install -y --no-remove \ + $EPHEMERAL \ build-essential \ docbook-xsl \ libatomic-ops-dev \ libcairo2-dev \ libcunit1-dev \ libpciaccess-dev \ - meson \ ninja-build \ pkg-config \ python3 \ @@ -61,6 +65,7 @@ for arch in ${CROSS_ARCHITECTURES[@]}; do fi done +apt-get purge -y $EPHEMERAL # Test that the oldest Meson version we claim to support is still supported pip3 install meson==0.46 |
From: GitLab M. <git...@ke...> - 2022-08-22 18:03:04
|
.gitlab-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) New commits: commit 3e3874d50d29d1179ad996afcd8d359d3aea2639 Author: Eric Engestrom <er...@en...> Date: Mon Aug 22 18:16:46 2022 +0100 ci: bump images tags to take !255 into effect Reviewed-by: Simon Ser <co...@em...> diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index eccebd83..9657e20e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -44,7 +44,7 @@ stages: FDO_DISTRIBUTION_EXEC: 'pip3 install meson==0.53.0' # bump this tag every time you change something which requires rebuilding the # base image - FDO_DISTRIBUTION_TAG: "2022-01-19.0" + FDO_DISTRIBUTION_TAG: "2022-08-22.0" .debian-x86_64: extends: @@ -71,7 +71,7 @@ stages: FDO_DISTRIBUTION_PACKAGES: 'meson ninja pkgconf libpciaccess libpthread-stubs py39-docutils cairo' # bump this tag every time you change something which requires rebuilding the # base image - FDO_DISTRIBUTION_TAG: "2021-11-10.1" + FDO_DISTRIBUTION_TAG: "2022-08-22.0" .freebsd-x86_64: extends: commit f32db9d35469efab8e717bb9baf8e681fdefa86b Author: Eric Engestrom <er...@en...> Date: Mon Aug 22 18:22:46 2022 +0100 ci/freedesktop: bump python version of docutils package as 3.8 no longer exists diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3c79867a..eccebd83 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -68,7 +68,7 @@ stages: variables: BUILD_OS: freebsd FDO_DISTRIBUTION_VERSION: "13.0" - FDO_DISTRIBUTION_PACKAGES: 'meson ninja pkgconf libpciaccess libpthread-stubs py38-docutils cairo' + FDO_DISTRIBUTION_PACKAGES: 'meson ninja pkgconf libpciaccess libpthread-stubs py39-docutils cairo' # bump this tag every time you change something which requires rebuilding the # base image FDO_DISTRIBUTION_TAG: "2021-11-10.1" |
From: GitLab M. <git...@ke...> - 2022-08-23 17:55:23
|
.gitlab-ci.yml | 18 +------------ meson.build | 70 ++++++++++++++++++++++-------------------------------- meson_options.txt | 60 +++++++++++++--------------------------------- 3 files changed, 49 insertions(+), 99 deletions(-) New commits: commit a64a176cfd62956cc4967aa2929db2526d4faee2 Author: Eric Engestrom <er...@en...> Date: Mon Aug 22 17:10:16 2022 +0100 meson: simplify some more build options by using features Signed-off-by: Eric Engestrom <er...@en...> Reviewed-by: Simon Ser <co...@em...> diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 37f8515e..27027233 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -192,8 +192,6 @@ x86_64-freebsd-container_prep: script: - meson build --auto-features=enabled - -D cairo-tests=true - -D man-pages=true -D udev=true - ninja -C build - ninja -C build test @@ -215,7 +213,7 @@ x86_64-freebsd-container_prep: # the workspace to see details about the failed tests. - | set +e - /app/vmctl exec "pkg info; cd $CI_PROJECT_NAME ; meson build --auto-features=enabled -D nouveau=disabled -D cairo-tests=true -D man-pages=true -D valgrind=false && ninja -C build" + /app/vmctl exec "pkg info; cd $CI_PROJECT_NAME ; meson build --auto-features=enabled -D nouveau=disabled -D valgrind=disabled && ninja -C build" set -ex scp -r vm:$CI_PROJECT_NAME/build/meson-logs . 
/app/vmctl exec "ninja -C $CI_PROJECT_NAME/build install" diff --git a/meson.build b/meson.build index 2364baab..2b142c68 100644 --- a/meson.build +++ b/meson.build @@ -237,30 +237,18 @@ libdrm_c_args = cc.get_supported_arguments([ dep_pciaccess = dependency('pciaccess', version : '>= 0.10', required : with_intel) dep_cunit = dependency('cunit', version : '>= 2.1', required : false) -_cairo_tests = get_option('cairo-tests') -if _cairo_tests != 'false' - dep_cairo = dependency('cairo', required : _cairo_tests == 'true') - with_cairo_tests = dep_cairo.found() -else - dep_cairo = [] - with_cairo_tests = false -endif -_valgrind = get_option('valgrind') -if _valgrind != 'false' - if with_freedreno - dep_valgrind = dependency('valgrind', required : _valgrind == 'true', version : '>=3.10.0') - else - dep_valgrind = dependency('valgrind', required : _valgrind == 'true') - endif - with_valgrind = dep_valgrind.found() -else - dep_valgrind = [] - with_valgrind = false +dep_cairo = dependency('cairo', required : get_option('cairo-tests')) +with_cairo_tests = dep_cairo.found() + +valgrind_version = [] +if with_freedreno + valgrind_version = '>=3.10.0' endif +dep_valgrind = dependency('valgrind', required : get_option('valgrind'), version : valgrind_version) +with_valgrind = dep_valgrind.found() -with_man_pages = get_option('man-pages') -prog_rst2man = find_program('rst2man', 'rst2man.py', required: with_man_pages == 'true') -with_man_pages = with_man_pages != 'false' and prog_rst2man.found() +prog_rst2man = find_program('rst2man', 'rst2man.py', required: get_option('man-pages')) +with_man_pages = prog_rst2man.found() config.set10('HAVE_VISIBILITY', cc.has_function_attribute('visibility:hidden')) diff --git a/meson_options.txt b/meson_options.txt index 761b40da..caddeff2 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -79,23 +79,17 @@ option( ) option( 'cairo-tests', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', 
description : 'Enable support for Cairo rendering in tests.', ) option( 'man-pages', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : 'Enable manpage generation and installation.', ) option( 'valgrind', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : 'Build libdrm with valgrind support.', ) option( commit 26eb15165b767de50bef3c02abd29cec256cf4a1 Author: Eric Engestrom <er...@en...> Date: Mon Aug 22 17:04:52 2022 +0100 meson: convert auto combos into proper features Allows users to easily enable everything (eg. packagers), or select just the drivers they want with something like: -D auto-features=disabled -D amdgpu=enabled Signed-off-by: Eric Engestrom <er...@en...> Reviewed-by: Simon Ser <co...@em...> diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9657e20e..37f8515e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -191,22 +191,10 @@ x86_64-freebsd-container_prep: GIT_DEPTH: 10 script: - meson build - -D amdgpu=true + --auto-features=enabled -D cairo-tests=true - -D etnaviv=true - -D exynos=true - -D freedreno=true - -D freedreno-kgsl=true - -D intel=true -D man-pages=true - -D nouveau=true - -D omap=true - -D radeon=true - -D tegra=true -D udev=true - -D valgrind=auto - -D vc4=true - -D vmwgfx=true - ninja -C build - ninja -C build test - DESTDIR=$PWD/install ninja -C build install @@ -227,7 +215,7 @@ x86_64-freebsd-container_prep: # the workspace to see details about the failed tests. - | set +e - /app/vmctl exec "pkg info; cd $CI_PROJECT_NAME ; meson build -D amdgpu=true -D cairo-tests=true -D intel=true -D man-pages=true -D nouveau=false -D radeon=true -D valgrind=auto && ninja -C build" + /app/vmctl exec "pkg info; cd $CI_PROJECT_NAME ; meson build --auto-features=enabled -D nouveau=disabled -D cairo-tests=true -D man-pages=true -D valgrind=false && ninja -C build" set -ex scp -r vm:$CI_PROJECT_NAME/build/meson-logs . 
/app/vmctl exec "ninja -C $CI_PROJECT_NAME/build install" diff --git a/meson.build b/meson.build index 70e87306..2364baab 100644 --- a/meson.build +++ b/meson.build @@ -85,19 +85,19 @@ config.set10('HAVE_LIB_ATOMIC_OPS', lib_atomics) with_intel = false _intel = get_option('intel') -if _intel != 'false' - if _intel == 'true' and not with_atomics +if not _intel.disabled() + if _intel.enabled() and not with_atomics error('libdrm_intel requires atomics.') else - with_intel = (_intel == 'true' or host_machine.cpu_family().startswith('x86')) and with_atomics + with_intel = (_intel.enabled() or host_machine.cpu_family().startswith('x86')) and with_atomics endif endif summary('Intel', with_intel) with_radeon = false _radeon = get_option('radeon') -if _radeon != 'false' - if _radeon == 'true' and not with_atomics +if not _radeon.disabled() + if _radeon.enabled() and not with_atomics error('libdrm_radeon requires atomics.') endif with_radeon = with_atomics @@ -106,8 +106,8 @@ summary('Radeon', with_radeon) with_amdgpu = false _amdgpu = get_option('amdgpu') -if _amdgpu != 'false' - if _amdgpu == 'true' and not with_atomics +if not _amdgpu.disabled() + if _amdgpu.enabled() and not with_atomics error('libdrm_amdgpu requires atomics.') endif with_amdgpu = with_atomics @@ -116,8 +116,8 @@ summary('AMDGPU', with_amdgpu) with_nouveau = false _nouveau = get_option('nouveau') -if _nouveau != 'false' - if _nouveau == 'true' and not with_atomics +if not _nouveau.disabled() + if _nouveau.enabled() and not with_atomics error('libdrm_nouveau requires atomics.') endif with_nouveau = with_atomics @@ -126,14 +126,14 @@ summary('Nouveau', with_nouveau) with_vmwgfx = false _vmwgfx = get_option('vmwgfx') -if _vmwgfx != 'false' +if not _vmwgfx.disabled() with_vmwgfx = true endif summary('vmwgfx', with_vmwgfx) with_omap = false _omap = get_option('omap') -if _omap == 'true' +if _omap.enabled() if not with_atomics error('libdrm_omap requires atomics.') endif @@ -143,11 +143,11 @@ summary('OMAP', 
with_omap) with_freedreno = false _freedreno = get_option('freedreno') -if _freedreno != 'false' - if _freedreno == 'true' and not with_atomics +if not _freedreno.disabled() + if _freedreno.enabled() and not with_atomics error('libdrm_freedreno requires atomics.') else - with_freedreno = (_freedreno == 'true' or ['arm', 'aarch64'].contains(host_machine.cpu_family())) and with_atomics + with_freedreno = (_freedreno.enabled() or ['arm', 'aarch64'].contains(host_machine.cpu_family())) and with_atomics endif endif summary('Freedreno', with_freedreno) @@ -155,7 +155,7 @@ summary('Freedreon-kgsl', with_freedreno_kgsl) with_tegra = false _tegra = get_option('tegra') -if _tegra == 'true' +if _tegra.enabled() if not with_atomics error('libdrm_tegra requires atomics.') endif @@ -165,7 +165,7 @@ summary('Tegra', with_tegra) with_etnaviv = false _etnaviv = get_option('etnaviv') -if _etnaviv == 'true' +if _etnaviv.enabled() if not with_atomics error('libdrm_etnaviv requires atomics.') endif @@ -173,13 +173,13 @@ if _etnaviv == 'true' endif summary('Etnaviv', with_etnaviv) -with_exynos = get_option('exynos') == 'true' +with_exynos = get_option('exynos').enabled() summary('EXYNOS', with_exynos) with_vc4 = false _vc4 = get_option('vc4') -if _vc4 != 'false' - with_vc4 = _vc4 == 'true' or ['arm', 'aarch64'].contains(host_machine.cpu_family()) +if not _vc4.disabled() + with_vc4 = _vc4.enabled() or ['arm', 'aarch64'].contains(host_machine.cpu_family()) endif summary('VC4', with_vc4) diff --git a/meson_options.txt b/meson_options.txt index e487676a..761b40da 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -20,79 +20,61 @@ option( 'intel', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for Intel's KMS API.''', ) option( 'radeon', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for radeons's KMS API.''', ) option( 
'amdgpu', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for amdgpu's KMS API.''', ) option( 'nouveau', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for nouveau's KMS API.''', ) option( 'vmwgfx', - type : 'combo', - value : 'true', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for vmgfx's KMS API.''', ) option( 'omap', - type : 'combo', - value : 'false', - choices : ['true', 'false', 'auto'], + type : 'feature', + value : 'disabled', description : '''Enable support for OMAP's experimental KMS API.''', ) option( 'exynos', - type : 'combo', - value : 'false', - choices : ['true', 'false', 'auto'], + type : 'feature', + value : 'disabled', description : '''Enable support for EXYNOS's experimental KMS API.''', ) option( 'freedreno', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for freedreno's KMS API.''', ) option( 'tegra', - type : 'combo', - value : 'false', - choices : ['true', 'false', 'auto'], + type : 'feature', + value : 'disabled', description : '''Enable support for Tegra's experimental KMS API.''', ) option( 'vc4', - type : 'combo', - value : 'auto', - choices : ['true', 'false', 'auto'], + type : 'feature', description : '''Enable support for vc4's KMS API.''', ) option( 'etnaviv', - type : 'combo', - value : 'false', - choices : ['true', 'false', 'auto'], + type : 'feature', + value : 'disabled', description : '''Enable support for etnaviv's experimental KMS API.''', ) option( |
From: GitLab M. <git...@ke...> - 2022-08-31 06:44:01
|
core-symbols.txt | 1 + tests/modetest/modetest.c | 8 +++----- xf86drm.c | 35 +++++++++++++++++++++++++++++++++++ xf86drm.h | 3 +++ 4 files changed, 42 insertions(+), 5 deletions(-) New commits: commit e761875fc5d29080316029b7c174797751d82a97 Author: Simon Ser <co...@em...> Date: Sun Aug 21 14:18:28 2022 +0200 tests/modetest: use drmGetFormatName() Signed-off-by: Simon Ser <co...@em...> Reviewed-by: Marius Vlad <mar...@co...> Reviewed-by: Eric Engestrom <er...@ig...> diff --git a/tests/modetest/modetest.c b/tests/modetest/modetest.c index 8ff6c80d..42e2d1f4 100644 --- a/tests/modetest/modetest.c +++ b/tests/modetest/modetest.c @@ -187,11 +187,9 @@ static bit_name_fn(mode_flag) static void dump_fourcc(uint32_t fourcc) { - printf(" %c%c%c%c", - fourcc, - fourcc >> 8, - fourcc >> 16, - fourcc >> 24); + char *name = drmGetFormatName(fourcc); + printf(" %s", name); + free(name); } static void dump_encoders(struct device *dev) commit baa4b8cafca0d52189bfd6e7cda9c558a261ae71 Author: Simon Ser <co...@em...> Date: Sun Aug 21 14:17:35 2022 +0200 xf86drm: add drmGetFormatName() Same as drmGetFormatModifierName() but for formats. Signed-off-by: Simon Ser <co...@em...> Reviewed-by: Marius Vlad <mar...@co...> Reviewed-by: Eric Engestrom <er...@ig...> diff --git a/core-symbols.txt b/core-symbols.txt index da98a6a9..6c5a4af6 100644 --- a/core-symbols.txt +++ b/core-symbols.txt @@ -203,3 +203,4 @@ drmUpdateDrawableInfo drmWaitVBlank drmGetFormatModifierName drmGetFormatModifierVendor +drmGetFormatName diff --git a/xf86drm.c b/xf86drm.c index cca126b9..42c1a5d0 100644 --- a/xf86drm.c +++ b/xf86drm.c @@ -5106,3 +5106,38 @@ drmGetFormatModifierName(uint64_t modifier) return modifier_found; } + +/** + * Get a human-readable name for a DRM FourCC format. + * + * \param format The format. + * \return A malloc'ed string containing the format name. Caller is responsible + * for freeing it. 
+ */ +drm_public char * +drmGetFormatName(uint32_t format) +{ + char *str; + size_t str_size, i; + char a, b, c, d; + + if (format == DRM_FORMAT_INVALID) + return strdup("INVALID"); + + str_size = 5; + str = malloc(str_size); + if (!str) + return NULL; + + a = (char) ((format >> 0) & 0xFF); + b = (char) ((format >> 8) & 0xFF); + c = (char) ((format >> 16) & 0xFF); + d = (char) ((format >> 24) & 0xFF); + snprintf(str, str_size, "%c%c%c%c", a, b, c, d); + + /* Trim spaces at the end */ + for (i = 3; i > 0 && str[i] == ' '; i--) + str[i] = '\0'; + + return str; +} diff --git a/xf86drm.h b/xf86drm.h index 1631396a..4badaae5 100644 --- a/xf86drm.h +++ b/xf86drm.h @@ -954,6 +954,9 @@ drmGetFormatModifierVendor(uint64_t modifier); extern char * drmGetFormatModifierName(uint64_t modifier); +extern char * +drmGetFormatName(uint32_t format); + #ifndef fourcc_mod_get_vendor #define fourcc_mod_get_vendor(modifier) \ (((modifier) >> 56) & 0xff) |
From: GitLab M. <git...@ke...> - 2022-10-03 06:54:03
|
include/drm/drm_fourcc.h | 100 ++ include/drm/i915_drm.h | 2087 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 2045 insertions(+), 142 deletions(-) New commits: commit e0df5fce890c8f70ccd72a3e7260aa8f5dd842d8 Author: Jordan Justen <jor...@in...> Date: Wed Sep 28 16:46:20 2022 -0700 include/drm/i915_drm.h: Update from Linux v6.0-rc7 Generated from the Linux v6.0-rc7 tag with a sha1 of f76349cf41451c5c42a99f18a9163377e4b364ff. Signed-off-by: Jordan Justen <jor...@in...> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 72afd94e..1de0433f 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -55,15 +55,15 @@ extern "C" { * cause the related events to not be seen. * * I915_RESET_UEVENT - Event is generated just before an attempt to reset the - * the GPU. The value supplied with the event is always 1. NOTE: Disable + * GPU. The value supplied with the event is always 1. NOTE: Disable * reset via module parameter will cause this event to not be seen. */ #define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR" #define I915_ERROR_UEVENT "ERROR" #define I915_RESET_UEVENT "RESET" -/* - * i915_user_extension: Base class for defining a chain of extensions +/** + * struct i915_user_extension - Base class for defining a chain of extensions * * Many interfaces need to grow over time. In most cases we can simply * extend the struct and have userspace pass in more data. Another option, @@ -76,12 +76,58 @@ extern "C" { * increasing complexity, and for large parts of that interface to be * entirely optional. The downside is more pointer chasing; chasing across * the boundary with pointers encapsulated inside u64. + * + * Example chaining: + * + * .. 
code-block:: C + * + * struct i915_user_extension ext3 { + * .next_extension = 0, // end + * .name = ..., + * }; + * struct i915_user_extension ext2 { + * .next_extension = (uintptr_t)&ext3, + * .name = ..., + * }; + * struct i915_user_extension ext1 { + * .next_extension = (uintptr_t)&ext2, + * .name = ..., + * }; + * + * Typically the struct i915_user_extension would be embedded in some uAPI + * struct, and in this case we would feed it the head of the chain(i.e ext1), + * which would then apply all of the above extensions. + * */ struct i915_user_extension { + /** + * @next_extension: + * + * Pointer to the next struct i915_user_extension, or zero if the end. + */ __u64 next_extension; + /** + * @name: Name of the extension. + * + * Note that the name here is just some integer. + * + * Also note that the name space for this is not global for the whole + * driver, but rather its scope/meaning is limited to the specific piece + * of uAPI which has embedded the struct i915_user_extension. + */ __u32 name; - __u32 flags; /* All undefined bits must be zero. */ - __u32 rsvd[4]; /* Reserved for future use; must be zero. */ + /** + * @flags: MBZ + * + * All undefined bits must be zero. + */ + __u32 flags; + /** + * @rsvd: MBZ + * + * Reserved for future use; must be zero. + */ + __u32 rsvd[4]; }; /* @@ -108,24 +154,101 @@ enum i915_mocs_table_index { I915_MOCS_CACHED, }; -/* +/** + * enum drm_i915_gem_engine_class - uapi engine type enumeration + * * Different engines serve different roles, and there may be more than one - * engine serving each role. enum drm_i915_gem_engine_class provides a - * classification of the role of the engine, which may be used when requesting - * operations to be performed on a certain subset of engines, or for providing - * information about that group. + * engine serving each role. 
This enum provides a classification of the role + * of the engine, which may be used when requesting operations to be performed + * on a certain subset of engines, or for providing information about that + * group. */ enum drm_i915_gem_engine_class { + /** + * @I915_ENGINE_CLASS_RENDER: + * + * Render engines support instructions used for 3D, Compute (GPGPU), + * and programmable media workloads. These instructions fetch data and + * dispatch individual work items to threads that operate in parallel. + * The threads run small programs (called "kernels" or "shaders") on + * the GPU's execution units (EUs). + */ I915_ENGINE_CLASS_RENDER = 0, + + /** + * @I915_ENGINE_CLASS_COPY: + * + * Copy engines (also referred to as "blitters") support instructions + * that move blocks of data from one location in memory to another, + * or that fill a specified location of memory with fixed data. + * Copy engines can perform pre-defined logical or bitwise operations + * on the source, destination, or pattern data. + */ I915_ENGINE_CLASS_COPY = 1, + + /** + * @I915_ENGINE_CLASS_VIDEO: + * + * Video engines (also referred to as "bit stream decode" (BSD) or + * "vdbox") support instructions that perform fixed-function media + * decode and encode. + */ I915_ENGINE_CLASS_VIDEO = 2, + + /** + * @I915_ENGINE_CLASS_VIDEO_ENHANCE: + * + * Video enhancement engines (also referred to as "vebox") support + * instructions related to image enhancement. + */ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3, - /* should be kept compact */ + /** + * @I915_ENGINE_CLASS_COMPUTE: + * + * Compute engines support a subset of the instructions available + * on render engines: compute engines support Compute (GPGPU) and + * programmable media workloads, but do not support the 3D pipeline. + */ + I915_ENGINE_CLASS_COMPUTE = 4, + + /* Values in this enum should be kept compact. */ + /** + * @I915_ENGINE_CLASS_INVALID: + * + * Placeholder value to represent an invalid engine class assignment. 
+ */ I915_ENGINE_CLASS_INVALID = -1 }; +/** + * struct i915_engine_class_instance - Engine class/instance identifier + * + * There may be more than one engine fulfilling any role within the system. + * Each engine of a class is given a unique instance number and therefore + * any engine can be specified by its class:instance tuplet. APIs that allow + * access to any engine in the system will use struct i915_engine_class_instance + * for this identification. + */ +struct i915_engine_class_instance { + /** + * @engine_class: + * + * Engine class from enum drm_i915_gem_engine_class + */ + __u16 engine_class; +#define I915_ENGINE_CLASS_INVALID_NONE -1 +#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2 + + /** + * @engine_instance: + * + * Engine instance. + */ + __u16 engine_instance; +}; + /** * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915 * @@ -163,8 +286,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. 
*/ @@ -343,6 +467,9 @@ typedef struct _drm_i915_sarea { #define DRM_I915_PERF_ADD_CONFIG 0x37 #define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_I915_QUERY 0x39 +#define DRM_I915_GEM_VM_CREATE 0x3a +#define DRM_I915_GEM_VM_DESTROY 0x3b +#define DRM_I915_GEM_CREATE_EXT 0x3c /* Must be kept compact -- no holes */ #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) @@ -375,10 +502,12 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT) #define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT) #define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create) +#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext) #define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread) #define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite) #define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap) #define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt) +#define DRM_IOCTL_I915_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_offset) #define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain) #define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish) #define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling) @@ -403,6 +532,8 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) #define 
DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) #define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query) +#define DRM_IOCTL_I915_GEM_VM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_CREATE, struct drm_i915_gem_vm_control) +#define DRM_IOCTL_I915_GEM_VM_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_VM_DESTROY, struct drm_i915_gem_vm_control) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -503,6 +634,16 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) +#define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) #define I915_PARAM_HUC_STATUS 42 @@ -520,7 +661,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_EXEC_FENCE 44 /* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture - * user specified buffers for post-mortem debugging of GPU hangs. See + * user specified bufffers for post-mortem debugging of GPU hangs. See * EXEC_OBJECT_CAPTURE. */ #define I915_PARAM_HAS_EXEC_CAPTURE 45 @@ -586,16 +727,51 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_MMAP_GTT_COHERENT 52 +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports coordination of parallel + * execution through use of explicit fence support. + * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT. + */ +#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53 + +/* + * Revision of the i915-perf uAPI. The value returned helps determine what + * i915-perf features are available. See drm_i915_perf_property_id. 
+ */ +#define I915_PARAM_PERF_REVISION 54 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + +/* Query if the kernel supports the I915_USERPTR_PROBE flag. */ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + /* Must be kept compact -- no holes and well documented */ -typedef struct drm_i915_getparam { +/** + * struct drm_i915_getparam - Driver parameter query structure. + */ +struct drm_i915_getparam { + /** @param: Driver parameter to query. */ __s32 param; - /* + + /** + * @value: Address of memory where queried value should be put. + * * WARNING: Using pointers instead of fixed-size u64 means we need to write * compat32 code. Don't repeat this mistake. */ int *value; -} drm_i915_getparam_t; +}; + +/** + * typedef drm_i915_getparam_t - Driver parameter query structure. + * See struct drm_i915_getparam. + */ +typedef struct drm_i915_getparam drm_i915_getparam_t; /* Ioctl to set kernel params: */ @@ -761,14 +937,113 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, + * and is used to retrieve the fake offset to mmap an object specified by &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. + */ +struct drm_i915_gem_mmap_offset { + /** @handle: Handle for the object being mapped. */ + __u32 handle; + /** @pad: Must be zero */ + __u32 pad; + /** + * @offset: The fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. 
+ */ + __u64 offset; + + /** + * @flags: Flags for extended behaviour. + * + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: + * + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid + * type. On devices without local memory, this caching mode is invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will + * be used, depending on the object placement on creation. WB will be used + * when the object can only exist in system memory, WC otherwise. + */ + __u64 flags; + +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. + * + * No current extensions defined; mbz. + */ + __u64 extensions; +}; + +/** + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in + * preparation for accessing the pages via some CPU domain. + * + * Specifying a new write or read domain will flush the object out of the + * previous domain(if required), before then updating the objects domain + * tracking with the new domain. + * + * Note this might involve waiting for the object first if it is still active on + * the GPU. + * + * Supported values for @read_domains and @write_domain: + * + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain + * - I915_GEM_DOMAIN_CPU: CPU cache domain + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain + * + * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. 
On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + */ struct drm_i915_gem_set_domain { - /** Handle for the object */ + /** @handle: Handle for the object. */ __u32 handle; - /** New read domains */ + /** @read_domains: New read domains. */ __u32 read_domains; - /** New write domain */ + /** + * @write_domain: New write domain. + * + * Note that having something in the write domain implies it's in the + * read domain, and only that read domain. + */ __u32 write_domain; }; @@ -872,6 +1147,7 @@ struct drm_i915_gem_exec_object { __u64 offset; }; +/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */ struct drm_i915_gem_execbuffer { /** * List of buffers to be validated with their relocations to be @@ -918,10 +1194,16 @@ struct drm_i915_gem_exec_object2 { /** * When the EXEC_OBJECT_PINNED flag is specified this is populated by * the user with the GTT offset at which this object will be pinned. + * * When the I915_EXEC_NO_RELOC flag is specified this must contain the * presumed_offset of the object. + * * During execbuffer2 the kernel populates it with the value of the * current GTT offset of the object, for future presumed_offset writes. 
+ * + * See struct drm_i915_gem_create_ext for the rules when dealing with + * alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with + * minimum page sizes, like DG2. */ __u64 offset; @@ -970,38 +1252,119 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +/** + * struct drm_i915_gem_exec_fence - An input or output fence for the execbuf + * ioctl. + * + * The request will wait for input fence to signal before submission. + * + * The returned output fence will be signaled after the completion of the + * request. + */ struct drm_i915_gem_exec_fence { - /** - * User's handle for a drm_syncobj to wait on or signal. - */ + /** @handle: User's handle for a drm_syncobj to wait on or signal. */ __u32 handle; + /** + * @flags: Supported flags are: + * + * I915_EXEC_FENCE_WAIT: + * Wait for the input fence before request submission. + * + * I915_EXEC_FENCE_SIGNAL: + * Return request completion fence as output + */ + __u32 flags; #define I915_EXEC_FENCE_WAIT (1<<0) #define I915_EXEC_FENCE_SIGNAL (1<<1) #define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1)) - __u32 flags; }; -struct drm_i915_gem_execbuffer2 { +/** + * struct drm_i915_gem_execbuffer_ext_timeline_fences - Timeline fences + * for execbuf ioctl. + * + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { +#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 + /** @base: Extension link. See struct i915_user_extension. */ + struct i915_user_extension base; + + /** + * @fence_count: Number of elements in the @handles_ptr & @value_ptr + * arrays. + */ + __u64 fence_count; + + /** + * @handles_ptr: Pointer to an array of struct drm_i915_gem_exec_fence + * of length @fence_count. 
+ */ + __u64 handles_ptr; + /** - * List of gem_exec_object2 structs + * @values_ptr: Pointer to an array of u64 values of length + * @fence_count. + * Values must be 0 for a binary drm_syncobj. A Value of 0 for a + * timeline drm_syncobj is invalid as it turns a drm_syncobj into a + * binary one. */ + __u64 values_ptr; +}; + +/** + * struct drm_i915_gem_execbuffer2 - Structure for DRM_I915_GEM_EXECBUFFER2 + * ioctl. + */ +struct drm_i915_gem_execbuffer2 { + /** @buffers_ptr: Pointer to a list of gem_exec_object2 structs */ __u64 buffers_ptr; + + /** @buffer_count: Number of elements in @buffers_ptr array */ __u32 buffer_count; - /** Offset in the batchbuffer to start execution from. */ + /** + * @batch_start_offset: Offset in the batchbuffer to start execution + * from. + */ __u32 batch_start_offset; - /** Bytes used in batchbuffer from batch_start_offset */ + + /** + * @batch_len: Length in bytes of the batch buffer, starting from the + * @batch_start_offset. If 0, length is assumed to be the batch buffer + * object size. + */ __u32 batch_len; + + /** @DR1: deprecated */ __u32 DR1; + + /** @DR4: deprecated */ __u32 DR4; + + /** @num_cliprects: See @cliprects_ptr */ __u32 num_cliprects; + /** - * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY - * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a - * struct drm_i915_gem_exec_fence *fences. + * @cliprects_ptr: Kernel clipping was a DRI1 misfeature. + * + * It is invalid to use this field if I915_EXEC_FENCE_ARRAY or + * I915_EXEC_USE_EXTENSIONS flags are not set. + * + * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array + * of &drm_i915_gem_exec_fence and @num_cliprects is the length of the + * array. + * + * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a + * single &i915_user_extension and num_cliprects is 0. 
*/ __u64 cliprects_ptr; + + /** @flags: Execbuf flags */ + __u64 flags; #define I915_EXEC_RING_MASK (0x3f) #define I915_EXEC_DEFAULT (0<<0) #define I915_EXEC_RENDER (1<<0) @@ -1019,10 +1382,6 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */ #define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6) #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */ - __u64 flags; - __u64 rsvd1; /* now used for context info */ - __u64 rsvd2; -}; /** Resets the SO write offset registers for transform feedback on gen7. */ #define I915_EXEC_GEN7_SOL_RESET (1<<8) @@ -1108,7 +1467,39 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_ARRAY (1<<19) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) +/* + * Setting I915_EXEC_FENCE_SUBMIT implies that lower_32_bits(rsvd2) represent + * a sync_file fd to wait upon (in a nonblocking manner) prior to executing + * the batch. + * + * Returns -EINVAL if the sync_file fd cannot be found. + */ +#define I915_EXEC_FENCE_SUBMIT (1 << 20) + +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list of supported structures are listed in the + * drm_i915_gem_execbuffer_ext enum. + */ +#define I915_EXEC_USE_EXTENSIONS (1 << 21) +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) + + /** @rsvd1: Context id */ + __u64 rsvd1; + + /** + * @rsvd2: in and out sync_file file descriptors. + * + * When I915_EXEC_FENCE_IN or I915_EXEC_FENCE_SUBMIT flag is set, the + * lower 32 bits of this field will have the in sync_file fd (input). + * + * When I915_EXEC_FENCE_OUT flag is set, the upper 32 bits of this + * field will have the out sync_file fd (output). 
+ */ + __u64 rsvd2; +}; #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ @@ -1172,12 +1563,11 @@ struct drm_i915_gem_busy { * reading from the object simultaneously. * * The value of each engine class is the same as specified in the - * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. - * reported as active itself. Some hardware may have parallel - * execution engines, e.g. multiple media engines, which are - * mapped to the same class identifier and so are not separately - * reported for busyness. + * Some hardware may have parallel execution engines, e.g. multiple + * media engines, which are mapped to the same class identifier and so + * are not separately reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether @@ -1188,49 +1578,91 @@ struct drm_i915_gem_busy { }; /** - * I915_CACHING_NONE + * struct drm_i915_gem_caching - Set or get the caching for given object + * handle. * - * GPU access is not coherent with cpu caches. Default for machines without an - * LLC. - */ -#define I915_CACHING_NONE 0 -/** - * I915_CACHING_CACHED + * Allow userspace to control the GTT caching bits for a given object when the + * object is later mapped through the ppGTT(or GGTT on older platforms lacking + * ppGTT support, or if the object is used for scanout). Note that this might + * require unbinding the object from the GTT first, if its current caching value + * doesn't match. * - * GPU access is coherent with cpu caches and furthermore the data is cached in - * last-level caches shared between cpu cores and the gpu GT. Default on - * machines with HAS_LLC. 
- */ -#define I915_CACHING_CACHED 1 -/** - * I915_CACHING_DISPLAY + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. * - * Special GPU caching mode which is coherent with the scanout engines. - * Transparently falls back to I915_CACHING_NONE on platforms where no special - * cache mode (like write-through or gfdt flushing) is available. The kernel - * automatically sets this mode when using a buffer as a scanout target. - * Userspace can manually set this mode to avoid a costly stall and clflush in - * the hotpath of drawing the first frame. + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the at-allocation-time CPU + * caching attributes for the pages might be required(and is expensive) if we + * need to then CPU map the pages later with different caching attributes. This + * inconsistent caching behaviour, while supported on x86, is not universally + * supported on other architectures. 
So for simplicity we opt for setting + * everything at creation time, whilst also making it immutable, on discrete + * platforms. */ -#define I915_CACHING_DISPLAY 2 - struct drm_i915_gem_caching { /** - * Handle of the buffer to set/get the caching level of. */ + * @handle: Handle of the buffer to set/get the caching level. + */ __u32 handle; /** - * Caching level to apply or return value + * @caching: The GTT caching level to apply or possible return value. * - * bits0-15 are for generic caching control (i.e. the above defined - * values). bits16-31 are reserved for platform-specific variations - * (e.g. l3$ caching on gen7). */ + * The supported @caching values: + * + * I915_CACHING_NONE: + * + * GPU access is not coherent with CPU caches. Default for machines + * without an LLC. This means manual flushing might be needed, if we + * want GPU access to be coherent. + * + * I915_CACHING_CACHED: + * + * GPU access is coherent with CPU caches and furthermore the data is + * cached in last-level caches shared between CPU cores and the GPU GT. + * + * I915_CACHING_DISPLAY: + * + * Special GPU caching mode which is coherent with the scanout engines. + * Transparently falls back to I915_CACHING_NONE on platforms where no + * special cache mode (like write-through or gfdt flushing) is + * available. The kernel automatically sets this mode when using a + * buffer as a scanout target. Userspace can manually set this mode to + * avoid a costly stall and clflush in the hotpath of drawing the first + * frame. + */ +#define I915_CACHING_NONE 0 +#define I915_CACHING_CACHED 1 +#define I915_CACHING_DISPLAY 2 __u32 caching; }; #define I915_TILING_NONE 0 #define I915_TILING_X 1 #define I915_TILING_Y 2 +/* + * Do not add new tiling types here. The I915_TILING_* values are for + * de-tiling fence registers that no longer exist on modern platforms. 
Although + * the hardware may support new types of tiling in general (e.g., Tile4), we + * do not need to add them to the uapi that is specific to now-defunct ioctls. + */ #define I915_TILING_LAST I915_TILING_Y #define I915_BIT_6_SWIZZLE_NONE 0 @@ -1448,20 +1880,64 @@ struct drm_i915_gem_context_create { __u32 pad; }; +/** + * struct drm_i915_gem_context_create_ext - Structure for creating contexts. + */ struct drm_i915_gem_context_create_ext { - __u32 ctx_id; /* output: id of new context*/ + /** @ctx_id: Id of the created context (output) */ + __u32 ctx_id; + + /** + * @flags: Supported flags are: + * + * I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS: + * + * Extensions may be appended to this structure and driver must check + * for those. See @extensions. + * + * I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE + * + * Created context will have single timeline. + */ __u32 flags; #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS (1u << 0) +#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE (1u << 1) #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \ - (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1)) + (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1)) + + /** + * @extensions: Zero-terminated chain of extensions. + * + * I915_CONTEXT_CREATE_EXT_SETPARAM: + * Context parameter to set or query during context creation. + * See struct drm_i915_gem_context_create_ext_setparam. + * + * I915_CONTEXT_CREATE_EXT_CLONE: + * This extension has been removed. On the off chance someone somewhere + * has attempted to use it, never re-use this extension number. + */ __u64 extensions; +#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 +#define I915_CONTEXT_CREATE_EXT_CLONE 1 }; +/** + * struct drm_i915_gem_context_param - Context parameter to set or query. 
+ */ struct drm_i915_gem_context_param { + /** @ctx_id: Context id */ __u32 ctx_id; + + /** @size: Size of the parameter @value */ __u32 size; + + /** @param: Parameter to set or query */ __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 @@ -1495,20 +1971,126 @@ struct drm_i915_gem_context_param { * On creation, all new contexts are marked as recoverable. */ #define I915_CONTEXT_PARAM_RECOVERABLE 0x8 -/* Must be kept compact -- no holes and well documented */ - __u64 value; -}; + /* + * The id of the associated virtual memory address space (ppGTT) of + * this context. Can be retrieved and passed to another context + * (on the same fd) for both to use the same ppGTT and so share + * address layouts, and avoid reloading the page tables on context + * switches between themselves. + * + * See DRM_I915_GEM_VM_CREATE and DRM_I915_GEM_VM_DESTROY. + */ +#define I915_CONTEXT_PARAM_VM 0x9 -/** - * Context SSEU programming +/* + * I915_CONTEXT_PARAM_ENGINES: * - * It may be necessary for either functional or performance reason to configure - * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ - * Sub-slice/EU). + * Bind this context to operate on this subset of available engines. Henceforth, + * the I915_EXEC_RING selector for DRM_IOCTL_I915_GEM_EXECBUFFER2 operates as + * an index into this array of engines; I915_EXEC_DEFAULT selecting engine[0] + * and upwards. Slots 0...N are filled in using the specified (class, instance). + * Use + * engine_class: I915_ENGINE_CLASS_INVALID, + * engine_instance: I915_ENGINE_CLASS_INVALID_NONE + * to specify a gap in the array that can be filled in later, e.g. by a + * virtual engine used for load balancing. 
* - * This is done by configuring SSEU configuration using the below - * @struct drm_i915_gem_context_param_sseu for every supported engine which + * Setting the number of engines bound to the context to 0, by passing a zero + * sized argument, will revert back to default settings. + * + * See struct i915_context_param_engines. + * + * Extensions: + * i915_context_engines_load_balance (I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE) + * i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND) + * i915_context_engines_parallel_submit (I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT) + */ +#define I915_CONTEXT_PARAM_ENGINES 0xa + +/* + * I915_CONTEXT_PARAM_PERSISTENCE: + * + * Allow the context and active rendering to survive the process until + * completion. Persistence allows fire-and-forget clients to queue up a + * bunch of work, hand the output over to a display server and then quit. + * If the context is marked as not persistent, upon closing (either via + * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure + * or process termination), the context and any outstanding requests will be + * cancelled (and exported fences for cancelled requests marked as -EIO). + * + * By default, new contexts allow persistence. + */ +#define I915_CONTEXT_PARAM_PERSISTENCE 0xb + +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. + */ +#define I915_CONTEXT_PARAM_RINGSIZE 0xc + +/* + * I915_CONTEXT_PARAM_PROTECTED_CONTENT: + * + * Mark that the context makes use of protected content, which will result + * in the context being invalidated when the protected content session is. + * Given that the protected content session is killed on suspend, the device + * is kept awake for the lifetime of a protected context, so the user should + * make sure to dispose of them once done. 
+ * This flag can only be set at context creation time and, when set to true, + * must be preceded by an explicit setting of I915_CONTEXT_PARAM_RECOVERABLE + * to false. This flag can't be set to true in conjunction with setting the + * I915_CONTEXT_PARAM_BANNABLE flag to false. Creation example: + * + * .. code-block:: C + * + * struct drm_i915_gem_context_create_ext_setparam p_protected = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, + * .value = 1, + * } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_norecover = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * .next_extension = to_user_pointer(&p_protected), + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_RECOVERABLE, + * .value = 0, + * } + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_norecover); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * In addition to the normal failure cases, setting this flag during context + * creation can result in the following errors: + * + * -ENODEV: feature not available + * -EPERM: trying to mark a recoverable or not bannable context as protected + */ +#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd +/* Must be kept compact -- no holes and well documented */ + + /** @value: Context parameter value to be set or queried */ + __u64 value; +}; + +/* + * Context SSEU programming + * + * It may be necessary for either functional or performance reason to configure + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ + * Sub-slice/EU). + * + * This is done by configuring SSEU configuration using the below + * @struct drm_i915_gem_context_param_sseu for every supported engine which * userspace intends to use. 
* * Not all GPUs or engines support this functionality in which case an error @@ -1525,13 +2107,13 @@ struct drm_i915_gem_context_param_sseu { /* * Engine class & instance to be configured or queried. */ - __u16 engine_class; - __u16 engine_instance; + struct i915_engine_class_instance engine; /* - * Unused for now. Must be cleared to zero. + * Unknown flags must be cleared to zero. */ __u32 flags; +#define I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX (1u << 0) /* * Mask of slices to enable for the context. Valid values are a subset @@ -1559,9 +2141,354 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. 
code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map slot 0 + * .num_siblings = 2, + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + +/* + * i915_context_engines_load_balance: + * + * Enable load balancing across this set of engines. + * + * Into the I915_EXEC_DEFAULT slot [0], a virtual engine is created that when + * used will proxy the execbuffer request onto one of the set of engines + * in such a way as to distribute the load evenly across the set. + * + * The set of engines must be compatible (e.g. 
the same HW class) as they + * will share the same logical GPU context and ring. + * + * To intermix rendering with the virtual engine and direct rendering onto + * the backing engines (bypassing the load balancing proxy), the context must + * be defined to use a single timeline for all engines. + */ +struct i915_context_engines_load_balance { + struct i915_user_extension base; + + __u16 engine_index; + __u16 num_siblings; + __u32 flags; /* all undefined flags must be zero */ + + __u64 mbz64; /* reserved for future use; must be zero */ + + struct i915_engine_class_instance engines[]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(name__, N__) struct { \ + struct i915_user_extension base; \ + __u16 engine_index; \ + __u16 num_siblings; \ + __u32 flags; \ + __u64 mbz64; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + +/* + * i915_context_engines_bond: + * + * Constructed bonded pairs for execution within a virtual engine. + * + * All engines are equal, but some are more equal than others. Given + * the distribution of resources in the HW, it may be preferable to run + * a request on a given subset of engines in parallel to a request on a + * specific engine. We enable this selection of engines within a virtual + * engine by specifying bonding pairs, for any given master engine we will + * only execute on one of the corresponding siblings within the virtual engine. + * + * To execute a request in parallel on the master engine and a sibling requires + * coordination with a I915_EXEC_FENCE_SUBMIT. 
+ */ +struct i915_context_engines_bond { + struct i915_user_extension base; + + struct i915_engine_class_instance master; + + __u16 virtual_index; /* index of virtual engine in ctx->engines[] */ + __u16 num_bonds; + + __u64 flags; /* all undefined flags must be zero */ + __u64 mbz64[4]; /* reserved for future use; must be zero */ + + struct i915_engine_class_instance engines[]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_BOND(name__, N__) struct { \ + struct i915_user_extension base; \ + struct i915_engine_class_instance master; \ + __u16 virtual_index; \ + __u16 num_bonds; \ + __u64 flags; \ + __u64 mbz64[4]; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + +/** + * struct i915_context_engines_parallel_submit - Configure engine for + * parallel submission. + * + * Setup a slot in the context engine map to allow multiple BBs to be submitted + * in a single execbuf IOCTL. Those BBs will then be scheduled to run on the GPU + * in parallel. Multiple hardware contexts are created internally in the i915 to + * run these BBs. Once a slot is configured for N BBs only N BBs can be + * submitted in each execbuf IOCTL and this is implicit behavior e.g. The user + * doesn't tell the execbuf IOCTL there are N BBs, the execbuf IOCTL knows how + * many BBs there are based on the slot's configuration. The N BBs are the last + * N buffer objects or first N if I915_EXEC_BATCH_FIRST is set. + * + * The default placement behavior is to create implicit bonds between each + * context if each context maps to more than 1 physical engine (e.g. context is + * a virtual engine). Also we only allow contexts of same engine class and these + * contexts must be in logically contiguous order. Examples of the placement + * behavior are described below. Lastly, the default is to not allow BBs to be + * preempted mid-batch. Rather insert coordinated preemption points on all + * hardware contexts between each set of BBs. 
Flags could be added in the future + * to change both of these default behaviors. + * + * Returns -EINVAL if hardware context placement configuration is invalid or if + * the placement configuration isn't supported on the platform / submission + * interface. + * Returns -ENODEV if extension isn't supported on the platform / submission + * interface. + * + * .. code-block:: none + * + * Examples syntax: + * CS[X] = generic engine of same class, logical instance X + * INVALID = I915_ENGINE_CLASS_INVALID, I915_ENGINE_CLASS_INVALID_NONE + * + * Example 1 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=1, + * engines=CS[0],CS[1]) + * + * Results in the following valid placement: + * CS[0], CS[1] + * + * Example 2 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[2],CS[1],CS[3]) + * + * Results in the following valid placements: + * CS[0], CS[1] + * CS[2], CS[3] + * + * This can be thought of as two virtual engines, each containing two + * engines thereby making a 2D array. However, there are bonds tying the + * entries together and placing restrictions on how they can be scheduled. + * Specifically, the scheduler can choose only vertical columns from the 2D + * array. That is, CS[0] is bonded to CS[1] and CS[2] to CS[3]. So if the + * scheduler wants to submit to CS[0], it must also choose CS[1] and vice + * versa. Same for CS[2] requires also using CS[3]. + * VE[0] = CS[0], CS[2] + * VE[1] = CS[1], CS[3] + * + * Example 3 pseudo code: + * set_engines(INVALID) + * set_parallel(engine_index=0, width=2, num_siblings=2, + * engines=CS[0],CS[1],CS[1],CS[3]) + * + * Results in the following valid and invalid placements: + * CS[0], CS[1] + * CS[1], CS[3] - Not logically contiguous, return -EINVAL + */ +struct i915_context_engines_parallel_submit { + /** + * @base: base user extension. 
+ */ + struct i915_user_extension base; + + /** + * @engine_index: slot for parallel engine + */ + __u16 engine_index; + + /** + * @width: number of contexts per parallel engine or in other words the + * number of batches in each submission + */ + __u16 width; + + /** + * @num_siblings: number of siblings per context or in other words the + * number of possible placements for each submission + */ + __u16 num_siblings; + + /** + * @mbz16: reserved for future use; must be zero + */ + __u16 mbz16; + + /** + * @flags: all undefined flags must be zero, currently not defined flags + */ + __u64 flags; + + /** + * @mbz64: reserved for future use; must be zero + */ + __u64 mbz64[3]; + + /** + * @engines: 2-d array of engine instances to configure parallel engine + * + * length = width (i) * num_siblings (j) + * index = j + i * num_siblings + */ + struct i915_engine_class_instance engines[]; + +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_ENGINES_PARALLEL_SUBMIT(name__, N__) struct { \ + struct i915_user_extension base; \ + __u16 engine_index; \ + __u16 width; \ + __u16 num_siblings; \ + __u16 mbz16; \ + __u64 flags; \ + __u64 mbz64[3]; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. 
code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 1, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + +struct i915_context_param_engines { + __u64 extensions; /* linked chain of extension blocks, 0 terminates */ +#define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ +#define I915_CONTEXT_ENGINES_EXT_BOND 1 /* see i915_context_engines_bond */ +#define I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT 2 /* see i915_context_engines_parallel_submit */ + struct i915_engine_class_instance engines[0]; +} __attribute__((packed)); + +#define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__, N__) struct { \ + __u64 extensions; \ + struct i915_engine_class_instance engines[N__]; \ +} __attribute__((packed)) name__ + +/** + * struct drm_i915_gem_context_create_ext_setparam - Context parameter + * to set or query during context creation. 
+ */ struct drm_i915_gem_context_create_ext_setparam { -#define I915_CONTEXT_CREATE_EXT_SETPARAM 0 + /** @base: Extension link. See struct i915_user_extension. */ struct i915_user_extension base; + + /** + * @param: Context parameter to set or query. + * See struct drm_i915_gem_context_param. + */ struct drm_i915_gem_context_param param; }; @@ -1570,7 +2497,9 @@ struct drm_i915_gem_context_destroy { __u32 pad; }; -/* +/** + * struct drm_i915_gem_vm_control - Structure to create or destroy VM. + * * DRM_I915_GEM_VM_CREATE - * * Create a new virtual memory address space (ppGTT) for use within a context @@ -1580,20 +2509,23 @@ struct drm_i915_gem_context_destroy { * The id of new VM (bound to the fd) for use with I915_CONTEXT_PARAM_VM is * returned in the outparam @id. * - * No flags are defined, with all bits reserved and must be zero. - * * An extension chain maybe provided, starting with @extensions, and terminated * by the @next_extension being 0. Currently, no extensions are defined. * * DRM_I915_GEM_VM_DESTROY - * - * Destroys a previously created VM id, specified in @id. + * Destroys a previously created VM id, specified in @vm_id. * * No extensions or flags are allowed currently, and so must be zero. */ struct drm_i915_gem_vm_control { + /** @extensions: Zero-terminated chain of extensions. */ __u64 extensions; + + /** @flags: reserved for future usage, currently MBZ */ __u32 flags; + + /** @vm_id: Id of the VM created or to be destroyed */ __u32 vm_id; }; @@ -1635,14 +2567,69 @@ struct drm_i915_reset_stats { __u32 pad; }; +/** + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. + * + * Userptr objects have several restrictions on what ioctls can be used with the + * object handle. + */ struct drm_i915_gem_userptr { + /** + * @user_ptr: The pointer to the allocated memory. + * + * Needs to be aligned to PAGE_SIZE. + */ __u64 user_ptr; + + /** + * @user_size: + * + * The size in bytes for the allocated memory. 
This will also become the + * object size. + * + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, + * or larger. + */ __u64 user_size; + + /** + * @flags: + * + * Supported flags: + * + * I915_USERPTR_READ_ONLY: + * + * Mark the object as readonly, this also means GPU access can only be + * readonly. This is only supported on HW which supports readonly access + * through the GTT. If the HW can't support readonly access, an error is + * returned. + * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * + * I915_USERPTR_UNSYNCHRONIZED: + * + * NOT USED. Setting this flag will result in an error. + */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** - * Returned handle for the object. + * @handle: Returned handle for the object. * * Object handles are nonzero. */ @@ -1671,23 +2658,31 @@ enum drm_i915_perf_property_id { * Open the stream for a specific context handle (as used with * execbuffer2). A stream opened for a specific context this way * won't typically require root privileges. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_CTX_HANDLE = 1, /** * A value of 1 requests the inclusion of raw OA unit reports as * part of stream samples. + * + * This property is available in perf revision 1. 
*/ DRM_I915_PERF_PROP_SAMPLE_OA, /** * The value specifies which set of OA unit metrics should be - * be configured, defining the contents of any OA unit reports. + * configured, defining the contents of any OA unit reports. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_METRICS_SET, /** * The value specifies the size and layout of OA unit reports. + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_FORMAT, @@ -1697,9 +2692,46 @@ enum drm_i915_perf_property_id { * from this exponent as follows: * * 80ns * 2^(period_exponent + 1) + * + * This property is available in perf revision 1. */ DRM_I915_PERF_PROP_OA_EXPONENT, + /** + * Specifying this property is only valid when specifying a context to + * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property + * will hold preemption of the particular context we want to gather + * performance data about. The execbuf2 submissions must include a + * drm_i915_gem_execbuffer_ext_perf parameter for this to apply. + * + * This property is available in perf revision 3. + */ + DRM_I915_PERF_PROP_HOLD_PREEMPTION, + + /** + * Specifying this pins all contexts to the specified SSEU power + * configuration for the duration of the recording. + * + * This parameter's value is a pointer to a struct + * drm_i915_gem_context_param_sseu. + * + * This property is available in perf revision 4. + */ + DRM_I915_PERF_PROP_GLOBAL_SSEU, + + /** + * This optional parameter specifies the timer interval in nanoseconds + * at which the i915 driver will check the OA buffer for available data. + * Minimum allowed value is 100 microseconds. A default value is used by + * the driver if this parameter is not specified. Note that larger timer + * values will reduce cpu consumption during OA perf captures. However, + * excessively large values would potentially result in OA buffer + * overwrites as captures reach end of the OA buffer. + * + * This property is available in perf revision 5. 
+ */ + DRM_I915_PERF_PROP_POLL_OA_PERIOD, + DRM_I915_PERF_PROP_MAX /* non-ABI */ }; @@ -1719,7 +2751,7 @@ struct drm_i915_perf_open_param { __u64 properties_ptr; }; -/** +/* * Enable data capture for a stream that was either opened in a disabled state * via I915_PERF_FLAG_DISABLED or was later disabled via * I915_PERF_IOCTL_DISABLE. @@ -1728,17 +2760,34 @@ struct drm_i915_perf_open_param { * to close and re-open a stream with the same configuration. * * It's undefined whether any pending data for the stream will be lost. + * + * This ioctl is available in perf revision 1. */ #define I915_PERF_IOCTL_ENABLE _IO('i', 0x0) -/** +/* * Disable data capture for a stream. * * It is an error to try and read a stream that is disabled. + * + * This ioctl is available in perf revision 1. */ #define I915_PERF_IOCTL_DISABLE _IO('i', 0x1) -/** +/* + * Change metrics_set captured by a stream. + * + * If the stream is bound to a specific context, the configuration change + * will be performed inline with that context such that it takes effect before + * the next execbuf submission. + * + * Returns the previously bound metrics set id, or a negative error code. + * + * This ioctl is available in perf revision 2. + */ +#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2) + +/* * Common to all i915 perf records */ struct drm_i915_perf_record_header { @@ -1787,127 +2836,887 @@ enum drm_i915_perf_record_type { }; /** + * struct drm_i915_perf_oa_config + * * Structure to upload perf dynamic configuration into the kernel. */ struct drm_i915_perf_oa_config { - /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + /** + * @uuid: + * + * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" + */ char uuid[36]; + /** + * @n_mux_regs: + * + * Number of mux regs in &mux_regs_ptr. + */ __u32 n_mux_regs; + + /** + * @n_boolean_regs: + * + * Number of boolean regs in &boolean_regs_ptr. + */ __u32 n_boolean_regs; + + /** + * @n_flex_regs: + * + * Number of flex regs in &flex_regs_ptr. 
+ */ __u32 n_flex_regs; - /* - * These fields are pointers to tuples of u32 values (register address, - * value). For example the expected length of the buffer pointed by - * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs). + /** + * @mux_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for mux + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_mux_regs). */ __u64 mux_regs_ptr; + + /** + * @boolean_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for boolean + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_boolean_regs). + */ __u64 boolean_regs_ptr; + + /** + * @flex_regs_ptr: + * + * Pointer to tuples of u32 values (register address, value) for flex + * registers. Expected length of buffer is (2 * sizeof(u32) * + * &n_flex_regs). + */ __u64 flex_regs_ptr; }; +/** + * struct drm_i915_query_item - An individual query for the kernel to process. + * + * The behaviour is determined by the @query_id. Note that exactly what + * @data_ptr is also depends on the specific @query_id. + */ struct drm_i915_query_item { + /** + * @query_id: + * + * The id for this query. 
Currently accepted query IDs are: + * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info) + * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info) + * - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config) + * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions) + * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`) + * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info) + */ __u64 query_id; -#define DRM_I915_QUERY_TOPOLOGY_INFO 1 +#define DRM_I915_QUERY_TOPOLOGY_INFO 1 +#define DRM_I915_QUERY_ENGINE_INFO 2 +#define DRM_I915_QUERY_PERF_CONFIG 3 +#define DRM_I915_QUERY_MEMORY_REGIONS 4 +#define DRM_I915_QUERY_HWCONFIG_BLOB 5 +#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6 /* Must be kept compact -- no holes and well documented */ - /* + /** + * @length: + * * When set to zero by userspace, this is filled with the size of the - * data to be written at the data_ptr pointer. The kernel sets this + * data to be written at the @data_ptr pointer. The kernel sets this * value to a negative value to signal an error on a particular query * item. */ __s32 length; - /* - * Unused for now. Must be cleared to zero. + /** + * @flags: + * + * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0. + * + * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the + * following: + * + * - %DRM_I915_QUERY_PERF_CONFIG_LIST + * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID + * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID + * + * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain + * a struct i915_engine_class_instance that references a render engine. */ __u32 flags; +#define DRM_I915_QUERY_PERF_CONFIG_LIST 1 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2 +#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3 - /* - * Data will be written at the location pointed by data_ptr when the - * value of length matches the length of the dat... [truncated message content] |
From: GitLab M. <git...@ke...> - 2022-12-07 18:02:29
|
include/drm/drm_fourcc.h | 63 ++++++++++++++++++++++++++++++++++++++++--- xf86drm.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 4 deletions(-) New commits: commit 874af994636a534236a747a88aec086b9b35b001 Author: Philipp Zabel <p....@pe...> Date: Thu Oct 13 13:44:38 2022 +0200 xf86drm: Add support for decoding Vivante format modifiers Allow applications to pretty-print Vivante format modifiers. Signed-off-by: Philipp Zabel <p....@pe...> diff --git a/xf86drm.c b/xf86drm.c index 0faa5972..33d55a7f 100644 --- a/xf86drm.c +++ b/xf86drm.c @@ -175,11 +175,15 @@ drmGetFormatModifierNameFromAmd(uint64_t modifier); static char * drmGetFormatModifierNameFromAmlogic(uint64_t modifier); +static char * +drmGetFormatModifierNameFromVivante(uint64_t modifier); + static const struct drmVendorInfo modifier_format_vendor_table[] = { { DRM_FORMAT_MOD_VENDOR_ARM, drmGetFormatModifierNameFromArm }, { DRM_FORMAT_MOD_VENDOR_NVIDIA, drmGetFormatModifierNameFromNvidia }, { DRM_FORMAT_MOD_VENDOR_AMD, drmGetFormatModifierNameFromAmd }, { DRM_FORMAT_MOD_VENDOR_AMLOGIC, drmGetFormatModifierNameFromAmlogic }, + { DRM_FORMAT_MOD_VENDOR_VIVANTE, drmGetFormatModifierNameFromVivante }, }; #ifndef AFBC_FORMAT_MOD_MODE_VALUE_MASK @@ -547,6 +551,70 @@ drmGetFormatModifierNameFromAmlogic(uint64_t modifier) return mod_amlogic; } +static char * +drmGetFormatModifierNameFromVivante(uint64_t modifier) +{ + const char *color_tiling, *tile_status, *compression; + const char *mod_vivante = NULL; + + switch (modifier & VIVANTE_MOD_TS_MASK) { + case 0: + tile_status = ""; + break; + case VIVANTE_MOD_TS_64_4: + tile_status = ",TS=64B_4"; + break; + case VIVANTE_MOD_TS_64_2: + tile_status = ",TS=64B_2"; + break; + case VIVANTE_MOD_TS_128_4: + tile_status = ",TS=128B_4"; + break; + case VIVANTE_MOD_TS_256_4: + tile_status = ",TS=256B_4"; + break; + default: + tile_status = ",TS=UNKNOWN"; + break; + } + + switch (modifier & VIVANTE_MOD_COMP_MASK) { + case 0: + compression 
= ""; + break; + case VIVANTE_MOD_COMP_DEC400: + compression = ",COMP=DEC400"; + break; + default: + compression = ",COMP=UNKNOWN"; + break; + } + + switch (modifier & ~VIVANTE_MOD_EXT_MASK) { + case 0: + color_tiling = "LINEAR"; + break; + case DRM_FORMAT_MOD_VIVANTE_TILED: + color_tiling = "TILED"; + break; + case DRM_FORMAT_MOD_VIVANTE_SUPER_TILED: + color_tiling = "SUPER_TILED"; + break; + case DRM_FORMAT_MOD_VIVANTE_SPLIT_TILED: + color_tiling = "SPLIT_TILED"; + break; + case DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED: + color_tiling = "SPLIT_SUPER_TILED"; + break; + default: + color_tiling = "UNKNOWN"; + break; + } + + asprintf(&mod_vivante, "%s%s%s", color_tiling, tile_status, compression); + return mod_vivante; +} + static unsigned log2_int(unsigned x) { unsigned l; commit 329eebcf32793361390edfc95ffcbc51fbec2043 Author: Philipp Zabel <p....@pe...> Date: Wed Nov 30 18:13:37 2022 +0100 drm_fourcc: sync drm_fourcc with latest drm-next kernel Update drm_fourcc.h to include latest changes from drm-next branch. This brings in sub-8bpp formats, AVUY and XVUY 8:8:8:8, and Vivante tile-status and compression modifiers. Generated using make headers_install. 
Generated from drm-next branch commit 077bd80083ab Signed-off-by: Philipp Zabel <p....@pe...> diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h index 2c9051ff..0e70e36c 100644 --- a/include/drm/drm_fourcc.h +++ b/include/drm/drm_fourcc.h @@ -99,18 +99,42 @@ extern "C" { #define DRM_FORMAT_INVALID 0 /* color index */ +#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */ +#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */ +#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ -/* 8 bpp Red */ +/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */ + +/* 2 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */ + +/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ + +/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ +#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ + +/* 1 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */ + +/* 2 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */ + +/* 4 bpp Red (direct relationship between channel value and brightness) */ +#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 
4:4 two pixels/byte */ + +/* 8 bpp Red (direct relationship between channel value and brightness) */ #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ -/* 10 bpp Red */ +/* 10 bpp Red (direct relationship between channel value and brightness) */ #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ -/* 12 bpp Red */ +/* 12 bpp Red (direct relationship between channel value and brightness) */ #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ -/* 16 bpp Red */ +/* 16 bpp Red (direct relationship between channel value and brightness) */ #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ /* 16 bpp RG */ @@ -205,7 +229,9 @@ extern "C" { #define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ +#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */ #define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ +#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */ #define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ #define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ @@ -718,6 +744,35 @@ extern "C" { */ #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4) +/* + * Vivante TS (tile-status) buffer modifiers. They can be combined with all of + * the color buffer tiling modifiers defined above. When TS is present it's a + * separate buffer containing the clear/compression status of each tile. 
The + * modifiers are defined as VIVANTE_MOD_TS_c_s, where c is the color buffer + * tile size in bytes covered by one entry in the status buffer and s is the + * number of status bits per entry. + * We reserve the top 8 bits of the Vivante modifier space for tile status + * clear/compression modifiers, as future cores might add some more TS layout + * variations. + */ +#define VIVANTE_MOD_TS_64_4 (1ULL << 48) +#define VIVANTE_MOD_TS_64_2 (2ULL << 48) +#define VIVANTE_MOD_TS_128_4 (3ULL << 48) +#define VIVANTE_MOD_TS_256_4 (4ULL << 48) +#define VIVANTE_MOD_TS_MASK (0xfULL << 48) + +/* + * Vivante compression modifiers. Those depend on a TS modifier being present + * as the TS bits get reinterpreted as compression tags instead of simple + * clear markers when compression is enabled. + */ +#define VIVANTE_MOD_COMP_DEC400 (1ULL << 52) +#define VIVANTE_MOD_COMP_MASK (0xfULL << 52) + +/* Masking out the extension bits will yield the base modifier. */ +#define VIVANTE_MOD_EXT_MASK (VIVANTE_MOD_TS_MASK | \ + VIVANTE_MOD_COMP_MASK) + /* NVIDIA frame buffer modifiers */ /* |
From: GitLab M. <git...@ke...> - 2023-01-03 16:24:23
|
data/amdgpu.ids | 9 +++++++++ 1 file changed, 9 insertions(+) New commits: commit 0e2c7d05712d65903a9b77fb9f960ddff43bac64 Author: Alex Deucher <ale...@am...> Date: Sun Jan 1 14:14:58 2023 -0500 amdgpu: add some additional marketing names Reviewed-by: Pierre-Eric Pelloux-Prayer <pie...@am...> Signed-off-by: Alex Deucher <ale...@am...> diff --git a/data/amdgpu.ids b/data/amdgpu.ids index 04014379..53cf61cb 100644 --- a/data/amdgpu.ids +++ b/data/amdgpu.ids @@ -392,7 +392,10 @@ 743F, C3, AMD Radeon RX 6500 743F, C3, AMD Radeon RX 6500M 743F, C7, AMD Radeon RX 6400 +743F, C8, AMD Radeon RX 6500M +743F, CC, AMD Radeon 6550S 743F, CF, AMD Radeon RX 6300M +743F, D7, AMD Radeon RX 6400 744C, C8, AMD Radeon RX 7900 XTX 744C, CC, AMD Radeon RX 7900 XT 9830, 00, AMD Radeon HD 8400 / R3 Series commit bdcd49232836569a97b45f80b64c2ad014535db3 Author: Alex Deucher <ale...@am...> Date: Tue Dec 27 12:24:14 2022 -0500 amdgpu: add marketing names from amd-5.4 (22.40) Reviewed-by: Pierre-Eric Pelloux-Prayer <pie...@am...> Signed-off-by: Alex Deucher <ale...@am...> diff --git a/data/amdgpu.ids b/data/amdgpu.ids index abf3d694..04014379 100644 --- a/data/amdgpu.ids +++ b/data/amdgpu.ids @@ -351,9 +351,13 @@ 7347, 00, AMD Radeon Pro W5500M 7360, 41, AMD Radeon Pro 5600M 7360, C3, AMD Radeon Pro V520 +7362, C1, AMD Radeon Pro V540 +7362, C3, AMD Radeon Pro V520 738C, 01, AMD Instinct MI100 +73A1, 00, AMD Radeon Pro V620 73A3, 00, AMD Radeon Pro W6800 73A5, C0, AMD Radeon RX 6950 XT +73AE, 00, AMD Radeon Pro V620 73AF, C0, AMD Radeon RX 6900 XT 73BF, C0, AMD Radeon RX 6900 XT 73BF, C1, AMD Radeon RX 6800 XT @@ -389,6 +393,8 @@ 743F, C3, AMD Radeon RX 6500M 743F, C7, AMD Radeon RX 6400 743F, CF, AMD Radeon RX 6300M +744C, C8, AMD Radeon RX 7900 XTX +744C, CC, AMD Radeon RX 7900 XT 9830, 00, AMD Radeon HD 8400 / R3 Series 9831, 00, AMD Radeon HD 8400E 9832, 00, AMD Radeon HD 8330 |
From: GitLab M. <git...@ke...> - 2023-05-11 15:28:29
|
tests/amdgpu/vcn_tests.c | 69 ++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 30 deletions(-) New commits: commit 7784d571661e3c1019f98ece7947dbf791405f8a Author: Ruijing Dong <rui...@am...> Date: Tue May 9 16:15:41 2023 -0400 tests/amdgpu/vcn: fix drm test failure 1. fixed an issue that drm test vcn3/4 encoding test could cause VCN engine stuck. 2. adding missing or errous encoding ib package members. Reviewed-by: Boyuan Zhang <Boy...@am...> Signed-off-by: Ruijing Dong <rui...@am...> diff --git a/tests/amdgpu/vcn_tests.c b/tests/amdgpu/vcn_tests.c index 0fdb55dd..27da9da5 100644 --- a/tests/amdgpu/vcn_tests.c +++ b/tests/amdgpu/vcn_tests.c @@ -808,6 +808,8 @@ static void amdgpu_cs_vcn_enc_create(void) ib_cpu[len++] = 0; ib_cpu[len++] = 0; /* pre encode mode */ ib_cpu[len++] = 0; /* chroma enabled : false */ + ib_cpu[len++] = 0; + ib_cpu[len++] = 0; *st_size = (len - st_offset) * 4; /* slice control */ @@ -829,7 +831,7 @@ static void amdgpu_cs_vcn_enc_create(void) ib_cpu[len++] = 1; /* quarter pel enabled */ ib_cpu[len++] = 100; /* BASELINE profile */ ib_cpu[len++] = 11; /* level */ - if (vcn_ip_version_major == 3) { + if (vcn_ip_version_major >= 3) { ib_cpu[len++] = 0; /* b_picture_enabled */ ib_cpu[len++] = 0; /* weighted_bipred_idc */ } @@ -870,7 +872,7 @@ static void amdgpu_cs_vcn_enc_create(void) ib_cpu[len++] = 0; /* scene change sensitivity */ ib_cpu[len++] = 0; /* scene change min idr interval */ ib_cpu[len++] = 0; - if (vcn_ip_version_major == 3) + if (vcn_ip_version_major >= 3) ib_cpu[len++] = 0; *st_size = (len - st_offset) * 4; @@ -913,6 +915,7 @@ static void amdgpu_cs_vcn_enc_create(void) ib_cpu[len++] = 1; ib_cpu[len++] = 0; ib_cpu[len++] = 1; + ib_cpu[len++] = 0; *st_size = (len - st_offset) * 4; /* op init rc */ @@ -1265,10 +1268,16 @@ static void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_bu CU_ASSERT_EQUAL(r, 0); } +static void amdgpu_cs_vcn_ib_zero_count(int *len, int num) +{ + for (int i = 
0; i < num; i++) + ib_cpu[(*len)++] = 0; +} + static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) { struct amdgpu_vcn_bo bs_buf, fb_buf, input_buf; - int len, r, i; + int len, r; unsigned width = 160, height = 128, buf_size; uint32_t *p_task_size = NULL; uint32_t task_offset = 0, st_offset; @@ -1346,7 +1355,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1 */ else - ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3 */ + ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn */ ib_cpu[len++] = 0x00000002; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS */ ib_cpu[len++] = 0x00000011; /* sps len */ ib_cpu[len++] = 0x00000001; /* start code */ @@ -1362,7 +1371,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x00000020; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 1*/ else - ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU vcn 2,3*/ + ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU other vcn*/ ib_cpu[len++] = 0x00000003; /* RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS */ ib_cpu[len++] = 0x00000008; /* pps len */ ib_cpu[len++] = 0x00000001; /* start code */ @@ -1376,7 +1385,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x0000000a; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 1 */ else - ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER vcn 2,3 */ + ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_SLICE_HEADER other vcn */ if (frame_type == 2) { ib_cpu[len++] = 0x65000000; ib_cpu[len++] = 0x11040000; @@ -1385,8 +1394,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) ib_cpu[len++] = 0x34210000; } ib_cpu[len++] = 0xe0000000; - for(i = 0; i < 13; i++) - ib_cpu[len++] = 0x00000000; + amdgpu_cs_vcn_ib_zero_count(&len, 13); ib_cpu[len++] = 0x00000001; 
ib_cpu[len++] = 0x00000008; @@ -1398,18 +1406,16 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) ib_cpu[len++] = 0x00000000; ib_cpu[len++] = 0x00000001; ib_cpu[len++] = 0x00000003; - for(i = 0; i < 22; i++) - ib_cpu[len++] = 0x00000000; - + amdgpu_cs_vcn_ib_zero_count(&len, 22); *st_size = (len - st_offset) * 4; /* encode params */ st_offset = len; st_size = &ib_cpu[len++]; /* size */ if(vcn_ip_version_major == 1) - ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1*/ + ib_cpu[len++] = 0x0000000b; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 1 */ else - ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS vcn 2,3*/ + ib_cpu[len++] = 0x0000000f; /* RENCODE_IB_PARAM_ENCODE_PARAMS other vcn */ ib_cpu[len++] = frame_type; ib_cpu[len++] = 0x0001f000; ib_cpu[len++] = input_buf.addr >> 32; @@ -1427,7 +1433,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) st_offset = len; st_size = &ib_cpu[len++]; /* size */ ib_cpu[len++] = 0x00200003; /* RENCODE_H264_IB_PARAM_ENCODE_PARAMS */ - if (vcn_ip_version_major != 3) { + if (vcn_ip_version_major <= 2) { ib_cpu[len++] = 0x00000000; ib_cpu[len++] = 0x00000000; ib_cpu[len++] = 0x00000000; @@ -1450,6 +1456,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) ib_cpu[len++] = 0x00000000; ib_cpu[len++] = 0x00000000; ib_cpu[len++] = 0x00000000; + ib_cpu[len++] = 0x00000001; } *st_size = (len - st_offset) * 4; @@ -1459,20 +1466,21 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x0000000d; /* ENCODE_CONTEXT_BUFFER vcn 1 */ else - ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER vcn 2,3 */ + ib_cpu[len++] = 0x00000011; /* ENCODE_CONTEXT_BUFFER other vcn */ ib_cpu[len++] = cpb_buf.addr >> 32; ib_cpu[len++] = cpb_buf.addr; ib_cpu[len++] = 0x00000000; /* swizzle mode */ ib_cpu[len++] = 0x00000100; /* luma pitch */ ib_cpu[len++] = 0x00000100; /* chroma pitch */ - ib_cpu[len++] = 0x00000003; /* no reconstructed picture */ + 
ib_cpu[len++] = 0x00000002; /* no reconstructed picture */ ib_cpu[len++] = 0x00000000; /* reconstructed pic 1 luma offset */ ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32); /* pic1 chroma offset */ + if(vcn_ip_version_major == 4) + amdgpu_cs_vcn_ib_zero_count(&len, 2); ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 3 / 2; /* pic2 luma offset */ ib_cpu[len++] = ALIGN(width, 256) * ALIGN(height, 32) * 5 / 2; /* pic2 chroma offset */ - for (int i = 0; i < 136; i++) - ib_cpu[len++] = 0x00000000; + amdgpu_cs_vcn_ib_zero_count(&len, 280); *st_size = (len - st_offset) * 4; /* bitstream buffer */ @@ -1481,7 +1489,8 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) if(vcn_ip_version_major == 1) ib_cpu[len++] = 0x0000000e; /* VIDEO_BITSTREAM_BUFFER vcn 1 */ else - ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER vcn 2,3 */ + ib_cpu[len++] = 0x00000012; /* VIDEO_BITSTREAM_BUFFER other vcn */ + ib_cpu[len++] = 0x00000000; /* mode */ ib_cpu[len++] = bs_buf.addr >> 32; ib_cpu[len++] = bs_buf.addr; commit 03d18b44db345b754759adacf602b2b3575b5339 Author: Ruijing Dong <rui...@am...> Date: Tue May 9 11:19:34 2023 -0400 tests/amdgpu/vcn: change vbv_buffer name to input It is input buffer instead of vbv_buffer. Correct its name. 
Reviewed-by: Boyuan Zhang <Boy...@am...> Signed-off-by: Ruijing Dong <rui...@am...> diff --git a/tests/amdgpu/vcn_tests.c b/tests/amdgpu/vcn_tests.c index 5e20fb65..0fdb55dd 100644 --- a/tests/amdgpu/vcn_tests.c +++ b/tests/amdgpu/vcn_tests.c @@ -1267,7 +1267,7 @@ static void check_result(struct amdgpu_vcn_bo fb_buf, struct amdgpu_vcn_bo bs_bu static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) { - struct amdgpu_vcn_bo bs_buf, fb_buf, vbv_buf; + struct amdgpu_vcn_bo bs_buf, fb_buf, input_buf; int len, r, i; unsigned width = 160, height = 128, buf_size; uint32_t *p_task_size = NULL; @@ -1288,12 +1288,12 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) num_resources = 0; alloc_resource(&bs_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); alloc_resource(&fb_buf, 4096, AMDGPU_GEM_DOMAIN_GTT); - alloc_resource(&vbv_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT); + alloc_resource(&input_buf, buf_size, AMDGPU_GEM_DOMAIN_GTT); resources[num_resources++] = enc_buf.handle; resources[num_resources++] = cpb_buf.handle; resources[num_resources++] = bs_buf.handle; resources[num_resources++] = fb_buf.handle; - resources[num_resources++] = vbv_buf.handle; + resources[num_resources++] = input_buf.handle; resources[num_resources++] = ib_handle; @@ -1305,13 +1305,13 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) memset(fb_buf.ptr, 0, 4096); r = amdgpu_bo_cpu_unmap(fb_buf.handle); - r = amdgpu_bo_cpu_map(vbv_buf.handle, (void **)&vbv_buf.ptr); + r = amdgpu_bo_cpu_map(input_buf.handle, (void **)&input_buf.ptr); CU_ASSERT_EQUAL(r, 0); for (int i = 0; i < ALIGN(height, 32) * 3 / 2; i++) - memcpy(vbv_buf.ptr + i * ALIGN(width, 256), frame + i * width, width); + memcpy(input_buf.ptr + i * ALIGN(width, 256), frame + i * width, width); - r = amdgpu_bo_cpu_unmap(vbv_buf.handle); + r = amdgpu_bo_cpu_unmap(input_buf.handle); CU_ASSERT_EQUAL(r, 0); len = 0; @@ -1412,10 +1412,10 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) ib_cpu[len++] = 0x0000000f; /* 
RENCODE_IB_PARAM_ENCODE_PARAMS vcn 2,3*/ ib_cpu[len++] = frame_type; ib_cpu[len++] = 0x0001f000; - ib_cpu[len++] = vbv_buf.addr >> 32; - ib_cpu[len++] = vbv_buf.addr; - ib_cpu[len++] = (vbv_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32; - ib_cpu[len++] = vbv_buf.addr + ALIGN(width, 256) * ALIGN(height, 32); + ib_cpu[len++] = input_buf.addr >> 32; + ib_cpu[len++] = input_buf.addr; + ib_cpu[len++] = (input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32)) >> 32; + ib_cpu[len++] = input_buf.addr + ALIGN(width, 256) * ALIGN(height, 32); ib_cpu[len++] = 0x00000100; ib_cpu[len++] = 0x00000080; ib_cpu[len++] = 0x00000000; @@ -1564,7 +1564,7 @@ static void amdgpu_cs_vcn_enc_encode_frame(int frame_type) free_resource(&fb_buf); free_resource(&bs_buf); - free_resource(&vbv_buf); + free_resource(&input_buf); } static void amdgpu_cs_vcn_enc_encode(void) |
From: GitLab M. <git...@ke...> - 2023-07-27 18:48:04
|
core-symbols.txt | 1 include/drm/drm.h | 108 +++++++++++++++++++ include/drm/drm_fourcc.h | 55 ++++++++++ include/drm/drm_mode.h | 256 ++++++++++++++++++++++++++++++++++++++++------- xf86drm.c | 14 ++ xf86drm.h | 2 6 files changed, 399 insertions(+), 37 deletions(-) New commits: commit c6013245ce9ce287bb86d327f9b6420a320a08e6 Author: Simon Ser <co...@em...> Date: Thu Jul 20 12:45:57 2023 +0200 xf86drm: add drmSyncobjEventfd This is a wrapper for DRM_IOCTL_SYNCOBJ_EVENTFD. Signed-off-by: Simon Ser <co...@em...> diff --git a/core-symbols.txt b/core-symbols.txt index 9f8a323b..0d3464e9 100644 --- a/core-symbols.txt +++ b/core-symbols.txt @@ -187,6 +187,7 @@ drmSLNext drmSwitchToContext drmSyncobjCreate drmSyncobjDestroy +drmSyncobjEventfd drmSyncobjExportSyncFile drmSyncobjFDToHandle drmSyncobjHandleToFD diff --git a/xf86drm.c b/xf86drm.c index 2afb99e0..52b83ccc 100644 --- a/xf86drm.c +++ b/xf86drm.c @@ -5102,6 +5102,20 @@ drm_public int drmSyncobjTransfer(int fd, return ret; } +drm_public int drmSyncobjEventfd(int fd, uint32_t handle, uint64_t point, int ev_fd, + uint32_t flags) +{ + struct drm_syncobj_eventfd args; + + memclear(args); + args.handle = handle; + args.point = point; + args.fd = ev_fd; + args.flags = flags; + + return drmIoctl(fd, DRM_IOCTL_SYNCOBJ_EVENTFD, &args); +} + static char * drmGetFormatModifierFromSimpleTokens(uint64_t modifier) { diff --git a/xf86drm.h b/xf86drm.h index de756e88..1c38cb18 100644 --- a/xf86drm.h +++ b/xf86drm.h @@ -947,6 +947,8 @@ extern int drmSyncobjTransfer(int fd, uint32_t dst_handle, uint64_t dst_point, uint32_t src_handle, uint64_t src_point, uint32_t flags); +extern int drmSyncobjEventfd(int fd, uint32_t handle, uint64_t point, int ev_fd, + uint32_t flags); extern char * drmGetFormatModifierVendor(uint64_t modifier); commit 431becd4e0dddf974aadd7bf7965b3a28b22dd23 Author: Simon Ser <co...@em...> Date: Thu Jul 27 15:56:46 2023 +0200 Sync headers with drm-next Synchronize drm.h, drm_mode.h and drm_fourcc.h to drm-next. 
Generated using make headers_install. Generated from drm-next branch commit 52920704df878050123dfeb469aa6ab8022547c1 Signed-off-by: Simon Ser <co...@em...> diff --git a/include/drm/drm.h b/include/drm/drm.h index 398c396f..78805ad0 100644 --- a/include/drm/drm.h +++ b/include/drm/drm.h @@ -629,8 +629,8 @@ struct drm_gem_open { /** * DRM_CAP_VBLANK_HIGH_CRTC * - * If set to 1, the kernel supports specifying a CRTC index in the high bits of - * &drm_wait_vblank_request.type. + * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>` + * in the high bits of &drm_wait_vblank_request.type. * * Starting kernel version 2.6.39, this capability is always set to 1. */ @@ -903,6 +903,27 @@ struct drm_syncobj_timeline_wait { __u32 pad; }; +/** + * struct drm_syncobj_eventfd + * @handle: syncobj handle. + * @flags: Zero to wait for the point to be signalled, or + * &DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE to wait for a fence to be + * available for the point. + * @point: syncobj timeline point (set to zero for binary syncobjs). + * @fd: Existing eventfd to send events to. + * @pad: Must be zero. + * + * Register an eventfd to be signalled by a syncobj. The eventfd counter will + * be incremented by one. + */ +struct drm_syncobj_eventfd { + __u32 handle; + __u32 flags; + __u64 point; + __s32 fd; + __u32 pad; +}; + struct drm_syncobj_array { __u64 handles; @@ -966,6 +987,19 @@ extern "C" { #define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats) #define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version) #define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl) +/** + * DRM_IOCTL_GEM_CLOSE - Close a GEM handle. + * + * GEM handles are not reference-counted by the kernel. User-space is + * responsible for managing their lifetime. 
For example, if user-space imports + * the same memory object twice on the same DRM file description, the same GEM + * handle is returned by both imports, and user-space needs to ensure + * &DRM_IOCTL_GEM_CLOSE is performed once only. The same situation can happen + * when a memory object is allocated, then exported and imported again on the + * same DRM file description. The &DRM_IOCTL_MODE_GETFB2 IOCTL is an exception + * and always returns fresh new GEM handles even if an existing GEM handle + * already refers to the same memory object before the IOCTL is performed. + */ #define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close) #define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink) #define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open) @@ -1006,7 +1040,37 @@ extern "C" { #define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock) #define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock) +/** + * DRM_IOCTL_PRIME_HANDLE_TO_FD - Convert a GEM handle to a DMA-BUF FD. + * + * User-space sets &drm_prime_handle.handle with the GEM handle to export and + * &drm_prime_handle.flags, and gets back a DMA-BUF file descriptor in + * &drm_prime_handle.fd. + * + * The export can fail for any driver-specific reason, e.g. because export is + * not supported for this specific GEM handle (but might be for others). + * + * Support for exporting DMA-BUFs is advertised via &DRM_PRIME_CAP_EXPORT. + */ #define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle) +/** + * DRM_IOCTL_PRIME_FD_TO_HANDLE - Convert a DMA-BUF FD to a GEM handle. + * + * User-space sets &drm_prime_handle.fd with a DMA-BUF file descriptor to + * import, and gets back a GEM handle in &drm_prime_handle.handle. + * &drm_prime_handle.flags is unused. + * + * If an existing GEM handle refers to the memory object backing the DMA-BUF, + * that GEM handle is returned. 
Therefore user-space which needs to handle + * arbitrary DMA-BUFs must have a user-space lookup data structure to manually + * reference-count duplicated GEM handles. For more information see + * &DRM_IOCTL_GEM_CLOSE. + * + * The import can fail for any driver-specific reason, e.g. because import is + * only supported for DMA-BUFs allocated on this DRM device. + * + * Support for importing DMA-BUFs is advertised via &DRM_PRIME_CAP_IMPORT. + */ #define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle) #define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30) @@ -1044,6 +1108,16 @@ extern "C" { #define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) #define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) #define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). + */ #define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) @@ -1080,8 +1154,38 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer) #define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array) +/** + * DRM_IOCTL_MODE_GETFB2 - Get framebuffer metadata. + * + * This queries metadata about a framebuffer. User-space fills + * &drm_mode_fb_cmd2.fb_id as the input, and the kernel fills the rest of the + * struct as the output. + * + * If the client is DRM master or has &CAP_SYS_ADMIN, &drm_mode_fb_cmd2.handles + * will be filled with GEM buffer handles. 
Fresh new GEM handles are always + * returned, even if another GEM handle referring to the same memory object + * already exists on the DRM file description. The caller is responsible for + * removing the new handles, e.g. via the &DRM_IOCTL_GEM_CLOSE IOCTL. The same + * new handle will be returned for multiple planes in case they use the same + * memory object. Planes are valid until one has a zero handle -- this can be + * used to compute the number of planes. + * + * Otherwise, &drm_mode_fb_cmd2.handles will be zeroed and planes are valid + * until one has a zero &drm_mode_fb_cmd2.pitches. + * + * If the framebuffer has a format modifier, &DRM_MODE_FB_MODIFIERS will be set + * in &drm_mode_fb_cmd2.flags and &drm_mode_fb_cmd2.modifier will contain the + * modifier. Otherwise, user-space must ignore &drm_mode_fb_cmd2.modifier. + * + * To obtain DMA-BUF FDs for each plane without leaking GEM handles, user-space + * can export each handle via &DRM_IOCTL_PRIME_HANDLE_TO_FD, then immediately + * close each unique handle via &DRM_IOCTL_GEM_CLOSE, making sure to not + * double-close handles which are specified multiple times in the array. + */ #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) +#define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. 
diff --git a/include/drm/drm_fourcc.h b/include/drm/drm_fourcc.h index 0e70e36c..6b6235f7 100644 --- a/include/drm/drm_fourcc.h +++ b/include/drm/drm_fourcc.h @@ -88,6 +88,18 @@ extern "C" { * * The authoritative list of format modifier codes is found in * `include/uapi/drm/drm_fourcc.h` + * + * Open Source User Waiver + * ----------------------- + * + * Because this is the authoritative source for pixel formats and modifiers + * referenced by GL, Vulkan extensions and other standards and hence used both + * by open source and closed source driver stacks, the usual requirement for an + * upstream in-kernel or open source userspace user does not apply. + * + * To ensure, as much as feasible, compatibility across stacks and avoid + * confusion with incompatible enumerations stakeholders for all relevant driver + * stacks should approve additions. */ #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \ @@ -645,6 +657,49 @@ extern "C" { */ #define I915_FORMAT_MOD_4_TILED_DG2_RC_CCS_CC fourcc_mod_code(INTEL, 12) +/* + * Intel Color Control Surfaces (CCS) for display ver. 14 render compression. + * + * The main surface is tile4 and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * tile4 widths. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS fourcc_mod_code(INTEL, 13) + +/* + * Intel Color Control Surfaces (CCS) for display ver. 14 media compression + * + * The main surface is tile4 and at plane index 0, the CCS is linear and + * at index 1. A 64B CCS cache line corresponds to an area of 4x1 tiles in + * main surface. In other words, 4 bits in CCS map to a main surface cache + * line pair. The main surface pitch is required to be a multiple of four + * tile4 widths. 
For semi-planar formats like NV12, CCS planes follow the + * Y and UV planes i.e., planes 0 and 1 are used for Y and UV surfaces, + * planes 2 and 3 for the respective CCS. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_MC_CCS fourcc_mod_code(INTEL, 14) + +/* + * Intel Color Control Surface with Clear Color (CCS) for display ver. 14 render + * compression. + * + * The main surface is tile4 and is at plane index 0 whereas CCS is linear + * and at index 1. The clear color is stored at index 2, and the pitch should + * be ignored. The clear color structure is 256 bits. The first 128 bits + * represents Raw Clear Color Red, Green, Blue and Alpha color each represented + * by 32 bits. The raw clear color is consumed by the 3d engine and generates + * the converted clear color of size 64 bits. The first 32 bits store the Lower + * Converted Clear Color value and the next 32 bits store the Higher Converted + * Clear Color value when applicable. The Converted Clear Color values are + * consumed by the DE. The last 64 bits are used to store Color Discard Enable + * and Depth Clear Value Valid which are ignored by the DE. A CCS cache line + * corresponds to an area of 4x1 tiles in the main surface. The main surface + * pitch is required to be a multiple of 4 tile widths. + */ +#define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 9b6722d4..92d96a2b 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -312,16 +312,48 @@ struct drm_mode_set_plane { __u32 src_w; }; +/** + * struct drm_mode_get_plane - Get plane metadata. + * + * Userspace can perform a GETPLANE ioctl to retrieve information about a + * plane. + * + * To retrieve the number of formats supported, set @count_format_types to zero + * and call the ioctl. @count_format_types will be updated with the value. 
+ * + * To retrieve these formats, allocate an array with the memory needed to store + * @count_format_types formats. Point @format_type_ptr to this array and call + * the ioctl again (with @count_format_types still set to the value returned in + * the first ioctl call). + */ struct drm_mode_get_plane { + /** + * @plane_id: Object ID of the plane whose information should be + * retrieved. Set by caller. + */ __u32 plane_id; + /** @crtc_id: Object ID of the current CRTC. */ __u32 crtc_id; + /** @fb_id: Object ID of the current fb. */ __u32 fb_id; + /** + * @possible_crtcs: Bitmask of CRTC's compatible with the plane. CRTC's + * are created and they receive an index, which corresponds to their + * position in the bitmask. Bit N corresponds to + * :ref:`CRTC index<crtc_index>` N. + */ __u32 possible_crtcs; + /** @gamma_size: Never used. */ __u32 gamma_size; + /** @count_format_types: Number of formats. */ __u32 count_format_types; + /** + * @format_type_ptr: Pointer to ``__u32`` array of formats that are + * supported by the plane. These formats do not require modifiers. + */ __u64 format_type_ptr; }; @@ -509,22 +541,74 @@ struct drm_mode_get_connector { */ #define DRM_MODE_PROP_ATOMIC 0x80000000 +/** + * struct drm_mode_property_enum - Description for an enum/bitfield entry. + * @value: numeric value for this enum entry. + * @name: symbolic name for this enum entry. + * + * See struct drm_property_enum for details. + */ struct drm_mode_property_enum { __u64 value; char name[DRM_PROP_NAME_LEN]; }; +/** + * struct drm_mode_get_property - Get property metadata. + * + * User-space can perform a GETPROPERTY ioctl to retrieve information about a + * property. The same property may be attached to multiple objects, see + * "Modeset Base Object Abstraction". + * + * The meaning of the @values_ptr field changes depending on the property type. + * See &drm_property.flags for more details. 
+ * + * The @enum_blob_ptr and @count_enum_blobs fields are only meaningful when the + * property has the type &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK. For + * backwards compatibility, the kernel will always set @count_enum_blobs to + * zero when the property has the type &DRM_MODE_PROP_BLOB. User-space must + * ignore these two fields if the property has a different type. + * + * User-space is expected to retrieve values and enums by performing this ioctl + * at least twice: the first time to retrieve the number of elements, the + * second time to retrieve the elements themselves. + * + * To retrieve the number of elements, set @count_values and @count_enum_blobs + * to zero, then call the ioctl. @count_values will be updated with the number + * of elements. If the property has the type &DRM_MODE_PROP_ENUM or + * &DRM_MODE_PROP_BITMASK, @count_enum_blobs will be updated as well. + * + * To retrieve the elements themselves, allocate an array for @values_ptr and + * set @count_values to its capacity. If the property has the type + * &DRM_MODE_PROP_ENUM or &DRM_MODE_PROP_BITMASK, allocate an array for + * @enum_blob_ptr and set @count_enum_blobs to its capacity. Calling the ioctl + * again will fill the arrays. + */ struct drm_mode_get_property { - __u64 values_ptr; /* values and blob lengths */ - __u64 enum_blob_ptr; /* enum and blob id ptrs */ + /** @values_ptr: Pointer to a ``__u64`` array. */ + __u64 values_ptr; + /** @enum_blob_ptr: Pointer to a struct drm_mode_property_enum array. */ + __u64 enum_blob_ptr; + /** + * @prop_id: Object ID of the property which should be retrieved. Set + * by the caller. + */ __u32 prop_id; + /** + * @flags: ``DRM_MODE_PROP_*`` bitfield. See &drm_property.flags for + * a definition of the flags. + */ __u32 flags; + /** + * @name: Symbolic property name. User-space should use this field to + * recognize properties. + */ char name[DRM_PROP_NAME_LEN]; + /** @count_values: Number of elements in @values_ptr. 
*/ __u32 count_values; - /* This is only used to count enum values, not blobs. The _blobs is - * simply because of a historical reason, i.e. backwards compat. */ + /** @count_enum_blobs: Number of elements in @enum_blob_ptr. */ __u32 count_enum_blobs; }; @@ -579,41 +663,73 @@ struct drm_mode_fb_cmd { #define DRM_MODE_FB_INTERLACED (1<<0) /* for interlaced framebuffers */ #define DRM_MODE_FB_MODIFIERS (1<<1) /* enables ->modifer[] */ +/** + * struct drm_mode_fb_cmd2 - Frame-buffer metadata. + * + * This struct holds frame-buffer metadata. There are two ways to use it: + * + * - User-space can fill this struct and perform a &DRM_IOCTL_MODE_ADDFB2 + * ioctl to register a new frame-buffer. The new frame-buffer object ID will + * be set by the kernel in @fb_id. + * - User-space can set @fb_id and perform a &DRM_IOCTL_MODE_GETFB2 ioctl to + * fetch metadata about an existing frame-buffer. + * + * In case of planar formats, this struct allows up to 4 buffer objects with + * offsets and pitches per plane. The pitch and offset order are dictated by + * the format FourCC as defined by ``drm_fourcc.h``, e.g. NV12 is described as: + * + * YUV 4:2:0 image with a plane of 8-bit Y samples followed by an + * interleaved U/V plane containing 8-bit 2x2 subsampled colour difference + * samples. + * + * So it would consist of a Y plane at ``offsets[0]`` and a UV plane at + * ``offsets[1]``. + * + * To accommodate tiled, compressed, etc formats, a modifier can be specified. + * For more information see the "Format Modifiers" section. Note that even + * though it looks like we have a modifier per-plane, we in fact do not. The + * modifier for each plane must be identical. Thus all combinations of + * different data layouts for multi-plane formats must be enumerated as + * separate modifiers. + * + * All of the entries in @handles, @pitches, @offsets and @modifier must be + * zero when unused. 
Warning, for @offsets and @modifier zero can't be used to + * figure out whether the entry is used or not since it's a valid value (a zero + * offset is common, and a zero modifier is &DRM_FORMAT_MOD_LINEAR). + */ struct drm_mode_fb_cmd2 { + /** @fb_id: Object ID of the frame-buffer. */ __u32 fb_id; + /** @width: Width of the frame-buffer. */ __u32 width; + /** @height: Height of the frame-buffer. */ __u32 height; - __u32 pixel_format; /* fourcc code from drm_fourcc.h */ - __u32 flags; /* see above flags */ + /** + * @pixel_format: FourCC format code, see ``DRM_FORMAT_*`` constants in + * ``drm_fourcc.h``. + */ + __u32 pixel_format; + /** + * @flags: Frame-buffer flags (see &DRM_MODE_FB_INTERLACED and + * &DRM_MODE_FB_MODIFIERS). + */ + __u32 flags; - /* - * In case of planar formats, this ioctl allows up to 4 - * buffer objects with offsets and pitches per plane. - * The pitch and offset order is dictated by the fourcc, - * e.g. NV12 (https://fourcc.org/yuv.php#NV12) is described as: - * - * YUV 4:2:0 image with a plane of 8 bit Y samples - * followed by an interleaved U/V plane containing - * 8 bit 2x2 subsampled colour difference samples. - * - * So it would consist of Y as offsets[0] and UV as - * offsets[1]. Note that offsets[0] will generally - * be 0 (but this is not required). - * - * To accommodate tiled, compressed, etc formats, a - * modifier can be specified. The default value of zero - * indicates "native" format as specified by the fourcc. - * Vendor specific modifier token. Note that even though - * it looks like we have a modifier per-plane, we in fact - * do not. The modifier for each plane must be identical. - * Thus all combinations of different data layouts for - * multi plane formats must be enumerated as separate - * modifiers. + /** + * @handles: GEM buffer handle, one per plane. Set to 0 if the plane is + * unused. The same handle can be used for multiple planes. 
*/ __u32 handles[4]; - __u32 pitches[4]; /* pitch for each plane */ - __u32 offsets[4]; /* offset of each plane */ - __u64 modifier[4]; /* ie, tiling, compress */ + /** @pitches: Pitch (aka. stride) in bytes, one per plane. */ + __u32 pitches[4]; + /** @offsets: Offset into the buffer in bytes, one per plane. */ + __u32 offsets[4]; + /** + * @modifier: Format modifier, one per plane. See ``DRM_FORMAT_MOD_*`` + * constants in ``drm_fourcc.h``. All planes must use the same + * modifier. Ignored unless &DRM_MODE_FB_MODIFIERS is set in @flags. + */ + __u64 modifier[4]; }; #define DRM_MODE_FB_DIRTY_ANNOTATE_COPY 0x01 @@ -718,6 +834,11 @@ struct drm_color_ctm { /* * Conversion matrix in S31.32 sign-magnitude * (not two's complement!) format. + * + * out matrix in + * |R| |0 1 2| |R| + * |G| = |3 4 5| x |G| + * |B| |6 7 8| |B| */ __u64 matrix[9]; }; @@ -762,7 +883,7 @@ struct hdr_metadata_infoframe { */ struct { __u16 x, y; - } display_primaries[3]; + } display_primaries[3]; /** * @white_point: White Point of Colorspace Data. * These are coded as unsigned 16-bit values in units of @@ -773,7 +894,7 @@ struct hdr_metadata_infoframe { */ struct { __u16 x, y; - } white_point; + } white_point; /** * @max_display_mastering_luminance: Max Mastering Display Luminance. * This value is coded as an unsigned 16-bit value in units of 1 cd/m2, @@ -819,12 +940,31 @@ struct hdr_output_metadata { }; }; +/** + * DRM_MODE_PAGE_FLIP_EVENT + * + * Request that the kernel sends back a vblank event (see + * struct drm_event_vblank) with the &DRM_EVENT_FLIP_COMPLETE type when the + * page-flip is done. + */ #define DRM_MODE_PAGE_FLIP_EVENT 0x01 +/** + * DRM_MODE_PAGE_FLIP_ASYNC + * + * Request that the page-flip is performed as soon as possible, ie. with no + * delay due to waiting for vblank. This may cause tearing to be visible on + * the screen. 
+ */ #define DRM_MODE_PAGE_FLIP_ASYNC 0x02 #define DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE 0x4 #define DRM_MODE_PAGE_FLIP_TARGET_RELATIVE 0x8 #define DRM_MODE_PAGE_FLIP_TARGET (DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE | \ DRM_MODE_PAGE_FLIP_TARGET_RELATIVE) +/** + * DRM_MODE_PAGE_FLIP_FLAGS + * + * Bitmask of flags suitable for &drm_mode_crtc_page_flip_target.flags. + */ #define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT | \ DRM_MODE_PAGE_FLIP_ASYNC | \ DRM_MODE_PAGE_FLIP_TARGET) @@ -918,11 +1058,53 @@ struct drm_mode_destroy_dumb { __u32 handle; }; -/* page-flip flags are valid, plus: */ +/** + * DRM_MODE_ATOMIC_TEST_ONLY + * + * Do not apply the atomic commit, instead check whether the hardware supports + * this configuration. + * + * See &drm_mode_config_funcs.atomic_check for more details on test-only + * commits. + */ #define DRM_MODE_ATOMIC_TEST_ONLY 0x0100 +/** + * DRM_MODE_ATOMIC_NONBLOCK + * + * Do not block while applying the atomic commit. The &DRM_IOCTL_MODE_ATOMIC + * IOCTL returns immediately instead of waiting for the changes to be applied + * in hardware. Note, the driver will still check that the update can be + * applied before returning. + */ #define DRM_MODE_ATOMIC_NONBLOCK 0x0200 +/** + * DRM_MODE_ATOMIC_ALLOW_MODESET + * + * Allow the update to result in temporary or transient visible artifacts while + * the update is being applied. Applying the update may also take significantly + * more time than a page flip. All visual artifacts will disappear by the time + * the update is completed, as signalled through the vblank event's timestamp + * (see struct drm_event_vblank). + * + * This flag must be set when the KMS update might cause visible artifacts. + * Without this flag such KMS update will return an EINVAL error. What kind of + * update may cause visible artifacts depends on the driver and the hardware. 
+ * User-space that needs to know beforehand if an update might cause visible + * artifacts can use &DRM_MODE_ATOMIC_TEST_ONLY without + * &DRM_MODE_ATOMIC_ALLOW_MODESET to see if it fails. + * + * To the best of the driver's knowledge, visual artifacts are guaranteed to + * not appear when this flag is not set. Some sinks might display visual + * artifacts outside of the driver's control. + */ #define DRM_MODE_ATOMIC_ALLOW_MODESET 0x0400 +/** + * DRM_MODE_ATOMIC_FLAGS + * + * Bitfield of flags accepted by the &DRM_IOCTL_MODE_ATOMIC IOCTL in + * &drm_mode_atomic.flags. + */ #define DRM_MODE_ATOMIC_FLAGS (\ DRM_MODE_PAGE_FLIP_EVENT |\ DRM_MODE_PAGE_FLIP_ASYNC |\ @@ -1026,6 +1208,10 @@ struct drm_mode_destroy_blob { * struct drm_mode_create_lease - Create lease * * Lease mode resources, creating another drm_master. + * + * The @object_ids array must reference at least one CRTC, one connector and + * one plane if &DRM_CLIENT_CAP_UNIVERSAL_PLANES is enabled. Alternatively, + * the lease can be completely empty. */ struct drm_mode_create_lease { /** @object_ids: Pointer to array of object ids (__u32) */ |
From: GitLab M. <git...@ke...> - 2023-08-16 13:29:52
|
xf86drm.c | 22 ++++++++++++---------- xf86drm.h | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) New commits: commit 3bc3cca230c5a064b2f554f26fdec27db0f5ead8 Author: James Zhu <Jam...@am...> Date: Mon Aug 7 10:14:18 2023 -0400 xf86drm: use drm device name to identify drm node type Currently drm node's minor range is used to identify node's type. Since kernel drm uses node type name and minor to generate drm device name, It will be more general to use drm device name to identify drm node type. Signed-off-by: James Zhu <Jam...@am...> Reviewed-by: Simon Ser <co...@em...> diff --git a/xf86drm.c b/xf86drm.c index 52b83ccc..ebc60956 100644 --- a/xf86drm.c +++ b/xf86drm.c @@ -1051,18 +1051,20 @@ static int drmGetMinorType(int major, int minor) minor = id; #endif - int type = minor >> 6; - - if (minor < 0) - return -1; + char path[DRM_NODE_NAME_MAX]; + const char *dev_name; + int i; - switch (type) { - case DRM_NODE_PRIMARY: - case DRM_NODE_RENDER: - return type; - default: - return -1; + for (i = DRM_NODE_PRIMARY; i < DRM_NODE_MAX; i++) { + dev_name = drmGetDeviceName(i); + if (!dev_name) + continue; + snprintf(path, sizeof(path), dev_name, DRM_DIR_NAME, minor); + if (!access(path, F_OK)) + return i; } + + return -1; } static const char *drmGetMinorName(int type) commit 7130cb163eb860d4a965c6708b64fe87cee881d6 Author: James Zhu <Jam...@am...> Date: Mon Aug 7 10:06:32 2023 -0400 xf86drm: update DRM_NODE_NAME_MAX supporting more nodes Current DRM_NODE_NAME_MAX only can support up to 999 nodes, Update to support up to 2^MINORBITS nodes. 
Signed-off-by: James Zhu <Jam...@am...> Reviewed-by: Simon Ser <co...@em...> diff --git a/xf86drm.h b/xf86drm.h index 1c38cb18..67955ca5 100644 --- a/xf86drm.h +++ b/xf86drm.h @@ -98,7 +98,7 @@ extern "C" { + MAX3(sizeof(DRM_PRIMARY_MINOR_NAME), \ sizeof(DRM_CONTROL_MINOR_NAME), \ sizeof(DRM_RENDER_MINOR_NAME)) \ - + sizeof("144") /* highest possible node number */ \ + + sizeof("1048575") /* highest possible node number 2^MINORBITS - 1 */ \ + 1) /* NULL-terminator */ #define DRM_ERR_NO_DEVICE (-1001) |
From: GitLab M. <git...@ke...> - 2023-09-04 17:45:07
|
amdgpu/amdgpu_bo.c | 2 +- tests/amdgpu/amdgpu_stress.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) New commits: commit 6a961ca843d154be994bd4d232a99d59d18fe26d Author: Geert Uytterhoeven <gee...@gl...> Date: Wed Jul 5 15:23:44 2023 +0200 amdgpu: Use PRI?64 to format uint64_t On 32-bit: ../tests/amdgpu/amdgpu_stress.c: In function ‘alloc_bo’: ../tests/amdgpu/amdgpu_stress.c:178:49: warning: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 4 has type ‘uint64_t’ {aka ‘long long unsigned int’} [-Wformat=] fprintf(stdout, "Allocated BO number %u at 0x%lx, domain 0x%x, size %lu\n", ~~^ %llx num_buffers++, addr, domain, size); ~~~~ ../tests/amdgpu/amdgpu_stress.c:178:72: warning: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 6 has type ‘uint64_t’ {aka ‘long long unsigned int’} [-Wformat=] fprintf(stdout, "Allocated BO number %u at 0x%lx, domain 0x%x, size %lu\n", ~~^ %llu num_buffers++, addr, domain, size); ~~~~ ../tests/amdgpu/amdgpu_stress.c: In function ‘submit_ib’: ../tests/amdgpu/amdgpu_stress.c:276:54: warning: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 5 has type ‘uint64_t’ {aka ‘long long unsigned int’} [-Wformat=] fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes took %lu usec\n", ~~^ %llx count, from, virtual[from], to, virtual[to], copied, delta / 1000); ~~~~~~~~~~~~~ ../tests/amdgpu/amdgpu_stress.c:276:65: warning: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 7 has type ‘uint64_t’ {aka ‘long long unsigned int’} [-Wformat=] fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes took %lu usec\n", ~~^ %llx count, from, virtual[from], to, virtual[to], copied, delta / 1000); ~~~~~~~~~~~ ../tests/amdgpu/amdgpu_stress.c:276:70: warning: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 8 has type ‘uint64_t’ {aka ‘long long unsigned int’} [-Wformat=] fprintf(stdout, "Submitted 
%u IBs to copy from %u(%lx) to %u(%lx) %lu bytes took %lu usec\n", ~~^ %llu count, from, virtual[from], to, virtual[to], copied, delta / 1000); ~~~~~~ ../tests/amdgpu/amdgpu_stress.c:276:85: warning: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 9 has type ‘uint64_t’ {aka ‘long long unsigned int’} [-Wformat=] fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes took %lu usec\n", ~~^ %llu count, from, virtual[from], to, virtual[to], copied, delta / 1000); ~~~~~~~~~~~~ ../tests/amdgpu/amdgpu_stress.c: In function ‘parse_size’: ../tests/amdgpu/amdgpu_stress.c:296:24: warning: format ‘%li’ expects argument of type ‘long int *’, but argument 3 has type ‘uint64_t *’ {aka ‘long long unsigned int *’} [-Wformat=] if (sscanf(optarg, "%li%1[kmgKMG]", &size, ext) < 1) { ~~^ ~~~~~ %lli ../tests/amdgpu/amdgpu_stress.c: In function ‘main’: ../tests/amdgpu/amdgpu_stress.c:378:45: warning: format ‘%lu’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘uint64_t’ {aka ‘long long unsigned int’} [-Wformat=] fprintf(stderr, "Buffer size to small %lu\n", size); ~~^ ~~~~ %llu Fix this by using the proper "PRI?64" format specifiers. Fixes: d77ccdf3ba6f5a39 ("amdgpu: add amdgpu_stress utility v2") Signed-off-by: Geert Uytterhoeven <gee...@gl...> Reviewed-by: Marek Olšák <mar...@am...> --- On Linux/amd64, the format strings in the resulting binary are unchanged. v3: - Add Reviewed-by, v2: - Use PRI?64 to unbreak 64-bit build. 
diff --git a/tests/amdgpu/amdgpu_stress.c b/tests/amdgpu/amdgpu_stress.c index 5c5c88c5..f919351e 100644 --- a/tests/amdgpu/amdgpu_stress.c +++ b/tests/amdgpu/amdgpu_stress.c @@ -30,6 +30,7 @@ #include <errno.h> #include <unistd.h> #include <stdlib.h> +#include <inttypes.h> #include "drm.h" #include "xf86drmMode.h" @@ -175,7 +176,7 @@ int alloc_bo(uint32_t domain, uint64_t size) resources[num_buffers] = bo; virtual[num_buffers] = addr; - fprintf(stdout, "Allocated BO number %u at 0x%lx, domain 0x%x, size %lu\n", + fprintf(stdout, "Allocated BO number %u at 0x%" PRIx64 ", domain 0x%x, size %" PRIu64 "\n", num_buffers++, addr, domain, size); return 0; } @@ -273,7 +274,7 @@ int submit_ib(uint32_t from, uint32_t to, uint64_t size, uint32_t count) delta = stop.tv_nsec + stop.tv_sec * 1000000000UL; delta -= start.tv_nsec + start.tv_sec * 1000000000UL; - fprintf(stdout, "Submitted %u IBs to copy from %u(%lx) to %u(%lx) %lu bytes took %lu usec\n", + fprintf(stdout, "Submitted %u IBs to copy from %u(%" PRIx64 ") to %u(%" PRIx64 ") %" PRIu64 " bytes took %" PRIu64 " usec\n", count, from, virtual[from], to, virtual[to], copied, delta / 1000); return 0; } @@ -293,7 +294,7 @@ uint64_t parse_size(void) char ext[2]; ext[0] = 0; - if (sscanf(optarg, "%li%1[kmgKMG]", &size, ext) < 1) { + if (sscanf(optarg, "%" PRIi64 "%1[kmgKMG]", &size, ext) < 1) { fprintf(stderr, "Can't parse size arg: %s\n", optarg); exit(EXIT_FAILURE); } @@ -375,7 +376,7 @@ int main(int argc, char **argv) next_arg(argc, argv, "Missing buffer size"); size = parse_size(); if (size < getpagesize()) { - fprintf(stderr, "Buffer size to small %lu\n", size); + fprintf(stderr, "Buffer size to small %" PRIu64 "\n", size); exit(EXIT_FAILURE); } r = alloc_bo(domain, size); commit ca041d5fe66bd22542dbfd93e991d5aa20950d76 Author: Geert Uytterhoeven <gee...@gl...> Date: Wed Jul 5 15:23:24 2023 +0200 amdgpu: Fix pointer/integer mismatch warning On 32-bit: ../amdgpu/amdgpu_bo.c: In function ‘amdgpu_find_bo_by_cpu_mapping’: 
../amdgpu/amdgpu_bo.c:554:13: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] cpu < (void*)((uintptr_t)bo->cpu_ptr + bo->alloc_size)) ^ Indeed, as amdgpu_bo_info.alloc_size is "uint64_t", the sum is always 64-bit, while "void *" can be 32-bit or 64-bit. Fix this by casting bo->alloc_size to "size_t", which is either 32-bit or 64-bit, just like "void *". Fixes: c6493f360e7529c2 ("amdgpu: Eliminate void* arithmetic in amdgpu_find_bo_by_cpu_mapping") Signed-off-by: Geert Uytterhoeven <gee...@gl...> Reviewed-by: Marek Olšák <mar...@am...> --- v2: - Add Reviewed-by. diff --git a/amdgpu/amdgpu_bo.c b/amdgpu/amdgpu_bo.c index f4e04352..672f000d 100644 --- a/amdgpu/amdgpu_bo.c +++ b/amdgpu/amdgpu_bo.c @@ -551,7 +551,7 @@ drm_public int amdgpu_find_bo_by_cpu_mapping(amdgpu_device_handle dev, if (!bo || !bo->cpu_ptr || size > bo->alloc_size) continue; if (cpu >= bo->cpu_ptr && - cpu < (void*)((uintptr_t)bo->cpu_ptr + bo->alloc_size)) + cpu < (void*)((uintptr_t)bo->cpu_ptr + (size_t)bo->alloc_size)) break; } |
From: GitLab M. <git...@ke...> - 2023-09-10 11:49:09
|
tests/modetest/modetest.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) New commits: commit 64b09cef49f9cd7e7eb9871d8fe36ed5b315149a Author: Marijn Suijten <mar...@so...> Date: Sat Sep 9 10:26:33 2023 +0200 modetest: document why no blob is created for linear gamma LUT As found and discussed in [MR 58] a blob is not created in the else arm because adding the GAMMA_LUT property with a NULL/0 blob_id causes it to be reset to a default linear / pass-thru gamma table. The values in the gamma_lut table might still be consumed in the legacy API path below though, so it has to be initialized to a linear table. [MR 58]: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/58#note_466972 Signed-off-by: Marijn Suijten <mar...@so...> diff --git a/tests/modetest/modetest.c b/tests/modetest/modetest.c index 3d24e0dc..8bc3d5a5 100644 --- a/tests/modetest/modetest.c +++ b/tests/modetest/modetest.c @@ -1158,6 +1158,11 @@ static void set_gamma(struct device *dev, unsigned crtc_id, unsigned fourcc) util_smpte_c8_gamma(256, gamma_lut); drmModeCreatePropertyBlob(dev->fd, gamma_lut, sizeof(gamma_lut), &blob_id); } else { + /* + * Initialize gamma_lut to a linear table for the legacy API below. + * The modern property API resets to a linear/pass-thru table if blob_id + * is 0, hence no PropertyBlob is created here. + */ for (i = 0; i < 256; i++) { gamma_lut[i].red = gamma_lut[i].green = commit b709c3010e9a39b7ed4ed63a16148d4fa0055d3f Author: Ezequiel Garcia <eze...@co...> Date: Sat Oct 5 13:31:22 2019 -0300 modetest: avoid erroring if there's no gamma legacy support Let's follow the Rule of Silence. And while here, document what's going on. 
Signed-off-by: Ezequiel Garcia <eze...@co...> Reviewed-by: Marijn Suijten <mar...@so...> diff --git a/tests/modetest/modetest.c b/tests/modetest/modetest.c index 2f6f328d..3d24e0dc 100644 --- a/tests/modetest/modetest.c +++ b/tests/modetest/modetest.c @@ -1168,6 +1168,7 @@ static void set_gamma(struct device *dev, unsigned crtc_id, unsigned fourcc) add_property_optional(dev, crtc_id, "DEGAMMA_LUT", 0); add_property_optional(dev, crtc_id, "CTM", 0); if (!add_property_optional(dev, crtc_id, "GAMMA_LUT", blob_id)) { + /* If we can't add the GAMMA_LUT property, try the legacy API. */ uint16_t r[256], g[256], b[256]; for (i = 0; i < 256; i++) { @@ -1177,7 +1178,7 @@ static void set_gamma(struct device *dev, unsigned crtc_id, unsigned fourcc) } ret = drmModeCrtcSetGamma(dev->fd, crtc_id, 256, r, g, b); - if (ret) + if (ret && errno != ENOSYS) fprintf(stderr, "failed to set gamma: %s\n", strerror(errno)); } } |
From: GitLab M. <git...@ke...> - 2023-09-10 11:57:43
|
tests/modetest/modetest.c | 56 +++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 28 deletions(-) New commits: commit 7618a646330d2dda663d44ef9066994f28d7812f Author: Neil Armstrong <nei...@li...> Date: Wed Jan 18 15:01:22 2023 +0100 modetest: allow using -r and -P Since now -r sets the pipe struct and count like -s we can also use -P with -r. Signed-off-by: Neil Armstrong <nei...@li...> diff --git a/tests/modetest/modetest.c b/tests/modetest/modetest.c index 58ef9ec3..4b45994e 100644 --- a/tests/modetest/modetest.c +++ b/tests/modetest/modetest.c @@ -2290,11 +2290,6 @@ int main(int argc, char **argv) return -1; } - if (set_preferred && plane_count) { - fprintf(stderr, "cannot use -r (preferred) when -P (plane) is set\n"); - return -1; - } - dev.fd = util_open(device, module); if (dev.fd < 0) return -1; commit 2e17aea5735a377112c2f2d325c1ec28b11f7285 Author: Neil Armstrong <nei...@li...> Date: Wed Jan 18 14:29:42 2023 +0100 modetest: permit -r and -s to work together Let's permit testing vsync with the default mode, this returns back the pipe content and count when calling set_mode() so the vsync test can also be used. 
Signed-off-by: Neil Armstrong <nei...@li...> diff --git a/tests/modetest/modetest.c b/tests/modetest/modetest.c index 8bc3d5a5..58ef9ec3 100644 --- a/tests/modetest/modetest.c +++ b/tests/modetest/modetest.c @@ -1608,32 +1608,35 @@ static struct plane *get_primary_plane_by_crtc(struct device *dev, struct crtc * return NULL; } -static void set_mode(struct device *dev, struct pipe_arg *pipes, unsigned int count) +static unsigned int set_mode(struct device *dev, struct pipe_arg **pipe_args, unsigned int count) { unsigned int i, j; int ret, x = 0; int preferred = count == 0; + struct pipe_arg *pipes; - for (i = 0; i < count; i++) { - struct pipe_arg *pipe = &pipes[i]; - - ret = pipe_resolve_connectors(dev, pipe); - if (ret < 0) - return; - - ret = pipe_find_crtc_and_mode(dev, pipe); - if (ret < 0) - continue; - } if (preferred) { - struct pipe_arg *pipe_args; - - count = pipe_find_preferred(dev, &pipe_args); + count = pipe_find_preferred(dev, pipe_args); if (!count) { fprintf(stderr, "can't find any preferred connector/mode.\n"); - return; + return 0; + } + + pipes = *pipe_args; + } else { + pipes = *pipe_args; + + for (i = 0; i < count; i++) { + struct pipe_arg *pipe = &pipes[i]; + + ret = pipe_resolve_connectors(dev, pipe); + if (ret < 0) + return 0; + + ret = pipe_find_crtc_and_mode(dev, pipe); + if (ret < 0) + continue; } - pipes = pipe_args; } if (!dev->use_atomic) { @@ -1660,7 +1663,7 @@ static void set_mode(struct device *dev, struct pipe_arg *pipes, unsigned int co if (bo_fb_create(dev->fd, pipes[0].fourcc, dev->mode.width, dev->mode.height, primary_fill, &dev->mode.bo, &dev->mode.fb_id)) - return; + return 0; } for (i = 0; i < count; i++) { @@ -1692,7 +1695,7 @@ static void set_mode(struct device *dev, struct pipe_arg *pipes, unsigned int co if (ret) { fprintf(stderr, "failed to set mode: %s\n", strerror(errno)); - return; + return 0; } set_gamma(dev, pipe->crtc_id, pipe->fourcc); @@ -1718,6 +1721,8 @@ static void set_mode(struct device *dev, struct pipe_arg 
*pipes, unsigned int co } } } + + return count; } static void writeback_config(struct device *dev, struct pipe_arg *pipes, unsigned int count) @@ -2276,8 +2281,8 @@ int main(int argc, char **argv) if (!args) encoders = connectors = crtcs = planes = framebuffers = 1; - if (test_vsync && !count) { - fprintf(stderr, "page flipping requires at least one -s option.\n"); + if (test_vsync && !count && !set_preferred) { + fprintf(stderr, "page flipping requires at least one -s or -r option.\n"); return -1; } if (set_preferred && count) { @@ -2336,7 +2341,7 @@ int main(int argc, char **argv) } if (set_preferred || count) - set_mode(&dev, pipe_args, count); + count = set_mode(&dev, &pipe_args, count); if (dump_path) { if (!pipe_has_writeback_connector(&dev, pipe_args, count)) { @@ -2411,7 +2416,7 @@ int main(int argc, char **argv) } if (set_preferred || count) - set_mode(&dev, pipe_args, count); + count = set_mode(&dev, &pipe_args, count); if (plane_count) set_planes(&dev, plane_args, plane_count); |
From: GitLab M. <git...@ke...> - 2023-10-13 14:45:33
|
amdgpu/amdgpu-symbols.txt | 1 amdgpu/amdgpu.h | 16 +++++ amdgpu/amdgpu_gpu_info.c | 14 +++++ include/drm/amdgpu_drm.h | 127 +++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 151 insertions(+), 7 deletions(-) New commits: commit 8d8357dc6482c771d2f9501257c29abe8b06ba9c Author: Samuel Pitoiset <sam...@gm...> Date: Fri Feb 24 10:01:53 2023 +0100 amdgpu: add support for querying VM faults information Signed-off-by: Samuel Pitoiset <sam...@gm...> diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt index d41d9c24..1f7f56ce 100644 --- a/amdgpu/amdgpu-symbols.txt +++ b/amdgpu/amdgpu-symbols.txt @@ -63,6 +63,7 @@ amdgpu_query_crtc_from_id amdgpu_query_firmware_version amdgpu_query_gds_info amdgpu_query_gpu_info +amdgpu_query_gpuvm_fault_info amdgpu_query_heap_info amdgpu_query_hw_ip_count amdgpu_query_hw_ip_info diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 5ef2524a..1cd96f86 100644 --- a/amdgpu/amdgpu.h +++ b/amdgpu/amdgpu.h @@ -1282,6 +1282,22 @@ int amdgpu_query_sensor_info(amdgpu_device_handle dev, unsigned sensor_type, int amdgpu_query_video_caps_info(amdgpu_device_handle dev, unsigned cap_type, unsigned size, void *value); +/** + * Query information about VM faults + * + * The return sizeof(struct drm_amdgpu_info_gpuvm_fault) + * + * \param dev - \c [in] Device handle. See #amdgpu_device_initialize() + * \param size - \c [in] Size of the returned value. + * \param value - \c [out] Pointer to the return value. + * + * \return 0 on success\n + * <0 - Negative POSIX Error code + * +*/ +int amdgpu_query_gpuvm_fault_info(amdgpu_device_handle dev, unsigned size, + void *value); + /** * Read a set of consecutive memory-mapped registers. * Not all registers are allowed to be read by userspace. 
diff --git a/amdgpu/amdgpu_gpu_info.c b/amdgpu/amdgpu_gpu_info.c index 9f8695ce..1a5143a6 100644 --- a/amdgpu/amdgpu_gpu_info.c +++ b/amdgpu/amdgpu_gpu_info.c @@ -346,3 +346,17 @@ drm_public int amdgpu_query_video_caps_info(amdgpu_device_handle dev, unsigned c return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)); } + +drm_public int amdgpu_query_gpuvm_fault_info(amdgpu_device_handle dev, + unsigned size, void *value) +{ + struct drm_amdgpu_info request; + + memset(&request, 0, sizeof(request)); + request.return_pointer = (uintptr_t)value; + request.return_size = size; + request.query = AMDGPU_INFO_GPUVM_FAULT; + + return drmCommandWrite(dev->fd, DRM_AMDGPU_INFO, &request, + sizeof(struct drm_amdgpu_info)); +} commit 22b698a5990292bce0eeb2782754d1eba3fe7a2e Author: Samuel Pitoiset <sam...@gm...> Date: Fri Feb 24 10:03:36 2023 +0100 amdgpu: amdgpu_drm.h for new GPUVM fault ioctl Based on agd5f/drm-next. Signed-off-by: Samuel Pitoiset <sam...@gm...> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h index c0a0ad10..ad21c613 100644 --- a/include/drm/amdgpu_drm.h +++ b/include/drm/amdgpu_drm.h @@ -94,6 +94,9 @@ extern "C" { * * %AMDGPU_GEM_DOMAIN_OA Ordered append, used by 3D or Compute engines * for appending data. + * + * %AMDGPU_GEM_DOMAIN_DOORBELL Doorbell. It is an MMIO region for + * signalling user mode queues. 
*/ #define AMDGPU_GEM_DOMAIN_CPU 0x1 #define AMDGPU_GEM_DOMAIN_GTT 0x2 @@ -101,12 +104,14 @@ extern "C" { #define AMDGPU_GEM_DOMAIN_GDS 0x8 #define AMDGPU_GEM_DOMAIN_GWS 0x10 #define AMDGPU_GEM_DOMAIN_OA 0x20 +#define AMDGPU_GEM_DOMAIN_DOORBELL 0x40 #define AMDGPU_GEM_DOMAIN_MASK (AMDGPU_GEM_DOMAIN_CPU | \ AMDGPU_GEM_DOMAIN_GTT | \ AMDGPU_GEM_DOMAIN_VRAM | \ AMDGPU_GEM_DOMAIN_GDS | \ AMDGPU_GEM_DOMAIN_GWS | \ - AMDGPU_GEM_DOMAIN_OA) + AMDGPU_GEM_DOMAIN_OA | \ + AMDGPU_GEM_DOMAIN_DOORBELL) /* Flag that CPU access will be required for the case of VRAM domain */ #define AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED (1 << 0) @@ -140,6 +145,32 @@ extern "C" { * not require GTT memory accounting */ #define AMDGPU_GEM_CREATE_PREEMPTIBLE (1 << 11) +/* Flag that BO can be discarded under memory pressure without keeping the + * content. + */ +#define AMDGPU_GEM_CREATE_DISCARDABLE (1 << 12) +/* Flag that BO is shared coherently between multiple devices or CPU threads. + * May depend on GPU instructions to flush caches to system scope explicitly. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_COHERENT (1 << 13) +/* Flag that BO should not be cached by GPU. Coherent without having to flush + * GPU caches explicitly + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. + */ +#define AMDGPU_GEM_CREATE_UNCACHED (1 << 14) +/* Flag that BO should be coherent across devices when using device-level + * atomics. May depend on GPU instructions to flush caches to device scope + * explicitly, promoting them to system scope automatically. + * + * This influences the choice of MTYPE in the PTEs on GFXv9 and later GPUs and + * may override the MTYPE selected in AMDGPU_VA_OP_MAP. 
+ */ +#define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15) struct drm_amdgpu_gem_create_in { /** the requested memory size */ @@ -218,15 +249,17 @@ union drm_amdgpu_bo_list { /* unknown cause */ #define AMDGPU_CTX_UNKNOWN_RESET 3 -/* indicate gpu reset occured after ctx created */ +/* indicate gpu reset occurred after ctx created */ #define AMDGPU_CTX_QUERY2_FLAGS_RESET (1<<0) -/* indicate vram lost occured after ctx created */ +/* indicate vram lost occurred after ctx created */ #define AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST (1<<1) /* indicate some job from this context once cause gpu hang */ #define AMDGPU_CTX_QUERY2_FLAGS_GUILTY (1<<2) /* indicate some errors are detected by RAS */ #define AMDGPU_CTX_QUERY2_FLAGS_RAS_CE (1<<3) #define AMDGPU_CTX_QUERY2_FLAGS_RAS_UE (1<<4) +/* indicate that the reset hasn't completed yet */ +#define AMDGPU_CTX_QUERY2_FLAGS_RESET_IN_PROGRESS (1<<5) /* Context priority level */ #define AMDGPU_CTX_PRIORITY_UNSET -2048 @@ -529,6 +562,8 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_MTYPE_UC (4 << 5) /* Use Read Write MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_RW (5 << 5) +/* don't allocate MALL */ +#define AMDGPU_VM_PAGE_NOALLOC (1 << 9) struct drm_amdgpu_gem_va { /** GEM object handle */ @@ -559,7 +594,8 @@ struct drm_amdgpu_gem_va { */ #define AMDGPU_HW_IP_VCN_ENC 7 #define AMDGPU_HW_IP_VCN_JPEG 8 -#define AMDGPU_HW_IP_NUM 9 +#define AMDGPU_HW_IP_VPE 9 +#define AMDGPU_HW_IP_NUM 10 #define AMDGPU_HW_IP_INSTANCE_MAX_COUNT 1 @@ -572,6 +608,7 @@ struct drm_amdgpu_gem_va { #define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07 #define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT 0x08 #define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL 0x09 +#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW 0x0a struct drm_amdgpu_cs_chunk { __u32 chunk_id; @@ -688,6 +725,15 @@ struct drm_amdgpu_cs_chunk_data { }; }; +#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1 + +struct drm_amdgpu_cs_chunk_cp_gfx_shadow { + __u64 shadow_va; + __u64 csa_va; + __u64 gds_va; + 
__u64 flags; +}; + /* * Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU * @@ -695,6 +741,7 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_IDS_FLAGS_FUSION 0x1 #define AMDGPU_IDS_FLAGS_PREEMPTION 0x2 #define AMDGPU_IDS_FLAGS_TMZ 0x4 +#define AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD 0x8 /* indicate if acceleration can be working */ #define AMDGPU_INFO_ACCEL_WORKING 0x00 @@ -747,6 +794,20 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_DMCUB 0x14 /* Subquery id: Query TOC firmware version */ #define AMDGPU_INFO_FW_TOC 0x15 + /* Subquery id: Query CAP firmware version */ + #define AMDGPU_INFO_FW_CAP 0x16 + /* Subquery id: Query GFX RLCP firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCP 0x17 + /* Subquery id: Query GFX RLCV firmware version */ + #define AMDGPU_INFO_FW_GFX_RLCV 0x18 + /* Subquery id: Query MES_KIQ firmware version */ + #define AMDGPU_INFO_FW_MES_KIQ 0x19 + /* Subquery id: Query MES firmware version */ + #define AMDGPU_INFO_FW_MES 0x1a + /* Subquery id: Query IMU firmware version */ + #define AMDGPU_INFO_FW_IMU 0x1b + /* Subquery id: Query VPE firmware version */ + #define AMDGPU_INFO_FW_VPE 0x1c /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f @@ -800,6 +861,10 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK 0x8 /* Subquery id: Query GPU stable pstate memory clock */ #define AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK 0x9 + /* Subquery id: Query GPU peak pstate shader clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_SCLK 0xa + /* Subquery id: Query GPU peak pstate memory clock */ + #define AMDGPU_INFO_SENSOR_PEAK_PSTATE_GFX_MCLK 0xb /* Number of VRAM page faults on CPU access. 
*/ #define AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS 0x1E #define AMDGPU_INFO_VRAM_LOST_COUNTER 0x1F @@ -839,6 +904,10 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_VIDEO_CAPS_DECODE 0 /* Subquery id: Encode */ #define AMDGPU_INFO_VIDEO_CAPS_ENCODE 1 +/* Query the max number of IBs per gang per submission */ +#define AMDGPU_INFO_MAX_IBS 0x22 +/* query last page fault info */ +#define AMDGPU_INFO_GPUVM_FAULT 0x23 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0 #define AMDGPU_INFO_MMR_SE_INDEX_MASK 0xff @@ -990,6 +1059,8 @@ struct drm_amdgpu_info_vbios { #define AMDGPU_VRAM_TYPE_DDR4 8 #define AMDGPU_VRAM_TYPE_GDDR6 9 #define AMDGPU_VRAM_TYPE_DDR5 10 +#define AMDGPU_VRAM_TYPE_LPDDR4 11 +#define AMDGPU_VRAM_TYPE_LPDDR5 12 struct drm_amdgpu_info_device { /** PCI Device ID */ @@ -1015,7 +1086,8 @@ struct drm_amdgpu_info_device { __u32 enabled_rb_pipes_mask; __u32 num_rb_pipes; __u32 num_hw_gfx_contexts; - __u32 _pad; + /* PCIe version (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_gen; __u64 ids_flags; /** Starting virtual address for UMDs. */ __u64 virtual_address_offset; @@ -1062,7 +1134,8 @@ struct drm_amdgpu_info_device { __u32 gs_prim_buffer_depth; /* max gs wavefront per vgt*/ __u32 max_gs_waves_per_vgt; - __u32 _pad1; + /* PCIe number of lanes (the smaller of the GPU and the CPU/motherboard) */ + __u32 pcie_num_lanes; /* always on cu bitmap */ __u32 cu_ao_bitmap[4][4]; /** Starting high virtual address for UMDs. */ @@ -1073,6 +1146,26 @@ struct drm_amdgpu_info_device { __u32 pa_sc_tile_steering_override; /* disabled TCCs */ __u64 tcc_disabled_mask; + __u64 min_engine_clock; + __u64 min_memory_clock; + /* The following fields are only set on gfx11+, older chips set 0. 
*/ + __u32 tcp_cache_size; /* AKA GL0, VMEM cache */ + __u32 num_sqc_per_wgp; + __u32 sqc_data_cache_size; /* AKA SMEM cache */ + __u32 sqc_inst_cache_size; + __u32 gl1c_cache_size; + __u32 gl2c_cache_size; + __u64 mall_size; /* AKA infinity cache */ + /* high 32 bits of the rb pipes mask */ + __u32 enabled_rb_pipes_mask_hi; + /* shadow area size for gfx11 */ + __u32 shadow_size; + /* shadow area base virtual alignment for gfx11 */ + __u32 shadow_alignment; + /* context save area size for gfx11 */ + __u32 csa_size; + /* context save area base virtual alignment for gfx11 */ + __u32 csa_alignment; }; struct drm_amdgpu_info_hw_ip { @@ -1087,7 +1180,8 @@ struct drm_amdgpu_info_hw_ip { __u32 ib_size_alignment; /** Bitmask of available rings. Bit 0 means ring 0, etc. */ __u32 available_rings; - __u32 _pad; + /** version info: bits 23:16 major, 15:8 minor, 7:0 revision */ + __u32 ip_discovery_version; }; struct drm_amdgpu_info_num_handles { @@ -1139,6 +1233,20 @@ struct drm_amdgpu_info_video_caps { struct drm_amdgpu_info_video_codec_info codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT]; }; +#define AMDGPU_VMHUB_TYPE_MASK 0xff +#define AMDGPU_VMHUB_TYPE_SHIFT 0 +#define AMDGPU_VMHUB_TYPE_GFX 0 +#define AMDGPU_VMHUB_TYPE_MM0 1 +#define AMDGPU_VMHUB_TYPE_MM1 2 +#define AMDGPU_VMHUB_IDX_MASK 0xff00 +#define AMDGPU_VMHUB_IDX_SHIFT 8 + +struct drm_amdgpu_info_gpuvm_fault { + __u64 addr; + __u32 status; + __u32 vmhub; +}; + /* * Supported GPU families */ @@ -1152,7 +1260,12 @@ struct drm_amdgpu_info_video_caps { #define AMDGPU_FAMILY_RV 142 /* Raven */ #define AMDGPU_FAMILY_NV 143 /* Navi10 */ #define AMDGPU_FAMILY_VGH 144 /* Van Gogh */ +#define AMDGPU_FAMILY_GC_11_0_0 145 /* GC 11.0.0 */ #define AMDGPU_FAMILY_YC 146 /* Yellow Carp */ +#define AMDGPU_FAMILY_GC_11_0_1 148 /* GC 11.0.1 */ +#define AMDGPU_FAMILY_GC_10_3_6 149 /* GC 10.3.6 */ +#define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ +#define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ #if defined(__cplusplus) } |
From: GitLab M. <git...@ke...> - 2023-11-20 12:58:17
|
core-symbols.txt | 1 include/drm/drm.h | 101 +++++++++++++++++++++++++++++++++++++++++-------- include/drm/drm_mode.h | 29 +++++++++++++- xf86drmMode.c | 10 ++++ xf86drmMode.h | 7 +++ 5 files changed, 131 insertions(+), 17 deletions(-) New commits: commit 07f4948bfc60288f179dafcf4ea0c0c8f9a558fd Author: Simon Ser <co...@em...> Date: Fri Oct 27 13:58:12 2023 +0200 xf86drmMode: add drmModeCloseFB() Add a wrapper for the new CLOSEFB IOCTL, to close a framebuffer without implicitly disabling planes or CRTCs. See https://lore.kernel.org/dri-devel/202...@em.../ Signed-off-by: Simon Ser <co...@em...> diff --git a/core-symbols.txt b/core-symbols.txt index 8b22f3a1..766c342d 100644 --- a/core-symbols.txt +++ b/core-symbols.txt @@ -104,6 +104,7 @@ drmModeAtomicGetCursor drmModeAtomicMerge drmModeAtomicSetCursor drmModeAttachMode +drmModeCloseFB drmModeConnectorGetPossibleCrtcs drmModeConnectorSetProperty drmModeCreateDumbBuffer diff --git a/xf86drmMode.c b/xf86drmMode.c index 65d54d78..a4873a0f 100644 --- a/xf86drmMode.c +++ b/xf86drmMode.c @@ -320,6 +320,16 @@ drm_public int drmModeRmFB(int fd, uint32_t bufferId) return DRM_IOCTL(fd, DRM_IOCTL_MODE_RMFB, &bufferId); } +drm_public int drmModeCloseFB(int fd, uint32_t buffer_id) +{ + struct drm_mode_closefb closefb; + + memclear(closefb); + closefb.fb_id = buffer_id; + + return DRM_IOCTL(fd, DRM_IOCTL_MODE_CLOSEFB, &closefb); +} + drm_public drmModeFBPtr drmModeGetFB(int fd, uint32_t buf) { struct drm_mode_fb_cmd info; diff --git a/xf86drmMode.h b/xf86drmMode.h index b6410ab8..08487887 100644 --- a/xf86drmMode.h +++ b/xf86drmMode.h @@ -314,6 +314,13 @@ int drmModeAddFB2WithModifiers(int fd, uint32_t width, uint32_t height, */ extern int drmModeRmFB(int fd, uint32_t bufferId); +/** + * Close a framebuffer. + * + * Same as drmModeRmFB(), except it doesn't implicitly disable planes and CRTCs. + */ +extern int drmModeCloseFB(int fd, uint32_t buffer_id); + /** * Mark a region of a framebuffer as dirty. 
*/ commit 7b60986640c6584f112222fb65b23a2fc326ce6e Author: Simon Ser <co...@em...> Date: Mon Nov 20 11:12:35 2023 +0100 Sync headers with drm-next Synchronize drm.h, drm_mode.h and drm_fourcc.h to drm-next. Generated using make headers_install. Generated from drm-next branch commit c79b972eb88b077d2765e7790d0902b3dc94d55c Signed-off-by: Simon Ser <co...@em...> diff --git a/include/drm/drm.h b/include/drm/drm.h index 78805ad0..a36cd937 100644 --- a/include/drm/drm.h +++ b/include/drm/drm.h @@ -667,8 +667,11 @@ struct drm_gem_open { * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT * and &DRM_PRIME_CAP_EXPORT. * - * PRIME buffers are exposed as dma-buf file descriptors. See - * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". + * Starting from kernel version 6.6, both &DRM_PRIME_CAP_IMPORT and + * &DRM_PRIME_CAP_EXPORT are always advertised. + * + * PRIME buffers are exposed as dma-buf file descriptors. + * See :ref:`prime_buffer_sharing`. */ #define DRM_CAP_PRIME 0x5 /** @@ -676,6 +679,8 @@ struct drm_gem_open { * * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. */ #define DRM_PRIME_CAP_IMPORT 0x1 /** @@ -683,6 +688,8 @@ struct drm_gem_open { * * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + * + * Starting from kernel version 6.6, this bit is always set in &DRM_CAP_PRIME. */ #define DRM_PRIME_CAP_EXPORT 0x2 /** @@ -750,15 +757,14 @@ struct drm_gem_open { /** * DRM_CAP_SYNCOBJ * - * If set to 1, the driver supports sync objects. See - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + * If set to 1, the driver supports sync objects. See :ref:`drm_sync_objects`. 
*/ #define DRM_CAP_SYNCOBJ 0x13 /** * DRM_CAP_SYNCOBJ_TIMELINE * * If set to 1, the driver supports timeline operations on sync objects. See - * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + * :ref:`drm_sync_objects`. */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 @@ -1122,6 +1128,26 @@ extern "C" { #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) +/** + * DRM_IOCTL_MODE_CREATE_DUMB - Create a new dumb buffer object. + * + * KMS dumb buffers provide a very primitive way to allocate a buffer object + * suitable for scanout and map it for software rendering. KMS dumb buffers are + * not suitable for hardware-accelerated rendering nor video decoding. KMS dumb + * buffers are not suitable to be displayed on any other device than the KMS + * device where they were allocated from. Also see + * :ref:`kms_dumb_buffer_objects`. + * + * The IOCTL argument is a struct drm_mode_create_dumb. + * + * User-space is expected to create a KMS dumb buffer via this IOCTL, then add + * it as a KMS framebuffer via &DRM_IOCTL_MODE_ADDFB and map it via + * &DRM_IOCTL_MODE_MAP_DUMB. + * + * &DRM_CAP_DUMB_BUFFER indicates whether this IOCTL is supported. + * &DRM_CAP_DUMB_PREFERRED_DEPTH and &DRM_CAP_DUMB_PREFER_SHADOW indicate + * driver preferences for dumb buffers. + */ #define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb) #define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb) #define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb) @@ -1186,6 +1212,26 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_EVENTFD DRM_IOWR(0xCF, struct drm_syncobj_eventfd) +/** + * DRM_IOCTL_MODE_CLOSEFB - Close a framebuffer. + * + * This closes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. 
+ * + * This IOCTL is similar to &DRM_IOCTL_MODE_RMFB, except it doesn't disable + * planes and CRTCs. As long as the framebuffer is used by a plane, it's kept + * alive. When the plane no longer uses the framebuffer (because the + * framebuffer is replaced with another one, or the plane is disabled), the + * framebuffer is cleaned up. + * + * This is useful to implement flicker-free transitions between two processes. + * + * Depending on the threat model, user-space may want to ensure that the + * framebuffer doesn't expose any sensitive user information: closed + * framebuffers attached to a plane can be read back by the next DRM master. + */ +#define DRM_IOCTL_MODE_CLOSEFB DRM_IOWR(0xD0, struct drm_mode_closefb) + /* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. @@ -1197,25 +1243,50 @@ extern "C" { #define DRM_COMMAND_BASE 0x40 #define DRM_COMMAND_END 0xA0 -/* - * Header for events written back to userspace on the drm fd. The - * type defines the type of event, the length specifies the total - * length of the event (including the header), and user_data is - * typically a 64 bit value passed with the ioctl that triggered the - * event. A read on the drm fd will always only return complete - * events, that is, if for example the read buffer is 100 bytes, and - * there are two 64 byte events pending, only one will be returned. +/** + * struct drm_event - Header for DRM events + * @type: event type. + * @length: total number of payload bytes (including header). + * + * This struct is a header for events written back to user-space on the DRM FD. + * A read on the DRM FD will always only return complete events: e.g. if the + * read buffer is 100 bytes large and there are two 64 byte events pending, + * only one will be returned. * - * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and - * up are chipset specific. 
+ * Event types 0 - 0x7fffffff are generic DRM events, 0x80000000 and + * up are chipset specific. Generic DRM events include &DRM_EVENT_VBLANK, + * &DRM_EVENT_FLIP_COMPLETE and &DRM_EVENT_CRTC_SEQUENCE. */ struct drm_event { __u32 type; __u32 length; }; +/** + * DRM_EVENT_VBLANK - vertical blanking event + * + * This event is sent in response to &DRM_IOCTL_WAIT_VBLANK with the + * &_DRM_VBLANK_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_VBLANK 0x01 +/** + * DRM_EVENT_FLIP_COMPLETE - page-flip completion event + * + * This event is sent in response to an atomic commit or legacy page-flip with + * the &DRM_MODE_PAGE_FLIP_EVENT flag set. + * + * The event payload is a struct drm_event_vblank. + */ #define DRM_EVENT_FLIP_COMPLETE 0x02 +/** + * DRM_EVENT_CRTC_SEQUENCE - CRTC sequence event + * + * This event is sent in response to &DRM_IOCTL_CRTC_QUEUE_SEQUENCE. + * + * The event payload is a struct drm_event_crtc_sequence. + */ #define DRM_EVENT_CRTC_SEQUENCE 0x03 struct drm_event_vblank { diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 92d96a2b..09e7a471 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -488,6 +488,9 @@ struct drm_mode_get_connector { * This is not an object ID. This is a per-type connector number. Each * (type, type_id) combination is unique across all connectors of a DRM * device. + * + * The (type, type_id) combination is not a stable identifier: the + * type_id can change depending on the driver probe order. */ __u32 connector_type_id; @@ -1029,13 +1032,25 @@ struct drm_mode_crtc_page_flip_target { __u64 user_data; }; -/* create a dumb scanout buffer */ +/** + * struct drm_mode_create_dumb - Create a KMS dumb buffer for scanout. 
+ * @height: buffer height in pixels + * @width: buffer width in pixels + * @bpp: bits per pixel + * @flags: must be zero + * @handle: buffer object handle + * @pitch: number of bytes between two consecutive lines + * @size: size of the whole buffer in bytes + * + * User-space fills @height, @width, @bpp and @flags. If the IOCTL succeeds, + * the kernel fills @handle, @pitch and @size. + */ struct drm_mode_create_dumb { __u32 height; __u32 width; __u32 bpp; __u32 flags; - /* handle, pitch, size will be returned */ + __u32 handle; __u32 pitch; __u64 size; @@ -1308,6 +1323,16 @@ struct drm_mode_rect { __s32 y2; }; +/** + * struct drm_mode_closefb + * @fb_id: Framebuffer ID. + * @pad: Must be zero. + */ +struct drm_mode_closefb { + __u32 fb_id; + __u32 pad; +}; + #if defined(__cplusplus) } #endif |
From: GitLab M. <git...@ke...> - 2023-12-21 11:52:38
|
amdgpu/amdgpu-symbols.txt | 1 + amdgpu/amdgpu.h | 5 +++++ amdgpu/amdgpu_vamgr.c | 5 +++++ meson.build | 2 +- 4 files changed, 12 insertions(+), 1 deletion(-) New commits: commit fc5f2239f3b7abacb9398b2f939f538dd195e860 Author: Marek Olšák <mar...@am...> Date: Thu Dec 21 00:32:13 2023 -0500 meson: bump libdrm version to 2.4.119 Reviewed-by: Pierre-Eric Pelloux-Prayer <pie...@am...> diff --git a/meson.build b/meson.build index c0a7e4ca..ca609fc3 100644 --- a/meson.build +++ b/meson.build @@ -21,7 +21,7 @@ project( 'libdrm', ['c'], - version : '2.4.118', + version : '2.4.119', license : 'MIT', meson_version : '>= 0.59', default_options : ['buildtype=debugoptimized', 'c_std=c11'], commit 85343095fd4c8db013936c15abe841f5e4e2deb4 Author: Marek Olšák <mar...@am...> Date: Thu Dec 21 00:30:41 2023 -0500 amdgpu: add amdgpu_va_get_start_addr for Mesa Reviewed-by: Pierre-Eric Pelloux-Prayer <pie...@am...> diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt index 1f7f56ce..530b343b 100644 --- a/amdgpu/amdgpu-symbols.txt +++ b/amdgpu/amdgpu-symbols.txt @@ -73,6 +73,7 @@ amdgpu_query_video_caps_info amdgpu_read_mm_registers amdgpu_va_range_alloc amdgpu_va_range_free +amdgpu_va_get_start_addr amdgpu_va_range_query amdgpu_vm_reserve_vmid amdgpu_vm_unreserve_vmid diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h index 1cd96f86..9bdbf366 100644 --- a/amdgpu/amdgpu.h +++ b/amdgpu/amdgpu.h @@ -1384,6 +1384,11 @@ int amdgpu_va_range_alloc(amdgpu_device_handle dev, */ int amdgpu_va_range_free(amdgpu_va_handle va_range_handle); +/** + * Return the starting address of the allocated virtual address range. 
+ */ +uint64_t amdgpu_va_get_start_addr(amdgpu_va_handle va_handle); + /** * Query virtual address range * diff --git a/amdgpu/amdgpu_vamgr.c b/amdgpu/amdgpu_vamgr.c index 28a7a7d4..2c4c9dba 100644 --- a/amdgpu/amdgpu_vamgr.c +++ b/amdgpu/amdgpu_vamgr.c @@ -295,3 +295,8 @@ drm_public int amdgpu_va_range_free(amdgpu_va_handle va_range_handle) free(va_range_handle); return 0; } + +drm_public uint64_t amdgpu_va_get_start_addr(amdgpu_va_handle va_handle) +{ + return va_handle->address; +} |