[Valgrind-developers] [valgrind] Bug 513533 - Support macOS 11.0 (Big Sur)

SourceForge Headquarters 1320 Columbia Street Suite 310 San Diego, CA 92101 +1 (858) 422-6466

https://sourceware.org/cgit/valgrind/commit/?id=eab939e9c9a8893556b83f68424d087bbafeb5d9

commit eab939e9c9a8893556b83f68424d087bbafeb5d9
Author: Paul Floyd <pj...@wa...>
Date:   Sat Dec 20 13:01:26 2025 +0100

    Bug 513533 - Support macOS 11.0 (Big Sur)
    
    Most of the work for this was done by Louis Brunner.
    Thanks Louis.

Diff:
---
 NEWS                                            |   8 +-
 configure.ac                                    |   4 +
 coregrind/Makefile.am                           |   1 +
 coregrind/fixup_macho_loadcmds.c                |  24 +-
 coregrind/m_debuginfo/debuginfo.c               |  60 +++
 coregrind/m_debuginfo/image.c                   |  54 ++-
 coregrind/m_debuginfo/priv_storage.h            |   6 +
 coregrind/m_initimg/initimg-darwin.c            |  54 ++-
 coregrind/m_mach/dyld_cache.c                   | 504 ++++++++++++++++++++++++
 coregrind/m_mach/priv_dyld_internals.h          | 146 +++++++
 coregrind/m_main.c                              |  12 +
 coregrind/m_replacemalloc/vg_replace_malloc.c   |  33 +-
 coregrind/m_syswrap/priv_syswrap-darwin.h       |  21 +-
 coregrind/m_syswrap/syswrap-darwin.c            | 154 +++++++-
 coregrind/pub_core_debuginfo.h                  |   4 +
 coregrind/pub_core_initimg.h                    |   3 +
 darwin-drd.supp                                 |   7 +
 darwin-helgrind.supp                            |  14 +
 darwin.supp                                     |  27 ++
 helgrind/tests/Makefile.am                      |   1 +
 helgrind/tests/filter_darwin.awk                | 152 +++++++
 helgrind/tests/filter_stderr.in                 |   3 +-
 include/vki/vki-darwin.h                        |  11 +
 include/vki/vki-scnums-darwin.h                 |  14 +
 memcheck/mc_main.c                              |   4 +-
 memcheck/tests/Makefile.am                      |   1 +
 memcheck/tests/memalign_args.stderr.exp-darwin3 |  11 +
 none/tests/darwin/apple-main-arg.c              |  12 +-
 none/tests/filter_fdleak                        |  17 +-
 29 files changed, 1320 insertions(+), 42 deletions(-)

diff --git a/NEWS b/NEWS
index 6360e83a5b..51f0b301a8 100644
--- a/NEWS
+++ b/NEWS
@@ -6,7 +6,7 @@ PPC32/Linux, PPC64BE/Linux, PPC64LE/Linux, S390X/Linux, MIPS32/Linux,
 MIPS64/Linux, RISCV64/Linux, ARM/Android, ARM64/Android, MIPS32/Android,
 X86/Android, X86/Solaris, AMD64/Solaris, X86/macOS, AMD64/macOS. 
 X86/FreeBSD, AMD64/FreeBSD and ARM64/FreeBSD. There is preliminary support
-for nanoMIPS/Linux. macOS is supported up to version 10.15 Catalina.
+for nanoMIPS/Linux. macOS is supported up to version 11 Big Sur (amd64 only).
 
 * ==================== CORE CHANGES ===================
 
@@ -14,7 +14,11 @@ for nanoMIPS/Linux. macOS is supported up to version 10.15 Catalina.
 
 s390x: Machine models older than z196 are no longer supported.
 
-Initial support for macOS 10.14 Mojave has been added.
+Support for the following macOS versions has been added:
+10.13 High Sierra (bug fixes)
+10.14 Mojave
+10.15 Catalina
+11.0  Big Sur
 
 * ==================== TOOL CHANGES ===================
 
diff --git a/configure.ac b/configure.ac
index 55d26b2a51..528e44b300 100644
--- a/configure.ac
+++ b/configure.ac
@@ -550,6 +550,10 @@ case "${host_os}" in
 		  AC_MSG_RESULT([Darwin 19.x (${kernel}) / macOS 10.15 Catalina])
 		  DARWIN_VERS=$DARWIN_10_15
 		  ;;
+	     20.*)
+		  AC_MSG_RESULT([Darwin 20.x (${kernel}) / macOS 11 Big Sur])
+		  DARWIN_VERS=$DARWIN_11_00
+		  ;;
 	     *)
 		  AC_MSG_RESULT([unsupported (${darwin_platform} ${kernel})])
 		  AC_MSG_ERROR([Valgrind works on Darwin 12.x-19.x (Mac OS X 10.8-10.11, macOS 10.12-10.15)])
diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am
index 90d921db28..f94aaf471b 100644
--- a/coregrind/Makefile.am
+++ b/coregrind/Makefile.am
@@ -433,6 +433,7 @@ COREGRIND_SOURCES_COMMON = \
 	m_mach/mach_traps-x86-darwin.S \
 	m_mach/mach_traps-amd64-darwin.S \
 	m_mach/mig_strncpy.c \
+        m_mach/dyld_cache.c \
 	m_replacemalloc/replacemalloc_core.c \
 	m_scheduler/sched-lock.c \
 	m_scheduler/sched-lock-generic.c \
diff --git a/coregrind/fixup_macho_loadcmds.c b/coregrind/fixup_macho_loadcmds.c
index b751829fbd..0031177ee9 100644
--- a/coregrind/fixup_macho_loadcmds.c
+++ b/coregrind/fixup_macho_loadcmds.c
@@ -117,10 +117,18 @@
 #include <mach-o/fat.h>
 #include <mach/i386/thread_status.h>
 
-/* Check that DARWIN_VERS is defined */
+/* Get hold of DARWIN_VERS, and check it has a sane value. */
 #include "config.h"
-#if !defined(DARWIN_VERS)
-#  error "DARWIN_VERS not defind. This file only compiles on Darwin."
+#if DARWIN_VERS != DARWIN_10_5 && DARWIN_VERS != DARWIN_10_6 \
+    && DARWIN_VERS != DARWIN_10_7 && DARWIN_VERS != DARWIN_10_8 \
+    && DARWIN_VERS != DARWIN_10_9 && DARWIN_VERS != DARWIN_10_10 \
+    && DARWIN_VERS != DARWIN_10_11 && DARWIN_VERS != DARWIN_10_12 \
+    && DARWIN_VERS != DARWIN_10_13 && DARWIN_VERS != DARWIN_10_14 \
+    && DARWIN_VERS != DARWIN_10_15 && DARWIN_VERS != DARWIN_11_00 \
+    && DARWIN_VERS != DARWIN_12_00 && DARWIN_VERS != DARWIN_13_00 \
+    && DARWIN_VERS != DARWIN_14_00 && DARWIN_VERS != DARWIN_15_00 \
+    && DARWIN_VERS != DARWIN_15_04 && DARWIN_VERS != DARWIN_26_00
+#  error "Unknown DARWIN_VERS value.  This file only compiles on Darwin."
 #endif
 
 
@@ -267,7 +275,7 @@ static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
    { struct fat_header*  fh_be;
      struct fat_header   fh;
      struct mach_header_64* mh;
-     
+
      // Assume initially that we have a thin image, and update
      // these if it turns out to be fat.
      ii->macho_img     = ii->img;
@@ -290,7 +298,7 @@ static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
                           + fh.nfat_arch * sizeof(struct fat_arch))
            fail("Invalid Mach-O file (1 too small).");
 
-        for (f = 0, arch_be = (struct fat_arch *)(fh_be+1); 
+        for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
              f < fh.nfat_arch;
              f++, arch_be++) {
            Int cputype;
@@ -573,7 +581,7 @@ void modify_macho_loadcmds ( HChar* filename,
    seg__pagezero->vmaddr = 0;
 #  endif
 
-  out:   
+  out:
    if (ii.img)
       unmap_image(&ii);
 }
@@ -606,7 +614,7 @@ int main ( int argc, char** argv )
 
    if (argc != 4)
       fail("args: -stack_addr-arg -stack_size-arg "
-           "name-of-tool-executable-to-modify"); 
+           "name-of-tool-executable-to-modify");
 
    r= sscanf(argv[1], "0x%llx", &req_stack_addr);
    if (r != 1) fail("invalid stack_addr arg");
@@ -621,7 +629,7 @@ int main ( int argc, char** argv )
    if (!is_plausible_tool_exe_name(argv[3]))
       fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
 
-   fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n", 
+   fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
            argv[3] );
    modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
                           req_stack_size );
diff --git a/coregrind/m_debuginfo/debuginfo.c b/coregrind/m_debuginfo/debuginfo.c
index 66430668b5..6ed649bc27 100644
--- a/coregrind/m_debuginfo/debuginfo.c
+++ b/coregrind/m_debuginfo/debuginfo.c
@@ -326,6 +326,10 @@ DebugInfo* alloc_DebugInfo( const HChar* filename )
       di->ddump_frames = VG_(clo_debug_dump_frames);
    }
 
+#if DARWIN_VERS >= DARWIN_11_00
+   di->from_memory = False;
+#endif
+
    return di;
 }
 
@@ -1880,6 +1884,62 @@ void VG_(di_notify_pdb_debuginfo)( Int fd_obj, Addr avma_obj,
 
 #endif /* defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_freebsd) */
 
+#if defined(VGO_darwin) && DARWIN_VERS >= DARWIN_11_00
+// Special version of VG_(di_notify_mmap) specifically to read debug info from the DYLD Shared Cache (DSC).
+// We only use this on macOS 11.0 and later, because Apple stopped shipping dylibs on disk then.
+
+ULong VG_(di_notify_dsc)( const HChar* filename, Addr header, SizeT len )
+{
+   DebugInfo* di;
+   Int rw_load_count;
+   const Bool       debug = VG_(debugLog_getLevel)() >= 3;
+
+   if (debug)
+      VG_(dmsg)("di_notify_dsc-1: %s at %#lx-%#lx\n", filename, header, header+len);
+
+   if (!ML_(check_macho_and_get_rw_loads_from_memory)( (const void*) header, len, &rw_load_count ))
+      return 0;
+
+   /* See if we have a DebugInfo for this filename.  If not,
+      create one. */
+   di = find_or_create_DebugInfo_for( filename );
+   vg_assert(di);
+
+   di->from_memory = True;
+
+   if (di->have_dinfo) {
+      if (debug)
+         VG_(dmsg)("di_notify_dsc-2x: "
+                   "ignoring mapping because we already read debuginfo "
+                   "for DebugInfo* %p\n", di);
+      return 0;
+   }
+
+   if (debug)
+      VG_(dmsg)("di_notify_dsc-2: "
+                "noting details in DebugInfo* at %p\n", di);
+
+   /* Note the details about the mapping. */
+   DebugInfoMapping map;
+   map.avma = header;
+   map.size = len;
+   map.foff = 0;
+   map.rx   = True;
+   map.rw   = False;
+   map.ro   = False;
+   VG_(addToXA)(di->fsm.maps, &map);
+
+   /* Update flags about what kind of mappings we've already seen. */
+   di->fsm.have_rx_map |= True;
+
+   vg_assert(!di->have_dinfo);
+
+   if (debug)
+      VG_(dmsg)("di_notify_dsc-3: "
+                "achieved accept state for %s\n", filename);
+   return di_notify_ACHIEVE_ACCEPT_STATE ( di );
+}
+#endif
 
 /*------------------------------------------------------------*/
 /*---                                                      ---*/
diff --git a/coregrind/m_debuginfo/image.c b/coregrind/m_debuginfo/image.c
index 21deabb356..7b5847820c 100644
--- a/coregrind/m_debuginfo/image.c
+++ b/coregrind/m_debuginfo/image.c
@@ -139,8 +139,8 @@ static Bool is_sane_CEnt ( const HChar* who, const DiImage* img, UInt i )
    if (!(ce->used <= ce->size)) goto fail;
    if (ce->fromC) {
       // ce->size can be anything, but ce->used must be either the
-      // same or zero, in the case that it hasn't been set yet.  
-      // Similarly, ce->off must either be above the real_size 
+      // same or zero, in the case that it hasn't been set yet.
+      // Similarly, ce->off must either be above the real_size
       // threshold, or zero if it hasn't been set yet.
       if (!(ce->off >= img->real_size || ce->off == 0)) goto fail;
       if (!(ce->off + ce->used <= img->size)) goto fail;
@@ -432,7 +432,7 @@ static Bool parse_Frame_asciiz ( const Frame* fr, const HChar* tag,
 static Bool parse_Frame_le64_le64_le64_bytes (
                const Frame* fr, const HChar* tag,
                /*OUT*/ULong* n1, /*OUT*/ULong* n2, /*OUT*/ULong* n3,
-               /*OUT*/UChar** data, /*OUT*/ULong* n_data 
+               /*OUT*/UChar** data, /*OUT*/ULong* n_data
             )
 {
    vg_assert(VG_(strlen)(tag) == 4);
@@ -581,13 +581,15 @@ static void set_CEnt ( const DiImage* img, UInt entNo, DiOffT off )
       UInt delay = now - t_last;
       t_last = now;
       nread += len;
-      VG_(printf)("XXXXXXXX (tot %'llu)  read %'lu  offset %'llu  delay %'u\n", 
+      VG_(printf)("XXXXXXXX (tot %'llu)  read %'lu  offset %'llu  delay %'u\n",
                   nread, len, off, delay);
    }
 
    if (img->source.is_local) {
       // Simple: just read it
-
+      if (img->source.fd == -1) {
+        VG_(memcpy)(&ce->data[0], ((const char *)img->source.session_id) + off, len);
+      } else {
       // PJF not quite so simple - see
       // https://bugs.kde.org/show_bug.cgi?id=480405
       // if img->source.fd was opened with O_DIRECT the memory needs
@@ -610,6 +612,7 @@ static void set_CEnt ( const DiImage* img, UInt entNo, DiOffT off )
       }
 #endif
       vg_assert(!sr_isError(sr));
+      }
    } else {
       // Not so simple: poke the server
       vg_assert(img->source.session_id > 0);
@@ -671,7 +674,7 @@ static void set_CEnt ( const DiImage* img, UInt entNo, DiOffT off )
      end_of_else_clause:
       {}
    }
-   
+
    ce->off  = off;
    ce->used = len;
    ce->fromC = False;
@@ -888,7 +891,7 @@ DiImage* ML_(img_from_local_file)(const HChar* fullpath)
        || /* size is unrepresentable as a SizeT */
           size != (DiOffT)(SizeT)(size)) {
       VG_(close)(sr_Res(fd));
-      return NULL; 
+      return NULL;
    }
 
    DiImage* img = ML_(dinfo_zalloc)("di.image.ML_iflf.1", sizeof(DiImage));
@@ -958,6 +961,39 @@ DiImage* ML_(img_from_fd)(Int fd, const HChar* fullpath)
    return img;
 }
 
+/* Create an image from a place in memory. This is to support certain use cases (DSC on macOS)
+   where images are already loaded in memory without changing every usage of DiImage. */
+DiImage* ML_(img_from_memory)(Addr a, SizeT size, const HChar* fullpath)
+{
+   if (size == 0 || size == DiOffT_INVALID
+       || /* size is unrepresentable as a SizeT */
+          size != (DiOffT)(SizeT)(size)) {
+      return NULL;
+   }
+
+   DiImage* img = ML_(dinfo_zalloc)("di.image.ML_iflf.1", sizeof(DiImage));
+   img->source.is_local   = True;
+   img->source.fd         = -1;
+   img->source.session_id = a; // FIXME: hacky, but avoids a new variable
+   img->size              = size;
+   img->real_size         = size;
+   img->ces_used          = 0;
+   img->source.name       = ML_(dinfo_strdup)("di.image.ML_iflf.2", fullpath);
+   img->cslc              = NULL;
+   img->cslc_size         = 0;
+   img->cslc_used         = 0;
+
+   /* Force the zeroth entry to be the first chunk of the file.
+      That's likely to be the first part that's requested anyway, and
+      loading it at this point forcing img->cent[0] to always be
+      non-empty, thereby saving us an is-it-empty check on the fast
+      path in get(). */
+   UInt entNo = alloc_CEnt(img, CACHE_ENTRY_SIZE, False/*!fromC*/);
+   vg_assert(entNo == 0);
+   set_CEnt(img, 0, 0);
+
+   return img;
+}
 
 
 /* Create an image from a file on a remote debuginfo server.  This is
@@ -984,7 +1020,7 @@ DiImage* ML_(img_from_di_server)(const HChar* filename,
    if (!set_blocking(sd))
       return NULL;
    Int one = 1;
-   Int sr = VG_(setsockopt)(sd, VKI_IPPROTO_TCP, VKI_TCP_NODELAY, 
+   Int sr = VG_(setsockopt)(sd, VKI_IPPROTO_TCP, VKI_TCP_NODELAY,
                             &one, sizeof(one));
    vg_assert(sr == 0);
 
@@ -1116,9 +1152,11 @@ void ML_(img_done)(DiImage* img)
 {
    vg_assert(img != NULL);
    if (img->source.is_local) {
+      if (img->source.fd != -1) {
       /* Close the file; nothing else to do. */
       vg_assert(img->source.session_id == 0);
       VG_(close)(img->source.fd);
+      }
    } else {
       /* Close the socket.  The server can detect this and will scrub
          the connection when it happens, so there's no need to tell it
diff --git a/coregrind/m_debuginfo/priv_storage.h b/coregrind/m_debuginfo/priv_storage.h
index c38dfd76f7..cd1ac3431e 100644
--- a/coregrind/m_debuginfo/priv_storage.h
+++ b/coregrind/m_debuginfo/priv_storage.h
@@ -1094,6 +1094,12 @@ struct _DebugInfo {
       This helps performance a lot during ML_(addLineInfo) etc., which can
       easily be invoked hundreds of thousands of times. */
    DebugInfoMapping* last_rx_map;
+
+#if DARWIN_VERS >= DARWIN_11_00
+   /* Indicate that this debug info was loaded from memory (i.e. DSC)
+      rather than from a file. This means that some data might be missing (e.g. rw data). */
+   Bool from_memory;
+#endif
 };
 
 /* --------------------- functions --------------------- */
diff --git a/coregrind/m_initimg/initimg-darwin.c b/coregrind/m_initimg/initimg-darwin.c
index c15c023723..4a8e8a0d78 100644
--- a/coregrind/m_initimg/initimg-darwin.c
+++ b/coregrind/m_initimg/initimg-darwin.c
@@ -43,12 +43,12 @@
 #include "pub_core_mallocfree.h"
 #include "pub_core_machine.h"
 #include "pub_core_ume.h"
+#include "pub_core_mach.h"
 #include "pub_core_options.h"
 #include "pub_core_tooliface.h"       /* VG_TRACK */
 #include "pub_core_threadstate.h"     /* ThreadArchState */
 #include "pub_core_pathscan.h"        /* find_executable */
 #include "pub_core_initimg.h"         /* self */
-#include "pub_core_mach.h"
 
 
 /*====================================================================*/
@@ -98,9 +98,14 @@ static void load_client ( /*OUT*/ExeInfo* info,
    Also, remove any binding for VALGRIND_LAUNCHER=.  The client should
    not be able to see this.
 
+   Before macOS 11:
    Also, add DYLD_SHARED_REGION=avoid, because V doesn't know how 
    to process the dyld shared cache file.
 
+   Since macOS 11:
+   Use DYLD_SHARED_REGION=use because system libraries aren't provided outside the cache anymore.
+   This means we need to start processing the dyld shared cache file.
+
    Also, change VYLD_* (mangled by launcher) back to DYLD_*.
 
    If this needs to handle any more variables it should be hacked
@@ -111,7 +116,11 @@ static HChar** setup_client_env ( HChar** origenv, const HChar* toolname)
    const HChar* preload_core    = "vgpreload_core";
    const HChar* ld_preload      = "DYLD_INSERT_LIBRARIES=";
    const HChar* dyld_cache      = "DYLD_SHARED_REGION=";
+#if DARWIN_VERS >= DARWIN_11_00
+   const HChar* dyld_cache_value= "use";
+#else
    const HChar* dyld_cache_value= "avoid";
+#endif
    const HChar* v_launcher      = VALGRIND_LAUNCHER "=";
    Int    ld_preload_len  = VG_(strlen)( ld_preload );
    Int    dyld_cache_len  = VG_(strlen)( dyld_cache );
@@ -194,7 +203,7 @@ static HChar** setup_client_env ( HChar** origenv, const HChar* toolname)
 
          *cpp = cp;
 
-         ld_preload_done = True;
+         dyld_cache_done = True;
       }
    }
 
@@ -371,8 +380,18 @@ Addr setup_client_stack( void*  init_sp,
    auxsize += 2 * sizeof(HChar **);
    if (info->executable_path) {
        stringsize += 1 + VG_(strlen)(info->executable_path);
+#if SDK_VERS >= SDK_10_14_6
+       stringsize += 16; // executable_path=
+#endif
    }
 
+#if defined(VGA_arm64)
+    // This is required so that dyld can load our dylib specified in DYLD_INSERT_LIBRARIES
+#define EXTRA_APPLE_ARG "arm64e_abi=all"
+    stringsize += VG_(strlen)(EXTRA_APPLE_ARG) + 1;
+    auxsize += sizeof(Word);
+#endif
+
    /* Darwin mach_header */
    if (info->dynamic) auxsize += sizeof(Word);
 
@@ -387,7 +406,7 @@ Addr setup_client_stack( void*  init_sp,
       auxsize +                               /* auxv */
       VG_ROUNDUP(stringsize, sizeof(Word));   /* strings (aligned) */
 
-   if (0) VG_(printf)("stacksize = %d\n", stacksize);
+   if (0) VG_(printf)("stacksize = %u\n", stacksize);
 
    /* client_SP is the client's stack pointer */
    client_SP = clstack_end + 1 - stacksize;
@@ -409,10 +428,10 @@ Addr setup_client_stack( void*  init_sp,
    VG_(clstk_end)  = clstack_end;
 
    if (0)
-      VG_(printf)("stringsize=%d auxsize=%d stacksize=%d maxsize=0x%x\n"
+      VG_(printf)("stringsize=%u auxsize=%u stacksize=%u maxsize=0x%x\n"
                   "clstack_start %p\n"
                   "clstack_end   %p\n",
-	          stringsize, auxsize, stacksize, (Int)clstack_max_size,
+	          stringsize, auxsize, stacksize, (UInt)clstack_max_size,
                   (void*)clstack_start, (void*)clstack_end);
 
    /* ==================== allocate space ==================== */
@@ -451,11 +470,25 @@ Addr setup_client_stack( void*  init_sp,
       *ptr = (Addr)copy_str(&strtab, *cpp);
    *ptr++ = 0;
 
-   /* --- executable_path + NULL --- */
-   if (info->executable_path) 
+   /* --- executable_path --- */
+   if (info->executable_path) {
+#if SDK_VERS >= SDK_10_14_6
+       Int executable_path_len = VG_(strlen)(info->executable_path) + 16 + 1;
+       HChar *executable_path = VG_(malloc)("initimg-darwin.scs.1", executable_path_len);
+       VG_(snprintf)(executable_path, executable_path_len, "executable_path=%s", info->executable_path);
+       *ptr++ = (Addr)copy_str(&strtab, executable_path);
+       VG_(free)(executable_path);
+#else
       *ptr++ = (Addr)copy_str(&strtab, info->executable_path);
-   else 
-      *ptr++ = 0;
+#endif
+   }
+   // FIXME PJF: there was an extra  *ptr++ = 0;  in an else branch here.
+   // There is a good chance that executable_path is never NULL, so it was never used.
+
+#if defined(VGA_arm64)
+   *ptr++ = (Addr)copy_str(&strtab, EXTRA_APPLE_ARG);
+#endif
+
    *ptr++ = 0;
 
    vg_assert((strtab-stringbase) == stringsize);
@@ -470,7 +503,7 @@ Addr setup_client_stack( void*  init_sp,
       }
       HChar resolved_name[VKI_PATH_MAX];
       VG_(realpath)(exe_name, resolved_name);
-      VG_(resolved_exename) = VG_(strdup)("initimg-darwin.sre.1", resolved_name);
+      VG_(resolved_exename) = VG_(strdup)("initimg-darwin.scs.2", resolved_name);
    }
 
    /* client_SP is pointing at client's argc/argv */
@@ -675,3 +708,4 @@ void VG_(ii_finalise_image)( IIFinaliseImageInfo iifii )
 /*--------------------------------------------------------------------*/
 /*--- end                                                          ---*/
 /*--------------------------------------------------------------------*/
+
diff --git a/coregrind/m_mach/dyld_cache.c b/coregrind/m_mach/dyld_cache.c
new file mode 100644
index 0000000000..953cccd5fb
--- /dev/null
+++ b/coregrind/m_mach/dyld_cache.c
@@ -0,0 +1,504 @@
+
+/*--------------------------------------------------------------------*/
+/*--- DYLD Cache                                      dyld_cache.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (c) 2020-2025 Louis Brunner <lou...@gm...>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 3 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+// While the dyld shared cache has existed since before macOS 11,
+// we have used DYLD_SHARED_REGION=avoid in the past.
+//
+// Starting with macOS 11 (Big Sur), it isn't an option anymore
+// as some dylibs are not provided in file format anymore.
+#if defined(VGO_darwin) && DARWIN_VERS >= DARWIN_11_00
+
+#include "pub_core_debuginfo.h"             // VG_(di_notify_dsc)
+#include "pub_core_debuglog.h"              // VG_(debugLog)
+#include "pub_core_mach.h"                  // VG_(dyld_cache_*)
+#include "pub_core_syscall.h"               // VG_(do_syscall1)
+#include "pub_core_libcbase.h"              // VG_(strncmp)
+#include "pub_core_libcprint.h"             // VG_(dmsg)
+#include "pub_core_libcfile.h"              // VG_(stat)
+#include "vki/vki-scnums-darwin.h"          // __NR_shared_region_check_np
+#include "priv_dyld_internals.h"            // CACHE_MAGIC_*, dyld_cache_header, etc
+
+// Required by private headers underneath
+#include "pub_core_libcassert.h"            // vg_assert
+#include "pub_core_threadstate.h"           // ThreadState
+
+// FIXME: probably shouldn't include this directly?
+#include "m_syswrap/priv_syswrap-generic.h" // ML_(notify_core_and_tool_of_mmap)
+
+#include <mach-o/loader.h>
+#include <mach-o/fat.h>
+
+// Only supported on macOS 11 onwards which is 64bit only
+# define MACH_HEADER mach_header_64
+# define MAGIC MH_MAGIC_64
+
+static void output_text_debug_info(const dyld_cache_image_text_info* textInfo);
+static void output_debug_info(const dyld_cache_header* dyld_cache);
+
+typedef struct {
+  const dyld_cache_header* header;
+  Addr slide;
+  Bool tried;
+} DYLDCache;
+
+static DYLDCache dyld_cache = {
+  .header = NULL,
+  .slide = 0,
+  .tried = False,
+};
+
+static Addr calculate_relative(const dyld_cache_header * header, Addr offset) {
+  return (Addr)header + offset;
+}
+
+static Addr calculate_unslid(Addr addr) {
+  return addr + dyld_cache.slide;
+}
+
+static int try_to_init_header(Addr address) {
+  const dyld_cache_header* header = (const dyld_cache_header *) address;
+
+  if (
+#if defined(VGA_amd64)
+    VG_(strcmp)(header->magic, CACHE_MAGIC_x86_64) != 0
+    && VG_(strcmp)(header->magic, CACHE_MAGIC_x86_64_HASWELL) != 0
+#elif defined(VGA_arm64)
+    VG_(strcmp)(header->magic, CACHE_MAGIC_arm64) != 0
+    && VG_(strcmp)(header->magic, CACHE_MAGIC_arm64e) != 0
+#else
+    0
+#error "unknown architecture"
+#endif
+  ) {
+    VG_(debugLog)(2, "dyld_cache", "ERROR: incompatible shared dyld cache (%s)\n", header->magic);
+    return 0;
+  }
+
+  if (header->mappingCount < 1) {
+    VG_(debugLog)(2, "dyld_cache", "ERROR: no mappings in the dyld cache\n");
+    return 0;
+  }
+
+  VG_(debugLog)(2, "dyld_cache", "shared dyld cache format: %d / %#x\n", header->formatVersion, header->mappingOffset);
+  output_debug_info(header);
+
+  const dyld_cache_mapping_info* mappings = (const dyld_cache_mapping_info*)(calculate_relative(header, header->mappingOffset));
+  for (int i = 0; i < header->mappingCount; ++i) {
+    const dyld_cache_mapping_info* mapping = &mappings[i];
+    Addr map_addr = calculate_unslid(mapping->address);
+    VG_(debugLog)(5, "dyld_cache",
+      "mapping[%d]{\n"
+      "  .address: %#lx,\n"
+      "  .size: %llu (%#llx),\n"
+      "  .fileOffset: %#lx,\n"
+      "  .maxProt: %#x,\n"
+      "  .initProt: %#x,\n"
+      "}\n",
+      i,
+      map_addr,
+      mapping->size,
+      mapping->size,
+      calculate_relative(header, mapping->fileOffset),
+      mapping->maxProt,
+      mapping->initProt
+    );
+    ML_(notify_core_and_tool_of_mmap)(map_addr, mapping->size, mapping->initProt, VKI_MAP_PRIVATE | VKI_MAP_ANONYMOUS, -1, 0);
+  }
+
+  if (dyld_cache.header->mappingOffset >= __offsetof(dyld_cache_header, dynamicDataMaxSize) && header->dynamicDataMaxSize > 0) {
+    ML_(notify_core_and_tool_of_mmap)(calculate_relative(header, header->dynamicDataOffset), header->dynamicDataMaxSize, VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE | VKI_MAP_ANONYMOUS, -1, 0);
+  }
+
+  return 1;
+}
+
+static int try_to_init(void) {
+  // Read address of the shared cache which is mapped in our address space
+  // and tell Valgrind about it so we avoid false-positives and massive suppression files
+  {
+    Addr cache_address;
+    if (sr_Res(VG_(do_syscall1)(__NR_shared_region_check_np, (UWord)&cache_address)) != 0) {
+      VG_(debugLog)(2, "dyld_cache", "ERROR: could not get shared dyld cache address\n");
+      return 0;
+    }
+    VG_(debugLog)(2, "dyld_cache", "shared dyld cache found: %#lx\n", cache_address);
+
+    // FIXME: should be after `try_to_init_header` but we also need the slide calculated _before_
+    dyld_cache.header = (const dyld_cache_header *) cache_address;
+    const dyld_cache_mapping_info* mappings = (const dyld_cache_mapping_info*)(calculate_relative(dyld_cache.header, dyld_cache.header->mappingOffset));
+    dyld_cache.slide = cache_address - mappings[0].address;
+    VG_(debugLog)(2, "dyld_cache", "dyld cache slide: %#lx\n", dyld_cache.slide);
+
+    if (!try_to_init_header(cache_address)) {
+      return 0;
+    }
+
+    if (dyld_cache.header->mappingOffset >= __offsetof(dyld_cache_header, subCacheArrayCount)) {
+      Bool sub_cache_v2 = dyld_cache.header->mappingOffset > __offsetof(dyld_cache_header, cacheSubType);
+      Addr sub_caches = calculate_relative(dyld_cache.header, dyld_cache.header->subCacheArrayOffset);
+
+      for (int i = 0; i < dyld_cache.header->subCacheArrayCount; ++i) {
+        Addr sub_cache_addr;
+
+        VG_(debugLog)(2, "dyld_cache", "found sub cache %d (v2: %d)\n", i, sub_cache_v2);
+
+        if (sub_cache_v2) {
+          const dyld_subcache_entry* sub_cache = &((const dyld_subcache_entry*) sub_caches)[i];
+          const uint8_t* u = sub_cache->uuid;
+          sub_cache_addr = calculate_relative(dyld_cache.header, sub_cache->cacheVMOffset);
+          VG_(debugLog)(5, "dyld_cache",
+            "sub_cache_v2[%d]{\n"
+            "  .uuid: %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x,\n"
+            "  .cacheVMOffset: %#lx,\n"
+            "  .fileSuffix: %s,\n"
+            "}\n",
+            i,
+            u[0], u[1], u[2], u[3], u[4], u[5], u[6], u[7], u[8], u[9], u[10], u[11], u[12], u[13], u[14], u[15],
+            sub_cache_addr,
+            sub_cache->fileSuffix
+          );
+
+        } else {
+          const dyld_subcache_entry_v1* sub_cache = &((const dyld_subcache_entry_v1*) sub_caches)[i];
+          const uint8_t* u = sub_cache->uuid;
+          sub_cache_addr = calculate_relative(dyld_cache.header, sub_cache->cacheVMOffset);
+          VG_(debugLog)(5, "dyld_cache",
+            "sub_cache_v1[%d]{\n"
+            "  .uuid: %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x,\n"
+            "  .cacheVMOffset: %#lx,\n"
+            "}\n",
+            i,
+            u[0], u[1], u[2], u[3], u[4], u[5], u[6], u[7], u[8], u[9], u[10], u[11], u[12], u[13], u[14], u[15],
+            sub_cache_addr
+          );
+        }
+
+        if (!try_to_init_header(sub_cache_addr)) {
+          return 0;
+        }
+      }
+    }
+  }
+
+  return 1;
+}
+
+Addr VG_(dyld_cache_get_slide)(void) {
+  return dyld_cache.slide;
+}
+
+int ensure_init(void) {
+  if (dyld_cache.header != NULL) {
+    return 1;
+  }
+
+  // FIXME: unlikely race condition?
+  if (dyld_cache.tried) {
+    return 0;
+  }
+  dyld_cache.tried = True;
+
+  if (!try_to_init()) {
+    VG_(dmsg)(
+      "WARNING: could not read from dyld shared cache (DSC)\n"
+      "Some reports (especially memory leaks) might be missing or incorrect (false-positives)\n"
+    );
+    return 0;
+  }
+  // We currently detect if dyld is loading/using a library by checking if stat64 fails.
+  // However, dyld doesn't seem to call stat64 for all of them anymore.
+  // All arm64 binaries are executables but some x86 ones might not be so let's avoid them just to be safe.
+  VG_(dyld_cache_load_library)("/usr/lib/system/libsystem_kernel.dylib");
+  VG_(dyld_cache_load_library)("/usr/lib/system/libsystem_pthread.dylib");
+  VG_(dyld_cache_load_library)("/usr/lib/system/libsystem_platform.dylib");
+
+  return 1;
+}
+
+void VG_(dyld_cache_init)(const HChar* tool) {
+  // drd crashes if you map memory segments in m_main
+  if (VG_(strcmp)(tool, "drd") == 0) {
+    return;
+  }
+
+  ensure_init();
+}
+
+int VG_(dyld_cache_might_be_in)(const HChar* path) {
+  // If not init'd, there is no point
+  if (!ensure_init()) {
+    return 0;
+  }
+
+  if (VG_(strncmp)(path, "/usr/lib/", 9) == 0) {
+    return 1;
+  }
+  if (VG_(strncmp)(path, "/System/Library/", 16) == 0) {
+    return 1;
+  }
+  // FIXME: more flexible heuristics around extensions?
+  return 0;
+}
+
+static struct MACH_HEADER* find_image_text(const dyld_cache_header* header, const char* path, SizeT* len) {
+  vg_assert(len);
+  *len = 0;
+
+  const dyld_cache_image_text_info* textInfos = (const dyld_cache_image_text_info*) calculate_relative(header, header->imagesTextOffset);
+
+  for (int i = 0; i < header->imagesTextCount; ++i) {
+    const dyld_cache_image_text_info* textInfo = &textInfos[i];
+    const char* imagePath = (const char*) calculate_relative(header, textInfo->pathOffset);
+
+    if (VG_(strcmp)(imagePath, path) == 0) {
+      output_text_debug_info(textInfo);
+      *len = textInfo->textSegmentSize;
+      return (struct MACH_HEADER*) calculate_unslid(textInfo->loadAddress);
+    }
+  }
+
+  return NULL;
+}
+
+int VG_(dyld_cache_load_library)(const HChar* path) {
+  // Tries to find `path` in the dyld shared cache and, if a Mach-O image is
+  // there, forwards it to VG_(di_notify_dsc). Returns 1 on success, else 0.
+  SizeT text_len = 0;
+
+  // Paths containing "/PrivateFrameworks/" are rejected before anything else.
+  if (VG_(strstr)(path, "/PrivateFrameworks/") != NULL) {
+    return 0;
+  }
+
+  // If not init'd, there is no point trying
+  if (!ensure_init()) {
+    return 0;
+  }
+
+  VG_(debugLog)(2, "dyld_cache", "potential dylib to check in the cache: %s\n", path);
+
+  struct MACH_HEADER* const hdr = find_image_text(dyld_cache.header, path, &text_len);
+  if (!hdr) {
+    VG_(debugLog)(2, "dyld_cache", "image not found: %s\n", path);
+    return 0;
+  }
+
+  if (hdr->magic != MAGIC) {
+    VG_(debugLog)(2, "dyld_cache", "image not mach-o (%#x): %s\n", hdr->magic, path);
+    return 0;
+  }
+
+  VG_(debugLog)(2, "dyld_cache", "image (%p) is valid, forwarding to debuginfo: %s\n", hdr, path);
+  const ULong notified = VG_(di_notify_dsc)(path, (Addr)hdr, text_len);
+  if (!notified) {
+    VG_(debugLog)(2, "dyld_cache", "failed to load debuginfo from: %s\n", path);
+    return 0;
+  }
+
+  VG_(debugLog)(2, "dyld_cache", "image fully loaded: %s\n", path);
+  return 1;
+}
+
+static void output_text_debug_info(const dyld_cache_image_text_info* textInfo) {
+  // Dumps one image-text entry at debugLog level 5; `id` walks the 16 UUID bytes.
+  const uint8_t* id = textInfo->uuid;
+  VG_(debugLog)(5, "dyld_cache",
+    "image_text_info{\n"
+    "  .uuid: %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x,\n"
+    "  .loadAddress: %#llx,\n"
+    "  .textSegmentSize: %u,\n"
+    "  .pathOffset: %#x,\n"
+    "}\n",
+    id[0], id[1], id[2], id[3], id[4], id[5], id[6], id[7],
+    id[8], id[9], id[10], id[11], id[12], id[13], id[14], id[15],
+    textInfo->loadAddress,
+    textInfo->textSegmentSize, textInfo->pathOffset
+  );
+}
+
+static void output_debug_info(const dyld_cache_header* cache) { // Dumps every field of the dyld cache header at debugLog level 5.
+  const uint8_t* u1 = cache->uuid;           // bytes printed on the ".uuid" line
+  const uint8_t* u2 = cache->symbolFileUUID; // bytes printed on the ".symbolFileUUID" line
+  VG_(debugLog)(5, "dyld_cache", // one format line per field; the arguments further down must stay in the same order
+    "shared dyld content: {\n"
+    "  .magic: %s,\n"
+    "  .mappingOffset: %#x,\n"
+    "  .mappingCount: %u,\n"
+    "  .imagesOffsetOld: %#x,\n"
+    "  .imagesCountOld: %u,\n"
+    "  .dyldBaseAddress: %#llx,\n"
+    "  .codeSignatureOffset: %#llx,\n"
+    "  .codeSignatureSize: %llu,\n"
+    "  .slideInfoOffsetUnused: %#llx,\n"
+    "  .slideInfoSizeUnused: %llu,\n"
+    "  .localSymbolsOffset: %#llx,\n"
+    "  .localSymbolsSize: %llu,\n"
+    "  .uuid: %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x,\n"
+    "  .cacheType: %llu,\n"
+    "  .branchPoolsOffset: %#x,\n"
+    "  .branchPoolsCount: %u,\n"
+    "  .dyldInCacheMH: %#llx,\n"
+    "  .dyldInCacheEntry: %#llx,\n"
+    "  .imagesTextOffset: %#llx,\n"
+    "  .imagesTextCount: %llu,\n"
+    "  .patchInfoAddr: %#llx,\n"
+    "  .patchInfoSize: %llu,\n"
+    "  .otherImageGroupAddrUnused: %#llx,\n"
+    "  .otherImageGroupSizeUnused: %llu,\n"
+    "  .progClosuresAddr: %#llx,\n"
+    "  .progClosuresSize: %llu,\n"
+    "  .progClosuresTrieAddr: %#llx,\n"
+    "  .progClosuresTrieSize: %llu,\n"
+    "  .platform: %#x,\n"
+    "  .formatVersion: %#x,\n"
+    "  .dylibsExpectedOnDisk: %d,\n"
+    "  .simulator: %d,\n"
+    "  .locallyBuiltCache: %d,\n"
+    "  .builtFromChainedFixups: %d,\n"
+    "  .padding: %d,\n"
+    "  .sharedRegionStart: %#llx,\n"
+    "  .sharedRegionSize: %llu,\n"
+    "  .maxSlide: %#llx,\n"
+    "  .dylibsImageArrayAddr: %#llx,\n"
+    "  .dylibsImageArraySize: %llu,\n"
+    "  .dylibsTrieAddr: %#llx,\n"
+    "  .dylibsTrieSize: %llu,\n"
+    "  .otherImageArrayAddr: %#llx,\n"
+    "  .otherImageArraySize: %llu,\n"
+    "  .otherTrieAddr: %#llx,\n"
+    "  .otherTrieSize: %llu,\n"
+    "  .mappingWithSlideOffset: %#x,\n"
+    "  .mappingWithSlideCount: %u,\n"
+    "  .dylibsPBLStateArrayAddrUnused: %llu,\n" // NOTE(review): an address printed in decimal, unlike the other *Addr fields
+    "  .dylibsPBLSetAddr: %llx,\n"              // NOTE(review): hex without the 0x prefix used by the other *Addr fields
+    "  .programsPBLSetPoolAddr: %#llx,\n"
+    "  .programsPBLSetPoolSize: %llu,\n"
+    "  .programTrieAddr: %#llx,\n"
+    "  .programTrieSize: %u,\n"
+    "  .osVersion: %#x,\n"
+    "  .altPlatform: %#x,\n"
+    "  .altOsVersion: %#x,\n"
+    "  .swiftOptsOffset: %#llx,\n"
+    "  .swiftOptsSize: %llu,\n"
+    "  .subCacheArrayOffset: %#x,\n"
+    "  .subCacheArrayCount: %u,\n"
+    "  .symbolFileUUID: %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x,\n"
+    "  .rosettaReadOnlyAddr: %#llx,\n"
+    "  .rosettaReadOnlySize: %llu,\n"
+    "  .rosettaReadWriteAddr: %#llx,\n"
+    "  .rosettaReadWriteSize: %llu,\n"
+    "  .imagesOffset: %#x,\n"
+    "  .imagesCount: %u,\n"
+    "  .cacheSubType: %#x,\n"
+    "  .objcOptsOffset: %#llx,\n"
+    "  .objcOptsSize: %llu,\n"
+    "  .cacheAtlasOffset: %#llx,\n"
+    "  .cacheAtlasSize: %llu,\n"
+    "  .dynamicDataOffset: %#llx,\n"
+    "  .dynamicDataMaxSize: %llu,\n"
+    "}\n",
+    cache->magic,
+    cache->mappingOffset,
+    cache->mappingCount,
+    cache->imagesOffsetOld,
+    cache->imagesCountOld,
+    cache->dyldBaseAddress,
+    cache->codeSignatureOffset,
+    cache->codeSignatureSize,
+    cache->slideInfoOffsetUnused,
+    cache->slideInfoSizeUnused,
+    cache->localSymbolsOffset,
+    cache->localSymbolsSize,
+    u1[0], u1[1], u1[2], u1[3], u1[4], u1[5], u1[6], u1[7], u1[8],
+    u1[9], u1[10], u1[11], u1[12], u1[13], u1[14], u1[15],
+    cache->cacheType,
+    cache->branchPoolsOffset,
+    cache->branchPoolsCount,
+    cache->dyldInCacheMH,
+    cache->dyldInCacheEntry,
+    cache->imagesTextOffset,
+    cache->imagesTextCount,
+    cache->patchInfoAddr,
+    cache->patchInfoSize,
+    cache->otherImageGroupAddrUnused,
+    cache->otherImageGroupSizeUnused,
+    cache->progClosuresAddr,
+    cache->progClosuresSize,
+    cache->progClosuresTrieAddr,
+    cache->progClosuresTrieSize,
+    cache->platform,
+    (UInt)cache->formatVersion, // bit-field member, cast explicitly — presumably so it matches the unsigned %#x
+    cache->dylibsExpectedOnDisk,
+    cache->simulator,
+    cache->locallyBuiltCache,
+    cache->builtFromChainedFixups,
+    cache->padding,
+    cache->sharedRegionStart,
+    cache->sharedRegionSize,
+    cache->maxSlide,
+    cache->dylibsImageArrayAddr,
+    cache->dylibsImageArraySize,
+    cache->dylibsTrieAddr,
+    cache->dylibsTrieSize,
+    cache->otherImageArrayAddr,
+    cache->otherImageArraySize,
+    cache->otherTrieAddr,
+    cache->otherTrieSize,
+    cache->mappingWithSlideOffset,
+    cache->mappingWithSlideCount,
+    cache->dylibsPBLStateArrayAddrUnused,
+    cache->dylibsPBLSetAddr,
+    cache->programsPBLSetPoolAddr,
+    cache->programsPBLSetPoolSize,
+    cache->programTrieAddr,
+    cache->programTrieSize,
+    cache->osVersion,
+    cache->altPlatform,
+    cache->altOsVersion,
+    cache->swiftOptsOffset,
+    cache->swiftOptsSize,
+    cache->subCacheArrayOffset,
+    cache->subCacheArrayCount,
+    u2[0], u2[1], u2[2], u2[3], u2[4], u2[5], u2[6], u2[7], u2[8],
+    u2[9], u2[10], u2[11], u2[12], u2[13], u2[14], u2[15],
+    cache->rosettaReadOnlyAddr,
+    cache->rosettaReadOnlySize,
+    cache->rosettaReadWriteAddr,
+    cache->rosettaReadWriteSize,
+    cache->imagesOffset,
+    cache->imagesCount,
+    cache->cacheSubType,
+    cache->objcOptsOffset,
+    cache->objcOptsSize,
+    cache->cacheAtlasOffset,
+    cache->cacheAtlasSize,
+    cache->dynamicDataOffset,
+    cache->dynamicDataMaxSize
+  );
+}
+
+#endif
diff --git a/coregrind/m_mach/priv_dyld_internals.h b/coregrind/m_mach/priv_dyld_internals.h
new file mode 100644
index 0000000000..32c31f9f95
--- /dev/null
+++ b/coregrind/m_mach/priv_dyld_internals.h
@@ -0,0 +1,146 @@
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (c) 2020-2025 Louis Brunner <lou...@gm...>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 3 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+#ifndef __PRIV_DYLD_INTERNALS_H
+#define __PRIV_DYLD_INTERNALS_H
+
+#if defined(VGO_darwin)
+
+// This file contains a bunch of structures defined in Apple's dyld
+
+// From Apple's `dyld/dyld/SharedCacheRuntime.cpp`
+#define CACHE_MAGIC_x86_64         "dyld_v1  x86_64"
+#define CACHE_MAGIC_x86_64_HASWELL "dyld_v1 x86_64h"
+#define CACHE_MAGIC_arm64          "dyld_v1   arm64"
+#define CACHE_MAGIC_arm64e         "dyld_v1  arm64e"
+
+// From Apple's `dyld/cache-builder/dyld_cache_format.h`
+typedef struct { // Header of a dyld shared cache; field order and widths must stay exactly as in Apple's dyld_cache_format.h
+  char        magic[16];              // e.g. "dyld_v0    i386"
+  uint32_t    mappingOffset;          // file offset to first dyld_cache_mapping_info
+  uint32_t    mappingCount;           // number of dyld_cache_mapping_info entries
+  uint32_t    imagesOffsetOld;        // UNUSED: moved to imagesOffset to prevent older dsc_extractors from crashing
+  uint32_t    imagesCountOld;         // UNUSED: moved to imagesCount to prevent older dsc_extractors from crashing
+  uint64_t    dyldBaseAddress;        // base address of dyld when cache was built
+  uint64_t    codeSignatureOffset;    // file offset of code signature blob
+  uint64_t    codeSignatureSize;      // size of code signature blob (zero means to end of file)
+  uint64_t    slideInfoOffsetUnused;  // unused.  Used to be file offset of kernel slide info
+  uint64_t    slideInfoSizeUnused;    // unused.  Used to be size of kernel slide info
+  uint64_t    localSymbolsOffset;     // file offset of where local symbols are stored
+  uint64_t    localSymbolsSize;       // size of local symbols information
+  uint8_t     uuid[16];               // unique value for each shared cache file
+  uint64_t    cacheType;              // 0 for development, 1 for production, 2 for multi-cache
+  uint32_t    branchPoolsOffset;      // file offset to table of uint64_t pool addresses
+  uint32_t    branchPoolsCount;       // number of uint64_t entries
+  uint64_t    dyldInCacheMH;          // (unslid) address of mach_header of dyld in cache
+  uint64_t    dyldInCacheEntry;       // (unslid) address of entry point (_dyld_start) of dyld in cache
+  uint64_t    imagesTextOffset;       // file offset to first dyld_cache_image_text_info
+  uint64_t    imagesTextCount;        // number of dyld_cache_image_text_info entries
+  uint64_t    patchInfoAddr;          // (unslid) address of dyld_cache_patch_info
+  uint64_t    patchInfoSize;          // Size of all of the patch information pointed to via the dyld_cache_patch_info
+  uint64_t    otherImageGroupAddrUnused;    // unused
+  uint64_t    otherImageGroupSizeUnused;    // unused
+  uint64_t    progClosuresAddr;       // (unslid) address of list of program launch closures
+  uint64_t    progClosuresSize;       // size of list of program launch closures
+  uint64_t    progClosuresTrieAddr;   // (unslid) address of trie of indexes into program launch closures
+  uint64_t    progClosuresTrieSize;   // size of trie of indexes into program launch closures
+  uint32_t    platform;               // platform number (macOS=1, etc)
+  uint32_t    formatVersion          : 8,  // dyld3::closure::kFormatVersion
+              dylibsExpectedOnDisk   : 1,  // dyld should expect the dylib exists on disk and to compare inode/mtime to see if cache is valid
+              simulator              : 1,  // for simulator of specified platform
+              locallyBuiltCache      : 1,  // 0 for B&I built cache, 1 for locally built cache
+              builtFromChainedFixups : 1,  // some dylib in cache was built using chained fixups, so patch tables must be used for overrides
+              padding                : 20; // TBD
+  uint64_t    sharedRegionStart;      // base load address of cache if not slid
+  uint64_t    sharedRegionSize;       // overall size required to map the cache and all subCaches, if any
+  uint64_t    maxSlide;               // runtime slide of cache can be between zero and this value
+  uint64_t    dylibsImageArrayAddr;   // (unslid) address of ImageArray for dylibs in this cache
+  uint64_t    dylibsImageArraySize;   // size of ImageArray for dylibs in this cache
+  uint64_t    dylibsTrieAddr;         // (unslid) address of trie of indexes of all cached dylibs
+  uint64_t    dylibsTrieSize;         // size of trie of cached dylib paths
+  uint64_t    otherImageArrayAddr;    // (unslid) address of ImageArray for dylibs and bundles with dlopen closures
+  uint64_t    otherImageArraySize;    // size of ImageArray for dylibs and bundles with dlopen closures
+  uint64_t    otherTrieAddr;          // (unslid) address of trie of indexes of all dylibs and bundles with dlopen closures
+  uint64_t    otherTrieSize;          // size of trie of dylibs and bundles with dlopen closures
+  uint32_t    mappingWithSlideOffset; // file offset to first dyld_cache_mapping_and_slide_info
+  uint32_t    mappingWithSlideCount;  // number of dyld_cache_mapping_and_slide_info entries
+  uint64_t    dylibsPBLStateArrayAddrUnused;    // unused
+  uint64_t    dylibsPBLSetAddr;           // (unslid) address of PrebuiltLoaderSet of all cached dylibs
+  uint64_t    programsPBLSetPoolAddr;     // (unslid) address of pool of PrebuiltLoaderSet for each program
+  uint64_t    programsPBLSetPoolSize;     // size of pool of PrebuiltLoaderSet for each program
+  uint64_t    programTrieAddr;            // (unslid) address of trie mapping program path to PrebuiltLoaderSet
+  uint32_t    programTrieSize;            // presumably the size of the trie at programTrieAddr (inferred from the name)
+  uint32_t    osVersion;                  // OS Version of dylibs in this cache for the main platform
+  uint32_t    altPlatform;                // e.g. iOSMac on macOS
+  uint32_t    altOsVersion;               // e.g. 14.0 for iOSMac
+  uint64_t    swiftOptsOffset;        // VM offset from cache_header* to Swift optimizations header
+  uint64_t    swiftOptsSize;          // size of Swift optimizations header
+  uint32_t    subCacheArrayOffset;    // file offset to first dyld_subcache_entry
+  uint32_t    subCacheArrayCount;     // number of subCache entries
+  uint8_t     symbolFileUUID[16];     // unique value for the shared cache file containing unmapped local symbols
+  uint64_t    rosettaReadOnlyAddr;    // (unslid) address of the start of where Rosetta can add read-only/executable data
+  uint64_t    rosettaReadOnlySize;    // maximum size of the Rosetta read-only/executable region
+  uint64_t    rosettaReadWriteAddr;   // (unslid) address of the start of where Rosetta can add read-write data
+  uint64_t    rosettaReadWriteSize;   // maximum size of the Rosetta read-write region
+  uint32_t    imagesOffset;           // file offset to first dyld_cache_image_info
+  uint32_t    imagesCount;            // number of dyld_cache_image_info entries
+  uint32_t    cacheSubType;           // 0 for development, 1 for production, when cacheType is multi-cache(2)
+  uint64_t    objcOptsOffset;         // VM offset from cache_header* to ObjC optimizations header
+  uint64_t    objcOptsSize;           // size of ObjC optimizations header
+  uint64_t    cacheAtlasOffset;       // VM offset from cache_header* to embedded cache atlas for process introspection
+  uint64_t    cacheAtlasSize;         // size of embedded cache atlas
+  uint64_t    dynamicDataOffset;      // VM offset from cache_header* to the location of dyld_cache_dynamic_data_header
+  uint64_t    dynamicDataMaxSize;     // maximum size of space reserved from dynamic data
+} dyld_cache_header;
+
+// From Apple's `dyld/cache-builder/dyld_cache_format.h`
+typedef struct { // One entry of the table located by dyld_cache_header.mappingOffset / mappingCount
+  uint64_t        address;     // presumably the (unslid) VM address of the mapping — TODO confirm against Apple's header
+  uint64_t        size;        // presumably the length of the mapping in bytes — TODO confirm
+  uint64_t        fileOffset;  // presumably where the mapping starts within the cache file — TODO confirm
+  uint32_t        maxProt;     // presumably the maximum VM protection — TODO confirm
+  uint32_t        initProt;    // presumably the initial VM protection — TODO confirm
+} dyld_cache_mapping_info;
+
+// From Apple's `dyld/cache-builder/dyld_cache_format.h`
+typedef struct { // Sub-cache descriptor that carries a file suffix; dyld_subcache_entry_v1 is the variant without one
+    uint8_t     uuid[16];           // The UUID of the subCache file
+    uint64_t    cacheVMOffset;      // The offset of this subcache from the main cache base address
+    char        fileSuffix[32];     // The file name suffix of the subCache file e.g. ".25.data", ".03.development"
+} dyld_subcache_entry;
+
+// From Apple's `dyld/cache-builder/dyld_cache_format.h`
+typedef struct { // Sub-cache descriptor without the fileSuffix member that dyld_subcache_entry has
+  uint8_t     uuid[16];           // The UUID of the subCache file
+  uint64_t    cacheVMOffset;      // The offset of this subcache from the main cache base address
+} dyld_subcache_entry_v1;
+
+// From Apple's `dyld/cache-builder/dyld_cache_format.h`
+typedef struct { // One entry of the table located by dyld_cache_header.imagesTextOffset / imagesTextCount
+  uuid_t      uuid;                   // image UUID; dumped as 16 bytes by the debug output in dyld_cache.c
+  uint64_t    loadAddress;            // unslid address of start of __TEXT
+  uint32_t    textSegmentSize;        // size of __TEXT; dyld_cache.c reports it as the image length
+  uint32_t    pathOffset;             // offset from start of cache file
+} dyld_cache_image_text_info;
+
+#endif
+
+#endif
diff --git a/coregrind/m_main.c b/coregrind/m_main.c
index ecd159e973..0bd929d0cc 100644
--- a/coregrind/m_main.c
+++ b/coregrind/m_main.c
@@ -1970,6 +1970,18 @@ Int valgrind_main ( Int argc, HChar **argv, HChar **envp )
 
    VG_(init_Threads)();
 
+   //--------------------------------------------------------------
+   // Initialize the dyld cache, which is required with macOS 11 (Big Sur) and onwards
+   // as some system libraries aren't provided on the disk anymore
+   //   p: none
+   // Note: some tools don't like to start mapping memory right away, so we do it lazily in those cases.
+   //--------------------------------------------------------------
+#  if defined(VGO_darwin) && DARWIN_VERS >= DARWIN_11_00
+   if (the_iifii.dynamic) {
+     VG_(dyld_cache_init)(VG_(clo_toolname));
+   }
+#  endif
+
    //--------------------------------------------------------------
    // Initialise the scheduler (phase 1) [generates tid_main]
    //   p: none, afaics
diff --git a/coregrind/m_replacemalloc/vg_replace_malloc.c b/coregrind/m_replacemalloc/vg_replace_malloc.c
index 7d92450c4e..12b01f678d 100644
--- a/coregrind/m_replacemalloc/vg_replace_malloc.c
+++ b/coregrind/m_replacemalloc/vg_replace_malloc.c
@@ -455,6 +455,19 @@ extern int * __error(void) __attribute__((weak));
  ALLOC_or_NULL(SO_SYN_MALLOC,         malloc,      malloc);
  ZONEALLOC_or_NULL(VG_Z_LIBC_SONAME,  malloc_zone_malloc, malloc);
  ZONEALLOC_or_NULL(SO_SYN_MALLOC,     malloc_zone_malloc, malloc);
+#if DARWIN_VERS >= DARWIN_15_00
+#if defined(VGA_arm64)
+ // on arm64, malloc_type_malloc is used for malloc, new and new[]
+ // __typed_operator_new_impl[abi:ne180100]@libc++abi.dylib calls it for new and new[]
+ // all other usages (Swift, ObjC, C) it calls it for malloc
+ // this matters as we need to put the right tag in the allocation
+ // otherwise the tool might report a mismatch between allocation func and free func
+ TYPE_ALLOC_or_NULL(VG_Z_LIBC_SONAME, malloc_type_malloc);
+#else
+ ALLOC_or_NULL(VG_Z_LIBC_SONAME,      malloc_type_malloc,      malloc);
+#endif
+ ZONEALLOC_or_NULL(VG_Z_LIBC_SONAME,  malloc_type_zone_malloc, malloc);
+#endif
 
 #elif defined(VGO_solaris)
  ALLOC_or_NULL(VG_Z_LIBSTDCXX_SONAME, malloc,      malloc);
@@ -943,6 +956,8 @@ extern int * __error(void) __attribute__((weak));
 #elif defined(VGO_darwin)
  FREE(VG_Z_LIBC_SONAME,       free,                 free );
  FREE(SO_SYN_MALLOC,          free,                 free );
+ FREE(VG_Z_LIBC_SONAME,       vfree,                free );
+ FREE(SO_SYN_MALLOC,          vfree,                free );
  ZONEFREE(VG_Z_LIBC_SONAME,   malloc_zone_free,     free );
  ZONEFREE(SO_SYN_MALLOC,      malloc_zone_free,     free );
 
@@ -2156,8 +2171,9 @@ extern int * __error(void) __attribute__((weak));
  POSIX_MEMALIGN(SO_SYN_MALLOC,    posix_memalign);
 
 #elif defined(VGO_darwin)
-#if (DARWIN_VERSIO >= DARWIN_10_6)
+#if (DARWIN_VERS >= DARWIN_10_6)
  POSIX_MEMALIGN(VG_Z_LIBC_SONAME, posix_memalign);
+ POSIX_MEMALIGN(SO_SYN_MALLOC,    posix_memalign);
 #endif
 
 #elif defined(VGO_solaris)
@@ -2326,7 +2342,7 @@ extern int * __error(void) __attribute__((weak));
  ALIGNED_ALLOC(SO_SYN_MALLOC,   aligned_alloc);
 
  #elif defined(VGO_darwin)
-  //ALIGNED_ALLOC(VG_Z_LIBC_SONAME, aligned_alloc);
+ ALIGNED_ALLOC(VG_Z_LIBC_SONAME, aligned_alloc);
 
  #elif defined(VGO_solaris)
   ALIGNED_ALLOC(VG_Z_LIBC_SONAME, aligned_alloc);
@@ -2495,6 +2511,17 @@ static size_t my_malloc_size ( void* zone, void* ptr )
    return res;
 }
 
+#define ZONE_DESTROY(soname, fnname) \
+   /* Declares and defines a replacement for fnname in soname whose only action is the definedness check on `zone`. */ \
+   void VG_REPLACE_FUNCTION_EZU(10291,soname,fnname)(void* zone); \
+   void VG_REPLACE_FUNCTION_EZU(10291,soname,fnname)(void* zone)  \
+   { \
+      TRIGGER_MEMCHECK_ERROR_IF_UNDEFINED(zone); \
+   }
+/* Instantiate the replacement for malloc_zone_destroy under both sonames. */
+ZONE_DESTROY(VG_Z_LIBC_SONAME, malloc_zone_destroy);
+ZONE_DESTROY(SO_SYN_MALLOC,    malloc_zone_destroy);
+
 /* Note that the (void*) casts below are a kludge which stops
    compilers complaining about the fact that the replacement
    functions aren't really of the right type. */
@@ -2507,7 +2534,7 @@ static vki_malloc_zone_t vg_default_zone = {
     (void*)VG_REPLACE_FUNCTION_EZU(10130,VG_Z_LIBC_SONAME,malloc_zone_valloc),
     (void*)VG_REPLACE_FUNCTION_EZU(10040,VG_Z_LIBC_SONAME,malloc_zone_free),
     (void*)VG_REPLACE_FUNCTION_EZU(10080,VG_Z_LIBC_SONAME,malloc_zone_realloc),
-    NULL, // GrP fixme: destroy
+    (void*)VG_REPLACE_FUNCTION_EZU(10291,VG_Z_LIBC_SONAME,malloc_zone_destroy),
     "ValgrindMallocZone",
     NULL, // batch_malloc
     NULL, // batch_free
diff --git a/coregrind/m_syswrap/priv_syswrap-darwin.h b/coregrind/m_syswrap/priv_syswrap-darwin.h
index 7ffc4f88f3..f30136e291 100644
--- a/coregrind/m_syswrap/priv_syswrap-darwin.h
+++ b/coregrind/m_syswrap/priv_syswrap-darwin.h
@@ -362,7 +362,9 @@ DECL_TEMPLATE(darwin, gettid);                  // 286
 // NYI mkfifo_extended 291
 // NYI mkdir_extended 292
 // NYI identitysvc 293
-// NYI shared_region_check_np 294
+#if DARWIN_VERS >= DARWIN_11_00
+DECL_TEMPLATE(darwin, shared_region_check_np); // 294
+#endif
 // NYI shared_region_map_np 295
 #if DARWIN_VERS >= DARWIN_10_6
 // NYI vm_pressure_monitor 296
@@ -540,7 +542,9 @@ DECL_TEMPLATE(darwin, fileport_makeport);        // 430
 // NYI pid_shutdown_sockets 436
 #endif /* DARWIN_VERS >= DARWIN_10_10 */
 // old old shared_region_slide_np 437
-// NYI shared_region_map_and_slide_np            // 438
+#if DARWIN_VERS >= DARWIN_11_00
+DECL_TEMPLATE(darwin, shared_region_map_and_slide_np); // 438
+#endif
 // NYI kas_info                                  // 439
 // NYI memorystatus_control                      // 440
 DECL_TEMPLATE(darwin, guarded_open_np);          // 441
@@ -655,6 +659,19 @@ DECL_TEMPLATE(darwin, abort_with_payload);          // 521
 // NYI log_data                       // 533
 // NYI memorystatus_available_memory  // 534
 #endif
+#if DARWIN_VERS >= DARWIN_11_00
+DECL_TEMPLATE(darwin, objc_bp_assist_cfg_np); // 535
+// NYI shared_region_map_and_slide_2_np   // 536
+// NYI pivot_root                         // 537
+// NYI task_inspect_for_pid               // 538
+DECL_TEMPLATE(darwin, task_read_for_pid); // 539
+// NYI sys_preadv                         // 540
+// NYI sys_pwritev                        // 541
+// NYI sys_preadv_nocancel                // 542
+// NYI sys_pwritev_nocancel               // 543
+DECL_TEMPLATE(darwin, ulock_wait2);       // 544
+// NYI proc_info_extended_id              // 545
+#endif
 
 // Mach message helpers
 DECL_TEMPLATE(darwin, mach_port_set_context);
diff --git a/coregrind/m_syswrap/syswrap-darwin.c b/coregrind/m_syswrap/syswrap-darwin.c
index 09c1338cfa..25ac11eac3 100644
--- a/coregrind/m_syswrap/syswrap-darwin.c
+++ b/coregrind/m_syswrap/syswrap-darwin.c
@@ -44,6 +44,7 @@
 #include "pub_core_libcprint.h"
 #include "pub_core_libcproc.h"
 #include "pub_core_libcsignal.h"
+#include "pub_core_mach.h"         // VG_(dyld_cache_*)
 #include "pub_core_machine.h"      // VG_(get_SP)
 #include "pub_core_mallocfree.h"
 #include "pub_core_options.h"
@@ -3312,10 +3313,34 @@ PRE(stat64)
    PRE_REG_READ2(long, "stat", const char *,path, struct stat64 *,buf);
    PRE_MEM_RASCIIZ("stat64(path)", ARG1);
    PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
+
+#if DARWIN_VERS >= DARWIN_11_00
+   // Starting with macOS 11.0, some system libraries are not provided on the disk but only through
+   // the shared dyld cache, thus we try to detect if dyld tried (and failed) to load a dylib,
+   // in which case we do the same thing as dyld and load the info from the cache directly
+   //
+   // This is our entry point for checking a particular dylib: if it looks like one,
+   // we want to see the error result, if any, and subsequently check the cache
+   if (ARG1 != 0 && VG_(dyld_cache_might_be_in)((HChar *)ARG1)) {
+     *flags |= SfPostOnFail;
+   }
+#endif
 }
 POST(stat64)
 {
-   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
+   if (SUCCESS) {
+      POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
+   }
+
+#if DARWIN_VERS >= DARWIN_11_00
+   if (SUCCESS || (FAILURE && ERR == VKI_ENOENT)) {
+     // Reached on success as well as on ENOENT (the failure case needs `SfPostOnFail`, set in PRE);
+     // the path may be a dylib that only lives in the dyld cache, so try to load its debuginfo from there
+     if (VG_(dyld_cache_load_library)((HChar *)ARG1)) {
+       ML_(sync_mappings)("after", "stat64", 0);
+     }
+   }
+#endif
 }
 
 PRE(lstat64)
@@ -11082,6 +11107,109 @@ POST(kernelrpc_mach_port_type_trap)
 
 #endif /* DARWIN_VERS >= DARWIN_10_15 */
 
+
+/* ---------------------------------------------------------------------
+ Added for macOS 11.0 (Big Sur)
+ ------------------------------------------------------------------ */
+
+#if DARWIN_VERS >= DARWIN_11_00
+
+#define DYLD_VM_END_MWL (-1ull)
+
+PRE(shared_region_check_np)
+{
+  // On macOS 13+ dyld passes DYLD_VM_END_MWL instead of a pointer to forbid
+  // further uses of map_with_linking_np; that value must not be treated as memory.
+  const Bool is_pointer = !(DARWIN_VERS >= DARWIN_13_00 && ARG1 == DYLD_VM_END_MWL);
+
+  if (is_pointer) {
+    PRINT("shared_region_check_np(%#lx)", ARG1);
+  } else {
+    PRINT("shared_region_check_np(disable_map_with_linking)");
+  }
+  PRE_REG_READ1(kern_return_t, "shared_region_check_np", uint64_t*, start_address);
+  if (is_pointer) {
+    PRE_MEM_WRITE("shared_region_check_np(start_address)", ARG1, sizeof(uint64_t));
+  }
+}
+
+POST(shared_region_check_np)
+{
+  // On macOS 13+ dyld may pass DYLD_VM_END_MWL in place of a pointer;
+  // such a call leaves nothing to mark as written, so it is excluded
+  // below.
+  const Bool sentinel = DARWIN_VERS >= DARWIN_13_00 && ARG1 == DYLD_VM_END_MWL;
+
+  // The start address is only marked as written when the call returned 0.
+  if (!sentinel && RES == 0) {
+    POST_MEM_WRITE(ARG1, sizeof(uint64_t));
+    PRINT("shared dyld cache %#llx", *((uint64_t*) ARG1));
+  }
+}
+
+PRE(shared_region_map_and_slide_np)
+{ // Only traces the call and checks the six argument registers; memory behind the pointer arguments is not checked here.
+  PRINT("shared_region_map_and_slide_np(%ld, %lu, %#lx, %lu, %#lx, %lu)", SARG1, ARG2, ARG3, ARG4, ARG5, ARG6);
+  PRE_REG_READ6(kern_return_t, "shared_region_map_and_slide_np",
+    int, fd, uint32_t, count, const struct shared_file_mapping_np*, mappings,
+    uint32_t, slide, uint64_t*, slide_start, uint32_t, slide_size);
+}
+
+PRE(task_read_for_pid)
+{
+  const Bool has_out = (ARG3 != 0); // `t` is optional: only a non-NULL output location is checked
+  PRINT("task_read_for_pid(%s, %ld, %#lx)", name_for_port(ARG1), SARG2, ARG3);
+  PRE_REG_READ3(kern_return_t, "task_read_for_pid", mach_port_name_t, target_tport, int, pid, mach_port_name_t*, t);
+  if (has_out) {
+    PRE_MEM_WRITE("task_read_for_pid(t)", ARG3, sizeof(mach_port_name_t));
+  }
+}
+
+POST(task_read_for_pid)
+{
+  // Only a call that returned 0 with a non-NULL `t` has a port name to mark as written.
+  if (RES != 0 || ARG3 == 0) { return; }
+  POST_MEM_WRITE(ARG3, sizeof(mach_port_name_t));
+  PRINT("-> t:%s", name_for_port(*(mach_port_name_t*)ARG3));
+}
+
+PRE(ulock_wait2)
+{
+  PRINT("ulock_wait2(%ld, %#lx, %ld, %#lx, %ld)",
+        SARG1, ARG2, SARG3, ARG4, SARG5);
+  PRE_REG_READ5(int, "ulock_wait2",
+                uint32_t, operation, void*, addr, uint64_t, value,
+                uint64_t, timeout, uint64_t, value2);
+  // `operation` is an opcode in its low byte (XNU's UL_OPCODE_MASK) OR'ed with ULF_* flags: mask before comparing.
+  const UWord opcode = ARG1 & 0xFF;
+  // Only the *_WAIT64 opcodes compare a 64-bit word at `addr`; every other opcode compares 32 bits.
+  const Int value_size = (opcode == VKI_UL_COMPARE_AND_WAIT64
+                          || opcode == VKI_UL_COMPARE_AND_WAIT64_SHARED) ? 8 : 4;
+  if (ARG2 != 0) {
+    PRE_MEM_READ("ulock_wait2(addr)", ARG2, value_size);
+    *flags |= SfMayBlock;
+  } else {
+    // There is nothing to wait on without an address: report EINVAL directly.
+    SET_STATUS_Failure( VKI_EINVAL );
+  }
+}
+
+#if defined(VGA_arm64)
+PRE(sys_crossarch_trap)
+{ // arm64-only wrapper: traces the call and checks the single argument register; no memory is checked.
+  PRINT("sys_crossarch_trap(%lu)", ARG1);
+  PRE_REG_READ1(kern_return_t, "sys_crossarch_trap", uint32_t, name);
+}
+
+PRE(objc_bp_assist_cfg_np)
+{ // arm64-only; trace only, no register or memory checks. NOTE(review): its DECL_TEMPLATE in priv_syswrap-darwin.h is not arm64-guarded — confirm.
+  PRINT("objc_bp_assist_cfg_np(%#lx, %#lx)", ARG1, ARG2);
+}
+#endif
+
+#endif /* DARWIN_VERS >= DARWIN_11_00 */
+
+
 /* ---------------------------------------------------------------------
    syscall tables
    ------------------------------------------------------------------ */
@@ -11433,7 +11561,9 @@ const SyscallTableEntry ML_(syscall_table)[] = {
 // _____(__NR_mkfifo_extended),
 // _____(__NR_mkdir_extended),
 // _____(__NR_identitysvc),
-// _____(__NR_shared_region_check_np),
+#if DARWIN_VERS >= DARWIN_11_00
+   MACXY(__NR_shared_region_check_np, shared_region_check_np), // 294
+#endif
 // _____(__NR_shared_region_map_np),
 #if DARWIN_VERS >= DARWIN_10_6
 // _____(__NR_vm_pressure_mon...
 
[truncated message content]