|
From: <sv...@va...> - 2013-01-18 06:20:08
|
philippe 2013-01-18 06:19:49 +0000 (Fri, 18 Jan 2013)
New Revision: 13238
Log:
Implement a more efficient allocation of small blocks which are never freed.
This generalises the "perm_malloc" function which was in ms_main.c
The new VG_(perm_malloc) is used in ms_main.c
and for execontext : when there are a lot of execontext, this
can save significant memory.
Modified files:
trunk/coregrind/m_execontext.c
trunk/coregrind/m_mallocfree.c
trunk/coregrind/pub_core_mallocfree.h
trunk/include/pub_tool_mallocfree.h
trunk/massif/ms_main.c
Modified: trunk/massif/ms_main.c (+3 -25)
===================================================================
--- trunk/massif/ms_main.c 2013-01-17 23:57:35 +00:00 (rev 13237)
+++ trunk/massif/ms_main.c 2013-01-18 06:19:49 +00:00 (rev 13238)
@@ -591,34 +591,12 @@
// parent node to all top-XPts.
static XPt* alloc_xpt;
-// Cheap allocation for blocks that never need to be freed. Saves about 10%
-// for Konqueror startup with --depth=40.
-static void* perm_malloc(SizeT n_bytes)
-{
- static Addr hp = 0; // current heap pointer
- static Addr hp_lim = 0; // maximum usable byte in current block
-
- #define SUPERBLOCK_SIZE (1 << 20) // 1 MB
-
- if (hp + n_bytes > hp_lim) {
- hp = (Addr)VG_(am_shadow_alloc)(SUPERBLOCK_SIZE);
- if (0 == hp)
- VG_(out_of_memory_NORETURN)( "massif:perm_malloc",
- SUPERBLOCK_SIZE);
- hp_lim = hp + SUPERBLOCK_SIZE - 1;
- }
-
- hp += n_bytes;
-
- return (void*)(hp - n_bytes);
-}
-
static XPt* new_XPt(Addr ip, XPt* parent)
{
- // XPts are never freed, so we can use perm_malloc to allocate them.
- // Note that we cannot use perm_malloc for the 'children' array, because
+ // XPts are never freed, so we can use VG_(perm_malloc) to allocate them.
+ // Note that we cannot use VG_(perm_malloc) for the 'children' array, because
// that needs to be resizable.
- XPt* xpt = perm_malloc(sizeof(XPt));
+ XPt* xpt = VG_(perm_malloc)(sizeof(XPt), vg_alignof(XPt));
xpt->ip = ip;
xpt->szB = 0;
xpt->parent = parent;
Modified: trunk/include/pub_tool_mallocfree.h (+15 -0)
===================================================================
--- trunk/include/pub_tool_mallocfree.h 2013-01-17 23:57:35 +00:00 (rev 13237)
+++ trunk/include/pub_tool_mallocfree.h 2013-01-18 06:19:49 +00:00 (rev 13238)
@@ -47,6 +47,7 @@
// possibly some more due to rounding up.
extern SizeT VG_(malloc_usable_size)( void* p );
+
// If tool is replacing malloc for the client, the below returns
// the effective client redzone as derived from the default
// provided by the tool, VG_(clo_redzone_size) and the minimum
@@ -60,6 +61,20 @@
__attribute__((noreturn))
extern void VG_(out_of_memory_NORETURN) ( const HChar* who, SizeT szB );
+// VG_(perm_malloc) is for allocating small blocks which are
+// never released. The overhead for such blocks is minimal.
+// VG_(perm_malloc) returns memory which is (at least) aligned
+// on a multiple of align.
+// Use the macro vg_alignof (type) to get a safe alignment for a type.
+// No other function can be used on these permanently allocated blocks.
+// In particular, do *not* call VG_(free) or VG_(malloc_usable_size)
+// or VG_(realloc).
+// Technically, these blocks will be returned from big superblocks
+// only containing such permanently allocated blocks.
+// Note that there is no cc cost centre : all such blocks will be
+// regrouped under the "perm_malloc" cost centre.
+extern void* VG_(perm_malloc) ( SizeT nbytes, Int align );
+
#endif // __PUB_TOOL_MALLOCFREE_H
/*--------------------------------------------------------------------*/
Modified: trunk/coregrind/m_execontext.c (+4 -3)
===================================================================
--- trunk/coregrind/m_execontext.c 2013-01-17 23:57:35 +00:00 (rev 13237)
+++ trunk/coregrind/m_execontext.c 2013-01-18 06:19:49 +00:00 (rev 13238)
@@ -420,9 +420,10 @@
/* Bummer. We have to allocate a new context record. */
ec_totstored++;
- new_ec = VG_(arena_malloc)( VG_AR_EXECTXT, "execontext.rEw2.2",
- sizeof(struct _ExeContext)
- + n_ips * sizeof(Addr) );
+ new_ec = VG_(arena_perm_malloc)( VG_AR_EXECTXT,
+ sizeof(struct _ExeContext)
+ + n_ips * sizeof(Addr),
+ vg_alignof(struct _ExeContext));
for (i = 0; i < n_ips; i++)
new_ec->ips[i] = ips[i];
Modified: trunk/coregrind/m_mallocfree.c (+88 -16)
===================================================================
--- trunk/coregrind/m_mallocfree.c 2013-01-17 23:57:35 +00:00 (rev 13237)
+++ trunk/coregrind/m_mallocfree.c 2013-01-18 06:19:49 +00:00 (rev 13238)
@@ -218,8 +218,17 @@
SizeT sblocks_used;
Superblock* sblocks_initial[SBLOCKS_SIZE_INITIAL];
Superblock* deferred_reclaimed_sb;
-
- // Stats only.
+
+ // VG_(arena_perm_malloc) returns memory from superblocks
+ // only used for permanent blocks. No overhead. These superblocks
+ // are not stored in sblocks array above.
+ Addr perm_malloc_current; // first byte free in perm_malloc sb.
+ Addr perm_malloc_limit; // maximum usable byte in perm_malloc sb.
+
+ // Stats only
+ SizeT stats__perm_bytes_on_loan;
+ SizeT stats__perm_blocks;
+
ULong stats__nreclaim_unsplit;
ULong stats__nreclaim_split;
/* total # of reclaim executed for unsplittable/splittable superblocks */
@@ -495,6 +504,13 @@
return & vg_arena[arena];
}
+static ArenaId arenaP_to_ArenaId ( Arena *a )
+{
+ ArenaId arena = a - vg_arena;
+ vg_assert(arena >= 0 && arena < VG_N_ARENAS);
+ return arena;
+}
+
SizeT VG_(malloc_effective_client_redzone_size)(void)
{
vg_assert(VG_(needs).malloc_replacement);
@@ -555,6 +571,11 @@
a->sblocks = & a->sblocks_initial[0];
a->sblocks_size = SBLOCKS_SIZE_INITIAL;
a->sblocks_used = 0;
+ a->deferred_reclaimed_sb = 0;
+ a->perm_malloc_current = 0;
+ a->perm_malloc_limit = 0;
+ a->stats__perm_bytes_on_loan= 0;
+ a->stats__perm_blocks = 0;
a->stats__nreclaim_unsplit = 0;
a->stats__nreclaim_split = 0;
a->stats__bytes_on_loan = 0;
@@ -1205,6 +1226,8 @@
}
}
+ arena_bytes_on_loan += a->stats__perm_bytes_on_loan;
+
if (arena_bytes_on_loan != a->stats__bytes_on_loan) {
# ifdef VERBOSE_MALLOC
VG_(printf)( "sanity_check_malloc_arena: a->bytes_on_loan %lu, "
@@ -1370,6 +1393,14 @@
}
}
+ if (a->stats__perm_bytes_on_loan > 0) {
+ tl_assert(n_ccs < N_AN_CCS-1);
+ anCCs[n_ccs].nBytes = a->stats__perm_bytes_on_loan;
+ anCCs[n_ccs].nBlocks = a->stats__perm_blocks;
+ anCCs[n_ccs].cc = "perm_malloc";
+ n_ccs++;
+ }
+
VG_(ssort)( &anCCs[0], n_ccs, sizeof(anCCs[0]), cmp_AnCC_by_vol );
for (k = 0; k < n_ccs; k++) {
@@ -1482,6 +1513,25 @@
return ((req_pszB + n) & (~n));
}
+static
+void add_one_block_to_stats (Arena* a, SizeT loaned)
+{
+ a->stats__bytes_on_loan += loaned;
+ if (a->stats__bytes_on_loan > a->stats__bytes_on_loan_max) {
+ a->stats__bytes_on_loan_max = a->stats__bytes_on_loan;
+ if (a->stats__bytes_on_loan_max >= a->next_profile_at) {
+ /* next profile after 10% more growth */
+ a->next_profile_at
+ = (SizeT)(
+ (((ULong)a->stats__bytes_on_loan_max) * 105ULL) / 100ULL );
+ if (VG_(clo_profile_heap))
+ cc_analyse_alloc_arena(arenaP_to_ArenaId (a));
+ }
+ }
+ a->stats__tot_blocks += (ULong)1;
+ a->stats__tot_bytes += (ULong)loaned;
+}
+
void* VG_(arena_malloc) ( ArenaId aid, const HChar* cc, SizeT req_pszB )
{
SizeT req_bszB, frag_bszB, b_bszB;
@@ -1637,20 +1687,7 @@
// Update stats
SizeT loaned = bszB_to_pszB(a, b_bszB);
- a->stats__bytes_on_loan += loaned;
- if (a->stats__bytes_on_loan > a->stats__bytes_on_loan_max) {
- a->stats__bytes_on_loan_max = a->stats__bytes_on_loan;
- if (a->stats__bytes_on_loan_max >= a->next_profile_at) {
- /* next profile after 10% more growth */
- a->next_profile_at
- = (SizeT)(
- (((ULong)a->stats__bytes_on_loan_max) * 105ULL) / 100ULL );
- if (VG_(clo_profile_heap))
- cc_analyse_alloc_arena(aid);
- }
- }
- a->stats__tot_blocks += (ULong)1;
- a->stats__tot_bytes += (ULong)loaned;
+ add_one_block_to_stats (a, loaned);
a->stats__nsearches += (ULong)stats__nsearches;
# ifdef DEBUG_MALLOC
@@ -2228,7 +2265,37 @@
return res;
}
+void* VG_(arena_perm_malloc) ( ArenaId aid, SizeT size, Int align )
+{
+ Arena* a;
+ ensure_mm_init(aid);
+ a = arenaId_to_ArenaP(aid);
+
+ align = align - 1;
+ size = (size + align) & ~align;
+
+ if (UNLIKELY(a->perm_malloc_current + size > a->perm_malloc_limit)) {
+ // Get a superblock, but we will not insert it into the superblock list.
+ // The superblock structure is not needed, so we will use the full
+ // memory range of it. This superblock is however counted in the
+ // mmaped statistics.
+ Superblock* new_sb = newSuperblock (a, size);
+ a->perm_malloc_limit = (Addr) &new_sb->payload_bytes[new_sb->n_payload_bytes - 1];
+
+ // We do not mind starting allocating from the beginning of the superblock
+ // as afterwards, we "lose" it as a superblock.
+ a->perm_malloc_current = (Addr)new_sb;
+ }
+
+ a->stats__perm_blocks += 1;
+ a->stats__perm_bytes_on_loan += size;
+ add_one_block_to_stats (a, size);
+
+ a->perm_malloc_current += size;
+ return (void*)(a->perm_malloc_current - size);
+}
+
/*------------------------------------------------------------*/
/*--- Tool-visible functions. ---*/
/*------------------------------------------------------------*/
@@ -2266,7 +2333,12 @@
return VG_(arena_malloc_usable_size)(VG_AR_CLIENT, p);
}
+void* VG_(perm_malloc) ( SizeT size, Int align )
+{
+ return VG_(arena_perm_malloc) ( VG_AR_TOOL, size, align );
+}
+
/*--------------------------------------------------------------------*/
/*--- end ---*/
/*--------------------------------------------------------------------*/
Modified: trunk/coregrind/pub_core_mallocfree.h (+5 -0)
===================================================================
--- trunk/coregrind/pub_core_mallocfree.h 2013-01-17 23:57:35 +00:00 (rev 13237)
+++ trunk/coregrind/pub_core_mallocfree.h 2013-01-18 06:19:49 +00:00 (rev 13238)
@@ -118,6 +118,11 @@
extern void VG_(mallinfo) ( ThreadId tid, struct vg_mallinfo* mi );
+// VG_(arena_perm_malloc) is for permanent allocation of small blocks.
+// See VG_(perm_malloc) in pub_tool_mallocfree.h for more details.
+// Do not call any VG_(arena_*) functions with these permanent blocks.
+extern void* VG_(arena_perm_malloc) ( ArenaId aid, SizeT nbytes, Int align );
+
extern void VG_(sanity_check_malloc_all) ( void );
extern void VG_(print_all_arena_stats) ( void );
|