Simon Sobisch - 2023-11-10

First "roughly tested" new version of get_dupno():

/*
 * Get the next number in a set of duplicates.
 *
 * f  file whose indexed_file handle (f->file) holds the BDB state
 * i  index number of the key to inspect (selects p->db[i] / p->cursor[i])
 *
 * Returns the highest duplicate number found for the current key value
 * plus one; when no matching entry exists the result is 1.
 *
 * Instead of walking every duplicate, the key is bumped to the next
 * possible value and the entry just before that position is read; its
 * duplicate number is the highest one for the original key.
 * NOTE(review): the return value of the cursor-open call is not checked,
 * same as in the original - confirm whether that can fail here.
 */
static unsigned int
get_dupno (cob_file *f, const cob_u32_t i)
{
    struct indexed_file *p = f->file;
    DBC *cursor;
    int         ret;
    unsigned int        dupno;
    unsigned char *last_key_pos;
    unsigned char *pp;

    dupno = 0;
    p->db[i]->cursor (p->db[i], NULL, &p->cursor[i], 0);
    bdb_setkey (f, i);
    cursor = p->cursor[i];
    /* keep the unmodified key for the later comparisons */
    memcpy (p->temp_key, p->key.data, (size_t)p->maxkeylen);
    /* check for last position that we may increase to read the next key;
       scan backwards and stop at the first byte of the key at the latest
       (the bound must be tested against the scan pointer pp, not p) */
    last_key_pos = (unsigned char *)(p->key.data) + p->key.size - 1;
    for (pp = last_key_pos ; pp != (unsigned char *)p->key.data ; --pp) {
        if (*pp != 255) {
            break;
        }
    }
    /* most likely case: found a position, so increment */
    if (*pp != 255) {
        (*pp)++;
        if (pp != last_key_pos) {
            /* reset the trailing x'FF' bytes behind the incremented one */
            memset (pp + 1, 0, (size_t)(last_key_pos - pp));
        }
        /* we changed x'1234FF' to x'123500', time to read LT */
        ret = DB_SEQ (cursor, &p->key, &p->data, DB_SET_RANGE);
        if (ret == 0) {
            /* positioned on the first entry >= bumped key: step back one */
            ret = DB_SEQ (cursor, &p->key, &p->data, DB_PREV);
        } else {
            /* nothing at or after the bumped key: take the very last entry */
            ret = DB_SEQ (cursor, &p->key, &p->data, DB_LAST);
        }
        if (ret == 0 && memcmp (p->key.data, p->temp_key, (size_t)p->key.size) == 0) {
            /* if we did find an entry and have the expected alternate key,
               then we have our highest dupno value, otherwise it is zero */
            memcpy (&dupno, (cob_u8_ptr)p->data.data + p->primekeylen, sizeof (unsigned int));
        }
    } else {
        /* otherwise (key consists only of x'FF' bytes, so it cannot be
           incremented) walk forward over all entries of the current key;
           the last one read leaves its duplicate number in dupno */
        ret = DB_SEQ (cursor, &p->key, &p->data, DB_SET_RANGE);
        while (ret == 0 && memcmp (p->key.data, p->temp_key, (size_t)p->key.size) == 0) {
            memcpy (&dupno, (cob_u8_ptr)p->data.data + p->primekeylen, sizeof (unsigned int));
            ret = DB_SEQ (cursor, &p->key, &p->data, DB_NEXT);
        }
    }
    bdb_close_index (f, i);
    /* presumably converts the stored value to host byte order - verify
       against the COB_DUPSWAP definition */
    dupno = COB_DUPSWAP (dupno);
    return ++dupno;
}

The perf counters are quite promising; results before/after the change with the attached test program (99,999 WRITEs, each with 13 ALTERNATE keys, all WITH DUPLICATES, none with SUPPRESS KEY):

before, DB_HOME not set:

 Performance counter stats for './TEST000':

      3.276.556,19 msec task-clock                #    1,000 CPUs utilized
             4.425      context-switches          #    1,351 /sec
                66      cpu-migrations            #    0,020 /sec
           589.252      page-faults               #  179,839 /sec
13.375.054.917.980      cycles                    #    4,082 GHz
   159.572.560.233      stalled-cycles-frontend   #    1,19% frontend cycles idle
 2.691.405.182.354      stalled-cycles-backend    #   20,12% backend cycles idle
38.528.240.216.921      instructions              #    2,88  insn per cycle
                                                  #    0,07  stalled cycles per insn
 7.468.172.876.954      branches                  #    2,279 G/sec
     6.247.697.048      branch-misses             #    0,08% of all branches

    3278,036850505 seconds time elapsed

    3270,290794000 seconds user
       6,209564000 seconds sys

after, DB_HOME not set:

 Performance counter stats for './TEST000':

          9.643,00 msec task-clock                #    0,938 CPUs utilized
               203      context-switches          #   21,052 /sec
                 0      cpu-migrations            #    0,000 /sec
             1.309      page-faults               #  135,746 /sec
    29.443.585.691      cycles                    #    3,053 GHz
     1.465.199.855      stalled-cycles-frontend   #    4,98% frontend cycles idle
     3.812.028.517      stalled-cycles-backend    #   12,95% backend cycles idle
    32.663.151.670      instructions              #    1,11  insn per cycle
                                                  #    0,12  stalled cycles per insn
     7.152.981.190      branches                  #  741,779 M/sec
       231.401.434      branch-misses             #    3,24% of all branches

      10,281735939 seconds time elapsed

       6,127515000 seconds user
       3,514310000 seconds sys

After, DB_HOME set:

 Performance counter stats for './TEST000':

         11.162,13 msec task-clock                #    0,962 CPUs utilized
               355      context-switches          #   31,804 /sec
                 5      cpu-migrations            #    0,448 /sec
             1.839      page-faults               #  164,753 /sec
    37.714.775.104      cycles                    #    3,379 GHz
     1.790.539.738      stalled-cycles-frontend   #    4,75% frontend cycles idle
     4.006.272.268      stalled-cycles-backend    #   10,62% backend cycles idle
    44.745.187.067      instructions              #    1,19  insn per cycle
                                                  #    0,09  stalled cycles per insn
     9.768.296.247      branches                  #  875,128 M/sec
       259.880.737      branch-misses             #    2,66% of all branches

      11,606220654 seconds time elapsed

       8,232158000 seconds user
       2,926547000 seconds sys

This is a nice result: the new code takes 0.1% of the instructions and 0.32% of the time :-)

 

Last edit: Simon Sobisch 2023-11-10