I use Kyra on the WinCE platform, and everything works well. However, loading a .dat file takes too long: a .dat file containing a single 320*240 frame needs 15 seconds. So I tried to do some optimization work and found this code:

bool KrRle::Read( SDL_RWops* data )
{
    flags.FromU32( SDL_ReadLE16( data ) );
    deltaHotToOrigin.x = SDL_ReadLE32( data );
    deltaHotToOrigin.y = SDL_ReadLE32( data );
    size.x = SDL_ReadLE32( data );
    size.y = SDL_ReadLE32( data );
    delta.x = SDL_ReadLE32( data );
    delta.y = SDL_ReadLE32( data );
#if !defined(_WIN32_WCE)
    GLASSERT( InRange(  deltaHotToOrigin.x, -SANITY_ORIGIN_OFFSET, SANITY_ORIGIN_OFFSET ) );    // sanity
    GLASSERT( InRange(  deltaHotToOrigin.y, -SANITY_ORIGIN_OFFSET, SANITY_ORIGIN_OFFSET ) );    // sanity
    GLASSERT( InRange(  size.x, 0, SANITY_ORIGIN_OFFSET ) );                // sanity
    GLASSERT( InRange(  size.y, 0, SANITY_ORIGIN_OFFSET ) );                // sanity
#endif
    KrRGBA minColor, bits;
    minColor.c.red = ReadByte( data );
    bits.c.red = ReadByte( data );
    minColor.c.green = ReadByte( data );
    bits.c.green = ReadByte( data );
    minColor.c.blue = ReadByte( data );
    bits.c.blue = ReadByte( data );
    minColor.c.alpha = ReadByte( data );
    bits.c.alpha = ReadByte( data );

    GlReadBitStream reader( data );

    // We get 0 sizes for fonts, so be careful.
    if ( size.y > 0 )
    {
        if ( memoryPoolLine )
        {
            line = (KrRleLine*) memoryPoolLine->Alloc( sizeof( KrRleLine ) * size.y );
            flags.Set( MEMORYPOOL );
        }
        else
        {
            line = new KrRleLine[ size.y ];
        }

        for ( int i=0; i<size.y; i++ )
        {
            line[i].Clear();
            line[i].Read( &reader, minColor, bits );
//            #ifdef DEBUG
//            if ( line[i].NumSegments() > 0 && line[i].Segment( 0 )->Skip() < minx )
//                minx = line[i].Segment( 0 )->Skip();
//            #endif
#if !defined(_WIN32_WCE)
            GLASSERT( line[i].CalcSize() <= size.x );
#endif
        }
//        GLASSERT( minx == 0 );
    }
    else
    {
        line = 0;
    }

    reader.Flush();
    //GLOUTPUT( "RLE: alpha=%d\n", Alpha() );
    return ( size.y > 0 );
}

bool KrRleLine::Read( GlReadBitStream* reader, KrRGBA minColor, KrRGBA bits )
{
//    flags.FromU32( ReadByte( data ) );
//     int is8Bit = ( flags & COMPRESS8BIT );

//    if ( flags.IsSet( COMPRESS8BIT ) )
//    {
//        flags.Clear( COMPRESS8BIT );
//        nSegments = ReadByte( data );
//    }
//    else
//    {
//        nSegments = SDL_ReadLE16( data );
//    }

    flags.FromU32( reader->ReadBits( BITS_USED ) );
    int bitsNeeded = reader->ReadBits( 4 );
    nSegments = reader->ReadBits( bitsNeeded );

    // Lines can have no segments: any blank line.
#if !defined(_WIN32_WCE)
    GLASSERT( nSegments >= 0 );
    GLASSERT( nSegments < SANITY_SEGMENTS );    // sanity check
#endif

    if ( nSegments > 0 )
    {
        if ( KrRle::memoryPoolSegment )
        {
            segment = (KrRleSegment*) KrRle::memoryPoolSegment->Alloc( nSegments * sizeof( KrRleSegment ) );
            flags.Set( MEMORYPOOL );
        }
        else       
        {
            segment = new KrRleSegment[ nSegments ];
        }

        for ( int i=0; i<nSegments; i++ )
        {
            segment[i].Clear();
            segment[i].Read( reader, minColor, bits );
        }
    }
    return true;
}

bool KrRleSegment::Read(    GlReadBitStream* reader,
                            KrRGBA minColor,
                            KrRGBA bits )
{
//    flags.FromU32( ReadByte( data ) );
    flags.FromU32( reader->ReadBits( BITS_USED ) );

//    if ( flags.IsSet( COMPRESS8BIT ) )
//    {
//        flags.Clear( COMPRESS8BIT );
//        start = ReadByte( data );
//        end   = ReadByte( data );
//    }
//    else
//    {
//        start  = SDL_ReadLE16( data );
//        end    = SDL_ReadLE16( data );
//    }

    int bitsInRange = reader->ReadBits( 4 );
    start = reader->ReadBits( bitsInRange );
    end   = reader->ReadBits( bitsInRange );

#if !defined(_WIN32_WCE)
    GLASSERT( Len() > 0 );
    GLASSERT( Len() < 2000 );    // not true, but good reality check
#endif
    // Allocate the RGBA. Use a memory pool, if available.
    rgba = 0;
    if ( KrRle::memoryPoolRGBA )
    {
        rgba = (KrRGBA*) KrRle::memoryPoolRGBA->Alloc( Len() * sizeof( KrRGBA ) );
        flags.Set( MEMORYPOOL );
    }
    if ( !rgba )
    {
        rgba = new KrRGBA[ Len() ];
    }
    #ifdef DEBUG
        numRGBA += Len();
    #endif

    for ( int i=0; i<Len(); ++i )
    {
//        SDL_RWread( data, &rgba[i].c.red,   1, 1);
//        SDL_RWread( data, &rgba[i].c.green, 1, 1);
//        SDL_RWread( data, &rgba[i].c.blue,  1, 1);
//
//        if ( flags.IsSet( ALPHA ) )
//            SDL_RWread( data, &rgba[i].c.alpha, 1, 1);
//        else
//            rgba[i].c.alpha = 255;
        rgba[i].c.red   = minColor.c.red   + reader->ReadBits( bits.c.red );
        rgba[i].c.green = minColor.c.green + reader->ReadBits( bits.c.green );
        rgba[i].c.blue  = minColor.c.blue  + reader->ReadBits( bits.c.blue );
        rgba[i].c.alpha = minColor.c.alpha + reader->ReadBits( bits.c.alpha );
    }

    return true;
}

U32 GlReadBitStream::ReadBits( int nBitsInData )
{
    if ( nBitsInData == 0 )
        return 0;

    U32 val = 0;

    while ( nBitsInData )
    {
        if ( bitsLeft == 0 )
        {
            bitsLeft = 8;
            //accum = fgetc( fp );
            SDL_RWread( fp, &accum, 1, 1 );
        }

        if ( nBitsInData <= bitsLeft )
        {
            val |= accum >> ( bitsLeft - nBitsInData );

            bitsLeft -= nBitsInData;
            nBitsInData = 0;

            // Trim the accumulator
            U32 mask = 0xff;
            mask >>= ( 8 - bitsLeft );
            accum &= mask;

        }
        else
        {
            // There are more bits needed than what is in
            // the accumulator

            val |= accum << ( nBitsInData - bitsLeft );

            nBitsInData -= bitsLeft;
            bitsLeft = 0;
        }
    }
    return val;
}

But I'm not familiar with C++, so I can't figure out how to make this code run faster. Can anyone give me some help? Thanks in advance.