Dizzzzy - 2016-11-19

Here is what I've also tried: this is a modified demo/cpp/012 sample that uses the technique from demo/cpp/003 to skip the Byte-Order-Mark on Windows. It exits the loop immediately, as the first token is <TERMINATION>:

#include <cstdio>
#include <iostream>
#include <string>

#include "max_Lexer"

/* When using multiple lexical analyzers, it must be compiled with 
 * QUEX_OPTION_MULTI and 'multi.i' must be included in one single file.      */
#include <quex/code_base/multi.i>

using namespace std;

/* Demo driver: snap the BOM off the input stream, then lex the file and
 * print one "<lexeme>\t<token-name>" line per token until <TERMINATION>.
 *
 * argv[1] (optional): input file path; defaults to "example-utf16.txt".
 * Returns 0 on normal termination, 1 if the input file cannot be opened.   */
int main(int argc, char** argv) 
{        
    // we want to have error outputs in stdout, so that the unit test could see it.
    FILE* fh = fopen(argc > 1 ? argv[1] : "example-utf16.txt", "rb");
    if (fh == NULL) {
        /* bom_snap() on a null FILE* would be undefined behavior.          */
        cout << "Could not open input file. Exit\n";
        return 1;
    }

    /* Either there is no BOM, or if there is one, then it must be UTF8 */
    QUEX_TYPE_BOM bom_type = quex::bom_snap(fh);

    cout << "Found BOM: " << quex::bom_name(bom_type) << endl;
    /* NOTE(review): the default input is a UTF-16 file and the lexer below
     * is constructed with the "UTF16" converter, yet this check rejects
     * every BOM except UTF-8/none. A UTF-16 BOM would exit here — confirm
     * the intended accepted set against the Quex BOM documentation.        */
    if ((bom_type & (QUEX_BOM_UTF_8 | QUEX_BOM_NONE)) == 0) {
        cout << "Found a non-UTF8 BOM. Exit\n";
        fclose(fh);
        return 0;
    }

    /* The lexer **must** be constructed after the BOM-cut                   */
    max::Lexer  max_lex(fh, "UTF16", true);
    max::Token* max_token = max_lex.token_p();

    do {
        (void)max_lex.receive();

        /* Keep the lexeme in a named std::string: taking c_str() of the
         * temporary returned by pretty_char_text() and using that pointer
         * on a later line dereferences a destroyed temporary (UB).         */
        const std::string lexeme = max_token->pretty_char_text();
        const int         L      = (int)max_token->text.length();

        printf("%s", lexeme.c_str());

        /* Pad the lexeme column to a fixed width of 10 characters.         */
        for (int i = 0; i < 10 - L; ++i) printf(" ");
        printf("\t");
        printf("%s\n", max_token->type_id_name().c_str());

    } while( max_token->type_id() != MAX_TKN_TERMINATION );

    fclose(fh);
    return 0;
}

demo/cpp/003 has the same issue, the first token is <TERMINATION>.

 

Last edit: Dizzzzy 2016-11-19