#18 hotfix patch for bug #845283 (\fe\ff in titles)

open
nobody
None
5
2004-10-07
2004-10-07
Andreas Baumann
No

See the bug report (#845283).

Backport of code from xpdf 3.00 src/pdfinfo.cc.
The code has NOT been checked for memory leaks, as the
code has big problems with memory management
anyway.

Discussion

  • Logged In: YES
    user_id=574221

    diff -ruN pdftohtml-0.36/src/pdftohtml.cc
    pdftohtml-0.36-bugfix#845283/src/pdftohtml.cc
    --- pdftohtml-0.36/src/pdftohtml.cc 2003-06-25
    00:41:27.000000000 +0200
    +++ pdftohtml-0.36-bugfix#845283/src/pdftohtml.cc
    2004-10-07 16:14:54.000000000 +0200
    @@ -49,11 +49,12 @@
    GBool noMerge = gFalse;
    static char ownerPassword[33] = "";
    static char userPassword[33] = "";
    +static char cfgFileName[256] = "";
    static char gsDevice[33] = "png16m";
    static GBool printVersion = gFalse;

    -static GString* getInfoString(Dict *infoDict, char *key);
    -static GString* getInfoDate(Dict *infoDict, char *key);
    +static GString* getInfoString(Dict *infoDict, char *key,
    UnicodeMap *uMap);
    +static GString* getInfoDate(Dict *infoDict, char *key,
    UnicodeMap *uMap);

    static char textEncName[128] = "";

    @@ -98,6 +99,8 @@
    "owner password (for encrypted files)"},
    {"-upw", argString, userPassword,
    sizeof(userPassword),
    "user password (for encrypted files)"},
    + {"-cfg", argString, cfgFileName,
    sizeof(cfgFileName),
    + "configuration file to use in place of .xpdfrc"},
    {NULL}
    };

    @@ -116,6 +119,7 @@
    GString *ownerPW, *userPW;
    Object info;
    char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff",
    "pbm", NULL};
    + UnicodeMap *uMap;

    // parse args
    ok = parseArgs(argDesc, &argc, argv);
    @@ -147,6 +151,18 @@
    }
    }

    + // read config file
    + globalParams = new GlobalParams(cfgFileName);
    + if (textEncName[0]) {
    + globalParams->setTextEncoding(textEncName);
    + }
    +
    + // get mapping to output encoding
    + if (!(uMap = globalParams->getTextEncoding())) {
    + error(-1, "Couldn't get text encoding");
    + goto error;
    + }
    +
    // open PDF file
    if (ownerPassword[0]) {
    ownerPW = new GString(ownerPassword);
    @@ -232,13 +248,13 @@

    doc->getDocInfo(&info);
    if (info.isDict()) {
    - docTitle = getInfoString(info.getDict(), "Title");
    - author = getInfoString(info.getDict(), "Author");
    - keywords = getInfoString(info.getDict(), "Keywords");
    - subject = getInfoString(info.getDict(), "Subject");
    - date = getInfoDate(info.getDict(), "ModDate");
    + docTitle = getInfoString(info.getDict(), "Title", uMap);
    + author = getInfoString(info.getDict(), "Author", uMap);
    + keywords = getInfoString(info.getDict(), "Keywords",
    uMap);
    + subject = getInfoString(info.getDict(), "Subject", uMap);
    + date = getInfoDate(info.getDict(), "ModDate", uMap);
    if( !date )
    - date = getInfoDate(info.getDict(), "CreationDate");
    + date = getInfoDate(info.getDict(), "CreationDate",
    uMap);
    }
    info.free();
    if( !docTitle ) docTitle = new GString(htmlFileName);
    @@ -365,18 +381,44 @@
    return 0;
    }

    -static GString* getInfoString(Dict *infoDict, char *key) {
    +static GString* getInfoString(Dict *infoDict, char *key,
    UnicodeMap *uMap) {
    Object obj;
    GString *s1 = NULL;
    + GString *s2 = NULL;
    + GBool isUnicode;
    + Unicode u;
    + char buf[8];
    + int i, n;

    if (infoDict->lookup(key, &obj)->isString()) {
    s1 = new GString(obj.getString());
    + s2 = new GString("");
    + if ((s1->getChar(0) & 0xff) == 0xfe &&
    + (s1->getChar(1) & 0xff) == 0xff) {
    + isUnicode = gTrue;
    + i = 2;
    + } else {
    + isUnicode = gFalse;
    + i = 0;
    + }
    + while (i < obj.getString()->getLength()) {
    + if (isUnicode) {
    + u = ((s1->getChar(i) & 0xff) << 8) |
    + (s1->getChar(i+1) & 0xff);
    + i += 2;
    + } else {
    + u = s1->getChar(i) & 0xff;
    + ++i;
    + }
    + n = uMap->mapUnicode(u, buf, sizeof(buf));
    + s2->append(buf, n);
    + }
    }
    obj.free();
    - return s1;
    + return s2;
    }

    -static GString* getInfoDate(Dict *infoDict, char *key) {
    +static GString* getInfoDate(Dict *infoDict, char *key,
    UnicodeMap *uMap) {
    Object obj;
    char *s;
    int year, mon, day, hour, min, sec;

     
  • Lucas B.
    Lucas B.
    2006-08-27

    Logged In: YES
    user_id=964337

    This is not fixed in 0.39.
    This bug also applies to the titles of the elements in
    *-outline.html. With similar changes in
    HtmlOutputDev::newOutlineLevel, and possibly some other
    places, you might fix this bug completely.