This is not fixed in 0.39.
This bug also applies to the titles of the elements in
*-outline.html. With similar changes in
HtmlOutputDev::newOutlineLevel, and possibly in some other
places, you might fix this bug completely.
If you would like to refer to this comment somewhere else in this project, copy and paste the following link:
Logged In: YES
user_id=574221
diff -ruN pdftohtml-0.36/src/pdftohtml.cc
pdftohtml-0.36-bugfix#845283/src/pdftohtml.cc
--- pdftohtml-0.36/src/pdftohtml.cc 2003-06-25
00:41:27.000000000 +0200
+++ pdftohtml-0.36-bugfix#845283/src/pdftohtml.cc
2004-10-07 16:14:54.000000000 +0200
@@ -49,11 +49,12 @@
GBool noMerge = gFalse;
static char ownerPassword[33] = "";
static char userPassword[33] = "";
+static char cfgFileName[256] = "";
static char gsDevice[33] = "png16m";
static GBool printVersion = gFalse;
-static GString* getInfoString(Dict *infoDict, char *key);
-static GString* getInfoDate(Dict *infoDict, char *key);
+static GString* getInfoString(Dict *infoDict, char *key,
UnicodeMap *uMap);
+static GString* getInfoDate(Dict *infoDict, char *key,
UnicodeMap *uMap);
static char textEncName[128] = "";
@@ -98,6 +99,8 @@
"owner password (for encrypted files)"},
{"-upw", argString, userPassword,
sizeof(userPassword),
"user password (for encrypted files)"},
+ {"-cfg", argString, cfgFileName,
sizeof(cfgFileName),
+ "configuration file to use in place of .xpdfrc"},
{NULL}
};
@@ -116,6 +119,7 @@
GString *ownerPW, *userPW;
Object info;
char * extsList[] = {"png", "jpeg", "bmp", "pcx", "tiff",
"pbm", NULL};
+ UnicodeMap *uMap;
// parse args
ok = parseArgs(argDesc, &argc, argv);
@@ -147,6 +151,18 @@
}
}
+ // read config file
+ globalParams = new GlobalParams(cfgFileName);
+ if (textEncName[0]) {
+ globalParams->setTextEncoding(textEncName);
+ }
+
+ // get mapping to output encoding
+ if (!(uMap = globalParams->getTextEncoding())) {
+ error(-1, "Couldn't get text encoding");
+ goto error;
+ }
+
// open PDF file
if (ownerPassword[0]) {
ownerPW = new GString(ownerPassword);
@@ -232,13 +248,13 @@
doc->getDocInfo(&info);
if (info.isDict()) {
- docTitle = getInfoString(info.getDict(), "Title");
- author = getInfoString(info.getDict(), "Author");
- keywords = getInfoString(info.getDict(), "Keywords");
- subject = getInfoString(info.getDict(), "Subject");
- date = getInfoDate(info.getDict(), "ModDate");
+ docTitle = getInfoString(info.getDict(), "Title", uMap);
+ author = getInfoString(info.getDict(), "Author", uMap);
+ keywords = getInfoString(info.getDict(), "Keywords",
uMap);
+ subject = getInfoString(info.getDict(), "Subject", uMap);
+ date = getInfoDate(info.getDict(), "ModDate", uMap);
if( !date )
- date = getInfoDate(info.getDict(), "CreationDate");
+ date = getInfoDate(info.getDict(), "CreationDate",
uMap);
}
info.free();
if( !docTitle ) docTitle = new GString(htmlFileName);
@@ -365,18 +381,44 @@
return 0;
}
-static GString* getInfoString(Dict *infoDict, char *key) {
+static GString* getInfoString(Dict *infoDict, char *key,
UnicodeMap *uMap) {
Object obj;
GString *s1 = NULL;
+ GString *s2 = NULL;
+ GBool isUnicode;
+ Unicode u;
+ char buf[8];
+ int i, n;
if (infoDict->lookup(key, &obj)->isString()) {
s1 = new GString(obj.getString());
+ s2 = new GString("");
+ if ((s1->getChar(0) & 0xff) == 0xfe &&
+ (s1->getChar(1) & 0xff) == 0xff) {
+ isUnicode = gTrue;
+ i = 2;
+ } else {
+ isUnicode = gFalse;
+ i = 0;
+ }
+ while (i < obj.getString()->getLength()) {
+ if (isUnicode) {
+ u = ((s1->getChar(i) & 0xff) << 8) |
+ (s1->getChar(i+1) & 0xff);
+ i += 2;
+ } else {
+ u = s1->getChar(i) & 0xff;
+ ++i;
+ }
+ n = uMap->mapUnicode(u, buf, sizeof(buf));
+ s2->append(buf, n);
+ }
}
obj.free();
- return s1;
+ return s2;
}
-static GString* getInfoDate(Dict *infoDict, char *key) {
+static GString* getInfoDate(Dict *infoDict, char *key,
UnicodeMap *uMap) {
Object obj;
char *s;
int year, mon, day, hour, min, sec;
Logged In: YES
user_id=964337
This is not fixed in 0.39.
This bug also applies to the titles of the elements in
*-outline.html. With similar changes in
HtmlOutputDev::newOutlineLevel, and possibly in some other
places, you might fix this bug completely.