CVS: tvision/classes codepage.cc,2.24.2.14,2.24.2.15

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Update of /cvsroot/tvision/tvision/classes
In directory sfp-cvs-1.v30.ch3.sourceforge.com:/tmp/cvs-serv2537/classes

Modified Files:
      Tag: r2_0_1u
	codepage.cc 
Log Message:
* Added: [TVCodePage] Functions to convert the app cp to and from UTF8
(convertStrUTF8_2_CP and convertStrCP_2_UTF8).
* Added: [X11] Support for copy/paste using UTF8.


Index: codepage.cc
===================================================================
RCS file: /cvsroot/tvision/tvision/classes/codepage.cc,v
retrieving revision 2.24.2.14
retrieving revision 2.24.2.15
diff -C2 -d -r2.24.2.14 -r2.24.2.15
*** codepage.cc	10 Jul 2008 16:10:05 -0000	2.24.2.14
--- codepage.cc	1 Aug 2017 14:10:40 -0000	2.24.2.15
***************
*** 1,5 ****
  /**[txh]********************************************************************
  
!   Copyright 1996-2003 by Salvador Eduardo Tropea (SET)
    This file is covered by the GPL license.
  
--- 1,5 ----
  /**[txh]********************************************************************
  
!   Copyright 1996-2017 by Salvador Eduardo Tropea (SET)
    This file is covered by the GPL license.
  
***************
*** 2962,2965 ****
--- 2962,3160 ----
  
    Description:
+   Converts a string containing UTF8 to Application Code Page. The len
+ indicates how many chars from orig we must process. The dest buffer must
+ be long enough to hold the result. If dest is NULL this routine just
+ measures the length of the result.
+   
+   Return: -1 if the UTF8 sequence is wrong, the length of the converted
+ text otherwise.
+   
+ ***************************************************************************/
+ 
+ int TVCodePage::convertStrUTF8_2_CP(char *dest, const char *orig,
+                                     unsigned len)
+ {
+  const uchar *utf8=(const uchar *)orig;
+  uchar *d=(uchar *)dest;
+  unsigned unicode;
+  int retLen=0;
+ 
+  while (len--)
+    {
+     // Fetch the lead byte only after the length check above, so no byte
+     // outside orig[0..len-1] is ever read (not even when len is 0).
+     uchar c=*(utf8++);
+     unsigned extra; // Continuation bytes that must follow the lead byte
+     if ((c&0x80)==0)
+       {// 1 byte: 0xxxxxxx
+        unicode=c;
+        extra=0;
+       }
+     else if ((c&0xE0)==0xC0)
+       {// 2 bytes: 110xxxxx
+        unicode=c&0x1F;
+        extra=1;
+       }
+     else if ((c&0xF0)==0xE0)
+       {// 3 bytes: 1110xxxx
+        unicode=c&0x0F;
+        extra=2;
+       }
+     else if ((c&0xF8)==0xF0)
+       {// 4 bytes: 11110xxx
+        unicode=c&0x07;
+        extra=3;
+       }
+     else
+       {// A stray continuation byte (10xxxxxx) or an invalid lead byte
+        // (11111xxx) can't start a sequence.
+        return -1;
+       }
+     // The whole sequence must fit in the bytes we are allowed to read
+     if (len<extra) return -1;
+     len-=extra;
+     while (extra--)
+       {
+        c=*(utf8++);
+        // Every continuation byte must look like 10xxxxxx
+        if ((c&0xC0)!=0x80) return -1;
+        unicode=(unicode<<6) | (c&0x3F);
+       }
+     // Convert the unicode value to the app code page
+     if (dest)
+       {
+        // Values >0xFFFF, or when search() gives 0xFFFF, become an escape
+        if (unicode>0xFFFF)
+           d+=sprintf((char *)d,"&#x%08X;",unicode); // &#x00000000; 12
+        else
+          {
+           uint16 cp=unicodeToApp->search(unicode);
+           if (cp==0xFFFF)
+              d+=sprintf((char *)d,"&#x%04X;",unicode); // &#x0000; 8
+           else
+              *(d++)=(uchar)cp;
+          }
+       }
+     else
+       {
+        // Just measuring: add the size the branch above would have written
+        if (unicode>0xFFFF)
+           retLen+=12;
+        else
+          {
+           uint16 cp=unicodeToApp->search(unicode);
+           retLen+=cp==0xFFFF ? 8 : 1;
+          }
+       }
+    }
+  // When writing, add the EOS and compute the length from the pointer
+  if (dest)
+    {
+     *d=0;
+     retLen=d-(uchar *)dest;
+    }
+  return retLen;
+ }
+ 
+ // Matches the escape "&#x" + digits hex chars + ";" at o (digits+4 readable
+ // bytes). Returns the escape length after the '&' and sets value, else 0.
+ static unsigned cpHexEscape(const uchar *o, unsigned digits, unsigned &value)
+ {
+  unsigned v=0;
+  if (o[1]!='#' || o[2]!='x' || o[digits+3]!=';') return 0;
+  for (unsigned i=3; i<digits+3; i++)
+     {
+      uchar c=o[i], l=c|0x20; // l is c in lowercase when c is a letter
+      if (c>='0' && c<='9')      v=(v<<4) | (c-'0');
+      else if (l>='a' && l<='f') v=(v<<4) | (l-'a'+10);
+      else return 0;
+     }
+  value=v;
+  return digits+3;
+ }
+ 
+ /**[txh]********************************************************************
+ 
+   Description:
+   Converts a string containing Application Code Page to UTF8. Only len bytes
+ are processed. If dest is NULL it just measures the destination length. The
+ &#xHHHH; and &#xHHHHHHHH; escapes are replaced by the value they encode.
+   
+   Return: The destination length, or -1 if the string can't be represented
+ using UTF8.
+   
+ ***************************************************************************/
+ 
+ int TVCodePage::convertStrCP_2_UTF8(char *dest, const char *orig,
+                                     unsigned len)
+ {
+  const uchar *o=(const uchar *)orig;
+  uchar *d=(uchar *)dest;
+  unsigned unicode;
+  int retLen=0;
+ 
+  while (len--)
+    {
+     if (*o=='&')
+       {
+        // len already excludes the '&', so it counts the bytes after it. A
+        // failed 12 bytes (U32) match still lets the 8 bytes (U16) form be tried.
+        unicode='&';
+        unsigned skip=len>=11 ? cpHexEscape(o,8,unicode) : 0;
+        if (!skip && len>=7) skip=cpHexEscape(o,4,unicode);
+        len-=skip;
+        o+=skip;
+       }
+     else
+        unicode=appToUnicode[*o];
+     o++;
+     // Now encode the unicode value using UTF8
+     if (unicode<=0x7F)
+       {
+        retLen++;
+        if (d)
+           *(d++)=unicode;
+       }
+     else if (unicode<=0x7FF)
+       {
+        retLen+=2;
+        if (d)
+          {
+           *(d++)=0xC0 | (unicode>>6);
+           *(d++)=0x80 | (unicode&0x3F);
+          }
+       }
+     else if (unicode<=0xFFFF)
+       {
+        retLen+=3;
+        if (d)
+          {
+           *(d++)=0xE0 | (unicode>>12);
+           *(d++)=0x80 | ((unicode>>6)&0x3F);
+           *(d++)=0x80 | (unicode&0x3F);
+          }
+       }
+     else if (unicode<=0x1FFFFF)
+       {
+        retLen+=4;
+        if (d)
+          {
+           *(d++)=0xF0 | (unicode>>18);
+           *(d++)=0x80 | ((unicode>>12)&0x3F);
+           *(d++)=0x80 | ((unicode>> 6)&0x3F);
+           *(d++)=0x80 | (unicode&0x3F);
+          }
+       }
+     else // UTF8 doesn't support bigger values
+        return -1;
+    }
+  if (d)
+     *d=0;
+  return retLen;
+ }
+ 
+ /**[txh]********************************************************************
+ 
+   Description:
    Converts a string containing Application Code Page to Unicode. The
  destination must be (len+1)*2 bytes for the EOS.
***************
*** 2973,2977 ****
  {
   uint16 *d=(uint16 *)dest;
!  uchar *o=(uchar *)orig;
   while (len--)
      *(d++)=appToUnicode[*(o++)];
--- 3168,3172 ----
  {
   uint16 *d=(uint16 *)dest;
!  const uchar *o=(const uchar *)orig;
   while (len--)
      *(d++)=appToUnicode[*(o++)];