--- rxvt-2.7.10/autoconf/config.h.in.utf8 2003-02-20 01:35:55.000000000 +0100 +++ rxvt-2.7.10/autoconf/config.h.in 2004-07-20 15:49:08.000000000 +0200 @@ -186,6 +186,8 @@ /* Build shared library version - specify via configure only */ #undef LIBRXVT +#undef MULTIBYTE_CHAR + /* Define if you want Menubar support */ #undef MENUBAR --- rxvt-2.7.10/autoconf/configure.in.utf8 2003-03-07 03:32:27.000000000 +0100 +++ rxvt-2.7.10/autoconf/configure.in 2004-07-20 15:49:08.000000000 +0200 @@ -59,6 +59,7 @@ support_scroll_next=no support_scroll_xterm=no multichar_set=no +multibyte_char=no dnl# -------------------------------------------------------------------------- dnl# CHECKING COMMAND LINE OPTIONS @@ -101,6 +102,12 @@ AC_DEFINE(MULTICHAR_SET, 1, Define to enable multichar glyph language support) multichar_set=yes fi]) +AC_ARG_ENABLE(multibyte-char, + [ --enable-multibyte-char enable multibyte-char language support], + [if test x$enableval = xyes; then + AC_DEFINE(MULTIBYTE_CHAR) multibyte_char=yes + fi]) + AC_ARG_WITH(encoding, [ --with-encoding=NAME set language default encoding to NAME (default: sjis) (eucj|sjis|big5|gb|kr|noenc)], @@ -1273,6 +1280,7 @@ The following are set in config.h " echo " multichar glyph languages: "$multichar_set +echo " multibyte-char languages: "${multibyte_char} echo " pty/tty type: "$rxvt_cv_ptys if test x$support_utmp != xyes; then echo " utmp support: disabled --- rxvt-2.7.10/src/gbk16.c.utf8 2004-07-20 15:49:08.000000000 +0200 +++ rxvt-2.7.10/src/gbk16.c 2004-07-20 15:49:08.000000000 +0200 @@ -0,0 +1,108 @@ +/*--------------------------------*-C-*--------------------------------------* + * File: gbk16.c + *---------------------------------------------------------------------------* + * Copyright (C) 2000 Xianping Ge + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * 
(at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + *--------------------------------------------------------------------------*/ +/* + * Convert between GBK multibyte string (i.e., mixed ASCII and GBK characters) + * and gbk_t string formats. + * gbk_t strings can be used in XDrawString16, XDrawImageString16. + * + * gbk_t is currently typedef'ed to be 'u_int16_t', as X Window can only + * handle the 16-bit XChar2b, not 32-bit. + */ + +/* + * To enable GB18030-2000 encoding (up-to-4-byte Chinese character encoding), + * change gbk_t to + * typedef u_int32_t gbk_t; + */ +typedef u_int16_t gbk_t; + +typedef unsigned long uint32_t; +/* Decode len bytes of mixed ASCII/GBK text into a static gbk_t buffer that the next call reuses; the number of units produced is stored in *gbk_len. A multi-byte sequence cut off at the end of str is completed by the following call. */ +gbk_t * +cstring_to_gbk(const unsigned char *str, int len, int *gbk_len) +{ + static gbk_t *gbk_str = NULL; + static int max_len_gbk_str = 0; + static uint32_t ch; + static int i_byte = 0; + int i_str, i_gbk_str; + *gbk_len = 0; /* callers read the count even when nothing is converted */ + if (len <= 0) + return NULL; + if (max_len_gbk_str < len + 1) { + max_len_gbk_str = len + 1; + gbk_str = rxvt_realloc(gbk_str, (sizeof(gbk_t) * max_len_gbk_str)); + } + + for (i_gbk_str = 0, i_str = 0; i_str < len; i_str++) { + if (i_byte > 0) { + uint32_t byte = str[i_str]; + ch <<= 8; + ch |= byte; + ++i_byte; + if (i_byte==4 || (i_byte == 2 && (byte >= 0x40))) { + gbk_str[i_gbk_str++] = ch; + i_byte = 0; + } + } else { + ch = str[i_str]; + if (ch < 0x80) { + gbk_str[i_gbk_str++] = ch; + } else { + i_byte = 1; + } + } + } + + *gbk_len = i_gbk_str; + return gbk_str; +} +/* Encode len gbk_t units as a NUL-terminated byte string held in a static buffer that the next call overwrites. */ +unsigned char * +gbk_to_cstring(gbk_t * gbk_str, int len) +{ + static unsigned char *buf = NULL; + static int max_len_buf = 0; + 
unsigned char *outptr; + int i; + + if (max_len_buf < len * 4 + 1) { + max_len_buf = len * 4 + 1; + buf = rxvt_realloc(buf, max_len_buf); + } + outptr = buf; + for (i = 0; i < len; i++) { + uint32_t wc = gbk_str[i]; + + if (wc < 0x80u) + *outptr++ = (unsigned char)wc; + else if (wc <= 0xffffu) { /* 0xffff is still a two-byte value */ + *outptr++ = (unsigned char)((wc >> 8) & 0xffu); + *outptr++ = (unsigned char)(wc & 0xffu); + } else { + *outptr++ = (unsigned char)((wc >> 24) & 0xffu); + *outptr++ = (unsigned char)((wc >> 16) & 0xffu); + *outptr++ = (unsigned char)((wc >> 8) & 0xffu); + *outptr++ = (unsigned char)(wc & 0xffu); + } + } + *outptr = '\0'; + return buf; +} --- rxvt-2.7.10/src/gbk16.h.utf8 2004-07-20 15:49:08.000000000 +0200 +++ rxvt-2.7.10/src/gbk16.h 2004-07-20 15:49:08.000000000 +0200 @@ -0,0 +1,39 @@ +/*--------------------------------*-C-*--------------------------------------* + * File: gbk16.h + *---------------------------------------------------------------------------* + * Copyright (C) 2000 Xianping Ge + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + *--------------------------------------------------------------------------*/ +/* + * Convert between GBK multibyte string (i.e., mixed ASCII and GBK characters) + * and gbk_t string formats. + * gbk_t strings can be used in XDrawString16, XDrawImageString16. 
+ * + * gbk_t is currently typedef'ed to be 'u_int16_t', as X Window can only + * handle the 16-bit XChar2b, not 32-bit. + */ + +/* + * To enable GB13080-2000 encoding (up-to-4-byte Chinese character encoding), + * change gbk_t to + * typedef u_int32_t gbk_t; + */ +#ifndef GBK16_4_RXVT_ +#define GBK16_4_RXVT_ +typedef u_int16_t gbk_t; +gbk_t * cstring_to_gbk(const unsigned char *str, int len, int *gbk_len); +unsigned char * gbk_to_cstring(gbk_t * gbk_str, int len); +#endif /* GBK16_4_RXVT_ */ --- rxvt-2.7.10/src/main.c.utf8 2002-10-24 03:52:33.000000000 +0200 +++ rxvt-2.7.10/src/main.c 2004-07-20 15:49:08.000000000 +0200 @@ -435,6 +435,13 @@ } } /* TODO: BOUNDS */ +#ifdef MULTIBYTE_CHAR + /* XGE: fix this hack with, e.g., command line options, etc! */ + r->TermWin.propfont = 0; + r->TermWin.fwidth = 8; + r->TermWin.fheight = 16; +#endif + r->TermWin.width = r->TermWin.ncol * r->TermWin.fwidth; r->TermWin.height = r->TermWin.nrow * r->TermWin.fheight; max_width = MAX_COLS * r->TermWin.fwidth; @@ -644,6 +651,13 @@ r->TermWin.fwidth = fw; r->TermWin.fheight = fh; +#ifdef MULTIBYTE_CHAR + /* XGE: fix this hack with, e.g., command line options, etc! */ + r->TermWin.propfont = 0; + r->TermWin.fwidth = fw = 8; + r->TermWin.fheight = fh = 16; +#endif + /* check that size of boldFont is okay */ #ifndef NO_BOLDFONT if (recheckfonts) { --- rxvt-2.7.10/src/rsizes.h.utf8 2004-07-20 15:49:08.000000000 +0200 +++ rxvt-2.7.10/src/rsizes.h 2004-07-20 15:49:08.000000000 +0200 @@ -0,0 +1,22 @@ +/* + * If we haven't pulled in typedef's like int16_t , then do them ourself + */ + +/* type of (normal and unsigned) basic sizes */ +/* e.g. typedef short int16_t */ + +/* e.g. typedef unsigned short u_int16_t */ + +/* e.g. typedef int int32_t */ + +/* e.g. typedef unsigned int u_int32_t */ + +/* e.g. typedef long int64_t */ + +/* e.g. 
typedef unsigned long u_int64_t */ + + +/* whatever normal size corresponds to a integer pointer */ +#define intp_t int32_t +/* whatever normal size corresponds to a unsigned integer pointer */ +#define u_intp_t u_int32_t --- rxvt-2.7.10/src/rxvt.h.utf8 2003-03-07 02:17:18.000000000 +0100 +++ rxvt-2.7.10/src/rxvt.h 2004-07-20 23:20:57.000000000 +0200 @@ -358,6 +358,9 @@ # define IS_MULTI1(r) (0) # define IS_MULTI2(r) (0) #endif +#ifdef MULTIBYTE_CHAR +#define RS_trailing_column 0x40000000u +#endif #define RS_fontMask (RS_acsFont|RS_ukFont) #define RS_baseattrMask (RS_Bold|RS_Blink|RS_RVid|RS_Uline) @@ -1074,7 +1077,7 @@ char *env_display; /* environmental variable DISPLAY */ char *env_term; /* environmental variable TERM */ char *env_colorfgbg; - char *buffer; + text_t *buffer; char *locale; char charsets[4]; unsigned char *v_buffer; /* pointer to physical buffer */ --- rxvt-2.7.10/src/screen.c.utf8 2003-03-23 17:56:06.000000000 +0100 +++ rxvt-2.7.10/src/screen.c 2004-07-20 23:19:35.000000000 +0200 @@ -28,6 +28,16 @@ #include "rxvt.h" /* NECESSARY */ #include "screen.intpro" /* PROTOS for internal routines */ +#ifdef MULTIBYTE_CHAR +#ifdef GBK16 +#include "gbk16.c" +#else /* GBK16 */ +#include "utf8.c" +#include "wcwidth.c" +#endif /* GBK16 */ +#define IS_TRAILING_COLUMN(r) ((r) & RS_trailing_column) +#endif + #include <X11/Xmd.h> /* get the typedef for CARD32 */ /* ------------------------------------------------------------------------- */ @@ -128,10 +138,17 @@ void rxvt_blank_line(text_t *et, rend_t *er, unsigned int width, rend_t efs) { - MEMSET(et, ' ', (size_t)width); efs &= ~RS_baseattrMask; +#ifdef MULTIBYTE_CHAR + for (; width--;) { + *et++ = ' '; + *er++ = efs; + } +#else + MEMSET(et, ' ', (size_t)width); for (; width--;) *er++ = efs; +#endif } /* ------------------------------------------------------------------------- */ @@ -150,10 +167,22 @@ tp[row] = rxvt_malloc(sizeof(text_t) * width); rp[row] = rxvt_malloc(sizeof(rend_t) * width); } - MEMSET(tp[row], ' ', 
width); efs &= ~RS_baseattrMask; +#ifdef MULTIBYTE_CHAR + { + text_t *et = tp[row]; + rend_t *er = rp[row]; + + for (; width--;) { + *et++ = ' '; + *er++ = efs; + } + } +#else + MEMSET(tp[row], ' ', width); for (er = rp[row]; width--;) *er++ = efs; +#endif } /* ------------------------------------------------------------------------- * @@ -738,11 +767,11 @@ * Add text given in of length to screen struct */ /* EXTPROTO */ -void -rxvt_scr_add_lines(rxvt_t *r, const unsigned char *str, int nlines, int len) +static void +rxvt_add_lines(rxvt_t *r, const text_t *str, int nlines, int len) { unsigned char checksel, clearsel; - char c; + text_t c; int i, row, last_col; text_t *stp; rend_t *srp; @@ -767,6 +796,7 @@ r->screen.cur.row -= nlines; } } +#ifndef MULTIBYTE_CHAR #ifdef DEBUG_STRICT assert(r->screen.cur.col < last_col); assert((r->screen.cur.row < r->TermWin.nrow) @@ -776,6 +806,7 @@ MIN_IT(r->screen.cur.row, r->TermWin.nrow - 1); MAX_IT(r->screen.cur.row, -(int32_t)r->TermWin.nscrolled); #endif +#endif row = r->screen.cur.row + r->TermWin.saveLines; checksel = (r->selection.op @@ -799,8 +830,10 @@ rxvt_scr_tab(r, 1); continue; case '\n': +#ifndef MULTIBYTE_CHAR if (r->screen.tlen[row] != -1) /* XXX: think about this */ MAX_IT(r->screen.tlen[row], r->screen.cur.col); +#endif r->screen.flags &= ~Screen_WrapNext; if (r->screen.cur.row == r->screen.bscroll) rxvt_scroll_text(r, r->screen.tscroll, r->screen.bscroll, 1, 0); @@ -811,8 +844,10 @@ RESET_CHSTAT(h); continue; case '\r': +#ifndef MULTIBYTE_CHAR if (r->screen.tlen[row] != -1) /* XXX: think about this */ MAX_IT(r->screen.tlen[row], r->screen.cur.col); +#endif r->screen.flags &= ~Screen_WrapNext; r->screen.cur.col = 0; RESET_CHSTAT(h); @@ -852,7 +887,9 @@ clearsel = 1; } if (r->screen.flags & Screen_WrapNext) { +#ifndef MULTIBYTE_CHAR r->screen.tlen[row] = -1; +#endif if (r->screen.cur.row == r->screen.bscroll) rxvt_scroll_text(r, r->screen.tscroll, r->screen.bscroll, 1, 0); else if (r->screen.cur.row < 
(r->TermWin.nrow - 1)) @@ -876,6 +913,47 @@ srp[r->screen.cur.col + 1] &= ~RS_multiMask; } #endif +#ifdef MULTIBYTE_CHAR + { + int width, j; + u_int16_t col = r->screen.cur.col; + + /* Is it a double-width CJK(?) character? */ +#ifdef GBK16 + if (c & 0xff00u) + width = 2; + else + width = 1; +#else /* GBK16 */ + width = Markus_Kuhn_wcwidth(c); +#endif /* GBK16 */ + if (width < 0) width = 1; /* wcwidth reports -1 for control characters; never step the column backwards */ + if (col + width > last_col) { + if (r->screen.flags & Screen_Autowrap) { + r->screen.flags |= Screen_WrapNext; + if (r->screen.tlen[row] > 0) + r->screen.tlen[row] *= -1; + /* XGE: what if screen resizes? */ + i--; /* retry this character once the pending wrap has been taken */ + } + else /* no autowrap: drop the character, retrying it would make no progress */ + r->screen.flags &= ~Screen_WrapNext; + continue; + } + + stp[col] = c; + srp[col] = h->rstyle; +#if 0 /* XGE: optimize this? */ + srp[col] &= ~RS_trailing_column; +#endif + for (j=1; j<width; j++) + srp[col+j] = h->rstyle | RS_trailing_column; + col += width; + r->screen.cur.col = col; + MAX_IT(r->screen.tlen[row], col); + } + } +#else /* MULTIBYTE_CHAR */ stp[r->screen.cur.col] = c; srp[r->screen.cur.col] = h->rstyle; if (r->screen.cur.col < (last_col - 1)) @@ -885,9 +963,11 @@ if (r->screen.flags & Screen_Autowrap) r->screen.flags |= Screen_WrapNext; } + } if (r->screen.tlen[row] != -1) /* XXX: think about this */ MAX_IT(r->screen.tlen[row], r->screen.cur.col); +#endif /* MULTIBYTE_CHAR */ /* * If we wrote anywhere in the selected area, kill the selection @@ -906,6 +986,30 @@ /* ------------------------------------------------------------------------- */ /* + * Add text given in <str> of length <len> to screen struct; with MULTIBYTE_CHAR the bytes are decoded to text_t first + */ +/* EXTPROTO */ +void +rxvt_scr_add_lines(rxvt_t *r, const unsigned char *str, int nlines, int len) +{ +#ifdef MULTIBYTE_CHAR + text_t *text_str; + int text_len = 0; /* stays 0 if the decoder returns without storing a count */ +#ifdef GBK16 +#define BYTE_STR_TO_TEXT_T_STR cstring_to_gbk +#else /* GBK16 */ +#define BYTE_STR_TO_TEXT_T_STR utf8_to_ucs2 +#endif /* GBK16 */ + text_str = BYTE_STR_TO_TEXT_T_STR(str, len, &text_len); + rxvt_add_lines(r, text_str, nlines, text_len); +#else /* MULTIBYTE_CHAR */ + rxvt_add_lines(r, str, nlines, len); +#endif /* MULTIBYTE_CHAR */ +} 
+ + +/* ------------------------------------------------------------------------- */ +/* * Process Backspace. Move back the cursor back a position, wrap if have to * XTERM_SEQ: CTRL-H */ @@ -1317,6 +1421,19 @@ rxvt_selection_check(r, 1); MIN_IT(count, (r->TermWin.ncol - r->screen.cur.col)); +#ifdef MULTIBYTE_CHAR + /* Make sure a multi-column character is not split */ + { + rend_t *srp; + row = r->screen.cur.row + r->TermWin.saveLines; + col = r->screen.cur.col + count; + srp = r->screen.rend[row]; + while (col < r->TermWin.ncol && IS_TRAILING_COLUMN(srp[col])) + col++; + count = col - r->screen.cur.col; + } +#endif + row = r->screen.cur.row + r->TermWin.saveLines; r->screen.flags &= ~Screen_WrapNext; @@ -1330,7 +1447,11 @@ stp[col] = stp[col - count]; srp[col] = srp[col - count]; } +#ifndef MULTIBYTE_CHAR if (*slp != -1) { +#else + if (*slp >= 0) { +#endif *slp += count; MIN_IT(*slp, r->TermWin.ncol); } @@ -1365,8 +1486,13 @@ rxvt_blank_line(&(stp[r->TermWin.ncol - count]), &(srp[r->TermWin.ncol - count]), (unsigned int)count, tr); +#ifndef MULTIBYTE_CHAR if (*slp == -1) /* break line continuation */ *slp = r->TermWin.ncol; +#else + if (*slp < 0) /* break line continuation */ + *slp *= (-1); +#endif *slp -= count; MAX_IT(*slp, 0); if (r->selection.op && r->h->current_screen == r->selection.screen @@ -1746,7 +1872,7 @@ for (i = rc[PART_BEG].row; i <= rc[PART_END].row; i++) MEMSET(&(r->drawn_text[i][rc[PART_BEG].col]), 0, - rc[PART_END].col - rc[PART_BEG].col + 1); + sizeof(text_t) * (rc[PART_END].col - rc[PART_BEG].col + 1)); if (refresh) rxvt_scr_refresh(r, SLOW_REFRESH | REFRESH_BOUNDS); @@ -1940,7 +2066,7 @@ XGCValues gcvalue; /* Graphics Context values */ rend_t *drp, *srp; /* drawn-rend-pointer, screen-rend-pointer */ text_t *dtp, *stp; /* drawn-text-pointer, screen-text-pointer */ - char *buffer; /* local copy of r->h->buffer */ + text_t *buffer; /* local copy of r->h->buffer */ struct rxvt_hidden *h = r->h; int (*draw_string) () = XDrawString; int 
(*draw_image_string) () = XDrawImageString; @@ -1952,13 +2078,17 @@ * A: set up vars */ clearfirst = clearlast = must_clear = wbyte = 0; +#ifdef MULTIBYTE_CHAR + draw_string = XDrawString16; + draw_image_string = XDrawImageString16; +#endif #ifndef NO_BOLDFONT bfont = 0; #endif if (h->currmaxcol < r->TermWin.ncol) { h->currmaxcol = r->TermWin.ncol; - h->buffer = rxvt_realloc(h->buffer, sizeof(char) * (h->currmaxcol + 1)); + h->buffer = rxvt_realloc(h->buffer, sizeof(text_t) * (h->currmaxcol + 1)); } buffer = h->buffer; h->refresh_count = 0; @@ -1999,8 +2129,19 @@ ccol2; /* Cursor colour2 */ if ((r->screen.flags & Screen_VisibleCursor) && r->TermWin.focus) { +#ifdef MULTIBYTE_CHAR + col = r->screen.cur.col; + while (col > 0 && + IS_TRAILING_COLUMN( + r->screen.rend[r->screen.cur.row + r->TermWin.saveLines] + [col])) + col--; + srp = &(r->screen.rend[r->screen.cur.row + r->TermWin.saveLines] + [col]); +#else /* MULTIBYTE_CHAR */ srp = &(r->screen.rend[r->screen.cur.row + r->TermWin.saveLines] [r->screen.cur.col]); +#endif *srp ^= RS_RVid; #ifndef NO_CURSORCOLOR cc1 = *srp & (RS_fgMask | RS_bgMask); @@ -2050,7 +2191,15 @@ || r->screen.cur.col != h->oldcursor.col) { if (ocrow < r->TermWin.nrow && h->oldcursor.col < r->TermWin.ncol) { +#ifdef MULTIBYTE_CHAR + col = h->oldcursor.col; + srp = r->drawn_rend[h->oldcursor.row]; + while (col > 0 && IS_TRAILING_COLUMN(srp[col])) + col--; + r->drawn_rend[ocrow][col] ^= (RS_RVid | RS_Uline); +#else /* MULTIBYTE_CHAR */ r->drawn_rend[ocrow][h->oldcursor.col] ^= (RS_RVid | RS_Uline); +#endif #ifdef MULTICHAR_SET if (h->oldcursormulti) { col = h->oldcursor.col + h->oldcursormulti; @@ -2218,7 +2367,34 @@ unsigned char fontdiff,/* current font size != base font size */ fprop; /* proportional font used */ rend_t rend; /* rendition value */ - +#ifdef MULTIBYTE_CHAR + int hasChanged; + + hasChanged = 0; +#define COMPARE_SCREEN_AND_DRAWN_AT_COL do {\ + if (!hasChanged) { \ + if (stp[col] != dtp[col]) \ + hasChanged = 1; \ + } \ + if 
(!hasChanged) { \ + rend_t rt1, rt2; \ + rt1 = srp[col]; \ + rt2 = drp[col]; \ + if (rt1 != rt2 && \ + !(stp[col] == ' ' \ + && GET_BGATTR(rt1) == GET_BGATTR(rt2))) \ + hasChanged = 1; \ + }} while (0) + + COMPARE_SCREEN_AND_DRAWN_AT_COL; + + if (!hasChanged) { + while (col+1 < r->TermWin.ncol && + IS_TRAILING_COLUMN(srp[col+1])) + col++; + continue; + } +#else /* MULTIBYTE_CHAR */ /* compare new text with old - if exactly the same then continue */ rend = srp[col]; /* screen rendition (target rendtion) */ if (stp[col] == dtp[col] /* Must match characters to skip. */ @@ -2237,12 +2413,13 @@ } #endif } +#endif /* MULTIBYTE_CHAR */ /* redraw one or more characters */ fontdiff = 0; len = 0; buffer[len++] = dtp[col] = stp[col]; - drp[col] = rend; + rend = drp[col] = srp[col]; xpixel = Col2Pixel(col); /* @@ -2307,13 +2484,54 @@ if (wbyte) { wbyte = 0; XSetFont(r->Xdisplay, r->TermWin.gc, r->TermWin.font->fid); +#ifndef MULTIBYTE_CHAR draw_string = XDrawString; draw_image_string = XDrawImageString; +#endif } #else { #endif if (!fprop) { +#ifdef MULTIBYTE_CHAR +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define UINT16_to_XChar2b(c) (((((u_int16_t)(c)) & 0xff00u) >> 8) | ((((u_int16_t)(c)) & 0x00ffu) << 8)) +#else /* BYTE_ORDER */ +#define UINT16_to_XChar2b(c) (c) /* high byte already first, as XChar2b expects */ +#endif /* BYTE_ORDER */ + { + u_int16_t start_col = col; + + for (start_col=col, wlen=0; col < r->TermWin.ncol; ) { +#define TEXT_T_TO_X_Char2b(stp,srp,col) UINT16_to_XChar2b(stp[col]) + buffer[wlen++] = TEXT_T_TO_X_Char2b(stp,srp,col); + do { + col++; + } while (col < r->TermWin.ncol && + IS_TRAILING_COLUMN(srp[col])); + if (wlen >= h->currmaxcol) + break; + if (col < r->TermWin.ncol + && !IS_TRAILING_COLUMN(srp[col]) + && !IS_TRAILING_COLUMN(drp[col])) + { + if (rend != srp[col]) + break; + else { + int hasChanged = 0; + COMPARE_SCREEN_AND_DRAWN_AT_COL; + if (!hasChanged) + break; + } + } + } + len = col - start_col; + MEMCPY(&dtp[start_col], &stp[start_col],sizeof(text_t)*len); + MEMCPY(&drp[start_col], 
&srp[start_col],sizeof(rend_t)*len); + if (col < r->TermWin.ncol) + col--; + } +#else /* MULTIBYTE_CHAR */ /* single stepping - `normal' mode */ for (i = 0; ++col < r->TermWin.ncol - 1;) { if (rend != srp[col]) @@ -2330,6 +2548,7 @@ } col--; /* went one too far. move back */ len -= i; /* dump any matching trailing chars */ +#endif /* MULTIBYTE_CHAR */ } wlen = len; } @@ -2474,8 +2693,19 @@ */ if (r->screen.flags & Screen_VisibleCursor) { if (r->TermWin.focus) { +#ifdef MULTIBYTE_CHAR + col = r->screen.cur.col; + while (col > 0 && + IS_TRAILING_COLUMN( + r->screen.rend[r->screen.cur.row + r->TermWin.saveLines] + [col])) + col--; + srp = &(r->screen.rend[r->screen.cur.row + r->TermWin.saveLines] + [col]); +#else /* MULTIBYTE_CHAR */ srp = &(r->screen.rend[r->screen.cur.row + r->TermWin.saveLines] [r->screen.cur.col]); +#endif /* MULTIBYTE_CHAR */ *srp ^= RS_RVid; #ifndef NO_CURSORCOLOR *srp = (*srp & ~(RS_fgMask | RS_bgMask)) | cc1; @@ -2930,8 +3160,9 @@ { int i, col, end_col, row, end_row; unsigned char *new_selection_text; - char *str; + text_t *str, *text_buf; text_t *t; + rend_t *srp; D_SELECT((stderr, "rxvt_selection_make(): r->selection.op=%d, r->selection.clicks=%d", r->selection.op, r->selection.clicks)); switch (r->selection.op) { @@ -2953,10 +3184,10 @@ i = (r->selection.end.row - r->selection.beg.row + 1) * (r->TermWin.ncol + 1) + 1; - str = rxvt_malloc(i * sizeof(char)); - + str = text_buf = rxvt_malloc(i * sizeof(text_t)); /* text_buf keeps the start of the buffer while str advances */ +#ifndef MULTIBYTE_CHAR new_selection_text = (unsigned char *)str; - +#endif col = r->selection.beg.col; MAX_IT(col, 0); row = r->selection.beg.row + r->TermWin.saveLines; @@ -2965,6 +3196,20 @@ * A: rows before end row */ for (; row < end_row; row++, col = 0) { +#ifdef MULTIBYTE_CHAR + t = r->screen.text[row]; + srp = r->screen.rend[row]; + end_col = r->screen.tlen[row]; + if (end_col < 0) + end_col *= -1; + + for (; col < end_col; col++) + if (!IS_TRAILING_COLUMN(srp[col])) + *str++ = t[col]; + col = 0; + if (r->screen.tlen[row] >= 0) + *str++ 
= '\n'; +#else t = &(r->screen.text[row][col]); if ((end_col = r->screen.tlen[row]) == -1) end_col = r->TermWin.ncol; @@ -2972,10 +3217,24 @@ *str++ = *t++; if (r->screen.tlen[row] != -1) *str++ = '\n'; +#endif } /* * B: end row */ +#ifdef MULTIBYTE_CHAR + t = r->screen.text[row]; + srp = r->screen.rend[row]; + end_col = r->screen.tlen[row]; + if (end_col < 0) + end_col *= -1; + if (r->selection.end.col <= end_col) + end_col = r->selection.end.col; + MIN_IT(end_col, r->TermWin.ncol); /* CHANGE */ + for (; col < end_col; col++) + if (!IS_TRAILING_COLUMN(srp[col])) + *str++ = t[col]; +#else t = &(r->screen.text[row][col]); end_col = r->screen.tlen[row]; if (end_col == -1 || r->selection.end.col <= end_col) @@ -2994,6 +3253,17 @@ *str++ = '\n'; #endif *str = '\0'; +#endif + +#ifdef MULTIBYTE_CHAR +#ifdef GBK16 +#define TEXT_T_STR_TO_BYTE_STR gbk_to_cstring +#else /* GBK16 */ +#define TEXT_T_STR_TO_BYTE_STR ucs2_to_utf8 +#endif /* GBK16 */ + new_selection_text = STRDUP(TEXT_T_STR_TO_BYTE_STR(text_buf, str - text_buf)); + free(text_buf); /* the byte string was duplicated above; the cell buffer is no longer needed */ +#endif /* MULTIBYTE_CHAR */ if ((i = STRLEN((char *)new_selection_text)) == 0) { free(new_selection_text); return; @@ -3122,7 +3392,11 @@ break; } if ((col == bound.col) && (row != bound.row)) { +#ifndef MULTIBYTE_CHAR if (r->screen.tlen[(row - (dirn == UP ? 1 : 0))] == -1) { +#else + if (r->screen.tlen[(row - (dirn == UP ? 1 : 0))] < 0) { +#endif trow = row + dirnadd; tcol = dirn == UP ? 
r->TermWin.ncol - 1 : 0; if (r->screen.text[trow] == NULL) @@ -3383,6 +3657,12 @@ if (r->selection.clicks == 1) { end_col = r->screen.tlen[r->selection.beg.row + r->TermWin.saveLines]; +#ifdef MULTIBYTE_CHAR + if (end_col < 0) + end_col *= -1; + if (r->selection.beg.col > end_col) + r->selection.beg.col = r->TermWin.ncol; +#else /* MUTLIBYTE_CHAR */ if (end_col != -1 && r->selection.beg.col > end_col) { #if 1 r->selection.beg.col = ncol; @@ -3400,6 +3680,7 @@ # ifdef MULTICHAR_SET rxvt_selection_adjust_kanji(r); # endif /* MULTICHAR_SET */ +# endif /* MUTLIBYTE_CHAR */ } else if (r->selection.clicks == 2) { if (ROWCOL_IS_AFTER(r->selection.end, r->selection.beg)) r->selection.end.col--; @@ -3630,8 +3911,9 @@ - r->TermWin.lineSpace; } #endif + /* ------------------------------------------------------------------------- */ - + /* ------------------------------------------------------------------------- * * DEBUG ROUTINES * * ------------------------------------------------------------------------- */ --- rxvt-2.7.10/src/utf8.c.utf8 2004-07-20 15:49:08.000000000 +0200 +++ rxvt-2.7.10/src/utf8.c 2004-07-20 23:18:41.000000000 +0200 @@ -0,0 +1,148 @@ +/*--------------------------------*-C-*--------------------------------------* + * File: utf8.c + *---------------------------------------------------------------------------* + * Code borrowed/stolen from `glibc-2.1.3/iconv/gconv_simple.c': + * - Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc. + * - This file is part of the GNU C Library. + * - Contributed by Ulrich Drepper , 1997. + * - Adapted for rxvt by Xianping Ge , 2000. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + *--------------------------------------------------------------------------*/ +/* + * Convert between UTF-8 string and UCS2 string formats. + */ + +typedef unsigned long uint32_t; +/* Decode len bytes of UTF-8 into a static UCS-2 buffer that the next call reuses; the number of units produced is stored in *ucs2_len. A sequence cut off at the end of str is completed by the following call. */ +u_int16_t * +utf8_to_ucs2(const unsigned char *str, int len, int *ucs2_len) +{ + static u_int16_t *ucs2_str = NULL; + static int max_len_ucs2_str = 0; + static uint32_t ch; + static int cnt = 0; + int i_str, i_ucs2_str; + *ucs2_len = 0; /* callers read the count even when nothing is converted */ + if (len <= 0) + return NULL; + if (max_len_ucs2_str < len + 1) { + max_len_ucs2_str = len + 1; + ucs2_str = rxvt_realloc(ucs2_str, (sizeof(u_int16_t) * max_len_ucs2_str)); + } + + for (i_ucs2_str = 0, i_str = 0; i_str < len; i_str++) { + if (cnt > 0) { + uint32_t byte = str[i_str]; + + if ((byte & 0xc0) != 0x80) { + i_str--; + cnt = 0; + } else { + ch <<= 6; + ch |= byte & 0x3f; + if (--cnt == 0) { + ucs2_str[i_ucs2_str++] = (ch > 0xffff) ? 0xfffd : ch; /* does not fit 16 bits: store U+FFFD instead of a truncated value */ + } + } + } else { + ch = str[i_str]; + if (ch < 0x80) { + /* One byte sequence. */ + ucs2_str[i_ucs2_str++] = ch; + } else { + if (ch >= 0xc2 && ch < 0xe0) { + /* We expect two bytes. The first byte cannot be 0xc0 + * or 0xc1, otherwise the wide character could have been + * represented using a single byte. */ + cnt = 2; + ch &= 0x1f; + } else if ((ch & 0xf0) == 0xe0) { + /* We expect three bytes. */ + cnt = 3; + ch &= 0x0f; + } else if ((ch & 0xf8) == 0xf0) { + /* We expect four bytes. */ + cnt = 4; + ch &= 0x07; + } else if ((ch & 0xfc) == 0xf8) { + /* We expect five bytes. */ + cnt = 5; + ch &= 0x03; + } else if ((ch & 0xfe) == 0xfc) { + /* We expect six bytes. 
*/ + cnt = 6; + ch &= 0x01; + } else { + cnt = 1; + } + --cnt; + } + } + } + + *ucs2_len = i_ucs2_str; + return ucs2_str; +} + +static const uint32_t encoding_mask[] = { + ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff +}; + +static const unsigned char encoding_byte[] = { + 0xc0, 0xe0, 0xf0, 0xf8, 0xfc +}; +/* Encode len UCS-2 units as a NUL-terminated UTF-8 string held in a static buffer that the next call overwrites. */ +unsigned char * +ucs2_to_utf8(u_int16_t * ucs2_str, int len) +{ + static unsigned char *utf8 = NULL; + static int max_len_utf8 = 0; + unsigned char *outptr; + int i; + + if (max_len_utf8 < len * 6 + 1) { + max_len_utf8 = len * 6 + 1; + utf8 = rxvt_realloc(utf8, max_len_utf8); + } + outptr = utf8; + for (i = 0; i < len; i++) { + uint32_t wc = ucs2_str[i]; + + if (wc < 0x80) + /* It's an one byte sequence. */ + *outptr++ = (unsigned char)wc; + else { + size_t step; + unsigned char *start; /* same type as outptr */ + + for (step = 2; step < 6; ++step) + if ((wc & encoding_mask[step - 2]) == 0) + break; + + start = outptr; + *outptr = encoding_byte[step - 2]; + outptr += step; + --step; + do { + start[step] = 0x80 | (wc & 0x3f); + wc >>= 6; + } while (--step > 0); + start[0] |= wc; + } + } + *outptr = '\0'; + return utf8; +} --- rxvt-2.7.10/src/utf8.h.utf8 2004-07-20 15:49:08.000000000 +0200 +++ rxvt-2.7.10/src/utf8.h 2004-07-20 15:49:08.000000000 +0200 @@ -0,0 +1,31 @@ +/*--------------------------------*-C-*--------------------------------------* + * File: utf8.h + *---------------------------------------------------------------------------* + * Code borrowed/stolen from `glibc-2.1.3/iconv/gconv_simple.c': + * - Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc. + * - This file is part of the GNU C Library. + * - Contributed by Ulrich Drepper , 1997. + * - Adapted for rxvt by Xianping Ge , 2000. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + *--------------------------------------------------------------------------*/ +/* + * Convert between UTF-8 string and UCS2 string formats. + */ +#ifndef UTF8_4_RXVT_H_ +#define UTF8_4_RXVT_H_ +u_int16_t *utf8_to_ucs2(const unsigned char *str, int len, int *ucs2_len); +unsigned char *ucs2_to_utf8(u_int16_t * ucs2_str, int len); +#endif /* UTF8_4_RXVT_H_ */ --- rxvt-2.7.10/src/wcwidth.c.utf8 2004-07-20 15:49:08.000000000 +0200 +++ rxvt-2.7.10/src/wcwidth.c 2004-07-20 15:49:08.000000000 +0200 @@ -0,0 +1,134 @@ +/* + * Source: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c + * Changes: wcwidth() --> Markus_Kuhn_wcwidth() + * wcswidth() --> Markus_Kuhn_wcswidth() + */ + +/* + * This is an implementation of wcwidth() and wcswidth() as defined in + * "The Single UNIX Specification, Version 2, The Open Group, 1997" + * <http://www.UNIX-systems.org/online.html> + * + * Markus Kuhn -- 2000-02-08 -- public domain + */ + +#include <wchar.h> + +/* These functions define the column width of an ISO 10646 character + * as follows: + * + * - The null character (U+0000) has a column width of 0. + * + * - Other C0/C1 control characters and DEL will lead to a return + * value of -1. + * + * - Non-spacing and enclosing combining characters (general + * category code Mn or Me in the Unicode database) have a + * column width of 0. + * + * - Spacing characters in the East Asian Wide (W) or East Asian + * FullWidth (F) category as defined in Unicode Technical + * Report #11 have a column width of 2. 
+ * + * - All remaining characters (including all printable + * ISO 8859-1 and WGL4 characters, Unicode control characters, + * etc.) have a column width of 1. + * + * This implementation assumes that wchar_t characters are encoded + * in ISO 10646. + */ + +int Markus_Kuhn_wcwidth(wchar_t ucs) +{ + /* sorted list of non-overlapping intervals of non-spacing characters */ + static const struct interval { + unsigned short first; + unsigned short last; + } combining[] = { + { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 }, + { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, + { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, + { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 }, + { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, + { 0x0711, 0x0711 }, { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, + { 0x0901, 0x0902 }, { 0x093C, 0x093C }, { 0x0941, 0x0948 }, + { 0x094D, 0x094D }, { 0x0951, 0x0954 }, { 0x0962, 0x0963 }, + { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, { 0x09C1, 0x09C4 }, + { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, { 0x0A02, 0x0A02 }, + { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, { 0x0A47, 0x0A48 }, + { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, { 0x0A81, 0x0A82 }, + { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, + { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, { 0x0B3C, 0x0B3C }, + { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, { 0x0B4D, 0x0B4D }, + { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, + { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, + { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, { 0x0CBF, 0x0CBF }, + { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, { 0x0D41, 0x0D43 }, + { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, + { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, + { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, + { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, + { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, + { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 
}, { 0x0F86, 0x0F87 }, + { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, + { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, { 0x1036, 0x1037 }, + { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x18A9, 0x18A9 }, + { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A }, + { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 } + }; + int min = 0; + int max = sizeof(combining) / sizeof(struct interval) - 1; + int mid; + + /* test for 8-bit control characters */ + if (ucs == 0) + return 0; + if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) + return -1; + + /* first quick check for Latin-1 etc. characters */ + if (ucs < combining[0].first) + return 1; + + /* binary search in table of non-spacing characters */ + while (max >= min) { + mid = (min + max) / 2; + if (combining[mid].last < ucs) + min = mid + 1; + else if (combining[mid].first > ucs) + max = mid - 1; + else if (combining[mid].first <= ucs && combining[mid].last >= ucs) + return 0; + } + + /* if we arrive here, ucs is not a combining or C0/C1 control character */ + + /* fast test for majority of non-wide scripts */ + if (ucs < 0x1100) + return 1; + + return 1 + + ((ucs >= 0x1100 && ucs <= 0x115f) || /* Hangul Jamo */ + (ucs >= 0x2e80 && ucs <= 0xa4cf && (ucs & ~0x0011) != 0x300a && + ucs != 0x303f) || /* CJK ... 
Yi */ + (ucs >= 0xac00 && ucs <= 0xd7a3) || /* Hangul Syllables */ + (ucs >= 0xf900 && ucs <= 0xfaff) || /* CJK Compatibility Ideographs */ + (ucs >= 0xfe30 && ucs <= 0xfe6f) || /* CJK Compatibility Forms */ + (ucs >= 0xff00 && ucs <= 0xff5f) || /* Fullwidth Forms */ + (ucs >= 0xffe0 && ucs <= 0xffe6)); +} + + +int Markus_Kuhn_wcswidth(const wchar_t *pwcs, size_t n) +{ + int w, width = 0; + + for (;*pwcs && n-- > 0; pwcs++) + if ((w = Markus_Kuhn_wcwidth(*pwcs)) < 0) + return -1; + else + width += w; + + return width; +} --- rxvt-2.7.10/src/rxvtlib.h.in.utf8 2003-02-28 02:03:16.000000000 +0100 +++ rxvt-2.7.10/src/rxvtlib.h.in 2004-07-20 15:49:08.000000000 +0200 @@ -77,8 +77,13 @@ int32_t col; } row_col_t; +#ifdef MULTIBYTE_CHAR +typedef u_int16_t text_t; +#else typedef unsigned char text_t; -#if defined(TTY_256COLOR) || defined(MULTICHAR_SET) +#endif + +#if defined(TTY_256COLOR) || defined(MULTICHAR_SET) || defined(MULTIBYTE_CHAR) #define rend_t u_int32_t #else #define rend_t u_int16_t