[Sc2-cvs] sc2/src/sc2code/libs/strings strings.c,1.3,1.4 unicode.c,1.1,1.2

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 454-5900

Update of /cvsroot/sc2/sc2/src/sc2code/libs/strings
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12949/sc2code/libs/strings

Modified Files:
	strings.c unicode.c 
Log Message:
Unicode support.

Index: strings.c
===================================================================
RCS file: /cvsroot/sc2/sc2/src/sc2code/libs/strings/strings.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -d -r1.3 -r1.4
*** strings.c	18 Jan 2003 23:08:39 -0000	1.3
--- strings.c	20 Feb 2005 00:44:35 -0000	1.4
***************
*** 145,152 ****
  		StringIndex = STRING_INDEX (String);
  		LockStringTable (StringTable, &StringTablePtr);
  		StringLength = (COUNT)(
  				StringTablePtr->StringOffsets[StringIndex + 1]
! 				- StringTablePtr->StringOffsets[StringIndex]
! 				);
  		UnlockStringTable (StringTable);
  	}
--- 145,165 ----
  		StringIndex = STRING_INDEX (String);
  		LockStringTable (StringTable, &StringTablePtr);
+ 
+ 		{
+ 			STRINGPTR start;
+ 			STRINGPTR end;
+ 
+ 			start = (STRINGPTR) ((BYTE *) StringTablePtr +
+ 					StringTablePtr->StringOffsets[StringIndex]);
+ 			end = (STRINGPTR) ((BYTE *) StringTablePtr +
+ 					StringTablePtr->StringOffsets[StringIndex + 1]);
+ 			StringLength = utf8StringCountN(start, end);
+ 		}
+ 
+ #if 0
  		StringLength = (COUNT)(
  				StringTablePtr->StringOffsets[StringIndex + 1]
! 				- StringTablePtr->StringOffsets[StringIndex]);
! #endif
  		UnlockStringTable (StringTable);
  	}

Index: unicode.c
===================================================================
RCS file: /cvsroot/sc2/sc2/src/sc2code/libs/strings/unicode.c,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** unicode.c	5 Feb 2005 01:24:12 -0000	1.1
--- unicode.c	20 Feb 2005 00:44:35 -0000	1.2
***************
*** 18,25 ****
  #include <stdio.h>

  // Get one character from a UTF-8 encoded string.
  // *ptr will point to the start of the next character.
  // Returns 0 if the encoding is bad. This can be distinguished from the
! // '\0' character by checking whether **ptr == '\0'.
  wchar_t
  getCharFromString(const unsigned char **ptr) {
--- 18,33 ----
  #include <stdio.h>

+ // Resynchronise after a bad UTF-8 sequence: advance *ptr past every
+ // continuation byte (a byte of the binary form 10xxxxxx), so that on return
+ // *ptr refers to the first byte that is not a continuation byte.
+ // A terminating '\0' is not a continuation byte, so the loop stops there.
+ static inline void
+ resyncUTF8(const unsigned char **ptr) {
+ 	// The parentheses matter: '*ptr++' parses as '*(ptr++)', which would
+ 	// step the local parameter instead of the caller's string pointer.
+ 	while ((**ptr & 0xc0) == 0x80)
+ 		(*ptr)++;
+ }
+ 
  // Get one character from a UTF-8 encoded string.
  // *ptr will point to the start of the next character.
  // Returns 0 if the encoding is bad. This can be distinguished from the
! // '\0' character by checking whether **ptr == '\0' before calling this
! // function.
  wchar_t
  getCharFromString(const unsigned char **ptr) {
***************
*** 107,116 ****

  	// Resynchronise (skip everything starting with 0x10xxxxxx):
! 	while ((**ptr & 0xc0) == 0x80)
! 		*ptr++;

  	return 0;
  }

--- 115,245 ----

  	// Resynchronise (skip everything starting with 0x10xxxxxx):
! 	resyncUTF8(ptr);

  	return 0;
  }

+ // Get one character from a UTF-8 encoded string that is delimited by 'end'
+ // (which points one past the last byte of the string).
+ // The length of the sequence is taken from its lead byte. If *ptr is already
+ // at (or past) 'end', if the lead byte is not a valid UTF-8 lead byte, or if
+ // the sequence would extend past 'end', *ptr is set to 'end' and 0 is
+ // returned. Otherwise the decoding is delegated to getCharFromString().
+ wchar_t
+ getCharFromStringN(const unsigned char **ptr, const unsigned char *end) {
+ 	int numBytes;
+ 
+ 	// Nothing left to read. Check this before dereferencing: the byte at
+ 	// 'end' is not part of the string and may lie outside the buffer.
+ 	if (*ptr >= end)
+ 		goto err;
+ 
+ 	if (**ptr < 0x80) {
+ 		numBytes = 1;
+ 	} else if ((**ptr & 0xe0) == 0xc0) {
+ 		numBytes = 2;
+ 	} else if ((**ptr & 0xf0) == 0xe0) {
+ 		numBytes = 3;
+ 	} else if ((**ptr & 0xf8) == 0xf0) {
+ 		numBytes = 4;
+ 	} else
+ 		goto err;
+ 
+ 	// Compare the remaining length instead of computing '*ptr + numBytes',
+ 	// which could form a pointer beyond one past the end of the buffer.
+ 	if (end - *ptr < numBytes)
+ 		goto err;
+ 
+ 	return getCharFromString(ptr);
+ 
+ err:
+ 	*ptr = end;
+ 	return 0;
+ }
+ 
+ // Get one line from a string.
+ // A line is terminated with either CRLF (DOS/Windows),
+ // LF (Unix, MacOS X), or CR (old MacOS). For compatibility with earlier
+ // behaviour, LF immediately followed by CR is also accepted as a single
+ // terminator.
+ // On success 'start' is returned, *end points to the first byte of the
+ // line terminator (or to the terminating '\0' if there is none), and
+ // *startNext points to the first byte after the line terminator.
+ // The end of the string is reached when **startNext == '\0'.
+ // NULL is returned if the line is not valid UTF8. In this case
+ // *end points to the first invalid character, and *startNext to the start
+ // of the next (possibly invalid too) character.
+ unsigned char *
+ getLineFromString(const unsigned char *start, const unsigned char **end,
+ 		const unsigned char **startNext) {
+ 	const unsigned char *ptr = start;
+ 	const unsigned char *lastPtr;
+ 	wchar_t ch;
+ 
+ 	// Search for the first line terminator.
+ 	for (;;) {
+ 		if (*ptr == '\0') {
+ 			// End of the string; the last line has no terminator.
+ 			*end = ptr;
+ 			*startNext = ptr;
+ 			return (unsigned char *) start;
+ 		}
+ 		lastPtr = ptr;
+ 		ch = getCharFromString(&ptr);
+ 		if (ch == '\0') {
+ 			// Bad string ('\0' itself was already handled above).
+ 			*end = lastPtr;
+ 			*startNext = ptr;
+ 			return NULL;
+ 		}
+ 		if (ch == '\n' || ch == '\r') {
+ 			// CR and LF are single bytes in UTF-8, so the second half
+ 			// of a two-byte terminator (CRLF or LFCR) can be detected
+ 			// by looking at the next byte directly. The next line must
+ 			// start after the whole terminator, otherwise a caller that
+ 			// iterates over the lines would never advance.
+ 			const unsigned char partner = (ch == '\n') ? '\r' : '\n';
+ 
+ 			*end = lastPtr;
+ 			if (*ptr == partner)
+ 				ptr++;
+ 			*startNext = ptr;
+ 			return (unsigned char *) start;
+ 		} // else: a normal character
+ 	}
+ }
+ 
+ // Count the characters in a UTF-8 encoded string.
+ // Characters are decoded one at a time with getCharFromString(); counting
+ // stops the first time that function returns 0, and that character is not
+ // included in the count.
+ size_t
+ utf8StringCount(const unsigned char *start) {
+ 	size_t numChars;
+ 
+ 	for (numChars = 0; getCharFromString(&start) != '\0'; numChars++)
+ 		;
+ 	return numChars;
+ }
+ 
+ // Count the characters in the UTF-8 encoded byte range that begins at
+ // 'start' and is bounded by 'end'.
+ // Characters are decoded one at a time with getCharFromStringN(); counting
+ // stops the first time that function returns 0, and that character is not
+ // included in the count.
+ size_t
+ utf8StringCountN(const unsigned char *start, const unsigned char *end) {
+ 	size_t numChars = 0;
+ 
+ 	while (getCharFromStringN(&start, end) != '\0')
+ 		numChars++;
+ 	return numChars;
+ }
+ 
+ // Skip up to 'num' characters of a UTF-8 encoded string.
+ // Returns a pointer to the character following the skipped ones. If
+ // getCharFromString() returns 0 for some character before 'num' characters
+ // have been skipped, a pointer to the start of that character is returned
+ // instead.
+ unsigned char *
+ skipUTF8Chars(const unsigned char *ptr, size_t num) {
+ 	size_t remaining;
+ 
+ 	for (remaining = num; remaining != 0; remaining--) {
+ 		const unsigned char *charStart = ptr;
+ 
+ 		if (getCharFromString(&ptr) == '\0')
+ 			return (unsigned char *) charStart;
+ 	}
+ 	return (unsigned char *) ptr;
+ }