From: Erik B. <eb...@us...> - 2006-11-22 19:41:58
|
Update of /cvsroot/gexperts/gexperts/unstable/Comps In directory sc8-pr-cvs2.sourceforge.net:/tmp/cvs-serv15508/Comps Modified Files: GpTextStream.pas Log Message: 1.04a: 2006-08-30 - Bug fixed: When cfUseLF was set, /CR/ was used as line delimiter in Writeln (/LF/ should be used). 1.04: 2006-02-06 - Added support for UCS-4 encoding in a very primitive form - all high-word values are stripped away on read and set to 0 on write. - Added CP_UNICODE32 UCS-4 pseudo-codepage constant. Index: GpTextStream.pas =================================================================== RCS file: /cvsroot/gexperts/gexperts/unstable/Comps/GpTextStream.pas,v retrieving revision 1.9 retrieving revision 1.10 diff -u -d -r1.9 -r1.10 --- GpTextStream.pas 2 Dec 2005 21:19:30 -0000 1.9 +++ GpTextStream.pas 22 Nov 2006 19:41:56 -0000 1.10 @@ -24,7 +24,7 @@ This software is distributed under the BSD license. -Copyright (c) 2003, Primoz Gabrijelcic +Copyright (c) 2006, Primoz Gabrijelcic All rights reserved. Redistribution and use in source and binary forms, with or without modification, @@ -50,28 +50,30 @@ Author : Primoz Gabrijelcic Creation date : 2001-07-17 - Last modification: 2003-05-16 - Version : 1.02 + Last modification: 2006-08-30 + Version : 1.04a </pre> *)(* History: + 1.04a: 2006-08-30 + - Bug fixed: When cfUseLF was set, /CR/ was used as line delimiter in Writeln (/LF/ + should be used). + 1.04: 2006-02-06 + - Added support for UCS-4 encoding in a very primitive form - all high-word values + are stripped away on read and set to 0 on write. + - Added CP_UNICODE32 UCS-4 pseudo-codepage constant. 1.03: 2004-05-12 - Added Turkish codepage ISO_8859_9. - 1.02: 2003-05-16 - Compatible with Delphi 7. - 1.01: 2002-04-24 - Added TGpTSCreateFlag flag tscfCompressed to keep this enum in sync with GpTextFile.TCreateFlag. - 1.0b: 2001-12-15 - Updated to compile with Delphi 6 (thanks to Artem Khassanov). - 1.0a: 2001-10-06 - Fixed error in GpTextStream.Read that could cause exception to be raised unnecessary. - 1.0: 2001-07-17 - Created from GpTextFile 3.0b (thanks to Miha Remec). - Fix UTF 8 decoding error in TGpTextStream.Read. @@ -103,7 +105,8 @@ CP_UTF8 = 65001; // UTF-8 pseudo-codepage, defined in Windows.pas in Delphi 3 and newer. {$ENDIF} - CP_UNICODE = 1200; // Unicode pseudo-codepage, + CP_UNICODE = 1200; // Unicode UCS-2 Little-Endian pseudo-codepage + CP_UNICODE32 = 12000; // Unicode UCS-4 Little-Endian pseudo-codepage ISO_8859_1 = 28591; // Western Alphabet (ISO) ISO_8859_2 = 28592; // Central European Alphabet (ISO) ISO_8859_3 = 28593; // Latin 3 Alphabet (ISO) @@ -115,6 +118,10 @@ ISO_8859_9 = 28599; // Turkish Alphabet (ISO) type +{$IFNDEF D6PLUS} + UCS4Char = type LongWord; +{$ENDIF} + {:Base exception class for exceptions created in TGpTextStream. } EGpTextStream = class(Exception); @@ -184,6 +191,7 @@ function AllocBuffer(size: integer): pointer; virtual; procedure FreeBuffer(var buffer: pointer); virtual; function GetWindowsError: DWORD; virtual; + function IsUnicodeCodepage(codepage: word): boolean; procedure PrepareStream; virtual; procedure SetCodepage(cp: word); virtual; function StreamName(param: string = ''): string; virtual; @@ -196,6 +204,7 @@ ); destructor Destroy; override; function Is16bit: boolean; + function Is32bit: boolean; function IsUnicode: boolean; function Read(var buffer; count: longint): longint; override; function Readln: WideString; @@ -226,11 +235,19 @@ SysConst; const - {:Header for 'normal' Unicode stream (Intel format). + {:Header for 'normal' Unicode UCS-4 stream (Intel format). + } + CUnicode32Normal: UCS4Char = UCS4Char($0000FEFF); + + {:Header for 'reversed' Unicode UCS-4 stream (Motorola format). + } + CUnicode32Reversed: UCS4Char = UCS4Char($0000FFFE); + + {:Header for big-endian (Motorola) Unicode stream. } CUnicodeNormal: WideChar = WideChar($FEFF); - {:Header for 'reversed' Unicode stream (Motorola format). + {:Header for little-endian (Intel) Unicode stream. } CUnicodeReversed: WideChar = WideChar($FFFE); @@ -455,7 +472,7 @@ access: TGpTSAccess; createFlags: TGpTSCreateFlags; codePage: word); begin inherited Create(dataStream); - if (tscfUnicode in createFlags) and (codePage <> CP_UTF8) then + if (tscfUnicode in createFlags) and (codePage <> CP_UTF8) and (codePage <> CP_UNICODE32) then codePage := CP_UNICODE; tsAccess := access; tsCreateFlags := createFlags; @@ -499,14 +516,20 @@ end; { TGpTextStream.GetWindowsError } {:Checks if stream contains 16-bit characters. - @returns True if stream contains 16-bit characters. } function TGpTextStream.Is16bit: boolean; begin - Result := IsUnicode and (Codepage <> CP_UTF8); + Result := IsUnicode and (Codepage = CP_UNICODE); end; { TGpTextStream.Is16bit } -{:Checks if stream is Unicode (UCS-2 or UTF-8 encoding). +{:Checks if stream contains 32-bit characters. +} +function TGpTextStream.Is32bit: boolean; +begin + Result := IsUnicode and (Codepage = CP_UNICODE32); +end; { TGpTextStream.Is32bit } + +{:Checks if stream is Unicode (UTF-8 or UCS-2 or UCS-4 encoding). @returns True if stream is Unicode. } function TGpTextStream.IsUnicode: boolean; @@ -514,6 +537,14 @@ Result := (tscfUnicode in tsCreateFlags); end; { TGpTextStream.IsUnicode } +{:Checks is codepage is one of the supported Unicode codepages. + @since 2006-02-06 +} +function TGpTextStream.IsUnicodeCodepage(codepage: word): boolean; +begin + Result := (codepage = CP_UTF8) or (codepage = CP_UNICODE) or (codepage = CP_UNICODE32); +end; { TGpTextStream.IsUnicodeCodepage } + {:Prepares stream for read or write operation. @raises EGpTextStream if caller tries to rewrite or append 'reverse' Unicode stream. @@ -522,43 +553,59 @@ var marker : WideChar; marker3: Char; + marker4: UCS4Char; begin case tsAccess of tsaccRead: begin tsCreateFlags := []; - if WrappedStream.Size >= SizeOf(WideChar) then begin - WrappedStream.Read(marker,SizeOf(WideChar)); + if WrappedStream.Size >= SizeOf(UCS4Char) then begin + WrappedStream.Position := 0; + WrappedStream.Read(marker4, SizeOf(UCS4Char)); + if marker4 = CUnicode32Normal then begin + tsCreateFlags := tsCreateFlags + [tscfUnicode]; + Codepage := CP_UNICODE32; + end + else if marker4 = CUnicode32Reversed then begin + tsCreateFlags := tsCreateFlags + [tscfUnicode, tscfReverseByteOrder]; + Codepage := CP_UNICODE32; + end; + end; + if (WrappedStream.Size >= SizeOf(WideChar)) and (Codepage <> CP_UNICODE32) then begin + WrappedStream.Position := 0; + WrappedStream.Read(marker, SizeOf(WideChar)); if marker = CUnicodeNormal then begin tsCreateFlags := tsCreateFlags + [tscfUnicode]; Codepage := CP_UNICODE; end else if marker = CUnicodeReversed then begin - tsCreateFlags := tsCreateFlags + [tscfUnicode,tscfReverseByteOrder]; + tsCreateFlags := tsCreateFlags + [tscfUnicode, tscfReverseByteOrder]; Codepage := CP_UNICODE; end else if (marker = CUTF8BOM12) and (WrappedStream.Size >= 3) then begin - WrappedStream.Read(marker3,SizeOf(Char)); + WrappedStream.Read(marker3, SizeOf(Char)); if marker3 = CUTF8BOM3 then begin tsCreateFlags := tsCreateFlags + [tscfUnicode]; Codepage := CP_UTF8; end; end; - if not IsUnicode then - WrappedStream.Position := 0; end; - if (not IsUnicode) and ((Codepage = CP_UTF8) or (Codepage = CP_UNICODE)) then + if not IsUnicode then + WrappedStream.Position := 0; + if (not IsUnicode) and IsUnicodeCodepage(Codepage) then tsCreateFlags := [tscfUnicode]; end; //tsaccRead tsaccWrite: begin - if ((Codepage = CP_UTF8) or (Codepage = CP_UNICODE)) then + if IsUnicodeCodepage(Codepage) then tsCreateFlags := tsCreateFlags + [tscfUnicode]; - if tsCreateFlags*[tscfUnicode,tscfReverseByteOrder] = [tscfUnicode,tscfReverseByteOrder] then - raise EGpTextStream.CreateFmtHelp(sCannotWriteReversedUnicodeStream,[StreamName],hcTFCannotWriteReversed); + if tsCreateFlags * [tscfUnicode, tscfReverseByteOrder] = [tscfUnicode, tscfReverseByteOrder] then + raise EGpTextStream.CreateFmtHelp(sCannotWriteReversedUnicodeStream, [StreamName], hcTFCannotWriteReversed); WrappedStream.Size := 0; if IsUnicode then begin - if Codepage <> CP_UTF8 then + if Codepage = CP_UNICODE32 then + WrappedStream.Write(CUnicode32Normal,SizeOf(UCS4Char)) + else if Codepage <> CP_UTF8 then WrappedStream.Write(CUnicodeNormal,SizeOf(WideChar)) else if tscfWriteUTF8BOM in tsCreateFlags then begin WrappedStream.Write(CUTF8BOM12,SizeOf(WideChar)); @@ -568,12 +615,14 @@ end; //tsaccWrite tsaccReadWrite: begin - if ((Codepage = CP_UTF8) or (Codepage = CP_UNICODE)) then + if IsUnicodeCodepage(Codepage) then tsCreateFlags := tsCreateFlags + [tscfUnicode]; - if tsCreateFlags*[tscfUnicode,tscfReverseByteOrder] = [tscfUnicode,tscfReverseByteOrder] then - raise EGpTextStream.CreateFmtHelp(sCannotAppendReversedUnicodeStream,[StreamName],hcTFCannotAppendReversed); + if tsCreateFlags * [tscfUnicode, tscfReverseByteOrder] = [tscfUnicode, tscfReverseByteOrder] then + raise EGpTextStream.CreateFmtHelp(sCannotAppendReversedUnicodeStream, [StreamName], hcTFCannotAppendReversed); if (WrappedStream.Size = 0) and IsUnicode then begin - if Codepage <> CP_UTF8 then + if Codepage = CP_UNICODE32 then + WrappedStream.Write(CUnicode32Normal,SizeOf(UCS4Char)) + else if Codepage <> CP_UTF8 then WrappedStream.Write(CUnicodeNormal,SizeOf(WideChar)) else if tscfWriteUTF8BOM in tsCreateFlags then begin WrappedStream.Write(CUTF8BOM12,SizeOf(WideChar)); @@ -584,7 +633,19 @@ tsaccAppend: begin tsCreateFlags := []; - if WrappedStream.Size >= SizeOf(WideChar) then begin + if WrappedStream.Size >= SizeOf(UCS4Char) then begin + WrappedStream.Position := 0; + WrappedStream.Read(marker4, SizeOf(UCS4Char)); + if marker4 = CUnicode32Normal then begin + tsCreateFlags := tsCreateFlags + [tscfUnicode]; + Codepage := CP_UNICODE32; + end + else if marker4 = CUnicode32Reversed then begin + tsCreateFlags := tsCreateFlags + [tscfUnicode, tscfReverseByteOrder]; + Codepage := CP_UNICODE32; + end; + end; + if (WrappedStream.Size >= SizeOf(WideChar)) and (Codepage <> CP_UNICODE32) then begin WrappedStream.Position := 0; WrappedStream.Read(marker,SizeOf(WideChar)); if marker = CUnicodeNormal then begin @@ -602,7 +663,6 @@ Codepage := CP_UTF8; end; end; - WrappedStream.Position := WrappedStream.Size; end else if (WrappedStream.Size = 0) and IsUnicode then begin if Codepage <> CP_UTF8 then @@ -612,18 +672,20 @@ WrappedStream.Write(CUTF8BOM3,SizeOf(Char)); end; end; - if (not IsUnicode) and ((Codepage = CP_UTF8) or (Codepage = CP_UNICODE)) then + WrappedStream.Position := WrappedStream.Size; + if (not IsUnicode) and IsUnicodeCodepage(Codepage) then tsCreateFlags := tsCreateFlags + [tscfUnicode]; - if tsCreateFlags*[tscfUnicode,tscfReverseByteOrder] = [tscfUnicode,tscfReverseByteOrder] then + if tsCreateFlags * [tscfUnicode, tscfReverseByteOrder] = [tscfUnicode, tscfReverseByteOrder] then raise EGpTextStream.CreateFmtHelp(sCannotAppendReversedUnicodeStream,[StreamName],hcTFCannotAppendReversed); end; //tsaccAppend end; //case end; { TGpTextStream.PrepareStream } {:Reads 'count' number of bytes from stream. 'Count' must be an even number as - data is always returned in Unicode format (two bytes per character). If stream - is 8-bit, data is converted to Unicode according to code page specified in + data is always returned in Unicode format (two bytes per character). + If stream is 8-bit, data is converted to Unicode according to code page specified in constructor. + If stream is 32-bit, high-order word of every UCS-4 char is stripped away. @param buffer Buffer for read data. @param count Number of bytes to be read. @returns Number of bytes actually read. @@ -632,12 +694,13 @@ } function TGpTextStream.Read(var buffer; count: longint): longint; var - bufPtr : pointer; + bufPtr : PByte; bytesConv: integer; bytesLeft: integer; bytesRead: integer; numChar : integer; tmpBuf : pointer; + tmpPtr : PByte; begin DelayedSeek; if IsUnicode then begin @@ -650,24 +713,38 @@ bytesLeft := 0; repeat // at least numChar UTF-8 bytes are needed for numChar WideChars - bytesRead := WrappedStream.Read(pointer(integer(tmpBuf)+bytesLeft)^,numChar); - bytesConv := UTF8BufToWideCharBuf(tmpBuf^,bytesRead+bytesLeft,bufPtr^,bytesLeft); + bytesRead := WrappedStream.Read(pointer(integer(tmpBuf)+bytesLeft)^, numChar); + bytesConv := UTF8BufToWideCharBuf(tmpBuf^, bytesRead+bytesLeft, bufPtr^, bytesLeft); Result := Result + bytesConv; if bytesRead <> numChar then // end of stream break; numChar := numChar - (bytesConv div SizeOf(WideChar)); - bufPtr := pointer(integer(bufPtr) + bytesConv); + Inc(bufPtr, bytesConv); if (bytesLeft > 0) and (bytesLeft < bytesRead) then - Move(pointer(integer(tmpBuf)+bytesRead-bytesLeft)^,tmpBuf^,bytesLeft); + Move(pointer(integer(tmpBuf)+bytesRead-bytesLeft)^, tmpBuf^, bytesLeft); until numChar = 0; finally FreeBuffer(tmpBuf); end; end + else if Codepage = CP_UNICODE32 then begin + tmpBuf := AllocBuffer(count*2); + try + Result := WrappedStream.Read(tmpBuf^, count*2) div 2; + bufPtr := @buffer; + tmpPtr := tmpBuf; + for bytesRead := 1 to Result div 2 do begin + PWord(bufPtr)^ := PWord(tmpPtr)^; + Inc(tmpPtr, SizeOf(WideChar)*2); + Inc(bufPtr, SizeOf(WideChar)); + end; + finally FreeBuffer(tmpBuf); end; + end else Result := WrappedStream.Read(buffer, count); end else begin if Odd(count) then - raise EGpTextStream.CreateFmtHelp(sCannotConvertOddNumberOfBytes,[StreamName,count],hcTFCannotConvertOdd) + raise EGpTextStream.CreateFmtHelp(sCannotConvertOddNumberOfBytes, + [StreamName, count], hcTFCannotConvertOdd) else begin numChar := count div SizeOf(WideChar); tmpBuf := AllocBuffer(numChar); @@ -766,7 +843,7 @@ } procedure TGpTextStream.SetCodepage(cp: word); begin - if (cp = CP_UTF8) or (cp = CP_UNICODE) then begin + if (cp = CP_UTF8) or (cp = CP_UNICODE) or (cp = CP_UNICODE32) then begin tsCodePage := cp; tsCreateFlags := tsCreateFlags + [tscfUnicode]; end @@ -775,7 +852,7 @@ tsCodePage := GetDefaultAnsiCodepage(GetKeyboardLayout(GetCurrentThreadId) and $FFFF, 1252) else tsCodePage := cp; - if not ((tsCodePage = 0) or (tsCodePage = CP_UNICODE)) then + if not ((tsCodePage = 0) or IsUnicodeCodepage(tsCodePage)) then tsCreateFlags := tsCreateFlags - [tscfUnicode]; end; end; { TGpTextStream.SetCodepage } @@ -818,9 +895,10 @@ end; { TGpTextStream.Win32Check } {:Writes 'count' number of bytes to stream. 'Count' must be an even number as - data is always expected in Unicode format (two bytes per character). If stream - is 8-bit, data is converted from Unicode according to code page specified in + data is always expected in Unicode format (two bytes per character). + If stream is 8-bit, data is converted from Unicode according to code page specified in constructor. + If stream is 32-bit, high-order word of every UCS-4 char is set to 0. @param buffer Data to be written. @param count Number of bytes to be written. @returns Number of bytes actually written. @@ -829,10 +907,12 @@ } function TGpTextStream.Write(const buffer; count: longint): longint; var + bufPtr : PByte; leftUTF8 : integer; numBytes : integer; numChar : integer; tmpBuf : pointer; + tmpPtr : PByte; uniBuf : pointer; utfWritten: integer; begin @@ -857,8 +937,23 @@ Result := count; finally FreeBuffer(tmpBuf); end; end + else if Codepage = CP_UNICODE32 then begin + tmpBuf := AllocBuffer(count*2); + try + bufPtr := @buffer; + tmpPtr := tmpBuf; + for utfWritten := 1 to count div SizeOf(WideChar) do begin + PWideChar(tmpPtr)^ := PWideChar(bufPtr)^; + Inc(tmpPtr, SizeOf(WideChar)); + Inc(bufPtr, SizeOf(WideChar)); + PWord(tmpPtr)^ := 0; + Inc(tmpPtr, SizeOf(WideChar)); + end; + Result := WrappedStream.Write(tmpBuf^, count*2) div 2; + finally FreeBuffer(tmpBuf); end; + end else - Result := WrappedStream.Write(buffer,count); + Result := WrappedStream.Write(buffer, count); end else begin if Odd(count) then @@ -911,7 +1006,7 @@ end else begin if tscfUseLF in tsCreateFlags then begin - ch := Char($0D); + ch := Char($0A); Result := (WrappedStream.Write(ch,SizeOf(Char)) = SizeOf(Char)); end else begin |