From: Maël Hörz <ma...@fd...> - 2008-10-03 18:47:14
|
Update of /cvsroot/synedit/SynEdit/Source In directory fdv4jf1.ch3.sourceforge.com:/tmp/cvs-serv14198 Modified Files: Tag: Unicode_2004_08_31 SynUnicode.pas Log Message: Commit changes by Erik Berry: TUnicodeStrings can now load files in different formats, not only UTF-16LE. Index: SynUnicode.pas =================================================================== RCS file: /cvsroot/synedit/SynEdit/Source/Attic/SynUnicode.pas,v retrieving revision 1.1.2.41 retrieving revision 1.1.2.42 diff -u -d -r1.1.2.41 -r1.1.2.42 --- SynUnicode.pas 23 Sep 2008 14:02:59 -0000 1.1.2.41 +++ SynUnicode.pas 3 Oct 2008 18:47:03 -0000 1.1.2.42 @@ -86,6 +86,8 @@ UTF8BOM: array[0..2] of Byte = ($EF, $BB, $BF); UTF16BOMLE: array[0..1] of Byte = ($FF, $FE); UTF16BOMBE: array[0..1] of Byte = ($FE, $FF); + UTF32BOMLE: array[0..3] of Byte = ($FF, $FE, $00, $00); + UTF32BOMBE: array[0..3] of Byte = ($00, $00, $FE, $FF); const // constants describing range of the Unicode Private Use Area (Unicode 3.2) @@ -118,6 +120,13 @@ BOM_MSB_FIRST = WideChar($FFFE); type + TSaveFormat = (sfUTF16LSB, sfUTF16MSB, sfUTF8, sfAnsi); + +const + sfUnicodeLSB = sfUTF16LSB; + sfUnicodeMSB = sfUTF16MSB; + +type TFontCharSet = 0..255; {$IFDEF UNICODE} @@ -137,11 +146,10 @@ TUnicodeStrings = class(TPersistent) private FUpdateCount: Integer; - FSaved, // set in SaveToStream, True in case saving was successfull otherwise False - FSaveUnicode: Boolean; // flag set on loading to keep track in which format to save - // (can be set explicitely, but expect losses if there's true Unicode content - // and this flag is set to False) + FSaved: Boolean; // set in SaveToStream, True in case saving was successfull otherwise False FOnConfirmConversion: TConfirmConversionEvent; + FSaveFormat: TSaveFormat; // overrides the FSaveUnicode flag, initialized when a file is loaded, + // expect losses if it is set to sfAnsi before saving function GetCommaText: UnicodeString; function GetName(Index: Integer): UnicodeString; function GetValue(const Name: UnicodeString): UnicodeString; @@ -149,6 +157,8 @@ procedure SetCommaText(const Value: UnicodeString); procedure SetValue(const Name, Value: UnicodeString); procedure WriteData(Writer: TWriter); + function GetSaveUnicode: Boolean; + procedure SetSaveUnicode(const Value: Boolean); protected procedure DefineProperties(Filer: TFiler); override; procedure DoConfirmConversion(var Allowed: Boolean); virtual; @@ -185,10 +195,10 @@ function IndexOfObject(AObject: TObject): Integer; procedure Insert(Index: Integer; const S: UnicodeString); virtual; abstract; procedure InsertObject(Index: Integer; const S: UnicodeString; AObject: TObject); - procedure LoadFromFile(const FileName: string); virtual; + procedure LoadFromFile(const FileName: TFileName); virtual; procedure LoadFromStream(Stream: TStream); virtual; procedure Move(CurIndex, NewIndex: Integer); virtual; - procedure SaveToFile(const FileName: string); virtual; + procedure SaveToFile(const FileName: TFileName); virtual; procedure SaveToStream(Stream: TStream; WithBOM: Boolean = True); virtual; procedure SetTextStr(const Value: UnicodeString); virtual; @@ -199,7 +209,8 @@ property Objects[Index: Integer]: TObject read GetObject write PutObject; property Values[const Name: UnicodeString]: UnicodeString read GetValue write SetValue; property Saved: Boolean read FSaved; - property SaveUnicode: Boolean read FSaveUnicode write FSaveUnicode default True; + property SaveUnicode: Boolean read GetSaveUnicode write SetSaveUnicode default True; + property SaveFormat: TSaveFormat read FSaveFormat write FSaveFormat default sfUnicodeLSB; property Strings[Index: Integer]: UnicodeString read Get write Put; default; property Text: UnicodeString read GetTextStr write SetTextStr; @@ -398,6 +409,8 @@ out WithBOM: Boolean): TSynEncoding; overload; function LoadFromStream(UnicodeStrings: TUnicodeStrings; Stream: TStream; Encoding: TSynEncoding; out WithBOM: Boolean): TSynEncoding; overload; +function LoadFromStream(UnicodeStrings: TUnicodeStrings; Stream: TStream; + Encoding: TSynEncoding): TSynEncoding; overload; function ClipboardProvidesText: Boolean; function GetClipboardText: UnicodeString; @@ -456,7 +469,20 @@ constructor TUnicodeStrings.Create; begin inherited; - FSaveUnicode := True; + FSaveFormat := sfUnicodeLSB; +end; + +function TUnicodeStrings.GetSaveUnicode: Boolean; +begin + Result := SaveFormat in [sfUTF16LSB, sfUTF16MSB, sfUTF8]; +end; + +procedure TUnicodeStrings.SetSaveUnicode(const Value: Boolean); +begin + if Value then + SaveFormat := sfUnicodeLSB + else + SaveFormat := sfAnsi; end; function TUnicodeStrings.Add(const S: UnicodeString): Integer; @@ -598,7 +624,7 @@ // Defines a private property for the content of the list. // There's a bug in the handling of text DFMs in Classes.pas which prevents // UnicodeStrings from loading under some circumstances. Zbysek Hlinka -// (zh...@lo...) brought this to my attention and supplied also a solution. +// (zhlinka att login dott cz) brought this to my attention and supplied also a solution. // See ReadData and WriteData methods for implementation details. function DoWrite: Boolean; @@ -825,15 +851,19 @@ PutObject(Index, AObject); end; -procedure TUnicodeStrings.LoadFromFile(const FileName: string); +procedure TUnicodeStrings.LoadFromFile(const FileName: TFileName); var Stream: TStream; begin - Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite); try - LoadFromStream(Stream); - finally - Stream.Free; + Stream := TFileStream.Create(FileName, fmOpenRead or fmShareDenyNone); + try + LoadFromStream(Stream); + finally + Stream.Free; + end; + except + RaiseLastOSError; end; end; @@ -842,30 +872,85 @@ var Size, BytesRead: Integer; - Order: WideChar; + ByteOrderMask: array[0..5] of Byte; // BOM size is max 5 bytes (cf: wikipedia) + // but it is easier to implement with a multiple of 2 + Loaded: Boolean; SW: UnicodeString; SA: AnsiString; begin BeginUpdate; try + Loaded := False; + Size := Stream.Size - Stream.Position; - BytesRead := Stream.Read(Order, 2); - if (Order = BOM_LSB_FIRST) or (Order = BOM_MSB_FIRST) then + BytesRead := Stream.Read(ByteOrderMask[0], SizeOf(ByteOrderMask)); + + // UTF16 LSB = Unicode LSB/LE + if (BytesRead >= 2) and (ByteOrderMask[0] = UTF16BOMLE[0]) + and (ByteOrderMask[1] = UTF16BOMLE[1]) then begin - FSaveUnicode := True; - SetLength(SW, (Size - 2) div 2); - Stream.Read(PWideChar(SW)^, Size - 2); - if Order = BOM_MSB_FIRST then + FSaveFormat := sfUTF16LSB; + SetLength(SW, (Size - 2) div SizeOf(WideChar)); + Assert((Size and 1) <> 1, 'Number of chars must be a multiple of 2'); + if BytesRead > 2 then + begin + System.Move(ByteOrderMask[2], SW[1], BytesRead - 2); // max 4 bytes = 2 widechars + if Size > BytesRead then + // first 2 chars (maximum) were copied by System.Move + Stream.Read(SW[3], Size - BytesRead); + end; + SetTextStr(SW); + Loaded := True; + end; + + // UTF16 MSB = Unicode MSB/BE + if (BytesRead >= 2) and (ByteOrderMask[0] = UTF16BOMBE[0]) + and (ByteOrderMask[1] = UTF16BOMBE[1]) then + begin + FSaveFormat := sfUTF16MSB; + SetLength(SW, (Size - 2) div SizeOf(WideChar)); + Assert((Size and 1) <> 1, 'Number of chars must be a multiple of 2'); + if BytesRead > 2 then + begin + System.Move(ByteOrderMask[2], SW[1] ,BytesRead - 2); // max 4 bytes = 2 widechars + if Size > BytesRead then + // first 2 chars (maximum) were copied by System.Move + Stream.Read(SW[3], Size-BytesRead); StrSwapByteOrder(PWideChar(SW)); + end; SetTextStr(SW); - end - else + Loaded := True; + end; + + // UTF8 + if (BytesRead >= 3) and (ByteOrderMask[0] = UTF8BOM[0]) + and (ByteOrderMask[1] = UTF8BOM[1]) and (ByteOrderMask[2] = UTF8BOM[2]) then begin - // without byte order mark it is assumed that we are loading ANSI text - FSaveUnicode := False; - Stream.Seek(-BytesRead, soFromCurrent); - SetLength(SA, Size); - Stream.Read(PAnsiChar(SA)^, Size); + FSaveFormat := sfUTF8; + SetLength(SA, (Size - 3) div SizeOf(AnsiChar)); + if BytesRead > 3 then + begin + System.Move(ByteOrderMask[3], SA[1], BytesRead - 3); // max 3 bytes = 3 chars + if Size > BytesRead then + // first 3 chars were copied by System.Move + Stream.Read(SA[4], Size - BytesRead); + SW := UTF8Decode(SA); + end; + SetTextStr(SW); + Loaded := True; + end; + + // default case (Ansi) + if not Loaded then + begin + FSaveFormat := sfAnsi; + SetLength(SA, Size div SizeOf(AnsiChar)); + if BytesRead > 0 then + begin + System.Move(ByteOrderMask[0], SA[1], BytesRead); // max 6 bytes = 6 chars + if Size > BytesRead then + Stream.Read(SA[7], Size - BytesRead); // first 6 chars were copied by System.Move + end; SetTextStr(SA); end; finally @@ -902,7 +987,7 @@ end; end; -procedure TUnicodeStrings.SaveToFile(const FileName: string); +procedure TUnicodeStrings.SaveToFile(const FileName: TFileName); var Stream: TStream; begin @@ -918,7 +1003,7 @@ // Saves the currently loaded text into the given stream. WithBOM determines whether to write a // byte order mark or not. Note: when saved as ANSI text there will never be a BOM. var - SW, BOM: UnicodeString; + SW: UnicodeString; SA: AnsiString; Allowed: Boolean; Run: PWideChar; @@ -934,7 +1019,7 @@ FSaved := False; // be pessimistic // A check for potential information loss makes only sense if the application has // set an event to be used as call back to ask about the conversion. - if not FSaveUnicode and Assigned(FOnConfirmConversion) then + if not SaveUnicode and Assigned(FOnConfirmConversion) then begin // application requests to save only ANSI characters, so check the text and // call back in case information could be lost @@ -950,19 +1035,37 @@ if Allowed then begin // only save if allowed - if FSaveUnicode then - begin - BOM := BOM_LSB_FIRST; - Stream.WriteBuffer(PWideChar(BOM)^, 2); - // SW has already been filled - Stream.WriteBuffer(PWideChar(SW)^, 2 * Length(SW)); - end - else - begin - SA := AnsiString(PWideChar(SW)); - Stream.WriteBuffer(PAnsiChar(SA)^, Length(SA)); + case SaveFormat of + sfUTF16LSB: + begin + if WithBOM then + Stream.WriteBuffer(UTF16BOMLE[0], SizeOf(UTF16BOMLE)); + Stream.WriteBuffer(SW[1], Length(SW) * SizeOf(WideChar)); + FSaved := True; + end; + sfUTF16MSB: + begin + if WithBOM then + Stream.WriteBuffer(UTF16BOMBE[0], SizeOf(UTF16BOMBE)); + StrSwapByteOrder(PWideChar(SW)); + Stream.WriteBuffer(SW[1], Length(SW) * SizeOf(WideChar)); + FSaved := True; + end; + sfUTF8 : + begin + if WithBOM then + Stream.WriteBuffer(UTF8BOM[0], SizeOf(UTF8BOM)); + SA := UTF8Encode(SW); + Stream.WriteBuffer(SA[1], Length(SA) * SizeOf(AnsiChar)); + FSaved := True; + end; + sfAnsi : + begin + SA := SW; + Stream.WriteBuffer(SA[1], Length(SA) * SizeOf(AnsiChar)); + FSaved := True; + end; end; - FSaved := True; end; end; @@ -2968,6 +3071,14 @@ end; function LoadFromStream(UnicodeStrings: TUnicodeStrings; Stream: TStream; + Encoding: TSynEncoding): TSynEncoding; overload; +var + Dummy: Boolean; +begin + Result := LoadFromStream(UnicodeStrings, Stream, Encoding, Dummy); +end; + +function LoadFromStream(UnicodeStrings: TUnicodeStrings; Stream: TStream; Encoding: TSynEncoding; out WithBOM: Boolean): TSynEncoding; var WideStr: UnicodeString; |