Update of /cvsroot/pywin32/pywin32/Pythonwin/pywin/scintilla
In directory ddv4jf1.ch3.sourceforge.com:/tmp/cvs-serv30801/pywin/scintilla
Modified Files:
document.py view.py
Log Message:
Merge EOL-detection and start of decent encoding detection (BOM only for
now) from py3k-integration branch.
Index: view.py
===================================================================
RCS file: /cvsroot/pywin32/pywin32/Pythonwin/pywin/scintilla/view.py,v
retrieving revision 1.32
retrieving revision 1.33
diff -C2 -d -r1.32 -r1.33
*** view.py 3 Jan 2009 05:52:28 -0000 1.32
--- view.py 5 Jan 2009 11:09:21 -0000 1.33
***************
*** 387,396 ****
def SaveTextFile(self, filename):
doc = self.GetDocument()
! s = self.GetTextRange()
! # Save in binary mode so line endings are not translated.
! # Edit control uses '\r\n', and universal newlines mode replaces ALL '\r' with '\r\n'.
! f = open(filename, 'wb')
! f.write(s.encode('mbcs'))
! f.close()
doc.SetModifiedFlag(0)
return 1
--- 387,398 ----
def SaveTextFile(self, filename):
doc = self.GetDocument()
! # Open in binary mode as scintilla itself ensures the
! # line endings are already appropriate, and our doc save
! # method handles encoding, BOMs, etc.
! f = open(filename, 'wb')
! try:
! doc._SaveTextToFile(self, f)
! finally:
! f.close()
doc.SetModifiedFlag(0)
return 1
Index: document.py
===================================================================
RCS file: /cvsroot/pywin32/pywin32/Pythonwin/pywin/scintilla/document.py,v
retrieving revision 1.8
retrieving revision 1.9
diff -C2 -d -r1.8 -r1.9
*** document.py 26 Nov 2008 08:39:33 -0000 1.8
--- document.py 5 Jan 2009 11:09:20 -0000 1.9
***************
*** 1,13 ****
import win32ui
from pywin.mfc import docview
! from pywin import is_platform_unicode, default_platform_encoding, default_scintilla_encoding
import scintillacon
import win32con
import string
! import array
ParentScintillaDocument=docview.Document
class CScintillaDocument(ParentScintillaDocument):
"A SyntEdit document. "
def DeleteContents(self):
pass
--- 1,24 ----
import win32ui
from pywin.mfc import docview
! from pywin import default_scintilla_encoding
import scintillacon
import win32con
import string
! import os
! import codecs
!
! crlf_bytes = "\r\n".encode("ascii")
! lf_bytes = "\n".encode("ascii")
ParentScintillaDocument=docview.Document
class CScintillaDocument(ParentScintillaDocument):
"A SyntEdit document. "
+ def __init__(self, *args):
+ self.bom = None # the BOM, if any, read from the file.
+ # the encoding we detected from the source. Might have
+ # detected via the BOM or an encoding decl.
+ self.source_encoding = None
+ ParentScintillaDocument.__init__(self, *args)
+
def DeleteContents(self):
pass
***************
*** 18,42 ****
self.SetPathName(filename) # Must set this early!
try:
! if is_platform_unicode:
! # Scintilla in UTF-8 mode - translate accordingly.
! import codecs
! f = codecs.open(filename, 'rb', default_platform_encoding)
! else:
! f = open(filename, 'rb')
try:
! text = f.read()
finally:
f.close()
- if is_platform_unicode:
- # Translate from locale-specific (MCBS) encoding to UTF-8 for Scintilla
- text = text.encode(default_scintilla_encoding)
except IOError:
win32ui.MessageBox("Could not load the file from %s" % filename)
return 0
- self._SetLoadedText(text)
- ## if self.GetFirstView():
- ## self.GetFirstView()._SetLoadedText(text)
- ## self.SetModifiedFlag(0) # No longer dirty
return 1
--- 29,43 ----
self.SetPathName(filename) # Must set this early!
try:
! # load the text as binary so we can get smart
! # about detecting any existing EOL conventions.
! f = open(filename, 'rb')
try:
! self._LoadTextFromFile(f)
finally:
f.close()
except IOError:
win32ui.MessageBox("Could not load the file from %s" % filename)
return 0
return 1
***************
*** 54,58 ****
# File related functions
# Helper to transfer text from the MFC document to the control.
! def _SetLoadedText(self, text):
view = self.GetFirstView()
if view.IsWindow():
--- 55,111 ----
# File related functions
# Helper to transfer text from the MFC document to the control.
! def _LoadTextFromFile(self, f):
! # detect EOL mode - we don't support \r only - so find the
! # first '\n' and guess based on the char before.
! l = f.readline()
! # If line ends with \r\n or has no line ending, use CRLF.
! if l.endswith(crlf_bytes) or not l.endswith(lf_bytes):
! eol_mode = scintillacon.SC_EOL_CRLF
! else:
! eol_mode = scintillacon.SC_EOL_LF
!
! # Detect the encoding.
! # XXX - todo - support pep263 encoding declarations as well as
! # the BOM detection here (but note that unlike our BOM, the
! # encoding declaration could change between loading and saving
! # - particularly with a new file - so it also needs to be
! # implemented at save time.)
! for bom, encoding in (
! (codecs.BOM_UTF8, "utf8"),
! (codecs.BOM_UTF16_LE, "utf_16_le"),
! (codecs.BOM_UTF16_BE, "utf_16_be"),
! ):
! if l.startswith(bom):
! self.bom = bom
! self.source_encoding = encoding
! l = l[len(bom):] # remove it.
! break
!
! # reading by lines would be too slow? Maybe we can use the
! # incremental encoders? For now just stick with loading the
! # entire file in memory.
! text = l + f.read()
!
! # Translate from source encoding to UTF-8 bytes for Scintilla
! source_encoding = self.source_encoding
! # This latin1 sucks until we get pep263 support; if we don't
! # know an encoding we just write as binary (maybe we should
! # try ascii to let the 'decoding failed' handling below
! # provide a nice warning that the file is non-ascii)
! if source_encoding is None:
! source_encoding = 'latin1'
! # we could optimize this by avoiding utf8 to-ing and from-ing,
! # but then we would lose the ability to handle invalid utf8
! # (and even then, the use of encoding aliases makes this tricky)
! # To create an invalid utf8 file:
! # >>> open(filename, "wb").write(codecs.BOM_UTF8+"bad \xa9har\r\n")
! try:
! dec = text.decode(source_encoding)
! except UnicodeError:
! print "WARNING: Failed to decode bytes from %r encoding - treating as latin1" % source_encoding
! dec = text.decode('latin1')
! # and put it back as utf8 - this shouldn't fail.
! text = dec.encode(default_scintilla_encoding)
!
view = self.GetFirstView()
if view.IsWindow():
***************
*** 61,70 ****
# Make sure the control isnt read-only
view.SetReadOnly(0)
-
- doc = self
view.SendScintilla(scintillacon.SCI_CLEARALL)
! view.SendMessage(scintillacon.SCI_ADDTEXT, buffer(text))
view.SendScintilla(scintillacon.SCI_SETUNDOCOLLECTION, 1, 0)
view.SendScintilla(win32con.EM_EMPTYUNDOBUFFER, 0, 0)
def FinalizeViewCreation(self, view):
--- 114,135 ----
# Make sure the control isnt read-only
view.SetReadOnly(0)
view.SendScintilla(scintillacon.SCI_CLEARALL)
! view.SendMessage(scintillacon.SCI_ADDTEXT, text)
view.SendScintilla(scintillacon.SCI_SETUNDOCOLLECTION, 1, 0)
view.SendScintilla(win32con.EM_EMPTYUNDOBUFFER, 0, 0)
+ # set EOL mode
+ view.SendScintilla(scintillacon.SCI_SETEOLMODE, eol_mode)
+
+ def _SaveTextToFile(self, view, f):
+ s = view.GetTextRange() # already decoded from scintilla's encoding
+ if self.bom:
+ f.write(self.bom)
+ source_encoding = self.source_encoding
+ if source_encoding is None:
+ source_encoding = 'latin1'
+
+ f.write(s.encode(source_encoding))
+ self.SetModifiedFlag(0)
+
def FinalizeViewCreation(self, view):
|