From: <k_s...@us...> - 2007-04-18 14:53:27
|
Revision: 9454 http://svn.sourceforge.net/jedit/?rev=9454&view=rev Author: k_satoda Date: 2007-04-18 07:53:24 -0700 (Wed, 18 Apr 2007) Log Message: ----------- Establishes an interface EncodingDetector and makes it a new service in ServiceManager. Previously provided autodetections (BOM and XML-PI) are still provided as builtin services. This offers a way to add new encoding autodetections like Python, HTML, emacs, ... as a part of a plugin. This change replaces r8853: option for encoding autodetection from buffer local property syntax. This autodetection is also provided as a named service "buffer-local-property". Adding it to new option "encodingDetectors" in [General] option pane enables this autodetection. Originally posted as SF.net patch #1698783. Modified Paths: -------------- jEdit/trunk/doc/CHANGES.txt jEdit/trunk/org/gjt/sp/jedit/MiscUtilities.java jEdit/trunk/org/gjt/sp/jedit/Options.java jEdit/trunk/org/gjt/sp/jedit/ServiceManager.java jEdit/trunk/org/gjt/sp/jedit/bufferio/BufferIORequest.java jEdit/trunk/org/gjt/sp/jedit/io/EncodingWithBOM.java jEdit/trunk/org/gjt/sp/jedit/jEdit.java jEdit/trunk/org/gjt/sp/jedit/jedit_gui.props jEdit/trunk/org/gjt/sp/jedit/options/GeneralOptionPane.java jEdit/trunk/org/gjt/sp/jedit/services.xml Added Paths: ----------- jEdit/trunk/org/gjt/sp/jedit/io/AutoDetection.java jEdit/trunk/org/gjt/sp/jedit/io/BufferLocalEncodingDetector.java jEdit/trunk/org/gjt/sp/jedit/io/EncodingDetector.java jEdit/trunk/org/gjt/sp/jedit/io/XMLEncodingDetector.java Modified: jEdit/trunk/doc/CHANGES.txt =================================================================== --- jEdit/trunk/doc/CHANGES.txt 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/doc/CHANGES.txt 2007-04-18 14:53:24 UTC (rev 9454) @@ -122,9 +122,6 @@ - New popup menu in the plugin manager to remove plugin data when the plugin uses the new plugin api (Matthieu Casanova) -- Option to allow jEdit to autodetect :encoding= properties in the loaded - buffer (only in the 10 first 
lines) (Matthieu Casanova) - - New LayoutManager ExtendedGridLayout added. For further explanations see its JavaDoc. (Björn "Vampire" Kautler) @@ -154,6 +151,15 @@ the invisible caret hits page boundaries. Additionally the history remembers the vertical scroll position now. (Björn "Vampire" Kautler) +- Added a new option "encoding autodetector names" in General Option pane + to extend encoding autodetection with the new EncodingDetector services. + (Kazutoshi Satoda) + +- Allow jEdit to autodetect :encoding= properties in the loaded buffer + (only in the 10 first lines) by adding "buffer-local-property" to + the new option "encoding autodetector names" in General Option pane. + (Matthieu Casanova and Kazutoshi Satoda) + }}} {{{ Editing @@ -256,6 +262,9 @@ - Added a new GUIUtilities.showVFSFileDialog() method that can use a dialog as the browser's parent, instead of the view. (Kazutoshi Satoda) +- Added two new builtin service interfaces org.gjt.sp.jedit.io.Encoding + and org.gjt.sp.jedit.io.EncodingDetector. 
(Kazutoshi Satoda) + }}} }}} Modified: jEdit/trunk/org/gjt/sp/jedit/MiscUtilities.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/MiscUtilities.java 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/MiscUtilities.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -30,7 +30,6 @@ import java.io.*; import java.net.MalformedURLException; import java.net.URL; -import java.nio.charset.Charset; import java.text.DecimalFormat; import java.util.*; import java.util.regex.Matcher; @@ -47,7 +46,6 @@ import org.gjt.sp.util.IOUtilities; import org.gjt.sp.util.XMLUtilities; import org.gjt.sp.jedit.menu.EnhancedMenuItem; -import org.gjt.sp.jedit.bufferio.BufferIORequest; //}}} /** @@ -759,136 +757,49 @@ */ public static Reader autodetect(InputStream in, Buffer buffer) throws IOException { - in = new BufferedInputStream(in, - BufferIORequest.getByteIOBufferSize()); - String encoding; if (buffer == null) encoding = System.getProperty("file.encoding"); else encoding = buffer.getStringProperty(Buffer.ENCODING); + boolean gzipped = false; - if(buffer == null || buffer.getBooleanProperty(Buffer.ENCODING_AUTODETECT)) + BufferedInputStream markedStream + = AutoDetection.getMarkedStream(in); + if (buffer == null || buffer.getBooleanProperty(Buffer.ENCODING_AUTODETECT)) { - in.mark(BufferIORequest.XML_PI_LENGTH); - - if(in.read() == BufferIORequest.GZIP_MAGIC_1 - && in.read() == BufferIORequest.GZIP_MAGIC_2) + gzipped = AutoDetection.isGzipped(markedStream); + if (gzipped) { - in.reset(); Log.log(Log.DEBUG, MiscUtilities.class, "Stream is Gzipped"); - in = new GZIPInputStream(in); - if (buffer != null) - buffer.setBooleanProperty(Buffer.GZIPPED,true); - // auto-detect encoding within the gzip stream. 
- return autodetect(in, buffer); + markedStream.reset(); + markedStream = AutoDetection.getMarkedStream( + new GZIPInputStream(markedStream)); } - in.reset(); - String detectedByBOM = EncodingWithBOM.detectEncoding(in); - if (detectedByBOM != null) + markedStream.reset(); + String detected = AutoDetection.getDetectedEncoding(markedStream); + if (detected != null) { - encoding = detectedByBOM; + encoding = detected; } - else - { - in.reset(); - byte[] _xmlPI = new byte[BufferIORequest.XML_PI_LENGTH]; - int offset = 0; - int count; - while((count = in.read(_xmlPI,offset, - BufferIORequest.XML_PI_LENGTH - offset)) != -1) - { - offset += count; - if(offset == BufferIORequest.XML_PI_LENGTH) - break; - } - - String xmlEncoding = getXMLEncoding(new String( - _xmlPI,0,offset,"ASCII")); - - if (xmlEncoding == null) - { - - if (Options.X_AUTODETECT) - { - in.reset(); - String coding = xAutodetect(in); - if (coding != null) - encoding = coding; - } - } - else - { - encoding = xmlEncoding; - } - } - - in.reset(); } Log.log(Log.DEBUG, MiscUtilities.class, "Stream encoding detected is " + encoding); - Reader result = EncodingServer.getTextReader(in, encoding); + markedStream.reset(); + Reader result = EncodingServer.getTextReader(markedStream, encoding); if (buffer != null) { - // Store the successfull encoding. + // Store the successfull properties. 
+ if (gzipped) + { + buffer.setBooleanProperty(Buffer.GZIPPED,true); + } buffer.setProperty(Buffer.ENCODING, encoding); } return result; } //}}} - //{{{ xAutodetect() method - private static String xAutodetect(InputStream in) throws IOException - { - BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - int i = 0; - while (i < 10) - { - i++; - String line = reader.readLine(); - if (line == null) - return null; - int pos = line.indexOf(":encoding="); - if (pos != -1) - { - int p2 = line.indexOf(':', pos + 10); - String encoding = line.substring(pos + 10, p2); - return encoding; - } - } - return null; - } //}}} - - //{{{ getXMLEncoding() method - /** - * Extract XML encoding name from PI. - */ - private static String getXMLEncoding(String xmlPI) - { - if(!xmlPI.startsWith("<?xml")) - return null; - - int index = xmlPI.indexOf("encoding="); - if(index == -1 || index + 9 == xmlPI.length()) - return null; - - char ch = xmlPI.charAt(index + 9); - int endIndex = xmlPI.indexOf(ch,index + 10); - if(endIndex == -1) - return null; - - String encoding = xmlPI.substring(index + 10,endIndex); - - if(Charset.isSupported(encoding)) - return encoding; - else - { - Log.log(Log.WARNING,MiscUtilities.class,"XML PI specifies " - + "unsupported encoding: " + encoding); - return null; - } - } //}}} - //{{{ closeQuietly() method /** * Method that will close an {@link InputStream} ignoring it if it is null and ignoring exceptions. Modified: jEdit/trunk/org/gjt/sp/jedit/Options.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/Options.java 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/Options.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -30,10 +30,4 @@ public class Options { public static boolean SIMPLIFIED_KEY_HANDLING; - - /** - * Activate this to activate autodetection of the buffer. 
- * This is an experimental feature and can fail - */ - public static boolean X_AUTODETECT; } Modified: jEdit/trunk/org/gjt/sp/jedit/ServiceManager.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/ServiceManager.java 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/ServiceManager.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -68,6 +68,7 @@ * <li>{@link org.gjt.sp.jedit.buffer.FoldHandler}</li> * <li>{@link org.gjt.sp.jedit.io.VFS}</li> * <li>{@link org.gjt.sp.jedit.io.Encoding}</li> + * <li>{@link org.gjt.sp.jedit.io.EncodingDetector}</li> * </ul> * * Plugins may provide more.<p> Modified: jEdit/trunk/org/gjt/sp/jedit/bufferio/BufferIORequest.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/bufferio/BufferIORequest.java 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/bufferio/BufferIORequest.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -56,18 +56,6 @@ //{{{ Constants /** - * Magic numbers used for auto-detecting GZIP files. - */ - public static final int GZIP_MAGIC_1 = 0x1f; - public static final int GZIP_MAGIC_2 = 0x8b; - - /** - * Length of longest XML PI used for encoding detection.<p> - * <?xml version="1.0" encoding="................"?> - */ - public static final int XML_PI_LENGTH = 50; - - /** * Size of I/O buffers. 
*/ public static final int IOBUFSIZE = 32768; @@ -92,6 +80,9 @@ @Deprecated public static final int UTF8_MAGIC_3 = 0xbf; @Deprecated public static final int UNICODE_MAGIC_1 = 0xfe; @Deprecated public static final int UNICODE_MAGIC_2 = 0xff; + @Deprecated public static final int XML_PI_LENGTH = 50; + @Deprecated public static final int GZIP_MAGIC_1 = 0x1f; + @Deprecated public static final int GZIP_MAGIC_2 = 0x8b; //}}} @@ -409,3 +400,4 @@ } + Added: jEdit/trunk/org/gjt/sp/jedit/io/AutoDetection.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/io/AutoDetection.java (rev 0) +++ jEdit/trunk/org/gjt/sp/jedit/io/AutoDetection.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -0,0 +1,162 @@ +/* + * :tabSize=8:indentSize=8:noTabs=false: + * :folding=explicit:collapseFolds=1: + * + * Copyright (C) 2006 Kazutoshi Satoda + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +package org.gjt.sp.jedit.io; + +//{{{ Imports +import java.io.InputStream; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.Reader; +import java.io.InputStreamReader; +import java.io.Writer; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.nio.charset.Charset; +import java.util.List; +import java.util.ArrayList; + +import org.gjt.sp.jedit.jEdit; +import org.gjt.sp.jedit.ServiceManager; +import org.gjt.sp.jedit.bufferio.BufferIORequest; +import org.gjt.sp.jedit.MiscUtilities; +import org.gjt.sp.util.Log; +//}}} + +/** + * Some functions for auto detection of I/O stream properties. + */ +public class AutoDetection +{ + //{{{ getMarkedStream() method + /** + * Returns a marked, rewindable stream. + * Calling reset() method rewinds the stream to its beginning. + * But reset() can fail if too long bytes were read. + */ + public static BufferedInputStream getMarkedStream(InputStream in) + { + int bufferSize = BufferIORequest.getByteIOBufferSize(); + BufferedInputStream markable + = new BufferedInputStream(in, bufferSize); + assert(markable.markSupported()); + markable.mark(bufferSize); + return markable; + } //}}} + + //{{{ isGzipped() method + /** + * Returns wheather the stream is gzipped. + * This method reads a few bytes from the sample. So a caller + * must take care of mark() to reuse the contents. Wraping the + * stream by getMarkedStream() is suitable. + */ + public static boolean isGzipped(InputStream sample) + throws IOException + { + final byte[] GZIP_MAGIC = { (byte)0x1f, (byte)0x8b }; + return sample.read() == GZIP_MAGIC[0] + && sample.read() == GZIP_MAGIC[1]; + } //}}} + + //{{{ getEncodingDetectors() method + /** + * Returns the user configured ordered list of encoding detectors. + * This method reads property "encodingDetectors". 
+ */ + public static List<EncodingDetector> getEncodingDetectors() + { + List<EncodingDetector> detectors + = new ArrayList<EncodingDetector>(); + String propName = "encodingDetectors"; + String selectedDetectors + = jEdit.getProperty(propName, "BOM XML-PI"); + if (selectedDetectors != null + && selectedDetectors.length() > 0) + { + for (String name: selectedDetectors.split("\\s+")) + { + EncodingDetector service + = getEncodingDetectorService(name); + if (service != null) + { + detectors.add(service); + } + else + { + Log.log(Log.ERROR, AutoDetection.class + , "getEncodingDetectors():" + + " No EncodingDetector for the name" + + " \"" + name + "\""); + } + } + } + return detectors; + } //}}} + + //{{{ getDetectedEncoding() method + /** + * Returns an auto detected encoding from content of markedStream. + * This method assumes that markedStream is wrapped by + * getMarkedStream() method. + */ + public static String getDetectedEncoding(BufferedInputStream markedStream) + throws IOException + { + List<EncodingDetector> detectors = getEncodingDetectors(); + for (EncodingDetector detector: detectors) + { + // FIXME: Here the method reset() can fail if the + // previous detector read more than buffer size of + // markedStream. + markedStream.reset(); + // Wrap once more so that calling mark() + // or reset() in detectEncoding() don't + // alter the mark position of markedStream. + String detected = detector.detectEncoding( + new BufferedInputStream(markedStream)); + if (detected != null) + { + return detected; + } + } + return null; + } //}}} + + //{{{ Private members + /** + * Returns a service of EncodingDetector for name. 
+ */ + private static EncodingDetector getEncodingDetectorService(String name) + { + String serviceClass = "org.gjt.sp.jedit.io.EncodingDetector"; + Object service = ServiceManager.getService(serviceClass, name); + if (service != null && service instanceof EncodingDetector) + { + return (EncodingDetector)service; + } + else + { + return null; + } + } + //}}} +} Property changes on: jEdit/trunk/org/gjt/sp/jedit/io/AutoDetection.java ___________________________________________________________________ Name: svn:keywords + Author Date Id Revision Name: svn:eol-style + native Copied: jEdit/trunk/org/gjt/sp/jedit/io/BufferLocalEncodingDetector.java (from rev 9449, jEdit/trunk/org/gjt/sp/jedit/MiscUtilities.java) =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/io/BufferLocalEncodingDetector.java (rev 0) +++ jEdit/trunk/org/gjt/sp/jedit/io/BufferLocalEncodingDetector.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -0,0 +1,62 @@ +/* + * :tabSize=8:indentSize=8:noTabs=false: + * :folding=explicit:collapseFolds=1: + * + * Copyright (C) 2007 Kazutoshi Satoda + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ */ + +package org.gjt.sp.jedit.io; + +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.BufferedReader; +import java.io.IOException; + +/** + * An encoding detector which finds buffer-local-property syntax. + * + * This reads the sample in the system default encoding for first 10 + * lines and look for ":encoding=..." syntax. This can fail if the + * stream cannot be read in the system default encoding or + * ":encoding=..." is not placed at near the top of the stream. + * + * @since 4.3pre10 + * @author Kazutoshi Satoda + */ +public class BufferLocalEncodingDetector implements EncodingDetector +{ + public String detectEncoding(InputStream sample) throws IOException + { + BufferedReader reader + = new BufferedReader(new InputStreamReader(sample)); + int i = 0; + while (i < 10) + { + i++; + String line = reader.readLine(); + if (line == null) + return null; + int pos = line.indexOf(":encoding="); + if (pos != -1) + { + int p2 = line.indexOf(':', pos + 10); + String encoding = line.substring(pos + 10, p2); + return encoding; + } + } + return null; + } +} Added: jEdit/trunk/org/gjt/sp/jedit/io/EncodingDetector.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/io/EncodingDetector.java (rev 0) +++ jEdit/trunk/org/gjt/sp/jedit/io/EncodingDetector.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -0,0 +1,40 @@ +/* + * :tabSize=8:indentSize=8:noTabs=false: + * :folding=explicit:collapseFolds=1: + * + * Copyright (C) 2007 Kazutoshi Satoda + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +package org.gjt.sp.jedit.io; + +import java.io.InputStream; +import java.io.IOException; + +/** + * An interface to detect a reasonable encoding from some bytes at the + * beginning of a file. + * + * @since 4.3pre10 + * @author Kazutoshi Satoda + */ +public interface EncodingDetector +{ + /** + * Returns the name of a detected encoding for the bytes in sample. + * Returns null if this instance could not detect reasonable one. + */ + public String detectEncoding(InputStream sample) throws IOException; +} Property changes on: jEdit/trunk/org/gjt/sp/jedit/io/EncodingDetector.java ___________________________________________________________________ Name: svn:keywords + Author Date Id Revision Name: svn:eol-style + native Modified: jEdit/trunk/org/gjt/sp/jedit/io/EncodingWithBOM.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/io/EncodingWithBOM.java 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/io/EncodingWithBOM.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -72,61 +72,64 @@ } //}}} - //{{{ detectEncoding() method - public static String detectEncoding(InputStream sample) throws IOException + //{{{ class Detector + public static class Detector implements EncodingDetector { - byte[] mark = new byte[4]; - int count = sample.read(mark); - - byte low = (byte)(BOM16 & 0xff); - byte high = (byte)((BOM16 >> 8) & 0xff); - if (count >= 4) + public String detectEncoding(InputStream sample) throws IOException { - if (mark[0] == low && mark[1] == high - && mark[2] == 0x00 && mark[3] == 0x00) + byte[] mark = new byte[4]; + int count = sample.read(mark); + + byte low = (byte)(BOM16 & 0xff); + byte high = (byte)((BOM16 >> 8) & 0xff); 
+ if (count >= 4) { - return "X-UTF-32LE-BOM"; + if (mark[0] == low && mark[1] == high + && mark[2] == 0x00 && mark[3] == 0x00) + { + return "X-UTF-32LE-BOM"; + } + else if (mark[0] == 0x00 && mark[1] == 0x00 + && mark[2] == high && mark[3] == low) + { + return "X-UTF-32BE-BOM"; + } } - else if (mark[0] == 0x00 && mark[1] == 0x00 - && mark[2] == high && mark[3] == low) + if (count >= 2) { - return "X-UTF-32BE-BOM"; + if (mark[0] == low && mark[1] == high) + { + return "x-UTF-16LE-BOM"; + } + else if (mark[0] == high && mark[1] == low) + { + // "x-UTF-16BE-BOM" does not available. + // But an encoder for "UTF-16" actually uses + // big endian with corresponding BOM. It just + // works as "UTF-16BE with BOM". + return "UTF-16"; + } } - } - if (count >= 2) - { - if (mark[0] == low && mark[1] == high) + + if (count >= UTF8BOM.length) { - return "x-UTF-16LE-BOM"; - } - else if (mark[0] == high && mark[1] == low) - { - // "x-UTF-16BE-BOM" does not available. - // But an encoder for "UTF-16" actually uses - // big endian with corresponding BOM. It just - // works as "UTF-16BE with BOM". 
- return "UTF-16"; - } - } - - if (count >= UTF8BOM.length) - { - int i = 0; - while (i < UTF8BOM.length) - { - if (mark[i] != UTF8BOM[i]) + int i = 0; + while (i < UTF8BOM.length) { - break; + if (mark[i] != UTF8BOM[i]) + { + break; + } + ++i; } - ++i; + if (i == UTF8BOM.length) + { + return "UTF-8Y"; + } } - if (i == UTF8BOM.length) - { - return "UTF-8Y"; - } + + return null; } - - return null; } //}}} //{{{ Private members Copied: jEdit/trunk/org/gjt/sp/jedit/io/XMLEncodingDetector.java (from rev 9449, jEdit/trunk/org/gjt/sp/jedit/MiscUtilities.java) =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/io/XMLEncodingDetector.java (rev 0) +++ jEdit/trunk/org/gjt/sp/jedit/io/XMLEncodingDetector.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -0,0 +1,90 @@ +/* + * :tabSize=8:indentSize=8:noTabs=false: + * :folding=explicit:collapseFolds=1: + * + * Copyright (C) 2007 Kazutoshi Satoda + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + */ + +package org.gjt.sp.jedit.io; + +//{{{ Imports +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.IOException; +import java.nio.charset.Charset; + +import org.gjt.sp.util.Log; +//}}} + +/** + * An encoding detector which extracts encoding from XML declaration. 
+ * + * @since 4.3pre10 + * @author Kazutoshi Satoda + */ +public class XMLEncodingDetector implements EncodingDetector +{ + //{{{ implements EncodingDetector + public String detectEncoding(InputStream sample) throws IOException + { + // Length of longest XML PI used for encoding detection. + // <?xml version="1.0" encoding="................"?> + final int XML_PI_LENGTH = 50; + + byte[] _xmlPI = new byte[XML_PI_LENGTH]; + int offset = 0; + int count; + while((count = sample.read(_xmlPI,offset, + XML_PI_LENGTH - offset)) != -1) + { + offset += count; + if(offset == XML_PI_LENGTH) + break; + } + return getXMLEncoding(new String(_xmlPI,0,offset,"ASCII")); + } //}}} + + //{{{ Private members + /** + * Extract XML encoding name from PI. + */ + private static String getXMLEncoding(String xmlPI) + { + if(!xmlPI.startsWith("<?xml")) + return null; + + int index = xmlPI.indexOf("encoding="); + if(index == -1 || index + 9 == xmlPI.length()) + return null; + + char ch = xmlPI.charAt(index + 9); + int endIndex = xmlPI.indexOf(ch,index + 10); + if(endIndex == -1) + return null; + + String encoding = xmlPI.substring(index + 10,endIndex); + + if(Charset.isSupported(encoding)) + return encoding; + else + { + Log.log(Log.WARNING, XMLEncodingDetector.class, + "XML PI specifies unsupported encoding: " + encoding); + return null; + } + } + //}}} +} Modified: jEdit/trunk/org/gjt/sp/jedit/jEdit.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/jEdit.java 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/jEdit.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -380,7 +380,6 @@ GUIUtilities.advanceSplashProgress("loading user properties"); initUserProperties(); Options.SIMPLIFIED_KEY_HANDLING = jEdit.getBooleanProperty("newkeyhandling"); - Options.X_AUTODETECT = jEdit.getBooleanProperty("xAutodetect"); //}}} //{{{ Initialize server Modified: jEdit/trunk/org/gjt/sp/jedit/jedit_gui.props 
=================================================================== --- jEdit/trunk/org/gjt/sp/jedit/jedit_gui.props 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/jedit_gui.props 2007-04-18 14:53:24 UTC (rev 9454) @@ -1635,7 +1635,7 @@ options.general.sortBuffers=Sort buffer list (takes effect after restart) options.general.sortByName=Sort buffer list by file name, instead of path name options.general.newkeyhandling=Use new keyboard handling (experimental) -options.general.xAutodetect=Autodetect encoding using buffer local properties (experimental) +options.general.encodingDetectors=Ordered list of encoding autodetector names (experimental) #}}} #{{{ Abbreviations pane Modified: jEdit/trunk/org/gjt/sp/jedit/options/GeneralOptionPane.java =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/options/GeneralOptionPane.java 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/options/GeneralOptionPane.java 2007-04-18 14:53:24 UTC (rev 9454) @@ -49,7 +49,7 @@ private JCheckBox sortBuffers; private JCheckBox sortByName; private JCheckBox newKeyboardHandling; - private JCheckBox xAutodetect; + private JTextField encodingDetectors; //}}} //{{{ GeneralOptionPane constructor @@ -199,9 +199,12 @@ newKeyboardHandling.setSelected(jEdit.getBooleanProperty("newkeyhandling")); addComponent(newKeyboardHandling); - xAutodetect = new JCheckBox(jEdit.getProperty("options.general.xAutodetect")); - xAutodetect.setSelected(jEdit.getBooleanProperty("xAutodetect")); - addComponent(xAutodetect); + encodingDetectors = new JTextField(jEdit.getProperty( + "options.general.encodingDetectors")); + encodingDetectors.setText(jEdit.getProperty("encodingDetectors", + "BOM XML-PI")); + addComponent(jEdit.getProperty("options.general.encodingDetectors"), + encodingDetectors); } //}}} @@ -251,8 +254,7 @@ boolean nkh = newKeyboardHandling.isSelected(); jEdit.setBooleanProperty("newkeyhandling", nkh); 
Options.SIMPLIFIED_KEY_HANDLING = nkh; - jEdit.setBooleanProperty("xAutodetect", xAutodetect.isSelected()); - Options.X_AUTODETECT = xAutodetect.isSelected(); + jEdit.setProperty("encodingDetectors",encodingDetectors.getText()); jEdit.setBooleanProperty("saveCaret",saveCaret.isSelected()); jEdit.setBooleanProperty("persistentMarkers", persistentMarkers.isSelected()); Modified: jEdit/trunk/org/gjt/sp/jedit/services.xml =================================================================== --- jEdit/trunk/org/gjt/sp/jedit/services.xml 2007-04-18 08:10:26 UTC (rev 9453) +++ jEdit/trunk/org/gjt/sp/jedit/services.xml 2007-04-18 14:53:24 UTC (rev 9454) @@ -39,4 +39,15 @@ <SERVICE CLASS="org.gjt.sp.jedit.io.Encoding" NAME="X-UTF-32BE-BOM"> new EncodingWithBOM("UTF-32BE"); </SERVICE> + + <!-- Encoding detectors --> + <SERVICE CLASS="org.gjt.sp.jedit.io.EncodingDetector" NAME="BOM"> + new EncodingWithBOM.Detector(); + </SERVICE> + <SERVICE CLASS="org.gjt.sp.jedit.io.EncodingDetector" NAME="XML-PI"> + new XMLEncodingDetector(); + </SERVICE> + <SERVICE CLASS="org.gjt.sp.jedit.io.EncodingDetector" NAME="buffer-local-property"> + new BufferLocalEncodingDetector(); + </SERVICE> </SERVICES> This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |