From: Matthieu C. <cho...@gm...> - 2007-05-02 07:51:32
|
In fact, the fact that the method isBinary(Reader) throws some exceptions on the encoding could be a hint that the encoding was not detected correctly, don't you think? 2007/5/1, Kazutoshi Satoda <k_s...@f2...>: > > Hi Alan, Matthieu, I understood the benefit of "Skip binary files" from > your mails. Thank you. > > So I made a patch (fix_encoding_error.patch) to fix the problem. This > adds isBinary(InputStream) and deprecates isBinary(Reader). > > Additionally I made another patch (add_notice_log.patch). This adds > a NOTICE log for skipped binary files in directory search. Without this, > users can't find possible false positive cases of jEdit's binary > detection. > > I'll commit them if both of you have no objection. > -- > k_satoda > > Index: doc/CHANGES.txt > =================================================================== > --- doc/CHANGES.txt (revision 9474) > +++ doc/CHANGES.txt (working copy) > @@ -266,6 +266,9 @@ > - Added two new builtin service interfaces org.gjt.sp.jedit.io.Encoding > and org.gjt.sp.jedit.io.EncodingDetector. (Kazutoshi Satoda) > > +- MiscUtilities.isBinary(Reader) is now deprecated in favor of a new > + method MiscUtilities.isBinary(InputStream). (Kazutoshi Satoda) > + > }}} > > }}} > Index: org/gjt/sp/jedit/io/AutoDetection.java > =================================================================== > --- org/gjt/sp/jedit/io/AutoDetection.java (revision 9474) > +++ org/gjt/sp/jedit/io/AutoDetection.java (working copy) > @@ -33,6 +33,7 @@ > import java.nio.charset.Charset; > import java.util.List; > import java.util.ArrayList; > +import java.util.zip.GZIPInputStream; > > import org.gjt.sp.jedit.jEdit; > import org.gjt.sp.jedit.ServiceManager; > @@ -141,6 +142,76 @@ > return null; > } //}}} > > + //{{{ class Result > + /** > + * An utility class to hold the result of some auto detections. 
> + */ > + public static class Result > + { > + //{{{ Constructor > + /** > + * Do some auto detection for a stream and hold the > + * result in this instance. > + * @param in the stream > + */ > + public Result(InputStream in) throws IOException > + { > + BufferedInputStream marked = getMarkedStream(in); > + > + gzipped = isGzipped(marked); > + if (gzipped) > + { > + marked.reset(); > + marked = getMarkedStream( > + new GZIPInputStream(marked)); > + } > + > + marked.reset(); > + encoding = AutoDetection.getDetectedEncoding > (marked); > + > + markedStream = marked; > + } //}}} > + > + //{{{ getRewindedStream() > + /** > + * Returns the stream which can be read the contents of > + * the original stream. > + * Some bytes ware read from original stream for auto > + * detections. But they are rewinded at this method. > + */ > + public BufferedInputStream getRewindedStream() > + throws IOException > + { > + markedStream.reset(); > + return markedStream; > + } //}}} > + > + //{{{ streamIsGzipped() > + /** > + * Returns if the stream is gzipped. > + */ > + public boolean streamIsGzipped() > + { > + return gzipped; > + } //}}} > + > + //{{{ getDetectedEncoding() > + /** > + * Returns the auto detected encoding. > + * Returns null if no encoding was detected. > + */ > + public String getDetectedEncoding() > + { > + return encoding; > + } //}}} > + > + //{{{ Private members > + private final BufferedInputStream markedStream; > + private final boolean gzipped; > + private final String encoding; > + //}}} > + } //}}} > + > //{{{ Private members > /** > * Returns a service of EncodingDetector for name. > Index: org/gjt/sp/jedit/io/VFSFile.java > =================================================================== > --- org/gjt/sp/jedit/io/VFSFile.java (revision 9474) > +++ org/gjt/sp/jedit/io/VFSFile.java (working copy) > @@ -278,11 +278,6 @@ > //{{{ isBinary() method > /** > * Check if a file is binary file. 
> - * To check if a file is binary, we will check the first > characters 100 > - * (jEdit property vfs.binaryCheck.length) > - * If more than 1 (jEdit property vfs.binaryCheck.count), the > - * file is declared binary. > - * This is not 100% because sometimes the autodetection could > fail. > * > * @param session the VFS session > * @return <code>true</code> if the file was detected as binary > @@ -292,7 +287,6 @@ > public boolean isBinary(Object session) > throws IOException > { > - Reader reader = null; > InputStream in = > getVFS()._createInputStream(session,getPath(), > false,jEdit.getActiveView()); > if(in == null) > @@ -300,12 +294,11 @@ > > try > { > - reader = MiscUtilities.autodetect(in, null); > - return MiscUtilities.isBinary(reader); > + return MiscUtilities.isBinary(in); > } > finally > { > - IOUtilities.closeQuietly(reader); > + IOUtilities.closeQuietly(in); > } > } //}}} > > Index: org/gjt/sp/jedit/MiscUtilities.java > =================================================================== > --- org/gjt/sp/jedit/MiscUtilities.java (revision 9474) > +++ org/gjt/sp/jedit/MiscUtilities.java (working copy) > @@ -34,7 +34,6 @@ > import java.util.*; > import java.util.regex.Matcher; > import java.util.regex.Pattern; > -import java.util.zip.GZIPInputStream; > > import org.xml.sax.InputSource; > import org.xml.sax.helpers.DefaultHandler; > @@ -694,39 +693,49 @@ > return IOUtilities.copyStream(4096,progress, in, out, > canStop); > } //}}} > > - //{{{ isBinaryFile() method > + //{{{ isBinary() method > /** > - * Check if a Reader is binary. > - * To check if a file is binary, we will check the first characters > 100 > - * (jEdit property vfs.binaryCheck.length) > - * If more than 1 (jEdit property vfs.binaryCheck.count), the > - * file is declared binary. > - * This is not 100% because sometimes the autodetection could fail. > - * This method will not close your reader. 
You have to do it > yourself > - * > - * @param reader the reader > - * @return <code>true</code> if the Reader was detected as binary > - * @throws IOException IOException If an I/O error occurs > - * @since jEdit 4.3pre5 > - */ > - public static boolean isBinary(Reader reader) > - throws IOException > + * Check if a Reader is binary. > + * @deprecated > + * Use isBinary(InputStream) instead. > + */ > + @Deprecated > + public static boolean isBinary(Reader reader) throws IOException > { > - int nbChars = jEdit.getIntegerProperty(" > vfs.binaryCheck.length",100); > - int authorized = jEdit.getIntegerProperty(" > vfs.binaryCheck.count",1); > - for (long i = 0L;i < nbChars;i++) > + return containsNulCharacter(reader); > + } //}}} > + > + //{{{ isBinary() method > + /** > + * Check if an InputStream is binary. > + * First this tries encoding auto detection. If an encoding is > + * detected, the stream should be a text stream. Otherwise, this > + * will check the first characters 100 > + * (jEdit property vfs.binaryCheck.length) in the system default > + * encoding. If more than 1 (jEdit property vfs.binaryCheck.count) > + * NUL(\u0000) was found, the stream is declared binary. > + * > + * This is not 100% because sometimes the autodetection could > fail. > + * > + * This method will not close the stream. 
You have to do it > yourself > + * > + * @param in the stream > + * @return <code>true</code> if the stream was detected as binary > + * @throws IOException IOException If an I/O error occurs > + * @since jEdit 4.3pre10 > + */ > + public static boolean isBinary(InputStream in) throws IOException > + { > + AutoDetection.Result detection = new AutoDetection.Result > (in); > + // If an encoding is detected, this is a text stream > + if (detection.getDetectedEncoding() != null) > { > - int c = reader.read(); > - if (c == -1) > - return false; > - if (c == 0) > - { > - authorized--; > - if (authorized == 0) > - return true; > - } > + return false; > } > - return false; > + // Read the stream in system default encoding. The > encoding > + // might be wrong. But enough for binary detection. > + return containsNulCharacter( > + new InputStreamReader(detection.getRewindedStream > ())); > } //}}} > > //{{{ isBackup() method > @@ -764,30 +773,31 @@ > encoding = buffer.getStringProperty( > Buffer.ENCODING); > boolean gzipped = false; > > - BufferedInputStream markedStream > - = AutoDetection.getMarkedStream(in); > if (buffer == null || buffer.getBooleanProperty( > Buffer.ENCODING_AUTODETECT)) > { > - gzipped = AutoDetection.isGzipped(markedStream); > + AutoDetection.Result detection = new > AutoDetection.Result(in); > + gzipped = detection.streamIsGzipped(); > if (gzipped) > { > - Log.log(Log.DEBUG, MiscUtilities.class, > "Stream is Gzipped"); > - markedStream.reset(); > - markedStream = > AutoDetection.getMarkedStream( > - new > GZIPInputStream(markedStream)); > + Log.log(Log.DEBUG, MiscUtilities.class > + , "Stream is Gzipped"); > } > - > - markedStream.reset(); > - String detected = > AutoDetection.getDetectedEncoding(markedStream); > + String detected = detection.getDetectedEncoding(); > if (detected != null) > { > encoding = detected; > + Log.log(Log.DEBUG, MiscUtilities.class > + , "Stream encoding detected is " + > detected); > } > + in = detection.getRewindedStream(); 
> } > + else > + { > + // Make the stream buffered in the same way. > + in = AutoDetection.getMarkedStream(in); > + } > > - Log.log(Log.DEBUG, MiscUtilities.class, "Stream encoding > detected is " + encoding); > - markedStream.reset(); > - Reader result = EncodingServer.getTextReader(markedStream, > encoding); > + Reader result = EncodingServer.getTextReader(in, > encoding); > if (buffer != null) > { > // Store the successfull properties. > @@ -1702,5 +1712,26 @@ > return 0; > } //}}} > > + //{{{ containsNulCharacter() > + public static boolean containsNulCharacter(Reader reader) > + throws IOException > + { > + int nbChars = jEdit.getIntegerProperty(" > vfs.binaryCheck.length",100); > + int authorized = jEdit.getIntegerProperty(" > vfs.binaryCheck.count",1); > + for (long i = 0L;i < nbChars;i++) > + { > + int c = reader.read(); > + if (c == -1) > + return false; > + if (c == 0) > + { > + authorized--; > + if (authorized == 0) > + return true; > + } > + } > + return false; > + } //}}} > + > //}}} > } > > Index: org/gjt/sp/jedit/io/VFS.java > =================================================================== > --- org/gjt/sp/jedit/io/VFS.java (revision 9474) > +++ org/gjt/sp/jedit/io/VFS.java (working copy) > @@ -644,7 +644,7 @@ > String[] retval = null; > retval = _listDirectory(session, directory, glob, > recursive, comp, true, false); > return retval; > - } > + } //}}} > > > //{{{ _listDirectory() method > @@ -1120,7 +1120,11 @@ > try > { > if (file.isBinary > (session)) > + { > + Log.log(Log.NOTICE > ,this > + , > file.getPath() + ": skipped as a binary file"); > continue; > + } > } > catch(IOException e) > { > > |