From: Egon W. <eg...@us...> - 2004-10-26 09:03:44
|
Update of /cvsroot/cdk/cdk/src/org/openscience/cdk/io In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv17483/src/org/openscience/cdk/io Modified Files: ReaderFactory.java Log Message: Rewrote methods: format detecting no longer requires the readers. This involved one API change: ReaderFactory.guessFormat() now returns a ChemFormat object Index: ReaderFactory.java =================================================================== RCS file: /cvsroot/cdk/cdk/src/org/openscience/cdk/io/ReaderFactory.java,v retrieving revision 1.42 retrieving revision 1.43 diff -u -r1.42 -r1.43 --- ReaderFactory.java 25 Oct 2004 19:11:36 -0000 1.42 +++ ReaderFactory.java 26 Oct 2004 09:03:05 -0000 1.43 @@ -175,125 +175,39 @@ * * @see #guessFormat(InputStream) */ - public String guessFormat(Reader input) throws IOException { - ChemObjectReader reader = createReader(input); - if (reader != null) { - return reader.getClass().getName(); - } - return "Format undetermined"; - } - - public String guessFormat(InputStream input) throws IOException { - ChemObjectReader reader = createReader(input); - if (reader != null) { - return reader.getClass().getName(); - } - return "Format undetermined"; - } - - /** - * Detects the format of the Reader input, and if known, it will return - * a CDK Reader to read the format. Note that this Reader might be a - * subclass of DummyReader, which means that the Reader does not yet - * have an implementation. 
- * - * @see #createReader(Reader) - * @see org.openscience.cdk.io.DummyReader - */ - public ChemObjectReader createReader(InputStream input) throws IOException { - BufferedInputStream bistream = new BufferedInputStream(input, 8192); - InputStream istreamToRead = bistream; // if gzip test fails, then take default - bistream.mark(5); - int countRead = 0; - try { - byte[] abMagic = new byte[4]; - countRead = bistream.read(abMagic, 0, 4); - bistream.reset(); - if (countRead == 4) { - if (abMagic[0] == (byte)0x1F && abMagic[1] == (byte)0x8B) { - istreamToRead = new GZIPInputStream(bistream); - } - } - } catch (IOException exception) { - logger.error(exception.getMessage()); - logger.debug(exception); - } - return createReader(new InputStreamReader(istreamToRead)); - } - - /** - * Detects the format of the Reader input, and if known, it will return - * a CDK Reader to read the format. Note that this Reader might be a - * subclass of DummyReader, which means that the Reader does not yet - * have an implementation. - * - * <p>This method is not able to detect the format of gziped files. - * Use createReader(InputStream) instead for such files. 
- * - * @see #createReader(InputStream) - * @see org.openscience.cdk.io.DummyReader - */ - public ChemObjectReader createReader(Reader input) throws IOException { + public ChemFormat guessFormat(BufferedReader input) throws IOException { if (input == null) { throw new IllegalArgumentException("input cannot be null"); } - // FIXME: this should use the new ChemObjectReader.matches() method - // make a copy of the header - int bufferSize = this.headerLength; - BufferedReader originalBuffer = new BufferedReader(input, bufferSize); - char[] header = new char[bufferSize]; - if (!originalBuffer.markSupported()) { + char[] header = new char[this.headerLength]; + if (!input.markSupported()) { logger.error("Mark not supported"); throw new IllegalArgumentException("input must support mark"); } - originalBuffer.mark(bufferSize); - originalBuffer.read(header, 0, bufferSize); - originalBuffer.reset(); + input.mark(this.headerLength); + input.read(header, 0, this.headerLength); + input.reset(); BufferedReader buffer = new BufferedReader(new CharArrayReader(header)); /* Search file for a line containing an identifying keyword */ String line = buffer.readLine(); int lineNumber = 1; - boolean formatDetected = false; - while (buffer.ready() && (line != null) && (!formatDetected)) { + while (buffer.ready() && (line != null)) { logger.debug(lineNumber + ": ", line); - for (int i=0; i<formats.size() && !formatDetected; i++) { + for (int i=0; i<formats.size(); i++) { ChemFormatMatcher cfMatcher = (ChemFormatMatcher)formats.elementAt(i); if (cfMatcher.matches(lineNumber, line)) { - formatDetected = true; logger.info("Detected format: ", cfMatcher.getFormatName()); - String readerClassName = cfMatcher.getReaderClassName(); - if (readerClassName != null) { - try { - // make a new instance of this class - ChemObjectReader coReader = (ChemObjectReader)this.getClass().getClassLoader(). 
- loadClass(readerClassName).newInstance(); - coReader.setReader(originalBuffer); - return coReader; - } catch (ClassNotFoundException exception) { - logger.error("Could not find this ChemObjectReader: ", readerClassName); - logger.debug(exception); - } catch (Exception exception) { - logger.error("Could not create this ChemObjectReader: ", readerClassName); - logger.debug(exception); - } - } else { - logger.info("Format detected, but not implemented!"); - } + return cfMatcher; } } line = buffer.readLine(); lineNumber++; } - if (formatDetected == true) { - logger.warn("Format was detected but it could not instantiate a Reader for that format"); - return null; - } - logger.warn("Now comes the tricky and more difficult ones...."); buffer = new BufferedReader(new CharArrayReader(header)); @@ -305,11 +219,11 @@ if (tokenCount == 1) { new Integer(tokenizer.nextToken()); // if not failed, then it is a XYZ file - return new org.openscience.cdk.io.XYZReader(originalBuffer); + return new org.openscience.cdk.io.formats.XYZFormat(); } else if (tokenCount == 2) { new Integer(tokenizer.nextToken()); if ("Bohr".equalsIgnoreCase(tokenizer.nextToken())) { - return new org.openscience.cdk.io.XYZReader(originalBuffer); + return new org.openscience.cdk.io.formats.XYZFormat(); } } } catch (NumberFormatException exception) { @@ -319,7 +233,7 @@ try { SmilesParser sp = new SmilesParser(); Molecule m = sp.parseSmiles(line); - return new org.openscience.cdk.io.SMILESReader(originalBuffer); + return new org.openscience.cdk.io.formats.SMILESFormat(); } catch (Exception ise) { // no, it is not logger.info("No, it's not a SMILES file"); @@ -328,5 +242,98 @@ logger.warn("File format undetermined"); return null; } + + public ChemFormat guessFormat(InputStream input) throws IOException { + BufferedInputStream bistream = new BufferedInputStream(input, 8192); + InputStream istreamToRead = bistream; // if gzip test fails, then take default + bistream.mark(5); + int countRead = 0; + try { + byte[] 
abMagic = new byte[4]; + countRead = bistream.read(abMagic, 0, 4); + bistream.reset(); + if (countRead == 4) { + if (abMagic[0] == (byte)0x1F && abMagic[1] == (byte)0x8B) { + istreamToRead = new GZIPInputStream(bistream); + } + } + } catch (IOException exception) { + logger.error(exception.getMessage()); + logger.debug(exception); + } + return guessFormat(new BufferedReader(new InputStreamReader(istreamToRead))); + } + + /** + * Detects the format of the Reader input, and if known, it will return + * a CDK Reader to read the format. Note that this Reader might be a + * subclass of DummyReader, which means that the Reader does not yet + * have an implementation. + * + * @see #createReader(Reader) + * @see org.openscience.cdk.io.DummyReader + */ + public ChemObjectReader createReader(InputStream input) throws IOException { + BufferedInputStream bistream = new BufferedInputStream(input, 8192); + InputStream istreamToRead = bistream; // if gzip test fails, then take default + bistream.mark(5); + int countRead = 0; + try { + byte[] abMagic = new byte[4]; + countRead = bistream.read(abMagic, 0, 4); + bistream.reset(); + if (countRead == 4) { + if (abMagic[0] == (byte)0x1F && abMagic[1] == (byte)0x8B) { + istreamToRead = new GZIPInputStream(bistream); + } + } + } catch (IOException exception) { + logger.error(exception.getMessage()); + logger.debug(exception); + } + return createReader(new InputStreamReader(istreamToRead)); + } + + /** + * Detects the format of the Reader input, and if known, it will return + * a CDK Reader to read the format. Note that this Reader might be a + * subclass of DummyReader, which means that the Reader does not yet + * have an implementation. + * + * <p>This method is not able to detect the format of gziped files. + * Use createReader(InputStream) instead for such files. 
+ * + * @see #createReader(InputStream) + * @see org.openscience.cdk.io.DummyReader + */ + public ChemObjectReader createReader(Reader input) throws IOException { + if (!(input instanceof BufferedReader)) { + input = new BufferedReader(input); + } + ChemFormat chemFormat = guessFormat((BufferedReader)input); + if (chemFormat != null) { + String readerClassName = chemFormat.getReaderClassName(); + if (readerClassName != null) { + try { + // make a new instance of this class + ChemObjectReader coReader = (ChemObjectReader)this.getClass().getClassLoader(). + loadClass(readerClassName).newInstance(); + coReader.setReader(input); + return coReader; + } catch (ClassNotFoundException exception) { + logger.error("Could not find this ChemObjectReader: ", readerClassName); + logger.debug(exception); + } catch (Exception exception) { + logger.error("Could not create this ChemObjectReader: ", readerClassName); + logger.debug(exception); + } + } else { + logger.warn("ChemFormat is recognized, but no reader is available."); + } + } else { + logger.warn("ChemFormat is not recognized."); + } + return null; + } } |