From: <my...@us...> - 2009-06-10 14:04:45
|
Revision: 1971
          http://aperture.svn.sourceforge.net/aperture/?rev=1971&view=rev
Author:   mylka
Date:     2009-06-10 14:04:42 +0000 (Wed, 10 Jun 2009)

Log Message:
-----------
nepomuk #822 - added some more descriptive exception handling to the xml extractor

Modified Paths:
--------------
    aperture/trunk/core/extractor/text/xml/src/main/java/org/semanticdesktop/aperture/extractor/xml/XmlExtractor.java

Modified: aperture/trunk/core/extractor/text/xml/src/main/java/org/semanticdesktop/aperture/extractor/xml/XmlExtractor.java
===================================================================
--- aperture/trunk/core/extractor/text/xml/src/main/java/org/semanticdesktop/aperture/extractor/xml/XmlExtractor.java 2009-06-09 14:21:28 UTC (rev 1970)
+++ aperture/trunk/core/extractor/text/xml/src/main/java/org/semanticdesktop/aperture/extractor/xml/XmlExtractor.java 2009-06-10 14:04:42 UTC (rev 1971)
@@ -39,6 +39,8 @@
 public class XmlExtractor implements Extractor {
     private static final int BUFFER_SIZE = 8192;
+
+    private Logger logger = LoggerFactory.getLogger(XmlExtractor.class);
     public void extract(URI id, InputStream stream, Charset charset, String mimeType, RDFContainer result)
             throws ExtractorException {
@@ -56,11 +58,14 @@
                 public void close() {
                     // don't do anything
-                }
+                }
+
             };
             // setup a parser
             SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
+            XMLReader r = parser.getXMLReader();
+            r.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
             // prepare a ContentHandler that gathers all text
             XmlTextExtractor listener = new XmlTextExtractor();
@@ -78,7 +83,7 @@
             // see if this is an exception indicating that an external DTD or entity declaration cannot be
             // resolved
             if (!isFailingInclusionException(e)) {
-                return;
+                throw new ExtractorException(e);
             }
             // a FileNotFoundException is typically thrown when an external DTD or entity declaration
@@ -91,23 +96,29 @@
             // again.
             try {
                 // disable external DTD loading, etc. on the XMLReader
-                XMLReader r = parser.getXMLReader();
+                r = parser.getXMLReader();
                 r.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
                 r.setFeature("http://xml.org/sax/features/external-general-entities", false);
                 r.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
                 // reset stream and parser listener
                 listener.clear();
-                stream.reset();
+                filterStream.reset();
                 // try once more to parse the document
                 parser.parse(source, listener);
             }
+            catch (IOException ioe) {
+                // under some circumstances the stream.reset() did throw an IOException because the stream was
+                // allegedly trying to reset to invalid mark,
+                logger.error("Exception while parsing document " + id.toString(), e);
+                logger.error("While trying to reparse the file without loading external DTDs following error occured",ioe);
+                throw ioe;
+            }
             catch (SAXException se) {
                 // the FNFE is probably more worthy to report than the SAXException
-                // Antoni: 2007-09-26, probably not, It is misguiding
-                Logger logger = LoggerFactory.getLogger(getClass());
-                logger.error("FileNotFoundException while parsing document " + id.toString(), e);
+                // Antoni: 2007-09-26, probably not, It is misguiding, I revert to SAXException
+                logger.error("Exception while parsing document " + id.toString(), e);
                 logger.error("While trying to reparse the file without loading external DTDs following error occured",se);
             }
         }
     }

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|