From: <jbo...@li...> - 2005-12-21 07:37:18
|
Author: mic...@jb...
Date: 2005-12-21 02:37:13 -0500 (Wed, 21 Dec 2005)
New Revision: 1911

Modified:
   trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParser.java
Log:
handles newlines

Modified: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParser.java
===================================================================
--- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParser.java	2005-12-21 07:32:06 UTC (rev 1910)
+++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParser.java	2005-12-21 07:37:13 UTC (rev 1911)
@@ -4,6 +4,7 @@
 import java.net.URL;
 import java.net.URLConnection;
 
+import org.apache.commons.lang.StringUtils;
 import org.drools.natural.NaturalLanguageException;
 import org.drools.natural.ruledoc.RuleDocumentListener;
 import org.htmlparser.Node;
@@ -85,8 +86,11 @@
 
     private void handleText(String text)
     {
-        listener.handleText(unescapeSmartQuotes(unescapeEntities(text)));
+        String noNewLines = StringUtils.replaceChars(text, '\n', ' ');
+        noNewLines = StringUtils.replaceChars(noNewLines, '\r', ' ');
+        listener.handleText(unescapeSmartQuotes(unescapeEntities(noNewLines)));
+
     }
 
     private void handleTag(TagNode tag)
@@ -120,9 +124,9 @@
                 listener.endComment();
             }
         } else if (tagName.equals("P")) {
-            handleText(" ");
+            handleText("\n");
         } else if (tagName.equals("BR")) {
-            handleText(" ");
+            handleText("\n");
         }
|