From: <jbo...@li...> - 2005-12-21 07:32:33
|
Author: mic...@jb... Date: 2005-12-21 02:32:06 -0500 (Wed, 21 Dec 2005) New Revision: 1910 Added: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/DictionaryHelper.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocument.java trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/DictionaryHelperTest.java trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/NaturalRulesSpec.html trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentListenerTest.java trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentTest.java trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserTest.java trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/sample.dictionary.properties trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/simple-ruledoc.html Removed: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/ParseState.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocumentListenerImpl.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleSectionState.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/TableState.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParserImpl.java trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserImplTest.java trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/NaturalRulesSpec.html trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/test_rule_raw.txt Modified: 
trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/NaturalLanguageException.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ast/AbstractSyntaxNode.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/Keywords.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocumentListener.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParser.java trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/keywords.properties trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/simplest.html Log: some more work on the HTML parser Modified: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/NaturalLanguageException.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/NaturalLanguageException.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/NaturalLanguageException.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -13,5 +13,9 @@ { super(message); } + + public NaturalLanguageException(String message, Throwable cause) { + super(message, cause); + } } Modified: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ast/AbstractSyntaxNode.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ast/AbstractSyntaxNode.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ast/AbstractSyntaxNode.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,5 +1,14 @@ package org.drools.natural.ast; +/** + * All natural language syntax nodes extend this class. 
+ * + * Basically we start out with a list of tokens, each token is then classified into one of the syntax types, + * and then they build themselves up into a list of little ASTs. + * + * @author <a href="mailto:mic...@gm..."> Michael Neale</a> + * + */ public abstract class AbstractSyntaxNode { Added: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/DictionaryHelper.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/DictionaryHelper.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/DictionaryHelper.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -0,0 +1,26 @@ +package org.drools.natural.ruledoc; + +import java.util.Properties; + +public class DictionaryHelper +{ + + private Properties props; + + public DictionaryHelper(Properties properties) { + props = properties; + } + + public String getItem(String key) { + return props.getProperty(key); + } + + public String getFunctions() { + return props.getProperty("functions"); + } + + public String getImports() { + if (props.containsKey("import")) return props.getProperty("import"); + return props.getProperty("imports"); + } +} Property changes on: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/DictionaryHelper.java ___________________________________________________________________ Name: svn:eol-style + native Modified: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/Keywords.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/Keywords.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/Keywords.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -9,14 +9,14 @@ public class 
Keywords { - public static Keywords instance; + private static Keywords instance; private Properties props; private Keywords(Properties p) { this.props = p; } - public static Keywords getInstance() { + private static Keywords getInstance() { if (instance == null) { Properties props = new Properties(); @@ -34,6 +34,10 @@ * Helper method to get a keyword */ public static String getKeyword(String key) { + Keywords keywords = getInstance(); + if (!keywords.props.containsKey(key)) { + throw new IllegalArgumentException("The keyword [" + key + "] was not in the configuration."); + } return getInstance().props.getProperty(key); } Deleted: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/ParseState.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/ParseState.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/ParseState.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,12 +0,0 @@ -package org.drools.natural.ruledoc; - -/** - * This is the super class of all document parse states. 
- * @author <a href="mailto:mic...@gm..."> Michael Neale</a> - * - */ -public abstract class ParseState -{ - abstract void parseChunk(String text); - -} Added: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocument.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocument.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocument.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -0,0 +1,31 @@ +package org.drools.natural.ruledoc; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.util.List; +import java.util.Properties; + +import org.drools.natural.ruledoc.html.HTMLDocParser; + +/** + * This is the class that does it all for rule documents (HTML based). + * @author <a href="mailto:mic...@gm..."> Michael Neale</a> + */ +public class RuleDocument +{ + + public RuleDocument() { + } + + List buildRuleListFromDocument(URL document, Properties dictionary) { + HTMLDocParser parser = new HTMLDocParser(); + RuleDocumentListener listener = new RuleDocumentListener(); + parser.parseDocument(document, listener); + + return listener.getRules(); + + } + + +} Property changes on: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocument.java ___________________________________________________________________ Name: svn:eol-style + native Modified: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocumentListener.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocumentListener.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocumentListener.java 
2005-12-21 07:32:06 UTC (rev 1910) @@ -1,27 +1,91 @@ package org.drools.natural.ruledoc; -public interface RuleDocumentListener +import java.util.ArrayList; +import java.util.List; + +/** + * This is a simple rule listener implementation. Doesn't do anything clever, just extracts the rules. + * Leaves everything else alone. + * No special treatment of tables. + * @author <a href="mailto:mic...@gm..."> Michael Neale</a> + */ +public class RuleDocumentListener { - /** - * Process a line of text. - */ - public abstract void handleText(String text); + private boolean inComment; + private List rules; + private StringBuffer ruleBuffer; + private boolean inRule; + + public RuleDocumentListener() { + rules = new ArrayList(); + } - public abstract void startTable(); + public void handleText(String text) + { + if (text.trim().startsWith(Keywords.getKeyword("rule.start"))) { + startNewRule(text); + } else if (text.trim().endsWith(Keywords.getKeyword("rule.end"))) { + finishCurrentRule(text); + } else if (inComment) { + return; + } else if (inRule) { + ruleBuffer.append(text); + } + + } - public abstract void startColumn(); + private void finishCurrentRule(String text) + { + ruleBuffer.append(text); + rules.add(ruleBuffer.toString()); + inRule = false; + } - public abstract void startRow(); + private void startNewRule(String text) + { + ruleBuffer = new StringBuffer(); + ruleBuffer.append(text); + inRule = true; + } - public abstract void endTable(); + public void startTable() + { + // TODO Auto-generated method stub + + } - public abstract void endColumn(); + public void startColumn() + { + // TODO Auto-generated method stub + + } - public abstract void endRow(); + public void startRow() + { + // TODO Auto-generated method stub + + } - public abstract void startComment(); + public void endTable() + { + // TODO Auto-generated method stub + + } - public abstract void endComment(); + public void startComment() + { + this.inComment = true; + } -} \ No newline at end of file + 
public void endComment() + { + this.inComment = false; + } + + public List getRules() { + return rules; + } + + +} Deleted: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocumentListenerImpl.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocumentListenerImpl.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleDocumentListenerImpl.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,83 +0,0 @@ -package org.drools.natural.ruledoc; - -/** - * This builds up the rules from the document events. - * HTML is obviously supported, but this interface is not bound to HTML. - * - * This is the class context for the parsing state, like GoF state pattern. - * - * @author <a href="mailto:mic...@gm..."> Michael Neale</a> - * - */ -public class RuleDocumentListenerImpl implements RuleDocumentListener -{ - - - /* (non-Javadoc) - * @see org.drools.natural.ruledoc.RuleDocumentListener#handleText(java.lang.String) - */ - public void handleText(String text) { - - } - - /* (non-Javadoc) - * @see org.drools.natural.ruledoc.RuleDocumentListener#startTable() - */ - public void startTable() { - - } - - /* (non-Javadoc) - * @see org.drools.natural.ruledoc.RuleDocumentListener#startColumn() - */ - public void startColumn() { - - } - - /* (non-Javadoc) - * @see org.drools.natural.ruledoc.RuleDocumentListener#startRow() - */ - public void startRow() { - - } - - /* (non-Javadoc) - * @see org.drools.natural.ruledoc.RuleDocumentListener#endTable() - */ - public void endTable() { - - } - - /* (non-Javadoc) - * @see org.drools.natural.ruledoc.RuleDocumentListener#endColumn() - */ - public void endColumn() { - - } - - /* (non-Javadoc) - * @see org.drools.natural.ruledoc.RuleDocumentListener#endRow() - */ - public void endRow() { - - } - - /* (non-Javadoc) - * @see 
org.drools.natural.ruledoc.RuleDocumentListener#startComment() - */ - public void startComment() { - - } - - /* (non-Javadoc) - * @see org.drools.natural.ruledoc.RuleDocumentListener#endComment() - */ - public void endComment() { - - } - - - - - -} Deleted: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleSectionState.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleSectionState.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/RuleSectionState.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,31 +0,0 @@ -package org.drools.natural.ruledoc; - -import org.apache.commons.lang.StringUtils; - -public class RuleSectionState extends ParseState -{ - - private StringBuffer buf = new StringBuffer(); - private String name; - - - public RuleSectionState(String name) { - this.buf.append(name + " "); - } - - void parseChunk(String text) - { - buf.append(text); - } - - public static boolean isStart(String text) - { - return StringUtils.contains(text, Keywords.getKeyword("rule.start")); - } - - public static boolean isEnd(String text) - { - return StringUtils.contains(text, Keywords.getKeyword("rule.end")); - } - -} Deleted: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/TableState.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/TableState.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/TableState.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,28 +0,0 @@ -package org.drools.natural.ruledoc; - -import java.util.Properties; - -public class TableState extends ParseState -{ - - private String name; - private Properties data; - private boolean 
inKey; - - - public TableState(String name) { - this.name = name; - data = new Properties(); - } - - - - void parseChunk(String text) - { - - - - - } - -} Modified: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParser.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParser.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParser.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,30 +1,154 @@ package org.drools.natural.ruledoc.html; import java.io.IOException; -import java.io.InputStream; +import java.net.URL; +import java.net.URLConnection; +import org.drools.natural.NaturalLanguageException; import org.drools.natural.ruledoc.RuleDocumentListener; +import org.htmlparser.Node; +import org.htmlparser.Parser; +import org.htmlparser.nodes.RemarkNode; +import org.htmlparser.nodes.TagNode; +import org.htmlparser.nodes.TextNode; +import org.htmlparser.util.NodeIterator; +import org.htmlparser.util.NodeList; +import org.htmlparser.util.ParserException; -/** +/** * The HTML document parser will treat italics as comments. * Must be tolerant of dodgy HTML, and handle quirky HTML like the stuff - * that Microsoft word spits out. - * - * Any html parsers must implement this. - * My suggested ones are: - * 1) javax.swing - * 2) HotSax - * 3) that one from SiteMesh - * 4) roll your own. + * that Microsoft word spits out. * @author <a href="mailto:mic...@gm..."> Michael Neale</a> - * */ -public interface HTMLDocParser +public class HTMLDocParser { - /** - * @param input The input to the HTML. - * @param listener The listener to raise document events against. - * @throws IOException If there is something wrong with the stream. 
- */ - void parseDocument(InputStream input, RuleDocumentListener listener) throws IOException; + + private RuleDocumentListener listener; + + public void parseDocument(URL url, RuleDocumentListener listener) { + try { + parseDocument(url.openConnection(), listener); + } catch (IOException e) { + throw new NaturalLanguageException("Unable to open URL to rule document", e); + } + } + + public void parseDocument(URLConnection input, + RuleDocumentListener listener) + { + this.listener = listener; + try + { + Parser parser = new Parser(input); + for (NodeIterator i = parser.elements (); i.hasMoreNodes(); ) { + processNodes (i.nextNode ()); + } + } + catch ( ParserException e ) + { + throw new NaturalLanguageException("Error in the HTML parser.", e); + } + } + + private void processNodes (Node node) throws ParserException + { + if (node instanceof TextNode) + { + // downcast to TextNode + TextNode text = (TextNode)node; + // do whatever processing you want with the text + handleText(text.getText()); + } + if (node instanceof RemarkNode) + { + // downcast to RemarkNode + //RemarkNode remark = (RemarkNode)node; + } + else if (node instanceof TagNode) + { + // downcast to TagNode + TagNode tag = (TagNode)node; + // do whatever processing you want with the tag itself + handleTag(tag); + // process recursively (nodes within nodes) via getChildren() + NodeList nl = tag.getChildren (); + if (null != nl) + for (NodeIterator i = nl.elements (); i.hasMoreNodes(); ) { + processNodes (i.nextNode ()); + } + + } + } + + + + private void handleText(String text) + { + listener.handleText(unescapeSmartQuotes(unescapeEntities(text))); + + } + + private void handleTag(TagNode tag) + { + + String tagName = tag.getTagName(); + boolean isEnding = tag.isEndTag(); + + if (tagName.equals("TABLE") ) { + if (!isEnding) { + listener.startTable(); + } else { + listener.startTable(); + } + } else if (tagName.equals("TH")) { + if (!isEnding) { + listener.startRow(); + } + } else if 
(tagName.equalsIgnoreCase("TR")) { + if (!isEnding) { + listener.startRow(); + } + } else if (tagName.equals("TD")) { + if (!isEnding) { + listener.startColumn(); + } + } else if (tagName.equals("I")) { + if (!isEnding) { + listener.startComment(); + } else { + listener.endComment(); + } + } else if (tagName.equals("P")) { + handleText(" "); + } else if (tagName.equals("BR")) { + handleText(" "); + } + + + } + + + + private static String unescapeSmartQuotes(String s) { + s = s.replace('\u201c', '"'); + s = s.replace('\u201d', '"'); + s = s.replace('\u2018', '\''); + s = s.replace('\u2019', '\''); + return s; + } + + private static String unescapeEntities(String s) { + s = s.replaceAll("&lt;", "<"); + s = s.replaceAll("&gt;", ">"); + s = s.replaceAll("&nbsp;", " "); + s = s.replaceAll("&quot;", "\""); + s = s.replaceAll("&amp;", "&"); + s = s.replaceAll("&rdquo", "\""); + s = s.replaceAll("&ldquo;", "\""); + return s; + } + + + } Deleted: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParserImpl.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParserImpl.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/html/HTMLDocParserImpl.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,122 +0,0 @@ -package org.drools.natural.ruledoc.html; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.Reader; - -import javax.swing.text.MutableAttributeSet; -import javax.swing.text.html.HTMLEditorKit; -import javax.swing.text.html.HTML.Tag; -import javax.swing.text.html.parser.ParserDelegator; - - -import org.drools.natural.ruledoc.RuleDocumentListener; - -/** - * This implementation of a doc parser uses the javax.swing package. 
- * @author <a href="mailto:mic...@gm..."> Michael Neale</a> - */ -public class HTMLDocParserImpl extends HTMLEditorKit.ParserCallback implements HTMLDocParser -{ - - private RuleDocumentListener listener; - - public void handleEndTag(Tag tag, - int arg1) - { - System.out.println("Breaksflow: " + tag.breaksFlow()); - - if ( tag == Tag.I ) - { - listener.endComment(); - } - else if ( tag == Tag.TABLE ) - { - listener.endTable(); - } - else if ( tag == Tag.TH ) - { - listener.endRow(); - } - else if ( tag == Tag.TR ) - { - listener.endRow(); - } - else if ( tag == Tag.TD ) - { - listener.endColumn(); - } else if (tag == Tag.BR) { - System.out.println("BREAK"); - } else if (tag == Tag.P) { - System.out.println("PARA"); - } - - } - - public void handleStartTag(Tag tag, - MutableAttributeSet arg1, - int arg2) - { - - // System.out.println("Start TAG: " + name); - if ( tag == Tag.I ) - { - listener.startComment(); - } - else if ( tag == Tag.TABLE ) - { - listener.startTable(); - } - else if ( tag == Tag.TH ) - { - listener.startRow(); - } - else if ( tag == Tag.TR ) - { - listener.startRow(); - } - else if ( tag == Tag.TD ) - { - listener.startColumn(); - } - - } - - public void handleText(char[] chars, - int arg1) - { - String s = new String(chars); - - listener.handleText(unescapeSmartQuotes(unescapeEntities(s))); - } - - - - public void parseDocument(InputStream input, - RuleDocumentListener listener) throws IOException - { - this.listener = listener; - ParserDelegator del = new ParserDelegator(); - Reader reader = new InputStreamReader(input); - del.parse(reader, this, true); - } - - private static String unescapeSmartQuotes(String s) { - s = s.replace('\u201c', '"'); - s = s.replace('\u201d', '"'); - s = s.replace('\u2018', '\''); - s = s.replace('\u2019', '\''); - return s; - } - - private static String unescapeEntities(String s) { - s = s.replaceAll("<", "<"); - s = s.replaceAll(">", ">"); - s = s.replaceAll(" ", " "); - s = s.replaceAll(""", "\""); - s = 
s.replaceAll("&", "&"); - return s; - } - -} Modified: trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/keywords.properties =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/keywords.properties 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/main/org/drools/natural/ruledoc/keywords.properties 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,16 +1,9 @@ #here we configure the syntax for rules -rule.start=Rule-start: -rule.end=Rule-end +rule.start=Start-rule +rule.end=End-rule rule.if=IF rule.then=THEN rule.condition.and=AND rule.consequence.and=AND -#and here is the config for what tables to look at. All tables have only 2 cols. -rule.config.table.header=Rules -dictionary.table.header=Dictionary -ruleset.table.header=Ruleset -#not sure if I will integrate tests or not... -test.table.header=Test - Added: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/DictionaryHelperTest.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/DictionaryHelperTest.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/DictionaryHelperTest.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -0,0 +1,17 @@ +package org.drools.natural.ruledoc; + +import java.io.IOException; +import java.util.Properties; + +import junit.framework.TestCase; + +public class DictionaryHelperTest extends TestCase +{ + public void testLoad() throws IOException { + + Properties props = new Properties(); + props.load(getClass().getResourceAsStream("sample.dictionary.properties")); + DictionaryHelper dic = new DictionaryHelper(props); + assertEquals("convertToDate(\"${right}\")", dic.getItem("date of")); + } +} Property changes on: 
trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/DictionaryHelperTest.java ___________________________________________________________________ Name: svn:eol-style + native Added: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/NaturalRulesSpec.html =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/NaturalRulesSpec.html 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/NaturalRulesSpec.html 2005-12-21 07:32:06 UTC (rev 1910) @@ -0,0 +1,315 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> +<HTML> +<HEAD> + <META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=windows-1252"> + <TITLE></TITLE> + <META NAME="GENERATOR" CONTENT="OpenOffice.org 2.0 (Win32)"> + <META NAME="CREATED" CONTENT="20051214;11592400"> + <META NAME="CHANGEDBY" CONTENT="Michael Neale"> + <META NAME="CHANGED" CONTENT="20051221;17212142"> + <STYLE> + <!-- + @page { size: 21cm 29.7cm; margin: 2cm } + P { margin-bottom: 0.21cm } + H1 { margin-bottom: 0.21cm } + H1.western { font-family: "Arial", sans-serif; font-size: 16pt } + H1.cjk { font-family: "Lucida Sans Unicode"; font-size: 16pt } + H1.ctl { font-family: "Tahoma"; font-size: 16pt } + H3 { margin-bottom: 0.21cm } + H3.western { font-family: "Arial", sans-serif } + TD P { margin-bottom: 0cm } + TH P { margin-bottom: 0cm; font-style: italic } + --> + </STYLE> +</HEAD> +<BODY LANG="" DIR="LTR"> +<H1 CLASS="western"><FONT FACE="Verdana, sans-serif">Drools natural +rule language</FONT><IMG SRC="NaturalRulesSpec_html_m48cc1843.jpg" NAME="graphics1" ALIGN=LEFT HSPACE=23 VSPACE=23 WIDTH=185 HEIGHT=246 BORDER=0><BR CLEAR=LEFT></H1> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; 
page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=2 STYLE="font-size: 9pt">Natural +rules, in textual documents specification<BR>(this is an executable +document)</FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=2 STYLE="font-size: 9pt"><I>TODO: +do we prefer the term “Natural rules” or “Literate +rules”?</I></FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=2 STYLE="font-size: 9pt"><B>Outline</B></FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=2 STYLE="font-size: 9pt">Natural +language rules work off any textual document that can be transformed +to HTML (this is similar in concept to Ward Cunningham’s +fit.c2.com framework) – including this one. Some simple rules +are followed to make the natural language very easy to lexically +analyse (which is normally the hardest bit of parsing). 
Combine this +with some conventions and object introspection, and we have an +interesting idea.</FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=2 STYLE="font-size: 9pt"><B>This +is essentially an instance of Literate programming +(www.literateprogramming.com),</B> which was coined by Donald Knuth +(why isn’t he “Sir” yet, damn it if Elton John can +get a knighthood, so can Don). A core premise of literate programming +is that “comments” are first class, code is woven in.</FONT></FONT></FONT></P> +<H3 CLASS="western"><FONT FACE="Verdana, sans-serif">Features</FONT></H3> +<OL> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Must + be able to expose the full power of DRL features if needed (a lot of + them won’t be, but generally at least one person will ask for + each feature). 
</FONT></FONT></FONT> + </P> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Should + be able to produce readable rules (readable by a suitably trained + non technical person) – having a non programmer edit rules in + entirety is a lofty goal ;) But it may be a poison chalice ;) </FONT></FONT></FONT> + </P> + <OL> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Can + use it to create rule templates, where the templates are not + editable, but the rule parameters are.</FONT></FONT></FONT></P> + </OL> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Should + allow editing in normal drools snippets format for power users when + natural language is not appropriate.</FONT></FONT></FONT></P> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Allow + rapid building of Domain Specific Languages (using the built in + 
dictionary)</FONT></FONT></FONT></P> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">As + it works off HTML (or any appropriate markup language) can be driven + off a wiki. DHTML may be used to provide “intellisense” + in the future, or swing, but at the end of the day it is JUST TEXT + so it can easily be diff’ed (this is important).</FONT></FONT></FONT></P> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Should + produce a HTML “output” file showing how the document + was parsed (this is hard... 
but worth it), and FIT test results.</FONT></FONT></FONT></P> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">As + it HTML, source could be edited by an enhanced WYSIWYG DHTML editor + (which could restrict what is edited, provide “intellisense” + etc), providing a web interface for rules (an alternative to + document based rules).</FONT></FONT></FONT></P> + <LI><P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> + <FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Tests + can be embedded in the document by utilizing FIT (and fit-for-rules + from Michael), providing what if testing, and regression testing of + rules in isolation.</FONT></FONT></FONT></P> +</OL> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<IMG SRC="NaturalRulesSpec_html_m48cc1843.jpg" NAME="graphics2" ALIGN=LEFT HSPACE=23 VSPACE=23 WIDTH=185 HEIGHT=246 BORDER=0><BR CLEAR=LEFT><BR><BR> +</P> +<H1 CLASS="western"><FONT FACE="Verdana, sans-serif">OK Here we go. 
</FONT> +</H1> +<P><BR><BR> +</P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">The +term “Rule-start” indicates the start of a rule section, +which continues until “Rule-end”. This is from literate +programming. So just about everything outside is a “comment”. +This is the opposite of normal source.</FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt"><SPAN STYLE="background: #e6e6e6">Start-rule +[In-the-beginning]<BR>Priority[1]<BR>Group[group1]<BR>Input[String +message]</SPAN></FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt"><B>IF<BR> </B><SPAN STYLE="font-weight: medium">Today +<I>is</I> before date of 10-Jul-2005 or Today is date of +10-Jul-2006<BR></SPAN><B>THEN<BR> </B><SPAN STYLE="font-weight: medium">Log +“I am a parameter to the log function”</SPAN></FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 
STYLE="font-size: 8pt"> [System.out.println(“boo”)]</FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt"><SPAN STYLE="font-weight: medium"><I><BR></I><SPAN STYLE="background: #e6e6e6">End-rule</SPAN></SPAN></FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<BR><BR> +</P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<BR><BR> +</P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<BR><BR> +</P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Note +that the configuration of the rule, such as parameters, salience etc. +is done in a tabular fashion <I>somewhere else</I> in the document. 
</FONT></FONT></FONT> +</P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">The +expressions in the rule are compiled with the aid of a dictionary. +You can help the lexer out by putting “[ and ]” around +multi word expressions. Alternatively, you can put whole expressions +in square brackets, and they will be passed through the natural +language compiler.</FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt"><I>TODO: +should some attributes like parameters be in the rule definition?</I></FONT></FONT></FONT></P> +<H3 CLASS="western"><FONT FACE="Verdana, sans-serif">The Dictionary +(how it works for starters)</FONT></H3> +<P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">The +dictionary is a little embedded grammar. I chose the word dictionary +as I think it is more intuitive, and it is a very primitive grammar. +This ideally can be imported from another file if needs be. It would +be possibly to map between languages this way, within reason.</FONT></FONT></P> +<P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">This +should be in an appendix hidden and protected. 
You can also think of +things on the right hand side as templates (although it is more +complex then just string interpolation – such as order of +operations, and nesting).</FONT></FONT></P> +<P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">This +dictionary provides a grammar that is used to help process stuff in +the Rule sections of the document.</FONT></FONT></P> +<P><BR><BR> +</P> +<DL> + <DD> + <TABLE WIDTH=652 BORDER=0 CELLPADDING=0 CELLSPACING=0> + <COL WIDTH=178> + <COL WIDTH=474> + <THEAD> + <TR> + <TH COLSPAN=2 WIDTH=652 VALIGN=TOP BGCOLOR="#e6e6e6"> + <P ALIGN=LEFT STYLE="font-style: normal; font-weight: medium"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Dictionary</FONT></FONT></P> + </TH> + </TR> + </THEAD> + <TBODY> + <TR VALIGN=TOP> + <TD WIDTH=178> + <P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Today</FONT></FONT></P> + </TD> + <TD WIDTH=474> + <P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">new + java.util.Date()</FONT></FONT></P> + </TD> + </TR> + <TR VALIGN=TOP> + <TD WIDTH=178> + <P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">before</FONT></FONT></P> + </TD> + <TD WIDTH=474> + <P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">${left}.before(${right})</FONT></FONT></P> + </TD> + </TR> + <TR VALIGN=TOP> + <TD WIDTH=178> + <P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">date + of</FONT></FONT></P> + </TD> + <TD WIDTH=474> + <P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">convertToDate(“${right}”) + //use of a drools function</FONT></FONT></P> + </TD> + </TR> + <TR VALIGN=TOP> + <TD WIDTH=178> + <P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Log</FONT></FONT></P> + </TD> + <TD WIDTH=474> + <P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">System.out.println(${right})</FONT></FONT></P> + </TD> + </TR> + </TBODY> 
+ </TABLE> +</DL> +<P><BR><BR> +</P> +<P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Ok +lets explain that. The first cell of the table tells drools that it +is a dictionary. The left column are the keywords (watch for case +sensitivity, can be a pain). On the right, there are the little +expressions. Notice that there is no prescribed language yet. In this +case it is a mix of java and drools functions. ${right} means the +token to the right, ${left} means the token to the left. You can also +use ${2} and ${-3} as well to indicate positions. </FONT></FONT> +</P> +<P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Pretty +simple, it gets a little more complex, later (ie order of operations +and nesting, some special keywords). </FONT></FONT> +</P> +<P><BR><BR> +</P> +<H3 CLASS="western"><FONT FACE="Verdana, sans-serif">Tables for +configuration</FONT></H3> +<P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">This +is “somewhere else” in the document. Ideally this sort of +stuff can be included from external resources if needed (to share +common stuff). Where this appears in the document is not important. 
+Appendix probably makes most sense.</FONT></FONT></P> +<P><BR><BR> +</P> +<P><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">The +"Rule configuration" table sets up technical attributes and +parameters for the rule.</FONT></FONT></P> +<DL> + <DD> + <TABLE WIDTH=560 BORDER=0 CELLPADDING=0 CELLSPACING=0> + <COL WIDTH=140> + <COL WIDTH=420> + <THEAD> + <TR> + <TH COLSPAN=2 WIDTH=560 VALIGN=TOP BGCOLOR="#e6e6e6"> + <P ALIGN=LEFT STYLE="font-style: normal; font-weight: medium"><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Rule-configuration</FONT></FONT></P> + </TH> + </TR> + </THEAD> + <TBODY> + <TR VALIGN=TOP> + <TD WIDTH=140> + <P ALIGN=LEFT><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">In-the-beginning</FONT></FONT></P> + </TD> + <TD WIDTH=420> + <P ALIGN=LEFT><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">Salience=1</FONT></FONT></P> + <P ALIGN=LEFT><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">xor-group=group1 + </FONT></FONT> + </P> + <P ALIGN=LEFT><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">no-loop=false</FONT></FONT></P> + <P ALIGN=LEFT><FONT FACE="Verdana, sans-serif"><FONT SIZE=1 STYLE="font-size: 8pt">parameters=(String + message, Integer number)</FONT></FONT></P> + </TD> + </TR> + <TR VALIGN=TOP> + <TD WIDTH=140> + <P ALIGN=LEFT><BR> + </P> + </TD> + <TD WIDTH=420> + <P ALIGN=LEFT><BR> + </P> + </TD> + </TR> + </TBODY> + </TABLE> +</DL> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<BR><BR> +</P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT 
COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=2 STYLE="font-size: 9pt"><I>Thats +it for now... go have a coffee. Spend some quality time with your +family. Remember them?</I></FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<BR><BR> +</P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=2 STYLE="font-size: 9pt"><I>TODO: +Macros for content sharing. eg (Include something here) and (Import +something here). Just like literate programming.</I></FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-weight: medium; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<FONT COLOR="#000000"><FONT FACE="Verdana, sans-serif"><FONT SIZE=2 STYLE="font-size: 9pt"><I>TODO: +Output as HTML the bits that are parsed out.</I></FONT></FONT></FONT></P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; font-style: normal; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<BR><BR> +</P> +<P LANG="en-US" ALIGN=LEFT STYLE="margin-bottom: 0.32cm; background: transparent; line-height: 130%; page-break-inside: auto; widows: 2; orphans: 2; page-break-before: auto; page-break-after: auto"> +<BR><BR> +</P> +</BODY> +</HTML> \ No newline at end of file Property changes on: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/NaturalRulesSpec.html 
___________________________________________________________________ Name: svn:eol-style + native Added: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentListenerTest.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentListenerTest.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentListenerTest.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -0,0 +1,32 @@ +package org.drools.natural.ruledoc; + +import java.util.List; + +import junit.framework.TestCase; + +public class RuleDocumentListenerTest extends TestCase +{ + + public void testListBuilder() { + RuleDocumentListener listener = new RuleDocumentListener(); + listener.handleText("ignore me\t \n"); + listener.handleText("Start-rule"); + listener.handleText("in a rule"); + listener.startComment(); + listener.handleText("a comment"); + listener.endComment(); + listener.handleText("End-rule"); + + listener.handleText("Start-rule something here"); + listener.handleText("in a rule"); + listener.handleText("End-rule"); + + + List list = listener.getRules(); + assertEquals(2, list.size()); + String rule = (String) list.get(0); + assertEquals("Start-rulein a ruleEnd-rule", rule); + + } + +} Property changes on: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentListenerTest.java ___________________________________________________________________ Name: svn:eol-style + native Added: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentTest.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentTest.java 2005-12-21 02:40:16 UTC (rev 1909) +++ 
trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentTest.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -0,0 +1,29 @@ +package org.drools.natural.ruledoc; + +import java.util.List; +import java.util.Properties; +import java.io.InputStream; +import java.net.URL; + +import junit.framework.TestCase; + +public class RuleDocumentTest extends TestCase +{ + public void testSimpleParser() { + URL url = this.getClass().getResource("simple-ruledoc.html"); + RuleDocument doc = new RuleDocument(); + List rules = doc.buildRuleListFromDocument(url, new Properties()); + assertEquals(1, rules.size()); + System.out.println(rules.get(0)); + } + + + public void testTheSpec() { + URL url = this.getClass().getResource("NaturalRulesSpec.html"); + RuleDocument doc = new RuleDocument(); + List rules = doc.buildRuleListFromDocument(url, new Properties()); + assertEquals(1, rules.size()); + System.out.println(rules.get(0)); + } + +} Property changes on: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/RuleDocumentTest.java ___________________________________________________________________ Name: svn:eol-style + native Deleted: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserImplTest.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserImplTest.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserImplTest.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,73 +0,0 @@ -package org.drools.natural.ruledoc.html; - -import java.io.InputStream; - -import org.drools.natural.ruledoc.RuleDocumentListener; - -import junit.framework.TestCase; - -public class HTMLDocParserImplTest extends TestCase -{ - public void testDocParsing() throws Exception { - - 
MockRuleDocumentListenter listener = new MockRuleDocumentListenter(); - InputStream stream = this.getClass().getResourceAsStream("simplest.html"); - HTMLDocParserImpl parser = new HTMLDocParserImpl(); - parser.parseDocument(stream, listener); - assertTrue(listener.buf.toString().indexOf("line B") > 0); - assertFalse(listener.comment.toString().indexOf("comment") > 0); - System.out.println(listener.buf.toString()); - assertEquals("comment", listener.comment.toString()); - } - - static class MockRuleDocumentListenter implements RuleDocumentListener { - - public StringBuffer comment = new StringBuffer(); - public StringBuffer buf = new StringBuffer(); - private boolean inComment = false; - - public void handleText(String text) - { - if (!inComment) { - buf.append(text + "|"); - } else { - comment.append(text); - } - } - - public void startTable() - { - } - - public void startColumn() - { - } - - public void startRow() - { - } - - public void endTable() - { - } - - public void endColumn() - { - } - - public void endRow() - { - } - - public void startComment() - { - inComment = true; - } - - public void endComment() - { - inComment = false; - } - - } -} Added: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserTest.java =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserTest.java 2005-12-21 02:40:16 UTC (rev 1909) +++ trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserTest.java 2005-12-21 07:32:06 UTC (rev 1910) @@ -0,0 +1,87 @@ +package org.drools.natural.ruledoc.html; + +import java.net.URL; +import java.util.Properties; + +import junit.framework.TestCase; + +import org.drools.natural.ruledoc.RuleDocumentListener; + +public class HTMLDocParserTest extends TestCase +{ + public void testDocParsing() throws Exception { + + MockRuleDocumentListenter 
listener = new MockRuleDocumentListenter(); + URL url = this.getClass().getResource("simplest.html"); + HTMLDocParser parser = new HTMLDocParser(); + parser.parseDocument(url.openConnection(), listener); + + assertTrue(listener.buf.toString().indexOf("A line") > 0); + assertFalse(listener.comment.toString().indexOf("comment") > 0); + + assertEquals("comment", listener.comment.toString()); + + assertTrue(listener.table.toString().indexOf("left1|right1") > 0); + } + + static class MockRuleDocumentListenter extends RuleDocumentListener { + + public StringBuffer comment = new StringBuffer(); + public StringBuffer buf = new StringBuffer(); + private boolean inComment = false; + public StringBuffer table = new StringBuffer(); + private boolean inTable = false; + + public void handleText(String text) + { + if (inComment) { + comment.append(text); + } else if (inTable) { + table.append(text); + } + else { + buf.append(text); + + } + } + + public void startTable() + { + inTable = true; + } + + public void startColumn() + { + this.table.append("|"); + } + + public void startRow() + { + } + + public void endTable() + { + this.table.append("||"); + inTable = false; + } + + public void endColumn() + { + } + + public void endRow() + { + } + + public void startComment() + { + inComment = true; + } + + public void endComment() + { + inComment = false; + } + + } +} Property changes on: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/HTMLDocParserTest.java ___________________________________________________________________ Name: svn:eol-style + native Deleted: trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/NaturalRulesSpec.html =================================================================== --- trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/NaturalRulesSpec.html 2005-12-21 02:40:16 UTC (rev 1909) +++ 
trunk/labs/jbossrules/drools-natural-language/src/test/org/drools/natural/ruledoc/html/NaturalRulesSpec.html 2005-12-21 07:32:06 UTC (rev 1910) @@ -1,303 +0,0 @@ -<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> -<HTML> -<HEAD> - <META HTTP-EQUIV="CONTENT-TYPE" CONTENT="text/html; charset=windows-1252"> - <TITLE></TITLE> - <META NAME="GENERATOR" CONTENT="OpenOffice.org 2.0 (Win32)"> - <META NAME="CREATED" CONTENT="20051214;11592400"> - <META NAME="CHANGEDBY" CONTENT="Michael Neale"> - <META NAME="CHANGED" CONTENT="20051215;16490707"> - <STYLE> - <!-- - @page { size: 21cm 29.7cm; margin: 2cm } - P { margin-bottom: 0.21cm } - H1 { margin-bottom: 0.21cm } - H1.western { font-family: "Arial", sans-serif; font-... [truncated message content] |