From: <lph...@us...> - 2010-10-28 17:05:50
|
Revision: 30395
          http://tikiwiki.svn.sourceforge.net/tikiwiki/?rev=30395&view=rev
Author:   lphuberdeau
Date:     2010-10-28 17:05:43 +0000 (Thu, 28 Oct 2010)

Log Message:
-----------
[NEW] Adding search parameter parsing

Added Paths:
-----------
    trunk/lib/core/Search/Expr/
    trunk/lib/core/Search/Expr/And.php
    trunk/lib/core/Search/Expr/Interface.php
    trunk/lib/core/Search/Expr/Not.php
    trunk/lib/core/Search/Expr/Or.php
    trunk/lib/core/Search/Expr/Parser.php
    trunk/lib/core/Search/Expr/Token.php
    trunk/lib/core/Search/Expr/Tokenizer.php
    trunk/lib/test/core/Search/Expr/
    trunk/lib/test/core/Search/Expr/ParserTest.php
    trunk/lib/test/core/Search/Expr/TokenizerTest.php

Added: trunk/lib/core/Search/Expr/And.php
===================================================================
--- trunk/lib/core/Search/Expr/And.php (rev 0)
+++ trunk/lib/core/Search/Expr/And.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,19 @@
+<?php
+
+/**
+ * Expression node built by the parser for the AND and "+" operators.
+ */
+class Search_Expr_And implements Search_Expr_Interface
+{
+    /** @var array operand expressions, in the order they were given */
+    private $parts;
+
+    /**
+     * @param array $parts operand expressions
+     */
+    public function __construct(array $parts)
+    {
+        $this->parts = $parts;
+    }
+}
+

Added: trunk/lib/core/Search/Expr/Interface.php
===================================================================
--- trunk/lib/core/Search/Expr/Interface.php (rev 0)
+++ trunk/lib/core/Search/Expr/Interface.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,9 @@
+<?php
+
+/**
+ * Marker implemented by every node of a parsed search expression.
+ */
+interface Search_Expr_Interface
+{
+}
+

Added: trunk/lib/core/Search/Expr/Not.php
===================================================================
--- trunk/lib/core/Search/Expr/Not.php (rev 0)
+++ trunk/lib/core/Search/Expr/Not.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,19 @@
+<?php
+
+/**
+ * Expression node built by the parser for the NOT operator.
+ */
+class Search_Expr_Not implements Search_Expr_Interface
+{
+    /** @var mixed the wrapped operand; its type is not checked here */
+    private $expression;
+
+    /**
+     * @param mixed $expression operand that follows the NOT keyword
+     */
+    public function __construct($expression)
+    {
+        $this->expression = $expression;
+    }
+}
+

Added: trunk/lib/core/Search/Expr/Or.php
===================================================================
--- trunk/lib/core/Search/Expr/Or.php (rev 0)
+++ 
trunk/lib/core/Search/Expr/Or.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,20 @@
+<?php
+
+/**
+ * Expression node built by the parser for the OR operator and for terms
+ * that are simply listed next to each other.
+ */
+class Search_Expr_Or implements Search_Expr_Interface
+{
+    /** @var array operand expressions */
+    private $parts;
+
+    /**
+     * @param array $parts operand expressions
+     */
+    public function __construct(array $parts)
+    {
+        $this->parts = $parts;
+    }
+}
+

Added: trunk/lib/core/Search/Expr/Parser.php
===================================================================
--- trunk/lib/core/Search/Expr/Parser.php (rev 0)
+++ trunk/lib/core/Search/Expr/Parser.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,208 @@
+<?php
+
+/**
+ * Builds a tree of Search_Expr_* nodes from a search string.
+ *
+ * Binding strength, tightest first: parentheses, NOT, OR, AND, "+".
+ * Terms still sitting side by side after every operator has been applied
+ * are wrapped in a single Search_Expr_Or.
+ */
+class Search_Expr_Parser
+{
+    private $special = array('(', ')', 'AND', 'OR', 'NOT', '+');
+
+    /**
+     * Parses a search string into an expression tree.
+     *
+     * @param string $string raw search input
+     * @return Search_Expr_Interface root node of the tree
+     */
+    public function parse($string)
+    {
+        $tokenizer = new Search_Expr_Tokenizer;
+
+        $tokens = array();
+        foreach ($tokenizer->tokenize($string) as $part) {
+            $upper = strtoupper($part);
+            if (in_array($upper, $this->special, true)) {
+                // Operators and parentheses stay plain strings until reduced.
+                $tokens[] = $upper;
+            } else {
+                $tokens[] = new Search_Expr_Token($part);
+            }
+        }
+
+        return $this->reduce($tokens);
+    }
+
+    /**
+     * Collapses a flat token list into a single expression node.
+     *
+     * @param array $tokens operator strings and expression nodes
+     * @return Search_Expr_Interface
+     */
+    private function reduce($tokens)
+    {
+        $tokens = $this->reduceParenthesis($tokens);
+        // NOT is a prefix operator: going right to left lets "NOT NOT x" nest.
+        $tokens = $this->applyOperator($tokens, 'NOT', 'buildNot', true);
+        $tokens = $this->applyOperator($tokens, 'OR', 'buildOr');
+        $tokens = $this->applyOperator($tokens, 'AND', 'buildAnd');
+        $tokens = $this->applyOperator($tokens, '+', 'buildAnd');
+
+        if (count($tokens) === 1) {
+            return reset($tokens);
+        }
+
+        return new Search_Expr_Or($tokens);
+    }
+
+    /**
+     * Replaces every top-level parenthesised group with its reduced node.
+     *
+     * A ")" without a matching "(" is ignored and a "(" that is never closed
+     * runs to the end of the input, so unbalanced queries no longer lose
+     * the terms that follow the stray parenthesis.
+     *
+     * @param array $tokens
+     * @return array tokens without any parenthesis strings
+     */
+    private function reduceParenthesis($tokens)
+    {
+        $tokens = array_values($tokens);
+        $out = array();
+        $firstOpen = null;
+        $openCount = 0;
+
+        foreach ($tokens as $key => $token) {
+            if ($token === '(') {
+                if ($openCount === 0) {
+                    $firstOpen = $key;
+                }
+
+                ++$openCount;
+            } elseif ($token === ')') {
+                if ($openCount === 0) {
+                    continue;
+                }
+
+                --$openCount;
+                if ($openCount === 0) {
+                    $inner = array_slice($tokens, $firstOpen + 1, $key - $firstOpen - 1);
+                    $out[] = $this->reduce($inner);
+                    $firstOpen = null;
+                }
+            } elseif ($openCount === 0) {
+                $out[] = $token;
+            }
+        }
+
+        if ($openCount > 0) {
+            $out[] = $this->reduce(array_slice($tokens, $firstOpen + 1));
+        }
+
+        return $out;
+    }
+
+    /**
+     * Applies one operator everywhere it occurs in the token list.
+     *
+     * @param array $tokens
+     * @param string $lookingFor operator string to match
+     * @param string $buildMethod name of the method that builds the node
+     * @param bool $rightToLeft process the last occurrence first
+     * @return array re-indexed tokens with consumed entries removed
+     */
+    private function applyOperator($tokens, $lookingFor, $buildMethod, $rightToLeft = false)
+    {
+        $tokens = array_values($tokens);
+        $positions = array_keys($tokens, $lookingFor, true);
+        if ($rightToLeft) {
+            $positions = array_reverse($positions);
+        }
+
+        foreach ($positions as $key) {
+            $this->$buildMethod($tokens, $key);
+        }
+
+        // Consumed slots are null; every surviving entry is truthy.
+        return array_values(array_filter($tokens));
+    }
+
+    /**
+     * Locates the operand next to an operator, skipping consumed slots.
+     *
+     * @param array $tokens
+     * @param int $key position of the operator
+     * @param int $step -1 to look left, 1 to look right
+     * @return int|null operand index, or null when the nearest remaining
+     *                  token is absent or is not an expression node
+     */
+    private function findOperand($tokens, $key, $step)
+    {
+        $count = count($tokens);
+        for ($i = $key + $step; $i >= 0 && $i < $count; $i += $step) {
+            if ($tokens[$i] === null) {
+                continue;
+            }
+
+            return $tokens[$i] instanceof Search_Expr_Interface ? $i : null;
+        }
+
+        return null;
+    }
+
+    /**
+     * Replaces a binary operator with a node wrapping both of its operands.
+     *
+     * Earlier operators of the same kind leave null slots behind, so the
+     * operands are searched for rather than read from $key - 1 and $key + 1;
+     * that is what makes "a AND b AND c" work. An operator that lacks an
+     * operand is dropped.
+     *
+     * @param array $tokens token list, modified in place
+     * @param int $key position of the operator
+     * @param string $class node class to instantiate
+     */
+    private function buildBinary(&$tokens, $key, $class)
+    {
+        $left = $this->findOperand($tokens, $key, -1);
+        $right = $this->findOperand($tokens, $key, 1);
+
+        if ($left === null || $right === null) {
+            $tokens[$key] = null;
+            return;
+        }
+
+        $tokens[$key] = new $class(array($tokens[$left], $tokens[$right]));
+        $tokens[$left] = null;
+        $tokens[$right] = null;
+    }
+
+    private function buildOr(&$tokens, $key)
+    {
+        $this->buildBinary($tokens, $key, 'Search_Expr_Or');
+    }
+
+    private function buildAnd(&$tokens, $key)
+    {
+        $this->buildBinary($tokens, $key, 'Search_Expr_And');
+    }
+
+    /**
+     * Replaces a NOT operator with a node wrapping the operand after it.
+     * A NOT with nothing to negate is dropped.
+     */
+    private function buildNot(&$tokens, $key)
+    {
+        $operand = $this->findOperand($tokens, $key, 1);
+        if ($operand === null) {
+            $tokens[$key] = null;
+            return;
+        }
+
+        $tokens[$key] = new Search_Expr_Not($tokens[$operand]);
+        $tokens[$operand] = null;
+    }
+}
+

Added: trunk/lib/core/Search/Expr/Token.php
===================================================================
--- trunk/lib/core/Search/Expr/Token.php (rev 0)
+++ trunk/lib/core/Search/Expr/Token.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,19 @@
+<?php
+
+/**
+ * Leaf node holding one word or quoted phrase of the search string.
+ */
+class Search_Expr_Token implements Search_Expr_Interface
+{
+    /** @var string the term exactly as the tokenizer produced it */
+    private $string;
+
+    /**
+     * @param string $string
+     */
+    public function __construct($string)
+    {
+        $this->string = $string;
+    }
+}
+

Added: trunk/lib/core/Search/Expr/Tokenizer.php
===================================================================
--- trunk/lib/core/Search/Expr/Tokenizer.php (rev 0)
+++ trunk/lib/core/Search/Expr/Tokenizer.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,75 @@
+<?php
+
+/**
+ * Splits a search string into words, quoted phrases and parentheses.
+ */
+class Search_Expr_Tokenizer
+{
+    const QUOTE = '"';
+    const OPEN = '(';
+    const CLOSE = ')';
+
+    /**
+     * Tokenizes a search string.
+     *
+     * Whitespace separates tokens, parentheses are tokens of their own and
+     * text between double quotes is kept together, parentheses and spaces
+     * included. An unterminated quote runs to the end of the input.
+     *
+     * @param string $string
+     * @return array list of token strings
+     */
+    public function tokenize($string)
+    {
+        $string = (string) $string;
+        $tokens = array();
+        $open = false;
+        $current = '';
+
+        $length = strlen($string);
+        for ($i = 0; $i < $length; ++$i) {
+            // Curly-brace offsets ($string{$i}) no longer parse on PHP 8.
+            $char = $string[$i];
+
+            if ($open) {
+                if ($char === self::QUOTE) {
+                    $this->addToken($tokens, $current);
+                    $open = false;
+                } else {
+                    $current .= $char;
+                }
+            } elseif ($char === self::QUOTE) {
+                $open = true;
+            } elseif ($char === self::OPEN || $char === self::CLOSE) {
+                $this->addToken($tokens, $current);
+                $this->addToken($tokens, $char);
+            } elseif (ctype_space($char)) {
+                $this->addToken($tokens, $current);
+            } else {
+                $current .= $char;
+            }
+        }
+
+        $this->addToken($tokens, $current);
+
+        return $tokens;
+    }
+
+    /**
+     * Appends $current to $tokens and resets it, unless it is empty.
+     *
+     * The check is against the empty string on purpose: empty() would also
+     * discard the term "0".
+     *
+     * @param array $tokens token list, modified in place
+     * @param string $current pending token, cleared once stored
+     */
+    private function addToken(&$tokens, &$current)
+    {
+        if ($current !== '') {
+            $tokens[] = $current;
+            $current = '';
+        }
+    }
+}
+

Added: trunk/lib/test/core/Search/Expr/ParserTest.php
===================================================================
--- trunk/lib/test/core/Search/Expr/ParserTest.php (rev 0)
+++ trunk/lib/test/core/Search/Expr/ParserTest.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,152 @@
+<?php
+
+/**
+ * Exercises Search_Expr_Parser: grouping, operator precedence and malformed input.
+ */
+class Search_Expr_ParserTest extends PHPUnit_Framework_TestCase
+{
+    private $parser;
+
+    function setUp()
+    {
+        $this->parser = new Search_Expr_Parser;
+    }
+
+    function testSimpleWord()
+    {
+        $result = $this->parser->parse('hello');
+
+        $this->assertEquals(new Search_Expr_Token('hello'), $result);
+    }
+
+    function testMultipleWords()
+    {
+        $result = $this->parser->parse('"hello world" test again');
+        $this->assertEquals(new Search_Expr_Or(array(
+            new Search_Expr_Token('hello world'),
+            new Search_Expr_Token('test'),
+            new Search_Expr_Token('again'),
+        )), $result);
+    }
+
+    function testSimpleParenthesis()
+    {
+        $result = $this->parser->parse('(test again)');
+        $this->assertEquals(new Search_Expr_Or(array(
+            new Search_Expr_Token('test'),
+            new Search_Expr_Token('again'),
+        )), $result);
+    }
+
+    function testMatchParenthesis()
+    {
+        $result = $this->parser->parse('(hello (bob roger)) (test again)');
+        $this->assertEquals(new Search_Expr_Or(array(
+            new Search_Expr_Or(array(
+                new Search_Expr_Token('hello'),
+                new Search_Expr_Or(array(
+                    new Search_Expr_Token('bob'),
+                    new Search_Expr_Token('roger'),
+                )),
+            )),
+            new Search_Expr_Or(array(
+                new Search_Expr_Token('test'),
+                new Search_Expr_Token('again'),
+            )),
+        )), $result);
+    }
+
+    function testStripOr()
+    {
+        $result = $this->parser->parse('(bob roger) or (test again)');
+
+        $this->assertEquals(new Search_Expr_Or(array(
+            $this->parser->parse('bob roger'),
+            $this->parser->parse('test again'),
+        )), $result);
+    }
+
+    function testRecognizeAnd()
+    {
+        $result = $this->parser->parse('(bob roger) and (test again)');
+
+        $this->assertEquals(new Search_Expr_And(array(
+            $this->parser->parse('bob roger'),
+            $this->parser->parse('test again'),
+        )), $result);
+    }
+
+    function testRecognizePlus()
+    {
+        $result = $this->parser->parse('(bob roger) + (test again)');
+
+        $this->assertEquals(new Search_Expr_And(array(
+            $this->parser->parse('bob roger'),
+            $this->parser->parse('test again'),
+        )), $result);
+    }
+
+    function testCheckPriority()
+    {
+        $result = $this->parser->parse('bob AND test OR again');
+
+        $this->assertEquals(new Search_Expr_And(array(
+            $this->parser->parse('bob'),
+            $this->parser->parse('test again'),
+        )), $result);
+    }
+
+    function testCheckLowerSpacePriority()
+    {
+        $result = $this->parser->parse('bob AND test again');
+
+        $this->assertEquals(new Search_Expr_Or(array(
+            $this->parser->parse('bob AND test'),
+            $this->parser->parse('again'),
+        )), $result);
+    }
+
+    function testNotOperator()
+    {
+        $result = $this->parser->parse('bob AND NOT (roger alphonse)');
+
+        $this->assertEquals(new Search_Expr_And(array(
+            $this->parser->parse('bob'),
+            new Search_Expr_Not($this->parser->parse('roger OR alphonse')),
+        )), $result);
+    }
+
+    function testChainedOperators()
+    {
+        $result = $this->parser->parse('bob AND roger AND alphonse');
+
+        $this->assertEquals(new Search_Expr_And(array(
+            $this->parser->parse('bob AND roger'),
+            $this->parser->parse('alphonse'),
+        )), $result);
+    }
+
+    function testDoubleNegation()
+    {
+        $result = $this->parser->parse('NOT NOT bob');
+
+        $this->assertEquals(new Search_Expr_Not(
+            new Search_Expr_Not($this->parser->parse('bob'))
+        ), $result);
+    }
+
+    function testDanglingOperatorIsDropped()
+    {
+        $this->assertEquals($this->parser->parse('bob'), $this->parser->parse('bob AND'));
+        $this->assertEquals($this->parser->parse('bob'), $this->parser->parse('OR bob'));
+    }
+
+    function testUnbalancedParenthesis()
+    {
+        $expected = $this->parser->parse('bob roger');
+
+        $this->assertEquals($expected, $this->parser->parse('(bob roger'));
+        $this->assertEquals($expected, $this->parser->parse('bob) roger'));
+    }
+}
+

Added: trunk/lib/test/core/Search/Expr/TokenizerTest.php
===================================================================
--- trunk/lib/test/core/Search/Expr/TokenizerTest.php (rev 0)
+++ trunk/lib/test/core/Search/Expr/TokenizerTest.php 2010-10-28 17:05:43 UTC (rev 30395)
@@ -0,0 +1,40 @@
+<?php
+
+/**
+ * Exercises Search_Expr_Tokenizer on words, quoted phrases and parentheses.
+ */
+class Search_Expr_TokenizerTest extends PHPUnit_Framework_TestCase
+{
+    private $tokenizer;
+
+    function setUp()
+    {
+        $this->tokenizer = new Search_Expr_Tokenizer;
+    }
+
+    function testSingleWord()
+    {
+        $this->assertEquals(array('hello'), $this->tokenizer->tokenize('hello'));
+    }
+
+    function testMultipleWords()
+    {
+        $this->assertEquals(array('hello', 'world', 'who', 'listens'), $this->tokenizer->tokenize('hello world who listens'));
+    }
+
+    function testWithQuotedText()
+    {
+        $this->assertEquals(array('hello world', 'who listens'), $this->tokenizer->tokenize('"hello world" "who listens"'));
+    }
+
+    function testWithParenthesis()
+    {
+        $this->assertEquals(array('hello world (who?)', '(', 'who', ')', '(', 'test', 'listens', ')'), $this->tokenizer->tokenize('"hello world (who?)" (who) (test listens)'));
+    }
+
+    function testZeroIsAToken()
+    {
+        $this->assertEquals(array('0', 'results'), $this->tokenizer->tokenize('0 results'));
+    }
+}
+

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|