From: <cw...@us...> - 2007-09-19 19:35:24
|
Revision: 538 http://rdfapi-php.svn.sourceforge.net/rdfapi-php/?rev=538&view=rev Author: cweiske Date: 2007-09-19 12:35:23 -0700 (Wed, 19 Sep 2007) Log Message: ----------- - Make tokenization function testable - Add support for """ quotations Modified Paths: -------------- trunk/rdfapi-php/api/sparql/SparqlParser.php trunk/rdfapi-php/test/unit/Sparql/SparqlParserTests_test.php Modified: trunk/rdfapi-php/api/sparql/SparqlParser.php =================================================================== --- trunk/rdfapi-php/api/sparql/SparqlParser.php 2007-09-18 17:55:11 UTC (rev 537) +++ trunk/rdfapi-php/api/sparql/SparqlParser.php 2007-09-19 19:35:23 UTC (rev 538) @@ -106,9 +106,9 @@ if ($queryString) { $this->query->setQueryString($queryString); - $uncommentedQuery = $this->uncomment($queryString); - $this->tokenize($uncommentedQuery); + $uncommentedQuery = $this->uncomment($queryString); $this->queryString = $uncommentedQuery; + $this->tokens = self::tokenize($uncommentedQuery); $this->parseQuery(); if (!$this->query->isComplete()) { throw new SparqlParserException( @@ -126,7 +126,7 @@ $this->query->isEmpty = true; } return $this->query; - } + }//public function parse($queryString = false) @@ -143,35 +143,66 @@ // add the default prefixes defined in constants.php global $default_prefixes; $this->query->prefixes = $default_prefixes; - } + }//protected function prepare() /** - * Tokenizes the query string. + * Tokenizes the query string into $tokens. + * The query may not contain any comments. * - * @param String $queryString - * @return void + * @param string $queryString Query to split into tokens + * + * @return array Tokens */ - protected function tokenize($queryString) + public static function tokenize($queryString) { - $queryString = trim($queryString); - $specialChars = array(" ", "\t", "\r", "\n", ",", "(", ")","{","}",'"',"'",";","[","]"); - $len = strlen($queryString); - $this->tokens[0]=''; - $n = 0; - for ($i=0; $i<$len; ++$i) { + $queryString = trim($queryString); + $specialChars = array(' ', "\t", "\r", "\n", ',', '\\', '(', ')','{','}','"',"'",';','[',']'); + $len = strlen($queryString); + $tokens = array(''); + $n = 0; + + for ($i = 0; $i < $len; ++$i) { if (!in_array($queryString{$i}, $specialChars)) { - $this->tokens[$n] .= $queryString{$i}; + $tokens[$n] .= $queryString{$i}; } else { - if ($this->tokens[$n] != '') { + if ($tokens[$n] != '') { ++$n; + if (!isset($tokens[$n])) { + $tokens[$n] = ''; + } } - $this->tokens[$n] = $queryString{$i}; - $this->tokens[++$n] = ''; + if ($queryString{$i} == "'" && $n > 1 + && $tokens[$n - 2] == "'" && $tokens[$n - 1] == "'" + ) { + //special ''' quotation + $tokens[$n - 2] = "'''"; + $tokens[$n - 1] = ''; + unset($tokens[$n]); + --$n; + continue; + } else if ($queryString{$i} == '"' && $n > 1 + && $tokens[$n - 2] == '"' && $tokens[$n - 1] == '"' + ) { + //special """ quotation + $tokens[$n - 2] = '"""'; + $tokens[$n - 1] = ''; + unset($tokens[$n]); + --$n; + continue; + } else if ($queryString{$i} == '\\') { + $tokens[$n] .= substr($queryString, $i, 2); + ++$i; + continue; + } + $tokens[$n] = $queryString{$i}; + $tokens[++$n] = ''; } } - } +//var_dump($tokens); + return $tokens; + }//public static function tokenize($queryString) @@ -184,14 +215,12 @@ */ protected function uncomment($queryString) { - // php appears to escape quotes, so unescape them - $queryString = str_replace('\"',"'",$queryString); - $queryString = str_replace("\'",'"',$queryString); - $regex ="/((\"[^\"]*\")|(\'[^\']*\')|(\<[^\>]*\>))|(#.*)/"; return preg_replace($regex,'\1',$queryString); - } + }//protected function uncomment($queryString) + + /** * Starts parsing the tokenized SPARQL Query. * @@ -237,7 +266,7 @@ } } while (next($this->tokens)); - } + }//protected function parseQuery() @@ -545,45 +574,56 @@ } + /** * Checks if $token is a Literal. * - * @param String $token The token + * @param string $token The token + * * @return boolean TRUE if the token is a Literal false if not */ - protected function literalCheck($token){ - $pattern="/^[\"\'].*$/"; - if(preg_match($pattern,$token)>0) - return true; + protected function literalCheck($token) + { + $pattern = "/^[\"\'].*$/"; + if (preg_match($pattern,$token) > 0) { + return true; + } return false; - } + }//protected function literalCheck($token) + + /** * FastForward until next token which is not blank. * * @return void */ - protected function _fastForward(){ + protected function _fastForward() + { next($this->tokens); while(current($this->tokens)==" "|current($this->tokens)==chr(10)|current($this->tokens)==chr(13)|current($this->tokens)==chr(9)){ next($this->tokens); } - return; - } + }//protected function _fastForward() + + /** * Rewind until next token which is not blank. * * @return void */ - protected function _rewind(){ + protected function _rewind() + { prev($this->tokens); while(current($this->tokens)==" "|current($this->tokens)==chr(10)|current($this->tokens)==chr(13)|current($this->tokens)==chr(9)){ prev($this->tokens); } return; - } + }//protected function _rewind() + + /** * Parses a graph pattern. * @@ -1299,12 +1339,13 @@ * Parses a String to an RDF node. * * @param String $node + * * @return Node The parsed RDF node * @throws SparqlParserException */ protected function parseNode($node = false) { - $eon = false; + //$eon = false; if ($node) { $node = $node; } else { @@ -1343,19 +1384,12 @@ $node = new Resource($node); return $node; } else if ($this->literalCheck($node)) { - do { - switch(substr($node,0,1)){ - case '"': - $this->parseLiteral($node,'"'); - $eon = true; - break; - case "'": - $this->parseLiteral($node,"'"); - $eon = true; - break; - } - } while(!$eon); - + $ch = substr($node, 0, 1); + $chLong = str_repeat($ch, 3); + if (substr($node, 0, 3) == $chLong) { + $ch = $chLong; + } + $this->parseLiteral($node, $ch); } else if ($this->varCheck($node)) { $pos = strpos($node,'.'); if ($pos) { @@ -1381,21 +1415,24 @@ return $this->parseNode($node); } else { throw new SparqlParserException( - $node . " is neither a valid rdf- node nor a variable.", + '"' . $node . '" is neither a valid rdf- node nor a variable.', null, key($this->tokens) ); } return $node; - } + }//protected function parseNode($node = false) + + /** * Checks if there is a datatype given and appends it to the node. * - * @param String $node + * @param string $node Node to check + * * @return void */ - protected function checkDtypeLang(&$node) + protected function checkDtypeLang(&$node, $nSubstrLength = 1) { $this->_fastForward(); switch (substr(current($this->tokens), 0, 1)) { @@ -1411,24 +1448,26 @@ break; case '@': $node = new Literal( - substr($node, 1, -1), - substr(current($this->tokens), 1) + substr($node, $nSubstrLength, -$nSubstrLength), + substr(current($this->tokens), $nSubstrLength) ); break; default: prev($this->tokens); - $node = new Literal(substr($node, 1, -1)); + $node = new Literal(substr($node, $nSubstrLength, -$nSubstrLength)); break; } + }//protected function checkDtypeLang(&$node, $nSubstrLength = 1) - } + /** * Parses a literal. * * @param String $node * @param String $sep used separator " or ' + * * @return void */ protected function parseLiteral(&$node, $sep) @@ -1437,13 +1476,16 @@ next($this->tokens); $node = $node.current($this->tokens); } while (current($this->tokens) != $sep); - $this->checkDtypeLang($node); - } + $this->checkDtypeLang($node, strlen($sep)); + }//protected function parseLiteral(&$node, $sep) + + /** * Checks if the Node is a typed Literal. * * @param String $node + * * @return boolean TRUE if typed FALSE if not */ protected function dtypeCheck(&$node) @@ -1476,8 +1518,10 @@ return true; } return false; - } + }//protected function dtypeCheck(&$node) + + /** * Parses an RDF collection. * Modified: trunk/rdfapi-php/test/unit/Sparql/SparqlParserTests_test.php =================================================================== --- trunk/rdfapi-php/test/unit/Sparql/SparqlParserTests_test.php 2007-09-18 17:55:11 UTC (rev 537) +++ trunk/rdfapi-php/test/unit/Sparql/SparqlParserTests_test.php 2007-09-19 19:35:23 UTC (rev 538) @@ -67,6 +67,16 @@ + function testTokenizer() + { + $this->assertEqual( + array('abc', "'", 'hi', "'", "'", 'def', "'''", 'rst', "\'", "'", "'", 'xyz'), + SparqlParser::tokenize("abc'hi''def'''rst\\'''xyz") + ); + }//function testTokenizer() + + + function testEdgeCases() { $query = <<<EOT @@ -147,6 +157,7 @@ $parser = new SparqlParser(); foreach ($_SESSION['sparql_dawg2_tests'] as $test) { +echo $test['title'] . "\n"; //use syntax tests only if (!isset($test['type']) || ($test['type'] != 'syntax-positive' && This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |