|
From: <cw...@us...> - 2007-09-19 19:35:24
|
Revision: 538
http://rdfapi-php.svn.sourceforge.net/rdfapi-php/?rev=538&view=rev
Author: cweiske
Date: 2007-09-19 12:35:23 -0700 (Wed, 19 Sep 2007)
Log Message:
-----------
- Make tokenization function testable
- Add support for """ quotations
Modified Paths:
--------------
trunk/rdfapi-php/api/sparql/SparqlParser.php
trunk/rdfapi-php/test/unit/Sparql/SparqlParserTests_test.php
Modified: trunk/rdfapi-php/api/sparql/SparqlParser.php
===================================================================
--- trunk/rdfapi-php/api/sparql/SparqlParser.php 2007-09-18 17:55:11 UTC (rev 537)
+++ trunk/rdfapi-php/api/sparql/SparqlParser.php 2007-09-19 19:35:23 UTC (rev 538)
@@ -106,9 +106,9 @@
if ($queryString) {
$this->query->setQueryString($queryString);
- $uncommentedQuery = $this->uncomment($queryString);
- $this->tokenize($uncommentedQuery);
+ $uncommentedQuery = $this->uncomment($queryString);
$this->queryString = $uncommentedQuery;
+ $this->tokens = self::tokenize($uncommentedQuery);
$this->parseQuery();
if (!$this->query->isComplete()) {
throw new SparqlParserException(
@@ -126,7 +126,7 @@
$this->query->isEmpty = true;
}
return $this->query;
- }
+ }//public function parse($queryString = false)
@@ -143,35 +143,66 @@
// add the default prefixes defined in constants.php
global $default_prefixes;
$this->query->prefixes = $default_prefixes;
- }
+ }//protected function prepare()
/**
- * Tokenizes the query string.
+ * Tokenizes the query string into $tokens.
+ * The query may not contain any comments.
*
- * @param String $queryString
- * @return void
+ * @param string $queryString Query to split into tokens
+ *
+ * @return array Tokens
*/
- protected function tokenize($queryString)
+ public static function tokenize($queryString)
{
- $queryString = trim($queryString);
- $specialChars = array(" ", "\t", "\r", "\n", ",", "(", ")","{","}",'"',"'",";","[","]");
- $len = strlen($queryString);
- $this->tokens[0]='';
- $n = 0;
- for ($i=0; $i<$len; ++$i) {
+ $queryString = trim($queryString);
+ $specialChars = array(' ', "\t", "\r", "\n", ',', '\\', '(', ')','{','}','"',"'",';','[',']');
+ $len = strlen($queryString);
+ $tokens = array('');
+ $n = 0;
+
+ for ($i = 0; $i < $len; ++$i) {
if (!in_array($queryString{$i}, $specialChars)) {
- $this->tokens[$n] .= $queryString{$i};
+ $tokens[$n] .= $queryString{$i};
} else {
- if ($this->tokens[$n] != '') {
+ if ($tokens[$n] != '') {
++$n;
+ if (!isset($tokens[$n])) {
+ $tokens[$n] = '';
+ }
}
- $this->tokens[$n] = $queryString{$i};
- $this->tokens[++$n] = '';
+ if ($queryString{$i} == "'" && $n > 1
+ && $tokens[$n - 2] == "'" && $tokens[$n - 1] == "'"
+ ) {
+ //special ''' quotation
+ $tokens[$n - 2] = "'''";
+ $tokens[$n - 1] = '';
+ unset($tokens[$n]);
+ --$n;
+ continue;
+ } else if ($queryString{$i} == '"' && $n > 1
+ && $tokens[$n - 2] == '"' && $tokens[$n - 1] == '"'
+ ) {
+ //special """ quotation
+ $tokens[$n - 2] = '"""';
+ $tokens[$n - 1] = '';
+ unset($tokens[$n]);
+ --$n;
+ continue;
+ } else if ($queryString{$i} == '\\') {
+ $tokens[$n] .= substr($queryString, $i, 2);
+ ++$i;
+ continue;
+ }
+ $tokens[$n] = $queryString{$i};
+ $tokens[++$n] = '';
}
}
- }
+//var_dump($tokens);
+ return $tokens;
+ }//public static function tokenize($queryString)
@@ -184,14 +215,12 @@
*/
protected function uncomment($queryString)
{
- // php appears to escape quotes, so unescape them
- $queryString = str_replace('\"',"'",$queryString);
- $queryString = str_replace("\'",'"',$queryString);
-
$regex ="/((\"[^\"]*\")|(\'[^\']*\')|(\<[^\>]*\>))|(#.*)/";
return preg_replace($regex,'\1',$queryString);
- }
+ }//protected function uncomment($queryString)
+
+
/**
* Starts parsing the tokenized SPARQL Query.
*
@@ -237,7 +266,7 @@
}
} while (next($this->tokens));
- }
+ }//protected function parseQuery()
@@ -545,45 +574,56 @@
}
+
/**
* Checks if $token is a Literal.
*
- * @param String $token The token
+ * @param string $token The token
+ *
* @return boolean TRUE if the token is a Literal false if not
*/
- protected function literalCheck($token){
- $pattern="/^[\"\'].*$/";
- if(preg_match($pattern,$token)>0)
- return true;
+ protected function literalCheck($token)
+ {
+ $pattern = "/^[\"\'].*$/";
+ if (preg_match($pattern,$token) > 0) {
+ return true;
+ }
return false;
- }
+ }//protected function literalCheck($token)
+
+
/**
* FastForward until next token which is not blank.
*
* @return void
*/
- protected function _fastForward(){
+ protected function _fastForward()
+ {
next($this->tokens);
while(current($this->tokens)==" "|current($this->tokens)==chr(10)|current($this->tokens)==chr(13)|current($this->tokens)==chr(9)){
next($this->tokens);
}
- return;
- }
+ }//protected function _fastForward()
+
+
/**
* Rewind until next token which is not blank.
*
* @return void
*/
- protected function _rewind(){
+ protected function _rewind()
+ {
prev($this->tokens);
while(current($this->tokens)==" "|current($this->tokens)==chr(10)|current($this->tokens)==chr(13)|current($this->tokens)==chr(9)){
prev($this->tokens);
}
return;
- }
+ }//protected function _rewind()
+
+
/**
* Parses a graph pattern.
*
@@ -1299,12 +1339,13 @@
* Parses a String to an RDF node.
*
* @param String $node
+ *
* @return Node The parsed RDF node
* @throws SparqlParserException
*/
protected function parseNode($node = false)
{
- $eon = false;
+ //$eon = false;
if ($node) {
$node = $node;
} else {
@@ -1343,19 +1384,12 @@
$node = new Resource($node);
return $node;
} else if ($this->literalCheck($node)) {
- do {
- switch(substr($node,0,1)){
- case '"':
- $this->parseLiteral($node,'"');
- $eon = true;
- break;
- case "'":
- $this->parseLiteral($node,"'");
- $eon = true;
- break;
- }
- } while(!$eon);
-
+ $ch = substr($node, 0, 1);
+ $chLong = str_repeat($ch, 3);
+ if (substr($node, 0, 3) == $chLong) {
+ $ch = $chLong;
+ }
+ $this->parseLiteral($node, $ch);
} else if ($this->varCheck($node)) {
$pos = strpos($node,'.');
if ($pos) {
@@ -1381,21 +1415,24 @@
return $this->parseNode($node);
} else {
throw new SparqlParserException(
- $node . " is neither a valid rdf- node nor a variable.",
+ '"' . $node . '" is neither a valid rdf- node nor a variable.',
null,
key($this->tokens)
);
}
return $node;
- }
+ }//protected function parseNode($node = false)
+
+
/**
* Checks if there is a datatype given and appends it to the node.
*
- * @param String $node
+ * @param string $node Node to check
+ *
* @return void
*/
- protected function checkDtypeLang(&$node)
+ protected function checkDtypeLang(&$node, $nSubstrLength = 1)
{
$this->_fastForward();
switch (substr(current($this->tokens), 0, 1)) {
@@ -1411,24 +1448,26 @@
break;
case '@':
$node = new Literal(
- substr($node, 1, -1),
- substr(current($this->tokens), 1)
+ substr($node, $nSubstrLength, -$nSubstrLength),
+ substr(current($this->tokens), $nSubstrLength)
);
break;
default:
prev($this->tokens);
- $node = new Literal(substr($node, 1, -1));
+ $node = new Literal(substr($node, $nSubstrLength, -$nSubstrLength));
break;
}
+ }//protected function checkDtypeLang(&$node, $nSubstrLength = 1)
- }
+
/**
* Parses a literal.
*
* @param String $node
* @param String $sep used separator " or '
+ *
* @return void
*/
protected function parseLiteral(&$node, $sep)
@@ -1437,13 +1476,16 @@
next($this->tokens);
$node = $node.current($this->tokens);
} while (current($this->tokens) != $sep);
- $this->checkDtypeLang($node);
- }
+ $this->checkDtypeLang($node, strlen($sep));
+ }//protected function parseLiteral(&$node, $sep)
+
+
/**
* Checks if the Node is a typed Literal.
*
* @param String $node
+ *
* @return boolean TRUE if typed FALSE if not
*/
protected function dtypeCheck(&$node)
@@ -1476,8 +1518,10 @@
return true;
}
return false;
- }
+ }//protected function dtypeCheck(&$node)
+
+
/**
* Parses an RDF collection.
*
Modified: trunk/rdfapi-php/test/unit/Sparql/SparqlParserTests_test.php
===================================================================
--- trunk/rdfapi-php/test/unit/Sparql/SparqlParserTests_test.php 2007-09-18 17:55:11 UTC (rev 537)
+++ trunk/rdfapi-php/test/unit/Sparql/SparqlParserTests_test.php 2007-09-19 19:35:23 UTC (rev 538)
@@ -67,6 +67,16 @@
+ function testTokenizer()
+ {
+ $this->assertEqual(
+ array('abc', "'", 'hi', "'", "'", 'def', "'''", 'rst', "\'", "'", "'", 'xyz'),
+ SparqlParser::tokenize("abc'hi''def'''rst\\'''xyz")
+ );
+ }//function testTokenizer()
+
+
+
function testEdgeCases()
{
$query = <<<EOT
@@ -147,6 +157,7 @@
$parser = new SparqlParser();
foreach ($_SESSION['sparql_dawg2_tests'] as $test) {
+echo $test['title'] . "\n";
//use syntax tests only
if (!isset($test['type']) ||
($test['type'] != 'syntax-positive' &&
This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
|