[Phpxd-commits] CVS: phpXD/include/parser DTDParser.php,1.1,1.2
Status: Beta
Brought to you by:
growbal
From: Thomas D. <th...@us...> - 2002-01-27 01:23:14
|
Update of /cvsroot/phpxd/phpXD/include/parser In directory usw-pr-cvs1:/tmp/cvs-serv24978/include/parser Modified Files: DTDParser.php Log Message: Parsing of Element declarations implemented. Parameter Entities are supported, but it is just a hack (lines 26-43). Attlists, Entities and Notations are still missing. Index: DTDParser.php =================================================================== RCS file: /cvsroot/phpxd/phpXD/include/parser/DTDParser.php,v retrieving revision 1.1 retrieving revision 1.2 diff -C2 -r1.1 -r1.2 *** DTDParser.php 2002/01/26 13:20:01 1.1 --- DTDParser.php 2002/01/27 01:23:10 1.2 *************** *** 16,20 **** --- 16,24 ---- class DTDParser { var $parseDTD = false; + var $parseInternalDTD = false; + var $DTDTokens; var $parameterEntities; + var $elements; + var $currentElement; function parse($str) { *************** *** 22,40 **** while (preg_match('=(.*)\%([a-z,A-Z,0-9,\.]*);(.*)$=sU', $str, $ent)) { $str = preg_replace('=<!--.*-->=sU', '', $str); ! while (preg_match('=(.*)<!ENTITY[ ,\n,\r,\t]*\%[ ,\n,\r,\t]*([a-z,A-Z,0-9,\.]*)[ ,\n,\r,\t]*PUBLIC[ ,\n,\r,\t]*"([a-z,A-Z,0-9,\-,/, ]*)"[ ,\n,\r,\t]*"([a-z,A-Z,0-9,\.,\-,_,/]*)"[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) { ! if ($this->file_exists($this->currentDir."/".$ent[4])) { ! $this->parameterEntities[$ent[2]] = ! preg_replace('=<!--.*-->=sU', '', implode("", file($this->currentDir."/".$ent[4]))); ! } ! $str = $ent[1].$ent[5]; ! } while (preg_match('=(.*)<!ENTITY[ ,\n,\r,\t]*\%[ ,\n,\r,\t]*([a-z,A-Z,0-9,\.]*)[ ,\n,\r,\t]*"(.*)"[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) { $this->parameterEntities[$ent[2]] = $ent[3]; $str = $ent[1].$ent[4]; - } - while (preg_match('=(.*)\%([a-z,A-Z,0-9,\.]*);(.*)$=sU', $str, $ent)) { - $str = $ent[1].$this->parameterEntities[$ent[2]].$ent[3]; } } $parser = xml_parser_create(); --- 26,44 ---- while (preg_match('=(.*)\%([a-z,A-Z,0-9,\.]*);(.*)$=sU', $str, $ent)) { $str = preg_replace('=<!--.*-->=sU', '', $str); ! while (preg_match('=(.*)<!ENTITY[ ,\n,\r,\t]*\%[ ,\n,\r,\t]*([a-z,A-Z,0-9,\.]*)[ ,\n,\r,\t]*PUBLIC[ ,\n,\r,\t]*"([a-z,A-Z,0-9,\-,/, ]*)"[ ,\n,\r,\t]*"([a-z,A-Z,0-9,\.,\-,_,/]*)"[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) { ! if ($this->file_exists($this->currentDir."/".$ent[4])) { ! $this->parameterEntities[$ent[2]] = ! preg_replace('=<!--.*-->=sU', '', implode("", file($this->currentDir."/".$ent[4]))); ! } ! $str = $ent[1].$ent[5]; ! } while (preg_match('=(.*)<!ENTITY[ ,\n,\r,\t]*\%[ ,\n,\r,\t]*([a-z,A-Z,0-9,\.]*)[ ,\n,\r,\t]*"(.*)"[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) { $this->parameterEntities[$ent[2]] = $ent[3]; $str = $ent[1].$ent[4]; } + foreach ($this->parameterEntities as $ent => $body) { + $str = str_replace("%".$ent.";", $body, $str); + } } $parser = xml_parser_create(); *************** *** 86,118 **** function parseDoctype($tokens) { $token = $tokens[0]; ! if ($this->compareToken($token, "<!DOCTYPE")) { ! $this->parseDocumentElement(array_slice($tokens, 1)); } } ! function parseDocumentElement($tokens) { ! $token = $tokens[0]; $this->documentElement = $token; ! $token = $tokens[1]; ! if ($this->compareToken($token, "[")) { ! $this->parseInternalDTD(array_slice($tokens, 2)); } ! if ($this->compareToken($token, "SYSTEM")) { ! $this->parseExternalSystemDTD(array_slice($tokens, 2)); } ! if ($this->compareToken($token, "PUBLIC")) { ! $this->parseExternalPublicDTD(array_slice($tokens, 2)); } } ! function parseInternalDTD($tokens) { } ! function parseExternalSystemDTD($tokens) { } ! function parseExternalPublicDTD($tokens) { ! $tokenPublicID = $this->removeQuotes($tokens[0]); ! $tokenURI = $this->removeQuotes($tokens[1]); switch ($tokenPublicID) { case "-//W3C//DTD XHTML 1.0 Strict//EN": { --- 90,174 ---- function parseDoctype($tokens) { $token = $tokens[0]; ! if ($token == "<!DOCTYPE") { ! $this->parseDocumentElement($tokens, 1); } } ! function parseDocumentElement($tokens, $offset) { ! $token = $tokens[$offset]; $this->documentElement = $token; ! $token = $tokens[$offset + 1]; ! if ($token == "[") { ! $this->parseInternalDTD($tokens, $offset + 2); ! // foreach ($this->elements as $element) { ! // echo $element->tagName." -- "; ! // if ($element->children == null) { ! // echo "EMPTY"; ! // } ! // else { ! // echo $element->children->toString(); ! // } ! // echo "<br>"; ! // } ! exit; ! } ! if ($token == "SYSTEM") { ! $this->parseExternalSystemDTD($tokens, $offset + 2); ! } ! if ($token == "PUBLIC") { ! $this->parseExternalPublicDTD($tokens, $offset + 2); ! } ! } ! ! function parseInternalDTD($tokens, $offset) { ! $token = $tokens[$offset]; ! switch ($token) { ! case "<!ELEMENT": { ! $this->parseElement($tokens, $offset + 1); ! $currentElement = $this->currentElement; ! $currentElement->children =& $this->currentElement->children; ! echo $this->currentElement->tagName." -- "; ! unset($this->currentElement); ! if ($currentElement->children != null) { ! echo $currentElement->children->toString()."<br>"; ! } ! $this->elements[] =& $currentElement; ! $offset = $this->skipToGt($tokens, $offset + 1); ! break; ! } ! case "<!ATTLIST": { ! $offset = $this->skipToGt($tokens, $offset + 1); ! break; ! } ! case "<!ENTITY": { ! $offset = $this->skipToGt($tokens, $offset + 1); ! break; } ! case "<!NOTATION": { ! $offset = $this->skipToGt($tokens, $offset + 1); ! break; ! } } ! if (($tokens[$offset] != "]>") && ($tokens[$offset] != "]")) { ! $this->parseInternalDTD($tokens, $offset); } } ! function skipToGt($tokens, $offset) { ! while ($tokens[$offset] != ">") { ! $offset++; ! } ! return $offset + 1;; } ! function parseExternalSystemDTD($tokens, $offset) { ! $token = $this->removeQuotes($tokens[$offset]); ! $DTDParser = new DTDParser(); ! $DTDParser->parseFile($token); } ! function parseExternalPublicDTD($tokens, $offset) { ! $tokenPublicID = $this->removeQuotes($tokens[$offset]); ! $tokenURI = $this->removeQuotes($tokens[$offset + 1]); switch ($tokenPublicID) { case "-//W3C//DTD XHTML 1.0 Strict//EN": { *************** *** 123,135 **** } } - // echo $tokenURI; } ! function compareToken($tok1, $tok2) { ! if ($tok1 == $tok2) { ! return true; ! } else { ! return false; } } --- 179,300 ---- } } } ! function parseElement($tokens, $offset) { ! $name = $tokens[$offset]; ! $this->currentElement = new DTDElement($name); ! $this->parseElementChilds($tokens, $offset + 1); ! } ! ! function parseElementChilds($tokens, $offset) { ! $token = $tokens[$offset]; ! if ($token == "(") { ! $nestlevel = 1; $cursor = 1; ! while (($nestlevel > 0) && ! (($tokens[$offset + $cursor] != ",") || ($nestlevel > 1))) { ! if ($tokens[$offset + $cursor] == "(") { ! $nestlevel++; ! } ! if (substr($tokens[$offset + $cursor], 0, 1) == ")") { ! $nestlevel--; ! } ! $cursor++; ! } ! if ($nestlevel == 1) { ! if ($this->currentElement->children == null) { ! $this->currentElement->children = new DTDElementSequence(); ! } ! else { ! $newChild = new DTDElementSequence(); ! $newChild->parent =& $this->currentElement->children; ! $newChild->parent->appendChild($newChild); ! unset($this->currentElement->children); ! $this->currentElement->children =& $newChild; ! } ! } ! else { ! if ($this->currentElement->children == null) { ! $this->currentElement->children = new DTDElementChoice(); ! } ! else { ! $newChild = new DTDElementChoice(); ! $newChild->parent =& $this->currentElement->children; ! $newChild->parent->appendChild($newChild); ! unset($this->currentElement->children); ! $this->currentElement->children =& $newChild; ! } ! } ! $this->parseElementChilds($tokens, $offset + 1); ! return; ! } ! if ($token[0] == ")") { ! if (($token[strlen($token) - 1] == "?") || ! ($token[strlen($token) - 1] == "+") || ! ($token[strlen($token) - 1] == "*")) { ! $this->currentElement->children->setNumber($token[strlen($token) - 1]); ! } ! if ($this->currentElement->children->parent != null) { ! $parent =& $this->currentElement->children->parent; ! $this->currentElement->children =& $parent; ! $this->parseElementChilds($tokens, $offset + 1); ! return; ! } ! else { ! // assert: the dtd is correct, so it is the last return. ! return; ! } ! } ! if (($token == ",") || ($token == "|") || ! ($token == "?") || ($token == "+") || ($token == "*")) { ! $this->parseElementChilds($tokens, $offset + 1); ! return; ! } ! if ($token == ">") { ! return; ! } ! $this->parseElementChild($tokens, $offset); ! $this->parseElementChilds($tokens, $offset + 1); ! } ! ! function parseElementChild($tokens, $offset) { ! $token = $tokens[$offset]; ! if ($token == "ANY") { ! $this->currentElement->children = ! new DTDElementChild("", "", true); ! return; ! } ! if ($token == "EMPTY") { ! $this->currentElement->children = null; ! return; ! } ! if ($token == "#PCDATA") { ! if ($this->currentElement->children == null) { ! $this->currentElement->children = ! new DTDElementChild("", "", true, true); ! return; ! } ! else { ! $this->currentElement->children-> ! appendChild(new DTDElementChild("", "", true, true)); ! return; ! } ! } ! if (($token[strlen($token) - 1] == "?") || ! ($token[strlen($token) - 1] == "+") || ! ($token[strlen($token) - 1] == "*")) { ! $number = $token[strlen($token) - 1]; ! $token = substr($token, 0, strlen($token) - 1); ! } else { ! $number = ""; ! } ! $child = new DTDElementChild($token, $number); ! if ($this->currentElement->children == null) { ! $this->currentElement->children =& $child; ! return; ! } ! else { ! $this->currentElement->children->appendChild($child); ! return; } } *************** *** 146,151 **** $data = trim($data); - // echo "<pre>".$data."</pre>"; - if ($data == "<!DOCTYPE") { $this->DTDTokens[] = $data; --- 311,314 ---- *************** *** 155,159 **** if ($this->parseDTD) { ! $this->DTDTokens[] = $data; if ($data == "[") { $this->parseInternalDTD = true; --- 318,324 ---- if ($this->parseDTD) { ! if ($data != "") { ! $this->DTDTokens[] = $data; ! } if ($data == "[") { $this->parseInternalDTD = true; *************** *** 171,180 **** function handleExternalEntityRef($parser, $openEntityNames, $base, $systemId, $publicId) { - echo "hier"; return true; } function handleUnparsedEntityDecl($parser, $entityName, $base, $systemId, $publicId, $notationName) { - echo "hier"; return true; } --- 336,343 ---- *************** *** 211,214 **** --- 374,503 ---- echo "<strong>phpXD error:</strong> ".$message; exit; + } + } + + class DTDNode { + } + + class DTDElement extends DTDNode { + var $tagName; + var $children; + + function DTDElement($tagName) { + $this->tagName = $tagName; + $this->children = null; + } + } + + class DTDElementChild extends DTDNode { + // Number of that child + // 1 = one time + // 2 = ? - zero or one time + // 3 = * - zero or more times + // 4 = + - one or more times + var $number = 1; + + var $any = false; + var $pcdata = false; + var $tagName; + var $id; + + function DTDElementChild($tagName, $number, $any = false, $pcdata = false) { + if ($any) { + $this->any = true; + return; + } + + if ($pcdata) { + $this->pcdata = true; + return; + } + + $this->tagName = $tagName; + $this->setNumber($number); + } + + function setNumber($number) { + switch ($number) { + case "?": { + $this->number = 2; + break; + } + case "+": { + $this->number = 3; + break; + } + case "*": { + $this->number = 4; + break; + } + } + } + + function toString() { + if ($this->any) { + return "ANY"; + } + if ($this->pcdata) { + return "#PCDAZA"; + } + return $this->tagName." / ".$this->number; + } + } + + class DTDElementSequence extends DTDElementChild { + var $sequence; + var $length = 0; + var $parent = null; + + function DTDElementSequence() { + } + + function appendChild(&$child) { + $this->sequence[$this->length++] =& $child; + } + + function toString() { + $str = "("; + $count = 0; + for ($count = 0; $count < $this->length; ++$count) { + $child =& $this->sequence[$count]; + if ($count != 0) { + $str .= " , "; + } + $str .= $child->toString(); + } + $str .= ") / ".$this->number; + return $str; + } + } + + class DTDElementChoice extends DTDElementChild { + var $choices; + var $length = 0; + var $parent = null; + + function DTDElementChoice() { + } + + function DTDElementChoice() { + } + + function appendChild(&$child) { + $this->choices[$this->length++] =& $child; + } + + function toString() { + $str = "("; + $count = 0; + for ($count = 0; $count < $this->length; ++$count) { + $child =& $this->choices[$count]; + if ($count != 0) { + $str .= " | "; + } + $str .= $child->toString(); + } + $str .= ") / ".$this->number; + return $str; } } |