Thread: [Phpxd-commits] CVS: phpXD/include/parser DOMParser.php,1.3,1.4 DTDParser.php,1.3,1.4
Status: Beta
Brought to you by:
growbal
|
From: Thomas D. <th...@us...> - 2002-01-29 20:57:05
|
Update of /cvsroot/phpxd/phpXD/include/parser
In directory usw-pr-cvs1:/tmp/cvs-serv2994/include/parser
Modified Files:
DOMParser.php DTDParser.php
Log Message:
Work on improved dtd support.
Index: DOMParser.php
===================================================================
RCS file: /cvsroot/phpxd/phpXD/include/parser/DOMParser.php,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** DOMParser.php 2002/01/26 13:20:01 1.3
--- DOMParser.php 2002/01/29 20:57:01 1.4
***************
*** 70,74 ****
*/
var $DTDParser = null;
! var $DTDTokens = "";
/**
--- 70,75 ----
*/
var $DTDParser = null;
! var $DTDTokens;
! var $DTDString = "";
/**
***************
*** 268,276 ****
}
else {
! $text = str_replace("\t", " ", $text);
! $text = str_replace("\n", " ", $text);
! $text = str_replace("\r", " ", $text);
! $text = str_replace(" ", " ", $text);
! $text = preg_replace("/\ +/", " ", $text);
$this->lastNode =& $this->currentNode->appendChild(
--- 269,273 ----
}
else {
! $text = str_replace("\r", "", $text);
$this->lastNode =& $this->currentNode->appendChild(
***************
*** 304,311 ****
*/
function handleDefault($parser, $data) {
! $data = trim($data);
! if ($data == "<!DOCTYPE") {
! $this->DTDTokens[] = $data;
$this->parseDTD = true;
return true;
--- 301,309 ----
*/
function handleDefault($parser, $data) {
! $data2 = trim($data);
! if ($data2 == "<!DOCTYPE") {
! $this->DTDTokens[] = $data2;
! $this->DTDString .= $data;
$this->parseDTD = true;
return true;
***************
*** 313,352 ****
if ($this->parseDTD) {
! if ($data != "") {
$this->DTDTokens[] = $data;
}
! if ($data == "[") {
$this->parseInternalDTD = true;
}
! if ($data == "]") {
$this->parseInternalDTD = false;
}
! if (($data == ">") && (!$this->parseInternalDTD)) {
$this->parseDTD = false;
! // $this->DTDParser->parseTokens($this->DTDTokens);
}
return true;
}
! if ($data == "<![CDATA[") {
$this->parseCData = true;
return true;
}
! if ($data == "]]>" && $this->parseCData) {
$this->parseCData = false;
return true;
}
! if (!(strpos($data, "<!--") === false)) {
! $data = str_replace("<!--", "", $data);
! $data = str_replace("-->", "", $data);
if (!isset($this->document->documentElement)) {
$this->document->appendChild(
! $this->document->createComment($data));
}
else {
$this->lastNode =& $this->currentNode->appendChild(
! $this->document->createComment($data));
}
return true;
--- 311,373 ----
if ($this->parseDTD) {
! if ($data2 != "") {
$this->DTDTokens[] = $data;
}
! $this->DTDString .= $data;
! if ($data2 == "[") {
$this->parseInternalDTD = true;
}
! if ($data2 == "]") {
$this->parseInternalDTD = false;
}
! if (($data2 == ">") && (!$this->parseInternalDTD)) {
$this->parseDTD = false;
!
! $doctype = new DocumentType();
! $doctype->name = $this->DTDTokens[1];
! $doctype->publicId = "";
! $doctype->systemId = "";
! $doctype->internalSubset = "";
! $offset = 2;
! if ($this->DTDTokens[2] == "PUBLIC") {
! $doctype->publicId = $this->DTDTokens[3];
! $doctype->systemId = $this->DTDTokens[4];
! $offset = 5;
! }
! if ($this->DTDTokens[2] == "SYSTEM") {
! $doctype->systemId = $this->DTDTokens[3];
! $offset = 4;
! }
! if ($this->DTDTokens[$offset] == "[") {
! $subset = substr($this->DTDString,
! strpos($this->DTDString, "[") + 1);
! $subset = substr($subset, 0, strrpos($subset, "]"));
! $doctype->internalSubset = $subset;
! }
! $this->document->doctype =& $doctype;
}
return true;
}
! if ($data2 == "<![CDATA[") {
$this->parseCData = true;
return true;
}
! if ($data2 == "]]>" && $this->parseCData) {
$this->parseCData = false;
return true;
}
! if (!(strpos($data2, "<!--") === false)) {
! $data2 = str_replace("<!--", "", $data2);
! $data2 = str_replace("-->", "", $data2);
if (!isset($this->document->documentElement)) {
$this->document->appendChild(
! $this->document->createComment($data2));
}
else {
$this->lastNode =& $this->currentNode->appendChild(
! $this->document->createComment($data2));
}
return true;
Index: DTDParser.php
===================================================================
RCS file: /cvsroot/phpxd/phpXD/include/parser/DTDParser.php,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** DTDParser.php 2002/01/29 17:47:12 1.3
--- DTDParser.php 2002/01/29 20:57:01 1.4
***************
*** 15,18 ****
--- 15,25 ----
*/
class DTDParser {
+ // public
+ var $elements;
+ var $attLists;
+ var $entities;
+ var $notations;
+
+ // private
var $parseDTD = false;
var $parseInternalDTD = false;
***************
*** 20,26 ****
var $parameterEntities;
var $parameterEntitiesDefined;
! var $elements;
var $currentElement;
! var $currentAttlist;
function parse($str, $dtdonly = true) {
--- 27,35 ----
var $parameterEntities;
var $parameterEntitiesDefined;
!
! // private
var $currentElement;
! var $currentAttList;
! var $currentEntity;
function parse($str, $dtdonly = true) {
***************
*** 34,37 ****
--- 43,47 ----
while (preg_match('=(.*)\%([a-z,A-Z,0-9,\.]*);(.*)$=sU', $str, $ent)) {
$str = preg_replace('=<!--.*-->=sU', '', $str);
+ // <!ENTITY % name PUBLIC publicId systemid>
while (preg_match('=(.*)<!ENTITY[ ,\n,\r,\t]*\%[ ,\n,\r,\t]*'.
'([a-z,A-Z,0-9,\.]*)[ ,\n,\r,\t]*PUBLIC'.
***************
*** 39,69 ****
'[ ,\n,\r,\t]*"([a-z,A-Z,0-9,\.,\-,_,/]*)"'.
'[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) {
! if ($this->file_exists($this->currentDir."/".$ent[4])) {
if (isset($this->parameterEntitiesDefined[$ent[2]]) &&
! ($this->parameterEntitiesDefined[$ent[2]] == true)) {
// redefined parameter entity => replace current string
foreach ($this->parameterEntities as $entref => $body) {
$ent[1] = str_replace("%".$entref.";", $body, $ent[1]);
}
}
! $this->parameterEntities[$ent[2]] =
! preg_replace('=<!--.*-->=sU', '',
! implode("", file($this->currentDir."/".$ent[4])));
! $this->parameterEntitiesDefined[$ent[2]] = true;
! }
$str = $ent[1].$ent[5];
}
while (preg_match('=(.*)<!ENTITY[ ,\n,\r,\t]*\%[ ,\n,\r,\t]*'.
'([a-z,A-Z,0-9,\.]*)[ ,\n,\r,\t]*"(.*)"'.
'[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) {
! if (isset($this->parameterEntitiesDefined[$ent[2]]) &&
! ($this->parameterEntitiesDefined[$ent[2]] == true)) {
! // redefined parameter entity => replace current string
! foreach ($this->parameterEntities as $entref => $body) {
! $ent[1] = str_replace("%".$entref.";", $body, $ent[1]);
}
}
- $this->parameterEntities[$ent[2]] = $ent[3];
- $this->parameterEntitiesDefined[$ent[2]] = true;
$str = $ent[1].$ent[4];
}
--- 49,111 ----
'[ ,\n,\r,\t]*"([a-z,A-Z,0-9,\.,\-,_,/]*)"'.
'[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) {
! if (!isset($this->parameterEntitiesDefined[$ent[2]]) ||
! ($this->parameterEntitiesDefined[$ent[2]] < 2)) {
if (isset($this->parameterEntitiesDefined[$ent[2]]) &&
! ($this->parameterEntitiesDefined[$ent[2]] == 1)) {
// redefined parameter entity => replace current string
foreach ($this->parameterEntities as $entref => $body) {
$ent[1] = str_replace("%".$entref.";", $body, $ent[1]);
}
+ }
+ $filename = $this->publicId2Filename($ent[3], $ent[4]);
+ if ($this->file_exists($filename)) {
+ $this->parameterEntities[$ent[2]] =
+ preg_replace('=<!--.*-->=sU', '', implode("",
+ file($filename)));
+ $this->parameterEntitiesDefined[$ent[2]] = 1;
}
! }
$str = $ent[1].$ent[5];
}
+ // <!ENTITY % name SYSTEM systemid>
+ while (preg_match('=(.*)<!ENTITY[ ,\n,\r,\t]*\%[ ,\n,\r,\t]*'.
+ '([a-z,A-Z,0-9,\.]*)[ ,\n,\r,\t]*SYSTEM'.
+ '[ ,\n,\r,\t]*"([a-z,A-Z,0-9,\.,\-,_,/]*)"'.
+ '[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) {
+ if ($this->file_exists($this->currentDir."/".$ent[4])) {
+ if (!isset($this->parameterEntitiesDefined[$ent[2]]) ||
+ ($this->parameterEntitiesDefined[$ent[2]] < 2)) {
+ if (isset($this->parameterEntitiesDefined[$ent[2]]) &&
+ ($this->parameterEntitiesDefined[$ent[2]] == 1)) {
+ // redefined parameter entity => replace current string
+ foreach ($this->parameterEntities as $entref => $body) {
+ $ent[1] = str_replace("%".$entref.";", $body, $ent[1]);
+ }
+ }
+ $this->parameterEntities[$ent[2]] =
+ preg_replace('=<!--.*-->=sU', '',
+ implode("", file($this->currentDir."/".
+ $ent[3])));
+ $this->parameterEntitiesDefined[$ent[2]] = 1;
+ }
+ }
+ $str = $ent[1].$ent[4];
+ }
+ // <!ENTITY % name body>
while (preg_match('=(.*)<!ENTITY[ ,\n,\r,\t]*\%[ ,\n,\r,\t]*'.
'([a-z,A-Z,0-9,\.]*)[ ,\n,\r,\t]*"(.*)"'.
'[ ,\n,\r,\t]*>(.*)$=sU', $str, $ent)) {
! if (!isset($this->parameterEntitiesDefined[$ent[2]]) ||
! ($this->parameterEntitiesDefined[$ent[2]] < 2)) {
! if (isset($this->parameterEntitiesDefined[$ent[2]]) &&
! ($this->parameterEntitiesDefined[$ent[2]] == 1)) {
! // redefined parameter entity => replace current string
! foreach ($this->parameterEntities as $entref => $body) {
! $ent[1] = str_replace("%".$entref.";", $body, $ent[1]);
! }
}
+ $this->parameterEntities[$ent[2]] = $ent[3];
+ $this->parameterEntitiesDefined[$ent[2]] = 1;
}
$str = $ent[1].$ent[4];
}
***************
*** 147,158 ****
case "<!ELEMENT": {
$this->parseElement($tokens, $offset + 1);
! $currentElement = $this->currentElement;
! $currentElement->children =& $this->currentElement->children;
! echo $this->currentElement->tagName." -- ";
! unset($this->currentElement);
! if ($currentElement->children != null) {
! echo $currentElement->children->toString()."<br>";
! }
! $this->elements[] =& $currentElement;
$offset = $this->skipToGt($tokens, $offset + 1);
break;
--- 189,196 ----
case "<!ELEMENT": {
$this->parseElement($tokens, $offset + 1);
! // copy element
! $this->elements[$this->currentElement->tagName] = $this->currentElement;
! // $this->elements[$this->currentElement->tagName]->children =&
! // $this->currentElement->children;
$offset = $this->skipToGt($tokens, $offset + 1);
break;
***************
*** 161,171 ****
--- 199,213 ----
$this->parseAttList($tokens, $offset + 1);
$offset = $this->skipToGt($tokens, $offset + 1);
+ $this->attLists[$this->currentAttList->element] =
+ $this->currentAttList;
break;
}
case "<!ENTITY": {
+ $this->parseEntity($tokens, $offset + 1);
$offset = $this->skipToGt($tokens, $offset + 1);
break;
}
case "<!NOTATION": {
+ $this->parseNotation($tokens, $offset + 1);
$offset = $this->skipToGt($tokens, $offset + 1);
break;
***************
*** 196,209 ****
function parseExternalPublicDTD($tokens, $offset) {
! $tokenPublicID = $this->removeQuotes($tokens[$offset]);
! $tokenURI = $this->removeQuotes($tokens[$offset + 1]);
! switch ($tokenPublicID) {
! case "-//W3C//DTD XHTML 1.0 Strict//EN": {
! $DTDParser = new DTDParser();
! // $DTDParser->parseFile("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
! $DTDParser->parseFile("../xhtml1-strict.dtd", true);
! break;
! }
}
// merge definitions from $DTDParser with this
$token = $tokens[$offset + 2];
--- 238,259 ----
function parseExternalPublicDTD($tokens, $offset) {
! $publicId = $this->removeQuotes($tokens[$offset]);
! $systemId = $this->removeQuotes($tokens[$offset + 1]);
! $filename = $this->publicId2Filename($publicId, $systemId);
! $DTDParser = new DTDParser();
! $DTDParser->parameterEntities =& $this->parameterEntities;
! if (is_array($this->parameterEntitiesDefined)) {
! foreach ($this->parameterEntitiesDefined as $key => $value) {
! // internal parameter entities overwrite external
! $DTDParser->parameterEntitiesDefined[$key] = 2;
! }
}
+ $DTDParser->attLists =& $this->attLists;
+ $DTDParser->entities =& $this->entities;
+ $DTDParser->elements =& $this->elements;
+ $DTDParser->notations =& $this->notations;
+ $DTDParser->parseFile($filename, true);
+
+
// merge definitions from $DTDParser with this
$token = $tokens[$offset + 2];
***************
*** 216,219 ****
--- 266,278 ----
$name = $tokens[$offset];
$this->currentElement = new DTDElement($name);
+ if ($tokens[$offset + 1] == "ANY") {
+ $this->currentElement->children =
+ new DTDElementChild("", "", true);
+ return;
+ }
+ if ($tokens[$offset + 1] == "EMPTY") {
+ $this->currentElement->children = null;
+ return;
+ }
$this->parseElementChilds($tokens, $offset + 1);
}
***************
*** 291,312 ****
function parseElementChild($tokens, $offset) {
$token = $tokens[$offset];
- if ($token == "ANY") {
- $this->currentElement->children =
- new DTDElementChild("", "", true);
- return;
- }
- if ($token == "EMPTY") {
- $this->currentElement->children = null;
- return;
- }
if ($token == "#PCDATA") {
if ($this->currentElement->children == null) {
$this->currentElement->children =
! new DTDElementChild("", "", true, true);
return;
}
else {
$this->currentElement->children->
! appendChild(new DTDElementChild("", "", true, true));
return;
}
--- 350,362 ----
function parseElementChild($tokens, $offset) {
$token = $tokens[$offset];
if ($token == "#PCDATA") {
if ($this->currentElement->children == null) {
$this->currentElement->children =
! new DTDElementChild("", "", false, true);
return;
}
else {
$this->currentElement->children->
! appendChild(new DTDElementChild("", "", false, true));
return;
}
***************
*** 334,338 ****
function parseAttList($tokens, $offset) {
$name = $tokens[$offset];
! $this->currentAttlist = new DTDAttList($name);
$this->parseAttributes($tokens, $offset + 1);
}
--- 384,388 ----
function parseAttList($tokens, $offset) {
$name = $tokens[$offset];
! $this->currentAttList = new DTDAttList($name);
$this->parseAttributes($tokens, $offset + 1);
}
***************
*** 341,345 ****
while ($tokens[$offset] != ">") {
$name = $tokens[$offset];
! echo $name;
$type = $tokens[$offset + 1];
if ($type == "(") {
--- 391,399 ----
while ($tokens[$offset] != ">") {
$name = $tokens[$offset];
!
! // TODO: check xml:space and xml:lang attributes for
! // correct implementation
! // ...
!
$type = $tokens[$offset + 1];
if ($type == "(") {
***************
*** 360,375 ****
$default = $tokens[$offset + 1];
$attribute->default = $default;
! if ($default == "#FIXED") {
! $attribute->defaultValue = $this->removeQuotes($tokens[$offset + 2]);
! $offset++;
}
$offset += 2;
}
}
!
function removeQuotes($str) {
! if (($str[0] == "\"") && ($str[strlen($str) - 1] == "\"")) {
return substr($str, 1, strlen($str) - 2);
}
--- 414,487 ----
$default = $tokens[$offset + 1];
$attribute->default = $default;
! if (($default != "#IMPLIED") &&
! ($default != "#REQUIRED")) {
! if ($default == "#FIXED") {
! $attribute->defaultValue = $this->removeQuotes($tokens[$offset + 2]);
! $offset++;
! }
! else {
! $attribute->defaultValue = $this->removeQuotes($tokens[$offset + 1]);
! }
}
$offset += 2;
+ $this->currentAttList->attributes[$attribute->name] = $attribute;
}
}
! /**
!  * Reads one <!ENTITY ...> declaration from the token list and stores the
!  * resulting DTDEntity in $this->entities under the entity name.
!  *
!  * Two shapes are recognised:
!  *   - external:  name SYSTEM systemId [NDATA notation]
!  *                name PUBLIC publicId systemId [NDATA notation]
!  *   - internal:  name "replacement text"
!  *
!  * NOTE(review): the system/public identifiers are stored exactly as
!  * tokenized, while the internal body goes through removeQuotes() --
!  * confirm whether the identifiers should be unquoted as well.
!  *
!  * @param array $tokens  token list of the DTD
!  * @param int   $offset  index of the entity name token
!  */
! function parseEntity($tokens, $offset) {
!     $name = $tokens[$offset];
!     $keyword = $tokens[$offset + 1];
!
!     $entity = new DTDEntity($name);
!     $entity->ndata = false;
!
!     if ($keyword == "SYSTEM") {
!         $entity->systemId = $tokens[$offset + 2];
!         $next = $offset + 3;
!     }
!     elseif ($keyword == "PUBLIC") {
!         $entity->publicId = $tokens[$offset + 2];
!         $entity->systemId = $tokens[$offset + 3];
!         $next = $offset + 4;
!     }
!     else {
!         // internal entity: the token after the name is the quoted value
!         $entity->body = $this->removeQuotes($keyword);
!         $this->entities[$name] =& $entity;
!         return;
!     }
!
!     // an external entity may be unparsed: "NDATA notationName" follows the ids
!     if ($tokens[$next] == "NDATA") {
!         $entity->ndata = true;
!         $entity->notation = $tokens[$next + 1];
!     }
!     $this->entities[$name] =& $entity;
! }
+ /**
+  * Reads one <!NOTATION ...> declaration from the token list and stores the
+  * resulting DTDNotation in $this->notations under the notation name.
+  *
+  * Recognised shapes:
+  *   name SYSTEM systemId
+  *   name PUBLIC publicId [systemId]
+  *
+  * @param array $tokens  token list of the DTD
+  * @param int   $offset  index of the notation name token
+  */
+ function parseNotation($tokens, $offset) {
+     $name = $tokens[$offset];
+     $notation = new DTDNotation($name);
+     // Default both identifiers on the notation itself. The previous code
+     // assigned them to an undefined $entity, leaving the notation's ids unset.
+     $notation->publicId = "";
+     $notation->systemId = "";
+     $body = $tokens[$offset + 1];
+     if ($body == "SYSTEM") {
+         $notation->systemId = $tokens[$offset + 2];
+     }
+     if ($body == "PUBLIC") {
+         $notation->publicId = $tokens[$offset + 2];
+         // the system id is optional after PUBLIC; ">" means it was omitted
+         if ($tokens[$offset + 3] != ">") {
+             $notation->systemId = $tokens[$offset + 3];
+         }
+     }
+     $this->notations[$name] =& $notation;
+ }
+
+ /**
+  * Resolves the location of a DTD from its identifiers.
+  *
+  * If the public id is the one known entry (XHTML 1.0 Strict), its W3C URL is
+  * returned; for every other public id the given system id is returned as is.
+  *
+  * @param string $publicId  public identifier of the DTD
+  * @param string $systemId  system identifier, used as fallback
+  * @return string           location to load the DTD from
+  */
+ function publicId2Filename($publicId, $systemId) {
+     // TODO: add all known publicId and systemIds
+     $xhtmlStrictId = "-//W3C//DTD XHTML 1.0 Strict//EN";
+     $xhtmlStrictUrl = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
+     return ($publicId == $xhtmlStrictId) ? $xhtmlStrictUrl : $systemId;
+ }
+
function removeQuotes($str) {
! if ((($str[0] == "\"") && ($str[strlen($str) - 1] == "\"")) ||
! (($str[0] == "\'") && ($str[strlen($str) - 1] == "\'"))) {
return substr($str, 1, strlen($str) - 2);
}
***************
*** 440,455 ****
class DTDNode {
}
- class DTDElement extends DTDNode {
- var $tagName;
- var $children;
-
- function DTDElement($tagName) {
- $this->tagName = $tagName;
- $this->children = null;
- }
- }
-
class DTDAttList extends DTDNode {
var $attributes;
--- 552,558 ----
class DTDNode {
+ // abstract class for all DTD classes
}
class DTDAttList extends DTDNode {
var $attributes;
***************
*** 474,477 ****
--- 577,613 ----
}
+ /**
+  * Record for one <!ENTITY ...> declaration; populated by parseEntity().
+  */
+ class DTDEntity extends DTDNode {
+ // entity name, set by the constructor
+ var $name;
+ // value of an internal entity, quotes removed (left unset for external entities)
+ var $body;
+ // public identifier token (PUBLIC form only)
+ var $publicId;
+ // system identifier token (SYSTEM and PUBLIC forms)
+ var $systemId;
+ // true when the declaration carries an NDATA clause, false otherwise
+ var $ndata;
+ // notation name that follows NDATA
+ var $notation;
+
+ /**
+  * @param string $name  name of the declared entity
+  */
+ function DTDEntity($name) {
+ $this->name = $name;
+ }
+ }
+
+ /**
+  * Record for one <!NOTATION ...> declaration; populated by parseNotation().
+  */
+ class DTDNotation extends DTDNode {
+ // notation name, set by the constructor
+ var $name;
+ // public identifier token (PUBLIC form)
+ var $publicId;
+ // system identifier token (SYSTEM form, optional after PUBLIC)
+ var $systemId;
+
+ /**
+  * @param string $name  name of the declared notation
+  */
+ function DTDNotation($name) {
+ $this->name = $name;
+ }
+ }
+
+ /**
+  * Record for one <!ELEMENT ...> declaration; created by parseElement().
+  */
+ class DTDElement extends DTDNode {
+ // element name, set by the constructor
+ var $tagName;
+ // content model as a DTDElementChild; null until the parser assigns one
+ // (parseElement leaves it null for EMPTY elements)
+ var $children;
+
+ /**
+  * @param string $tagName  name of the declared element
+  */
+ function DTDElement($tagName) {
+ $this->tagName = $tagName;
+ $this->children = null;
+ }
+ }
+
class DTDElementChild extends DTDNode {
var $number = "";
***************
*** 507,511 ****
}
if ($this->pcdata) {
! return "#PCDAZA";
}
return $this->tagName.$this->number;
--- 643,647 ----
}
if ($this->pcdata) {
! return "#PCDATA";
}
return $this->tagName.$this->number;
|