From: Lars M. I. <la...@gn...> - 2004-07-16 12:00:39
|
We've just gotten a news feed that looks like this: <ARTIKKEL> <TITTEL><![CDATA[Stalltips fra Warren]]></TITTEL> <KATEGORI><![CDATA[Aksjetips]]></KATEGORI> <DATO><![CDATA[16.07.04 10:56]]></DATO> </ARTIKKEL> This is, according to people who know XML, valid. clocc doesn't seem to be able to parse this -- it just gives a backtrace. So here's a quick patch that reads a marked section and just returns the text in the section. Index: cllib-xml.lisp =================================================================== RCS file: /home/cvs/backoffice/clocc/cllib-xml.lisp,v retrieving revision 1.1 diff -c -r1.1 cllib-xml.lisp *** cllib-xml.lisp 2 Jun 2004 13:59:54 -0000 1.1 --- cllib-xml.lisp 16 Jul 2004 11:52:16 -0000 *************** *** 745,767 **** 'read-xml stream last) (make-xml-decl :name name :args (nbutlast atts)))) (#\! ! (if (char= #\- (peek-char nil stream)) ! (let ((ch (progn (read-char stream) (read-char stream t nil t)))) ! (assert (char= #\- ch) (ch) ! "~s: cannot handle: <!-~c" 'read-xml ch) ! (make-xml-comment :data (xml-read-comment stream))) ! (let ((obj (read stream t nil t))) ! (case obj ! (xml-tags::entity (make-xml-comment ! :data (xml-read-entity stream))) ! ((xml-tags::doctype xml-tags::element xml-tags::attlist ! xml-tags::notation) ! (make-xml-misc :type obj :data ! (read-delimited-list #\> stream t))) ! (t (warn "~s: what is `~s'? proceed, with fingers crossed..." ! 'read-xml obj) ! (cons obj (xml-list-to-alist ! (read-delimited-list #\> stream t)))))))) (t (unread-char ch stream) (xml-read-tag stream))))) ;; do not need `xml-list-to-alist' in <!DOCTYPE foo [...]> --- 745,774 ---- 'read-xml stream last) (make-xml-decl :name name :args (nbutlast atts)))) (#\! ! (cond ! ((char= #\- (peek-char nil stream)) ! (let ((ch (progn (read-char stream) (read-char stream t nil t)))) ! (assert (char= #\- ch) (ch) ! "~s: cannot handle: <!-~c" 'read-xml ch) ! (make-xml-comment :data (xml-read-comment stream)))) ! ((char= #\[ (peek-char nil stream)) ! (let ((section (read-section stream))) ! (format t "Read section ~s~%" section) ! (assert (eql (read-char stream nil nil) #\>)) ! (cadr section))) ! (t ! (let ((obj (read stream t nil t))) ! (case obj ! (xml-tags::entity (make-xml-comment ! :data (xml-read-entity stream))) ! ((xml-tags::doctype xml-tags::element xml-tags::attlist ! xml-tags::notation) ! (make-xml-misc :type obj :data ! (read-delimited-list #\> stream t))) ! (t (warn "~s: what is `~s'? proceed, with fingers crossed..." ! 'read-xml obj) ! (cons obj (xml-list-to-alist ! (read-delimited-list #\> stream t))))))))) (t (unread-char ch stream) (xml-read-tag stream))))) ;; do not need `xml-list-to-alist' in <!DOCTYPE foo [...]> *************** *** 786,791 **** --- 793,818 ---- (if (find (peek-char t stream t nil t) "&%<>" :test #'char=) (read stream t nil t) (values (xml-read-text stream "<&"))))))))) + + (defun read-section (stream) + (let ((brackets 0) + strings chars) + (loop for char = (read-char stream nil nil) + do + (progn + (if (or (eql char #\[) + (eql char #\])) + (progn + (if (eql char #\[) + (incf brackets) + (decf brackets)) + (when chars + (push (coerce (nreverse chars) 'string) strings) + (setq chars nil))) + (push char chars))) + while (and char + (not (zerop brackets)))) + (nreverse strings))) ;;; ;;; UI -- (domestic pets only, the antidote for overdose, milk.) la...@gn... * Lars Magne Ingebrigtsen |