Update of /cvsroot/php-blog/serendipity
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv19120
Modified Files:
serendipity_functions.inc.php
Log Message:
function to clean up blog entries as best we can to make valid ATOM
feeds. xhtml_cleanup should be watched carefully and tested on as many feeds as possible.
Index: serendipity_functions.inc.php
===================================================================
RCS file: /cvsroot/php-blog/serendipity/serendipity_functions.inc.php,v
retrieving revision 1.239
retrieving revision 1.240
diff -u -d -r1.239 -r1.240
--- serendipity_functions.inc.php 25 Mar 2004 10:57:38 -0000 1.239
+++ serendipity_functions.inc.php 25 Mar 2004 22:13:16 -0000 1.240
@@ -1351,6 +1351,30 @@
return $guid;
}
+// jbalcorn: starter function to clean up xhtml for the atom feed. Takes the entry body as a
+// string and returns the cleaned string. Add rules as we find common mistakes.
+// DONE:
+//   since someone encoded all the urls, an ampersand followed by whitespace or a tag
+//   (e.g. "&<br />"), or ending the body, is encoded as &amp;; others are assumed intentional.
+//   unclosed <br> and <p> tags are made self-closing and </p> is dropped.
+// TODO:
+//   check ALL ampersands, find out if each is a valid entity, and encode if not
+function xhtml_cleanup($html) {
+    $p = array('/\&([\s\<])/',        // ampersand followed by whitespace or tag
+               '/\&$/',               // ampersand at end of body
+               '/<br\b([^\/>]*)>/i',  // unclosed br tag; [^\/>] keeps the match inside this one tag
+               '/<p\b([^\/>]*)>/i',   // unclosed p tag; \b keeps <pre>, <param> etc. untouched
+               '/<\/p>/i'             // closing </p> tags, dropped for feed
+               );
+    $r = array('&amp;\\1',
+               '&amp;',
+               '<br\\1 />',
+               '<p\\1 />',
+               ''
+               );
+    return preg_replace($p, $r, $html);
+}
+
function serendipity_printEntries_rss($entries, $version, $comments = false) {
global $serendipity;
@@ -1373,6 +1397,8 @@
// Do some relative -> absolute URI replacing magic. Replaces all HREF/SRC (<a>, <img>, ...) references to only the serendipitypath with the full baseURL URI
// garvin: Could impose some problems. Closely watch this one.
$entry['body'] = preg_replace('@(href|src)=("|\')(' . preg_quote($serendipity['serendipityHTTPPath']) . ')(.*)("|\')(.*)>@imsU', '\1=\2' . $serendipity['baseURL'] . '\4\2\6>', $entry['body']);
+ // jbalcorn: clean up body for XML compliance as best we can.
+ $entry['body'] = xhtml_cleanup($entry['body']);
serendipity_plugin_api::hook_event('frontend_display', $entry);
// extract author information
|