Update of /cvsroot/php-blog/serendipity/include/admin/importers
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4943/include/admin/importers
Modified Files:
generic.inc.php
Log Message:
RSS feeds encoded in a charset other than UTF-8 were not handled correctly.
Let users specify the source encoding of an RSS feed, because Onyx RSS doesn't report the charset a feed uses.
Fixes problems when importing and when displaying remote RSS feeds.
Index: generic.inc.php
===================================================================
RCS file: /cvsroot/php-blog/serendipity/include/admin/importers/generic.inc.php,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -d -r1.5 -r1.6
--- generic.inc.php 26 Jan 2005 14:14:05 -0000 1.5
+++ generic.inc.php 15 Feb 2005 11:25:26 -0000 1.6
@@ -10,6 +10,15 @@
var $inputFields = array();
function Serendipity_Import_Generic($data) {
+        // Build the list of selectable source charsets for the feed.
+        // UTF-8 and ISO-8859-1 are only offered as explicit choices when they
+        // differ from the blog's own charset; the blog's charset itself is
+        // always offered under the key 'native'.
+        $charsets = array();
+        if (LANG_CHARSET != 'UTF-8') {
+            $charsets['UTF-8'] = 'UTF-8';
+        }
+        if (LANG_CHARSET != 'ISO-8859-1') {
+            // Must be stored in $charsets (plural): that is the array handed
+            // to the 'charset' input field below.
+            $charsets['ISO-8859-1'] = 'ISO-8859-1';
+        }
+        $charsets['native'] = LANG_CHARSET;
+
$this->data = $data;
$this->inputFields = array(array('text' => RSS . ' ' . URL,
'type' => 'input',
@@ -26,6 +35,12 @@
'name' => 'category',
'value' => 0,
'default' => $this->_getCategoryList()),
+
+ array('text' => CHARSET,
+ 'type' => 'list',
+ 'name' => 'charset',
+ 'value' => 'UTF-8',
+ 'default' => $charsets),
array('text' => RSS_IMPORT_BODYONLY,
'type' => 'bool',
@@ -52,6 +67,26 @@
return $ret;
}
+    /**
+     * Converts a string taken from the RSS feed into the blog's charset.
+     *
+     * The source encoding is the importer option $this->data['charset']
+     * ('native', 'ISO-8859-1' or 'UTF-8'); a missing or unknown value is
+     * treated as 'UTF-8'. The target encoding is always LANG_CHARSET.
+     *
+     * @param  string $string Text as delivered by the feed parser
+     * @return string         The converted text, or the input unchanged when
+     *                        no conversion is needed or none is available
+     */
+    function decode($string) {
+        $source = isset($this->data['charset']) ? $this->data['charset'] : 'UTF-8';
+
+        switch($source) {
+            case 'native':
+                // Feed is declared to be in the blog's own charset already.
+                return $string;
+
+            case 'ISO-8859-1':
+                if (function_exists('iconv')) {
+                    $converted = iconv('ISO-8859-1', LANG_CHARSET, $string);
+                    // iconv() returns false on failure; keep the input rather
+                    // than replacing the text with an empty value.
+                    return ($converted === false) ? $string : $converted;
+                } elseif (function_exists('recode')) {
+                    return recode('iso-8859-1..' . LANG_CHARSET, $string);
+                } elseif (LANG_CHARSET == 'UTF-8') {
+                    // No conversion extension loaded, but this particular
+                    // pair is covered by a core function.
+                    return utf8_encode($string);
+                } else {
+                    return $string;
+                }
+
+            case 'UTF-8':
+            default:
+                if (LANG_CHARSET == 'UTF-8') {
+                    // Source and target match; utf8_decode() would corrupt
+                    // every non-ASCII character here.
+                    return $string;
+                }
+
+                if (LANG_CHARSET != 'ISO-8859-1' && function_exists('iconv')) {
+                    // utf8_decode() can only produce ISO-8859-1, so use
+                    // iconv() to reach any other blog charset.
+                    $converted = iconv('UTF-8', LANG_CHARSET, $string);
+                    if ($converted !== false) {
+                        return $converted;
+                    }
+                }
+
+                return utf8_decode($string);
+        }
+    }
+
function buildEntry($item, &$entry) {
global $serendipity;
@@ -59,7 +94,7 @@
$bodyonly = serendipity_get_bool($this->data['bodyonly']);
if ($item['description']) {
- $entry['body'] = utf8_decode($item['description']);
+ $entry['body'] = $this->decode($item['description']);
}
if ($item['content:encoded']) {
@@ -75,14 +110,14 @@
// switch). We substract 4 letters because of possible '...' additions to an entry.
$testbody = substr(trim(strip_tags($entry['body'])), 0, -4);
if ($testbody != substr(trim(strip_tags($item['content:encoded'])), 0, strlen($testbody))) {
- $data .= utf8_decode($item['content:encoded']);
+ $data .= $this->decode($item['content:encoded']);
} else {
- $data = utf8_decode($item['content:encoded']);
+ $data = $this->decode($item['content:encoded']);
}
}
- $entry['title'] = utf8_decode($item['title']);
- $entry['timestamp'] = utf8_decode(strtotime(isset($item['pubdate']) ? $item['pubdate'] : $item['dc:date']));
+ $entry['title'] = $this->decode($item['title']);
+ $entry['timestamp'] = $this->decode(strtotime(isset($item['pubdate']) ? $item['pubdate'] : $item['dc:date']));
if ($entry['timestamp'] == -1) {
$entry['timestamp'] = time();
}
@@ -94,7 +129,7 @@
}
if (!empty($item['category'])) {
- $cat = serendipity_fetchCategoryInfo(0, trim(utf8_decode($item['category'])));
+ $cat = serendipity_fetchCategoryInfo(0, trim($this->decode($item['category'])));
if (is_array($cat) && isset($cat['categoryid'])) {
$entry['categories'][] = $cat['categoryid'];
}
|