From: <hs...@us...> - 2012-01-11 16:59:59
|
Revision: 1012 http://treebase.svn.sourceforge.net/treebase/?rev=1012&view=rev Author: hshyket Date: 2012-01-11 16:59:50 +0000 (Wed, 11 Jan 2012) Log Message: ----------- Adding feed2js PHP application Added Paths: ----------- trunk/treebase_feed/ trunk/treebase_feed/.htaccess trunk/treebase_feed/build.php trunk/treebase_feed/feed2js.php trunk/treebase_feed/feed2js_config.php trunk/treebase_feed/feed2php.inc trunk/treebase_feed/footer trunk/treebase_feed/magpie/ trunk/treebase_feed/magpie/cache/ trunk/treebase_feed/magpie/cache_utf8/ trunk/treebase_feed/magpie/extlib/ trunk/treebase_feed/magpie/extlib/Snoopy.class.inc trunk/treebase_feed/magpie/rss_cache.inc trunk/treebase_feed/magpie/rss_fetch.inc trunk/treebase_feed/magpie/rss_parse.inc trunk/treebase_feed/magpie/rss_utils.inc trunk/treebase_feed/magpie_debug.php trunk/treebase_feed/magpie_simple.php trunk/treebase_feed/nosource.php trunk/treebase_feed/popup.js trunk/treebase_feed/preview.php trunk/treebase_feed/style/ trunk/treebase_feed/style/.css trunk/treebase_feed/style/basic1.css trunk/treebase_feed/style/bbc_style.css trunk/treebase_feed/style/blackbox.css trunk/treebase_feed/style/css_classes.gif trunk/treebase_feed/style/dog.css trunk/treebase_feed/style/dog.jpg trunk/treebase_feed/style/essc.css trunk/treebase_feed/style/feed2js.jpg trunk/treebase_feed/style/greenbars.css trunk/treebase_feed/style/kp.css trunk/treebase_feed/style/main.css trunk/treebase_feed/style/marooned2.css trunk/treebase_feed/style/nobullets.css trunk/treebase_feed/style/none.css trunk/treebase_feed/style/outlive.css trunk/treebase_feed/style/play-button.jpg trunk/treebase_feed/style/plum.css trunk/treebase_feed/style/rss.css trunk/treebase_feed/style/sekodeng.css trunk/treebase_feed/style/style_pile.php trunk/treebase_feed/style/zanestate.css trunk/treebase_feed/style.php trunk/treebase_feed/style_preview.php Property changes on: trunk/treebase_feed ___________________________________________________________________ Added: 
bugtraq:number + true Added: trunk/treebase_feed/.htaccess =================================================================== --- trunk/treebase_feed/.htaccess (rev 0) +++ trunk/treebase_feed/.htaccess 2012-01-11 16:59:50 UTC (rev 1012) @@ -0,0 +1,4 @@ +Options +FollowSymlinks +RewriteEngine on +RewriteCond $1 !^(feed2js\.php) +RewriteRule ^(.*)$ /treebase-feed/feed2js.php/$1 [L] \ No newline at end of file Added: trunk/treebase_feed/build.php =================================================================== --- trunk/treebase_feed/build.php (rev 0) +++ trunk/treebase_feed/build.php 2012-01-11 16:59:50 UTC (rev 1012) @@ -0,0 +1,223 @@ +<?php +/* Feed2JS : RSS feed to JavaScript + build.php + + ABOUT + This script can be used to create a form that is useful + for creating the JavaScript strings and testing the output + + Developed by Alan Levine + http://cogdogblog.com/ + + MORE: + Part of the Feed2JS package + See http://feed2js.org/ + +*/ + + + +// GET VARIABLES --------------------------------------------- +// Get variables from input form and set default values + + + + $src = (isset($_GET['src'])) ? $_GET['src'] : ''; + $chan = (isset($_GET['chan'])) ? $_GET['chan'] : 'y'; + $num = (isset($_GET['num'])) ? $_GET['num'] : 0; + $desc = (isset($_GET['desc'])) ? $_GET['desc'] : 1; + $auth = (isset($_GET['au'])) ? $_GET['au'] : 'n'; + $date = (isset($_GET['date'])) ? $_GET['date'] : 'n'; + $tz = (isset($_GET['tz'])) ? $_GET['tz'] : 'feed'; + $targ = (isset($_GET['targ'])) ? $_GET['targ'] : 'n'; + $html = (isset($_GET['html'])) ? $_GET['html'] : 'n'; + $utf = (isset($_GET['utf'])) ? $_GET['utf'] : 'y'; + $rss_box_id = (isset($_GET['rss_box_id'])) ? $_GET['rss_box_id'] : ''; + $pc = (isset($_GET['pc'])) ? $_GET['pc'] : 'n'; + + +// test for malicious use of script tages +if (strpos($src, '<script>')) { + $src = preg_replace("/(\<script)(.*?)(script>)/si", "SCRIPT DELETED", "$src"); + die("Warning! Attempt to inject javascript detected. 
Aborted and tracking log updated."); +} + + +// check for status of submit buttons + $generate = (isset($_GET['generate'])) ? $_GET['generate'] : ''; + if (isset($generate)) $generate = $_GET['generate']; + +// update to full descriptions for html turned on + if ($html=='a') $desc = 0; + +// build parameter string for the feed2js url + $options = ''; + if ($chan != 'n') $options .= "&chan=$chan"; + if ($num != 0) $options .= "&num=$num"; + if ($desc != 0) $options .= "&desc=$desc"; + if ($auth != 'n') $options .= "&au=$auth"; + if ($date != 'n') $options .= "&date=$date"; + if ($tz != 'feed') $options .= "&tz=$tz"; + if ($targ != 'n') $options .= "&targ=$targ"; + if ($html != 'n') $html_options = "&html=$html"; + if ($utf == 'y') { + $options .= '&utf=y'; + $utf_str = ' charset="UTF-8"'; // extra param for embed code + } else { + $utf_str = ''; + } + if ($rss_box_id != '') $options .= "&css=$rss_box_id"; + if ($pc == 'y') $options .= '&pc=y'; + + + +if ($generate) { + // URLs for a preview or a generated feed link + + $my_dir = 'http://' . $_SERVER['SERVER_NAME'] . dirname($_SERVER['PHP_SELF']); + + $rss_str = "$my_dir/feed2js.php?src=" . urlencode($src) . $options . $html_options; + + $noscript_rss_str = "$my_dir/feed2js.php?src=" . urlencode($src) . $options . 
'&html=y'; + +} + +?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> + +<html> +<head> + <title>Build a Feed with Feed2JS</title> + <link rel="stylesheet" href="style/main.css" media="all" /> +<script type="text/javascript" language="Javascript"> +<!-- +function query_str(form) { + + // builds a proper query string by extracting Javascript form variables + // so we can open a preview in a new window + options = encodeURIComponent(form.src.value); + + if (form.chan[2].checked) { + options += '&chan=n'; + } else if (form.chan[1].checked) { + options += '&chan=title'; + } + + if (form.num.value != 0) options += '&num=' + form.num.value; + if (form.desc.value != 1 && !form.html[0].checked) options += '&desc=' + form.desc.value; + + if (form.date[0].checked) options += '&date=y'; + if (form.tz.value != 'feed') options += '&tz=' + form.tz.value; + + if (form.html[0].checked) { + options += '&html=a'; + } else if (form.html[2].checked) { + options += '&html=p'; + } + + + options += '&targ=' + form.targ.value; + + if (form.utf.checked) options += '&utf=y'; + options += '&css=' + form.rss_box_id.value; + + if (form.pc[0].checked) options += '&pc=y'; + + if (form.au[0].checked) options += '&au=y'; + + return(options); + + +} +//--> +</script> + + <script src="popup.js" type="text/javascript" language="Javascript"> +</script> + +</head> +<body> +<div id="content"> +<h1>Feed2JS Build JavaScript and Preview</h1> +<p class="first">The tool below will help you format a feed's display with the information you want to use on your web site. All you need to enter is the URL for the RSS source, and select the desired options below. </p> + +<p>First, be sure to <strong>preview</strong> the feed to verify the content and format. Once the content is displayed how you like, just use the <strong>generate javascript</strong> button to get your code. 
Once the content looks okay, move on to our <a href="style.php">style tool</a> to make it pretty.</p> + +<?php if ($generate):?> + + +<h2>Get Your Code Here</h2> +<p class="first">Below is the code you need to copy and paste to your own web page to include this RSS feed. The NOSCRIPT tag provides a link to a HTML display of the feed for users who may not have JavaScript enabled. </p> +<form> +<span class="caption">cut and paste javascript:</span><br><textarea name="t" rows="8" cols="70"> +<script language="JavaScript" src="<?php echo htmlentities($rss_str)?>" <?php echo $utf_str?> type="text/javascript"></script> + +<noscript> +<a href="<?php echo htmlentities($noscript_rss_str)?>">View RSS feed</a> +</noscript> +</textarea> +</form> + + +<?php endif?> + +<form method="get" action="build.php" name="builder"> + +<p><strong>URL</strong> Enter the web address of the RSS Feed (must be in http:// format, not feed://)<br> + +<input type="text" name="src" size="50" value="<?php echo $src?>"> <br> +<span style="font-size:x-small">Note: Please verify the URL of your feed (make sure it presents raw RSS) and <a href="http://feedvalidator.org/" onClick="window.open('http://feedvalidator.org/check.cgi?url=' + encodeURIComponent(document.builder.src.value), 'check'); return false;">check that it is valid</a> before using this form.</span> +</p> + +<div id="badge" style="width:250px; padding:0;"> +<h3 class="badge-header">Show n' Tell!</h3> + <div align="center"> + <input type="button" name="preview" value="Preview Feed" onClick="pr=window.open('preview.php?src=' + query_str(document.builder), 'prev', 'scrollbars,resizable,left=20,screenX=20,top=40,screenY=40,height=580,width=700'); pr.focus();" + /> <br /> + <input type="submit" name="generate" value="Generate JavaScript" /> + </div> +</div> + + +<p><strong>Show channel?</strong> (yes/no/title) Display information about the publisher of the feed (yes=show the title and description; title= display title only, no=do not display 
anything) <br> +<input type="radio" name="chan" value="y" <?php if ($chan=='y') echo 'checked="checked"'?> /> yes <input type="radio" name="chan" value="title" <?php if ($chan=='title') echo 'checked="checked"'?>/> title <input type="radio" name="chan" value="n" <?php if ($chan=='n') echo 'checked="checked"'?>/> no</p> + +<p><strong>Number of items to display.</strong> Enter the number of items to be displayed (enter 0 to show all available)<br> +<input type="text" name="num" size="10" value="<?php echo $num?>"></p> + +<p><strong>Show/Hide item descriptions? How much?</strong> (0=no descriptions; 1=show full description text; n>1 = display first n characters of description; n=-1 do not link item title, just display item contents)<br> +<input type="text" name="desc" size="10" value="<?php echo $desc?>"></p> + +<p><strong>Show item author?</strong> (yes/no) Display the name of an item's author (yes=show the title and description; no=do not display anything) <br> +<input type="radio" name="au" value="y" <?php if ($auth=='y') echo 'checked="checked"'?> /> yes <input type="radio" name="au" value="n" <?php if ($auth=='n') echo 'checked="checked"'?>/> no</p> + +<p><strong>Use HTML in item display? 
</strong> ("yes" = use HTML from feed and the full item descriptions will be used, ignoring any character limit set above; "no" = output is text-only formatted by CSS; "preserve paragraphs" = no HTML but convert all RETURN/linefeeds to <br> to preserve paragraph breaks)<br> +<input type="radio" name="html" value="a" <?php if ($html=='a') echo 'checked="checked"'?>/> yes <input type="radio" name="html" value="n" <?php if ($html=='n') echo 'checked="checked"'?> /> no <input type="radio" name="html" value="p" <?php if ($html=='p') echo 'checked="checked"'?> /> preserve paragraphs only</p> + +<p><strong>Show item posting date?</strong> (yes/no) Display the time and date for each item.<br> +<input type="radio" name="date" value="y" <?php if ($date=='y') echo 'checked="checked"'?>/> yes <input type="radio" name="date" value="n" <?php if ($date!='y') echo 'checked="checked"'?> /> no</p> + +<p><strong>Time Zone Offset</strong> (+n/-n/'feed') Date and timer are converted to GMT time; to have display in local time, you must enter an offset from your current local time to <strong><?php echo gmdate("r")?> (GMT)</strong>. If your local time is 5 hours before GMT, enter <code>-5</code>. If your local time is 8 hours past GMT, enter <code>+8</code>. Fractional offsets such as +10:30 must be entered as decimal <code>+10.5</code>. 
If you prefer to just display the date is recorded in the RSS, use a value = <code>feed</code><br> +<input type="text" name="tz" size="10" value="<?php echo $tz?>"></p> + +<p><strong>Target links in the new window?</strong> (n="no, links open the same page", y="yes, open links in a new window", "xxxx" = open links in a frame named 'xxxx', 'popup' = use a <a href="popup.js">JavaScript function</a> <code>popupfeed()</code> to open in new window) <br> +<input type="text" name="targ" size="10" value="<?php echo $targ?>"></p> + +<p><strong>UTF-8 Character Encoding</strong><br> Required for many non-western language web pages and also may help if you see strange characters replacing quotes in your output (see <a href="http://feed2js.org/index.php?s=help#chars">help pages</a> for more information).<br /> +<input type="checkbox" name="utf" value="y" <?php if ($utf=='y') echo 'checked="checked"'?> /> use UTF-8 character encoding +</p> + +<p><strong>Podcast enclosures</strong><br> For RSS 2.0 feeds with enclosures, display link to media files<br /> +<input type="radio" name="pc" value="y" <?php if ($pc=='y') echo 'checked="checked"'?> /> yes +<input type="radio" name="pc" value="n" <?php if ($pc!='y') echo 'checked="checked"'?> /> no +</p> + +<p><strong>Custom CSS Class (advanced users)</strong> <br> Use to create different styles for multiple feeds per page. Specify class for content as <code>rss-box-XXXX</code> where XXXX is the value entered below. 
Style sheets must be created in accordance with <a href="style.php#2style">Feed2JS guidelines</a>.<br> +<input type="text" name="rss_box_id" size="10" value="<?php echo $rss_box_id?>"></p> + +</form> +</div> + +<?php include 'footer'?> + +</body> +</html> Added: trunk/treebase_feed/feed2js.php =================================================================== --- trunk/treebase_feed/feed2js.php (rev 0) +++ trunk/treebase_feed/feed2js.php 2012-01-11 16:59:50 UTC (rev 1012) @@ -0,0 +1,392 @@ +<?php +/* Feed2JS : RSS feed to JavaScript src file + + VERSION 2.3 (2011 jun 9) + + ABOUT + This PHP script will take an RSS feed as a value of src="...." + and return a JavaScript file that can be linked + remotely from any other web page. Output includes + site title, link, and description as well as item site, link, and + description with these outouts contolled by extra parameters. + + Developed by Alan Levine initially released 13.may.2004 + http://cogdogblog.com/ + + PRIMARY SITE: + http://feed2js.org/ + + CODE: + http://code.google.com/p/feed2js/ + + Feed2JS makes use of the Magpie RSS parser from + http://magpierss.sourceforge.net/ + + ------------- small print --------------------------------------- + GNU General Public License + Copyright (C) 2004-2010 Alan Levine + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details + http://www.gnu.org/licenses/gpl.html + ------------- small print --------------------------------------- + +*/ + +// ERROR CHECKING FOR NO SOURCE ------------------------------- + +$script_msg = ''; +$src = (isset($_GET['src'])) ? $_GET['src'] : ''; + +// trap for missing src param for the feed, use a dummy one so it gets displayed. +if (!$src or strpos($src, 'http://')!=0) $src= 'http://' . $_SERVER['SERVER_NAME'] . dirname($_SERVER['PHP_SELF']) . '/nosource.php'; + +// test for malicious use of script tages +if (strpos($src, '<script>')) { + $src = preg_replace("/(\<script)(.*?)(script>)/si", "SCRIPT DELETED", "$src"); + die("Warning! Attempt to inject javascript detected. Aborted and tracking log updated."); +} + +// MAGPIE SETUP ---------------------------------------------------- +// access configuration settings +require_once('feed2js_config.php'); + +if (!in_array(urldecode($_GET["src"]), $allowedFeeds)) { + die("Not Allowed"); +} + +// check for utf encoding type +$utf = (isset($_GET['utf'])) ? $_GET['utf'] : 'n'; + +if ($utf == 'y') { + define('MAGPIE_CACHE_DIR', MAGPIE_DIR . 'cache_utf8/'); + // chacrater encoding + define('MAGPIE_OUTPUT_ENCODING', 'UTF-8'); + + +} else { + define('MAGPIE_CACHE_DIR', MAGPIE_DIR . 'cache/'); + define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1'); +} + +// GET VARIABLES --------------------------------------------- +// retrieve values from posted variables + +// flag to show channel info +$chan = (isset($_GET['chan'])) ? $_GET['chan'] : 'n'; + +// variable to limit number of displayed items; default = 0 (show all, 100 is a safe bet to list a big list of feeds) + +$num = (isset($_GET['num'])) ? $_GET['num'] : 0; +if ($num==0) $num = 100; + +// indicator to show item description, 0 = no; 1=all; n>1 = characters to display +// values of -1 indicate to displa item without the title as a link +// (default=0) +$desc = (isset($_GET['desc'])) ? 
$_GET['desc'] : 0; + +// flag to show author of items, values: no/yes (default=no) +$auth = (isset($_GET['au'])) ? 'y' : 'n'; + +// flag to show date of items, values: no/yes (default=no) +$date = (isset($_GET['date'])) ? $_GET['date'] : 'n'; + +// time zone offset for making local time, +// e.g. +7, =-10.5; 'feed' = print the time string in the RSS w/o conversion +$tz = (isset($_GET['tz'])) ? $_GET['tz'] : 'feed'; + + +// flag to open target window in new window; n = same window, y = new window, +// other = targeted window, 'popup' = call JavaScript function popupfeed() to display +// in new window (default is n) + +$targ = (isset($_GET['targ'])) ? $_GET['targ'] : 'n'; +if ($targ == 'n') { + $target_window = ' target="_self"'; +} elseif ($targ == 'y' ) { + $target_window = ' target="_blank"'; +} elseif ($targ == 'popup') { + $target_window = ' onClick="popupfeed(this.href);return false"'; +} else { + $target_window = ' target="' . $targ . '"'; +} + +// flag to show feed as full html output rather than JavaScript, used for alternative +// views for JavaScript-less users. +// y = display html only for non js browsers (NO LONGER USED) +// n = default (JavaScript view) +// a = display javascript output but allow HTML +// p = display text only items but convert linefeeds to BR tags + +// default setting for no conversion of linebreaks +$html = (isset($_GET['html'])) ? $_GET['html'] : 'n'; + +$br = ' '; +if ($html == 'a') { + $desc = 1; +} elseif ($html == 'p') { + $br = '<br />'; +} + +// optional parameter to use different class for the CSS container +$rss_box_id = (isset($_GET['css'])) ? '-' . $_GET['css'] : ''; + +// optional parameter to use different class for the CSS container +$play_podcast = (isset($_GET['pc'])) ? $_GET['pc'] : 'n'; + + +// PARSE FEED and GENERATE OUTPUT ------------------------------- +// This is where it all happens! 
+ + +// check if site has a setting to restrict to a url +if (isset($restrict_url)) { + $src_host = substr($src, 7); + $src_pos = strpos($src_host,"/"); + if ($src_pos) { + $src_host = substr($src_host,0, $src_pos); + } +} +if (isset($restrict_url) && substr($src_host, strlen($src_host)-strlen($restrict_url)) != $restrict_url) { + $str.= "document.write('<div class=\"rss-box" . $rss_box_id . + "\"><p class=\"rss-item\"><em>Error:</em> on feed <strong>" . + $src . "</strong>. " . + "Feeds are allowed only from URLs from the site http://*" . + $restrict_url . "</p></div>');\n"; + +} else { + + + $rss = @fetch_rss( $src ); + + // begin javascript output string for channel info + $str= "document.write('<div class=\"rss-box" . $rss_box_id . "\">');\n"; + + + // no feed found by magpie, return error statement + if (!$rss) { + $str.= "document.write('<p class=\"rss-item\">$script_msg<em>Error:</em> Feed failed! Causes may be (1) No data found for RSS feed $src; (2) There are no items are available for this feed; (3) The RSS feed does not validate.<br /><br /> Please verify that the URL <a href=\"$src\">$src</a> works first in your browser and that the feed passes a <a href=\"http://feedvalidator.org/check.cgi?url=" . urlencode($src) . "\">validator test</a>.</p></div>');\n"; + + + } else { + + + // Create CONNECTION CONFIRM + // create output string for local javascript variable to let + // browser know that the server has been contacted + $feedcheck_str = "feed2js_ck = true;\n\n"; + + // we have a feed, so let's process + if ($chan == 'y') { + + // output channel title and description + $str.= "document.write('<p class=\"rss-title\"><a class=\"rss-title\" href=\"" . trim($rss->channel['link']) . '"' . $target_window . ">" . addslashes(strip_returns($rss->channel['title'])) . "</a><br /><span class=\"rss-item\">" . addslashes(strip_returns(strip_tags($rss->channel['description']))) . 
"</span></p>');\n"; + + } elseif ($chan == 'title') { + // output title only + $str.= "document.write('<p class=\"rss-title\"><a class=\"rss-title\" href=\"" . trim($rss->channel['link']) . '"' . $target_window . ">" . addslashes(strip_returns($rss->channel['title'])) . "</a></p>');\n"; + + } + + // begin item listing + $str.= "document.write('<ul class=\"rss-items\">');\n"; + + // Walk the items and process each one + $all_items = array_slice($rss->items, 0, $num); + + foreach ( $all_items as $item ) { + + // set defaults thanks RPFK + if (!isset($item['summary'])) $item['summary'] = ''; + $more_link = ''; + + // create output for item author + + + $author_str = ''; + if ($auth == 'y') { + if (isset($item['dc']['creator'])) { + $author_str = ' <span class="rss-item-auth">(' . addslashes(strip_tags($item['dc']['creator'])) . ')</span>'; + + } else { + if (isset($item['author_name'])) { + $author_str = ' <span class="rss-item-auth">(' . addslashes(strip_tags($item['author_name'])) . ')</span>'; + } + } + + } + + + + if ($item['link']) { + // link url + $my_url = addslashes($item['link']); + } elseif ($item['guid']) { + // feeds lacking item -> link + $my_url = ($item['guid']); + } + + + if ($desc < 0) { + $str.= "document.write('<li class=\"rss-item\">');\n"; + + } elseif ($item['title']) { + // format item title + $my_title = addslashes(strip_returns($item['title'])); + + + + // write the title strng + $str.= "document.write('<li class=\"rss-item\"><a class=\"rss-item\" href=\"" . trim($my_url) . "\"" . $target_window . '>' . $my_title . '</a>' . $author_str . "<br />');\n"; + + + } else { + // if no title, build a link to tag on the description + $str.= "document.write('<li class=\"rss-item\">');\n"; + $more_link = " <a class=\"rss-item\" href=\"" . trim($my_url) . '"' . $target_window . 
">«details»</a>"; + } + + // print out date if option indicated + + if ($date == 'y') { + + if ($tz == 'feed') { + // echo the date/time stamp reported in the feed + + if ($item['pubdate'] != '') { + // RSS 2.0 is already formatted, so just use it + //$pretty_date = $item['pubdate']; + $pretty_date = date($date_format, strtotime($item['pubdate'])); + } elseif ($item['published'] != "") { + // ATOM 1.0 format, remove the "T" and "Z" and the time zone offset + $pretty_date = str_replace("T", " ", $item['published']); + $pretty_date= str_replace("Z", " ", $pretty_date); + + } elseif ($item['issued'] != "") { + // ATOM 0.3 format, remove the "T" and "Z" and the time zone offset + $pretty_date = str_replace("T", " ", $item['issued']); + $pretty_date= str_replace("Z", " ", $pretty_date); + } elseif ( $item['dc']['date'] != "") { + // RSS 1.0, remove the "T" and the time zone offset + $pretty_date = str_replace("T", " ", $item['dc']['date']); + $pretty_date = substr($pretty_date, 0,-6); + } else { + + // no time/date stamp, + $pretty_date = 'n/a'; + } + + + } else { + // convert to local time via conversion to GMT + offset + + // adjust local server time to GMT and then adjust time according to user + // entered offset. + + // let's see what kind of timestamps we can pull... 
+ if ($item['date_timestamp'] != "") { + $ts = $item['date_timestamp']; + } elseif ($item['published'] != "") { + $ts = strtotime($item['published']); + } elseif ($item['issued'] != "") { + $ts = strtotime($item['issued']); + } elseif ( $item['dc']['date'] != "") { + $ts = strtotime($item['dc']['date']); + } else { + $ts = time(); + } + + $pretty_date = date($date_format, $ts - $tz_offset + $tz * 3600); + + } + + $str.= "document.write('<span class=\"rss-date\">$pretty_date</span><br />');\n"; + } + + // link to podcast media if available + + if ($play_podcast == 'y' and is_array($item['enclosure'])) { + $str.= "document.write('<div class=\"pod-play-box\">');\n"; + for ($i = 0; $i < count($item['enclosure']); $i++) { + + // display only if enclosure is a valid URL + //if (strpos($item['enclosure'][$i]['url'], 'http://')!=0) { + $str.= "document.write('<a class=\"pod-play\" href=\"" . trim($item['enclosure'][$i]['url']) . "\" title=\"Play Now\" target=\"_blank\"><em>Play</em> <span> " . substr(trim($item['enclosure'][$i]['url']), -3) . "</span></a> ');\n"; + //} + + } + + $str.= "document.write('</div>');\n"; + + } + + + // output description of item if desired + if ($desc) { + + if ($item['atom_content']) { + // Atom content - note that wordpress.com feeds return bad data here "A" + // so revert to description if this is the case. + $my_blurb = ($item['atom_content'] == "A") ? 
$item['description'] : html_entity_decode ( $item['atom_content'], ENT_NOQUOTES, MAGPIE_OUTPUT_ENCODING); + + } else if ($item['content']) { + + + + // Atom/encoded content support (thanks David Carter-Tod) + + $my_blurb = html_entity_decode ( $item['content'], ENT_NOQUOTES, MAGPIE_OUTPUT_ENCODING); + + + } else { + $my_blurb = $item['summary']; + } + + // strip html + if ($html != 'a') $my_blurb = strip_tags($my_blurb); + + // trim descriptions + if ($desc > 1) { + + // display specified substring numbers of chars; + // html is stripped to prevent cut off tags + // make sure we don't chop UTF-8 characters + + + if ($utf == 'y') { + $my_blurb = mb_substr($my_blurb, 0, $desc, 'UTF-8') . '...'; + } else { + $my_blurb = substr($my_blurb, 0, $desc) . '...'; + } + + } + + + $str.= "document.write('" . addslashes(strip_returns($my_blurb, $br)) . "');\n"; + + } + + $str.= "document.write('$more_link</li>');\n"; + } + + + $str .= "document.write('</ul></div>');\n"; + } // end restrict_url +} + +// Render as JavaScript +// START OUTPUT +// headers to tell browser this is a JS file +if ($rss) header("Content-type: application/x-javascript"); + +// Spit out the results as the series of JS statements +echo $feedcheck_str . 
$str; + + +?> Added: trunk/treebase_feed/feed2js_config.php =================================================================== --- trunk/treebase_feed/feed2js_config.php (rev 0) +++ trunk/treebase_feed/feed2js_config.php 2012-01-11 16:59:50 UTC (rev 1012) @@ -0,0 +1,84 @@ +<?php +/* Feed2JS : RSS feed to JavaScript Configuration include + + Use this include to establish server specific paths + and other common functions used by the feed2js.php + + See main script for all the gory details or the Google Code site + http://code.google.com/p/feed2js/ + + created 10.sep.2004 +*/ + + +/* Check to make sure that only the allowed URLs are accepted */ +define('TREEBASE_RSS_FEED', 'http://pipes.yahoo.com/pipes/pipe.run?_id=41dc2d1544150c1b8e69367059f26950&_render=rss'); +$allowedFeeds = array(TREEBASE_RSS_FEED); + +// MAGPIE SETUP ---------------------------------------------------- +// Define path to Magpie files and load library +// The easiest setup is to put the 4 Magpie include +// files in the same directory: +// define('MAGPIE_DIR', './') + +// Otherwise, provide a full valid file path to the directory +// where magpie sites + +define('MAGPIE_DIR', './magpie/'); + +// access magpie libraries +require_once(MAGPIE_DIR.'rss_fetch.inc'); +require_once(MAGPIE_DIR.'rss_utils.inc'); + +// value of 2 optionally show lots of debugging info but breaks JavaScript +// This should be set to 0 unless debugging +define('MAGPIE_DEBUG', 0); + +// Define cache age in seconds. +define('MAGPIE_CACHE_AGE', 60*60); + +// OTHER SETTIINGS ---------------------------------------------- +// Output spec for item date string if used +// see http://www.php.net/manual/en/function.date.php +//$date_format = "F d, Y h:i:s a"; +$date_format = "F d, Y"; + + +// server time zone offset from GMT +// If this line generates errors (common on Windoze servers, +// then figure out your time zone offset from GMT and enter +// manually, e.g. 
$tz_offset = -7; + +$tz_offset = gmmktime(0,0,0,1,1,1970) - mktime(0,0,0,1,1,1970); + +// ERROR Handling ------------------------------------------------ + +// Report all errors except E_NOTICE +// This is the default value set in php.ini for Apache but often not Windows +// We recommend changing the value to 0 once your scripts are working +ini_set('display_errors', 1); +ini_set('error_reporting', E_ALL^ E_NOTICE); + + +// Restrict RSS url to domain +// Example: www.example.org => allows www.example.org and mywww.example.org +// Example: .example.org => allows www.example.org and other.example.org + +// remove the comment here to activate url restriction +//$restrict_url = "pipes.yahoo.com"; + +// comment out this line to activate url restriction +//unset($restrict_url); + + +// Utility to remove return characters from strings that might +// pollute JavaScript commands. While we are at it, substitute +// valid single quotes as well and get rid of any escaped quote +// characters +function strip_returns ($text, $linefeed=" ") { + $subquotes = trim( preg_replace( '/\s+/', ' ', $text ) ); + return preg_replace("(\r\n|\n|\r)", $linefeed, $subquotes); +} + + +?> \ No newline at end of file Added: trunk/treebase_feed/feed2php.inc =================================================================== --- trunk/treebase_feed/feed2php.inc (rev 0) +++ trunk/treebase_feed/feed2php.inc 2012-01-11 16:59:50 UTC (rev 1012) @@ -0,0 +1,346 @@ +<?php +/* Feed2inc : RSS feed to PHP include file + + ABOUT + This PHP code can be used as an include to provide the + same functionality of our Feed2JS concept, but without + the need to generate content via JavaScript + + Developed by Alan Levine + http://cogdogblog.com/ + + This is a modified version of Fee22JS and merely replaces + the output of Javascript to that for a PHP. See the original + feed2js.php for change history. 
USAGE:
See http://feed2js.org/index.php?s=php

CODE:
http://code.google.com/p/feed2js/


This makes use of the Magpie RSS parser from
http://magpierss.sourceforge.net/

------------- small print ---------------------------------------
GNU General Public License
Copyright (C) 2004-2010 Alan Levine
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details
http://www.gnu.org/licenses/gpl.html
------------- small print ---------------------------------------

*/

require_once('feed2js_config.php');

// NOTE(review): the option variables read below ($src, $utf, $chan, $num, $desc,
// $date, $tz, $targ, $html, $css, $pc) are never assigned in this script, and
// neither are MAGPIE_DIR, $date_format, $tz_offset, fetch_rss() or
// strip_returns(). Presumably they come from feed2js_config.php or from the page
// that includes this file -- confirm against those files.

// Trap a missing or non-HTTP(S) src param for the feed and use a dummy one so
// something gets displayed.
// The previous test was "strpos($src, 'http://') != 0". strpos() returns false
// when the needle is absent and (false != 0) is false, so any value that did not
// contain "http://" at all slipped through. Anchor the scheme check instead;
// https:// is accepted because the old test let it through as well.
if (empty($src) or !preg_match('#^https?://#i', $src)) {
	$src = 'http://' . $_SERVER['SERVER_NAME'] . dirname($_SERVER['PHP_SELF']) . '/nosource.php';
}

// check for utf encoding type
if (!isset($utf)) $utf = 'n';

if ($utf == 'y') {
	define('MAGPIE_CACHE_DIR', MAGPIE_DIR . 'cache_utf8/');
	// character encoding
	define('MAGPIE_OUTPUT_ENCODING', 'UTF-8');
} else {
	define('MAGPIE_CACHE_DIR', MAGPIE_DIR . 'cache/');
	define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1');
}


// GET VARIABLES ---------------------------------------------
// retrieve values from posted variables

// flag to show channel info: 'y' = title + description, 'title' = title only
if (!isset($chan)) $chan = 'n';

// variable to limit number of displayed items; 0 or unset falls back to 100
if (!isset($num)) $num = 0;
if ($num == 0) $num = 100;

// indicator to show item description, 0 = no; 1 = all; n>1 = characters to display
// values of -1 indicate to display item without the title as a link
// (default=0)
if (!isset($desc)) $desc = 0;

// flag to show author of items; switched on by the mere presence of ?au=...
$auth = (isset($_GET['au'])) ? 'y' : 'n';

// flag to show date of posts, values: n/y (default=n)
$date = (isset($date)) ? $date : 'n';

// time zone offset (in hours) for making local time,
// e.g. +7, -10.5; 'feed' = print the time string in the RSS w/o conversion
$tz = (isset($tz)) ? $tz : 'feed';

// flag to open target window in new window; n = same window, y = new window,
// other = targeted window, 'popup' = call JavaScript function popupfeed to display
// in new window
// (default is n)
if (!isset($targ)) $targ = 'n';

if ($targ == 'n') {
	$target_window = ' target="_self"';
} elseif ($targ == 'y') {
	$target_window = ' target="_blank"';
} elseif ($targ == 'popup') {
	$target_window = ' onClick="popupfeed(this.href);return false"';
} else {
	// free-form window name: escape it so it cannot break out of the attribute
	$target_window = ' target="' . htmlspecialchars($targ, ENT_QUOTES) . '"';
}

// flag to show feed as full html output rather than JavaScript, used for alternative
// views for JavaScript-less users.
//   y = display html only for non js browsers
//   n = default (JavaScript view)
//   a = display javascript output but allow HTML
//   p = display text only items but convert linefeeds to BR tags

// default setting for no conversion of linebreaks
if (!isset($html)) $html = 'n';

$br = ' ';
if ($html == 'a') {
	$desc = 1;
} elseif ($html == 'p') {
	$br = '<br />';
}

// optional parameter to use different class for the CSS container
if (isset($css)) {
	$rss_box_id = '-' . htmlspecialchars($css, ENT_QUOTES);
} else {
	$rss_box_id = '';
}

// flag to list podcast enclosures under each item
if (isset($pc)) {
	$play_podcast = $pc;
} else {
	$play_podcast = 'n';
}


// PARSE FEED and GENERATE OUTPUT -------------------------------
// This is where it all happens!

// Fetch the data, thanks Magpie. Errors are deliberately silenced; a falsy
// result is reported to the reader below.
$rss = @fetch_rss( $src );

// begin output string with the container for channel info and items
$str = "<div class=\"rss-box" . $rss_box_id . "\">\n";

// no feed found by magpie, return error statement
if (!$rss) {
	// error, nothing grabbed; $src is request-supplied, so escape it for HTML
	$safe_src = htmlspecialchars($src, ENT_QUOTES);
	$str.= "<p class=\"rss-item\"><em>Error:</em> Feed failed! Causes may be (1) No data found for RSS feed $safe_src; (2) There are no items are available for this feed; (3) The RSS feed does not validate.<br /><br /> Please verify that the URL <a href=\"$safe_src\">$safe_src</a> works first in your browser and that the feed passes a <a href=\"http://feedvalidator.org/check.cgi?url=" . urlencode($src) . "\">validator test</a>.</p>\n";
} else {

	if ($chan == 'y') {
		// output channel title and description
		$str.= "<p class=\"rss-title\"><a class=\"rss-title\" href=\"" . trim($rss->channel['link']) . '"' . $target_window . ">" . strip_returns($rss->channel['title']) . "</a><br /><span class=\"rss-item\">" . strip_returns(strip_tags($rss->channel['description'])) . "</span></p>\n";

	} elseif ($chan == 'title') {
		// output title only
		$str.= "<p class=\"rss-title\"><a class=\"rss-title\" href=\"" . trim($rss->channel['link']) . '"' . $target_window . ">" . strip_returns($rss->channel['title']) . "</a></p>\n";
	}

	// begin item listing
	$str.= "<ul class=\"rss-items\">\n";

	// Walk the items and process each one
	$all_items = array_slice($rss->items, 0, $num);

	foreach ( $all_items as $item ) {

		// Reset per-item state. Previously these were only assigned on some
		// branches, so they were undefined for the first item or carried the
		// previous item's URL / «details» link into the next one.
		$my_url = '';
		$more_link = '';

		// create output for item author
		$author_str = '';
		if ($auth == 'y') {
			if (isset($item['dc']['creator'])) {
				$author_str = ' <span class="rss-item-auth">(' . addslashes(strip_tags($item['dc']['creator'])) . ')</span>';
			} elseif (isset($item['author_name'])) {
				$author_str = ' <span class="rss-item-auth">(' . addslashes(strip_tags($item['author_name'])) . ')</span>';
			}
		}

		if (!empty($item['link'])) {
			// link url
			$my_url = $item['link'];
		} elseif (!empty($item['guid'])) {
			// feeds lacking item -> link
			$my_url = $item['guid'];
		}

		if ($desc < 0) {
			// description-only mode: no linked title
			$str.= "<li class=\"rss-item\">\n";

		} elseif (!empty($item['title'])) {
			// format item title
			$my_title = strip_returns($item['title']);

			// create a title attribute from the first 60 characters of the summary. thanks Seb!
			$summary = isset($item['summary']) ? $item['summary'] : '';
			$title_str = substr(strip_returns(htmlspecialchars(strip_tags($summary))), 0, 60) . '...';

			// write the item with a title attribute
			$str.= "<li class=\"rss-item\"><a class=\"rss-item\" href=\"" . trim($my_url) . "\" title=\"$title_str\"" . $target_window . ">" . $my_title . '</a> ' . $author_str . "<br />\n";

		} else {
			// if no title, build a link to tag on the description
			$str.= "<li class=\"rss-item\">\n";
			$more_link = " <a class=\"rss-item\" href=\"" . trim($my_url) . '"' . $target_window . ">«details»</a>";
		}

		// print out date if option indicated and feed returns a value.
		// The "if ($date == 'y')" guard was missing, which left $date unused
		// and the script with one closing brace too many.
		if ($date == 'y') {

			if ($tz == 'feed') {
				// echo the date/time stamp reported in the feed

				if (!empty($item['pubdate'])) {
					// RSS 2.0 is already formatted, so just use it
					$pretty_date = $item['pubdate'];
				} elseif (!empty($item['published'])) {
					// ATOM 1.0 format, replace the "T" and "Z" markers with spaces
					$pretty_date = str_replace("T", " ", $item['published']);
					$pretty_date = str_replace("Z", " ", $pretty_date);
				} elseif (!empty($item['issued'])) {
					// ATOM 0.3 format, replace the "T" and "Z" markers with spaces
					$pretty_date = str_replace("T", " ", $item['issued']);
					$pretty_date = str_replace("Z", " ", $pretty_date);
				} elseif (!empty($item['dc']['date'])) {
					// RSS 1.0, remove the "T" and drop the last 6 characters (time zone offset)
					$pretty_date = str_replace("T", " ", $item['dc']['date']);
					$pretty_date = substr($pretty_date, 0, -6);
				} else {
					// no time/date stamp
					$pretty_date = 'n/a';
				}

			} else {
				// convert to local time: subtract $tz_offset, then add the
				// requested offset of $tz hours.

				// let's see what kind of timestamps we can pull...
				// Use the date_timestamp field from Magpie 0.71 when present.
				if (!empty($item['date_timestamp'])) {
					$ts = $item['date_timestamp'];
				} elseif (!empty($item['published'])) {
					$ts = strtotime($item['published']);
				} elseif (!empty($item['issued'])) {
					$ts = strtotime($item['issued']);
				} elseif (!empty($item['dc']['date'])) {
					$ts = strtotime($item['dc']['date']);
				} else {
					// nothing usable in the feed: fall back to "now"
					$ts = time();
				}

				$pretty_date = date($date_format, $ts - $tz_offset + $tz * 3600);
			}

			$str.= "<span class=\"rss-date\">$pretty_date</span><br />\n";
		}

		// link to podcast media if available
		if ($play_podcast == 'y' and isset($item['enclosure']) and is_array($item['enclosure'])) {
			$str.= "<div class=\"pod-play-box\">Media: ";

			foreach ($item['enclosure'] as $enclosure) {
				$media_url = trim($enclosure['url']);

				// NOTE(review): the markup nests an <a> inside an unclosed
				// <a class="pod-play">; kept byte-identical because the style
				// sheets that target it are not visible here.
				$str.= "<a class=\"pod-play\"><a href=\"" . $media_url . "\" title=\"Play Now\" target=\"_blank\"><em>Play</em> <span>" . substr($media_url, -3) . "</span></a> ";
			}
			$str.= "</div>";
		}

		// output description of item if desired
		if ($desc) {

			if (!empty($item['atom_content'])) {
				// Atom content - note that wordpress.com feeds return bad data here "A"
				// so revert to description if this is the case.
				$my_blurb = ($item['atom_content'] == "A") ? $item['description'] : html_entity_decode($item['atom_content'], ENT_NOQUOTES, MAGPIE_OUTPUT_ENCODING);

			} elseif (!empty($item['content'])) {
				// Atom/encoded content support (thanks David Carter-Tod)
				$my_blurb = html_entity_decode($item['content'], ENT_NOQUOTES, MAGPIE_OUTPUT_ENCODING);

			} else {
				$my_blurb = isset($item['summary']) ? $item['summary'] : '';
			}

			// strip html unless HTML output was explicitly allowed
			if ($html != 'a') $my_blurb = strip_tags($my_blurb);

			// trim descriptions
			if ($desc > 1) {
				// display specified substring numbers of chars;
				// html is stripped to prevent cut off tags
				// make sure we dont chop UTF-8 characters
				if ($utf == 'y') {
					$my_blurb = mb_substr($my_blurb, 0, $desc, 'UTF-8') . '...';
				} else {
					$my_blurb = substr($my_blurb, 0, $desc) . '...';
				}
			}

			$str.= strip_returns($my_blurb, $br) . "\n";
		}

		$str.= "$more_link</li>\n";
	}

	// close the list only on the branch that opened it
	$str .= "</ul>";
}

$str .= "</div>\n";
echo $str;

?>
Added: trunk/treebase_feed/footer
===================================================================
--- trunk/treebase_feed/footer (rev 0)
+++ trunk/treebase_feed/footer 2012-01-11 16:59:50 UTC (rev 1012)
@@ -0,0 +1,9 @@
+<div id="footer">
+<p class="smallprint">
+<strong>Feed2JS v1.93</strong><br>
+http://<?php echo $_SERVER['SERVER_NAME'] . $_SERVER['PHP_SELF']?><br><br>
+Feed2JS code is Copyright (C) 2004-<?php echo date("Y")?> Created by <a href="http://cogdogblog.com/">Alan Levine</a>. It is available as both a free service at <a href="http://feed2js.org/">http://feed2js.org/</a> and open source code from <a href="https://eduforge.org/projects/feed2js/">eduforge</a>.
<br><br> +This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.<br><br> + +This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details <a href="http://www.gnu.org/licenses/gpl.html">http://www.gnu.org/licenses/gpl.html</a></p> +</div> Property changes on: trunk/treebase_feed/magpie ___________________________________________________________________ Added: bugtraq:number + true Property changes on: trunk/treebase_feed/magpie/cache ___________________________________________________________________ Added: bugtraq:number + true Property changes on: trunk/treebase_feed/magpie/cache_utf8 ___________________________________________________________________ Added: bugtraq:number + true Property changes on: trunk/treebase_feed/magpie/extlib ___________________________________________________________________ Added: bugtraq:number + true Added: trunk/treebase_feed/magpie/extlib/Snoopy.class.inc =================================================================== --- trunk/treebase_feed/magpie/extlib/Snoopy.class.inc (rev 0) +++ trunk/treebase_feed/magpie/extlib/Snoopy.class.inc 2012-01-11 16:59:50 UTC (rev 1012) @@ -0,0 +1,900 @@ +<?php + +/************************************************* + +Snoopy - the PHP net client +Author: Monte Ohrt <mo...@is...> +Copyright (c): 1999-2000 ispi, all rights reserved +Version: 1.0 + + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +You may contact the author of Snoopy by e-mail at: +m...@is... + +Or, write to: +Monte Ohrt +CTO, ispi +237 S. 70th suite 220 +Lincoln, NE 68510 + +The latest version of Snoopy can be obtained from: +http://snoopy.sourceforge.com + +*************************************************/ + +class Snoopy +{ + /**** Public variables ****/ + + /* user definable vars */ + + var $host = "www.php.net"; // host name we are connecting to + var $port = 80; // port we are connecting to + var $proxy_host = ""; // proxy host to use + var $proxy_port = ""; // proxy port to use + var $agent = "Snoopy v1.0"; // agent we masquerade as + var $referer = ""; // referer info to pass + var $cookies = array(); // array of cookies to pass + // $cookies["username"]="joe"; + var $rawheaders = array(); // array of raw headers to send + // $rawheaders["Content-type"]="text/html"; + + var $maxredirs = 5; // http redirection depth maximum. 0 = disallow + var $lastredirectaddr = ""; // contains address of last redirected address + var $offsiteok = true; // allows redirection off-site + var $maxframes = 0; // frame content depth maximum. 0 = disallow + var $expandlinks = true; // expand links to fully qualified URLs. + // this only applies to fetchlinks() + // or submitlinks() + var $passcookies = true; // pass set cookies back through redirects + // NOTE: this currently does not respect + // dates, domains or paths. 
+ + var $user = ""; // user for http authentication + var $pass = ""; // password for http authentication + + // http accept types + var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; + + var $results = ""; // where the content is put + + var $error = ""; // error messages sent here + var $response_code = ""; // response code returned from server + var $headers = array(); // headers returned from server sent here + var $maxlength = 500000; // max return data length (body) + var $read_timeout = 0; // timeout on read operations, in seconds + // supported only since PHP 4 Beta 4 + // set to 0 to disallow timeouts + var $timed_out = false; // if a read operation timed out + var $status = 0; // http request status + + var $curl_path = "/usr/bin/curl"; + // Snoopy will use cURL for fetching + // SSL content if a full system path to + // the cURL binary is supplied here. + // set to false if you do not have + // cURL installed. See http://curl.haxx.se + // for details on installing cURL. + // Snoopy does *not* use the cURL + // library functions built into php, + // as these functions are not stable + // as of this Snoopy release. + + // send Accept-encoding: gzip? 
var $use_gzip = true;

/**** Private variables ****/

var $_maxlinelen = 4096; // max line length (headers)

var $_httpmethod = "GET"; // default http request method
var $_httpversion = "HTTP/1.0"; // default http request version
var $_submit_method = "POST"; // default submit method
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
var $_redirectaddr = false; // will be set if page fetched is a redirect
var $_redirectdepth = 0; // increments on an http redirect
var $_frameurls = array(); // frame src urls
var $_framedepth = 0; // increments on frame depth

var $_isproxy = false; // set if using a proxy server
var $_fp_timeout = 30; // timeout for socket connection

/*======================================================================*\
    Function:   fetch
    Purpose:    fetch the contents of a web page
                (and possibly other protocols in the
                future like ftp, nntp, gopher, etc.)
    Input:      $URI    the location of the page to fetch
    Output:     true when the request was issued, false when the
                connection failed, curl is unusable for https, or the
                scheme is not http/https ($this->error is set for the
                last two cases). The fetched text is left in
                $this->results by the request helpers
                (presumably -- they are defined outside this span).
\*======================================================================*/

function fetch($URI)
{
    //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS);
    $URI_PARTS = parse_url($URI);
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    // parse_url() omits components that are absent, so guard every lookup.
    // URI schemes are case-insensitive, hence the strtolower().
    $scheme = isset($URI_PARTS["scheme"]) ? strtolower($URI_PARTS["scheme"]) : "";

    // Request target used when no proxy is involved: path plus optional query.
    // The https branch used to test $URI_PARTS["query"] for truthiness, which
    // raised a notice when it was missing and dropped a query of "0".
    $path = (isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "")
          . (isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : "");

    switch($scheme)
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if(!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if(!$this->_connect($fp))
                return false;

            if($this->_isproxy)
            {
                // using proxy, send entire URI
                $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
            }
            else
            {
                // no proxy, send only the path
                $this->_httprequest($path,$fp,$URI,$this->_httpmethod);
            }

            $this->_disconnect($fp);

            $this->_fetch_follow_redirect();
            $this->_fetch_frames();
            return true;

        case "https":
            if(!$this->curl_path || (!is_executable($this->curl_path))) {
                $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                return false;
            }
            $this->host = $URI_PARTS["host"];
            if(!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if($this->_isproxy)
            {
                // using proxy, send entire URI
                $this->_httpsrequest($URI,$URI,$this->_httpmethod);
            }
            else
            {
                // no proxy, send only the path
                $this->_httpsrequest($path,$URI,$this->_httpmethod);
            }

            $this->_fetch_follow_redirect();
            $this->_fetch_frames();
            return true;

        default:
            // not a valid protocol. The message used to be single-quoted,
            // so it ended in a literal backslash-n instead of a newline.
            $this->error = "Invalid protocol \"".$scheme."\"\n";
            return false;
    }
}

/*======================================================================*\
    Function:   _fetch_follow_redirect
    Purpose:    helper for fetch(): if the last request set
                $this->_redirectaddr, follow it, as long as the redirect
                depth is below $this->maxredirs and the target is on
                $this->host or $this->offsiteok is true
    Input:      none
    Output:     none (re-enters fetch())
\*======================================================================*/

function _fetch_follow_redirect()
{
    if(!$this->_redirectaddr)
        return;

    /* url was redirected, check if we've hit the max depth */
    if($this->maxredirs <= $this->_redirectdepth)
        return;

    // only follow redirect if it's on this site, or offsiteok is true
    if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
    {
        /* follow the redirect */
        $this->_redirectdepth++;
        $this->lastredirectaddr=$this->_redirectaddr;
        $this->fetch($this->_redirectaddr);
    }
}

/*======================================================================*\
    Function:   _fetch_frames
    Purpose:    helper for fetch(): fetch the URLs queued in
                $this->_frameurls while the frame depth is below
                $this->maxframes
    Input:      none
    Output:     none (re-enters fetch() once per frame URL)
\*======================================================================*/

function _fetch_frames()
{
    if($this->_framedepth >= $this->maxframes || count($this->_frameurls) == 0)
        return;

    // take a private copy and clear the queue, because the nested fetch()
    // calls may queue further frame URLs
    $frameurls = $this->_frameurls;
    $this->_frameurls = array();

    // foreach instead of each(): each() no longer exists in PHP 8
    foreach($frameurls as $frameurl)
    {
        if($this->_framedepth >= $this->maxframes)
            break;

        $this->fetch($frameurl);
        $this->_framedepth++;
    }
}



/*======================================================================*\
    Private functions
\*======================================================================*/


/*======================================================================*\
    Function:   _striplinks
    Purpose:    strip the hyperlinks from an html document
    Input:      $document   document to strip.
    Output:     $match      an array of the links (empty array when the
                            document contains none)
\*======================================================================*/

function _striplinks($document)
{
    // The pattern uses the x modifier, so its # comments must each end at a
    // line break.
    preg_match_all("'<\s*a\s+.*href\s*=\s*          # find <a href=
                    ([\"\'])?                       # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                    'isx",$document,$links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted hrefs, group 3 unquoted ones.
    // Start from an empty array so "no links" no longer returns an
    // undefined variable.
    $match = array();

    foreach($links[2] as $val)
    {
        if(!empty($val))
            $match[] = $val;
    }

    foreach($links[3] as $val)
    {
        if(!empty($val))
            $match[] = $val;
    }

    // return the links
    return $match;
}

/*======================================================================*\
    Function:   _stripform
    Purpose:    strip the form elements from an html document
    Input:      $document   document to strip.
    Output:     $match      a string holding every matched form element
                            (FORM, INPUT, SELECT, TEXTAREA, OPTION),
                            joined with "\r\n"
\*======================================================================*/

function _stripform($document)
{
    preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);

    // catenate the matches
    $match = implode("\r\n",$elements[0]);

    // return the form elements
    return $match;
}



/*======================================================================*\
    Function:   _striptext
    Purpose:    strip the text from an html document
    Input:      $document   document to strip.
    Output:     $text       the resulting text
\*======================================================================*/

function _striptext($document)
{

    // I didn't use preg eval (//e) since that is only available in PHP 4.0.
    // so, list your entities one by one here. I included some of the
    // more common ones.
+ + $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript + "'<[\/\!]*?[^<>]*?>'si", // strip out html tags + "'([\r\n])[\s]+'", // strip out white space + "'&(quote|#34);'i", // replace html entities + "'&(amp|#38);'i", + "'&(lt|#60);'i", + "'&(gt|#62);'i", + "'&(nbsp|#160);'i", + "'&(iexcl|#161);'i", + "'&(cent|#162);'i", + "'&(pound|#163);'i", + "'&(copy|#169);'i" + ); + $replace = array( "", + "", + "\\1", + "\"", + "&", + "<", + ">", + " ", + chr(161), + chr(162), + chr(163), + chr(169)); + + $text = preg_replace($search,$replace,$document); + + return $text; + } + +/*======================================================================*\ + Function: _expandlinks + Purpose: expand each link into a fully qualified URL + Input: $links the links to qualify + $URI the full URI to get the base from + Output: $expandedLinks the expanded links +\*======================================================================*/ + + function _expandlinks($links,$URI) + { + + preg_match("/^[^\?]+/",$URI,$match); + + $match = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]); + + $search = array( "|^http://".preg_quote($this->host)."|i", + "|^(?!http://)(\/)?(?!mailto:)|i", + "|/\./|", + "|/[^\/]+/\.\./|" + ); + + $replace = array( "", + $match."/", + "/", + "/" + ); + + $expandedLinks = preg_replace($search,$replace,$links); + + return $expandedLinks; + } + +/*======================================================================*\ + Function: _httprequest + Purpose: go get the http data from the server + Input: $url the url to fetch + $fp the current open file pointer + $URI the full URI + $body body contents to send if any (POST) + Output: +\*======================================================================*/ + + function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="") + { + if($this->passcookies && $this->_redirectaddr) + $this->setcookies(); + + $URI_PARTS = parse_url($URI); + if(empty($url)) + $url = "/"; + $headers = 
$http_method." ".$url." ".$this->_httpversion."\r\n"; + if(!empty($this->agent)) + $headers .= "User-Agent: ".$this->agent."\r\n"; + if(!empty($this->host) && !isset($this->rawheaders['Host'])) + $headers .= "Host: ".$this->host."\r\n"; + if(!empty($this->accept)) + $headers .= "Accept: ".$this->accept."\r\n"; + + if($this->use_gzip) { + // make sure PHP was built with --with-zlib + // and we can handle gzipp'ed data + if ( function_exists(gzinflate) ) { + $headers .= "Accept-encoding: gzip\r\n"; + } + else { + trigger_error( + "use_gzip is on, but PHP was built without zlib support.". + " Requesting file(s) without gzip encoding.", + E_USER_NOTICE); + } + } + + if(!empty($this->referer)) + $headers .= "Referer: ".$this->referer."\r\n"; + if(!empty($this->cookies)) + { + if(!is_array($this->cookies)) + $this->cookies = (array)$this->cookies; + + reset($this->cookies); + if ( count($this->cookies) > 0 ) { + $cookie_headers .= 'Cookie: '; + foreach ( $this->cookies as $cookieKey => $cookieVal ) { + $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; "; + } + $headers .= substr($cookie_headers,0,-2) . 
"\r\n"; + } + } + if(!empty($this->rawheaders)) + { + if(!is_array($this->rawheaders)) + $this->rawheaders = (array)$this->rawheaders; + while(list($headerKey,$headerVal) = each($this->rawheaders)) + $headers .= $headerKey.": ".$headerVal."\r\n"; + } + if(!empty($content_type)) { + $headers .= "Content-type: $content_type"; + if ($content_type == "multipart/form-data") + $headers .= "; boundary=".$this->_mime_boundary; + $headers .= "\r\n"; + } + if(!empty($body)) + $headers .= "Content-length: ".strlen($body)."\r\n"; + if(!empty($this->user) || !empty($this->pass)) + $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n"; + + $headers .= "\r\n"; + + // set the read timeout if needed + if ($this->read_timeout > 0) + socket_set_timeout($fp, $this->read_timeout); + $this->timed_out = false; + + fwrite($fp,$headers.$body,strlen($headers.$body)); + + $this->_redirectaddr = false; + unset($this->headers); + + // content was returned gzip encoded? + $is_gzipped = false; + + while($currentHeader = fgets($fp,$this->_maxlinelen)) + { + if ($this->read_timeout > 0 && $this->_check_timeout($fp)) + { + $this->status=-100; + return false; + } + + // if($currentHeader == "\r\n") + if(preg_match("/^\r?\n$/", $currentHeader) ) + break; + + // if a header begins with Location: or URI:, set the redirect + if(preg_match("/^(Location:|URI:)/i",$currentHeader)) + { + // get URL portion of the redirect + preg_match("/^(Location:|URI:)\s+(.*)/",chop($currentHeader),$matches); + // look for :// in the Location header to see if hostname is included + if(!preg_match("|\:\/\/|",$matches[2])) + { + // no host in the path, so pre... [truncated message content] |