Source for file rss_parse.inc
Documentation is available at rss_parse.inc
* Project: MagpieRSS: a simple RSS integration tool
* File: rss_parse.inc - parse an RSS or Atom feed
* return as a simple object.
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3
* The latest version of MagpieRSS can be obtained from:
* http://magpierss.sourceforge.net
* For questions, help, comments, discussion, etc., please join the
* Magpie mailing list: magpierss-general@lists.sourceforge.net
* @author Kellan Elliott-McCrea <kellan@protest.net>
require_once (MAGPIE_DIR .
'rss_utils.inc');
* Hybrid parser, and object, takes RSS as a string and returns a simple object.
* see: rss_fetch.inc for a simpler interface with integrated caching support
var $items =
array(); // collection of parsed items
var $channel =
array(); // hash of channel fields
var $encoding =
''; // output encoding of parsed rss
// parser variables, useless if you're not a parser, treat as private
var $stack =
array(); // parser stack
var $incontent =
false; // if in Atom <content mode="xml"> field
* Set up XML parser, parse source, and return populated RSS object.
* @param string $source string containing the RSS to be parsed
* NOTE: Probably a good idea to leave the encoding options alone unless
* you know what you're doing as PHP's character set support is
* NOTE: A lot of this is unnecessary but harmless with PHP5
* @param string $output_encoding output the parsed RSS in this character
* set; defaults to ISO-8859-1 as this is PHP's default.
* NOTE: might be changed to UTF-8 in future
* @param string $input_encoding the character set of the incoming RSS source.
* Leave blank and Magpie will try to figure it out.
* @param bool $detect_encoding if false Magpie won't attempt to detect
* source encoding. (caveat emptor)
function MagpieRSS ($source, $output_encoding=
'ISO-8859-1',
$input_encoding=
null, $detect_encoding=
true)
# if PHP xml isn't compiled in, die
$this->error( "Failed to load PHP's XML Extension. " .
"http://www.php.net/manual/en/ref.xml.php",
$output_encoding, $input_encoding, $detect_encoding);
$this->error( "Failed to create an instance of PHP's XML parser. " .
"http://www.php.net/manual/en/ref.xml.php",
# pass in parser, and a reference to this object
'feed_start_element', 'feed_end_element' );
if ( $errorcode !=
XML_ERROR_NONE ) {
$errormsg =
"$xml_error at line $error_line, column $error_col";
$this->error( $errormsg );
// check for a namespace, and split if found
if ( strpos( $element, ':' ) ) {
list
($ns, $el) =
split( ':', $element, 2);
if ( $ns and $ns !=
'rdf' ) {
# if feed type isn't set, then this is first element of feed
# identify feed from root element
elseif ( $el ==
'rss' ) {
elseif ( $el ==
'feed' ) {
elseif ($el ==
'item' or $el ==
'entry' )
if ( isset
($attrs['rdf:about']) ) {
// if we're in the default namespace of an RSS feed,
// record textinput or image fields
# handle atom content constructs
// avoid clashing w/ RSS mod_content
// if inside an Atom content construct (e.g. content or summary) field treat tags as text
// if tags are inlined, then flatten
// Atom supports many links per containing element.
// Magpie treats link elements of type rel='alternate'
// as being equivalent to RSS's simple link element.
if ( isset
($attrs['rel']) and $attrs['rel'] ==
'alternate' )
$link_el =
'link_' .
$attrs['rel'];
$this->append($link_el, $attrs['href']);
// set stack[0] to current element
$this->append($current_el, $text);
if ( $el ==
'item' or $el ==
'entry' )
elseif ($el ==
'channel' or $el ==
'feed' )
// note: I don't think this is actually necessary
if ( $this->stack[0] ==
$el )
function concat (&$str1, $str2=
"") {
// smart append - field and namespace aware
$this->image[ $el ], $text );
// if atom populate rss fields
$item =
$this->items[$i];
if ( isset
($item['summary']) )
$item['description'] =
$item['summary'];
if ( isset
($item['atom_content']))
$item['content']['encoded'] =
$item['atom_content'];
$atom_date =
(isset
($item['issued']) ) ?
$item['issued'] :
$item['modified'];
if ($epoch and $epoch >
0) {
$item['date_timestamp'] =
$epoch;
$this->items[$i] =
$item;
$item =
$this->items[$i];
if ( isset
($item['description']))
$item['summary'] =
$item['description'];
if ( isset
($item['content']['encoded'] ) )
$item['atom_content'] =
$item['content']['encoded'];
if ( $this->is_rss() ==
'1.0' and isset
($item['dc']['date']) ) {
if ($epoch and $epoch >
0) {
$item['date_timestamp'] =
$epoch;
elseif ( isset
($item['pubdate']) ) {
$item['date_timestamp'] =
$epoch;
$this->items[$i] =
$item;
* return XML parser, and possibly re-encoded source
return array($parser, $source);
* Instantiate an XML parser under PHP5
* PHP5 will do a fine job of detecting input encoding
* if passed an empty string as the encoding.
// by default php5 does a fine job of detecting input encodings
if(!$detect &&
$in_enc) {
* Instantiate an XML parser under PHP4
* Unfortunately PHP4's support for character encodings
* and especially XML and character encodings sucks. As
* long as the documents you parse only contain characters
* from the ISO-8859-1 character set (a superset of ASCII,
* and a subset of UTF-8) you're fine. However once you
* step out of that comfy little world things get mad, bad,
* The following code is based on SJM's work with FoF
* @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
if (preg_match('/<?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
$this->source_encoding =
$in_enc;
// the detected encoding is not one of the simple encodings PHP knows
// attempt to use the iconv extension to
// cast the XML to a known encoding
// @see http://php.net/iconv
$encoded_source =
iconv($in_enc,'UTF-8', $source);
// iconv didn't work, try mb_convert_encoding
// @see http://php.net/mbstring
$this->error("Feed is in an unsupported character encoding. ($in_enc) " .
"You may see strange artifacts, and mangled characters.",
function error ($errormsg, $lvl=
E_USER_WARNING) {
// append PHP's error message if track_errors enabled
if ( isset
($php_errormsg) ) {
$errormsg .=
" ($php_errormsg)";
$notices =
E_USER_NOTICE|
E_NOTICE;
$this->ERROR =
$errormsg;
// patch to support medieval versions of PHP4.1.x,
// courtesy, Ryan Currie, ryan@digibliss.com
foreach($array as $key=>
$value) {
$output[$cmd($key)]=
$value;
Documentation generated on Tue, 01 May 2007 16:47:12 +0200 by phpDocumentor 1.3.2