RSS_PHP; RSS Parser Source Code

V3 is now a commercial product: a single purchase is required and covers unlimited domain / installation usage. Buy and Download

If you like RSS_PHP, share it, digg it, blog it, link back to source [rssphp.net]

RSS_PHP Example

Here's a simple example using XPath to retrieve all items from the BBC News RSS feed that have a category of "Politics":

<?php
/* RSS_PHP usage example: print every feed item whose category is "Politics" */
require_once '../../rss_php.php';

    // location of the feed to read, and the XPath expression selecting the items of interest
    $feedUrl = 'http://newsrss.bbc.co.uk/rss/newsonline_uk_edition/front_page/rss.xml';
    $politicsQuery = '*/item[category="Politics"]';

    // load the feed, run the query and dump the matches
    $rss = new rss_php();
    $rss->load($feedUrl);
    $politicsItems = $rss->query($politicsQuery);
    print_r($politicsItems);

?>

Full documentation and examples of RSS_PHP are available on this site. Like what you see? Then download the latest version of RSS_PHP, the PHP RSS parser.

Version 3 of RSS_PHP includes 4 additional objects:

enhanced iconv support, automatic XML encoding detection and translation to UTF-8, a high-speed HTTP transportation object featuring browser imitation, and a valid HTTP request generator; none of which have their source displayed here.

RSS_PHP Source

As RSS_PHP is now a commercial product, some of the source code below has been *snipped*. Download the full version 3 of RSS_PHP. <?php
/**
 * RSS_PHP
 * 
 * PHP DOM based XML (RSS) Parser
 * Fri Feb 08 21:00:31 GMT 2008
 *
 * @package RSSPHP
 * @author <black@rssphp.net>
 * @version 3
 */

#INSTALLATION BASED PREFERENCES
//    These three switches are the only values meant to be edited per installation.

//    should RSS_PHP attempt to perform auto encoding detection and conversion to UTF-8 (requires iconv)
    define('RSS_PHP_ENCODING_CONVERSION', TRUE);
//    should RSS_PHP die if it finds an encoding unsupported by the machine
//    NOTE(review): not read anywhere in this listing - presumably consumed by the encoding library; confirm
    define('RSS_PHP_ICONV_UNSUPPORTED_LANG_DIE', TRUE);
//    should RSS_PHP use HTTP_TRANSPORT
    define('RSS_PHP_HTTP_TRANSPORT', TRUE);

    
/*
DO NOT MODIFY ANYTHING BELOW 
#######################################################*/

#INTERNAL PATH DEFINITIONS
//    file name of this script without any directory part
    define('RSS_PHP_BUILD_FILENAME', basename(__FILE__));
//    directory holding this script, trailing separator included (__FILE__ with the file name cut off the end)
    define('RSS_PHP_BASE', substr(__FILE__, 0, strlen(__FILE__)-strlen(RSS_PHP_BUILD_FILENAME)));
//    the separator in use, taken as the last character of RSS_PHP_BASE
    define('RSS_PHP_DIRECTORY_SEPARATOR', substr(RSS_PHP_BASE, -1));

#RSS_ENCODING CONDITIONAL INCLUSION
//    RSS_PHP_USE_RSS_ENCODING becomes TRUE only when conversion is enabled, iconv() exists,
//    both encoding library files are present and both of their classes get declared;
//    in every other case it is defined as FALSE below.
    if(RSS_PHP_ENCODING_CONVERSION && function_exists('iconv')) {
        if(file_exists(RSS_PHP_BASE.'encoding/xml.php') && file_exists(RSS_PHP_BASE.'encoding/iconv.php')) {
            require_once RSS_PHP_BASE.'encoding/iconv.php';
            require_once RSS_PHP_BASE.'encoding/xml.php';
            if(class_exists('encoding_xml') && class_exists('encoding_iconv')) {
                define('RSS_PHP_USE_RSS_ENCODING', TRUE);
            }
        }
    }
//    fallback so the constant is always defined
    if(!defined('RSS_PHP_USE_RSS_ENCODING')) {
        define('RSS_PHP_USE_RSS_ENCODING', FALSE);
    }

#HTTP_TRANSPORT CONDITIONAL INCLUSION
//    RSS_PHP_USE_HTTP_HANDLER becomes TRUE only when the transport is enabled, stream_socket_client()
//    exists, both http library files are present and both of their classes get declared;
//    in every other case it is defined as FALSE below.
    if(RSS_PHP_HTTP_TRANSPORT && function_exists('stream_socket_client')) {
        if(file_exists(RSS_PHP_BASE.'http/request.php') && file_exists(RSS_PHP_BASE.'http/handler.php')) {
            require_once RSS_PHP_BASE.'http/request.php';
            require_once RSS_PHP_BASE.'http/handler.php';
            if(class_exists('http_request') && class_exists('http_handler')) {
                define('RSS_PHP_USE_HTTP_HANDLER', TRUE);
            }
        }
    }
//    fallback so the constant is always defined
    if(!defined('RSS_PHP_USE_HTTP_HANDLER')) {
        define('RSS_PHP_USE_HTTP_HANDLER', FALSE);
    }


#INCLUDE RSS_PHP
//    pull in rss.php from the base directory when it is there; when it is missing,
//    carry on only if the rss_php class is already available, otherwise stop with an error
    if(file_exists(RSS_PHP_BASE.'rss.php')) {
        require_once RSS_PHP_BASE.'rss.php';
    } elseif(!class_exists('rss_php')) {
        die('ERROR: RSS_PHP CAN NOT BE FOUND [path: '.RSS_PHP_BASE.'rss.php'.']');
    }

class rss_php {

    /**
     * Array to hold all DOMProcessingInstructions found in an input XML Document
     *
     * @var Array
     */
    public $DOMProcessingInstructions;

    /**
     * Array holding all XMLNS (XML Namespaces) found in an input XML Document.
     *
     * @var Array
     */
    public $DOMNamespaces;

    /**
     * Nested Array of Objects (DOMElements)
     *
     * @var Array
     */
    public $document;

    /**
     * Internal storage of the DOMDocument
     *
     * @var DOMDocument
     */
    public $DOMDocument;

    /**
     * Preloaded Internal DOMXPath Object for use with ->query function
     *
     * @var DOMXpath
     */
    public $DOMXPath;

    /**
     * configuration variable : use rss_encoding_xml && rss_encoding_iconv libraries
     * provides automatic language detection and conversion to UTF-8
     *
     * @var bool
     */
    public $useXMLEncoding = false;

    /**
     * configuration variable : use core transport_http_handler library
     * setting this variable to FALSE changes the object to use file_get_contents
     *
     * @var bool
     */
    public $useHTTPTransport = false;

    /**
     * Switch the optional features on when both the installation preference
     * and the matching "library available" constant are true.
     */
    public function __construct() {
        if(RSS_PHP_ENCODING_CONVERSION && RSS_PHP_USE_RSS_ENCODING) {
            $this->useXMLEncoding = true;
        }
        if(RSS_PHP_HTTP_TRANSPORT && RSS_PHP_USE_HTTP_HANDLER) {
            $this->useHTTPTransport = true;
        }
    }

    /**
     * load a local or remote xml document into rss_php
     *
     * Terminates the script via die() when $url is empty, cannot be parsed,
     * names a local file that does not exist, or the http handler reports a failure.
     *
     * @param string $url the location of the url, local or remote
     * @param string $user if specified will be used as the http auth username
     * @param string $pass if specified will be used as the http auth password
     * @return boolean success
     */
    public function load($url=false, $user=false, $pass=false) {
        $returnValue = false;
        if($url) {
            $urlparts = parse_url($url);
            if($urlparts) {
                // A location made of only a path, or only a scheme plus a path, is treated as a local file.
                // NOTE(review): the literal counts 1 and 2 were missing from the published listing and
                // have been reconstructed from the accompanying isset() checks - confirm against the
                // distributed source.
                if((count($urlparts) == 1 && isset($urlparts['path'])) || (count($urlparts) == 2 && isset($urlparts['path']) && isset($urlparts['scheme']))) {
                    #local file
                    if(file_exists($urlparts['path'])) {
                        $returnValue = $this->loadParser(file_get_contents($url));
                    } else {
                        die('RSS_PHP ERROR : can not find the specified file ['.$url.']');
                    }
                } else {
                    #remote file
                    if($this->useHTTPTransport) {
                        $http_handler = new http_handler;
                        $http_handler->setRequest(true, $url);
                        // basic auth header is only sent when both credentials are supplied
                        if($user && $pass) {
                            $http_handler->setHeaderValue('Authorization', 'Basic '.base64_encode(trim($user).':'.trim($pass)));
                        }
                        if($http_handler->executeRequest()) {
                            $returnValue = $this->loadParser($http_handler->getSingleResponse($url));
                        } else {
                            die($http_handler->HTTPErrorMsg);
                        }
                    } else {
                        $returnValue = $this->loadParser(file_get_contents($url));
                    }
                }
            } else {
                die('RSS_PHP ERROR : PHP cannot parse the given path / url ['.$url.']');
            }
        } else {
            die('RSS_PHP ERROR : Parameter 1 [path/url] cannot be null');
        }
        return $returnValue;
    }

    /**
     * load raw xml into rss_php
     *
     * Terminates the script via die() when $rawxml is empty.
     *
     * @param string $rawxml raw xml in a string
     * @return boolean success
     */
    public function loadXML($rawxml=false) {
        if($rawxml) {
            return $this->loadParser($rawxml);
        } else {
            die('RSS_PHP ERROR : Parameter 1 [rawxml] cannot be null');
        }
    }

    /**
     * load raw rss into rss_php
     *
     * @deprecated this is included for backwards compatibility only, please use method loadXML()
     * @param string $rawxml raw xml in a string
     * @return boolean success
     */
    public function loadRSS($rawxml) {
        return $this->loadXML($rawxml);
    }

    /**
     * load an array into rss_php
     *
     * @param array $array to be converted
     * @param string $rootNodeName if specified a root object of rootNodeName will be created and all array data appended
     * @return boolean success
     */
    public function loadArray($array, $rootNodeName=false) {
        $this->DOMDocument = new DOMDocument('1.0', 'UTF-8');
        $this->DOMDocument->strictErrorChecking = false;
        $this->DOMDocument->formatOutput = true;
        $this->DOMDocument->preserveWhiteSpace = false;
        if($rootNodeName) {
            // wrap the data so that it ends up below a single root node
            $array = array($rootNodeName => $array);
        }
        $this->convertArray($array);
        return $this->gdoc();
    }

    /**
     * return a referenced array to document
     *
     * @param boolean $includeAttributes include all info, default FALSE neat output of node values only
     * @return array multidimensional associative array of all nodes and reference values
     */
    public function &getValues($includeAttributes=false) {
        if($includeAttributes) {
            return $this->document;
        }
        return $this->valueReturner();
    }

    /**
     * return full rss array
     *
     * @deprecated for backwards compatibility only, please use getValues()
     * @param boolean $includeAttributes passed straight through to getValues()
     * @return array whatever getValues() returns
     */
    public function &getRSS($includeAttributes=false) {
        return $this->getValues($includeAttributes);
    }

    /**
     * return rss items (all elements named "item"), optionally restricted to a window
     *
     * @param boolean $includeAttributes TRUE uses getElementsByTagName(), FALSE uses getValuesByTagName()
     * @param integer|boolean $limit maximum number of items to return, FALSE for no limit
     * @param integer|boolean $offset index of the first item to return, FALSE to start at the first item
     * @return array the selected items
     */
    public function &getItems($includeAttributes=false,$limit=false,$offset=false) {
        if($includeAttributes) {
            $items = $this->getElementsByTagName('item');
        } else {
            $items = $this->getValuesByTagName('item');
        }
        if($limit !== false || $offset !== false) {
            // A FALSE limit must mean "up to the end": handing FALSE to array_splice() as the length
            // is coerced to 0 and would return no items at all when only an offset is given.
            $start = ($offset === false) ? 0 : (int) $offset;
            $length = ($limit === false) ? null : (int) $limit;
            $items = array_slice($items, $start, $length);
        }
        return $items;
    }

    /**
     * return the document as an xml document
     *
     * @return string XML
     */
    public function getXML() {
        $this->rebuildDOM($this->document);
        return $this->DOMDocument->saveXML();
    }

    /**
     * return an associative array of all DOMElements matching $tagName
     * including all attributes
     *
     * @param string $tagName (tag/node)name to search for
     * @return array formatted dom node list
     */
    public function &getElementsByTagName($tagName) {
        return $this->extractDOM($this->DOMDocument->getElementsByTagName($tagName));
    }

    /**
     * retrieve all namespaces defined in the current document
     *
     * @return array
     */
    public function getNamespaces() {
        return $this->DOMNamespaces;
    }

    /**
     * retrieve all processing instructions related to the current document
     *
     * @return array
     */
    public function getProcessingInstructions() {
        return $this->DOMProcessingInstructions;
    }

    /**
     * return a referenced associative array of all values whose (tag/node)name matches $tagName
     *
     * @param string $tagName (tag/node)name to search for
     * @return array referenced array of nodename/value pairs (and any child elements)
     */
    public function getValuesByTagName($tagName) {
        return $this->transformNodeList($this->DOMDocument->getElementsByTagName($tagName));
    }

    /**
     * provides XPath query functionality to rss_php
     *
     * @param string $XPathQuery must be valid XPath syntax
     * @param boolean $includeAttributes TRUE returns the result via extractDOM(), FALSE via transformNodeList()
     * @return array referenced array of nodename/value pairs (and any child elements)
     */
    public function query($XPathQuery, $includeAttributes=false) {
        $result = $this->DOMXPath->query($XPathQuery);
        if($includeAttributes) {
            return $this->extractDOM($result);
        }
        return $this->transformNodeList($result);
    }

    /**
     * @internal parse XML and turn into an accessible dom document
     *
     * @param string $xml raw xml
     * @return boolean success
     */
    private function loadParser($xml=false) {
        /**
         * snip..
         * V3 is a commercial product..
         */
    }

    /**
     * @internal set up initial DOMDocument, DOMXPath and extract namespaces
     *
     * Terminates the script via die() when extractDOM() yields nothing for the document.
     *
     * @return boolean success
     */
    private function gdoc() {
        $this->DOMXPath = new DOMXPath($this->DOMDocument);
        $this->extractNamespaces();
        $this->document = $this->extractDOM($this->DOMDocument->childNodes);
        if(!$this->document) {
            die('RSS_PHP ERROR : The file specified appears not to be a valid xml file');
        }
        return true;
    }

    /**
     * @internal negotiate internal DOMDocument and return an array
     *
     * @param DOMNode $valueBlock one of any element which extends a DOMNode
     * @return array all name/values pairs as multidimensional associative array
     */
    private function &valueReturner($valueBlock=false) {
        /**
         * snip..
         * V3 is a commercial product..
         */
    }

    /**
     * @internal parses DOMNodeList objects into an associative array for return from public functions
     *
     * @param DOMNodeList/DOMNode $nodeList
     * @param array $valueBlock current array level
     * @return array final name/value pairs for return from methods
     */
    private function transformNodeList($nodeList, $valueBlock=array()) {
        /**
         * snip..
         * V3 is a commercial product..
         */
    }

    /**
     * @internal update values internally for export
     *
     * @param DOMDocument/DOMNodeList $nodes
     */
    private function rebuildDOM($nodes=false) {
        /**
         * snip..
         * V3 is a commercial product..
         */
    }

    /**
     * @internal extract registered XMLNS namespaces using XPath
     *
     * Fills $this->DOMNamespaces with prefix => namespace URI pairs, skipping the "xml" prefix.
     */
    private function extractNamespaces() {
        $namespaces = $this->DOMXPath->query('namespace::*');
        foreach ($namespaces as $namespace) {
            if($namespace->localName !== 'xml') {
                $this->DOMNamespaces[$namespace->localName] = $this->DOMDocument->lookupNamespaceURI($namespace->localName);
            }
        }
    }

    /**
     * @internal turn a standard DOMDocument into a more accessible format
     *
     * @param DOMDocument $nodeList
     * @return DOMElement internal return only to create document
     */
    private function &extractDOM($nodeList) {
        /**
         * snip..
         * V3 is a commercial product..
         */
    }

    /**
     * @internal internal array parser, turns any array into a DOMDocument
     *
     * @param unknown_type $node
     * @param unknown_type $parentNode
     */
    private function convertArray($node=false,$parentNode=false) {
        /**
         * snip..
         * V3 is a commercial product..
         */
    }
}
?>

RSS_PHP Dependencies

As RSS_PHP utilises the DOM to parse RSS, and the DOM Extension is installed as part of the PHP Core, RSS_PHP's only dependency is PHP 5.

RSS_PHP Internationalization and Character Encoding Support

Don't worry! RSS_PHP automatically detects encodings and changes them to UTF-8 on the fly.

Valid XHTML 1.0 Strict

return to the top of the page