Created
November 20, 2008 23:23
-
-
Save singpolyma/27268 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| require_once dirname(__FILE__).'/Outline.php'; | |
| require_once dirname(__FILE__).'/OutlineFromXML.php'; | |
/**
 * Check whether a string parses as well-formed XML.
 *
 * @param string $data XML text to check.
 * @return int 0 (falsy) when the parser accepts $data, or when the only
 *             error it reports is XML_ERROR_NONE or code 27; otherwise the
 *             parser's error code.
 */
function checkXML($data) {
    $parser = xml_parser_create();
    $parsedOk = xml_parse_into_struct($parser, $data, $vals);
    // Only ask for an error code when the parse actually failed.
    $code = $parsedOk ? XML_ERROR_NONE : xml_get_error_code($parser);
    xml_parser_free($parser);

    // NOTE(review): code 27 is deliberately tolerated; its exact meaning
    // depends on the XML backend PHP was built against - confirm which
    // condition the author intended to ignore.
    if ($code == XML_ERROR_NONE || $code == 27) {
        return 0;
    }
    return $code;
}//end function checkXML
/**
 * Fetch the body of a URL.
 *
 * Uses the cURL extension when it is loaded and falls back to
 * file_get_contents() otherwise.
 *
 * @param string $url Address to fetch.
 * @return mixed Whatever curl_exec() (with CURLOPT_RETURNTRANSFER enabled)
 *               or file_get_contents() returns for $url.
 */
function url_get($url) {
    if (!function_exists('curl_init')) {
        return file_get_contents($url);
    }

    $handle = curl_init($url);
    // Ask cURL to hand the body back instead of printing it.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, TRUE);
    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}//end function url_get
/**
 * Return the first truthy field of an Outline among the given names.
 *
 * When no field is truthy, the value of the last field tried is returned
 * (so an empty string or NULL from the final lookup is passed through).
 *
 * @param object $outline Object exposing getField($name).
 * @param array  $names   Field names to try, in priority order.
 * @return mixed
 */
function _std_feed_first_field($outline, $names) {
    $value = NULL;
    foreach($names as $name) {
        $value = $outline->getField($name);
        if($value) break;
    }
    return $value;
}//end function _std_feed_first_field

/**
 * Find the href of the first child node that looks like an HTML alternate.
 *
 * A node qualifies when its rel is 'alternate' or its type is 'text/html'.
 *
 * @param object $links Outline whose child nodes are link elements.
 * @return mixed The matching node's href field, or NULL when no node matches.
 */
function _std_feed_alternate_href($links) {
    foreach($links->getNodes() as $node) {
        if($node->getField('rel') == 'alternate' || $node->getField('type') == 'text/html')
            return $node->getField('href');
    }
    return NULL;
}//end function _std_feed_alternate_href

/**
 * Flatten a category-style Outline into a list of label strings.
 *
 * An Outline without child nodes is treated as a single entry. Each entry
 * contributes its 'text' value, falling back to 'term'; entries with neither
 * are skipped.
 *
 * @param object $cats Outline exposing getNumNodes() and toArray().
 * @return array List of labels (possibly empty).
 */
function _std_feed_category_texts($cats) {
    $rows = $cats->getNumNodes() ? $cats->toArray() : array($cats->toArray());
    $texts = array();
    foreach($rows as $row) {
        if(!is_array($row)) continue;
        $text = !empty($row['text']) ? $row['text'] : (isset($row['term']) ? $row['term'] : NULL);
        if(!$text) continue;
        $texts[] = $text;
    }
    return $texts;
}//end function _std_feed_category_texts

/**
 * Collect the categories of a feed or of one feed item.
 *
 * Reads 'category' first; when that yields nothing, falls back to
 * 'dc:subject' (space separated) and, if requested, 'media:keywords'
 * (separated by ', ').
 *
 * @param object $source             Outline exposing getField($name).
 * @param bool   $use_media_keywords Whether to try 'media:keywords' as a last resort.
 * @return mixed Array of labels when any were found, otherwise the falsy
 *               value produced by the last lookup.
 */
function _std_feed_categories($source, $use_media_keywords) {
    $category = $source->getField('category');
    if(is_a($category,'Outline'))
        $category = _std_feed_category_texts($category);
    if($category && !is_array($category))
        $category = array($category);

    $fallbacks = array('dc:subject' => ' ');
    if($use_media_keywords)
        $fallbacks['media:keywords'] = ', ';
    foreach($fallbacks as $field => $separator) {
        if($category) break;
        $value = $source->getField($field);
        if(!$value) continue;
        $category = is_a($value,'Outline') ? _std_feed_category_texts($value) : explode($separator, $value);
    }
    return $category;
}//end function _std_feed_categories

/**
 * Parse feed markup (RSS 2.0, RSS 1.0/RDF, Atom, or an upper-case
 * CHANNEL/ITEM format) into a plain array.
 *
 * The format is sniffed from markers in the text, in this order: '<rss',
 * '<rdf' together with '<channel', '<feed', then '<CHANNEL' together with
 * '<ITEM' (all case-insensitive). Anything else is treated as an HTML page:
 * its rel="alternate" links are collected, the 'application/rss+xml' one
 * (or else the first found) is fetched with url_get() and parsed recursively.
 *
 * @param string $xmldata         Feed or HTML markup. A non-string such as
 *                                FALSE from a failed fetch is treated as empty.
 * @param int    $discovery_depth Internal: number of alternate links already
 *                                followed. Callers normally omit it.
 * @return array Always contains 'items' (a list of item arrays). For a
 *               recognised feed it also contains 'title', 'link',
 *               'description', 'language', 'copyright', 'webMaster',
 *               'dc:creator', 'category', 'image' and, when a date field was
 *               present, 'timestamp'.
 */
function std_feed_parse($xmldata, $discovery_depth = 0) {
    // Upper bound on chained autodiscovery hops; stops pages whose
    // "alternate" link points back at another HTML page from looping forever.
    $max_discovery_depth = 5;

    $xmldata = is_string($xmldata) ? $xmldata : '';
    if($xmldata === '')
        return array('items' => array());

    if(stripos($xmldata,'<rss') !== false) {
        // RSS 2.0
        $struct = new OutlineFromXML($xmldata,array('rootel' => 'rss','itemel' => 'channel>item','subitemels' => false,'collapsels' => array('title','description')));
    } elseif(stripos($xmldata,'<rdf') !== false && stripos($xmldata,'<channel') !== false) {
        // RSS 1.0 (RDF)
        $struct = new OutlineFromXML($xmldata,array('rootel' => 'rdf:RDF','itemel' => 'item','subitemels' => false,'collapsels' => array('title','description')));
    } elseif(stripos($xmldata,'<feed') !== false) {
        // Atom
        $struct = new OutlineFromXML($xmldata,array('rootel' => 'feed','itemel' => 'entry','subitemels' => false,'collapsels' => array('title','content','summary')));
    } elseif(stripos($xmldata,'<CHANNEL') !== false && stripos($xmldata,'<ITEM') !== false) {
        // Upper-case CHANNEL/ITEM/ABSTRACT variant
        $struct = new OutlineFromXML($xmldata,array('rootel' => 'CHANNEL','itemel' => 'ITEM','subitemels' => false,'collapsels' => array('TITLE','ABSTRACT')));
    } else {
        // Not a feed: treat as HTML and look for an advertised feed.
        // (hAtom parsing via tidy used to be sketched here but was disabled.)
        $xmldata = preg_replace('/<!DOCTYPE[^\f]*?>/','', $xmldata);
        // Self-close void elements so the XML parser accepts them. The match
        // is confined to a single tag so neighbouring tags are not swallowed.
        $xmldata = preg_replace('/<(meta|link|br|img)\b([^>]*?)\s*\/?>/i','<$1$2 />', $xmldata);
        // &nbsp; is not a predefined XML entity and would abort the parse.
        $xmldata = str_replace('&nbsp;',' ',$xmldata);

        $vals = array();
        $theParser = xml_parser_create();
        xml_parse_into_struct($theParser,$xmldata,$vals);
        // Freeing is a no-op on PHP 8+, where parsers are objects.
        if(PHP_VERSION_ID < 80000)
            xml_parser_free($theParser);

        // The parser upper-cases attribute names by default, hence REL/TYPE/HREF.
        $alternates = array();
        foreach((is_array($vals) ? $vals : array()) as $el) {
            if(!isset($el['attributes']['REL']) || empty($el['attributes']['HREF']))
                continue;
            if(!in_array('alternate', explode(' ', $el['attributes']['REL'])))
                continue;
            $type = isset($el['attributes']['TYPE']) ? $el['attributes']['TYPE'] : '';
            $alternates[$type] = $el['attributes']['HREF'];
        }//end foreach

        $url = !empty($alternates['application/rss+xml']) ? $alternates['application/rss+xml'] : array_shift($alternates);
        if($url && $discovery_depth < $max_discovery_depth)
            return std_feed_parse(url_get($url), $discovery_depth + 1);
        return array('items' => array());
    }//end if-else format sniffing

    // Hoist the fields of a nested <channel> onto the root so the lookups
    // below work the same for every format.
    if(is_a($struct->getField('channel'),'Outline')) {
        $channel = $struct->getField('channel');
        foreach($channel->getFields() as $name => $val)
            $struct->addField($name,$val);
        $struct->unsetField('channel');
    }//end if channel

    $data = array();

    $data['title'] = _std_feed_first_field($struct, array('title','dc:title','text'));
    if(is_a($data['title'],'Outline')) {
        if($data['title']->getNumNodes()) {
            $first = $data['title']->getNode(0);
            $data['title'] = $first->getField('text');
        } else
            $data['title'] = '';
    }//end if is_a title Outline

    $link = $struct->getField('link');
    if(is_a($link,'Outline')) {
        $links = $link;
        $link = NULL;
        if($links->getField('rel') == 'alternate' || $links->getField('type') == 'text/html')
            $link = $links->getField('href');
        if(!$link)
            $link = _std_feed_alternate_href($links);
    }//end if link is_a Outline
    if(!$link)
        $link = $struct->getField('id');
    if(!$link)
        $link = $struct->getField('href');
    $data['link'] = $link;

    $description = _std_feed_first_field($struct, array('description','dc:description','subtitle'));
    if(is_a($description,'Outline'))
        $description = $description->getField('text');
    if(!$description)
        $description = $struct->getField('abstract');
    $data['description'] = $description;

    $data['language'] = _std_feed_first_field($struct, array('language','dc:language'));
    $data['copyright'] = _std_feed_first_field($struct, array('copyright','dc:rights'));
    $data['webMaster'] = _std_feed_first_field($struct, array('webmaster','managingeditor'));
    $data['dc:creator'] = _std_feed_first_field($struct, array('dc:creator','dc:contributor'));

    // 'timestamp' is only added when some date field is present; keep trying
    // later fields while the current value is unusable (falsy or -1).
    foreach(array('pubdate','lastbuilddate','dc:date','published','updated','modified') as $date_field) {
        if(!empty($data['timestamp']) && $data['timestamp'] != -1)
            break;
        $raw = $struct->getField($date_field);
        if($raw)
            $data['timestamp'] = strtotime($raw);
    }//end foreach date fields

    $data['category'] = _std_feed_categories($struct, false);

    $image = $struct->getField('image');
    if(is_a($image,'Outline'))
        $image = $image->toArray();
    if(!$image)
        $image = $struct->getField('logo');
    if(is_a($image,'Outline')) {
        if(!$image->getField('href') && $image->getNumNodes())
            $holder = $image->getNode(0);
        else
            $holder = $image;
        $image = array('url' => $holder->getField('href'));
    }//end if is_a image Outline
    $data['image'] = $image;

    $data['items'] = array();
    foreach($struct->getNodes() as $node) {
        $item = array();

        // Flatten media:group children onto the item itself.
        $group = $node->getField('media:group');
        if($group && is_a($group, 'Outline')) {
            foreach($group->getFields() as $name => $val)
                $node->addField($name, $val);
        }//end if media:group

        $item['title'] = _std_feed_first_field($node, array('title','dc:title'));

        $link = $node->getField('link');
        if(is_a($link,'Outline')) {
            $links = $link;
            $link = $links->getField('href');
            if(!$link)
                $link = _std_feed_alternate_href($links);
        }//end if link is_a Outline
        if(!$link)
            $link = $node->getField('href');
        $item['link'] = $link;

        // Prefer content:encoded when it is longer than the plain description.
        $description = $node->getField('description');
        $encoded = $node->getField('content:encoded');
        if(is_string($encoded) && strlen($encoded) > (is_string($description) ? strlen($description) : 0))
            $description = $encoded;
        if(!$description)
            $description = _std_feed_first_field($node, array('dc:description','content','summary','abstract'));
        $item['description'] = $description;

        $item['dc:creator'] = _std_feed_first_field($node, array('dc:creator','dc:contributor'));
        $item['author'] = $node->getField('author');
        if(is_a($item['author'],'Outline')) {
            if(!$item['dc:creator']) $item['dc:creator'] = $item['author']->getField('name');
            $item['author'] = $item['author']->getField('email');
        }//end if author is_a Outline

        // Authors of the form "noreply@blogger.com (Name)" carry no usable
        // address: keep the name as dc:creator and drop the author.
        // NOTE(review): the address literal was mangled by e-mail obfuscation
        // in the copy under review; this value is restored because it is
        // exactly 19 characters long and puts the name at offset 21, matching
        // the offsets used here - confirm against the original source.
        $author = is_string($item['author']) ? trim($item['author']) : '';
        if(substr($author,0,19) == 'noreply@blogger.com') {
            if(!$item['dc:creator'])
                $item['dc:creator'] = substr($author,21,-1); // strip " (" prefix and ")" suffix
            unset($item['author']);
        }//end if placeholder author

        $item['category'] = _std_feed_categories($node, true);
        $item['comments'] = $node->getField('comments');

        $enclosure = $node->getField('enclosure');
        if(is_a($enclosure,'Outline')) {
            // Build a fresh array; writing keys into the Outline itself fails.
            $enclosure = array(
                'url' => $enclosure->getField('url'),
                'length' => $enclosure->getField('length'),
                'type' => $enclosure->getField('type'),
            );
        }//end if enclosure is_a Outline
        $item['enclosure'] = $enclosure;

        // guid falls back to id, then the link, then a hash of the content.
        $guid = $node->getField('guid');
        if(is_a($guid,'Outline'))
            $guid = $guid->getField('text');
        if(!$guid)
            $guid = $node->getField('id');
        if(!$guid && $item['link'])
            $guid = $item['link'];
        if(!$guid)
            $guid = md5($item['title'].$item['description']);
        $item['guid'] = $guid;

        $item['timestamp'] = NULL;
        foreach(array('pubdate','dc:date','issued','created','published','updated','modified') as $date_field) {
            $raw = $node->getField($date_field);
            $item['timestamp'] = $raw ? strtotime($raw) : NULL;
            if($item['timestamp']) break;
        }//end foreach date fields

        $source = $node->getField('source');
        if(is_a($source,'Outline')) {
            $source = array(
                'title' => $source->getField('text'),
                'url' => $source->getField('url'),
            );
        }//end if source is_a Outline
        if(!$source && $node->getField('dc:source'))
            $source = array('url' => $node->getField('dc:source'));
        $item['source'] = $source;

        $item['wfw:comment'] = $node->getField('wfw:comment');
        $item['wfw:commentRss'] = $node->getField('wfw:commentrss');

        foreach(array('media:content','media:thumbnail') as $media_field) {
            $media = $node->getField($media_field);
            if(!$media) continue;
            $item[$media_field] = is_a($media,'Outline') ? $media->toArray() : $media;
        }//end foreach media fields

        $data['items'][] = $item;
    }//end foreach nodes

    return $data;
}//end function std_feed_parse
| ?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment