Created
October 7, 2015 06:47
-
-
Save philgyford/6853747e1a3250939ba2 to your computer and use it in GitHub Desktop.
PHP file to combine RSS feeds etc, used on gyford.com. Requires many other files to work...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* @package Phil | |
* Combine recent photos, writing, links, etc into one feed and one HTML/PHP include. | |
* | |
 * NOTE: Requires the dom.so extension to be enabled in php.ini
* (It wasn't, by default, on Textdrive, 2013-03-14.) | |
* | |
* v1.4 2015-09-04 | |
* | |
* Version history | |
* v1.0 2006-02-17 | |
* v1.01 2006-03-20 | |
* Checked for photo tags before cycling through them. | |
* v1.1 2006-04-22 | |
* Look for featured photo tag in description, not tags. | |
* v1.11 2006-04-30 | |
* Fixed problems with putting the wrong item descriptions/content-encodeds in. | |
* v1.12 2006-06-18 | |
* Stopped including init.php, now just config.php. | |
* v1.13 2006-10-18 | |
* Fixed a bug that was doing wrong descriptions for 'Writing' entries. | |
* v1.14 2007-05-03 | |
* Added links for wfw:comments in Links and Writing. | |
* v1.15 2007-05-07 | |
* Changed the featured photo tag stuff slightly. | |
* v1.2 2008-09-26 | |
* Added the ability to fetch remote feeds, eg PhilipGyford.com. | |
* v1.21 2008-10-17 | |
* Corrected the total number of photos when some aren't to be used. | |
* v1.22 2008-10-21 | |
* Stopped photos from showing when they're marked not to be. | |
* v1.23 2008-10-27 | |
* Added Comments blog to local feeds fetched. | |
* v1.24 2008-12-26 | |
* Some fixes to make it more compatible with the rest of the site. | |
* v1.25 2008-12-30 | |
* Stopped too-old items from remote feeds being displayed. | |
* v1.3 2009-02-09 | |
* Added more feeds, support for YouTube feeds, generates two aggregated feeds, plus outputs HTML includes, rather than PHP arrays. | |
* v1.31 2009-02-12 | |
* Added 'Posted in...' intros to RSS feed items. Fixed number of RSS items displaying. | |
* v1.32 2013-03-12 | |
* Upgraded SimplePie. | |
* v1.33 2013-03-14 | |
* Changed 'posted_url' to 'flickr_url' for multiple Flickr images when writing RSS feed. | |
* v1.34 2014-06-17 | |
* Fixed errors from missing photo descriptions/titles. | |
* v1.4 2015-09-04 | |
* Updated YouTube to use API v3. | |
*/ | |
error_reporting(E_ALL); | |
ini_set('display_errors', '1'); | |
require_once '/home/philgyford/webapps_symlinked/gyfordphil/includes/init.php'; | |
//require_once '/Users/phil/Sites/webfaction/webapps_symlinked/gyfordphil/includes/init.php'; | |
require_once 'Flickr.php'; | |
require_once 'RSSWriter.php'; | |
require_once 'simplepie/autoloader.php'; | |
require_once 'HTML.php'; | |
/** | |
************************************************ | |
* Configuration | |
*/ | |
/**
 * How many days of entries to list on the website front page.
 * Assumes the feeds/includes hold at least that many days' worth of
 * photos/entries. Now only used for the Photos page.
 */
define('DAYS_TO_LIST_HTML', 60);

/**
 * Upper limit on the number of entries listed in the RSS feeds.
 * The RSS entries are drawn from whatever was fetched for the front page,
 * so a feed may end up with fewer than ENTRIES_TO_LIST_RSS entries.
 */
define('ENTRIES_TO_LIST_RSS', 12);

/**
 * Paths of the two RSS files this script generates.
 * DOCS_DIR is not defined in this file (presumably it comes from init.php).
 */
define('RSS_FILE_PATH_PERSONAL', DOCS_DIR . '/syndication/index-fb.rdf');
define('RSS_FILE_PATH_EVERYTHING', DOCS_DIR . '/syndication/everything-fb.rdf');

/**
 * Directory holding the MT-generated include files.
 * Each of those files provides an array of data about recent entries in
 * the writing and links blogs.
 */
define('MT_INCLUDE_DIR', INCLUDES_DIR . '/caches/misc/');

/**
 * Directory the generated HTML/PHP include files are saved to.
 * The files cover the latest photos, writing and links for the front page,
 * plus a separate file featuring only the photos.
 */
define('SAVED_FILE_DIR', INCLUDES_DIR . '/caches/misc/');

/**
 * The directory SimplePie will cache remote feeds in.
 */
define('SIMPLEPIE_CACHE_DIR', INCLUDES_DIR . '/caches/simplepie/');
/*************************************************/ | |
/**
 * Every source that gets aggregated, keyed by its blog key.
 *
 * A source with a 'feed_url' is fetched remotely; a source without one is
 * read from a local include file. 'rss_intro', when present, is the HTML
 * placed in front of that source's items in the generated RSS feeds.
 *
 * If you add anything to this, you may want to add stuff in
 * web/includes/Phil/classes/HTML.php (the entriesColumns() method) for the
 * new source.
 *
 * Sources that used to be listed here but are currently switched off:
 *   philipgyford  http://feeds.feedburner.com/PhilipGyford
 *   overmorgen    http://feeds.feedburner.com/Overmorgen
 *   whitstillman  http://feeds.feedburner.com/WhitStillman
 */
$sources = array();

// Local blogs (no 'feed_url').
$sources['writing'] = array(
    'rss_intro' => 'Posted in <a href="http://www.gyford.com/phil/writing/">Phil Gyford\'s Writing</a>',
);
$sources['links'] = array();
$sources['comments'] = array(
    'rss_intro' => 'Posted in <a href="http://www.gyford.com/phil/comments/">Comments posted elsewhere</a>',
);

// Remote feeds.
$sources['septivium'] = array(
    'feed_url' => 'http://feeds2.feedburner.com/Septivium',
    'rss_intro' => 'Posted at <a href="http://www.septivium.com/">Septivium</a>',
);
$sources['pepysdiarynews'] = array(
    'feed_url' => 'http://feeds.feedburner.com/PepysDiary-SiteNews',
    'rss_intro' => 'Posted at <a href="http://www.pepysdiary.com/about/news/">Pepys\' Diary Site News</a>',
);

// YouTube favourites, read through the YouTube Data API v3 (JSON).
// The previous XML feed was:
// https://www.youtube.com/feeds/videos.xml?playlist_id=FLgycUrFUa7ABvcs_vHY0l2Q
$sources['youtube'] = array(
    'feed_url' => 'https://www.googleapis.com/youtube/v3/playlistItems?part=snippet%2CcontentDetails%2Cstatus&playlistId=FLgycUrFUa7ABvcs_vHY0l2Q&key=' . YOUTUBE_API_KEY,
    'rss_intro' => 'Favourited <a href="https://www.youtube.com/playlist?list=FLgycUrFUa7ABvcs_vHY0l2Q">on YouTube</a>',
);
/**
 * Collects every entry that will be displayed.
 * Keys look like '1102167975_flickr' (a sortable prefix, an underscore and
 * the source); each value is an array of data about one entry.
 */
$ITEMS = array();

/**
 * Gather the entries from every kind of source. Each function adds what it
 * finds to the global $ITEMS.
 */
getFlickr();
getYouTube($sources);
getLocalWeblogs($sources);
getRemoteWeblogs($sources);

// Newest first: sort by key, descending.
krsort($ITEMS);

// The writers get plain lists, without the sort keys.
writeHTML(array_values($ITEMS));
writeRSS('personal', $sources, array_values($ITEMS));
writeRSS('everything', $sources, array_values($ITEMS));
/**
 * Adds the photos for the time period to the global $ITEMS array.
 *
 * Asks Flickr::getDayCounts() for the last DAYS_TO_LIST_HTML days (counted
 * back from the current time, as a GMT date) and, for each day that has
 * photos, stores the result of Flickr::dayPhotosForDisplay() under the key
 * '<fromdate>_flickr'.
 *
 * According to the original notes, each day's data also carries
 * featured_indexes and thumbnail_indexes arrays indicating which photos are
 * to be featured on the website; that structure is produced by the Flickr
 * class and is not visible here.
 */
function getFlickr() {
    global $ITEMS;

    $seconds_back = 86400 * (DAYS_TO_LIST_HTML - 1);
    $first_day = gmdate('Y-m-d', time() - $seconds_back);

    foreach (Flickr::getDayCounts($first_day, DAYS_TO_LIST_HTML) as $day) {
        if (!($day['count'] > 0)) {
            // Nothing was posted on this day.
            continue;
        }

        $day_photos = Flickr::dayPhotosForDisplay($day['fromdate'], 'posted');

        // Every photo that day may have been marked not to be displayed,
        // in which case there is nothing to show for it.
        if (!($day_photos['photos']['total'] > 0)) {
            continue;
        }

        $ITEMS[$day['fromdate'] . '_flickr'] = $day_photos;
    }
}
/**
 * Merges the entries of every local blog into the global $ITEMS array.
 *
 * A source is treated as local when it has no 'feed_url'. For each one the
 * file MT_INCLUDE_DIR/mt_recent_entries_<blog_key>.php is included; that file
 * is expected to set $entries to an array of entries keyed like $ITEMS.
 *
 * @param array $sources The sources configuration, keyed by blog key.
 */
function getLocalWeblogs($sources) {
    global $ITEMS;

    foreach ($sources as $blog_key => $source_data) {
        if (isset($source_data['feed_url'])) {
            // Remote source; not handled here.
            continue;
        }

        // Reset before every include. Otherwise a missing or broken include
        // would either re-merge the previous blog's entries or hand a
        // non-array to array_merge() (a TypeError on PHP 8, and on older
        // versions a NULL result that wiped out $ITEMS).
        $entries = null;

        // include emits its own warning if the file cannot be read.
        include MT_INCLUDE_DIR . 'mt_recent_entries_' . $blog_key . '.php';

        if (!is_array($entries)) {
            trigger_error(
                "getLocalWeblogs: no \$entries array available for '" . $blog_key . "'",
                E_USER_WARNING
            );
            continue;
        }

        $ITEMS = array_merge($ITEMS, $entries);
    }
}
/**
 * Adds the videos from the YouTube playlist to the global $ITEMS array.
 *
 * Reads $sources['youtube']['feed_url'], which must return JSON with an
 * 'items' list whose elements each have a 'snippet' (the shape read below:
 * title, description, publishedAt, resourceId.videoId and
 * thumbnails.default.url). Each usable video is stored under the key
 * '<unix timestamp>_youtube'.
 *
 * Does nothing if the URL is missing, cannot be fetched or does not decode
 * to that shape. Items without a video ID, a parseable publication date or a
 * default thumbnail are skipped.
 *
 * @param array $sources The sources configuration, keyed by blog key.
 */
function getYouTube($sources) {
    global $ITEMS;

    if (!isset($sources['youtube']['feed_url'])) {
        return;
    }

    // file_get_contents() emits its own warning when the fetch fails.
    $contents = file_get_contents($sources['youtube']['feed_url']);
    if ($contents === false) {
        return;
    }

    $json = json_decode($contents, true);
    if (!is_array($json) || !isset($json['items']) || !is_array($json['items'])) {
        return;
    }

    foreach ($json['items'] as $item) {
        if (!isset(
            $item['snippet']['resourceId']['videoId'],
            $item['snippet']['publishedAt'],
            $item['snippet']['thumbnails']['default']['url']
        )) {
            continue;
        }
        $snippet = $item['snippet'];

        $published = date_create($snippet['publishedAt']);
        if ($published === false) {
            continue;
        }

        // Use the real timestamp for the sort key and format the date in the
        // default timezone. Formatting the parsed date directly and feeding
        // that string to strtotime() re-read a UTC wall-clock time as local
        // time, shifting the entry whenever the server is not on UTC.
        $timestamp = $published->getTimestamp();
        $item_datetime = date('Y-m-d H:i:s', $timestamp);

        $video_url = 'https://www.youtube.com/watch?v=' . $snippet['resourceId']['videoId'];
        $description = isset($snippet['description']) ? $snippet['description'] : '';

        $entry = array();
        $entry['blog_key'] = 'youtube';
        $entry['title'] = isset($snippet['title']) ? $snippet['title'] : '';
        $entry['permalink'] = $video_url;
        $entry['author'] = '';
        $entry['date'] = $item_datetime;
        $entry['body'] = '<a href="' . $video_url . '"><img src="' . $snippet['thumbnails']['default']['url'] . '" alt="YouTube thumbnail" /></a>';
        $entry['extended'] = true;
        $entry['more'] = '<p>' . $description . '</p>';
        $entry['excerpt'] = strip_tags($entry['body']);

        // The key suffix used to come from an undefined $blog_key, which
        // produced keys like '1441108800_' instead of '1441108800_youtube'.
        $ITEMS[$timestamp . '_youtube'] = $entry;
    }
}
/**
 * Fetches every remote feed with SimplePie and adds its items to the global
 * $ITEMS array under keys of the form '<unix timestamp>_<blog_key>'.
 *
 * A source is treated as remote when it has a 'feed_url'. Feeds that
 * SimplePie cannot load (empty $feed->data) are skipped, as are items
 * without a date, because they cannot be given a sortable key.
 *
 * For text feeds the content up to and including the first </p> becomes the
 * entry's 'body' and anything after it becomes 'more'.
 *
 * @param array $sources The sources configuration, keyed by blog key.
 */
function getRemoteWeblogs($sources) {
    global $ITEMS;

    foreach ($sources as $blog_key => $source_data) {
        if (!isset($source_data['feed_url'])) {
            continue;
        }

        $feed = new SimplePie();
        // Feedburner wasn't letting us see the feed with the default useragent.
        $feed->set_useragent('Gyford.com');
        $feed->enable_cache(true);
        $feed->set_cache_location(SIMPLEPIE_CACHE_DIR);
        $feed->set_feed_url($source_data['feed_url']);
        // Assuming we know that all included HTML tags are safe.
        $feed->strip_htmltags(false);
        // We also know we want to keep attributes (particularly 'class').
        // (If the feeds were not trusted, the alternative would be to strip
        // tags selectively, keeping object/param/embed and 'class'.)
        $feed->strip_attributes(false);
        $feed->init();

        if (!$feed->data) {
            continue;
        }

        foreach ($feed->get_items() as $item) {
            $item_date = $item->get_date('Y-m-d H:i:s');
            if (!$item_date) {
                // No usable date, so no sortable key for this item.
                continue;
            }

            // get_author() returns null when the item names no author.
            $author = $item->get_author();

            $entry = array();
            $entry['blog_key'] = $blog_key;
            $entry['title'] = $item->get_title();
            $entry['permalink'] = $item->get_permalink();
            $entry['author'] = $author ? $author->get_name() : '';
            $entry['date'] = $item_date;

            if ($blog_key == 'youtube') {
                // We need to add an image to the YouTube data.
                // Default to an empty body so 'excerpt' below always has
                // something to read, even without an enclosure.
                $entry['body'] = '';
                $enclosure = $item->get_enclosure();
                if ($enclosure) {
                    $entry['body'] = '<a href="' . $item->get_permalink() . '" title="' . $item->get_title() . '"><img src="' . $enclosure->get_thumbnail() . '" alt="YouTube thumbnail" /></a>';
                    $entry['extended'] = true;
                    $entry['more'] = '<p>' . $enclosure->get_description() . '</p>';
                }
            } else {
                // All the standard text-based feeds, probably generated by me.
                $content = (string) $item->get_content();
                if (preg_match("/^(.*?<p>.*?<\/p>)(.*)?$/s", $content, $matches)) {
                    $entry['body'] = $matches[1];
                    if (isset($matches[2]) && $matches[2] != '') {
                        $entry['extended'] = true;
                        $entry['more'] = trim($matches[2]);
                    }
                } else {
                    // No <p>...</p> to split on: use the whole content.
                    $entry['body'] = $content;
                }
            }

            $entry['excerpt'] = strip_tags($entry['body']);

            $ITEMS[strtotime($item_date) . '_' . $blog_key] = $entry;
        }
    }
}
/**
 * Write the files that will be included in the site.
 *
 * Three files are written into SAVED_FILE_DIR, each holding the HTML
 * returned by HTML::entries():
 *   home.html          all entries, in the old front page list view
 *   home_columns.html  all entries, in the 'columns' format
 *   photos.html        only the entries whose blog_key is 'photos'
 *
 * A file that cannot be opened or written raises a warning and is skipped;
 * the remaining files are still attempted.
 *
 * @param array $entries An array of hashes - each hash info about an entry.
 */
function writeHTML($entries) {
    // Only the photo entries go into the Photos page include.
    $photo_entries = array();
    foreach ($entries as $entry) {
        if (isset($entry['blog_key']) && $entry['blog_key'] == 'photos') {
            $photo_entries[] = $entry;
        }
    }

    // filename => array(entries to render, options for HTML::entries()).
    $files = array(
        'home.html' => array($entries, array('return_html' => true)),
        'home_columns.html' => array($entries, array('return_html' => true, 'format' => 'columns')),
        'photos.html' => array($photo_entries, array('return_html' => true)),
    );

    foreach ($files as $filename => $job) {
        $path = SAVED_FILE_DIR . $filename;

        $fh = @fopen($path, 'w');
        if ($fh === false) {
            trigger_error("writeHTML: could not open '" . $path . "' for writing", E_USER_WARNING);
            continue;
        }

        // As before, the HTML is only generated once the file is open.
        $html = (string) HTML::entries($job[0], $job[1]);

        if (@fwrite($fh, $html) === false) {
            trigger_error("writeHTML: could not write to '" . $path . "'", E_USER_WARNING);
        }

        // Always close the handle. It used to stay open whenever fwrite()
        // returned a falsy value, which includes writing an empty string.
        fclose($fh);
    }
}
/**
 * Builds one of the aggregated RSS feeds and writes it to its file.
 *
 * The 'personal' feed only takes entries from a fixed list of blogs and is
 * written to RSS_FILE_PATH_PERSONAL; any other $type takes entries from
 * every blog and is written to RSS_FILE_PATH_EVERYTHING.
 *
 * @param string $type Either 'personal' or 'everything'.
 * @param array $sources The sources configuration; only each source's
 *                       optional 'rss_intro' is used here.
 * @param array $entries The big array of all entries, in the order they
 *                       should appear, with sequential integer keys.
 */
function writeRSS($type, $sources, $entries) {
    if ($type == 'personal') {
        $feed_title = 'Phil Gyford (personal)';
        $feed_description = 'Writing, photos and links by Phil Gyford';
        $blogs_to_use = array('writing', 'photos', 'links', 'comments', 'philipgyford');
    } else {
        $feed_title = 'Phil Gyford (everything)';
        $feed_description = 'Most things created or liked by Phil Gyford across the web';
        $blogs_to_use = array(); // ALL!
    }

    $rss = new RSSWriter(
        "http://www.gyford.com/",
        $feed_title,
        $feed_description,
        array (
            "dc:date" => date(DATE_ATOM),
            "dc:creator" => "Phil Gyford <[email protected]>",
            "dc:publisher" => "Phil Gyford <[email protected]>",
            "dc:language" => "en-gb"
        )
    );
    $rss->setImage("http://www.gyford.com/phil/images/rss.jpg", "Phil Gyford");
    $rss->useModule("content", "http://purl.org/rss/1.0/modules/content/");
    $rss->useModule("wfw", "http://wellformedweb.org/CommentAPI/");

    foreach ($entries as $n => $entry) {
        // NOTE(review): the limit applies to the position in $entries, so
        // entries skipped below still count towards it. Kept as it was.
        if ($n + 1 > ENTRIES_TO_LIST_RSS) {
            break;
        }

        // Only add this entry if we're using ALL blogs ($blogs_to_use is
        // empty) or this entry's blog is in the $blogs_to_use list.
        if (count($blogs_to_use) != 0 && !in_array($entry['blog_key'], $blogs_to_use)) {
            continue;
        }

        $comments = '';
        $content_encoded = '';

        if ($entry['blog_key'] == 'photos') {
            if (!($entry['photos']['total'] > 0)) {
                // Nothing to show for this day. This case used to fall
                // through and add an item built from the previous entry's
                // link, title and description (or from undefined variables).
                continue;
            }

            $link = $entry['photos']['flickr_url'];
            $title = 'Photos for ' . date("j F Y", strtotime($entry['date']));
            $description = '';

            foreach ($entry['photos']['photo'] as $photo) {
                if ($photo['photo']['description'] == '') {
                    $description_text = '';
                } else {
                    $description_text = "<br />\n" . preg_replace("/\n/", "<br />\n", $photo['photo']['description']);
                }

                $description .= '<table border="0" cellspacing="10"><tbody><tr>' . "\n"
                    . '<td valign="top" align="center" width="100" style="vertical-align: top;"><a href="' . $photo['urls']['photopage'] . '" title="See a larger version"><img src="' . $photo['urls']['thumbnail'] . '" alt="' . $photo['photo']['title'] . '" border="0" /></a></td>' . "\n"
                    . '<td valign="top" style="vertical-align: top;"><strong><a href="' . $photo['urls']['photopage'] . '">' . $photo['photo']['title'] . "</a></strong>\n"
                    . $description_text . "</td>\n"
                    . "</tr></tbody></table>\n";
            }
        } elseif ($entry['blog_key'] == 'links') {
            $link = $entry['permalink'];
            $title = 'Links for ' . date("j F Y", strtotime($entry['date']));
            $description = $entry['body'];

            if (isset($entry['comments_active']) && $entry['comments_active']) {
                $comments = $entry['permalink'] . '#comments';
            }
        } else {
            // writing, philipgyford, etc.
            if (isset($entry['remoteurl'])) {
                // Comments feed.
                $link = $entry['remoteurl'];
            } else {
                $link = $entry['permalink'];
            }
            $title = $entry['title'];

            if (isset($entry['extended']) && $entry['extended']) {
                $description = $entry['body'] . "\n<p><a href=\"$link#more\">Read more…</a></p>";
                $content_encoded = $entry['body'] . "\n\n" . $entry['more'];
            } else {
                $description = $entry['body'];
                $content_encoded = $entry['body'];
            }

            if (isset($entry['comments_active']) && $entry['comments_active']) {
                $comments = $entry['permalink'] . '#comments';
            }
        }

        $item_details = array (
            'description' => $description
        );
        if ($content_encoded != '') {
            $item_details['content:encoded'] = $content_encoded;
        }
        if ($comments != '') {
            $item_details['wfw:comments'] = $comments;
        }

        $blog_key = $entry['blog_key'];
        if (isset($sources[$blog_key]['rss_intro'])) {
            $intro = '<p>' . $sources[$blog_key]['rss_intro'] . "</p>\n";
            $item_details['description'] = $intro . $item_details['description'];
            // Only extend content:encoded when the item has one. Prepending
            // unconditionally read an unset key and produced an element
            // holding nothing but the intro.
            if (isset($item_details['content:encoded'])) {
                $item_details['content:encoded'] = $intro . $item_details['content:encoded'];
            }
        }

        $item_details['dc:creator'] = 'phil';
        $item_details['dc:date'] = date('c', strtotime($entry['date']));

        $rss->addItem($link, $title, $item_details);
    }

    // RSSWriter prints its stuff to screen.
    // So we use output buffering to write it to a file instead.
    $file_path = $type == 'personal' ? RSS_FILE_PATH_PERSONAL : RSS_FILE_PATH_EVERYTHING;
    ob_start();
    $rss->serialize();
    $text = (string) ob_get_clean();

    $fh = @fopen($file_path, 'w');
    if ($fh === false) {
        trigger_error("writeRSS: could not open '" . $file_path . "' for writing", E_USER_WARNING);
        return;
    }
    if (@fwrite($fh, $text) === false) {
        trigger_error("writeRSS: could not write to '" . $file_path . "'", E_USER_WARNING);
    }
    // Always close the handle, even when the write failed or wrote nothing.
    fclose($fh);
}
/**
 * Replacement for file_get_contents for when URL file-access is disabled.
 * Fetches the URL with cURL instead.
 * http://uk2.php.net/manual/en/function.fopen.php#55922
 *
 * @param string $url
 * @return string The contents of the file or an empty string on failure.
 */
function fetchFileContents($url)
{
    $curl = curl_init();

    curl_setopt_array($curl, array(
        CURLOPT_URL => $url,
        CURLOPT_HEADER => 0,
        // Without this we were getting Internal Server Errors
        // when Last.fm was unavailable and returning nothing after
        // a long delay.
        CURLOPT_TIMEOUT => 5,
        // Return the result instead of printing it.
        CURLOPT_RETURNTRANSFER => 1,
    ));

    $response = curl_exec($curl);

    // For debugging: if curl_errno($curl) is non-zero here,
    // print curl_error($curl) to find out what went wrong.

    curl_close($curl);

    // curl_exec() returns false when something went wrong; an empty
    // response is treated as a failure too.
    return (is_string($response) && strlen($response)) ? $response : '';
}
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment