Last active
December 18, 2015 02:38
-
-
Save tpokorra/5711916 to your computer and use it in GitHub Desktop.
This script creates an RSS feed from TYPO3 pages without touching the TYPO3 installation.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Cut the text found between two markers out of $contents.
 *
 * The returned text excludes both markers. $contents is passed by reference and is
 * consumed: after the call it holds the remainder of the input, beginning AT the end
 * marker (the end marker itself is not removed), so successive calls walk forward
 * through a document.
 *
 * Special cases:
 *  - A start marker of '^' means "from the beginning of $contents".
 *  - An end marker of '^' means "up to the end of $contents".
 *  - If the start marker looks like an opening tag ('<name ...') and the end marker like
 *    a closing tag ('</...'), nested elements with the same tag name are balanced so the
 *    matching closing tag is used rather than the first one.
 *  - If the start marker is not found, $contents is set to '' and '' is returned.
 *  - If the end marker is not found (or the tags never balance), everything up to the end
 *    of $contents is returned and $contents becomes ''.
 *
 * Limitation: nesting is counted with a plain substring match on '<name', so a tag whose
 * name merely starts with the same letters (e.g. '<divider' vs '<div') is counted too.
 *
 * @param string $contents        Text to search; reduced to the unconsumed remainder.
 * @param string $startContentStr Start marker, or '^' for the beginning of the text.
 * @param string $endContentStr   End marker, or '^' for the end of the text.
 * @return string The text between the markers, or '' when the start marker is missing.
 */
function getContent(&$contents, $startContentStr, $endContentStr)
{
    // If the markers are an HTML open/close pair, remember the tag name so nested
    // elements of the same kind can be counted below.
    $element = '';
    if ($startContentStr !== '' && $startContentStr[0] == '<' && substr($endContentStr, 0, 2) == '</')
    {
        // The tag name runs from just after '<' up to the first whitespace or '>'.
        $nameLength = strcspn($startContentStr, " \t\r\n>", 1);
        $element = (string) substr($startContentStr, 1, $nameLength);
    }

    if ($startContentStr === '^')
    {
        $start = 0;
    }
    else
    {
        $contentPos = strpos($contents, $startContentStr);
        if ($contentPos === false)
        {
            $contents = '';
            return '';
        }
        $start = $contentPos + strlen($startContentStr);
    }

    if ($endContentStr === '^')
    {
        $end = strlen($contents);
    }
    else
    {
        $end = strpos($contents, $endContentStr, $start);

        if ($element !== '')
        {
            // Advance to later end markers until the captured text contains as many
            // opening as closing tags of this element, i.e. the nesting is balanced.
            $openTag = '<'.$element;
            $closeTag = '</'.$element.'>';
            while ($end !== false)
            {
                $candidate = substr($contents, $start, $end - $start);
                if (substr_count($candidate, $openTag) == substr_count($candidate, $closeTag))
                {
                    break;
                }
                $end = strpos($contents, $endContentStr, $end + 1);
            }
        }

        if ($end === false)
        {
            // End marker missing or tags unbalanced: fall back to the rest of the text.
            $end = strlen($contents);
        }
    }

    // Casts guard against substr() returning false at the very end of the string on old PHP.
    $result = (string) substr($contents, $start, $end - $start);
    $contents = (string) substr($contents, $end);
    return $result;
}
/**
 * Download a URL with cURL and return the response body.
 *
 * Used instead of file_get_contents because, per the original author's note, that
 * function did not seem to return the full page.
 *
 * Both the connect timeout and the overall timeout are set to 0, which for cURL means
 * "no limit": the call waits as long as the server takes.
 *
 * @param string $URL Address to fetch; HTML-escaped ampersands ('&amp;') are decoded first.
 * @return string|false The response body, or FALSE if cURL could not be initialised, the
 *                      transfer failed, or the body was empty.
 */
function curl_get_file_contents($URL)
{
    $c = curl_init();
    if ($c === false)
    {
        return FALSE;
    }

    curl_setopt($c, CURLOPT_TIMEOUT, 0);
    curl_setopt($c, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($c, CURLOPT_CONNECTTIMEOUT, 0);
    // URLs copied out of HTML attributes carry '&amp;' where the real URL has '&'.
    curl_setopt($c, CURLOPT_URL, str_replace('&amp;', '&', $URL));
    $agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.0.3705; .NET CLR 1.1.4322)';
    curl_setopt($c, CURLOPT_USERAGENT, $agent);

    $contents = curl_exec($c);

    // curl_close() has no effect from PHP 8.0 on; only call it where it still releases the handle.
    if (PHP_VERSION_ID < 80000)
    {
        curl_close($c);
    }

    return $contents ? $contents : FALSE;
}
/**
 * Fetch one page and print it as an RSS <item> element.
 *
 * The page is downloaded with curl_get_file_contents(); the item title is taken from the
 * page's <title> element and the item description from the <div id="content"> element
 * (both extracted with getContent()). Before output the description HTML is reduced:
 * HTML comments, the javascript:linkTo_UnCryptMailto(...) calls, and class/style/id
 * attributes are stripped.
 *
 * Everything written into the XML is escaped, so markup in the description reaches the
 * feed reader as escaped HTML and '&', '<', '>' or quotes in the title or URL cannot
 * break the document.
 *
 * If the download fails, an item with an empty title and description is still printed.
 *
 * @param int    $date_published Unix timestamp used for <pubDate>.
 * @param string $url            Page address; also used as <link> and <guid>.
 * @return void Output is echoed directly.
 */
function GetArticleFromTypo3($date_published, $url)
{
    $html = curl_get_file_contents($url);
    if ($html === FALSE)
    {
        // Make the "nothing fetched" case explicit instead of relying on false-to-string coercion.
        $html = '';
    }

    $title = getContent($html, '<title>', '</title>');
    $content = getContent($html, '<div id="content">', '</div>');

    // The 's' flag lets '.' span newlines so multi-line HTML comments are removed too.
    $content = preg_replace('%<!--.*?-->%s', '', $content);
    $content = preg_replace('%( javascript:linkTo_UnCryptMailto\().*?(\);)%i', '', $content);
    $content = preg_replace('%( class=").*?(")%i', '', $content);
    $content = preg_replace('%( style=").*?(")%i', '', $content);
    $content = preg_replace('%( id=").*?(")%i', '', $content);
    // NOTE(review): the source line here replaced a space with a space (a no-op); it
    // presumably targeted the '&nbsp;' entity before the listing was HTML-decoded.
    $content = str_replace('&nbsp;', ' ', $content);

    // Assumes the fetched page is UTF-8, matching the encoding the feed declares -- TODO confirm.
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;
    // The title is plain text: decode the page's HTML entities, then escape for XML.
    $xmlTitle = htmlspecialchars(html_entity_decode($title, ENT_QUOTES, 'UTF-8'), $flags, 'UTF-8');
    $xmlUrl = htmlspecialchars($url, $flags, 'UTF-8');
    // The description carries HTML; escaping it once embeds it as text inside the XML element.
    $xmlContent = htmlspecialchars((string) $content, $flags, 'UTF-8');

    echo "<item>\n";
    echo '<title>', $xmlTitle, "</title>\n";
    echo '<link>', $xmlUrl, "</link>\n";
    echo '<pubDate>', date(DATE_RSS, $date_published), "</pubDate>\n";
    echo "<dc:creator>Timotheus Pokorra (TBits)</dc:creator>\n";
    echo '<guid isPermaLink="false">', $xmlUrl, "</guid>\n";
    echo '<description>', $xmlContent, "</description>\n";
    echo "</item>\n";
}
/**
 * Send the XML content type and print the opening part of the feed: the XML declaration,
 * the <rss> and <channel> start tags, and the fixed channel metadata (title, link,
 * description, language). The elements opened here are closed by PrintRSSFooter().
 *
 * @return void Output is echoed directly.
 */
function PrintRSSHeader()
{
    header('Content-Type: text/xml');

    // The closing '>' of the declaration is kept apart from the '?' so the literal never
    // contains a PHP close-tag sequence.
    $declaration = '<?xml version="1.0" encoding="utf-8" ?' . '>';

    $lines = array(
        $declaration,
        '<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/">',
        '<channel>',
        '<title>TBits.net Kolab News</title>',
        '<link>http://www.tbits.net</link>',
        '<description>TBits.net works with the Kolab project and provides patches and extensions to Kolab on this RSS Feed</description>',
        '<language>en</language>',
    );

    foreach ($lines as $line)
    {
        echo $line, "\n";
    }
}
/**
 * Print the closing </channel> and </rss> tags that end the feed started by
 * PrintRSSHeader().
 *
 * @return void Output is echoed directly.
 */
function PrintRSSFooter()
{
    echo "</channel>\n";
    echo "</rss>\n";
}
// Emit the feed: channel header first, then one <item> per listed page, then the closing tags.
PrintRSSHeader();

// Each entry is (publication timestamp, page URL); items are printed in this order.
$articles = array(
    array(mktime(1, 0, 0, 6, 4, 2013), "http://www.tbits.net/tbits-opensource/install-nightly-build-from-gitkolaborg-master.html"),
    array(mktime(1, 0, 0, 3, 27, 2013), "http://www.tbits.net/tbits-opensource/kolab3multipledomains.html"),
);
foreach ($articles as $article)
{
    GetArticleFromTypo3($article[0], $article[1]);
}

PrintRSSFooter();
?> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment