Created
October 4, 2012 15:37
-
-
Save aaronpk/3834460 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
class ServiceHelper {

  // Byte offset at which the citation link starts in the text built by the most
  // recent shorten* call that was given a short code. For shortenForTwitter() the
  // offset is measured while other links are still replaced by their 20-character
  // placeholders, so it can differ from the offset in the returned tweet.
  public $citationLinkPosition;

  // Byte length of $citationLinkText.
  public $citationLinkLength;

  // The citation link exactly as it was appended to the text,
  // e.g. "aaron.pk/nXXX0" or "http://aaron.pk/nXXX0".
  public $citationLinkText;

  /**
   * Shorten text so it fits in a 140-character tweet, optionally followed by a
   * citation link built from $shortDomain and $shortCode.
   *
   * Anything Twitter might treat as a link is counted as 20 characters,
   * regardless of its real length.
   *
   * Lengths are measured with strlen(), i.e. in bytes.
   *
   * @param string       $text        The full tweet text
   * @param string|false $shortCode   The code part of the shortlink, like nXXX0. FALSE for no citation.
   * @param string|false $shortDomain The domain of the shortlink, like aaron.pk
   * @return string The text to send to Twitter
   */
  public function shortenForTwitter($text, $shortCode=FALSE, $shortDomain=FALSE) {
    $placeholders = array();
    $rawTweet = $text;

    // Catch anything that Twitter might think is a link and swap it for a placeholder.
    // A placeholder never matches the pattern itself (it contains no "."), so the loop ends
    // once every link-like token has been replaced.
    while(preg_match('|(https?://)?[a-z0-9\-]+\.[a-z]{2,10}/?([^ ]+)?|i', $rawTweet, $match, PREG_OFFSET_CAPTURE)) {
      // With PREG_OFFSET_CAPTURE, $match[0] is array(matched text, byte offset)
      list($link, $offset) = $match[0];

      // Placeholder is ':::' plus 17 hex characters = 20 characters, the length these entities
      // are counted as. The sequence is unlikely to appear in body text.
      $placeholder = ':::'.substr(sha1($link.rand(1000000,9999999)), 0, 17);

      $rawTweet = substr_replace($rawTweet, $placeholder, $offset, strlen($link));
      $placeholders[$placeholder] = $link;
    }

    // The template only has to be as long as a wrapped link (20 characters) and start with "http"
    $citationLinkTemplate = $shortCode ? 'http://t.co/xxxxxxxx' : '';

    $shortText = $this->shortenToLengthWithCitation($rawTweet, 140, $shortCode, $shortDomain, $citationLinkTemplate);

    // Re-expand the placeholders before sending to Twitter
    return str_replace(array_keys($placeholders), array_values($placeholders), $shortText);
  }

  /**
   * Shorten text so it fits in 256 characters, optionally followed by a
   * citation link of the form "$shortDomain/$shortCode".
   *
   * Links in the text are counted at their real length.
   *
   * @param string       $text        The full post text
   * @param string|false $shortCode   The code part of the shortlink, like nXXX0. FALSE for no citation.
   * @param string|false $shortDomain The domain of the shortlink, like aaron.pk
   * @return string The shortened text
   */
  public function shortenForAppDotNet($text, $shortCode=FALSE, $shortDomain=FALSE) {
    $citationLinkTemplate = $shortCode ? $shortDomain.'/'.$shortCode : '';

    return $this->shortenToLengthWithCitation($text, 256, $shortCode, $shortDomain, $citationLinkTemplate);
  }

  /**
   * Fit $text into $length bytes including the citation.
   *
   * If the text fits as is, the result is "text (link)". Otherwise trailing words are
   * dropped, hashtags are moved to the end, and the result is "text... #tags link".
   * Without a short code no link is appended.
   *
   * When a short code is given, $citationLinkPosition, $citationLinkLength and
   * $citationLinkText are updated to describe the appended link.
   *
   * @param string       $text                 Text to shorten
   * @param int          $length               Maximum length of the result
   * @param string|false $shortCode            Code part of the shortlink, FALSE for no citation
   * @param string|false $shortDomain          Domain of the shortlink
   * @param string       $citationLinkTemplate Stand-in with the length the link will be counted as.
   *                                           If it starts with "http" the appended link gets an "http://" prefix.
   * @return string
   */
  private function shortenToLengthWithCitation($text, $length, $shortCode=FALSE, $shortDomain=FALSE, $citationLinkTemplate='') {
    $shortText = $text;

    // First pass: does the text fit next to a parenthesised citation ' (link)'?
    $citation = $shortCode ? ' (' . $citationLinkTemplate . ')' : '';
    $firstPass = $this->dropWordsToFit($text, $length - strlen($citation));
    $shortened = $firstPass[2];

    // If the text had to be shortened, redo it from the original text with room for
    // the ellipsis form of the citation, '... link'
    if($shortened) {
      $citation = $shortCode ? '... ' . $citationLinkTemplate : '...';
      list($words, $hashtags) = $this->dropWordsToFit($text, $length - strlen($citation));

      // Trim off punctuation before the ellipsis. This makes the text possibly slightly under the limit.
      // rtrim() removes the separator space when there are no hashtags to put after it.
      $shortText = rtrim(trim(implode(' ', $words), ',.:;') . '... ' . implode(' ', $hashtags));
    }

    // Add the citation to the end
    if($shortCode) {
      $linkText = $shortDomain . '/' . $shortCode;
      if(preg_match('/^http/', $citationLinkTemplate))
        $linkText = 'http://' . $linkText;

      // Shortened text already ends in '...', so the link follows bare; otherwise it goes in parentheses
      $prefix = trim($shortText) . ($shortened ? ' ' : ' (');

      $this->citationLinkPosition = strlen($prefix);
      $this->citationLinkText = $linkText;
      $this->citationLinkLength = strlen($linkText);

      $shortText = $prefix . $linkText . ($shortened ? '' : ')');
    }

    return $shortText;
  }

  /**
   * Drop words from the end of $text until the remaining words plus the hashtags
   * (joined by single spaces) are at most $maxLength bytes long.
   *
   * Hashtags are preserved: each pass removes the last word that is not a hashtag, and
   * hashtags passed over on the way are set aside in their original order. Only when no
   * ordinary words are left are hashtags dropped, last one first, so the loop always ends.
   *
   * @param string $text
   * @param int    $maxLength
   * @return array array(remaining words, preserved hashtags, whether anything was changed)
   */
  private function dropWordsToFit($text, $maxLength) {
    $words = explode(' ', $text);
    $hashtags = array();
    $candidate = $text;
    $shortened = FALSE;

    while(strlen($candidate) > $maxLength) {
      // Nothing left to remove (only reachable when $maxLength is negative)
      if(count($words) == 0 && count($hashtags) == 0)
        break;

      $removedWord = FALSE;
      for($i=count($words)-1; $i>=0 && !$removedWord; $i--) {
        if(preg_match('/#[^ ]+/', $words[$i])) {
          // Walking backwards, so prepend to keep the hashtags in their original order
          array_unshift($hashtags, $words[$i]);
        } else {
          $removedWord = TRUE;
        }
        array_splice($words, $i, 1);
      }

      // Only hashtags remain and they are still too long: give up the last one
      if(!$removedWord)
        array_pop($hashtags);

      // Sample text used to measure the length. This is not the final text that will be returned.
      $candidate = implode(' ', array_merge($words, $hashtags));
      $shortened = TRUE;
    }

    return array($words, $hashtags, $shortened);
  }
}
/** ******************************************** **/

// Demo driver: run each sample post through the Twitter shortener and print
// the original text, its length, the shortened text and the shortened length.
$sample = array(
  'The real test. Posting from the xtendr iPhone app to my site, syndicating to Twitter, app.net and PuSH. #indieweb',
  'Random post with app.net short link and a full link http://social-igniter.com/blog/2012/08/why-we-support-app-dot-net and some text after, probably too long to post to Twitter as is',
  'Naturally the Twitter syndication failed, because it caught my 7-character "app.net" and turned it into a 20-character "t.co" URL....',
  'Testing my new posting API. This one should use link annotations. #indieweb',
  'I thought I was going to stop using Twitter because of their ToS changes. Turns out it\'s because of this: http://farm9.staticflickr.com/8434/7805846606_7cd9304bff_o.png',
  'This is the first good argument for app.net that I\'ve heard: http://social-igniter.com/blog/2012/08/why-we-support-app-dot-net http://news.ycombinator.com/item?id=4367965',
  'I had a dream last night where I was an XCode project. Every time the alarm went off, that was running "Build and Run." If I would fail to compile, I\'d have to fix the errors and try again. The next time the alarm went off, I\'d attempt to "run" again, and I kept failing to compile a few times when I kept hitting "snooze." The last thing I did before I actually woke up was fix a missing semicolon which finally allowed me to compile and then I was awake. #programming',
  'Just got in the world\'s dumbest bike accident on the waterfront. This adorable kid tried to give me a high five as I biked by, but I overcompensated with my front brake and flipped over my handlebars. I had no idea it was that easy to flip over.',
);

foreach($sample as $text) {
  // Show the input and its length before doing any work on it
  echo $text, "\n", "Original Length: ", strlen($text), "\n";

  // A fresh helper per sample, as each call records citation details on the instance
  $helper = new ServiceHelper();

  // Swap in the line below to try the app.net variant instead
  #$short = $helper->shortenForAppDotNet($text, 'nXXX0', 'aaron.pk');
  $short = $helper->shortenForTwitter($text, 'nXXX0', 'aaron.pk');

  echo $short, "\n", "Length: ", strlen($short), "\n", "\n";
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment