Created
June 16, 2013 07:01
-
-
Save simonwelsh/5791161 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Rebuilds $object->text without the extra characters that an entity's
// amended_len covers beyond its len, and shifts each entity's pos so that it
// indexes into the rebuilt text.
//
// Reads:  $object (from the enclosing scope) with ->text and
//         ->entities->{hashtags,links,mentions}. Every entity is read for
//         ->pos and ->len, and optionally ->amended_len.
// Writes: $new_text (the rebuilt string). Each entity object also gains a
//         ->type and has its ->pos adjusted in place.

// Tell mbstring to always use UTF-32, which is what the API uses.
// NOTE(review): every mb_substr() call below interprets $text in this
// encoding. If $text actually holds UTF-8 bytes (for example straight out of
// json_decode), this must be "UTF-8" instead -- confirm against the caller.
mb_internal_encoding("UTF-32");

// These come from the API
$text = $object->text;

$pieces = [];     // the pieces of the new text we're building
$offset = 0;      // the amount of adjustment positive or negative we've seen so far
$text_index = 0;  // current pos in the ORIGINAL text

// Load all entities into one array and sort by pos.
// If you're not displaying, and just care about links, replace this
// section with $entities = $object->entities->links;
$entity_types = [
    'hashtags' => 'hashtag',
    'links'    => 'link',
    'mentions' => 'mention',
];

$entities = [];
foreach ($entity_types as $list_name => $type) {
    // A missing or empty list simply contributes no entities.
    if (empty($object->entities->$list_name)) {
        continue;
    }

    foreach ($object->entities->$list_name as $entity) {
        $entity->type = $type;
        $entities[] = $entity;
    }
}

// Ascending by pos: the loop below consumes the text left to right and
// accumulates $offset as it goes, so entities must arrive in text order.
usort($entities, function ($a, $b) {
    return $a->pos - $b->pos;
});

foreach ($entities as $entity) {
    // Positions in the original text, captured before pos is adjusted below.
    $entity_start = $entity->pos;
    $entity_end = $entity->pos + $entity->len;

    // this key not always present, default to entity_end
    if (isset($entity->amended_len)) {
        $entity_amended_end = $entity->pos + $entity->amended_len;
    } else {
        $entity_amended_end = $entity_end;
    }

    if ($text_index != $entity_start) {
        // catch anything before the start of the entity
        $pieces[] = mb_substr($text, $text_index, $entity_start - $text_index);
    }

    // Objects are passed by handle, so this adjusts the entity in place.
    $entity->pos += $offset;

    // Everything between entity_end and entity_amended_end is dropped from
    // the new text, so later entities shift by that amount (0 when equal).
    $offset += $entity_end - $entity_amended_end;

    // Slice from the original position: $entity->pos now refers to the new
    // text and would cut the wrong span out of $text.
    $pieces[] = mb_substr($text, $entity_start, $entity->len);

    // Resume after the amended span so its extra characters are skipped.
    $text_index = $entity_amended_end;
}

// catch anything that comes after the last entity
$pieces[] = mb_substr($text, $text_index);

$new_text = implode('', $pieces);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment