Last active
August 29, 2015 14:10
-
-
Save dsnopek/c54216c038ae372d04b5 to your computer and use it in GitHub Desktop.
hook_update_N() function to convert <img> tags to Media tokens (Drupal)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Converts <img> tags that point at local public files into Media tokens.
 *
 * This is a template for a hook_update_N() implementation: rename it to
 * MYMODULE_update_NNNN() before use.
 *
 * For every row of the body and comment body field tables (data and revision)
 * whose text contains an <img> tag, the text is parsed as HTML. Each <img>
 * whose 'src' resolves to a row in {file_managed} (matched on a 'public://'
 * URI) is replaced by a '[[{...}]]' Media token carrying the file ID, a view
 * mode derived from the image style in the URL, and the width, height, style,
 * alt and title attributes of the original tag. Rows are written back with a
 * direct database update, so the filter, field and page caches are cleared at
 * the end.
 *
 * NOTE: Only 'public' files are supported; private file URLs are left alone.
 * NOTE: The image style names and CSS class names below are specific to the
 * site this was written for (it used panopoly_images) - adjust them to match
 * your own site.
 */
function hook_update_N() {
  // Caches the lookup result per 'src' value, since the same image is often
  // embedded many times (and appears again in every revision).
  $src_info_cache = array();

  // Image styles that map to the 'teaser' file view mode. Every other style
  // (including unknown ones and un-styled originals) maps to 'default'.
  $teaser_styles = array(
    'panopoly_image_half',
    'panopoly_image_quarter',
    'panopoly_image_square',
    'panopoly_image_thumbnail',
  );

  // We work directly in the database for performance reasons.
  $tables = array(
    'field_data_body' => 'body_value',
    'field_revision_body' => 'body_value',
    'field_data_comment_body' => 'comment_body_value',
    'field_revision_comment_body' => 'comment_body_value',
  );

  foreach ($tables as $table => $column) {
    if (!db_table_exists($table)) {
      continue;
    }

    $result = db_select($table, 'd')
      ->fields('d')
      ->execute();

    foreach ($result as $row) {
      // Cheap pre-check to skip field items without any images in them. The
      // value may be NULL or empty, and the tag name may be upper-case.
      if (empty($row->$column) || stripos($row->$column, '<img') === FALSE) {
        continue;
      }

      // DOMDocument::loadHTML() assumes ISO-8859-1 unless the markup declares
      // a charset, which corrupts UTF-8 text. Wrapping the fragment in a
      // document that declares UTF-8 avoids that, and also gives us a known
      // <body> element to serialize from afterwards.
      $dom = new DOMDocument();
      $previous_libxml_setting = libxml_use_internal_errors(TRUE);
      $loaded = $dom->loadHTML('<!DOCTYPE html><html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body>' . $row->$column . '</body></html>');
      // User-entered markup is rarely valid; discard the parser complaints
      // instead of letting them surface as PHP warnings.
      libxml_clear_errors();
      libxml_use_internal_errors($previous_libxml_setting);
      if (!$loaded) {
        continue;
      }
      $body = $dom->getElementsByTagName('body')->item(0);
      if (!$body) {
        continue;
      }

      $changed = FALSE;

      // Loop through all <img> tags. The XPath result is a snapshot, so it is
      // safe to replace nodes while iterating over it.
      $xpath = new DOMXPath($dom);
      foreach ($xpath->query('//img') as $imgTag) {
        $src = $imgTag->getAttribute('src');

        // Attempt to determine if the image is on this site, and find its
        // underlying FID and image style.
        if (isset($src_info_cache[$src])) {
          $src_info = $src_info_cache[$src];
        }
        else {
          $src_info = array(
            'fid' => NULL,
            'filename' => NULL,
            'style' => 'panopoly_image_original',
          );

          // Group 1 is the image style (empty string when the URL is not an
          // image style derivative), group 2 is the path below the public
          // files directory.
          if (preg_match('/sites\/[^\/]+\/files\/(?:styles\/([^\/]+)\/public\/)?(.*)$/', $src, $matches)) {
            if ($matches[1] !== '') {
              $src_info['style'] = $matches[1];
            }

            // Remove any query string (e.g. the 'itok' token) or fragment,
            // then undo the URL encoding so that the result matches the URI
            // stored in {file_managed} (e.g. '%20' => ' ').
            $filename = rawurldecode(preg_replace('/[?#].*$/', '', $matches[2]));

            if ($filename !== '') {
              $src_info['filename'] = $filename;
              // Finally, do the actual FID lookup based on filename.
              $src_info['fid'] = db_select('file_managed', 'f')
                ->fields('f', array('fid'))
                ->condition('f.uri', 'public://' . $filename)
                ->execute()
                ->fetchField();
            }
          }

          $src_info_cache[$src] = $src_info;
        }

        // Not a managed file on this site: leave the <img> tag untouched.
        if (empty($src_info['fid'])) {
          continue;
        }

        $media_info = array(
          'type' => 'media',
          'fid' => $src_info['fid'],
          'fields' => array(),
          'attributes' => array('style' => ''),
        );

        // Translate the image style into a File view mode.
        $view_mode = in_array($src_info['style'], $teaser_styles, TRUE) ? 'teaser' : 'default';
        $media_info['view_mode'] = $media_info['fields']['format'] = $view_mode;

        // Transfer simple attributes from the 'img' tag.
        foreach (array('width', 'height', 'style') as $attr) {
          if ($imgTag->hasAttribute($attr)) {
            $media_info['attributes'][$attr] = $imgTag->getAttribute($attr);
          }
        }

        // Transfer the special alt/title attributes, both as attributes and
        // as the corresponding file field overrides.
        foreach (array('alt', 'title') as $attr) {
          $value = $imgTag->getAttribute($attr);
          $media_info['attributes'][$attr] = $value;
          $media_info['fields']["field_file_image_{$attr}_text[und][0][value]"] = $value;
        }

        // Process the original 'class' attribute into the media token.
        if ($imgTag->hasAttribute('class')) {
          $classes = preg_split('/\s+/', $imgTag->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);

          // Map some old classes to new values.
          if (in_array('img-no-border', $classes, TRUE)) {
            $media_info['attributes']['class'] = 'img-no-border';
          }

          // Alignment classes become inline CSS; the first match wins.
          $alignment_css = '';
          if (in_array('image-center', $classes, TRUE)) {
            $alignment_css = 'display: block; margin-left: auto; margin-right: auto';
          }
          elseif (in_array('image-right', $classes, TRUE)) {
            $alignment_css = 'float: right';
          }
          elseif (in_array('image-left', $classes, TRUE)) {
            $alignment_css = 'float: left';
          }

          if ($alignment_css !== '') {
            // Separate the new declarations from any inline style copied from
            // the tag, otherwise the two run together into invalid CSS.
            $existing_css = rtrim($media_info['attributes']['style'], "; \t\n\r");
            $media_info['attributes']['style'] = ($existing_css === '')
              ? $alignment_css
              : $existing_css . '; ' . $alignment_css;
          }
        }

        // Hex-escape <, >, &, ' and " inside the JSON strings so that the DOM
        // serializer has nothing to HTML-escape within the token.
        $json = json_encode($media_info, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT);
        $media_token = $dom->createTextNode('[[' . $json . ']]');
        $imgTag->parentNode->replaceChild($media_token, $imgTag);
        $changed = TRUE;
      }

      // If any changes were made, then we serialize the DOM and update the
      // field value.
      if ($changed) {
        // There is no variant of saveHTML() that only returns the inner HTML,
        // so serialize each child of <body> in turn.
        $new_value = '';
        foreach ($body->childNodes as $child) {
          $new_value .= $dom->saveHTML($child);
        }

        // Only the text column is written; the remaining conditions identify
        // exactly the row that was read.
        db_update($table)
          ->fields(array($column => $new_value))
          ->condition('entity_type', $row->entity_type)
          ->condition('entity_id', $row->entity_id)
          ->condition('revision_id', $row->revision_id)
          ->condition('deleted', $row->deleted)
          ->condition('language', $row->language)
          ->condition('delta', $row->delta)
          ->execute();
      }
    }
  }

  // Since we made the changes directly in the database, we need to clear
  // the filter, field and page caches manually.
  foreach (array('filter', 'field', 'page') as $cache) {
    cache_clear_all('*', "cache_{$cache}", TRUE);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment