Created September 19, 2018 16:16
(Drupal 7) Programmatically add <meta name="robots" content="noindex, nofollow"> to certain pages.
<?php

// I was getting a lot of "Indexed, though blocked by robots.txt" coverage
// errors on search/ pages from Google. Since robots.txt only prevents
// crawling, not indexing, I will allow crawling and prohibit indexing via
// the "robots" meta tag instead.

function MYTHEME_preprocess_html(&$variables) {
  MYTHEME_add_meta_robots_noindex();
}

/**
 * Helper function: Add <meta name="robots" content="noindex, nofollow"> to
 * some pages. Any pages you add here should be removed from 'Disallow' rules
 * in robots.txt, which prevents crawling.
 */
function MYTHEME_add_meta_robots_noindex() {
  // Paths to exclude from indexing. A trailing '*' matches any path that
  // starts with the given prefix.
  $robots_noindex = array(
    '/search/*',
  );
  $noindex = FALSE;

  // Get the current URI and remove any trailing slash. Note that
  // request_uri() includes the query string, so exact (non-wildcard)
  // entries will not match URLs carrying query parameters.
  $uri = request_uri();
  $uri = preg_replace('{/$}', '', $uri);

  foreach ($robots_noindex as $url) {
    if (substr($url, -1) === '*') {
      // This URL contains a wildcard; match on the prefix.
      $url_trimmed = substr($url, 0, -1);
      if (substr($uri, 0, strlen($url_trimmed)) === $url_trimmed) {
        $noindex = TRUE;
      }
    }
    elseif ($url === $uri) {
      $noindex = TRUE;
    }
  }

  if ($noindex) {
    $meta = array(
      '#type' => 'html_tag',
      '#tag' => 'meta',
      '#attributes' => array(
        'name' => 'robots',
        'content' => 'noindex, nofollow',
      ),
    );
    drupal_add_html_head($meta, 'robots_noindex');
  }
}
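As an aside, Drupal 7 core ships a path matcher that already understands '*' wildcards, so the manual matching loop above can be condensed. Here is a minimal sketch of a drop-in replacement for the helper, assuming the default /search paths; note that current_path() returns the internal system path without a leading slash, unlike request_uri(), so an aliased URL would need to be resolved first (e.g. via drupal_get_path_alias()).

<?php
function MYTHEME_add_meta_robots_noindex() {
  // drupal_match_path() takes newline-separated patterns with '*' wildcards.
  // 'search' and 'search/*' are assumed paths for core search results.
  $patterns = "search\nsearch/*";
  if (drupal_match_path(current_path(), $patterns)) {
    $meta = array(
      '#type' => 'html_tag',
      '#tag' => 'meta',
      '#attributes' => array(
        'name' => 'robots',
        'content' => 'noindex, nofollow',
      ),
    );
    drupal_add_html_head($meta, 'robots_noindex');
  }
}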
Hi @team-community - You could do that, as long as you also update robots.txt to allow the pages to be crawled.
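For example, the stock Drupal 7 robots.txt disallows crawling of /search/, which is exactly what keeps Google from ever seeing the meta tag. A sketch of the corresponding change, assuming the default file:

User-agent: *
# Removed so crawlers can reach these pages and see the noindex meta tag:
# Disallow: /search/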