|
<?php |
|
/** |
|
* Plugin Name: LR LLMs.txt Generator |
|
* Description: Generates a multilingual, paginated llms.txt for LLM indexing. Includes UI to exclude post types or specific posts. Adds a settings screen under Tools. |
|
* Version: 2.7 |
|
* Author: Luis Martinez |
|
* Author URI: https://www.lessrain.com |
|
* Requires at least: 5.6 |
|
* Tested up to: 6.5 |
|
* Requires PHP: 7.4 |
|
*/ |
|
|
|
// Abort when the file is requested directly instead of being loaded by WordPress.
defined('ABSPATH') || exit;

/*
 * Central plugin configuration.
 *
 * Stored in the global scope because every function and hook callback in this
 * file reads it through `global $lr_llms_config`.
 */
global $lr_llms_config;

$lr_llms_config = [
    // Identifier prefix; also used to build the per-IP rate-limit transient key.
    'prefix' => 'lr_llms',

    // Option names under which the settings are persisted.
    'setting_key_exclude_types'     => 'lr_llms_exclude_post_types',
    'setting_key_exclude_ids'       => 'lr_llms_exclude_ids',
    'setting_key_show_headings'     => 'lr_llms_show_headings',
    'setting_key_show_descriptions' => 'lr_llms_show_descriptions',

    // Names of the POST fields rendered by the settings form.
    'form_field_exclude_types'     => 'exclude_post_types',
    'form_field_exclude_ids'       => 'exclude_ids',
    'form_field_show_headings'     => 'show_headings',
    'form_field_show_descriptions' => 'show_descriptions',

    // Names of the submit buttons that trigger each form action.
    'form_trigger_save_settings' => 'llms_txt_exclusions',
    'form_trigger_flush_cache'   => 'llms_txt_flush_cache',

    // Nonce field name and action protecting the settings form.
    'nonce_name'   => 'lr_llms_settings_nonce',
    'nonce_action' => 'lr_llms_settings_action',

    // Output cache: transient name prefix, lifetime, and the "flush pending" flag transient.
    'cache_prefix'         => 'lr_llms_cache_',
    'cache_timeout'        => HOUR_IN_SECONDS,
    'transient_flag_flush' => 'lr_llms_flush_needed',

    // Rate limiting: lifetime of the per-IP marker and the status sent while it exists.
    'rate_limit_timeout'     => 5 * MINUTE_IN_SECONDS,
    'rate_limit_http_status' => 429,

    // Output control.
    'post_type_priority_order' => ['page', 'post'], // listed first, in this order
    'max_items'                => 1000,             // upper bound for ?limit
    'max_length_chars'         => 3000,             // REST responses are cut to this many characters
    'default_page_size'        => 200,              // ?limit default

    // Query parameter that forces a cache flush.
    'query_param_flush' => 'flush',

    // REST fallback endpoint.
    'rest_namespace' => 'lr-llms/v1',
    'rest_txt_route' => 'txt',

    // Scheduled transient cleanup.
    'cron_event'     => 'lr_llms_purge_transients_hook',
    'cron_frequency' => 'daily',

    // Switches driven by constants (typically defined in wp-config.php).
    'dev_purge_enabled'  => defined('LR_LLMS_DEV_PURGE') && LR_LLMS_DEV_PURGE,
    'disable_rate_limit' => defined('LR_LLMS_DISABLE_RATE_LIMIT') && LR_LLMS_DISABLE_RATE_LIMIT,

    // Headers sent with every llms.txt response.
    'headers_txt_response' => [
        'Content-Type'  => 'text/plain; charset=utf-8',
        'X-Robots-Tag'  => 'index, follow',
        'Cache-Control' => 'no-store, must-revalidate',
        'Pragma'        => 'no-cache',
        'Expires'       => '0',
    ],

    // Names of the filter hooks exposed to other code.
    'filter_included_post_ids'  => 'lr_llms_included_post_ids',
    'filter_post_type_priority' => 'lr_llms_post_type_priority_order',
    'filter_contact_details'    => 'lr_llms_contact_details',
    'filter_post_type_label'    => 'lr_llms_post_type_label',
    'filter_post_title'         => 'lr_llms_post_title',
    'filter_post_url'           => 'lr_llms_post_url',
    'filter_post_description'   => 'lr_llms_post_description',

    // Admin notices.
    'ui_settings_success_message' => 'Settings saved.',
    'ui_settings_error_message'   => 'Settings could not be saved.',
    'ui_flush_success_message'    => 'LLMs.txt cache flushed.',
    'ui_flush_error_message'      => 'Flush failed or was blocked.',
];
|
|
|
/**
 * Builds a stable cache-key hash from a set of query parameters.
 *
 * Parameters that do not influence the generated content are discarded first:
 * the configured flush trigger, `fbclid`, and every `utm_*` tracking parameter.
 * The remaining keys are sorted so the same parameters always produce the same
 * hash regardless of their order in the URL.
 *
 * @param array|null $get Query parameters to hash; $_GET is used when null.
 * @return string 32-character MD5 hash of the normalized query string.
 */
function lr_llms_get_cache_key($get = null) {
    global $lr_llms_config;

    if ($get === null) {
        $get = $_GET;
    }

    if (!is_array($get)) {
        // Nothing usable was passed; hash the empty parameter set instead of failing in ksort().
        $get = [];
    }

    $ignored_names = [
        $lr_llms_config['query_param_flush'] ?? 'flush',
        'fbclid',
    ];

    foreach (array_keys($get) as $name) {
        $name_as_string = (string) $name;

        // Drop the flush trigger, click identifiers, and the whole utm_* family.
        if (in_array($name_as_string, $ignored_names, true) || strncmp($name_as_string, 'utm_', 4) === 0) {
            unset($get[$name]);
        }
    }

    // String ordering keeps the result deterministic even when numeric and named keys are mixed.
    ksort($get, SORT_STRING);

    return md5(http_build_query($get));
}
|
|
|
/**
 * Adds the "LR LLMs.txt Settings" screen via add_management_page().
 *
 * Access requires the `manage_options` capability; the screen itself is drawn
 * by lr_llms_render_settings_page().
 *
 * @return void
 */
function lr_llms_register_settings_page() {
    // The page title and the menu label use the same text.
    $screen_title = 'LR LLMs.txt Settings';

    add_management_page(
        $screen_title,
        $screen_title,
        'manage_options',
        'lr-llms-settings',
        'lr_llms_render_settings_page'
    );
}

add_action('admin_menu', 'lr_llms_register_settings_page');
|
|
|
/**
 * Renders the plugin settings screen and processes its two form actions.
 *
 * Both actions (cache flush and settings save) are submitted by the same form
 * and are verified with the configured nonce before anything is changed.
 * Users without the `manage_options` capability get no output at all.
 *
 * @return void
 */
function lr_llms_render_settings_page() {
    global $lr_llms_config;

    if (!current_user_can('manage_options')) {
        return;
    }

    // "Flush Cache Now" button.
    if (isset($_POST[$lr_llms_config['form_trigger_flush_cache']])) {
        check_admin_referer($lr_llms_config['nonce_action'], $lr_llms_config['nonce_name']);
        lr_llms_flush_cache();
        echo '<div class="updated"><p>' . esc_html($lr_llms_config['ui_flush_success_message']) . '</p></div>';
    }

    // "Save Settings" button.
    if (isset($_POST[$lr_llms_config['form_trigger_save_settings']])) {
        check_admin_referer($lr_llms_config['nonce_action'], $lr_llms_config['nonce_name']);
        lr_llms_save_settings();
        echo '<div class="updated"><p>' . esc_html($lr_llms_config['ui_settings_success_message']) . '</p></div>';
    }

    // Current values shown in the form.
    $post_types = get_post_types(['public' => true], 'objects');

    // Cast defensively: a stored non-array value would make in_array() throw on PHP 8.
    $excluded_types    = (array) get_option($lr_llms_config['setting_key_exclude_types'], []);
    $excluded_ids      = get_option($lr_llms_config['setting_key_exclude_ids'], '');
    $show_headings     = get_option($lr_llms_config['setting_key_show_headings'], false);
    $show_descriptions = get_option($lr_llms_config['setting_key_show_descriptions'], false);
    $llms_url          = home_url('/llms.txt');

    ?>

    <div class="wrap">
        <h1>LR LLMs.txt Settings</h1>

        <p class="description" style="max-width:900px;">
            This tool generates a dynamic <code>llms.txt</code> file to help LLM crawlers discover indexable content.<br>
            The output is paginated, multilingual (supports Polylang and WPML), and customizable.
            <br><br>
            <strong>Public URL:</strong>
            <a href="<?php echo esc_url($llms_url); ?>" target="_blank" rel="noopener noreferrer">
                <code><?php echo esc_url($llms_url); ?></code>
            </a><br>
            <strong>Flush via URL:</strong> Add <code>?<?php echo esc_html($lr_llms_config['query_param_flush']); ?>=1</code> to the above<br>
            <small>This forces the llms.txt file to regenerate immediately. Useful after content updates or cache issues.</small><br>
            <strong>Robots tip:</strong> Add this line to your <code>robots.txt</code>:<br>
            <code>LLMs: <?php echo esc_url($llms_url); ?></code>
        </p>

        <form method="post">
            <?php wp_nonce_field($lr_llms_config['nonce_action'], $lr_llms_config['nonce_name']); ?>

            <h2>Exclude Post Types</h2>
            <?php foreach ($post_types as $pt): ?>
                <label>
                    <input type="checkbox"
                        name="<?php echo esc_attr($lr_llms_config['form_field_exclude_types']); ?>[]"
                        value="<?php echo esc_attr($pt->name); ?>"
                        <?php checked(in_array($pt->name, $excluded_types, true)); ?>>
                    <?php echo esc_html(sprintf('%s (%s)', $pt->label, $pt->name)); ?>
                </label><br>
            <?php endforeach; ?>

            <h2>Display Options</h2>
            <label>
                <input type="checkbox" name="<?php echo esc_attr($lr_llms_config['form_field_show_headings']); ?>" value="1" <?php checked($show_headings); ?>>
                Show section headings for each content type
            </label><br>

            <label>
                <input type="checkbox" name="<?php echo esc_attr($lr_llms_config['form_field_show_descriptions']); ?>" value="1" <?php checked($show_descriptions); ?>>
                Show descriptions (excerpt or summary) for each entry
            </label><br>

            <h2>Exclude Specific Posts (IDs, comma separated)</h2>
            <input type="text" name="<?php echo esc_attr($lr_llms_config['form_field_exclude_ids']); ?>" value="<?php echo esc_attr($excluded_ids); ?>" class="regular-text" />

            <p>
                <input type="submit" name="<?php echo esc_attr($lr_llms_config['form_trigger_save_settings']); ?>" class="button-primary" value="Save Settings">
            </p>

            <hr>

            <h2>Flush Cache</h2>
            <p>
                <button type="submit" name="<?php echo esc_attr($lr_llms_config['form_trigger_flush_cache']); ?>" class="button-secondary">
                    Flush Cache Now
                </button>
            </p>
        </form>
    </div>

    <?php
}
|
|
|
/**
 * Persists the exclusion and display settings submitted by the admin form.
 *
 * Does nothing unless the "save" trigger field is present and the current user
 * has the `manage_options` capability. The nonce is verified with
 * check_admin_referer(), which terminates the request itself when the check
 * fails. After the options are written the cached llms.txt output is flushed
 * so the new settings take effect immediately instead of after cache expiry.
 *
 * Stored formats:
 *  - excluded post types: list of sanitized post type slugs
 *  - excluded IDs: comma-separated string of unique positive integers
 *  - headings / descriptions: '1' or '0'
 *
 * @return void
 */
function lr_llms_save_settings() {
    global $lr_llms_config;

    if (!isset($_POST[$lr_llms_config['form_trigger_save_settings']])) {
        return;
    }

    if (!current_user_can('manage_options')) {
        return;
    }

    check_admin_referer($lr_llms_config['nonce_action'], $lr_llms_config['nonce_name']);

    // WordPress slashes superglobals, so unslash before sanitizing.
    $raw_types = wp_unslash($_POST[$lr_llms_config['form_field_exclude_types']] ?? []);
    $raw_ids   = wp_unslash($_POST[$lr_llms_config['form_field_exclude_ids']] ?? '');

    // Post type slugs are keys; non-scalar entries sanitize to '' and are dropped.
    $excluded_types = array_values(array_filter(array_map('sanitize_key', (array) $raw_types)));

    // Normalize to unique positive integers; anything unparsable becomes 0 and is removed.
    $id_list      = array_filter(wp_parse_id_list(is_string($raw_ids) ? $raw_ids : ''));
    $excluded_ids = implode(',', $id_list);

    $show_headings     = isset($_POST[$lr_llms_config['form_field_show_headings']]) ? '1' : '0';
    $show_descriptions = isset($_POST[$lr_llms_config['form_field_show_descriptions']]) ? '1' : '0';

    update_option($lr_llms_config['setting_key_exclude_types'], $excluded_types);
    update_option($lr_llms_config['setting_key_exclude_ids'], $excluded_ids);
    update_option($lr_llms_config['setting_key_show_headings'], $show_headings);
    update_option($lr_llms_config['setting_key_show_descriptions'], $show_descriptions);

    // Cached output was generated with the previous settings; discard it.
    lr_llms_flush_cache();
}
|
|
|
/**
 * Removes every cached llms.txt page from the options table.
 *
 * Cached pages are transients whose names start with the configured cache
 * prefix, so both the value rows (`_transient_<prefix>…`) and the expiry rows
 * (`_transient_timeout_<prefix>…`) are deleted with a LIKE match. The
 * "flush pending" flag transient is cleared afterwards.
 *
 * Called from the admin screen, from the `?flush` URL parameter, and when the
 * flush flag is found on `init`.
 *
 * @return void
 */
function lr_llms_flush_cache() {
    global $wpdb, $lr_llms_config;

    $cache_prefix = $lr_llms_config['cache_prefix'];
    $row_prefixes = ['_transient_', '_transient_timeout_'];

    // Escape each prefix for LIKE first, then run the deletes: values, then timeouts.
    $patterns = [];
    foreach ($row_prefixes as $row_prefix) {
        $patterns[] = $wpdb->esc_like($row_prefix . $cache_prefix) . '%';
    }

    foreach ($patterns as $pattern) {
        $sql = $wpdb->prepare("DELETE FROM {$wpdb->options} WHERE option_name LIKE %s", $pattern);
        $wpdb->query($sql);
    }

    // The flag has done its job once the cache is gone.
    delete_transient($lr_llms_config['transient_flag_flush']);
}
|
|
|
/**
 * Marks the llms.txt cache as stale whenever a post is saved.
 *
 * Autosaves and revisions are ignored. For any other save a flag transient is
 * stored for five minutes; lr_llms_clear_flush_flag() checks for that flag on
 * `init` and performs the actual flush.
 *
 * @param int $post_id ID of the post being saved.
 * @return void
 */
function lr_llms_flag_cache_flush_on_save($post_id) {
    $is_autosave = defined('DOING_AUTOSAVE') && DOING_AUTOSAVE;

    if ($is_autosave || wp_is_post_revision($post_id)) {
        return;
    }

    global $lr_llms_config;

    $flag_name     = $lr_llms_config['transient_flag_flush'];
    $flag_lifetime = 5 * MINUTE_IN_SECONDS;

    set_transient($flag_name, true, $flag_lifetime);
}

add_action('save_post', 'lr_llms_flag_cache_flush_on_save');
|
|
|
/**
 * Flushes the llms.txt cache when a save has flagged it as stale.
 *
 * Hooked to `init`, so the flag transient is checked on every request. When it
 * is present, lr_llms_flush_cache() empties the cache and removes the flag.
 *
 * @return void
 */
function lr_llms_clear_flush_flag() {
    global $lr_llms_config;

    $flush_pending = get_transient($lr_llms_config['transient_flag_flush']);

    if (!$flush_pending) {
        return;
    }

    lr_llms_flush_cache();
}

add_action('init', 'lr_llms_clear_flush_flag');
|
|
|
/**
 * Tells whether the current request targets the llms.txt file.
 *
 * The request path is compared case-insensitively and a trailing slash is
 * ignored. By default only `/llms.txt` at the site root matches.
 *
 * Localhost usage: installs living in a subdirectory can define a suffix in
 * wp-config.php so that any path ending in it matches:
 *
 *     define('LR_LLMS_LOCAL_PATH_SUFFIX', 'llms.txt');
 *
 * Do not use this override in production.
 *
 * Returns false when no request URI is available or its path cannot be parsed.
 *
 * @return bool True if the request path matches llms.txt for the current environment.
 */
function lr_llms_is_llms_txt_request() {
    $request_uri = $_SERVER['REQUEST_URI'] ?? '';
    $path        = is_string($request_uri) ? parse_url($request_uri, PHP_URL_PATH) : null;

    // parse_url() yields null/false for a missing or malformed path.
    if (!is_string($path) || $path === '') {
        return false;
    }

    // Same trimming as untrailingslashit(): strip trailing slashes and backslashes.
    $path = rtrim(strtolower($path), '/\\');

    // Suffix override for localhost use only.
    if (defined('LR_LLMS_LOCAL_PATH_SUFFIX')) {
        $suffix = '/' . ltrim(strtolower((string) LR_LLMS_LOCAL_PATH_SUFFIX), '/');

        // substr() comparison instead of str_ends_with(), which needs PHP 8 or a polyfill.
        return substr($path, -strlen($suffix)) === $suffix;
    }

    // Default: match /llms.txt at the root.
    return $path === '/llms.txt';
}
|
|
|
/**
 * Builds the base URL used for the "Next Page" hint.
 *
 * When the current request path ends in `/llms.txt` (compared
 * case-insensitively, ignoring a trailing slash) the public llms.txt URL is
 * returned. Otherwise the REST endpoint URL is assembled from the configured
 * namespace and route.
 *
 * @param array $cfg Plugin configuration; `rest_namespace` and `rest_txt_route` are read.
 * @return string Absolute URL without pagination parameters.
 */
if (!function_exists('lr_llms_build_base_url')) {
    function lr_llms_build_base_url(array $cfg): string
    {
        $path = parse_url($_SERVER['REQUEST_URI'] ?? '', PHP_URL_PATH);

        if (is_string($path)) {
            // Normalize the same way the request matcher does before comparing.
            $normalized = rtrim(strtolower($path), '/\\');

            if (substr($normalized, -9) === '/llms.txt') {
                return home_url('/llms.txt');
            }
        }

        $namespace = rtrim((string) ($cfg['rest_namespace'] ?? ''), '/');
        $route     = trim((string) ($cfg['rest_txt_route'] ?? ''), '/');

        // Fall back to the historical route name when none is configured.
        if ($route === '') {
            $route = 'txt';
        }

        return rest_url($namespace . '/' . $route);
    }
}
|
|
|
/**
 * Intercepts llms.txt requests on `init` and answers them directly.
 *
 * Requests for any other path, and requests made in the admin area, are left
 * alone. For a matching request the text output is sent and the script ends.
 *
 * @return void
 */
function lr_llms_handle_llms_txt_request() {
    if (!lr_llms_is_llms_txt_request()) {
        return;
    }

    if (is_admin()) {
        return;
    }

    lr_llms_serve_txt_output();
    exit;
}

// Priority 99: runs after callbacks registered on `init` with lower priority numbers.
add_action('init', 'lr_llms_handle_llms_txt_request', 99);
|
|
|
/**
 * Serves the llms.txt output with rate limiting, caching, and pagination.
 *
 * Steps, in order:
 *  1. Sends the configured plain-text headers (skipped if output already started).
 *  2. Unless disabled, applies IP-based rate limiting: one request per IP per
 *     `rate_limit_timeout` window. The check runs before the cache lookup, so
 *     it also applies to cached pages and to follow-up page requests. Blocked
 *     requests get the configured status, a Retry-After header, and the script ends.
 *  3. Reads the query parameters ?limit, ?page, ?since, ?tag, ?lang.
 *  4. Flushes the cache when the configured flush parameter is non-empty.
 *  5. Echoes the cached page if present, otherwise generates and caches it.
 *
 * The cache key is derived from the normalized parameter values only, so
 * unrelated query parameters cannot create additional cache entries.
 *
 * @return void
 */
function lr_llms_serve_txt_output() {
    global $lr_llms_config;

    $can_send_headers = !headers_sent();

    if ($can_send_headers) {
        foreach ($lr_llms_config['headers_txt_response'] as $header_name => $header_value) {
            header("{$header_name}: {$header_value}");
        }
    }

    // IP-based rate limiting (can be disabled through configuration).
    if (!$lr_llms_config['disable_rate_limit']) {
        $client_ip   = $_SERVER['REMOTE_ADDR'] ?? 'unknown';
        $rate_key    = $lr_llms_config['prefix'] . '_rl_' . md5($client_ip);
        $retry_after = (int) $lr_llms_config['rate_limit_timeout'];

        if (get_transient($rate_key)) {
            status_header($lr_llms_config['rate_limit_http_status']);

            if ($can_send_headers) {
                // Upper bound: the marker may expire sooner than the full window.
                header('Retry-After: ' . $retry_after);
            }

            echo sprintf('Too many requests – try again in %d seconds.', $retry_after);
            exit;
        }

        set_transient($rate_key, 1, $retry_after);
    }

    // Reads one text parameter; returns null when it is absent, non-scalar, or empty.
    $read_text_param = static function ($name) {
        if (!isset($_GET[$name]) || !is_scalar($_GET[$name])) {
            return null;
        }

        $value = sanitize_text_field(wp_unslash((string) $_GET[$name]));

        return $value === '' ? null : $value;
    };

    $default_page_size = $lr_llms_config['default_page_size'];
    $max_items         = $lr_llms_config['max_items'];

    $limit = isset($_GET['limit'])
        ? min(max(1, intval($_GET['limit'])), $max_items)
        : $default_page_size;

    $page  = isset($_GET['page']) ? max(1, intval($_GET['page'])) : 1;
    $since = $read_text_param('since');
    $tag   = $read_text_param('tag');
    $lang  = $read_text_param('lang');

    // Ignore a date filter that cannot be parsed.
    if ($since !== null && strtotime($since) === false) {
        $since = null;
    }

    $offset = ($page - 1) * $limit;

    // Manual flush trigger (e.g. ?flush=1).
    if (!empty($_GET[$lr_llms_config['query_param_flush']])) {
        lr_llms_flush_cache();
    }

    // Key the cache on the effective values, not on the raw query string.
    $cache_key = $lr_llms_config['cache_prefix'] . lr_llms_get_cache_key([
        'limit' => $limit,
        'page'  => $page,
        'since' => $since,
        'tag'   => $tag,
        'lang'  => $lang,
    ]);

    $cached_output = get_transient($cache_key);

    if (is_string($cached_output) && $cached_output !== '') {
        echo $cached_output;
        return;
    }

    // No cached copy: generate fresh output, which also stores it under $cache_key.
    lr_llms_generate_output_body($limit, $page, $offset, $since, $tag, $lang, $cache_key);
}
|
|
|
/**
 * REST callback that returns the llms.txt output as a plain-text response.
 *
 * The output of lr_llms_serve_txt_output() is captured, cut to
 * `max_length_chars` characters, and wrapped in a WP_REST_Response.
 *
 * The REST server would normally JSON-encode the response data, which would
 * deliver a quoted, escaped string under a text/plain content type. A
 * `rest_pre_serve_request` filter is therefore registered that echoes the raw
 * text for exactly this response object and reports it as served. If another
 * filter replaces the response object, the default JSON serving applies.
 *
 * @return WP_REST_Response Response holding the text and its headers.
 */
function lr_llms_rest_output() {
    global $lr_llms_config;

    ob_start();
    lr_llms_serve_txt_output(); // Sets headers and echoes the body.
    $output = (string) ob_get_clean();

    $max_chars = (int) $lr_llms_config['max_length_chars'];

    if (mb_strlen($output) > $max_chars) {
        $output = mb_substr($output, 0, $max_chars);
    }

    $response = new WP_REST_Response($output, 200, [
        'Content-Type' => 'text/plain; charset=utf-8',
        'X-Robots-Tag' => 'index, follow',
    ]);

    add_filter(
        'rest_pre_serve_request',
        static function ($served, $result) use ($response) {
            // Only take over for the response created above, and only once.
            if ($served || $result !== $response) {
                return $served;
            }

            echo $result->get_data();

            return true;
        },
        10,
        2
    );

    return $response;
}
|
|
|
/**
 * Returns all public post types with the prioritized ones first.
 *
 * The priority list comes from the configuration and can be changed through
 * the `filter_post_type_priority` filter. Prioritized slugs keep their listed
 * order; every other public post type follows in the order reported by
 * get_post_types(). Merging the two lists and removing duplicates already
 * yields that order, so no sort is needed and the result does not depend on
 * the sort stability of the PHP version.
 *
 * Slugs in the priority list are returned even when no such public post type
 * is registered.
 *
 * @return array Ordered list of post type slugs.
 */
function lr_llms_get_ordered_post_types() {
    global $lr_llms_config;

    $default_priority = $lr_llms_config['post_type_priority_order'] ?? ['page', 'post'];
    $priority_order   = apply_filters($lr_llms_config['filter_post_type_priority'], $default_priority);

    // A filter returning something other than an array must not break the output.
    if (!is_array($priority_order)) {
        $priority_order = $default_priority;
    }

    $all_public = get_post_types(['public' => true], 'names');

    // array_values() on both sides so string keys cannot overwrite each other in the merge.
    $merged = array_merge(array_values($priority_order), array_values($all_public));

    return array_values(array_unique($merged));
}
|
|
|
/**
 * Generates one page of llms.txt output, caches it, and prints it.
 *
 * For every active language and every included post type, one query fetches
 * the page of post IDs and a second one-row probe checks whether more posts
 * exist beyond this page. Each listed post becomes a Markdown link line
 * followed by its modification time and, if enabled, a description.
 *
 * Language handling:
 *  - Polylang: the `lang` query argument restricts each query.
 *  - WPML: the active language is switched with `wpml_switch_language` before
 *    the queries of each section and restored afterwards.
 *  - Neither: a single "Default" section is produced.
 *
 * The "Next Page" hint is built only from the validated parameters passed to
 * this function, URL-encoded, so raw request data never ends up in the cached
 * text and the flush trigger is not carried over to the next page.
 *
 * @param int         $limit     Posts per post type on this page.
 * @param int         $page      Current page number (1-based).
 * @param int         $offset    Query offset for this page.
 * @param string|null $since     Optional date filter (YYYY-MM-DD or relative).
 * @param string|null $tag       Optional tag slug.
 * @param string|null $lang      Optional language code filter (e.g. 'en', 'de').
 * @param string      $cache_key Transient name under which the output is stored.
 * @return void
 */
function lr_llms_generate_output_body($limit, $page, $offset, $since, $tag, $lang, $cache_key) {
    global $lr_llms_config;

    // Becomes true as soon as any section has items beyond this page.
    $has_more = false;

    // Settings. Casts guard against option values of an unexpected type.
    $excluded_types = (array) get_option($lr_llms_config['setting_key_exclude_types'], []);
    $excluded_ids   = array_filter(array_map(
        'intval',
        explode(',', (string) get_option($lr_llms_config['setting_key_exclude_ids'], ''))
    ));

    $show_headings     = (string) get_option($lr_llms_config['setting_key_show_headings'], '0') === '1';
    $show_descriptions = (string) get_option($lr_llms_config['setting_key_show_descriptions'], '0') === '1';

    // Included post types, prioritized ones first.
    $post_types = array_diff(lr_llms_get_ordered_post_types(), $excluded_types);

    // Detect the multilingual plugin and its active languages.
    if (function_exists('pll_languages_list')) {
        $languages = pll_languages_list();
        $lang_type = 'polylang';
    } elseif (function_exists('icl_get_languages')) {
        $wpml_langs = apply_filters('wpml_active_languages', null, ['skip_missing' => 0]);
        $languages  = is_array($wpml_langs) ? array_keys($wpml_langs) : [];
        $lang_type  = 'wpml';
    } else {
        $languages = [null]; // Monolingual site.
        $lang_type = null;
    }

    // Restrict to the requested language when it is one of the active ones.
    if ($lang && in_array($lang, $languages, true)) {
        $languages = [$lang];
    }

    // Remember WPML's current language so it can be restored after the loop.
    $wpml_original_language = ($lang_type === 'wpml')
        ? apply_filters('wpml_current_language', null)
        : null;

    ob_start();

    echo '# LLMs.txt — Generated by ' . esc_html(get_bloginfo('name')) . "\n";
    echo '# Site: ' . esc_url(home_url()) . "\n";
    echo '# Updated: ' . esc_html(current_time('c')) . "\n";
    echo '# Page: ' . esc_html("{$page} / Per-Type Limit: {$limit}") . "\n";
    echo '# Purpose: Lists public, indexable content for LLM indexing and retrieval.' . "\n";
    echo '# Customize: WP Admin > Tools > LR LLMs.txt Settings' . "\n\n";

    $contact = apply_filters($lr_llms_config['filter_contact_details'], '');

    if (!empty($contact)) {
        echo $contact;
    }

    echo "## Sitemap\n\n";
    echo '- XML: ' . esc_url(home_url('/sitemap.xml')) . "\n";

    // A separate loop variable keeps the requested $lang intact for the Next Page hint.
    foreach ($languages as $language) {
        if ($lang_type === 'wpml' && $language) {
            // WPML filters queries by its current language rather than by a query argument.
            do_action('wpml_switch_language', $language);
        }

        echo "\n## Language: " . ($language ? strtoupper($language) : 'Default') . "\n";

        foreach ($post_types as $type) {
            $type_obj = get_post_type_object($type);

            if (!$type_obj) {
                continue;
            }

            $label = $type_obj->labels->name;

            // TODO: translate the post type label per language (Polylang pll__() / WPML icl_t()).

            if ($show_headings) {
                $label = apply_filters($lr_llms_config['filter_post_type_label'], $label, $type, $language);
                echo "\n### {$label}\n\n";
            }

            $args = [
                'post_type'              => $type,
                'post_status'            => 'publish',
                'has_password'           => false,
                'posts_per_page'         => $limit,
                'offset'                 => $offset,
                'orderby'                => 'date',
                'order'                  => 'DESC',
                'no_found_rows'          => true,  // The probe below answers "is there a next page?".
                'fields'                 => 'ids',
                'update_post_meta_cache' => false,
                'update_post_term_cache' => false,
                'ignore_sticky_posts'    => ($type === 'post'),
                'suppress_filters'       => false, // Keep language filters active.
            ];

            if ($since) {
                // Column matches the date ordering above.
                $args['date_query'] = [[
                    'after'     => $since,
                    'inclusive' => true,
                    'column'    => 'post_date_gmt',
                ]];
            }

            if ($tag) {
                // Applies to the built-in tag taxonomy; custom tag taxonomies would need a tax_query.
                $args['tag'] = $tag;
            }

            if ($lang_type && $language) {
                $args['lang'] = $language;
            }

            $query = new WP_Query($args);

            // One-row probe just past this page.
            $probe_args                   = $args;
            $probe_args['posts_per_page'] = 1;
            $probe_args['offset']         = $offset + $limit;
            $probe                        = new WP_Query($probe_args);

            if (!empty($probe->posts)) {
                $has_more = true;
            }

            $ids = array_diff($query->posts, $excluded_ids);
            $ids = apply_filters($lr_llms_config['filter_included_post_ids'], $ids, $args, $language);

            if (!empty($ids)) {
                foreach ($ids as $post_id) {
                    $title = get_the_title($post_id) ?: '(untitled)';
                    $title = apply_filters($lr_llms_config['filter_post_title'], $title, $post_id, $language);

                    $url = sanitize_url(get_permalink($post_id));
                    $url = apply_filters($lr_llms_config['filter_post_url'], $url, $post_id, $language);

                    $last_modified = get_the_modified_time('c', $post_id);

                    $line  = '- [' . esc_html($title) . '](' . $url . ')' . "\n";
                    $line .= 'Last-Modified: ' . esc_html($last_modified) . "\n";

                    $description = '';

                    if ($show_descriptions) {
                        $excerpt     = get_the_excerpt($post_id);
                        $description = apply_filters($lr_llms_config['filter_post_description'], $excerpt, $post_id);
                    }

                    if (!empty($description)) {
                        $line .= 'Description: ' . esc_html($description) . "\n";
                    }

                    echo $line;
                }
            }

            wp_reset_postdata();
        }
    }

    if ($lang_type === 'wpml' && $wpml_original_language) {
        do_action('wpml_switch_language', $wpml_original_language);
    }

    $output = ob_get_clean();

    if ($has_more) {
        $next_args = ['page' => $page + 1];

        // Carry the limit over only when the request set one explicitly.
        if (isset($_GET['limit'])) {
            $next_args['limit'] = (int) $limit;
        }

        // add_query_arg() does not encode values, so encode them here.
        foreach (['since' => $since, 'tag' => $tag, 'lang' => $lang] as $name => $value) {
            if (is_string($value) && $value !== '') {
                $next_args[$name] = rawurlencode($value);
            }
        }

        $next_url = add_query_arg($next_args, lr_llms_build_base_url($lr_llms_config));
        $output  .= "\nNext Page: " . $next_url . "\n";
    }

    set_transient($cache_key, $output, $lr_llms_config['cache_timeout']);
    echo $output;
}
|
|
|
/**
 * Registers the REST fallback endpoint for the llms.txt output.
 *
 * With the shipped configuration the endpoint is reachable at
 * /wp-json/lr-llms/v1/txt, which helps when /llms.txt itself is blocked
 * (e.g. by server rules, proxies, or subdirectory conflicts). The route
 * accepts GET requests and is public.
 */
add_action('rest_api_init', function () {
    global $lr_llms_config;

    $namespace = $lr_llms_config['rest_namespace'];
    $route     = $lr_llms_config['rest_txt_route'];

    $endpoint = [
        'methods'             => 'GET',
        'callback'            => 'lr_llms_rest_output',
        'permission_callback' => '__return_true',
    ];

    register_rest_route($namespace, $route, $endpoint);
});
|
|
|
/**
 * Periodically purges expired llms.txt cache transients.
 *
 * Prevents transient bloat when many distinct parameter combinations have been
 * requested. On every `init`:
 *  - the cron handler is attached to the configured event;
 *  - in dev-purge mode the event is unscheduled (and the outcome logged);
 *  - otherwise the event is scheduled if it is not already.
 *
 * The handler looks up the expiry rows (`_transient_timeout_<cache prefix>…`)
 * whose timestamp lies in the past and removes each transient through
 * delete_transient(), which deletes the value and its expiry together.
 */
add_action('init', function () {
    global $lr_llms_config, $wpdb;

    $event = $lr_llms_config['cron_event'];

    // Cron handler: remove expired cache transients.
    add_action($event, function () use ($lr_llms_config, $wpdb) {
        $timeout_prefix = '_transient_timeout_';
        $like           = $wpdb->esc_like($timeout_prefix . $lr_llms_config['cache_prefix']) . '%';

        $expired_rows = $wpdb->get_col(
            $wpdb->prepare(
                "SELECT option_name FROM {$wpdb->options} WHERE option_name LIKE %s AND option_value < %d",
                $like,
                time()
            )
        );

        foreach ((array) $expired_rows as $option_name) {
            // Strip the row prefix to get the transient name.
            delete_transient(substr($option_name, strlen($timeout_prefix)));
        }
    });

    // Dev-only: keep the event unscheduled while the override is active.
    if ($lr_llms_config['dev_purge_enabled']) {
        $timestamp = wp_next_scheduled($event);

        if ($timestamp) {
            wp_unschedule_event($timestamp, $event);
            error_log('[LR LLMs.txt] Purged scheduled event: ' . $event);
        } else {
            error_log('[LR LLMs.txt] No scheduled event found for ' . $event);
        }

        return;
    }

    // Schedule the event if not already set.
    if (!wp_next_scheduled($event)) {
        wp_schedule_event(time(), $lr_llms_config['cron_frequency'], $event);
    }
});

// Remove the scheduled event when the plugin is deactivated.
// NOTE(review): assumes this file is the plugin's main file (it carries the plugin header) — confirm.
register_deactivation_hook(__FILE__, function () {
    global $lr_llms_config;

    if (!empty($lr_llms_config['cron_event'])) {
        wp_clear_scheduled_hook($lr_llms_config['cron_event']);
    }
});