Last active
June 26, 2023 18:30
-
-
Save Sogl/78dce78b254b39a6678f759b3ff981b7 to your computer and use it in GitHub Desktop.
Algolia Pro Flex for Grav CMS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php declare(strict_types=1); | |
namespace Grav\Plugin\AlgoliaPro; | |
use Grav\Common\Grav; | |
use Grav\Common\Page\Interfaces\PageInterface; | |
use Grav\Common\Page\Pages; | |
use Grav\Common\Utils; | |
/**
 * Crawl-based Algolia indexer that also resolves "therapies" Flex objects.
 *
 * Crawled URLs below `/therapies/` are matched to objects of the `therapies`
 * Flex directory. For each match a clone of the `/therapies/therapy` page is
 * filled with the object's data and registered with Grav's page registry, so
 * the inherited record builder can treat it like any other page.
 */
class CrawlFlexpageSearch extends CrawlPageSearch
{
    /**
     * URL paths whose crawl responses are dropped before indexing.
     *
     * NOTE(review): GravFlexpageSearch additionally lists '/therapies/therapy';
     * confirm whether the template page should be excluded here as well.
     *
     * @var string[]
     */
    protected static $notAllowedRoutes = [
        '/grid',
        '/forgot_password',
        '/login',
        '/profile',
        '/therapies',
        '/therapies/add',
        '/',
        '/zachem-etot-sait/o-lekciyakh',
        '/zachem-etot-sait/ob-avtorakh',
        '/zachem-etot-sait/oplata-i-dostup',
        '/zachem-etot-sait/updates',
    ];

    /**
     * Process and index pages based on crawl responses + Grav pages.
     *
     * Responses for excluded routes are discarded. Responses whose page cannot
     * be resolved are reported with an 'error' status instead of aborting the
     * whole run.
     *
     * @param string $lang      Language passed on to the indexer lookup.
     * @param array  $responses Crawl responses exposing getInfo() and getHeaders().
     * @return array            List of status entries collected while indexing.
     */
    protected function indexPageResponses(string $lang, array $responses): array
    {
        $grav = Grav::instance();

        /** @var Pages $pages */
        $pages = $grav['pages'];
        $pages->enablePages();

        $flex = $grav->get('flex');

        // Page that is cloned for every therapy; may be missing on a broken site.
        $initialPage = $pages->find('/therapies/therapy');

        // First pass: drop unwanted routes and register a page per therapy URL.
        foreach ($responses as $key => $response) {
            $fullUrl = $response->getInfo()['url'] ?? '';
            $route = parse_url((string) $fullUrl, PHP_URL_PATH);

            // parse_url() yields null/false for URLs without a usable path.
            if (!is_string($route)) {
                continue;
            }

            // We don't need some routes.
            if (in_array($route, static::$notAllowedRoutes, true)) {
                unset($responses[$key]);
                continue;
            }

            if (!Utils::startsWith($route, '/therapies/')) {
                continue;
            }

            // Without the template page there is nothing to clone; the second
            // pass will then report the route as "Page Not Found".
            if (!$initialPage instanceof PageInterface) {
                continue;
            }

            $slug = basename($route);
            $therapy = $flex->getObject($slug, 'therapies');

            // Unknown or removed therapy: leave it to the second pass.
            if ($therapy === null) {
                continue;
            }

            // Clone the initial fake page for every object; reusing a single
            // instance would write one entry everywhere.
            $page = clone $initialPage;
            $page->id($page->modified() . md5($route));
            $page->slug($slug);
            $page->folder($slug);
            $page->route($route);
            $page->rawRoute($route);
            $page->title($therapy->getProperty('title'));
            $page->content($therapy->getProperty('description'));
            $page->taxonomy(['tag' => $therapy->getProperty('tags')]);

            $pages->addPage($page, $route);
        }

        $index = $this->getIndexer($lang);
        $status = [];
        $records = [];
        $steps = count($responses);

        if ($callback = $this->getProgressCallback()) {
            $callback($steps, 'Index Config: <yellow>' . $this->name . '</yellow> | Algolia Index: <yellow>' . $index->getIndexName() . '</yellow>');
        }

        // Second pass: build records for every remaining response.
        foreach ($responses as $response) {
            $headers = $response->getHeaders();
            $info = $response->getInfo();

            $url = $info['url'] ?? 'unknown';
            $route = $headers['grav-page-route'][0] ?? '';
            $base = $headers['grav-base'][0] ?? '';
            $page = $pages->find($route);

            if ($base) {
                $url = str_replace($base, '', $url);
            }

            if ($page instanceof PageInterface) {
                $this->addRecordFromResponse($page, $response, $url, $records, $status);
                continue;
            }

            $status[] = [
                'status' => 'error',
                'msg' => 'Page Not Found: ' . $route,
                'url' => $url,
            ];

            if ($callback = $this->getProgressCallback()) {
                $callback(-1);
            }
        }

        if ($this->production_mode !== false && !empty($records)) {
            $index->partialUpdateObjects($records, [
                'createIfNotExists' => true,
            ]);
        }

        return $status;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php declare(strict_types=1); | |
namespace Grav\Plugin\AlgoliaPro; | |
use Grav\Common\Grav; | |
use Grav\Framework\Flex\Flex; | |
use Grav\Framework\Flex\FlexDirectory; | |
use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapiesCollection; | |
use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapyObject; | |
use Grav\Framework\Flex\Interfaces\FlexCollectionInterface; | |
use Grav\Framework\Flex\Interfaces\FlexObjectInterface; | |
use Grav\Common\Utils; | |
use Grav\Plugin\ShortcodeCore\ShortcodeManager; | |
use Grav\Common\Page\Page; | |
/**
 * Algolia indexer for objects of the `therapies` Flex directory.
 *
 * Every published therapy is split into one or more records: its rendered
 * description is cut into blocks and chunks, and each chunk is merged with the
 * data shared by the whole object (url, title, summary, taxonomy, ...).
 */
class FlexTherapiesSearch extends FlexSearch implements AlgoliaProClassInterface
{
    /**
     * Return the index configuration with its 'name' set to the resolved index name.
     *
     * @param array $options Optional settings; 'lang' selects the language
     *                       when the site has languages enabled.
     * @return array
     */
    public function indexConfiguration(array $options = []): array
    {
        $grav = Grav::instance();
        $conf = $this->index_configuration;
        $name = null;

        /** @var Language $language */
        $language = $grav['language'];
        if ($language->enabled()) {
            // Explicit option first, then the active language, then the default.
            $name = $options['lang'] ?? $language->getActive() ?? $language->getDefault();
        }

        $index = $this->getIndexer($name);
        $conf->set('name', $index->getIndexName());

        return $conf->toArray();
    }

    /**
     * Return collection of objects to be indexed: the published therapies.
     *
     * @return FlexCollectionInterface
     */
    protected function getFilteredCollection(): FlexCollectionInterface
    {
        $grav = Grav::instance();

        /** @var Flex $flex */
        $flex = $grav['flex'];

        /** @var FlexDirectory $directory */
        $directory = $flex->getDirectory('therapies');

        /** @var TherapiesCollection $collection */
        $collection = $directory->getCollection()->filterBy(['published' => true]);

        return $collection;
    }

    /**
     * Return true if object can be handled by this class.
     *
     * @param FlexObjectInterface $object
     * @return bool
     */
    protected function checkObject(FlexObjectInterface $object): bool
    {
        return $object instanceof TherapyObject;
    }

    /**
     * Each object can have multiple records, so return array of records.
     *
     * The object's description is passed through a Page instance to obtain its
     * processed content, which is then split into blocks and chunks. Every
     * chunk becomes one record whose objectID is the md5 of the trimmed object
     * URL followed by a running counter.
     *
     * @param TherapyObject $object
     * @return array
     */
    protected function getRecord(FlexObjectInterface $object): array
    {
        // Set the description as page content and read it back processed.
        $page = new Page();
        $page->content($object->description);
        $content = $page->content();

        $object_url = $this->getUrl($object);

        // Data shared by every record of this object.
        $flex_data = [
            'url' => $object_url,
            'title' => $object->title,
            'summary' => $this->shortenText($content, 256),
            'access' => null,
        ];

        $tags = $object->tags;
        if (!empty($tags)) {
            $flex_data['taxonomy'] = ['tag' => $tags];
        }

        $flex_data['language'] = 'ru';
        $flex_data['breadcrumbs'] = [
            ['name' => 'Случаи терапии', 'url' => '/therapies'],
            ['name' => $object->title, 'url' => $object_url],
        ];

        // getUrl() may return null; trim() rejects null under strict_types.
        $base_url = trim((string) $object_url, '/');
        $base_id = md5($base_url);

        // Content processing.
        $blocks = $this->splitHTMLContent($content);
        $flex_chunks = [];
        $counter = 1;

        foreach ($blocks as $block) {
            $block_data = [];
            $block_content = $block['content'] ?? '';

            // Blocks introduced by a heading carry extra header metadata.
            if (isset($block['tag'], $block['header'])) {
                $block_data['objectType'] = 'header';
                $block_data['headers'][$block['tag']][] = $block['header'];
                $block_data['subtitle'] = $block['header'];
                $block_data['summary'] = $this->getFirstWords($block_content, 50);

                if (!empty($block['id'])) {
                    $block_data['url'] = $flex_data['url'] . '#' . $block['id'];
                }
            }

            foreach ($this->splitContentIntoChunks($block_content) as $chunk) {
                $block_data['objectID'] = $base_id . '_' . $counter++;
                $block_data['baseURL'] = $base_url;
                $block_data['content'] = $chunk;

                $flex_chunks[] = array_merge($flex_data, $block_data);
            }
        }

        return $flex_chunks;
    }

    /**
     * Get URL for the object.
     *
     * @param FlexObjectInterface $object
     * @return string|null
     */
    protected function getUrl(FlexObjectInterface $object): ?string
    {
        return $object->url();
    }

    /**
     * Strip tags from a text and shorten it to a maximum number of characters.
     *
     * Lengths and offsets are counted in characters (mb_* functions), not
     * bytes, so multi-byte text such as Cyrillic is measured and cut correctly.
     *
     * @param string $text       Text or HTML to shorten.
     * @param int    $max_length Maximum number of characters to keep.
     * @param string $cut_off    Marker appended when the text was shortened.
     * @param bool   $keep_word  Cut at the last space instead of mid-word.
     * @return string            Shortened plain text; returned unchanged (apart
     *                           from tag stripping and trimming) when it fits.
     */
    public function shortenText($text, $max_length = 140, $cut_off = '…', $keep_word = false)
    {
        // Clear all tags and surrounding whitespace.
        $text = trim(strip_tags((string) $text));

        if (mb_strlen($text) <= $max_length) {
            return $text;
        }

        if ($keep_word) {
            // One extra character so a space right after the limit still counts.
            $candidate = mb_substr($text, 0, $max_length + 1);
            $last_space = mb_strrpos($candidate, ' ');

            if ($last_space !== false && $last_space > 0) {
                return rtrim(mb_substr($candidate, 0, $last_space)) . $cut_off;
            }
            // No word boundary available: fall through to a hard cut.
        }

        return rtrim(mb_substr($text, 0, $max_length)) . $cut_off;
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php declare(strict_types=1); | |
namespace Grav\Plugin\AlgoliaPro; | |
use Grav\Common\Grav; | |
use Grav\Common\Page\Pages; | |
use Grav\Common\Yaml; | |
use RocketTheme\Toolbox\Event\Event; | |
use Grav\Framework\Flex\Flex; | |
use Grav\Framework\Flex\FlexDirectory; | |
use Grav\Plugin\SoglFlex\Flex\Types\Therapies\TherapiesCollection; | |
use Grav\Plugin\PageToc\MarkupFixer; | |
use Grav\Common\Page\Interfaces\PageInterface; | |
use Grav\Common\Plugin; | |
/**
 * Page-based Algolia indexer that also indexes the `therapies` Flex directory.
 *
 * Before indexing, every published therapy is registered with Grav's page
 * registry as a clone of the `/therapies/therapy` page. The resulting page
 * set is then reduced by an exclusion list of routes and an allow-list of
 * templates, and page content is run through the page-toc MarkupFixer.
 */
class GravFlexpageSearch extends GravPageSearch
{
    /**
     * Routes whose pages are never indexed.
     *
     * @var string[]
     */
    protected static $notAllowedRoutes = [
        '/grid',
        '/forgot_password',
        '/login',
        '/profile',
        '/therapies',
        '/therapies/add',
        '/therapies/therapy',
        '/',
        '/zachem-etot-sait/o-lekciyakh',
        '/zachem-etot-sait/ob-avtorakh',
        '/zachem-etot-sait/oplata-i-dostup',
        '/zachem-etot-sait/updates',
    ];

    /**
     * Page templates that are indexed; pages using any other template are skipped.
     *
     * @var string[]
     */
    protected static $allowedTemplates = [
        'docs',
        'therapy',
    ];

    /**
     * Index pages (including therapy pages) and return per-page status entries.
     *
     * @param array $options Optional 'lang' (language for the indexer lookup)
     *                       and 'route' (index only the page at this route).
     * @return array         List of status entries, one or more per processed page.
     */
    protected function indexPages(array $options = []): array
    {
        $grav = Grav::instance();

        /** @var Pages $pages */
        $pages = $grav['pages'];
        $pages->enablePages();

        /** @var Flex $flex */
        $flex = $grav->get('flex');

        // Page that is cloned for every therapy.
        $initialPage = $pages->find('/therapies/therapy');

        // Without the template page there is nothing to clone; regular pages
        // are still indexed in that case.
        if ($initialPage instanceof PageInterface) {
            /** @var FlexDirectory $directory */
            $directory = $flex->getDirectory('therapies');

            /** @var TherapiesCollection $therapies */
            $therapies = $directory->getCollection()->filterBy(['published' => true]);

            foreach ($therapies as $therapy) {
                $slug = $therapy->getProperty('slug');
                if ($slug === null || $slug === '') {
                    // No slug means no usable route for this object.
                    continue;
                }

                $therapyRoute = '/therapies/' . $slug;

                // Clone the initial fake page for every object; reusing a
                // single instance would write one entry everywhere.
                $therapyPage = clone $initialPage;
                $therapyPage->id($therapyPage->modified() . md5($therapyRoute));
                $therapyPage->slug($slug);
                $therapyPage->folder($slug);
                $therapyPage->route($therapyRoute);
                $therapyPage->rawRoute($therapyRoute);
                $therapyPage->title($therapy->getProperty('title'));
                $therapyPage->content($therapy->getProperty('description'));
                $therapyPage->taxonomy(['tag' => $therapy->getProperty('tags')]);

                $pages->addPage($therapyPage, $therapyRoute);
            }
        }

        // Get custom filters.
        $filter = $this->index_configuration->get('filters');
        $records = [];
        $status = [];

        $lang = $options['lang'] ?? null;
        $route = $options['route'] ?? null;
        $index = $this->getIndexer($lang);

        if ($route) {
            // Single-page mode works on a plain array, so the route exclusion
            // and template allow-list are applied by hand instead of through
            // collection methods (which do not exist on arrays).
            $collection = [];
            $page = $pages->find($route);

            if (
                $page
                && $page->exists()
                && $page->routable()
                && $page->published()
                && !in_array($page->route(), static::$notAllowedRoutes, true)
                && in_array($page->template(), static::$allowedTemplates, true)
            ) {
                $collection[] = $page;
            }
        } else {
            if (is_array($filter) && array_key_exists('items', $filter)) {
                if (is_string($filter['items'])) {
                    $filter['items'] = Yaml::parse($filter['items']);
                }
                $collection = $pages->getCollection($filter)->published()->routable();
            } else {
                $collection = $pages->all()->published()->routable();
            }

            // Delete some routes.
            foreach (static::$notAllowedRoutes as $naRoute) {
                $tempPage = $pages->find($naRoute);
                if ($tempPage) {
                    $collection->remove($tempPage->path());
                }
            }

            // And use only specific templates.
            $collection = $collection->ofOneOfTheseTypes(static::$allowedTemplates);
        }

        $steps = count($collection);

        if ($callback = $this->getProgressCallback()) {
            $callback($steps, 'Index Config: <yellow>' . $this->name . '</yellow> | Algolia Index: <yellow>' . $index->getIndexName() . '</yellow>');
        }

        // For TOC search: adds heading anchors to the content.
        $markup_fixer = new MarkupFixer();

        foreach ($collection as $page) {
            $url = $page->url();

            // Update progress callback.
            if ($callback = $this->getProgressCallback()) {
                $callback();
            }

            if (!$this->processPage($page)) {
                $status[] = [
                    'status' => 'info',
                    'msg' => 'Page manually skipped',
                    'url' => $url,
                ];
                if ($callback = $this->getProgressCallback()) {
                    $callback(-1);
                }
                continue;
            }

            if ($page->redirect()) {
                $status[] = [
                    'status' => 'info',
                    'msg' => 'Page is a redirect',
                    'url' => $url,
                ];
                if ($callback = $this->getProgressCallback()) {
                    $callback(-1);
                }
                continue;
            }

            try {
                // Let event listeners veto the page by setting a 'status' on the event.
                $skip_event = $grav->fireEvent(
                    'onAlgoliaProPageSkip',
                    new Event(['name' => $this->name, 'config' => $this->index_configuration, 'object' => $page])
                );

                if (isset($skip_event['status'])) {
                    $status[] = $skip_event['status'];
                    if ($callback = $this->getProgressCallback()) {
                        $callback(-1);
                    }
                    continue;
                }

                $content = trim($page->content());

                // For TOC search.
                $content = $markup_fixer->fix($content, $this->getAnchorOptions($page));

                $skip_empty_content = $this->index_configuration->get('content.skip_empty', true);
                if ($skip_empty_content && empty($content)) {
                    $status[] = [
                        'status' => 'info',
                        'msg' => 'Page has no content, skipping',
                        'url' => $url,
                    ];
                    if ($callback = $this->getProgressCallback()) {
                        $callback(-1);
                    }
                    continue;
                }

                $page_records = $this->getPageData($content, $page);
                $updatable_records = $this->recordsNeedUpdating($page_records);
                $records = array_merge($records, $updatable_records);

                // Judge this page by its own records, not by the running total
                // accumulated from earlier pages.
                if (count($updatable_records) > 0) {
                    $status[] = [
                        'status' => 'success',
                        'msg' => 'Page indexed',
                        'url' => $url,
                    ];
                } else {
                    $status[] = [
                        'status' => 'info',
                        'msg' => 'Cache entry found, no records need updating',
                        'url' => $url,
                    ];
                }
            } catch (\Exception $e) {
                $status[] = [
                    'status' => 'error',
                    'msg' => $e->getMessage(),
                    'url' => $url,
                ];
                if ($callback = $this->getProgressCallback()) {
                    $callback(-1);
                }
            }
        }

        if ($this->production_mode && !empty($records)) {
            $index->partialUpdateObjects($records, [
                'createIfNotExists' => true,
            ]);
        }

        return $status;
    }

    /**
     * Build the options array handed to MarkupFixer::fix() for a page.
     *
     * Values are read from the 'page-toc' configuration through configVar(),
     * with the literals below as defaults.
     *
     * @param PageInterface|null $page  Page to resolve options for; falls back
     *                                  to Grav's current page when null.
     * @param int|null           $start Overrides 'anchors.start' when given.
     * @param int|null           $depth Overrides 'anchors.depth' when given.
     * @return array
     */
    protected function getAnchorOptions(?PageInterface $page = null, $start = null, $depth = null): array
    {
        // Read the current page from the Grav container; this class does not
        // visibly declare a $grav property of its own.
        $page = $page ?? Grav::instance()['page'];

        return [
            'start' => (int) ($start ?? static::configVar('anchors.start', $page, 1)),
            'depth' => (int) ($depth ?? static::configVar('anchors.depth', $page, 6)),
            'hclass' => static::configVar('hclass', $page, null),
            'link' => static::configVar('anchors.link', $page, true),
            'position' => static::configVar('anchors.position', $page, 'before'),
            'aria' => static::configVar('anchors.aria', $page, 'Anchor'),
            'icon' => static::configVar('anchors.icon', $page, '#'),
            'class' => static::configVar('anchors.class', $page, null),
            'maxlen' => (int) (static::configVar('anchors.slug_maxlen', $page, null)),
            'prefix' => static::configVar('anchors.slug_prefix', $page, null),
        ];
    }

    /**
     * Look up a 'page-toc' configuration option via Plugin::inheritedConfigOption().
     *
     * @param string             $var     Option name inside the 'page-toc' configuration.
     * @param PageInterface|null $page    Page passed on to the lookup.
     * @param mixed              $default Value passed on as the fallback.
     * @return mixed
     */
    public static function configVar($var, $page = null, $default = null)
    {
        return Plugin::inheritedConfigOption('page-toc', $var, $page, $default);
    }

    /**
     * Deliberate no-op: always reports success without touching the index.
     *
     * We don't need to perform operations on each edit/delete.
     *
     * @param object $object
     * @param array  $options
     * @param bool   $update
     * @return array Fixed status array.
     */
    public function modifyObject(object $object, array $options = [], bool $update = true): array
    {
        return ['status' => 'success', 'message' => 'test mode'];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment