*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 3 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/

use WapplerSystems\Meilisearch\Access\Rootline;
use WapplerSystems\Meilisearch\AdditionalPageIndexer;
use WapplerSystems\Meilisearch\ConnectionManager;
use WapplerSystems\Meilisearch\Domain\Search\MeilisearchDocument\Builder;
use WapplerSystems\Meilisearch\FieldProcessor\Service;
use WapplerSystems\Meilisearch\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
use WapplerSystems\Meilisearch\IndexQueue\Item;
use WapplerSystems\Meilisearch\PageDocumentPostProcessor;
use WapplerSystems\Meilisearch\SubstitutePageIndexer;
use WapplerSystems\Meilisearch\System\Configuration\TypoScriptConfiguration;
use WapplerSystems\Meilisearch\System\Logging\MeilisearchLogManager;
use WapplerSystems\Meilisearch\System\Meilisearch\Document\Document;
use WapplerSystems\Meilisearch\System\Meilisearch\MeilisearchConnection;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
use WapplerSystems\Meilisearch\Typo3PageContentExtractor;
use WapplerSystems\Meilisearch\Util;

/**
 * Page Indexer to index TYPO3 pages used by the Index Queue.
 *
 * The indexer builds one document for the page handed to the constructor,
 * lets registered hooks substitute, post-process and extend it, runs the
 * configured field processing instructions and finally sends all documents
 * to the Meilisearch connection in chunks.
 *
 * @author Ingo Renner
 * @author Daniel Poetzinger
 * @author Timo Schmidt
 */
class Typo3PageIndexer
{
    /**
     * ID of the current page's Meilisearch document.
     *
     * Stays an empty string until getPageDocument() has built a document.
     *
     * @var string
     */
    protected static $pageMeilisearchDocumentId = '';

    /**
     * The document built for the current page by the last indexPage() run.
     *
     * @var Document|null
     */
    private static $pageMeilisearchDocument;

    /**
     * The mount point parameter used in the Frontend controller.
     *
     * Not initialized; it is NULL until setMountPointParameter() is called.
     *
     * @var string|null
     */
    protected $mountPointParameter;

    /**
     * Meilisearch server connection.
     *
     * Remains NULL when the constructor could not establish a connection.
     *
     * @var MeilisearchConnection|null
     */
    protected $meilisearchConnection = null;

    /**
     * Frontend page object (TSFE).
     *
     * @var TypoScriptFrontendController
     */
    protected $page = null;

    /**
     * Content extractor to extract content from TYPO3 pages
     *
     * @var Typo3PageContentExtractor
     */
    protected $contentExtractor = null;

    /**
     * URL to be indexed as the page's URL
     *
     * @var string
     */
    protected $pageUrl = '';

    /**
     * The page's access rootline
     *
     * @var Rootline
     */
    protected $pageAccessRootline = null;

    /**
     * Documents that have been handed to addDocumentsToMeilisearchIndex()
     * by the last indexPage() run.
     *
     * @var Document[]
     */
    protected $documentsSentToMeilisearch = [];

    /**
     * @var TypoScriptConfiguration
     */
    protected $configuration;

    /**
     * @var Item
     */
    protected $indexQueueItem;

    /**
     * TODO replace by a central logger
     *
     * @var MeilisearchLogManager
     */
    protected $logger = null;

    /**
     * Constructor
     *
     * Connection problems are logged instead of being re-thrown, which leaves
     * $meilisearchConnection as NULL; indexPage() then returns FALSE.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->logger = GeneralUtility::makeInstance(MeilisearchLogManager::class, /** @scrutinizer ignore-type */ __CLASS__);

        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getMeilisearchConfiguration();

        try {
            $this->initializeMeilisearchConnection();
        } catch (\Exception $e) {
            $this->logger->log(
                MeilisearchLogManager::ERROR,
                $e->getMessage() . ' Error code: ' . $e->getCode()
            );

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(
                    MeilisearchLogManager::ERROR,
                    'Exception while trying to index a page',
                    [
                        $e->__toString()
                    ]
                );
            }
        }

        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, /** @scrutinizer ignore-type */ '');
    }

    /**
     * Sets the Index Queue item this indexing run belongs to.
     *
     * @param Item $indexQueueItem
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }

    /**
     * Initializes the Meilisearch server connection.
     *
     * The connection is looked up by the page's ID and the current language
     * and is only stored when its write service answers a ping.
     *
     * @throws \Exception when no Meilisearch connection can be established.
     */
    protected function initializeMeilisearchConnection()
    {
        $meilisearch = GeneralUtility::makeInstance(ConnectionManager::class)
            ->getConnectionByPageId($this->page->id, Util::getLanguageUid());

        // do not continue if no server is available
        if (!$meilisearch->getWriteService()->ping()) {
            throw new \Exception(
                'No Meilisearch instance available while trying to index a page.',
                1234790825
            );
        }

        $this->meilisearchConnection = $meilisearch;
    }

    /**
     * Gets the current page's Meilisearch document ID.
     *
     * @return string The page's Meilisearch document ID or an empty string in case no document was generated yet.
     */
    public static function getPageMeilisearchDocumentId()
    {
        return self::$pageMeilisearchDocumentId;
    }

    /**
     * Gets the Meilisearch document generated for the current page.
     *
     * @return Document|null The page's Meilisearch document or NULL if it has not been generated yet.
     */
    public static function getPageMeilisearchDocument()
    {
        return self::$pageMeilisearchDocument;
    }

    /**
     * Allows to provide a Meilisearch server connection other than the one
     * initialized by the constructor.
     *
     * NOTE(review): this pings getService() while
     * initializeMeilisearchConnection() pings getWriteService() - confirm
     * that the difference is intended.
     *
     * @param MeilisearchConnection $meilisearchConnection Meilisearch connection
     * @throws \Exception if the Meilisearch server cannot be reached
     */
    public function setMeilisearchConnection(MeilisearchConnection $meilisearchConnection)
    {
        if (!$meilisearchConnection->getService()->ping()) {
            throw new \Exception(
                'Could not connect to Meilisearch server.',
                1323946472
            );
        }

        $this->meilisearchConnection = $meilisearchConnection;
    }

    /**
     * Indexes a page.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface WapplerSystems\Meilisearch\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        if (is_null($this->meilisearchConnection)) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the meilisearch server isn't
            // available anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->getPageDocument();
        $pageDocument = $this->substitutePageDocument($pageDocument);

        $this->applyIndexPagePostProcessors($pageDocument);

        self::$pageMeilisearchDocument = $pageDocument;

        // the page document always comes first, hook documents are appended
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToMeilisearchIndex($documents);
        // recorded regardless of whether adding the documents succeeded
        $this->documentsSentToMeilisearch = $documents;

        return $pageIndexed;
    }

    /**
     * Returns the class references registered for one of the indexer hooks in
     * $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer'].
     *
     * A missing or non-array registration yields an empty list, so callers can
     * iterate the result without triggering undefined index diagnostics.
     *
     * @param string $hookName Key of the hook below the 'Indexer' configuration
     * @return array The registered class references, empty if there are none
     */
    private function getRegisteredIndexerHooks($hookName)
    {
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer'][$hookName])
            || !is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer'][$hookName])
        ) {
            return [];
        }

        return $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer'][$hookName];
    }

    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * @param Document $pageDocument The page document to post process
     * @throws \UnexpectedValueException if a registered post processor does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        foreach ($this->getRegisteredIndexerHooks('indexPagePostProcessPageDocument') as $classReference) {
            $postProcessor = GeneralUtility::makeInstance($classReference);

            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // name the offending hook class, not the document it was given
                throw new \UnexpectedValueException(
                    get_class($postProcessor) . ' must implement interface ' . PageDocumentPostProcessor::class,
                    1397739154
                );
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }

    /**
     * Builds the Meilisearch document for the current page and remembers its
     * ID in the static $pageMeilisearchDocumentId.
     *
     * @return Document A document representing the page
     */
    protected function getPageDocument()
    {
        /** @var Builder $documentBuilder */
        $documentBuilder = GeneralUtility::makeInstance(Builder::class);
        $document = $documentBuilder->fromPage(
            $this->page,
            $this->pageUrl,
            $this->pageAccessRootline,
            (string)$this->mountPointParameter
        );

        self::$pageMeilisearchDocumentId = $document['id'];

        return $document;
    }

    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * @return string|null The mount point parameter, NULL if it was never set
     */
    public function getMountPointParameter()
    {
        return $this->mountPointParameter;
    }

    /**
     * Sets the mount point parameter that is used in the Frontend controller.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }

    /**
     * Allows third party extensions to replace or modify the page document
     * created by this indexer.
     *
     * Hooks run in registration order and each one receives the document
     * returned by its predecessor.
     *
     * @param Document $pageDocument The page document created by this indexer.
     * @return Document A Meilisearch document representing the currently indexed page
     * @throws \UnexpectedValueException if a hook does not implement SubstitutePageIndexer or does not return a Document
     */
    protected function substitutePageDocument(Document $pageDocument)
    {
        $classReferences = $this->getRegisteredIndexerHooks('indexPageSubstitutePageDocument');
        if ($classReferences === []) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();

        foreach ($classReferences as $classReference) {
            $substituteIndexer = GeneralUtility::makeInstance($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                $message = get_class($substituteIndexer) . ' must implement interface ' . SubstitutePageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491001);
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            if (!$substituteDocument instanceof Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Document object.';
                throw new \UnexpectedValueException($message, 1310490952);
            }

            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }

    /**
     * Retrieves the indexConfigurationName from the related queueItem, or falls back to pages when no queue item set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        return isset($this->indexQueueItem) ? $this->indexQueueItem->getIndexingConfigurationName() : 'pages';
    }

    /**
     * Allows third party extensions to provide additional documents which
     * should be indexed for the current page.
     *
     * @param Document $pageDocument The main document representing this page.
     * @param Document[] $existingDocuments An array of documents already created for this page.
     * @return Document[] The existing documents followed by all additional documents provided by the hooks
     * @throws \UnexpectedValueException if a hook does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(Document $pageDocument, array $existingDocuments)
    {
        $documents = $existingDocuments;

        foreach ($this->getRegisteredIndexerHooks('indexPageAddDocuments') as $classReference) {
            $additionalIndexer = GeneralUtility::makeInstance($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                $message = get_class($additionalIndexer) . ' must implement interface ' . AdditionalPageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491024);
            }

            // each hook sees the documents collected so far
            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }

    /**
     * Sends the given documents to the field processing service which takes
     * care of manipulating fields as defined in the field's configuration.
     *
     * Nothing happens when no processing instructions are configured.
     *
     * @param Document[] $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($processingInstructions) === 0) {
            return;
        }

        $service = GeneralUtility::makeInstance(Service::class);
        $service->processDocuments($documents, $processingInstructions);
    }

    /**
     * Adds the collected documents to the Meilisearch index.
     *
     * Documents are sent in chunks of 20. Any exception, including a non-200
     * response for a chunk, is logged and turns the result into FALSE;
     * chunks sent before the failure are not rolled back here.
     *
     * @param Document[] $documents An array of Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToMeilisearchIndex(array $documents)
    {
        if (count($documents) === 0) {
            return false;
        }

        $documentsAdded = false;

        try {
            $this->logger->log(MeilisearchLogManager::INFO, 'Adding ' . count($documents) . ' documents.', $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $documentChunk) {
                $response = $this->meilisearchConnection->getService()->addDocuments($documentChunk);
                if ($response->getHttpStatus() != 200) {
                    throw new \RuntimeException('Meilisearch Request failed.', 1331834983);
                }
            }

            $documentsAdded = true;
        } catch (\Exception $e) {
            $this->logger->log(MeilisearchLogManager::ERROR, $e->getMessage() . ' Error code: ' . $e->getCode());

            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(MeilisearchLogManager::ERROR, 'Exception while adding documents', [$e->__toString()]);
            }
        }

        return $documentsAdded;
    }

    /**
     * Gets the current page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }

    /**
     * Sets the URL to use for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }

    /**
     * Gets the page's access rootline.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }

    /**
     * Sets the page's access rootline.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }

    /**
     * Gets the documents that have been sent to Meilisearch
     *
     * @return Document[] An array of Document objects
     */
    public function getDocumentsSentToMeilisearch()
    {
        return $this->documentsSentToMeilisearch;
    }
}