<?php
|
|
|
|
namespace WapplerSystems\Meilisearch\IndexQueue;
|
|
|
|
/***************************************************************
 *  Copyright notice
 *
 *  (c) 2009-2015 Ingo Renner <ingo@typo3.org>
 *  All rights reserved
 *
 *  This script is part of the TYPO3 project. The TYPO3 project is
 *  free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  The GNU General Public License can be found at
 *  http://www.gnu.org/copyleft/gpl.html.
 *
 *  This script is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  This copyright notice MUST APPEAR in all copies of the script!
 ***************************************************************/
|
|
|
|
use WapplerSystems\Meilisearch\Access\Rootline;
|
|
use WapplerSystems\Meilisearch\AdditionalPageIndexer;
|
|
use WapplerSystems\Meilisearch\ConnectionManager;
|
|
use WapplerSystems\Meilisearch\Domain\Search\MeilisearchDocument\Builder;
|
|
use WapplerSystems\Meilisearch\FieldProcessor\Service;
|
|
use WapplerSystems\Meilisearch\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
|
|
use WapplerSystems\Meilisearch\IndexQueue\Item;
|
|
use WapplerSystems\Meilisearch\SubstitutePageIndexer;
|
|
use WapplerSystems\Meilisearch\System\Configuration\TypoScriptConfiguration;
|
|
use WapplerSystems\Meilisearch\System\Logging\MeilisearchLogManager;
|
|
use WapplerSystems\Meilisearch\System\Meilisearch\Document\Document;
|
|
use WapplerSystems\Meilisearch\System\Meilisearch\MeilisearchConnection;
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility;
|
|
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
|
|
use WapplerSystems\Meilisearch\Typo3PageContentExtractor;
|
|
use WapplerSystems\Meilisearch\Util;
|
|
|
|
/**
 * Page Indexer to index TYPO3 pages used by the Index Queue.
 *
 * Builds a document for the page held by the given frontend controller,
 * lets registered hook classes substitute, post-process and extend it,
 * applies the configured field processing instructions and finally sends
 * the resulting documents to Meilisearch.
 *
 * @author Ingo Renner <ingo@typo3.org>
 * @author Daniel Poetzinger <poetzinger@aoemedia.de>
 * @author Timo Schmidt <schmidt@aoemedia.de>
 */
class Typo3PageIndexer
{
    /**
     * ID of the current page's Meilisearch document.
     *
     * Stays an empty string until getPageDocument() has built a document.
     *
     * @var string
     */
    protected static $pageMeilisearchDocumentId = '';

    /**
     * The document built for the current page by the last indexPage() run.
     *
     * @var Document|null
     */
    private static $pageMeilisearchDocument;

    /**
     * The mount point parameter used in the Frontend controller.
     *
     * Not initialized by the constructor; NULL until
     * setMountPointParameter() has been called.
     *
     * @var string|null
     */
    protected $mountPointParameter;

    /**
     * Meilisearch server connection.
     *
     * Remains NULL when no connection could be established.
     *
     * @var MeilisearchConnection|null
     */
    protected $meilisearchConnection = null;

    /**
     * Frontend page object (TSFE).
     *
     * @var TypoScriptFrontendController
     */
    protected $page = null;

    /**
     * Content extractor to extract content from TYPO3 pages
     *
     * @var Typo3PageContentExtractor
     */
    protected $contentExtractor = null;

    /**
     * URL to be indexed as the page's URL
     *
     * @var string
     */
    protected $pageUrl = '';

    /**
     * The page's access rootline
     *
     * @var Rootline
     */
    protected $pageAccessRootline = null;

    /**
     * Documents that have been sent to Meilisearch
     *
     * @var array
     */
    protected $documentsSentToMeilisearch = [];

    /**
     * @var TypoScriptConfiguration
     */
    protected $configuration;

    /**
     * @var Item
     */
    protected $indexQueueItem;

    /**
     * @var \WapplerSystems\Meilisearch\System\Logging\MeilisearchLogManager
     */
    protected $logger = null;

    /**
     * Constructor
     *
     * Tries to establish the Meilisearch connection right away. A failure
     * is logged instead of being rethrown, which leaves the connection
     * unset; indexPage() then returns FALSE.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->logger = GeneralUtility::makeInstance(MeilisearchLogManager::class, /** @scrutinizer ignore-type */ __CLASS__);

        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getMeilisearchConfiguration();

        try {
            $this->initializeMeilisearchConnection();
        } catch (\Exception $e) {
            $this->logException($e, 'Exception while trying to index a page');
        }

        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, /** @scrutinizer ignore-type */ '');
    }

    /**
     * Sets the Index Queue item this indexing run belongs to.
     *
     * @param Item $indexQueueItem
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }

    /**
     * Initializes the Meilisearch server connection.
     *
     * The connection is looked up by the page's ID and the current language
     * and is only kept when its write service answers a ping.
     *
     * @throws \Exception when no Meilisearch connection can be established.
     */
    protected function initializeMeilisearchConnection()
    {
        $meilisearch = GeneralUtility::makeInstance(ConnectionManager::class)->getConnectionByPageId($this->page->id, Util::getLanguageUid());

        // do not continue if no server is available
        if (!$meilisearch->getWriteService()->ping()) {
            throw new \Exception(
                'No Meilisearch instance available while trying to index a page.',
                1234790825
            );
        }

        $this->meilisearchConnection = $meilisearch;
    }

    /**
     * Gets the current page's Meilisearch document ID.
     *
     * @return string The page's Meilisearch document ID, or an empty string in case no document was generated yet.
     */
    public static function getPageMeilisearchDocumentId()
    {
        return self::$pageMeilisearchDocumentId;
    }

    /**
     * Gets the Meilisearch document generated for the current page.
     *
     * @return Document|null The page's Meilisearch document or NULL if it has not been generated yet.
     */
    public static function getPageMeilisearchDocument()
    {
        return self::$pageMeilisearchDocument;
    }

    /**
     * Allows to provide a Meilisearch server connection other than the one
     * initialized by the constructor.
     *
     * @param MeilisearchConnection $meilisearchConnection Meilisearch connection
     * @throws \Exception if the Meilisearch server cannot be reached
     */
    public function setMeilisearchConnection(MeilisearchConnection $meilisearchConnection)
    {
        // NOTE(review): initializeMeilisearchConnection() pings getWriteService()
        // while this pings getService() - confirm both are intended.
        if (!$meilisearchConnection->getService()->ping()) {
            throw new \Exception(
                'Could not connect to Meilisearch server.',
                1323946472
            );
        }

        $this->meilisearchConnection = $meilisearchConnection;
    }

    /**
     * Indexes a page.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a registered hook class does not implement the interface required for its hook
     */
    public function indexPage()
    {
        if ($this->meilisearchConnection === null) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the meilisearch server isn't
            // available anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($pageDocument);

        self::$pageMeilisearchDocument = $pageDocument;

        // the page document always comes first, hooks may append further ones
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToMeilisearchIndex($documents);
        // recorded regardless of whether adding the documents succeeded
        $this->documentsSentToMeilisearch = $documents;

        return $pageIndexed;
    }

    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * NOTE(review): PageDocumentPostProcessor is not among this file's use
     * statements, so the name resolves relative to this class's namespace -
     * confirm that this is where the interface lives.
     *
     * @param Document $pageDocument The page document handed to each post processor.
     * @throws \UnexpectedValueException if a registered class does not implement PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        foreach ($this->getIndexerHookClassReferences('indexPagePostProcessPageDocument') as $classReference) {
            $postProcessor = GeneralUtility::makeInstance($classReference);
            if (!$postProcessor instanceof PageDocumentPostProcessor) {
                // report the offending hook class, not the document being processed
                throw new \UnexpectedValueException(get_class($postProcessor) . ' must implement interface ' . PageDocumentPostProcessor::class, 1397739154);
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }

    /**
     * Builds the Meilisearch document for the current page.
     *
     * Also remembers the document's ID for getPageMeilisearchDocumentId().
     *
     * @return Document A document representing the page
     */
    protected function getPageDocument()
    {
        /** @var Builder $documentBuilder */
        $documentBuilder = GeneralUtility::makeInstance(Builder::class);
        $document = $documentBuilder->fromPage($this->page, $this->pageUrl, $this->pageAccessRootline, (string)$this->mountPointParameter);

        self::$pageMeilisearchDocumentId = $document['id'];

        return $document;
    }

    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * @return string|null The parameter, or NULL if it has never been set.
     */
    public function getMountPointParameter()
    {
        return $this->mountPointParameter;
    }

    /**
     * Sets the mount point parameter that is used in the Frontend controller.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }

    /**
     * Allows third party extensions to replace or modify the page document
     * created by this indexer.
     *
     * The registered indexers are chained: each one receives the document
     * returned by the previous one.
     *
     * @param Document $pageDocument The page document created by this indexer.
     * @return Document An Meilisearch document representing the currently indexed page
     * @throws \UnexpectedValueException if a registered class does not implement SubstitutePageIndexer or does not return a Document
     */
    protected function substitutePageDocument(Document $pageDocument)
    {
        $classReferences = $this->getIndexerHookClassReferences('indexPageSubstitutePageDocument');
        if ($classReferences === []) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();
        foreach ($classReferences as $classReference) {
            $substituteIndexer = GeneralUtility::makeInstance($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                $message = get_class($substituteIndexer) . ' must implement interface ' . SubstitutePageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491001);
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            if (!$substituteDocument instanceof Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Document object.';
                throw new \UnexpectedValueException($message, 1310490952);
            }

            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }

    /**
     * Retrieves the indexConfigurationName from the related queueItem, or falls back to pages when no queue item set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        if (!isset($this->indexQueueItem)) {
            return 'pages';
        }

        return $this->indexQueueItem->getIndexingConfigurationName();
    }

    /**
     * Allows third party extensions to provide additional documents which
     * should be indexed for the current page.
     *
     * @param Document $pageDocument The main document representing this page.
     * @param Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by all additional Document objects to index
     * @throws \UnexpectedValueException if a registered class does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(Document $pageDocument, array $existingDocuments)
    {
        $documents = $existingDocuments;

        foreach ($this->getIndexerHookClassReferences('indexPageAddDocuments') as $classReference) {
            $additionalIndexer = GeneralUtility::makeInstance($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                $message = get_class($additionalIndexer) . ' must implement interface ' . AdditionalPageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491024);
            }

            // every indexer sees the documents collected so far
            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }

    /**
     * Sends the given documents to the field processing service which takes
     * care of manipulating fields as defined in the field's configuration.
     *
     * Nothing happens when no processing instructions are configured.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();
        if (count($processingInstructions) === 0) {
            return;
        }

        /** @var Service $service */
        $service = GeneralUtility::makeInstance(Service::class);
        $service->processDocuments($documents, $processingInstructions);
    }

    /**
     * Adds the collected documents to the Meilisearch index.
     *
     * Documents are sent in chunks of 20. Any exception raised on the way
     * is logged and turned into a FALSE return value.
     *
     * @param array $documents An array of Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToMeilisearchIndex(array $documents)
    {
        if (count($documents) === 0) {
            return false;
        }

        try {
            $this->logger->log(MeilisearchLogManager::INFO, 'Adding ' . count($documents) . ' documents.', $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $documentChunk) {
                $response = $this->meilisearchConnection->getService()->addDocuments($documentChunk);
                // Loose comparison kept on purpose: the type returned by
                // getHttpStatus() is not visible from this class.
                // NOTE(review): the Meilisearch REST API itself answers
                // document additions with "202 Accepted" - confirm that the
                // service wrapper really reports 200 on success.
                if ($response->getHttpStatus() != 200) {
                    throw new \RuntimeException('Meilisearch Request failed.', 1331834983);
                }
            }
        } catch (\Exception $e) {
            $this->logException($e, 'Exception while adding documents');

            return false;
        }

        return true;
    }

    /**
     * Gets the current page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }

    /**
     * Sets the URL to use for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }

    /**
     * Gets the page's access rootline.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }

    /**
     * Sets the page's access rootline.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }

    /**
     * Gets the documents that have been sent to Meilisearch
     *
     * @return array An array of Document objects
     */
    public function getDocumentsSentToMeilisearch()
    {
        return $this->documentsSentToMeilisearch;
    }

    /**
     * Returns the class references registered for one of the indexer hooks
     * in $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer'].
     *
     * Uses isset() so that a hook nobody registered for does not raise
     * "undefined array key" warnings.
     *
     * @param string $hookName Name of the hook, e.g. "indexPageAddDocuments"
     * @return array The registered class references, empty if none are registered
     */
    private function getIndexerHookClassReferences($hookName)
    {
        if (!isset($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer'][$hookName])) {
            return [];
        }

        $classReferences = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer'][$hookName];

        return is_array($classReferences) ? $classReferences : [];
    }

    /**
     * Logs an exception's message and code as an error and, if exception
     * logging is enabled in the configuration, a second entry carrying the
     * exception's full string representation.
     *
     * @param \Exception $exception The exception to log
     * @param string $detailMessage Message used for the detailed log entry
     */
    private function logException(\Exception $exception, $detailMessage)
    {
        $this->logger->log(
            MeilisearchLogManager::ERROR,
            $exception->getMessage() . ' Error code: ' . $exception->getCode()
        );

        // TODO extract to a class "ExceptionLogger"
        if ($this->configuration->getLoggingExceptions()) {
            $this->logger->log(MeilisearchLogManager::ERROR, $detailMessage, [$exception->__toString()]);
        }
    }
}
|