zwischenstand

This commit is contained in:
Sven Wappler
2021-04-29 18:33:05 +02:00
parent 2c9e27b3b7
commit 0ee2fae261
264 changed files with 263 additions and 21253 deletions

View File

@@ -27,7 +27,6 @@ namespace WapplerSystems\Meilisearch\IndexQueue;
use WapplerSystems\Meilisearch\ContentObject\Classification;
use WapplerSystems\Meilisearch\ContentObject\Multivalue;
use WapplerSystems\Meilisearch\ContentObject\Relation;
use WapplerSystems\Meilisearch\System\Meilisearch\Document\Document;
use TYPO3\CMS\Core\Core\Environment;
use TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser;
use TYPO3\CMS\Core\Utility\GeneralUtility;

View File

@@ -300,7 +300,7 @@ class PageIndexer extends AbstractFrontendHelper implements SingletonInterface
$this->responseData['meilisearchConnection'] = [
'rootPage' => $indexQueueItem->getRootPageUid(),
'sys_language_uid' => Util::getLanguageUid(),
'meilisearch' => (string)$meilisearchConnection->getNode('write')
'meilisearch' => (string)$meilisearchConnection
];
$documentsSentToMeilisearch = $indexer->getDocumentsSentToMeilisearch();

View File

@@ -24,6 +24,7 @@ namespace WapplerSystems\Meilisearch\IndexQueue;
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
use TYPO3\CMS\Core\Utility\DebugUtility;
use WapplerSystems\Meilisearch\ConnectionManager;
use WapplerSystems\Meilisearch\Domain\Search\MeilisearchDocument\Builder;
use WapplerSystems\Meilisearch\FieldProcessor\Service;
@@ -145,11 +146,13 @@ class Indexer extends AbstractIndexer
$this->type = $item->getType();
$this->setLogging($item);
$meilisearchConnections = $this->getMeilisearchConnectionByItem($item);
foreach ($meilisearchConnections as $systemLanguageUid => $meilisearchConnection) {
$this->meilisearch = $meilisearchConnection;
$this->meilisearch = $this->getMeilisearchConnectionByItem($item);
if (!$this->indexItem($item, $systemLanguageUid)) {
$languages = $item->getSite()->getSite()->getLanguages();
foreach ($languages as $language) {
if (!$this->indexItem($item, $language->getLanguageId())) {
/*
* A single language voting for "not indexed" should make the whole
* item count as being not indexed, even if all other languages are
@@ -511,7 +514,7 @@ class Indexer extends AbstractIndexer
* for translations of an item.
*
* @param Item $item An index queue item
* @return array An array of WapplerSystems\Meilisearch\System\Meilisearch\MeilisearchConnection connections, the array's keys are the sys_language_uid of the language of the connection
* @return \WapplerSystems\Meilisearch\System\Meilisearch\MeilisearchConnection
*/
protected function getMeilisearchConnectionByItem(Item $item)
{
@@ -525,7 +528,7 @@ class Indexer extends AbstractIndexer
// Meilisearch configurations possible for this item
$site = $item->getSite();
return $site->getMeilisearchConnectionConfiguration();
return $this->connectionManager->getConnectionBySite($site);
$defaultLanguageUid = $this->getDefaultLanguageUid($item, $site->getRootPage(), $siteLanguages);

View File

@@ -29,6 +29,7 @@ use WapplerSystems\Meilisearch\Access\Rootline;
use WapplerSystems\Meilisearch\Access\RootlineElement;
use WapplerSystems\Meilisearch\Domain\Index\PageIndexer\Helper\UriBuilder\AbstractUriStrategy;
use WapplerSystems\Meilisearch\Domain\Index\PageIndexer\Helper\UriStrategyFactory;
use WapplerSystems\Meilisearch\Domain\Search\MeilisearchDocument\Builder;
use WapplerSystems\Meilisearch\System\Logging\MeilisearchLogManager;
use TYPO3\CMS\Core\Utility\GeneralUtility;
@@ -58,13 +59,19 @@ class PageIndexer extends Indexer
return false;
}
//$meilisearchConnection = $this->getMeilisearchConnectionByItem($item);
$this->meilisearch = $this->getMeilisearchConnectionByItem($item);
$site = $item->getSite();
$languageUids = $site->getAvailableLanguageIds();
$languages = $site->getSite()->getLanguages();
foreach ($languageUids as $systemLanguageUid) {
$contentAccessGroups = $this->getAccessGroupsFromContent($item, $systemLanguageUid);
foreach ($languages as $language) {
$this->indexPage($item, $language->getLanguageId());
// TODO: Versions for usergroups
continue;
$contentAccessGroups = $this->getAccessGroupsFromContent($item, $language->getLanguageId());
if (empty($contentAccessGroups)) {
// might be an empty page w/no content elements or some TYPO3 error / bug
@@ -73,7 +80,7 @@ class PageIndexer extends Indexer
}
foreach ($contentAccessGroups as $userGroup) {
$this->indexPage($item, $systemLanguageUid, $userGroup);
$this->indexPage($item, $language->getLanguageId(), $userGroup);
}
}
@@ -191,46 +198,6 @@ class PageIndexer extends Indexer
return $accessGroupsCache[$accessGroupsCacheEntryId];
}
// Utility methods
/**
 * Creates a page indexer request and pre-configures it from this indexer's
 * state: the logging flag is always passed along, while authorization
 * credentials, additional headers and the request timeout are only applied
 * when the corresponding entry in $this->options is non-empty.
 *
 * @return PageIndexerRequest Base page indexer request
 */
protected function buildBasePageIndexerRequest()
{
    $baseRequest = $this->getPageIndexerRequest();
    $baseRequest->setParameter('loggingEnabled', $this->loggingEnabled);

    // Optional credentials for the indexing request.
    if (!empty($this->options['authorization.'])) {
        $credentials = $this->options['authorization.'];
        $baseRequest->setAuthorizationCredentials(
            $credentials['username'],
            $credentials['password']
        );
    }

    // Optional extra headers, added one by one in configuration order.
    if (!empty($this->options['frontendDataHelper.']['headers.'])) {
        $configuredHeaders = $this->options['frontendDataHelper.']['headers.'];
        foreach ($configuredHeaders as $configuredHeader) {
            $baseRequest->addHeader($configuredHeader);
        }
    }

    // Optional timeout; the configured value is cast to float before use.
    if (!empty($this->options['frontendDataHelper.']['requestTimeout'])) {
        $timeout = (float)$this->options['frontendDataHelper.']['requestTimeout'];
        $baseRequest->setTimeout($timeout);
    }

    return $baseRequest;
}
/**
 * Factory hook for the request object used by buildBasePageIndexerRequest();
 * the instance is obtained through GeneralUtility::makeInstance().
 *
 * @return PageIndexerRequest
 */
protected function getPageIndexerRequest()
{
    /** @var PageIndexerRequest $pageIndexerRequest */
    $pageIndexerRequest = GeneralUtility::makeInstance(PageIndexerRequest::class);

    return $pageIndexerRequest;
}
/**
* Determines a page ID's URL.
@@ -248,9 +215,7 @@ class PageIndexer extends Indexer
$pageId = $item->getRecordUid();
$strategy = $this->getUriStrategy($pageId);
$mountPointParameter = $this->getMountPageDataUrlParameter($item);
$dataUrl = $strategy->getPageIndexingUriFromPageItemAndLanguageId($item, $language, $mountPointParameter, $this->options);
return $dataUrl;
return $strategy->getPageIndexingUriFromPageItemAndLanguageId($item, $language, $mountPointParameter, $this->options);
}
/**
@@ -294,16 +259,16 @@ class PageIndexer extends Indexer
*/
protected function indexPage(Item $item, $language = 0, $userGroup = 0)
{
DebugUtility::debug('dededede');
$accessRootline = $this->getAccessRootline($item, $language, $userGroup);
$request = $this->buildBasePageIndexerRequest();
$request->setIndexQueueItem($item);
$request->addAction('indexPage');
$request->setParameter('accessRootline', (string)$accessRootline);
$indexRequestUrl = $this->getDataUrl($item, $language);
$response = $request->send($indexRequestUrl);
$indexActionResult = $response->getActionResult('indexPage');
//$this->meilisearch->getService()->getClient()->index($item->getType())->addDocuments();
$result = false;
DebugUtility::debug('PageIndexer');
exit();
if ($this->loggingEnabled) {
$logSeverity = MeilisearchLogManager::INFO;
@@ -404,4 +369,21 @@ class PageIndexer extends Indexer
return Rootline::getAccessRootlineByPageId($pageId, $mountPointParameter);
}
/**
 * Builds the Meilisearch document for the current page via the document
 * Builder and remembers the document's ID in the static
 * $pageMeilisearchDocumentId.
 *
 * NOTE(review): reads $this->page, $this->pageUrl, $this->pageAccessRootline
 * and $this->mountPointParameter, none of which are declared in the visible
 * part of this class - confirm they exist before relying on this method.
 *
 * @return array A document representing the page
 */
protected function getPageDocument()
{
    /** @var Builder $builder */
    $builder = GeneralUtility::makeInstance(Builder::class);
    $mountPoint = (string)$this->mountPointParameter;

    $pageDocument = $builder->fromPage(
        $this->page,
        $this->pageUrl,
        $this->pageAccessRootline,
        $mountPoint
    );
    self::$pageMeilisearchDocumentId = $pageDocument['id'];

    return $pageDocument;
}
}

View File

@@ -40,7 +40,7 @@ use TYPO3\CMS\Core\Utility\GeneralUtility;
class PageIndexerRequest
{
const SOLR_INDEX_HEADER = 'X-Tx-Meilisearch-Iq';
const MEILISEARCH_INDEX_HEADER = 'X-Tx-Meilisearch-Iq';
/**
* List of actions to perform during page rendering.
@@ -248,7 +248,7 @@ class PageIndexerRequest
];
$indexerRequestData = array_merge($indexerRequestData, $this->parameters);
$headers[] = self::SOLR_INDEX_HEADER . ': ' . json_encode($indexerRequestData, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT | JSON_UNESCAPED_SLASHES);
$headers[] = self::MEILISEARCH_INDEX_HEADER . ': ' . json_encode($indexerRequestData, JSON_HEX_TAG | JSON_HEX_APOS | JSON_HEX_AMP | JSON_HEX_QUOT | JSON_UNESCAPED_SLASHES);
return $headers;
}

View File

@@ -0,0 +1,508 @@
<?php
namespace WapplerSystems\Meilisearch\IndexQueue;
/***************************************************************
* Copyright notice
*
* (c) 2009-2015 Ingo Renner <ingo@typo3.org>
* All rights reserved
*
* This script is part of the TYPO3 project. The TYPO3 project is
* free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* The GNU General Public License can be found at
* http://www.gnu.org/copyleft/gpl.html.
*
* This script is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
use WapplerSystems\Meilisearch\Access\Rootline;
use WapplerSystems\Meilisearch\AdditionalPageIndexer;
use WapplerSystems\Meilisearch\ConnectionManager;
use WapplerSystems\Meilisearch\Domain\Search\MeilisearchDocument\Builder;
use WapplerSystems\Meilisearch\FieldProcessor\Service;
use WapplerSystems\Meilisearch\IndexQueue\FrontendHelper\PageFieldMappingIndexer;
use WapplerSystems\Meilisearch\IndexQueue\Item;
use WapplerSystems\Meilisearch\SubstitutePageIndexer;
use WapplerSystems\Meilisearch\System\Configuration\TypoScriptConfiguration;
use WapplerSystems\Meilisearch\System\Logging\MeilisearchLogManager;
use WapplerSystems\Meilisearch\System\Meilisearch\Document\Document;
use WapplerSystems\Meilisearch\System\Meilisearch\MeilisearchConnection;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
use WapplerSystems\Meilisearch\Typo3PageContentExtractor;
use WapplerSystems\Meilisearch\Util;
/**
* Page Indexer to index TYPO3 pages used by the Index Queue.
*
* @author Ingo Renner <ingo@typo3.org>
* @author Daniel Poetzinger <poetzinger@aoemedia.de>
* @author Timo Schmidt <schmidt@aoemedia.de>
*/
class Typo3PageIndexer
{
    /**
     * ID of the Meilisearch document built for the current page.
     *
     * Empty string until getPageDocument() has produced a document.
     *
     * @var string
     */
    protected static $pageMeilisearchDocumentId = '';

    /**
     * Page document of the most recent indexPage() run, NULL before that.
     *
     * @var Document|null
     */
    private static $pageMeilisearchDocument;

    /**
     * The mount point parameter used in the Frontend controller.
     *
     * @var string
     */
    protected $mountPointParameter;

    /**
     * Meilisearch server connection; stays NULL when the constructor could
     * not establish one.
     *
     * @var MeilisearchConnection|null
     */
    protected $meilisearchConnection = null;

    /**
     * Frontend page object (TSFE).
     *
     * @var TypoScriptFrontendController
     */
    protected $page = null;

    /**
     * Content extractor to extract content from TYPO3 pages
     *
     * @var Typo3PageContentExtractor
     */
    protected $contentExtractor = null;

    /**
     * URL to be indexed as the page's URL
     *
     * @var string
     */
    protected $pageUrl = '';

    /**
     * The page's access rootline
     *
     * @var Rootline
     */
    protected $pageAccessRootline = null;

    /**
     * Documents that have been sent to Meilisearch
     *
     * @var array
     */
    protected $documentsSentToMeilisearch = [];

    /**
     * @var TypoScriptConfiguration
     */
    protected $configuration;

    /**
     * @var Item
     */
    protected $indexQueueItem;

    /**
     * @var \WapplerSystems\Meilisearch\System\Logging\MeilisearchLogManager
     */
    protected $logger = null;

    /**
     * Constructor
     *
     * Sets up logger, page URL and configuration, then tries to open the
     * Meilisearch connection. A connection failure is logged and swallowed:
     * the connection property stays NULL and indexPage() will return FALSE.
     *
     * @param TypoScriptFrontendController $page The page to index
     */
    public function __construct(TypoScriptFrontendController $page)
    {
        $this->logger = GeneralUtility::makeInstance(MeilisearchLogManager::class, /** @scrutinizer ignore-type */ __CLASS__);
        $this->page = $page;
        $this->pageUrl = GeneralUtility::getIndpEnv('TYPO3_REQUEST_URL');
        $this->configuration = Util::getMeilisearchConfiguration();

        try {
            $this->initializeMeilisearchConnection();
        } catch (\Exception $e) {
            $this->logger->log(
                MeilisearchLogManager::ERROR,
                $e->getMessage() . ' Error code: ' . $e->getCode()
            );

            // TODO extract to a class "ExceptionLogger"
            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(
                    MeilisearchLogManager::ERROR,
                    'Exception while trying to index a page',
                    [
                        $e->__toString()
                    ]
                );
            }
        }

        $this->pageAccessRootline = GeneralUtility::makeInstance(Rootline::class, /** @scrutinizer ignore-type */ '');
    }

    /**
     * Sets the Index Queue item this indexer run belongs to.
     *
     * @param Item $indexQueueItem
     */
    public function setIndexQueueItem($indexQueueItem)
    {
        $this->indexQueueItem = $indexQueueItem;
    }

    /**
     * Initializes the Meilisearch server connection.
     *
     * The connection is looked up for the current page ID and language and
     * is only stored when the server answers a ping.
     *
     * @throws \Exception when no Meilisearch connection can be established.
     */
    protected function initializeMeilisearchConnection()
    {
        $connection = GeneralUtility::makeInstance(ConnectionManager::class)
            ->getConnectionByPageId($this->page->id, Util::getLanguageUid());

        // Uses getService() like setMeilisearchConnection() and
        // addDocumentsToMeilisearchIndex() do, instead of the former
        // getWriteService() accessor.
        // do not continue if no server is available
        if (!$connection->getService()->ping()) {
            throw new \Exception(
                'No Meilisearch instance available while trying to index a page.',
                1234790825
            );
        }

        $this->meilisearchConnection = $connection;
    }

    /**
     * Gets the current page's Meilisearch document ID.
     *
     * @return string The page's Meilisearch document ID, or an empty string in case no document was generated yet.
     */
    public static function getPageMeilisearchDocumentId()
    {
        return self::$pageMeilisearchDocumentId;
    }

    /**
     * Gets the Meilisearch document generated for the current page.
     *
     * @return Document|null The page's Meilisearch document or NULL if it has not been generated yet.
     */
    public static function getPageMeilisearchDocument()
    {
        return self::$pageMeilisearchDocument;
    }

    /**
     * Allows to provide a Meilisearch server connection other than the one
     * initialized by the constructor.
     *
     * @param MeilisearchConnection $meilisearchConnection Meilisearch connection
     * @throws \Exception if the Meilisearch server cannot be reached
     */
    public function setMeilisearchConnection(MeilisearchConnection $meilisearchConnection)
    {
        if (!$meilisearchConnection->getService()->ping()) {
            throw new \Exception(
                'Could not connect to Meilisearch server.',
                1323946472
            );
        }

        $this->meilisearchConnection = $meilisearchConnection;
    }

    /**
     * Indexes a page.
     *
     * Builds the page document, lets substitute indexers and post processors
     * modify it, collects additional documents, runs field processing and
     * finally sends everything to Meilisearch.
     *
     * @return bool TRUE after successfully indexing the page, FALSE on error
     * @throws \UnexpectedValueException if a page document post processor fails to implement interface WapplerSystems\Meilisearch\PageDocumentPostProcessor
     */
    public function indexPage()
    {
        if ($this->meilisearchConnection === null) {
            // intended early return as it doesn't make sense to continue
            // and waste processing time if the meilisearch server isn't
            // available anyways
            // FIXME use an exception
            return false;
        }

        $pageDocument = $this->substitutePageDocument($this->getPageDocument());
        $this->applyIndexPagePostProcessors($pageDocument);
        self::$pageMeilisearchDocument = $pageDocument;

        // The page document always comes first; hooks may append further ones.
        $documents = $this->getAdditionalDocuments($pageDocument, [$pageDocument]);
        $this->processDocuments($documents);

        $pageIndexed = $this->addDocumentsToMeilisearchIndex($documents);
        $this->documentsSentToMeilisearch = $documents;

        return $pageIndexed;
    }

    /**
     * Applies the configured post processors (indexPagePostProcessPageDocument)
     *
     * Does nothing when the hook is not registered or is not an array.
     *
     * @param Document $pageDocument The page document handed to every post processor
     * @throws \UnexpectedValueException if a registered class does not implement WapplerSystems\Meilisearch\PageDocumentPostProcessor
     */
    protected function applyIndexPagePostProcessors($pageDocument)
    {
        // "?? null" keeps an unregistered hook from raising "Undefined array key".
        $postProcessorClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer']['indexPagePostProcessPageDocument'] ?? null;
        if (!is_array($postProcessorClasses)) {
            return;
        }

        foreach ($postProcessorClasses as $classReference) {
            $postProcessor = GeneralUtility::makeInstance($classReference);

            // Fully qualified on purpose: this file does not import the
            // interface, so the short name would resolve inside the
            // IndexQueue namespace.
            if (!$postProcessor instanceof \WapplerSystems\Meilisearch\PageDocumentPostProcessor) {
                // Report the offending post processor, not the document.
                throw new \UnexpectedValueException(
                    get_class($postProcessor) . ' must implement interface ' . \WapplerSystems\Meilisearch\PageDocumentPostProcessor::class,
                    1397739154
                );
            }

            $postProcessor->postProcessPageDocument($pageDocument, $this->page);
        }
    }

    /**
     * Builds the Meilisearch document for the current page and remembers its
     * ID in the static $pageMeilisearchDocumentId.
     *
     * @return Document A document representing the page
     */
    protected function getPageDocument()
    {
        /** @var Builder $documentBuilder */
        $documentBuilder = GeneralUtility::makeInstance(Builder::class);
        $document = $documentBuilder->fromPage(
            $this->page,
            $this->pageUrl,
            $this->pageAccessRootline,
            (string)$this->mountPointParameter
        );

        self::$pageMeilisearchDocumentId = $document['id'];

        return $document;
    }

    // Logging
    // TODO replace by a central logger

    /**
     * Gets the mount point parameter that is used in the Frontend controller.
     *
     * @return string
     */
    public function getMountPointParameter()
    {
        return $this->mountPointParameter;
    }

    // Misc

    /**
     * Sets the mount point parameter that is used in the Frontend controller.
     *
     * The value is cast to string before it is stored.
     *
     * @param string $mountPointParameter
     */
    public function setMountPointParameter($mountPointParameter)
    {
        $this->mountPointParameter = (string)$mountPointParameter;
    }

    /**
     * Allows third party extensions to replace or modify the page document
     * created by this indexer.
     *
     * Registered substitute indexers run in order; each receives the document
     * returned by its predecessor.
     *
     * @param Document $pageDocument The page document created by this indexer.
     * @return Document An Meilisearch document representing the currently indexed page
     * @throws \UnexpectedValueException if a registered class is no SubstitutePageIndexer or returns something other than a Document
     */
    protected function substitutePageDocument(Document $pageDocument)
    {
        // "?? null" keeps an unregistered hook from raising "Undefined array key".
        $substituteIndexerClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer']['indexPageSubstitutePageDocument'] ?? null;
        if (!is_array($substituteIndexerClasses)) {
            return $pageDocument;
        }

        $indexConfigurationName = $this->getIndexConfigurationNameForCurrentPage();

        foreach ($substituteIndexerClasses as $classReference) {
            $substituteIndexer = GeneralUtility::makeInstance($classReference);

            if (!$substituteIndexer instanceof SubstitutePageIndexer) {
                $message = get_class($substituteIndexer) . ' must implement interface ' . SubstitutePageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491001);
            }

            if ($substituteIndexer instanceof PageFieldMappingIndexer) {
                $substituteIndexer->setPageIndexingConfigurationName($indexConfigurationName);
            }

            $substituteDocument = $substituteIndexer->getPageDocument($pageDocument);
            if (!$substituteDocument instanceof Document) {
                $message = 'The document returned by ' . get_class($substituteIndexer) . ' is not a valid Document object.';
                throw new \UnexpectedValueException($message, 1310490952);
            }

            $pageDocument = $substituteDocument;
        }

        return $pageDocument;
    }

    /**
     * Retrieves the indexConfigurationName from the related queueItem, or falls back to pages when no queue item set.
     *
     * @return string
     */
    protected function getIndexConfigurationNameForCurrentPage()
    {
        return isset($this->indexQueueItem) ? $this->indexQueueItem->getIndexingConfigurationName() : 'pages';
    }

    /**
     * Allows third party extensions to provide additional documents which
     * should be indexed for the current page.
     *
     * @param Document $pageDocument The main document representing this page.
     * @param Document[] $existingDocuments An array of documents already created for this page.
     * @return array The existing documents followed by all additional Document objects to index
     * @throws \UnexpectedValueException if a registered class does not implement AdditionalPageIndexer
     */
    protected function getAdditionalDocuments(Document $pageDocument, array $existingDocuments)
    {
        $documents = $existingDocuments;

        // "?? null" keeps an unregistered hook from raising "Undefined array key".
        $additionalIndexerClasses = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['meilisearch']['Indexer']['indexPageAddDocuments'] ?? null;
        if (!is_array($additionalIndexerClasses)) {
            return $documents;
        }

        foreach ($additionalIndexerClasses as $classReference) {
            $additionalIndexer = GeneralUtility::makeInstance($classReference);

            if (!$additionalIndexer instanceof AdditionalPageIndexer) {
                $message = get_class($additionalIndexer) . ' must implement interface ' . AdditionalPageIndexer::class;
                throw new \UnexpectedValueException($message, 1310491024);
            }

            // Non-array results are ignored.
            $additionalDocuments = $additionalIndexer->getAdditionalPageDocuments($pageDocument, $documents);
            if (is_array($additionalDocuments)) {
                $documents = array_merge($documents, $additionalDocuments);
            }
        }

        return $documents;
    }

    /**
     * Sends the given documents to the field processing service which takes
     * care of manipulating fields as defined in the field's configuration.
     *
     * The service is only instantiated when processing instructions exist.
     *
     * @param array $documents An array of documents to manipulate
     */
    protected function processDocuments(array $documents)
    {
        $processingInstructions = $this->configuration->getIndexFieldProcessingInstructionsConfiguration();

        if (count($processingInstructions) > 0) {
            $service = GeneralUtility::makeInstance(Service::class);
            $service->processDocuments($documents, $processingInstructions);
        }
    }

    /**
     * Adds the collected documents to the Meilisearch index.
     *
     * Documents are sent in chunks of 20. Any exception, including a non-200
     * response status, is logged and turns the result into FALSE.
     *
     * @param array $documents An array of Document objects.
     * @return bool TRUE if documents were added successfully, FALSE otherwise
     */
    protected function addDocumentsToMeilisearchIndex(array $documents)
    {
        if (count($documents) === 0) {
            return false;
        }

        $documentsAdded = false;

        try {
            $this->logger->log(MeilisearchLogManager::INFO, 'Adding ' . count($documents) . ' documents.', $documents);

            // chunk adds by 20
            foreach (array_chunk($documents, 20) as $documentChunk) {
                $response = $this->meilisearchConnection->getService()->addDocuments($documentChunk);
                if ($response->getHttpStatus() != 200) {
                    throw new \RuntimeException('Meilisearch Request failed.', 1331834983);
                }
            }

            $documentsAdded = true;
        } catch (\Exception $e) {
            $this->logger->log(MeilisearchLogManager::ERROR, $e->getMessage() . ' Error code: ' . $e->getCode());

            if ($this->configuration->getLoggingExceptions()) {
                $this->logger->log(MeilisearchLogManager::ERROR, 'Exception while adding documents', [$e->__toString()]);
            }
        }

        return $documentsAdded;
    }

    /**
     * Gets the current page's URL.
     *
     * @return string URL of the current page.
     */
    public function getPageUrl()
    {
        return $this->pageUrl;
    }

    /**
     * Sets the URL to use for the page document.
     *
     * @param string $url The page's URL.
     */
    public function setPageUrl($url)
    {
        $this->pageUrl = $url;
    }

    /**
     * Gets the page's access rootline.
     *
     * @return Rootline The page's access rootline
     */
    public function getPageAccessRootline()
    {
        return $this->pageAccessRootline;
    }

    /**
     * Sets the page's access rootline.
     *
     * @param Rootline $accessRootline The page's access rootline
     */
    public function setPageAccessRootline(Rootline $accessRootline)
    {
        $this->pageAccessRootline = $accessRootline;
    }

    /**
     * Gets the documents that have been sent to Meilisearch
     *
     * @return array An array of Document objects
     */
    public function getDocumentsSentToMeilisearch()
    {
        return $this->documentsSentToMeilisearch;
    }
}