meilisearch/Classes/Domain/Search/MeilisearchDocument/Builder.php

310 lines
10 KiB
PHP
Raw Permalink Normal View History

2021-04-17 00:26:33 +02:00
<?php
2021-04-24 04:44:44 +02:00
namespace WapplerSystems\Meilisearch\Domain\Search\MeilisearchDocument;
2021-04-17 00:26:33 +02:00
/***************************************************************
* Copyright notice
*
* (c) 2017 Timo Hund <timo.hund@dkd.de>
* All rights reserved
*
* This script is part of the TYPO3 project. The TYPO3 project is
* free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* The GNU General Public License can be found at
* http://www.gnu.org/copyleft/gpl.html.
*
* This script is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
use WapplerSystems\Meilisearch\Access\Rootline;
use WapplerSystems\Meilisearch\Domain\Site\SiteRepository;
use WapplerSystems\Meilisearch\Domain\Variants\IdBuilder;
use WapplerSystems\Meilisearch\Domain\Site\Site;
2021-04-17 21:20:54 +02:00
use WapplerSystems\Meilisearch\System\Meilisearch\Document\Document;
2021-04-17 00:26:33 +02:00
use WapplerSystems\Meilisearch\Typo3PageContentExtractor;
use WapplerSystems\Meilisearch\Util;
use TYPO3\CMS\Core\Utility\GeneralUtility;
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
/**
 * Builder class to build a Meilisearch document.
 *
 * Documents are built as plain associative arrays (field name => value),
 * either from a rendered TYPO3 page or from an arbitrary record.
 *
 * @author Timo Hund <timo.hund@dkd.de>
 */
class Builder
{
    /**
     * Builder used to create the value of the "variantId" field.
     *
     * @var IdBuilder
     */
    protected $variantIdBuilder;

    /**
     * Builder constructor.
     *
     * @param IdBuilder|null $variantIdBuilder Optional variant id builder; when omitted an instance is
     *                                         created through GeneralUtility::makeInstance().
     */
    public function __construct(IdBuilder $variantIdBuilder = null)
    {
        $this->variantIdBuilder = $variantIdBuilder ?? GeneralUtility::makeInstance(IdBuilder::class);
    }

    /**
     * This method can be used to build a document from a TYPO3 page.
     *
     * The "access", "endtime" and "keywords" fields are only added when they
     * carry a value. Tag content fields are written last and therefore
     * overwrite any previously set field with the same name.
     *
     * @param TypoScriptFrontendController $page
     * @param string $url
     * @param Rootline $pageAccessRootline
     * @param string $mountPointParameter
     * @return array The document as field name => value pairs
     */
    public function fromPage(TypoScriptFrontendController $page, $url, Rootline $pageAccessRootline, $mountPointParameter): array
    {
        $site = $this->getSiteByPageId($page->id);
        $pageRecord = $page->page;
        $accessGroups = $this->getDocumentIdGroups($pageAccessRootline);

        $document = [
            'id' => $this->getPageDocumentId($page, $accessGroups, $mountPointParameter),
            'site' => $site->getDomain(),
            'siteHash' => $site->getSiteHash(),
            'appKey' => 'EXT:meilisearch',
            'type' => 'pages',
            // system fields
            'uid' => $page->id,
            'pid' => $pageRecord['pid'],
            'variantId' => $this->variantIdBuilder->buildFromTypeAndUid('pages', $page->id),
            'typeNum' => $page->type,
            'created' => $pageRecord['crdate'],
            'changed' => $pageRecord['SYS_LASTCHANGED'],
            'rootline' => $this->getRootLineFieldValue($page->id, $mountPointParameter),
        ];

        // access
        $this->addAccessField($document, $pageAccessRootline);
        $this->addEndtimeField($document, $pageRecord);

        // content
        // @extensionScannerIgnoreLine
        $contentExtractor = $this->getExtractorForPageContent($page->content);
        $document['title'] = $contentExtractor->getPageTitle();
        $document['subTitle'] = $pageRecord['subtitle'];
        $document['navTitle'] = $pageRecord['nav_title'];
        $document['author'] = $pageRecord['author'];
        $document['description'] = $pageRecord['description'];
        $document['abstract'] = $pageRecord['abstract'];
        $document['content'] = $contentExtractor->getIndexableContent();
        $document['url'] = $url;

        $this->addKeywordsField($document, $pageRecord);
        $this->addTagContentFields($document, $contentExtractor->getTagContent());

        return $document;
    }

    /**
     * Creates a Meilisearch document with the basic / core fields set already.
     *
     * The "created", "changed" and "endtime" fields are only set when the TCA
     * "ctrl" section of the given table configures the corresponding column.
     *
     * @param array $itemRecord The record to build the document from; "uid" and "pid" are read from it
     * @param string $type The table name, used as document type and as key into $GLOBALS['TCA']
     * @param int $rootPageUid
     * @param string $accessRootLine
     * @return array The document as field name => value pairs
     */
    public function fromRecord(array $itemRecord, string $type, int $rootPageUid, string $accessRootLine): array
    {
        $site = $this->getSiteByPageId($rootPageUid);
        $tableControl = $GLOBALS['TCA'][$type]['ctrl'] ?? [];

        $document = [
            // required fields
            'id' => $this->getDocumentId($type, $site->getRootPageId(), $itemRecord['uid']),
            'type' => $type,
            'appKey' => 'EXT:meilisearch',
            // site, siteHash
            'site' => $site->getDomain(),
            'siteHash' => $site->getSiteHash(),
            // uid, pid
            'uid' => $itemRecord['uid'],
            'pid' => $itemRecord['pid'],
            // variantId
            'variantId' => $this->variantIdBuilder->buildFromTypeAndUid($type, $itemRecord['uid']),
        ];

        // created, changed
        if (!empty($tableControl['crdate'])) {
            $document['created'] = $itemRecord[$tableControl['crdate']];
        }
        if (!empty($tableControl['tstamp'])) {
            $document['changed'] = $itemRecord[$tableControl['tstamp']];
        }

        // access, endtime
        $document['access'] = $accessRootLine;
        $endtimeField = $tableControl['enablecolumns']['endtime'] ?? null;
        if (!empty($endtimeField) && $itemRecord[$endtimeField] != 0) {
            $document['endtime'] = $itemRecord[$endtimeField];
        }

        return $document;
    }

    /**
     * Builds the document id of a page by delegating to Util::getPageDocumentId()
     * with the page id, the page type and the language uid from Util::getLanguageUid().
     *
     * @param TypoScriptFrontendController $frontendController
     * @param string $accessGroups
     * @param string $mountPointParameter
     * @return string
     */
    protected function getPageDocumentId(TypoScriptFrontendController $frontendController, string $accessGroups, string $mountPointParameter): string
    {
        return Util::getPageDocumentId($frontendController->id, $frontendController->type, Util::getLanguageUid(), $accessGroups, $mountPointParameter);
    }

    /**
     * Builds the document id of a record by delegating to Util::getDocumentId().
     *
     * @param string $type
     * @param int $rootPageId
     * @param int $recordUid
     * @return string
     */
    protected function getDocumentId(string $type, int $rootPageId, int $recordUid): string
    {
        return Util::getDocumentId($type, $rootPageId, $recordUid);
    }

    /**
     * Retrieves the site for a page id from the SiteRepository.
     *
     * @param integer $pageId
     * @return Site
     */
    protected function getSiteByPageId($pageId)
    {
        $siteRepository = GeneralUtility::makeInstance(SiteRepository::class);
        return $siteRepository->getSiteByPageId($pageId);
    }

    /**
     * Creates a content extractor for the passed page content.
     *
     * @param string $pageContent
     * @return Typo3PageContentExtractor
     */
    protected function getExtractorForPageContent($pageContent)
    {
        return GeneralUtility::makeInstance(Typo3PageContentExtractor::class, /** @scrutinizer ignore-type */ $pageContent);
    }

    /**
     * Builds the content for the rootline field.
     *
     * The page id is returned as passed in when the mount point parameter is
     * an empty string; otherwise "<pageId>,<mountPointParameter>" is returned.
     *
     * @param int $pageId
     * @param string $mountPointParameter
     * @return int|string
     */
    protected function getRootLineFieldValue($pageId, $mountPointParameter)
    {
        $rootline = $pageId;
        if ($mountPointParameter !== '') {
            $rootline .= ',' . $mountPointParameter;
        }
        return $rootline;
    }

    /**
     * Gets a comma separated list of frontend user groups to use for the
     * document ID.
     *
     * @param Rootline $pageAccessRootline
     * @return string A comma separated list of frontend user groups, "0" when the cleaned group list is empty.
     */
    protected function getDocumentIdGroups(Rootline $pageAccessRootline)
    {
        $groups = Rootline::cleanGroupArray($pageAccessRootline->getGroups());
        if (empty($groups)) {
            $groups[] = 0;
        }
        return implode(',', $groups);
    }

    /**
     * Adds the access field to the document if needed.
     *
     * The field is only set when the string representation of the rootline
     * is not blank.
     *
     * @param array $document The document, modified in place
     * @param Rootline $pageAccessRootline
     */
    protected function addAccessField(array &$document, Rootline $pageAccessRootline)
    {
        $access = (string)$pageAccessRootline;
        if (trim($access) !== '') {
            $document['access'] = $access;
        }
    }

    /**
     * Adds the endtime field value to the document.
     *
     * Nothing is added when the page record has no "endtime" key or when its
     * value is empty (e.g. 0).
     *
     * @param array $document The document, modified in place
     * @param array $pageRecord
     */
    protected function addEndtimeField(array &$document, $pageRecord)
    {
        // !empty() has the same truthiness as the plain value but does not
        // raise an undefined index notice for records without the column
        if (!empty($pageRecord['endtime'])) {
            $document['endtime'] = $pageRecord['endtime'];
        }
    }

    /**
     * Adds keywords, multi valued.
     *
     * The comma separated "keywords" value of the page record is split,
     * trimmed and de-duplicated; the resulting list is stored in the
     * "keywords" field. The field is not set when no keyword remains.
     *
     * @param array $document The document, modified in place
     * @param array $pageRecord
     */
    protected function addKeywordsField(array &$document, $pageRecord)
    {
        if (!isset($pageRecord['keywords'])) {
            return;
        }

        $keywords = array_unique(GeneralUtility::trimExplode(',', $pageRecord['keywords'], true));
        if ($keywords === []) {
            return;
        }

        // array_unique() keeps the original keys, so re-index to get a
        // gap-free list holding every keyword instead of only the last one
        $document['keywords'] = array_values($keywords);
    }

    /**
     * Add content from several tags like headers, anchors, ...
     *
     * Every entry is copied into the document under its own key; an existing
     * field with the same name is overwritten.
     *
     * @param array $document The document, modified in place
     * @param array $tagContent Field name => field value pairs
     */
    protected function addTagContentFields(array &$document, $tagContent = [])
    {
        foreach ($tagContent as $fieldName => $fieldValue) {
            $document[$fieldName] = $fieldValue;
        }
    }
}