meilisearch/Classes/GarbageCollector.php
2021-04-17 00:26:33 +02:00

381 lines
14 KiB
PHP

<?php
namespace WapplerSystems\Meilisearch;
/***************************************************************
* Copyright notice
*
* (c) 2010-2015 Ingo Renner <ingo@typo3.org>
* All rights reserved
*
* This script is part of the TYPO3 project. The TYPO3 project is
* free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* The GNU General Public License can be found at
* http://www.gnu.org/copyleft/gpl.html.
*
* This script is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
use WapplerSystems\Meilisearch\Domain\Index\Queue\GarbageRemover\StrategyFactory;
use WapplerSystems\Meilisearch\IndexQueue\Queue;
use WapplerSystems\Meilisearch\System\TCA\TCAService;
use TYPO3\CMS\Backend\Utility\BackendUtility;
use TYPO3\CMS\Core\DataHandling\DataHandler;
use TYPO3\CMS\Core\SingletonInterface;
use TYPO3\CMS\Core\Utility\GeneralUtility;
/**
* Garbage Collector, removes related documents from the index when a record is
* set to hidden, is deleted or is otherwise made invisible to website visitors.
*
* Garbage collection will happen for online/LIVE workspaces only.
*
* @author Ingo Renner <ingo@typo3.org>
* @author Timo Schmidt <timo.schmidt@dkd.de>
*/
class GarbageCollector extends AbstractDataHandlerListener implements SingletonInterface
{
    /**
     * Snapshots of the visibility affecting fields of records as they were
     * before an update, indexed as [table][uid] => record.
     *
     * Filled by processDatamap_preProcessFieldArray() and read by
     * hasFrontendGroupsRemoved().
     *
     * @var array
     */
    protected $trackedRecords = [];

    /**
     * @var TCAService
     */
    protected $tcaService;

    /**
     * GarbageCollector constructor.
     *
     * @param TCAService|null $TCAService Optional service instance; when omitted one is
     *                                    obtained through GeneralUtility::makeInstance().
     */
    public function __construct(?TCAService $TCAService = null)
    {
        parent::__construct();
        $this->tcaService = $TCAService ?? GeneralUtility::makeInstance(TCAService::class);
    }

    /**
     * Hooks into TCE main and tracks record deletion commands.
     *
     * For a "delete" command in the LIVE workspace the record's garbage is
     * collected; for pages the Index Queue item is deleted as well.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     * @return void
     */
    public function processCmdmap_preProcess($command, $table, $uid, $value, DataHandler $tceMain)
    {
        // workspaces: collect garbage only for LIVE workspace
        if ($command === 'delete' && $GLOBALS['BE_USER']->workspace == 0) {
            $this->collectGarbage($table, $uid);

            if ($table === 'pages') {
                $this->getIndexQueue()->deleteItem($table, $uid);
            }
        }
    }

    /**
     * Holds the configuration when a recursive page queueing should be triggered.
     *
     * Note: The SQL transaction is already committed, so the current state covers only "non"-changed fields.
     *
     * @return array Map of trigger name => ['currentState' => [...], 'changeSet' => [...]]
     */
    protected function getUpdateSubPagesRecursiveTriggerConfiguration()
    {
        return [
            // the current page has the field "extendToSubpages" enabled and the field "hidden" was set to 1
            // covers following scenarios:
            // 'currentState' => ['hidden' => '0', 'extendToSubpages' => '0|1'], 'changeSet' => ['hidden' => '1', (optional)'extendToSubpages' => '1']
            'extendToSubpageEnabledAndHiddenFlagWasAdded' => [
                'currentState' => ['extendToSubpages' => '1'],
                'changeSet' => ['hidden' => '1']
            ],
            // the current page has the field "hidden" enabled and the field "extendToSubpages" was set to 1
            // covers following scenarios:
            // 'currentState' => ['hidden' => '0|1', 'extendToSubpages' => '0'], 'changeSet' => [(optional)'hidden' => '1', 'extendToSubpages' => '1']
            'hiddenIsEnabledAndExtendToSubPagesWasAdded' => [
                'currentState' => ['hidden' => '1'],
                'changeSet' => ['extendToSubpages' => '1']
            ]
        ];
    }

    /**
     * Tracks down index documents belonging to a particular record or page and
     * removes them from the index and the Index Queue.
     *
     * The actual removal is delegated to the strategy that
     * StrategyFactory::getByTable() returns for the given table.
     *
     * NOTE(review): an earlier revision of this docblock announced an
     * \UnexpectedValueException for hook objects not implementing
     * GarbageCollectorPostProcessor. No such check is done in this method;
     * whether the strategy performs it cannot be told from here - confirm.
     *
     * @param string $table The record's table name.
     * @param int $uid The record's uid.
     */
    public function collectGarbage($table, $uid)
    {
        $garbageRemoverStrategy = StrategyFactory::getByTable($table);
        $garbageRemoverStrategy->removeGarbageOf($table, $uid);
    }

    /**
     * Collects the garbage of all sub pages of a page when the changed fields
     * require a recursive page update.
     *
     * isRecursivePageUpdateRequired() and getSubPageIds() are not defined in
     * this class; presumably they are inherited from the parent listener.
     *
     * @param string $table
     * @param int $uid
     * @param array $changedFields
     */
    protected function deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $changedFields)
    {
        if (!$this->isRecursivePageUpdateRequired($uid, $changedFields)) {
            return;
        }

        // get affected subpages when "extendToSubpages" flag was set
        $pagesToDelete = $this->getSubPageIds($uid);

        // remove every affected sub page; the page itself is handled by the caller
        foreach ($pagesToDelete as $pageToDelete) {
            $this->collectGarbage($table, $pageToDelete);
        }
    }

    // methods checking whether to trigger garbage collection

    /**
     * Hooks into TCE main and tracks page move commands.
     *
     * @param string $command The command.
     * @param string $table The table the record belongs to
     * @param int $uid The record's uid
     * @param string $value Not used
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processCmdmap_postProcess($command, $table, $uid, $value, DataHandler $tceMain)
    {
        // workspaces: collect garbage only for LIVE workspace
        if ($command === 'move' && $table === 'pages' && $GLOBALS['BE_USER']->workspace == 0) {
            // TODO the below comment is not valid anymore, pid has been removed from doc ID
            // ...still needed?

            // must be removed from index since the pid changes and
            // is part of the Solr document ID
            $this->collectGarbage($table, $uid);

            // now re-index with new properties
            $this->getIndexQueue()->updateItem($table, $uid);
        }
    }

    /**
     * Hooks into TCE main and tracks changed records. In this case the current
     * record's values are stored to do a change comparison later on for fields
     * like fe_group.
     *
     * Nothing is stored for new records, draft (workspace) records, tables
     * without an fe_group enable column, or when the record cannot be loaded.
     *
     * @param array $incomingFields An array of incoming fields, new or changed, not used
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_preProcessFieldArray($incomingFields, $table, $uid, DataHandler $tceMain)
    {
        if (!is_int($uid)) {
            // a newly created record, skip
            return;
        }

        if (Util::isDraftRecord($table, $uid)) {
            // skip workspaces: collect garbage only for LIVE workspace
            return;
        }

        $hasConfiguredEnableColumnForFeGroup = $this->tcaService->isEnableColumn($table, 'fe_group');
        if (!$hasConfiguredEnableColumnForFeGroup) {
            return;
        }

        $visibilityAffectingFields = $this->tcaService->getVisibilityAffectingFieldsByTable($table);
        $record = (array)BackendUtility::getRecord($table, $uid, $visibilityAffectingFields, '', false);

        // If no record could be found skip further processing
        if (empty($record)) {
            return;
        }

        $record = $this->tcaService->normalizeFrontendGroupField($table, $record);

        // keep previous state of important fields for later comparison
        $this->trackedRecords[$table][$uid] = $record;
    }

    /**
     * Hooks into TCE Main and watches all record updates. If a change is
     * detected that would remove the record from the website, we try to find
     * related documents and remove them from the index.
     *
     * @param string $status Status of the current operation, 'new' or 'update'
     * @param string $table The table the record belongs to
     * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
     * @param array $fields The changed fields; only used to decide whether sub pages of a page need to be removed, too
     * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
     */
    public function processDatamap_afterDatabaseOperations($status, $table, $uid, array $fields, DataHandler $tceMain)
    {
        if ($status === 'new') {
            // a newly created record, skip
            return;
        }

        if (Util::isDraftRecord($table, $uid)) {
            // skip workspaces: collect garbage only for LIVE workspace
            return;
        }

        $record = $this->getRecordWithFieldRelevantForGarbageCollection($table, $uid);

        // If no record could be found skip further processing
        if (empty($record)) {
            return;
        }

        $record = $this->tcaService->normalizeFrontendGroupField($table, $record);
        $isGarbage = $this->getIsGarbageRecord($table, $record);
        if (!$isGarbage) {
            return;
        }

        $this->collectGarbage($table, $uid);

        if ($table === 'pages') {
            $this->deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $fields);
        }
    }

    /**
     * Check if a record is getting invisible due to changes in start or endtime. In addition it is checked that the related
     * queue item was marked as indexed.
     *
     * @param string $table
     * @param array $record
     * @return bool
     */
    protected function isInvisibleByStartOrEndtime($table, $record)
    {
        return (
            ($this->tcaService->isStartTimeInFuture($table, $record) || $this->tcaService->isEndTimeInPast($table, $record)) &&
            $this->isRelatedQueueRecordMarkedAsIndexed($table, $record)
        );
    }

    /**
     * Checks if the related index queue item is indexed.
     *
     * * For tt_content the page from the pid is checked
     * * For all other records the table itself is checked
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the record is marked as being indexed
     */
    protected function isRelatedQueueRecordMarkedAsIndexed($table, $record)
    {
        if ($table === 'tt_content') {
            $table = 'pages';
            $uid = $record['pid'];
        } else {
            $uid = $record['uid'];
        }

        return $this->getIndexQueue()->containsIndexedItem($table, $uid);
    }

    /**
     * @return Queue
     */
    private function getIndexQueue()
    {
        return GeneralUtility::makeInstance(Queue::class);
    }

    /**
     * Checks whether a frontend group field exists for the record and if so
     * whether groups have been removed from accessing the record thus making
     * the record invisible to at least some people.
     *
     * The comparison needs the snapshot taken in
     * processDatamap_preProcessFieldArray(). Without such a snapshot there is
     * no previous state to compare against, so no removal is reported.
     *
     * @param string $table The table name.
     * @param array $record An array with record fields that may affect visibility.
     * @return bool TRUE if frontend groups have been removed from access to the record, FALSE otherwise.
     */
    protected function hasFrontendGroupsRemoved($table, $record)
    {
        if (!isset($GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'])) {
            return false;
        }

        $frontendGroupsField = $GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'];

        $uid = $record['uid'] ?? null;
        if ($uid === null || !isset($this->trackedRecords[$table][$uid])) {
            // No baseline was tracked: reading it anyway would raise an undefined
            // key warning and compare against [''], reporting a bogus removal.
            return false;
        }

        $previousGroups = explode(',', (string)($this->trackedRecords[$table][$uid][$frontendGroupsField] ?? ''));
        $currentGroups = explode(',', (string)($record[$frontendGroupsField] ?? ''));
        $removedGroups = array_diff($previousGroups, $currentGroups);

        return count($removedGroups) > 0;
    }

    /**
     * Checks whether the page has been excluded from searching.
     *
     * @param array $record An array with record fields that may affect visibility.
     * @return bool True if the page has been excluded from searching, FALSE otherwise
     */
    protected function isPageExcludedFromSearch($record)
    {
        // a missing "no_search" field counts as "not excluded"
        return (bool)($record['no_search'] ?? false);
    }

    /**
     * Checks whether a page has a page type that can be indexed.
     *
     * The decision is delegated to $this->frontendEnvironment, which is not
     * declared in this class (presumably provided by the parent listener).
     *
     * @param array $record A page record
     * @return bool TRUE if the page can be indexed according to its page type, FALSE otherwise
     */
    protected function isIndexablePageType(array $record)
    {
        return $this->frontendEnvironment->isAllowedPageType($record);
    }

    /**
     * Determines if a record is garbage and can be deleted.
     *
     * A record is garbage when it is hidden, invisible by start/end time,
     * lost frontend groups, or - for pages only - is excluded from search or
     * has a page type that cannot be indexed.
     *
     * @param string $table
     * @param array $record
     * @return bool
     */
    protected function getIsGarbageRecord($table, $record): bool
    {
        return $this->tcaService->isHidden($table, $record) ||
            $this->isInvisibleByStartOrEndtime($table, $record) ||
            $this->hasFrontendGroupsRemoved($table, $record) ||
            ($table === 'pages' && $this->isPageExcludedFromSearch($record)) ||
            ($table === 'pages' && !$this->isIndexablePageType($record));
    }

    /**
     * Returns a record with all visibility affecting fields.
     *
     * @param string $table
     * @param int $uid
     * @return array The record, or an empty array when it could not be loaded
     */
    protected function getRecordWithFieldRelevantForGarbageCollection($table, $uid): array
    {
        $garbageCollectionRelevantFields = $this->tcaService->getVisibilityAffectingFieldsByTable($table);

        return (array)BackendUtility::getRecord($table, $uid, $garbageCollectionRelevantFields, '', false);
    }
}