381 lines
14 KiB
PHP
381 lines
14 KiB
PHP
<?php
|
|
namespace WapplerSystems\Meilisearch;
|
|
|
|
/***************************************************************
|
|
* Copyright notice
|
|
*
|
|
* (c) 2010-2015 Ingo Renner <ingo@typo3.org>
|
|
* All rights reserved
|
|
*
|
|
* This script is part of the TYPO3 project. The TYPO3 project is
|
|
* free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* The GNU General Public License can be found at
|
|
* http://www.gnu.org/copyleft/gpl.html.
|
|
*
|
|
* This script is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* This copyright notice MUST APPEAR in all copies of the script!
|
|
***************************************************************/
|
|
|
|
use WapplerSystems\Meilisearch\Domain\Index\Queue\GarbageRemover\StrategyFactory;
|
|
use WapplerSystems\Meilisearch\IndexQueue\Queue;
|
|
use WapplerSystems\Meilisearch\System\TCA\TCAService;
|
|
use TYPO3\CMS\Backend\Utility\BackendUtility;
|
|
use TYPO3\CMS\Core\DataHandling\DataHandler;
|
|
use TYPO3\CMS\Core\SingletonInterface;
|
|
use TYPO3\CMS\Core\Utility\GeneralUtility;
|
|
|
|
/**
|
|
* Garbage Collector, removes related documents from the index when a record is
|
|
* set to hidden, is deleted or is otherwise made invisible to website visitors.
|
|
*
|
|
* Garbage collection will happen for online/LIVE workspaces only.
|
|
*
|
|
* @author Ingo Renner <ingo@typo3.org>
|
|
* @author Timo Schmidt <timo.schmidt@dkd.de>
|
|
*/
|
|
class GarbageCollector extends AbstractDataHandlerListener implements SingletonInterface
|
|
{
|
|
/**
 * Snapshots of records taken before an update, stored as [table][uid] => record.
 *
 * Written by processDatamap_preProcessFieldArray() and read again by
 * hasFrontendGroupsRemoved() to compare the previous with the current
 * frontend group assignment.
 *
 * @var array
 */
protected $trackedRecords = [];

/**
 * Service answering TCA related questions (enable columns, visibility
 * affecting fields, start/end time, frontend group normalization).
 *
 * @var TCAService
 */
protected $tcaService;
|
|
|
|
/**
 * GarbageCollector constructor.
 *
 * @param TCAService|null $TCAService Optional TCA service. When omitted (or null),
 *                                    an instance is created through GeneralUtility::makeInstance().
 */
public function __construct(?TCAService $TCAService = null)
{
    parent::__construct();

    // The parameter is declared explicitly nullable: relying on the "= null"
    // default to imply nullability is deprecated as of PHP 8.4.
    $this->tcaService = $TCAService ?? GeneralUtility::makeInstance(TCAService::class);
}
|
|
|
|
/**
 * DataHandler command map hook that reacts to record deletions.
 *
 * For a "delete" command issued in the LIVE workspace the record's garbage is
 * collected. Deleted pages are additionally removed from the Index Queue.
 *
 * @param string $command The command.
 * @param string $table The table the record belongs to
 * @param int $uid The record's uid
 * @param string $value Not used
 * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
 * @return void
 */
public function processCmdmap_preProcess($command, $table, $uid, $value, DataHandler $tceMain)
{
    if ($command !== 'delete') {
        return;
    }

    // workspaces: collect garbage only for LIVE workspace
    if ($GLOBALS['BE_USER']->workspace != 0) {
        return;
    }

    $this->collectGarbage($table, $uid);

    if ($table !== 'pages') {
        return;
    }

    $this->getIndexQueue()->deleteItem($table, $uid);
}
|
|
|
|
/**
 * Returns the rules describing when a page change has to be applied
 * recursively to the sub pages as well.
 *
 * Every rule pairs a "currentState" (field values of the stored page) with a
 * "changeSet" (field values that were just changed).
 *
 * Note: The SQL transaction is already committed, so the current state covers
 * only the fields that were not changed.
 *
 * @return array
 */
protected function getUpdateSubPagesRecursiveTriggerConfiguration()
{
    // The page has "extendToSubpages" enabled and "hidden" was set to 1.
    // Covered scenarios:
    // 'currentState' => ['hidden' => '0', 'extendToSubpages' => '0|1'], 'changeSet' => ['hidden' => '1', (optional)'extendToSubpages' => '1']
    $hiddenFlagWasAdded = [
        'currentState' => ['extendToSubpages' => '1'],
        'changeSet' => ['hidden' => '1'],
    ];

    // The page has "hidden" enabled and "extendToSubpages" was set to 1.
    // Covered scenarios:
    // 'currentState' => ['hidden' => '0|1', 'extendToSubpages' => '0'], 'changeSet' => [(optional)'hidden' => '1', 'extendToSubpages' => '1']
    $extendToSubPagesWasAdded = [
        'currentState' => ['hidden' => '1'],
        'changeSet' => ['extendToSubpages' => '1'],
    ];

    return [
        'extendToSubpageEnabledAndHiddenFlagWasAdded' => $hiddenFlagWasAdded,
        'hiddenIsEnabledAndExtendToSubPagesWasAdded' => $extendToSubPagesWasAdded,
    ];
}
|
|
|
|
/**
 * Removes the garbage of a particular record or page.
 *
 * The work is delegated to the garbage remover strategy that
 * StrategyFactory::getByTable() selects for the given table.
 *
 * @param string $table The record's table name.
 * @param int $uid The record's uid.
 * @throws \UnexpectedValueException according to the previous documentation, when a hook object
 *                                   does not implement \WapplerSystems\Meilisearch\GarbageCollectorPostProcessor
 *                                   (raised inside the strategy, not visible here - TODO confirm)
 */
public function collectGarbage($table, $uid)
{
    StrategyFactory::getByTable($table)->removeGarbageOf($table, $uid);
}
|
|
|
|
/**
 * Collects the garbage of all sub pages when the change of a page requires
 * a recursive update (see isRecursivePageUpdateRequired()).
 *
 * @param string $table Table name handed on to collectGarbage()
 * @param int $uid Uid of the page whose sub pages are looked up
 * @param array $changedFields The fields that were changed on the page
 */
protected function deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $changedFields)
{
    if ($this->isRecursivePageUpdateRequired($uid, $changedFields)) {
        // every page id returned for the "extendToSubpages" lookup is cleaned up
        foreach ($this->getSubPageIds($uid) as $affectedPageId) {
            $this->collectGarbage($table, $affectedPageId);
        }
    }
}
|
|
|
|
// methods checking whether to trigger garbage collection
|
|
|
|
/**
 * DataHandler command map hook that reacts to page moves.
 *
 * A page moved in the LIVE workspace is removed from the index and then
 * handed to the Index Queue again so it gets re-indexed.
 *
 * @param string $command The command.
 * @param string $table The table the record belongs to
 * @param int $uid The record's uid
 * @param string $value Not used
 * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
 */
public function processCmdmap_postProcess($command, $table, $uid, $value, DataHandler $tceMain)
{
    $isPageMove = $command === 'move' && $table === 'pages';
    if (!$isPageMove) {
        return;
    }

    // workspaces: collect garbage only for LIVE workspace
    if ($GLOBALS['BE_USER']->workspace != 0) {
        return;
    }

    // TODO the below comment is not valid anymore, pid has been removed from doc ID
    // ...still needed?

    // must be removed from index since the pid changes and
    // is part of the Meilisearch document ID
    $this->collectGarbage($table, $uid);

    // now re-index with new properties
    $this->getIndexQueue()->updateItem($table, $uid);
}
|
|
|
|
/**
 * DataHandler data map hook that remembers the state of a record before it
 * gets changed.
 *
 * The visibility affecting fields of the stored record are kept in
 * $this->trackedRecords so that a later comparison (e.g. of fe_group) against
 * the updated record is possible. Nothing is tracked for new records, draft
 * records, tables without an fe_group enable column or unknown records.
 *
 * @param array $incomingFields An array of incoming fields, new or changed, not used
 * @param string $table The table the record belongs to
 * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
 * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
 */
public function processDatamap_preProcessFieldArray($incomingFields, $table, $uid, DataHandler $tceMain)
{
    // a non-integer uid denotes a newly created record;
    // draft records are skipped as garbage is collected for the LIVE workspace only
    if (!is_int($uid) || Util::isDraftRecord($table, $uid)) {
        return;
    }

    if (!$this->tcaService->isEnableColumn($table, 'fe_group')) {
        return;
    }

    $fieldList = $this->tcaService->getVisibilityAffectingFieldsByTable($table);
    $storedRecord = (array)BackendUtility::getRecord($table, $uid, $fieldList, '', false);
    if ($storedRecord === []) {
        // no record could be found, nothing to track
        return;
    }

    // keep previous state of important fields for later comparison
    $this->trackedRecords[$table][$uid] = $this->tcaService->normalizeFrontendGroupField($table, $storedRecord);
}
|
|
|
|
/**
 * DataHandler data map hook that watches record updates.
 *
 * When the updated record turns out to be garbage (see getIsGarbageRecord()),
 * its garbage is collected. For pages the sub pages are cleaned up as well
 * when the changed fields require a recursive update.
 *
 * @param string $status Status of the current operation, 'new' or 'update'
 * @param string $table The table the record belongs to
 * @param mixed $uid The record's uid, [integer] or [string] (like 'NEW...')
 * @param array $fields The changed fields, used to decide about the recursive sub page clean up
 * @param DataHandler $tceMain TYPO3 Core Engine parent object, not used
 */
public function processDatamap_afterDatabaseOperations($status, $table, $uid, array $fields, DataHandler $tceMain)
{
    // newly created records are skipped, as are draft records:
    // garbage is collected for the LIVE workspace only
    if ($status === 'new' || Util::isDraftRecord($table, $uid)) {
        return;
    }

    $updatedRecord = $this->getRecordWithFieldRelevantForGarbageCollection($table, $uid);
    if ($updatedRecord === []) {
        // no record could be found, skip further processing
        return;
    }

    $normalizedRecord = $this->tcaService->normalizeFrontendGroupField($table, $updatedRecord);
    if (!$this->getIsGarbageRecord($table, $normalizedRecord)) {
        return;
    }

    $this->collectGarbage($table, $uid);

    if ($table === 'pages') {
        $this->deleteSubpagesWhenExtendToSubpagesIsSet($table, $uid, $fields);
    }
}
|
|
|
|
/**
 * Tells whether a record became invisible through its start or end time
 * while the related Index Queue item is marked as indexed.
 *
 * @param string $table
 * @param array $record
 * @return bool
 */
protected function isInvisibleByStartOrEndtime($table, $record)
{
    $isOutsideOfTimeFrame = $this->tcaService->isStartTimeInFuture($table, $record)
        || $this->tcaService->isEndTimeInPast($table, $record);

    if (!$isOutsideOfTimeFrame) {
        return false;
    }

    // the queue is only consulted when one of the time checks matched
    return (bool)$this->isRelatedQueueRecordMarkedAsIndexed($table, $record);
}
|
|
|
|
/**
 * Checks whether the related Index Queue item is indexed.
 *
 * * For tt_content the page given by the record's pid is checked
 * * For all other records the record itself (table and uid) is checked
 *
 * @param string $table The table name.
 * @param array $record An array with record fields that may affect visibility.
 * @return bool True if the record is marked as being indexed
 */
protected function isRelatedQueueRecordMarkedAsIndexed($table, $record)
{
    $isContentElement = $table === 'tt_content';

    $queueTable = $isContentElement ? 'pages' : $table;
    $queueUid = $isContentElement ? $record['pid'] : $record['uid'];

    return $this->getIndexQueue()->containsIndexedItem($queueTable, $queueUid);
}
|
|
|
|
/**
 * Returns the Index Queue, obtained through GeneralUtility::makeInstance().
 *
 * @return Queue
 */
private function getIndexQueue(): Queue
{
    return GeneralUtility::makeInstance(Queue::class);
}
|
|
|
|
/**
 * Checks whether a frontend group field exists for the record and, if so,
 * whether groups have been removed from accessing the record, thus making
 * the record invisible to at least some people.
 *
 * The comparison is done against the snapshot stored in $this->trackedRecords
 * by processDatamap_preProcessFieldArray(). Without such a snapshot there is
 * no previous state to compare with, so no removal is reported.
 *
 * @param string $table The table name.
 * @param array $record An array with record fields that may affect visibility.
 * @return bool TRUE if frontend groups have been removed from access to the record, FALSE otherwise.
 */
protected function hasFrontendGroupsRemoved($table, $record)
{
    $frontendGroupsField = $GLOBALS['TCA'][$table]['ctrl']['enablecolumns']['fe_group'] ?? null;
    if ($frontendGroupsField === null) {
        return false;
    }

    $uid = $record['uid'] ?? null;
    $previousRecord = $uid === null ? null : ($this->trackedRecords[$table][$uid] ?? null);
    if (!is_array($previousRecord)) {
        // No snapshot was taken for this record. Comparing against an empty
        // previous state would always look like a removed group.
        return false;
    }

    $previousGroups = explode(',', (string)($previousRecord[$frontendGroupsField] ?? ''));
    $currentGroups = explode(',', (string)($record[$frontendGroupsField] ?? ''));

    // every group present before but missing now counts as removed
    return array_diff($previousGroups, $currentGroups) !== [];
}
|
|
|
|
/**
 * Checks whether the page has been excluded from searching, i.e. whether
 * its "no_search" field holds a truthy value.
 *
 * @param array $record An array with record fields that may affect visibility.
 * @return bool True if the page has been excluded from searching, FALSE otherwise
 */
protected function isPageExcludedFromSearch($record)
{
    // A record without the "no_search" field is treated as not excluded;
    // the canonical (bool) cast replaces the non-canonical (boolean) spelling.
    return (bool)($record['no_search'] ?? false);
}
|
|
|
|
/**
 * Checks whether a page has a page type that can be indexed.
 *
 * The decision is delegated to isAllowedPageType() of the frontend environment.
 *
 * @param array $record A page record
 * @return bool TRUE if the page can be indexed according to its page type, FALSE otherwise
 */
protected function isIndexablePageType(array $record)
{
    $isAllowedPageType = $this->frontendEnvironment->isAllowedPageType($record);

    return $isAllowedPageType;
}
|
|
|
|
/**
 * Determines if a record is garbage and can be deleted.
 *
 * A record is garbage when it is hidden, invisible by start or end time, or
 * has lost frontend groups. A page is also garbage when it is excluded from
 * search or has a page type that is not indexable.
 *
 * @param string $table
 * @param array $record
 * @return bool
 */
protected function getIsGarbageRecord($table, $record): bool
{
    if ($this->tcaService->isHidden($table, $record)) {
        return true;
    }

    if ($this->isInvisibleByStartOrEndtime($table, $record)) {
        return true;
    }

    if ($this->hasFrontendGroupsRemoved($table, $record)) {
        return true;
    }

    // the remaining criteria apply to pages only
    if ($table !== 'pages') {
        return false;
    }

    return $this->isPageExcludedFromSearch($record) || !$this->isIndexablePageType($record);
}
|
|
|
|
/**
 * Fetches a record restricted to the fields that affect its visibility.
 *
 * @param string $table
 * @param int $uid
 * @return array The record, or an empty array when BackendUtility::getRecord() yields null
 */
protected function getRecordWithFieldRelevantForGarbageCollection($table, $uid): array
{
    return (array)BackendUtility::getRecord(
        $table,
        $uid,
        $this->tcaService->getVisibilityAffectingFieldsByTable($table),
        '',
        false
    );
}
|
|
}
|