* All rights reserved * * This script is part of the TYPO3 project. The TYPO3 project is * free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * The GNU General Public License can be found at * http://www.gnu.org/copyleft/gpl.html. * * This script is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * This copyright notice MUST APPEAR in all copies of the script! ***************************************************************/ use WapplerSystems\Meilisearch\Domain\Search\Query\Query; use WapplerSystems\Meilisearch\Domain\Search\ResultSet\SearchResultSet; use WapplerSystems\Meilisearch\Domain\Search\ResultSet\SearchResultSetProcessor; use WapplerSystems\Meilisearch\HtmlContentExtractor; use WapplerSystems\Meilisearch\Domain\Site\SiteRepository; use WapplerSystems\Meilisearch\Util; use TYPO3\CMS\Core\Utility\GeneralUtility; use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController; /** * Writes statistics after searches have been conducted. * * @author Ingo Renner * @author Dimitri Ebert * @author Timo Hund */ class StatisticsWriterProcessor implements SearchResultSetProcessor { /** * @var StatisticsRepository */ protected $statisticsRepository; /** * @var SiteRepository */ protected $siteRepository; /** * @param StatisticsRepository $statisticsRepository * @param SiteRepository $siteRepository */ public function __construct(StatisticsRepository $statisticsRepository = null, SiteRepository $siteRepository = null) { $this->statisticsRepository = $statisticsRepository ?? GeneralUtility::makeInstance(StatisticsRepository::class); $this->siteRepository = $siteRepository ?? GeneralUtility::makeInstance(SiteRepository::class); } /** * @param SearchResultSet $resultSet * @return SearchResultSet */ public function process(SearchResultSet $resultSet) { $searchRequest = $resultSet->getUsedSearchRequest(); $response = $resultSet->getResponse(); $configuration = $searchRequest->getContextTypoScriptConfiguration(); $keywords = $this->getProcessedKeywords($resultSet->getUsedQuery(), $configuration->getSearchFrequentSearchesUseLowercaseKeywords()); if (empty($keywords)) { // do not track empty queries return $resultSet; } $filters = $searchRequest->getActiveFacets(); $sorting = $this->sanitizeString($searchRequest->getSorting()); $page = (int)$searchRequest->getPage(); $ipMaskLength = (int)$configuration->getStatisticsAnonymizeIP(); $TSFE = $this->getTSFE(); $root_pid = $this->siteRepository->getSiteByPageId($TSFE->id)->getRootPageId(); $statisticData = [ 'pid' => $TSFE->id, 'root_pid' => $root_pid, 'tstamp' => $this->getTime(), 'language' => Util::getLanguageUid(), // @extensionScannerIgnoreLine 'num_found' => (int)$resultSet->getAllResultCount(), 'suggestions_shown' => is_object($response->spellcheck->suggestions) ? (int)get_object_vars($response->spellcheck->suggestions) : 0, // @extensionScannerIgnoreLine 'time_total' => isset($response->debug->timing->time) ? $response->debug->timing->time : 0, // @extensionScannerIgnoreLine 'time_preparation' => isset($response->debug->timing->prepare->time) ? $response->debug->timing->prepare->time : 0, // @extensionScannerIgnoreLine 'time_processing' => isset($response->debug->timing->process->time) ? $response->debug->timing->process->time : 0, 'feuser_id' => (int)$TSFE->fe_user->user['uid'], 'cookie' => $TSFE->fe_user->id ?? '', 'ip' => $this->applyIpMask((string)$this->getUserIp(), $ipMaskLength), 'page' => (int)$page, 'keywords' => $keywords, 'filters' => serialize($filters), 'sorting' => $sorting, 'parameters' => serialize($response->responseHeader->params) ]; $this->statisticsRepository->saveStatisticsRecord($statisticData); return $resultSet; } /** * @param Query $query * @param boolean $lowerCaseQuery * @return string */ protected function getProcessedKeywords(Query $query, $lowerCaseQuery = false) { $keywords = $query->getQuery(); $keywords = $this->sanitizeString($keywords); if ($lowerCaseQuery) { $keywords = mb_strtolower($keywords); } return $keywords; } /** * Sanitizes a string * * @param $string String to sanitize * @return string Sanitized string */ protected function sanitizeString($string) { // clean content $string = HtmlContentExtractor::cleanContent($string); $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8'); $string = filter_var(strip_tags($string), FILTER_SANITIZE_STRING); // after entity decoding we might have tags again $string = trim($string); return $string; } /** * Internal function to mask portions of the visitor IP address * * @param string $ip IP address in network address format * @param int $maskLength Number of octets to reset * @return string */ protected function applyIpMask(string $ip, int $maskLength): string { if (empty($ip) || $maskLength === 0) { return $ip; } // IPv4 or mapped IPv4 in IPv6 if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) { return $this->applyIpV4Mask($ip, $maskLength); } return $this->applyIpV6Mask($ip, $maskLength); } /** * Apply a mask filter on the ip v4 address. * * @param string $ip * @param int $maskLength * @return string */ protected function applyIpV4Mask($ip, $maskLength) { $i = strlen($ip); if ($maskLength > $i) { $maskLength = $i; } while ($maskLength-- > 0) { $ip[--$i] = chr(0); } return (string)$ip; } /** * Apply a mask filter on the ip v6 address. * * @param string $ip * @param int $maskLength * @return string */ protected function applyIpV6Mask($ip, $maskLength):string { $masks = ['ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', 'ffff:ffff:ffff:ffff::', 'ffff:ffff:ffff:0000::', 'ffff:ff00:0000:0000::']; $packedAddress = inet_pton($masks[$maskLength]); $binaryString = pack('a16', $packedAddress); return (string)($ip & $binaryString); } /** * @return TypoScriptFrontendController */ protected function getTSFE() { return $GLOBALS['TSFE']; } /** * @return string */ protected function getUserIp() { return GeneralUtility::getIndpEnv('REMOTE_ADDR'); } /** * @return mixed */ protected function getTime() { return $GLOBALS['EXEC_TIME']; } }