first commit
This commit is contained in:
225
Classes/Domain/Search/Statistics/StatisticsRepository.php
Normal file
225
Classes/Domain/Search/Statistics/StatisticsRepository.php
Normal file
@@ -0,0 +1,225 @@
|
||||
<?php declare(strict_types = 1);
|
||||
namespace WapplerSystems\Meilisearch\Domain\Search\Statistics;
|
||||
|
||||
/***************************************************************
|
||||
* Copyright notice
|
||||
*
|
||||
* (c) 2016 Thomas Hohn <tho@systime.dk>
|
||||
* All rights reserved
|
||||
*
|
||||
* This script is part of the TYPO3 project. The TYPO3 project is
|
||||
* free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* The GNU General Public License can be found at
|
||||
* http://www.gnu.org/copyleft/gpl.html.
|
||||
*
|
||||
* This script is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* This copyright notice MUST APPEAR in all copies of the script!
|
||||
***************************************************************/
|
||||
|
||||
use WapplerSystems\Meilisearch\System\Records\AbstractRepository;
|
||||
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
|
||||
|
||||
/**
 * Repository for the search statistics stored in tx_meilisearch_statistics.
 *
 * Provides aggregated reads (top keywords, queries over time, configurable
 * frequent searches) and persists new statistics records.
 *
 * @author Thomas Hohn <tho@systime.dk>
 */
class StatisticsRepository extends AbstractRepository
{
    /**
     * @var string
     */
    protected $table = 'tx_meilisearch_statistics';

    /**
     * Fetches the most popular search keywords from the table tx_meilisearch_statistics
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit maximum number of keywords to return (cast to int)
     * @return mixed rows with the columns keywords, count, hits and percent
     */
    public function getSearchStatistics(int $rootPageId, int $days = 30, $limit = 10)
    {
        return $this->getPreparedQueryBuilderForSearchStatisticsAndTopKeywords(
            $rootPageId,
            $this->calculateTimeStart($days),
            (int)$limit
        )->execute()->fetchAll();
    }

    /**
     * Returns prepared QueryBuilder for two purposes:
     * for getSearchStatistics() and getTopKeyWordsWithOrWithoutHits() methods
     *
     * The query groups by keyword and selects the number of searches (count),
     * the average number of results (hits) and the share of the keyword in all
     * recorded searches of the site (percent).
     *
     * @param int $rootPageId
     * @param int $timeStart only records with a tstamp greater than this value are used
     * @param int $limit
     * @return QueryBuilder
     */
    protected function getPreparedQueryBuilderForSearchStatisticsAndTopKeywords(int $rootPageId, int $timeStart, int $limit) : QueryBuilder
    {
        // The row count is written into the SQL as a divisor. Without any
        // recorded searches it would be 0 and yield a literal "/ 0"; in that
        // case no row matches anyway, so any positive divisor is equivalent.
        $countRows = max(1, $this->countByRootPageId($rootPageId));

        $queryBuilder = $this->getQueryBuilder();
        $expressionBuilder = $queryBuilder->expr();

        return $queryBuilder
            ->select('keywords')
            ->add('select', $expressionBuilder->count('keywords', 'count'), true)
            ->add('select', $expressionBuilder->avg('num_found', 'hits'), true)
            ->add('select', '(' . $expressionBuilder->count('keywords') . ' * 100 / ' . $countRows . ') AS percent', true)
            ->from($this->table)
            ->andWhere(
                $expressionBuilder->gt('tstamp', $timeStart),
                $expressionBuilder->eq('root_pid', $rootPageId)
            )
            ->groupBy('keywords')
            ->orderBy('count', 'DESC')
            ->addOrderBy('hits', 'DESC')
            ->addOrderBy('keywords', 'ASC')
            ->setMaxResults($limit);
    }

    /**
     * Find Top search keywords with results
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @return array
     */
    public function getTopKeyWordsWithHits(int $rootPageId, int $days = 30, int $limit = 10) : array
    {
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, false);
    }

    /**
     * Find Top search keywords without results
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @return array
     */
    public function getTopKeyWordsWithoutHits(int $rootPageId, int $days = 30, int $limit = 10) : array
    {
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, true);
    }

    /**
     * Find Top search keywords with or without results
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @param bool $withoutHits true: only searches with num_found = 0, false: only searches with num_found > 0
     * @return array
     */
    protected function getTopKeyWordsWithOrWithoutHits(int $rootPageId, int $days = 30, int $limit = 10, bool $withoutHits = false) : array
    {
        $queryBuilder = $this->getPreparedQueryBuilderForSearchStatisticsAndTopKeywords(
            $rootPageId,
            $this->calculateTimeStart($days),
            $limit
        );

        // Restrict the shared base query to searches without or with hits
        $hitsConstraint = $withoutHits
            ? $queryBuilder->expr()->eq('num_found', 0)
            : $queryBuilder->expr()->gt('num_found', 0);
        $queryBuilder->andWhere($hitsConstraint);

        return $queryBuilder->execute()->fetchAll();
    }

    /**
     * Get number of queries over time
     *
     * Each row contains "bucket" (tstamp divided by the bucket size, rounded
     * down), "timestamp" (tstamp reduced by its remainder modulo 86400) and
     * "numQueries".
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $bucketSeconds Seconds per bucket, values below 1 are treated as 1
     * @return array [labels, data]
     */
    public function getQueriesOverTime(int $rootPageId, int $days = 30, int $bucketSeconds = 3600) : array
    {
        $timeStart = $this->calculateTimeStart($days);
        // The bucket size is written into the SQL as a divisor, so it has to
        // be a positive number.
        $bucketSeconds = max(1, $bucketSeconds);

        $queryBuilder = $this->getQueryBuilder();

        return $queryBuilder
            ->addSelectLiteral(
                'FLOOR(tstamp/' . $bucketSeconds . ') AS bucket',
                '(tstamp - (tstamp % 86400)) AS timestamp',
                $queryBuilder->expr()->count('*', 'numQueries')
            )
            ->from($this->table)
            ->andWhere(
                $queryBuilder->expr()->gt('tstamp', $timeStart),
                $queryBuilder->expr()->eq('root_pid', $rootPageId)
            )
            ->groupBy('bucket', 'timestamp')
            ->orderBy('bucket', 'ASC')
            ->execute()->fetchAll();
    }

    /**
     * Returns a result set by given plugin.tx_meilisearch.search.frequentSearches.select configuration.
     *
     * The SELECT, FROM, ADD_WHERE, GROUP_BY and ORDER_BY values are handed to
     * the query builder as literal SQL fragments without any quoting, so the
     * configuration must never contain untrusted input.
     *
     * @param array $frequentSearchConfiguration needs the keys "select." (with SELECT, FROM, ADD_WHERE, GROUP_BY, ORDER_BY) and "limit"
     * @return array rows as returned by fetchAll() for the configured query
     */
    public function getFrequentSearchTermsFromStatisticsByFrequentSearchConfiguration(array $frequentSearchConfiguration) : array
    {
        $selectConfiguration = $frequentSearchConfiguration['select.'];

        return $this->getQueryBuilder()
            ->addSelectLiteral($selectConfiguration['SELECT'])
            ->from($selectConfiguration['FROM'])
            ->add('where', $selectConfiguration['ADD_WHERE'], true)
            ->add('groupBy', $selectConfiguration['GROUP_BY'], true)
            ->add('orderBy', $selectConfiguration['ORDER_BY'])
            ->setMaxResults((int)$frequentSearchConfiguration['limit'])
            ->execute()->fetchAll();
    }

    /**
     * Persists statistics record
     *
     * @param array $statisticsRecord column name => value pairs of the new row
     * @return void
     */
    public function saveStatisticsRecord(array $statisticsRecord)
    {
        $this->getQueryBuilder()
            ->insert($this->table)
            ->values($statisticsRecord)
            ->execute();
    }

    /**
     * Counts rows for specified site
     *
     * @param int $rootPageId
     * @return int
     */
    public function countByRootPageId(int $rootPageId): int
    {
        // Build the constraint with the same builder that runs the query.
        $queryBuilder = $this->getQueryBuilder();

        return (int)$queryBuilder
            ->count('*')
            ->from($this->table)
            ->andWhere($queryBuilder->expr()->eq('root_pid', $rootPageId))
            ->execute()->fetchColumn(0);
    }

    /**
     * Calculates the unix timestamp that lies the given number of days before now.
     *
     * @param int $days number of days of history to query
     * @return int
     */
    private function calculateTimeStart(int $days): int
    {
        return (int)(time() - 86400 * $days); // 86400 seconds/day
    }
}
|
228
Classes/Domain/Search/Statistics/StatisticsWriterProcessor.php
Normal file
228
Classes/Domain/Search/Statistics/StatisticsWriterProcessor.php
Normal file
@@ -0,0 +1,228 @@
|
||||
<?php
|
||||
namespace WapplerSystems\Meilisearch\Domain\Search\Statistics;
|
||||
|
||||
/***************************************************************
|
||||
* Copyright notice
|
||||
*
|
||||
* (c) 2009-2015 Ingo Renner <ingo@typo3.org>
|
||||
* All rights reserved
|
||||
*
|
||||
* This script is part of the TYPO3 project. The TYPO3 project is
|
||||
* free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* The GNU General Public License can be found at
|
||||
* http://www.gnu.org/copyleft/gpl.html.
|
||||
*
|
||||
* This script is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* This copyright notice MUST APPEAR in all copies of the script!
|
||||
***************************************************************/
|
||||
|
||||
use WapplerSystems\Meilisearch\Domain\Search\Query\Query;
|
||||
use WapplerSystems\Meilisearch\Domain\Search\ResultSet\SearchResultSet;
|
||||
use WapplerSystems\Meilisearch\Domain\Search\ResultSet\SearchResultSetProcessor;
|
||||
use WapplerSystems\Meilisearch\HtmlContentExtractor;
|
||||
use WapplerSystems\Meilisearch\Domain\Site\SiteRepository;
|
||||
use WapplerSystems\Meilisearch\Util;
|
||||
use TYPO3\CMS\Core\Utility\GeneralUtility;
|
||||
use TYPO3\CMS\Frontend\Controller\TypoScriptFrontendController;
|
||||
|
||||
/**
 * Writes statistics after searches have been conducted.
 *
 * For every processed result set with a non-empty query one record is handed
 * to the StatisticsRepository.
 *
 * @author Ingo Renner <ingo@typo3.org>
 * @author Dimitri Ebert <dimitri.ebert@dkd.de>
 * @author Timo Hund <timo.hund@dkd.de>
 */
class StatisticsWriterProcessor implements SearchResultSetProcessor
{
    /**
     * @var StatisticsRepository
     */
    protected $statisticsRepository;

    /**
     * @var SiteRepository
     */
    protected $siteRepository;

    /**
     * Both collaborators are optional, missing ones are created with
     * GeneralUtility::makeInstance().
     *
     * @param StatisticsRepository $statisticsRepository
     * @param SiteRepository $siteRepository
     */
    public function __construct(StatisticsRepository $statisticsRepository = null, SiteRepository $siteRepository = null)
    {
        $this->statisticsRepository = $statisticsRepository ?? GeneralUtility::makeInstance(StatisticsRepository::class);
        $this->siteRepository = $siteRepository ?? GeneralUtility::makeInstance(SiteRepository::class);
    }

    /**
     * Stores one statistics record for the search behind the passed result set.
     *
     * The result set itself is returned unchanged. Searches whose sanitized
     * query is an empty string are not tracked.
     *
     * @param SearchResultSet $resultSet
     * @return SearchResultSet
     */
    public function process(SearchResultSet $resultSet)
    {
        $searchRequest = $resultSet->getUsedSearchRequest();
        $response = $resultSet->getResponse();
        $configuration = $searchRequest->getContextTypoScriptConfiguration();
        $keywords = $this->getProcessedKeywords(
            $resultSet->getUsedQuery(),
            $configuration->getSearchFrequentSearchesUseLowercaseKeywords()
        );

        // Compare against the empty string: empty() would also drop the
        // legitimate query "0".
        if ((string)$keywords === '') {
            // do not track empty queries
            return $resultSet;
        }

        $filters = $searchRequest->getActiveFacets();
        $sorting = $this->sanitizeString($searchRequest->getSorting());
        $page = (int)$searchRequest->getPage();
        $ipMaskLength = (int)$configuration->getStatisticsAnonymizeIP();

        $tsfe = $this->getTSFE();
        $rootPageId = $this->siteRepository->getSiteByPageId($tsfe->id)->getRootPageId();

        // Stored as a 0/1 flag: 1 when the response carries a spellcheck
        // suggestions object with at least one property. The isset() guard
        // covers responses without a spellcheck section.
        $suggestionsShown = isset($response->spellcheck->suggestions)
            && is_object($response->spellcheck->suggestions)
            && get_object_vars($response->spellcheck->suggestions) !== [];

        $statisticData = [
            'pid' => $tsfe->id,
            'root_pid' => $rootPageId,
            'tstamp' => $this->getTime(),
            'language' => Util::getLanguageUid(),
            // @extensionScannerIgnoreLine
            'num_found' => (int)$resultSet->getAllResultCount(),
            'suggestions_shown' => (int)$suggestionsShown,
            // @extensionScannerIgnoreLine
            'time_total' => isset($response->debug->timing->time) ? $response->debug->timing->time : 0,
            // @extensionScannerIgnoreLine
            'time_preparation' => isset($response->debug->timing->prepare->time) ? $response->debug->timing->prepare->time : 0,
            // @extensionScannerIgnoreLine
            'time_processing' => isset($response->debug->timing->process->time) ? $response->debug->timing->process->time : 0,
            // No frontend user record is available for anonymous visitors, fall back to 0
            'feuser_id' => (int)($tsfe->fe_user->user['uid'] ?? 0),
            'cookie' => $tsfe->fe_user->id ?? '',
            'ip' => $this->applyIpMask((string)$this->getUserIp(), $ipMaskLength),
            'page' => $page,
            'keywords' => $keywords,
            'filters' => serialize($filters),
            'sorting' => $sorting,
            'parameters' => serialize($response->responseHeader->params)
        ];

        $this->statisticsRepository->saveStatisticsRecord($statisticData);

        return $resultSet;
    }

    /**
     * Returns the sanitized query string of the passed query.
     *
     * @param Query $query
     * @param boolean $lowerCaseQuery true to convert the keywords to lower case
     * @return string
     */
    protected function getProcessedKeywords(Query $query, $lowerCaseQuery = false)
    {
        $keywords = $this->sanitizeString($query->getQuery());

        return $lowerCaseQuery ? mb_strtolower($keywords) : $keywords;
    }

    /**
     * Sanitizes a string
     *
     * The string is cleaned by HtmlContentExtractor::cleanContent(), its HTML
     * entities are decoded, tags are stripped and the result is trimmed.
     *
     * @param $string String to sanitize
     * @return string Sanitized string
     */
    protected function sanitizeString($string)
    {
        // clean content
        $string = HtmlContentExtractor::cleanContent($string);
        $string = html_entity_decode($string, ENT_QUOTES, 'UTF-8');
        // After entity decoding we might have tags again.
        // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1. It is
        // kept because there is no drop-in replacement with identical output;
        // replace it deliberately once PHP 8.1+ has to be supported.
        $string = filter_var(strip_tags($string), FILTER_SANITIZE_STRING);

        return trim($string);
    }

    /**
     * Internal function to mask portions of the visitor IP address
     *
     * The address is converted to its packed (network) representation, masked
     * by the IPv4 or IPv6 helper and converted back, so the result is again a
     * readable address (e.g. 192.168.1.10 with mask length 1 => 192.168.1.0).
     *
     * @param string $ip IP address in human readable notation, e.g. the REMOTE_ADDR
     * @param int $maskLength Number of octets to reset (IPv4) resp. index of the mask to apply (IPv6), values below 1 disable masking
     * @return string the masked address, an empty string when masking is requested but $ip is not a valid IP address
     */
    protected function applyIpMask(string $ip, int $maskLength): string
    {
        if ($ip === '' || $maskLength <= 0) {
            return $ip;
        }

        $isIpV4 = filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false;
        $isIpV6 = !$isIpV4 && filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) !== false;
        if (!$isIpV4 && !$isIpV6) {
            // Anonymization was requested, so never store a value that could
            // not be masked.
            return '';
        }

        // The mask helpers work on the packed address, not on its text form.
        $packedAddress = inet_pton($ip);
        if ($packedAddress === false) {
            return '';
        }

        $maskedAddress = $isIpV4
            ? $this->applyIpV4Mask($packedAddress, $maskLength)
            : $this->applyIpV6Mask($packedAddress, $maskLength);

        return (string)inet_ntop($maskedAddress);
    }

    /**
     * Apply a mask filter on the ip v4 address.
     *
     * Resets the last $maskLength bytes of the address to zero bytes.
     *
     * @param string $ip IP address in network address format (packed, as returned by inet_pton())
     * @param int $maskLength number of trailing bytes to reset, limited to the length of $ip
     * @return string
     */
    protected function applyIpV4Mask($ip, $maskLength)
    {
        $ip = (string)$ip;
        $addressLength = strlen($ip);
        $bytesToReset = max(0, min((int)$maskLength, $addressLength));

        return substr($ip, 0, $addressLength - $bytesToReset) . str_repeat(chr(0), $bytesToReset);
    }

    /**
     * Apply a mask filter on the ip v6 address.
     *
     * The mask length selects one of four bit masks (0 keeps the full address,
     * 3 keeps the fewest bits); the address is AND-ed with that mask.
     *
     * @param string $ip IP address in network address format (packed, as returned by inet_pton())
     * @param int $maskLength index of the mask to apply, values outside of 0-3 are limited to that range
     * @return string
     */
    protected function applyIpV6Mask($ip, $maskLength):string
    {
        $masks = ['ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff', 'ffff:ffff:ffff:ffff::', 'ffff:ffff:ffff:0000::', 'ffff:ff00:0000:0000::'];
        // Only four masks exist, an out-of-range length must not address an undefined index.
        $maskIndex = max(0, min((int)$maskLength, count($masks) - 1));
        $binaryMask = pack('a16', inet_pton($masks[$maskIndex]));

        return (string)($ip & $binaryMask);
    }

    /**
     * Returns the frontend controller from $GLOBALS['TSFE'].
     *
     * @return TypoScriptFrontendController
     */
    protected function getTSFE()
    {
        return $GLOBALS['TSFE'];
    }

    /**
     * Returns the REMOTE_ADDR as provided by GeneralUtility::getIndpEnv().
     *
     * @return string
     */
    protected function getUserIp()
    {
        return GeneralUtility::getIndpEnv('REMOTE_ADDR');
    }

    /**
     * Returns the value of $GLOBALS['EXEC_TIME'], used as tstamp of the record.
     *
     * @return mixed
     */
    protected function getTime()
    {
        return $GLOBALS['EXEC_TIME'];
    }
}
|
Reference in New Issue
Block a user