first commit

This commit is contained in:
Sven Wappler
2021-04-17 21:20:54 +02:00
parent c93ec9492a
commit cadcc8edb4
406 changed files with 4917 additions and 5157 deletions

View File

@@ -34,7 +34,7 @@ class HtmlContentExtractor
{
/**
* Unicode ranges which should get stripped before sending a document to solr.
* Unicode ranges which should get stripped before sending a document to Meilisearch.
* This is necessary if a document (PDF, etc.) contains unicode characters which
* are valid in the font being used in the document but are not available in the
* font being used for displaying results.
@@ -63,7 +63,7 @@ class HtmlContentExtractor
*/
protected $content;
/**
* Mapping of HTML tags to Solr document fields.
* Mapping of HTML tags to Meilisearch document fields.
*
* @var array
*/
@@ -104,7 +104,7 @@ class HtmlContentExtractor
protected function getConfiguration()
{
if ($this->configuration == null) {
$this->configuration = Util::getSolrConfiguration();
$this->configuration = Util::getMeilisearchConfiguration();
}
return $this->configuration;
@@ -121,7 +121,7 @@ class HtmlContentExtractor
/**
* Returns the cleaned indexable content from the page's HTML markup.
*
* The content is cleaned from HTML tags and control chars Solr could
* The content is cleaned from HTML tags and control chars Meilisearch could
* stumble on.
*
* @return string Indexable, cleaned content ready for indexing.
@@ -165,7 +165,7 @@ class HtmlContentExtractor
}
/**
* Strips control characters that cause Jetty/Solr to fail.
* Strips control characters that cause Meilisearch to fail.
*
* @param string $content the content to sanitize
* @return string the sanitized content
@@ -220,7 +220,7 @@ class HtmlContentExtractor
/**
* Extracts HTML tag content from tags in the content marked for indexing.
*
* @return array A mapping of Solr document field names to content found in defined tags.
* @return array A mapping of Meilisearch document field names to content found in defined tags.
*/
public function getTagContent()
{