diff options
Diffstat (limited to 'MLEB/Translate/ttmserver/ElasticSearchTTMServer.php')
-rw-r--r-- | MLEB/Translate/ttmserver/ElasticSearchTTMServer.php | 109 |
1 files changed, 60 insertions, 49 deletions
diff --git a/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php b/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php index ff4c7b79..87d5e582 100644 --- a/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php +++ b/MLEB/Translate/ttmserver/ElasticSearchTTMServer.php @@ -8,6 +8,17 @@ * @ingroup TTMServer */ +use Elastica\Aggregation\Terms; +use Elastica\Client; +use Elastica\Document; +use Elastica\Exception\ExceptionInterface; +use Elastica\Query; +use Elastica\Query\BoolQuery; +use Elastica\Query\FunctionScore; +use Elastica\Query\MatchQuery; +use Elastica\Query\Term; +use Elastica\Type\Mapping; +use MediaWiki\Extension\Translate\TranslatorInterface\TranslationHelperException; use MediaWiki\Logger\LoggerFactory; /** @@ -45,7 +56,7 @@ class ElasticSearchTTMServer */ protected const FROZEN_TYPE = 'frozen'; - /** @var \Elastica\Client */ + /** @var Client */ protected $client; /** * Reference to the maintenance script to relay logging output. @@ -76,7 +87,7 @@ class ElasticSearchTTMServer if ( !$this->useWikimediaExtraPlugin() ) { // ElasticTTM is currently not compatible with elasticsearch 2.x/5.x // It needs FuzzyLikeThis ported via the wmf extra plugin - throw new \RuntimeException( 'The wikimedia extra plugin is mandatory.' ); + throw new RuntimeException( 'The wikimedia extra plugin is mandatory.' ); } /* Two query system: * 1) Find all strings in source language that match text @@ -90,7 +101,7 @@ class ElasticSearchTTMServer $fuzzyQuery->setLikeText( $text ); $fuzzyQuery->addFields( [ 'content' ] ); - $boostQuery = new \Elastica\Query\FunctionScore(); + $boostQuery = new FunctionScore(); $boostQuery->addFunction( 'levenshtein_distance_score', [ @@ -98,20 +109,20 @@ class ElasticSearchTTMServer 'field' => 'content' ] ); - $boostQuery->setBoostMode( \Elastica\Query\FunctionScore::BOOST_MODE_REPLACE ); + $boostQuery->setBoostMode( FunctionScore::BOOST_MODE_REPLACE ); // Wrap the fuzzy query so it can be used as a filter. // This is slightly faster, as ES can throw away the scores by this query. - $bool = new \Elastica\Query\BoolQuery(); + $bool = new BoolQuery(); $bool->addFilter( $fuzzyQuery ); $bool->addMust( $boostQuery ); - $languageFilter = new \Elastica\Query\Term(); + $languageFilter = new Term(); $languageFilter->setTerm( 'language', $sourceLanguage ); $bool->addFilter( $languageFilter ); // The whole query - $query = new \Elastica\Query(); + $query = new Query(); $query->setQuery( $bool ); // The interface usually displays three best candidates. These might @@ -184,10 +195,10 @@ class ElasticSearchTTMServer // Skip second query if first query found nothing. Keeping only one return // statement in this method to avoid forgetting to reset connection timeout if ( $terms !== [] ) { - $idQuery = new \Elastica\Query\Terms(); + $idQuery = new Query\Terms(); $idQuery->setTerms( '_id', $terms ); - $query = new \Elastica\Query( $idQuery ); + $query = new Query( $idQuery ); $query->setSize( 25 ); $query->setParam( '_source', [ 'wiki', 'uri', 'content', 'localid' ] ); $resultset = $this->getType()->search( $query ); @@ -210,7 +221,7 @@ class ElasticSearchTTMServer } // Ensure results are in quality order - uasort( $suggestions, function ( $a, $b ) { + uasort( $suggestions, static function ( $a, $b ) { if ( $a['quality'] === $b['quality'] ) { return 0; } @@ -231,8 +242,8 @@ class ElasticSearchTTMServer * * @param MessageHandle $handle * @param ?string $targetText - * @throws \RuntimeException * @return bool + * @throws RuntimeException */ public function update( MessageHandle $handle, $targetText ) { if ( !$handle->isValid() || $handle->getCode() === '' ) { @@ -254,11 +265,11 @@ class ElasticSearchTTMServer // Do not delete definitions, because the translations are attached to that if ( $handle->getCode() !== $sourceLanguage ) { $localid = $handle->getTitleForBase()->getPrefixedText(); - $this->deleteByQuery( $this->getType(), Elastica\Query::create( - ( new \Elastica\Query\BoolQuery() ) - ->addFilter( new Elastica\Query\Term( [ 'wiki' => wfWikiID() ] ) ) - ->addFilter( new Elastica\Query\Term( [ 'language' => $handle->getCode() ] ) ) - ->addFilter( new Elastica\Query\Term( [ 'localid' => $localid ] ) ) ) ); + $this->deleteByQuery( $this->getType(), Query::create( + ( new BoolQuery() ) + ->addFilter( new Term( [ 'wiki' => wfWikiID() ] ) ) + ->addFilter( new Term( [ 'language' => $handle->getCode() ] ) ) + ->addFilter( new Term( [ 'localid' => $localid ] ) ) ) ); } // If translation was made fuzzy, we do not need to add anything @@ -274,7 +285,7 @@ class ElasticSearchTTMServer function () use ( $doc ) { $this->getType()->addDocument( $doc ); }, - function ( $e, $errors ) use ( $fname ) { + static function ( $e, $errors ) use ( $fname ) { $c = get_class( $e ); $msg = $e->getMessage(); error_log( $fname . ": update failed ($c: $msg); retrying." ); @@ -289,7 +300,7 @@ class ElasticSearchTTMServer * @param MessageHandle $handle * @param string $text * @param int $revId - * @return \Elastica\Document + * @return Document */ protected function createDocument( MessageHandle $handle, $text, $revId ) { $language = $handle->getCode(); @@ -307,7 +318,7 @@ class ElasticSearchTTMServer 'group' => $handle->getGroupIds(), ]; - return new \Elastica\Document( $globalid, $data ); + return new Document( $globalid, $data ); } /** @@ -352,7 +363,7 @@ class ElasticSearchTTMServer /** * Begin the bootstrap process. * - * @throws \RuntimeException + * @throws RuntimeException */ public function beginBootstrap() { $type = $this->getType(); @@ -366,10 +377,10 @@ class ElasticSearchTTMServer $settings = $type->getIndex()->getSettings(); $settings->setRefreshInterval( '-1' ); - $this->deleteByQuery( $this->getType(), \Elastica\Query::create( - ( new Elastica\Query\Term() )->setTerm( 'wiki', wfWikiID() ) ) ); + $this->deleteByQuery( $this->getType(), Query::create( + ( new Term() )->setTerm( 'wiki', wfWikiID() ) ) ); - $mapping = new \Elastica\Type\Mapping(); + $mapping = new Mapping(); $mapping->setType( $type ); $mapping->setProperties( [ 'wiki' => [ 'type' => 'keyword' ], @@ -424,7 +435,7 @@ class ElasticSearchTTMServer public function batchInsertTranslations( array $batch ) { $docs = []; foreach ( $batch as $data ) { - list( $handle, $sourceLanguage, $text ) = $data; + [ $handle, $sourceLanguage, $text ] = $data; $revId = $handle->getTitleForLanguage( $sourceLanguage )->getLatestRevID(); $docs[] = $this->createDocument( $handle, $text, $revId ); } @@ -456,9 +467,9 @@ class ElasticSearchTTMServer public function getClient() { if ( !$this->client ) { if ( isset( $this->config['config'] ) ) { - $this->client = new \Elastica\Client( $this->config['config'] ); + $this->client = new Client( $this->config['config'] ); } else { - $this->client = new \Elastica\Client(); + $this->client = new Client(); } } return $this->client; @@ -500,7 +511,7 @@ class ElasticSearchTTMServer $path = "_cluster/health/$indexName"; $response = $this->getClient()->request( $path ); if ( $response->hasError() ) { - throw new \Exception( "Error while fetching index health status: " . $response->getError() ); + throw new Exception( "Error while fetching index health status: " . $response->getError() ); } return $response->getData(); } @@ -531,7 +542,7 @@ class ElasticSearchTTMServer } $this->logOutput( "\tIndex is $status retrying..." ); sleep( 5 ); - } catch ( \Exception $e ) { + } catch ( Exception $e ) { $this->logOutput( "Error while waiting for green ({$e->getMessage()}), retrying..." ); } } @@ -600,14 +611,14 @@ class ElasticSearchTTMServer // Allow searching either by message content or message id (page name // without language subpage) with exact match only. - $searchQuery = new \Elastica\Query\BoolQuery(); + $searchQuery = new BoolQuery(); foreach ( $fields as $analyzer => $words ) { foreach ( $words as $word ) { - $boolQuery = new \Elastica\Query\BoolQuery(); - $contentQuery = new \Elastica\Query\Match(); + $boolQuery = new BoolQuery(); + $contentQuery = new MatchQuery(); $contentQuery->setFieldQuery( $analyzer, $word ); $boolQuery->addShould( $contentQuery ); - $messageQuery = new \Elastica\Query\Term(); + $messageQuery = new Term(); $messageQuery->setTerm( 'localid', $word ); $boolQuery->addShould( $messageQuery ); @@ -631,8 +642,8 @@ class ElasticSearchTTMServer $handle = new MessageHandle( $title ); if ( $handle->isValid() && $handle->getCode() !== '' ) { $localid = $handle->getTitleForBase()->getPrefixedText(); - $boolQuery = new \Elastica\Query\BoolQuery(); - $messageId = new \Elastica\Query\Term(); + $boolQuery = new BoolQuery(); + $messageId = new Term(); $messageId->setTerm( 'localid', $localid ); $boolQuery->addMust( $messageId ); $searchQuery->addShould( $boolQuery ); @@ -651,17 +662,17 @@ class ElasticSearchTTMServer * @return \Elastica\Search */ public function createSearch( $queryString, $opts, $highlight ) { - $query = new \Elastica\Query(); + $query = new Query(); - list( $searchQuery, $highlights ) = $this->parseQueryString( $queryString, $opts ); + [ $searchQuery, $highlights ] = $this->parseQueryString( $queryString, $opts ); $query->setQuery( $searchQuery ); - $language = new \Elastica\Aggregation\Terms( 'language' ); + $language = new Terms( 'language' ); $language->setField( 'language' ); $language->setSize( 500 ); $query->addAggregation( $language ); - $group = new \Elastica\Aggregation\Terms( 'group' ); + $group = new Terms( 'group' ); $group->setField( 'group' ); // Would like to prioritize the top level groups and not show subgroups // if the top group has only few hits, but that doesn't seem to be possile. @@ -675,18 +686,18 @@ class ElasticSearchTTMServer // multiple must clauses are executed by converting each filter into a bit // field then anding them together. The latter is normally faster if either // of the subfilters are reused. May not make a difference in this context. - $filters = new \Elastica\Query\BoolQuery(); + $filters = new BoolQuery(); $language = $opts['language']; if ( $language !== '' ) { - $languageFilter = new \Elastica\Query\Term(); + $languageFilter = new Term(); $languageFilter->setTerm( 'language', $language ); $filters->addFilter( $languageFilter ); } $group = $opts['group']; if ( $group !== '' ) { - $groupFilter = new \Elastica\Query\Term(); + $groupFilter = new Term(); $groupFilter->setTerm( 'group', $group ); $filters->addFilter( $groupFilter ); } @@ -699,7 +710,7 @@ class ElasticSearchTTMServer $query->setPostFilter( $filters ); } - list( $pre, $post ) = $highlight; + [ $pre, $post ] = $highlight; $query->setHighlight( [ // The value must be an object 'pre_tags' => [ $pre ], @@ -723,7 +734,7 @@ class ElasticSearchTTMServer try { return $search->search(); - } catch ( \Elastica\Exception\ExceptionInterface $e ) { + } catch ( ExceptionInterface $e ) { throw new TTMServerException( $e->getMessage() ); } } @@ -786,19 +797,19 @@ class ElasticSearchTTMServer * in 2.x and returned in 5.x. * * @param \Elastica\Type $type the source index - * @param \Elastica\Query $query - * @throws \RuntimeException + * @param Query $query + * @throws RuntimeException */ - private function deleteByQuery( \Elastica\Type $type, \Elastica\Query $query ) { + private function deleteByQuery( \Elastica\Type $type, Query $query ) { try { MWElasticUtils::deleteByQuery( $type->getIndex(), $query, /* $allowConflicts = */ true ); - } catch ( \Exception $e ) { + } catch ( Exception $e ) { LoggerFactory::getInstance( 'ElasticSearchTTMServer' )->error( 'Problem encountered during deletion.', [ 'exception' => $e ] ); - throw new \RuntimeException( "Problem encountered during deletion.\n" . $e ); + throw new RuntimeException( "Problem encountered during deletion.\n" . $e ); } } @@ -806,7 +817,7 @@ class ElasticSearchTTMServer public function isFrozen() { try { return MWElasticUtils::isFrozen( $this->getClient() ); - } catch ( \Exception $e ) { + } catch ( Exception $e ) { LoggerFactory::getInstance( 'ElasticSearchTTMServer' )->warning( 'Problem encountered while checking the frozen index.', [ 'exception' => $e ] |