From 4f0b376fc1fcc72caa175eee5ca684e8b4c83305 Mon Sep 17 00:00:00 2001 From: Mark Robert Miller Date: Wed, 27 May 2026 20:11:46 -0500 Subject: [PATCH] Unify SolrIndexSearcher search paths onto a single CollectorManager-based entry point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the dual sequential/parallel branches in getDocListNC and getDocListAndSetNC with a single private searchAndCollect helper that internally selects between single-slice and multi-slice execution. The top-level methods no longer fork on MultiThreadedSearcher.allowMT. Key changes: - Extracts TopDocsCM, MaxScoreCM, DocSetCM, FixedBitSetCollector, SearchResult, TopDocsResult, and MaxScoreResult into a new SolrCollectorManagers utility. Adds PostFilterCM (single-slice; setLastDelegate per newCollector, complete() at reduce) and DocSetCollectorCM (preserves the SortedIntDocSet sparse optimization). Multi-slice paths keep the FixedBitSet-based DocSetCM. - Deletes MultiThreadedSearcher; allowMT is inlined as the canParallelize check inside searchAndCollect. - Migrates the collector chain (segment-terminate-early, max-hits early termination, optional post-filter, optional cancellation) to a CollectorManager-based ChainCM applied per newCollector(). Single-slice vs multi-slice is chosen by ChainCM.requiresSingleSlice() OR the caller's parallelEligible hint. - Replaces the outer CancellableCollector with a shared AtomicBoolean polled by every per-slice CancellableCollector, so a single tracker.cancel(queryID) is observed across all parallel slices. - ChainCM.getScoreMode() returns the chain-wrapped collector's score mode so a Solr post-filter override (e.g. CollapsingPostFilter forcing COMPLETE) reaches populateScoresIfNeeded. Guaranteed non-null at the call site: Lucene's IndexSearcher.search calls newCollector() unconditionally before its empty-index short-circuit, so the call site uses Objects.requireNonNull rather than an unreachable fallback. - TopDocsCM.reduce skips TopDocs.merge for a single input, preserving the underlying TopDocsCollector's EQUAL_TO / GREATER_THAN_OR_EQUAL_TO relation exactly. - Aligns SolrMultiCollectorManager with Lucene's MultiCollector: per-child CollectionTerminatedException handling, per-child setMinCompetitiveScore aggregation (MinCompetitiveScoreAwareScorable), and removal of the EarlyTerminatingCollector wrap (ChainCM owns it, with a shared LongAdder across slices to preserve cross-slice maxHitsAllowed semantics). Clamps EarlyTerminatingCollector chunkSize to >= 1 to avoid divide-by-zero when maxDocsToCollect < 10. A few hot-path alignments with Lucene's MultiCollector pattern that together avoid a regression on heavy multi-term boolean OR queries: - searchAndCollect skips SolrMultiCollectorManager entirely when only one inner CollectorManager is built (top-N without fl=score and without docSet — the common case), routing through a new SolrCollectorManagers.wrapSingle adapter. Mirrors Lucene's own MultiCollectorManager.wrap() short-circuit for the one-collector case. - SolrMultiCollectorManager$Collectors$LeafCollectors.collect now matches MultiCollector.MultiLeafCollector#collect exactly: the per-iteration body is load-slot / null-check / call, and "all children terminated" is detected lazily inside the rare CollectionTerminatedException catch via allLeavesTerminated(). - QueryCancelledException and EarlyTerminatingCollectorException override fillInStackTrace() to return this, matching CollectionTerminatedException — these are control-flow exceptions and should not pay for stack-trace fill. No behavior change: numFound, ranked doc_ids, maxScore, numFoundExact and start match the parent commit on every probed (query, mode, multiThreaded) cell, including post-filter, STE, and post-filter + STE combinations. Verified with the full org.apache.solr.search.* suite plus targeted runs for TestMultiThreadedSearcher, TestEarlyTerminatingQueries, TestQueryLimits, CursorPagingTest, TestRankQueryPlugin, TestCollapseQParserPlugin, TestRandomCollapseQParserPlugin, and TestReRankQParserPlugin. --- .../solr/search/CancellableCollector.java | 41 +- .../search/EarlyTerminatingCollector.java | 8 +- .../EarlyTerminatingCollectorException.java | 14 + .../solr/search/MultiThreadedSearcher.java | 374 ----------- .../solr/search/SolrCollectorManagers.java | 602 ++++++++++++++++++ .../apache/solr/search/SolrIndexSearcher.java | 320 +++++----- .../search/SolrMultiCollectorManager.java | 131 +++- .../search/TestMultiThreadedSearcher.java | 2 +- 8 files changed, 933 insertions(+), 559 deletions(-) delete mode 100644 solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java create mode 100644 solr/core/src/java/org/apache/solr/search/SolrCollectorManagers.java diff --git a/solr/core/src/java/org/apache/solr/search/CancellableCollector.java b/solr/core/src/java/org/apache/solr/search/CancellableCollector.java index 46ece0ef8753..5bfe3a1bf364 100644 --- a/solr/core/src/java/org/apache/solr/search/CancellableCollector.java +++ b/solr/core/src/java/org/apache/solr/search/CancellableCollector.java @@ -28,18 +28,40 @@ /** Allows a query to be cancelled */ public class CancellableCollector implements Collector { - /** Thrown when a query gets cancelled */ - public static class QueryCancelledException extends RuntimeException {} + /** + * Thrown when a query gets cancelled. This is a control-flow exception only — it is caught by the + * searcher and never inspected, so we skip filling in the stack trace to keep cancellation cheap + * (matches Lucene {@code CollectionTerminatedException} and {@code + * TimeLimitingBulkScorer.TimeExceededException}). + */ + @SuppressWarnings("serial") + public static class QueryCancelledException extends RuntimeException { + @Override + public Throwable fillInStackTrace() { + // never re-thrown so we can save the expensive stacktrace + return this; + } + } private final Collector collector; private final AtomicBoolean isQueryCancelled; public CancellableCollector(Collector collector) { + this(collector, new AtomicBoolean()); + } + + /** + * Creates a {@link CancellableCollector} that polls a caller-supplied cancellation flag. Multiple + * per-slice collectors can share the same {@link AtomicBoolean} so that a single {@link + * #cancel()} on any instance is observed by all of them — required for cancelling a query that is + * fanned out across parallel segment slices. + */ + public CancellableCollector(Collector collector, AtomicBoolean cancellationFlag) { Objects.requireNonNull( collector, "Internal collector not provided but wrapper collector accessed"); - + Objects.requireNonNull(cancellationFlag, "cancellationFlag must not be null"); this.collector = collector; - this.isQueryCancelled = new AtomicBoolean(); + this.isQueryCancelled = cancellationFlag; } @Override @@ -50,11 +72,18 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept } return new FilterLeafCollector(collector.getLeafCollector(context)) { + int collectCount = 0; @Override public void collect(int doc) throws IOException { - if (isQueryCancelled.get()) { - throw new QueryCancelledException(); + // To avoid polling the AtomicBoolean (volatile read) on every collected document, + // which acts as a memory barrier and prevents JIT optimizations, + // we check it only every 1024 docs. This keeps the hot loop fast while maintaining + // responsive cancellation. + if ((collectCount++ & 0x3FF) == 0) { + if (isQueryCancelled.get()) { + throw new QueryCancelledException(); + } } in.collect(doc); } diff --git a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java index 43b0f8e6fdc3..e601d8e3b266 100644 --- a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java +++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java @@ -58,7 +58,9 @@ public EarlyTerminatingCollector( assert null != delegate; this.maxDocsToCollect = maxDocsToCollect; this.pendingDocsToCollect = docsToCollect; - this.chunkSize = Math.min(100, maxDocsToCollect / 10); + // chunkSize must be at least 1 (small maxDocsToCollect would otherwise divide-by-zero + // in the modulo check below). + this.chunkSize = Math.max(1, Math.min(100, maxDocsToCollect / 10)); } @Override @@ -74,9 +76,9 @@ public void collect(int doc) throws IOException { numCollectedLocally++; terminatedEarly = numCollectedLocally >= maxDocsToCollect; if (pendingDocsToCollect != null) { - pendingDocsToCollect.increment(); if (numCollectedLocally % chunkSize == 0) { - final long overallCollectedDocCount = pendingDocsToCollect.intValue(); + pendingDocsToCollect.add(chunkSize); + final long overallCollectedDocCount = pendingDocsToCollect.longValue(); terminatedEarly = overallCollectedDocCount >= maxDocsToCollect; } } diff --git a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java index 8f4470060efa..df248f434a9f 100644 --- a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java +++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java @@ -21,7 +21,15 @@ /** * Thrown by {@link EarlyTerminatingCollector} when the maximum to abort the scoring / collection * process early, when the specified maximum number of documents were collected. + * + *

This is a control-flow exception only — it is caught at the search entry point and its + * formatted {@link #getMessage() message} plus {@link #getNumberCollected()}, {@link + * #getNumberScanned()}, and {@link #getApproximateTotalHits(int)} are inspected, but the stack + * trace itself is never used. We skip {@link #fillInStackTrace()} to keep early-termination cheap, + * matching the Lucene {@code CollectionTerminatedException} and {@code + * TimeLimitingBulkScorer.TimeExceededException} convention. */ +@SuppressWarnings("serial") public class EarlyTerminatingCollectorException extends RuntimeException { private static final long serialVersionUID = 5939241340763428118L; private final int numberScanned; @@ -41,6 +49,12 @@ public EarlyTerminatingCollectorException(int numberCollected, int numberScanned this.numberScanned = numberScanned; } + @Override + public Throwable fillInStackTrace() { + // never re-thrown so we can save the expensive stacktrace + return this; + } + /** * The total number of documents in the index that were "scanned" by the index when collecting the * {@link #getNumberCollected()} documents that triggered this exception. diff --git a/solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java b/solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java deleted file mode 100644 index 3ad0712ff1a9..000000000000 --- a/solr/core/src/java/org/apache/solr/search/MultiThreadedSearcher.java +++ /dev/null @@ -1,374 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.search; - -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.concurrent.ExecutionException; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.SimpleCollector; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopDocsCollector; -import org.apache.lucene.search.TopFieldDocs; -import org.apache.lucene.util.FixedBitSet; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class MultiThreadedSearcher { - private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - final SolrIndexSearcher searcher; - - public MultiThreadedSearcher(SolrIndexSearcher searcher) { - this.searcher = searcher; - } - - SearchResult searchCollectorManagers( - int len, - QueryCommand cmd, - Query query, - boolean needTopDocs, - boolean needMaxScore, - boolean needDocSet, - QueryResult queryResult) - throws IOException { - Collection> collectors = new ArrayList<>(); - - int firstCollectorsSize = 0; - - final int firstTopDocsCollectorIndex; - if (needTopDocs) { - firstTopDocsCollectorIndex = firstCollectorsSize; - firstCollectorsSize++; - } else { - firstTopDocsCollectorIndex = -1; - } - - final int firstMaxScoreCollectorIndex; - if (needMaxScore) { - firstMaxScoreCollectorIndex = firstCollectorsSize; - firstCollectorsSize++; - } else { - firstMaxScoreCollectorIndex = -1; - } - - Collector[] firstCollectors = new Collector[firstCollectorsSize]; - - if (needTopDocs) { - - collectors.add(new TopDocsCM(len, cmd, firstCollectors, firstTopDocsCollectorIndex)); - } - if (needMaxScore) { - collectors.add(new MaxScoreCM(firstCollectors, firstMaxScoreCollectorIndex)); - } - if (needDocSet) { - int maxDoc = searcher.getRawReader().maxDoc(); - collectors.add(new DocSetCM(maxDoc)); - } - - @SuppressWarnings({"unchecked", "rawtypes"}) - CollectorManager[] colls = collectors.toArray(new CollectorManager[0]); - final SolrMultiCollectorManager manager = new SolrMultiCollectorManager(cmd, colls); - Object[] ret; - try { - ret = searcher.search(query, manager); - } catch (EarlyTerminatingCollectorException ex) { - ret = manager.reduce(); - queryResult.setMaxHitsTerminatedEarly(true); - queryResult.setPartialResults(Boolean.TRUE); - queryResult.setPartialResultsDetails(ex.getMessage()); - queryResult.setApproximateTotalHits( - ex.getApproximateTotalHits(searcher.getIndexReader().maxDoc())); - } catch (Exception ex) { - if (ex instanceof RuntimeException - && ex.getCause() != null - && ex.getCause() instanceof ExecutionException - && ex.getCause().getCause() != null - && ex.getCause().getCause() instanceof RuntimeException) { - throw (RuntimeException) ex.getCause().getCause(); - } else { - throw ex; - } - } - - ScoreMode scoreMode = SolrMultiCollectorManager.scoreMode(firstCollectors); - - return new SearchResult(scoreMode, ret); - } - - static boolean allowMT(DelegatingCollector postFilter, QueryCommand cmd) { - // TODO: it's unclear if segmentTerminateEarly is truly incompatible but - // since it has to appropriately denote partial results this needs to be - // investigated/tested before we can remove this check (perhaps for 9.8). - return postFilter == null && !cmd.getSegmentTerminateEarly() && cmd.getMultiThreaded(); - } - - static class MaxScoreResult { - final float maxScore; - - public MaxScoreResult(float maxScore) { - this.maxScore = maxScore; - } - } - - static class FixedBitSetCollector extends SimpleCollector { - @SuppressWarnings("JdkObsolete") - private final LinkedList bitSets = new LinkedList<>(); - - @SuppressWarnings("JdkObsolete") - private final LinkedList skipWords = new LinkedList<>(); - - @SuppressWarnings("JdkObsolete") - private final LinkedList skipBits = new LinkedList<>(); - - FixedBitSetCollector() {} - - @Override - protected void doSetNextReader(LeafReaderContext context) throws IOException { - this.bitSets.add(null); // lazy allocate when collecting document(s) - this.skipWords.add(context.docBase / 64); - this.skipBits.add(context.docBase % 64); - } - - @Override - public void collect(int doc) throws IOException { - FixedBitSet bitSet = this.bitSets.getLast(); - final int idx = this.skipBits.getLast() + doc; - - final int numWords = FixedBitSet.bits2words(idx + 1); // +1 to ensure minimum 1 word - - if (bitSet == null) { - this.bitSets.removeLast(); - bitSet = new FixedBitSet(numWords * 64); - this.bitSets.addLast(bitSet); - - } else if (bitSet.getBits().length < numWords) { - FixedBitSet smallerBitSet = this.bitSets.removeLast(); - bitSet = new FixedBitSet(numWords * 64); - bitSet.xor(smallerBitSet); - this.bitSets.addLast(bitSet); - } - - bitSet.set(idx); - } - - void update(FixedBitSet allBitSet) { - final long[] allBits = allBitSet.getBits(); - for (int bs_idx = 0; bs_idx < this.bitSets.size(); ++bs_idx) { - final FixedBitSet itBitSet = this.bitSets.get(bs_idx); - if (itBitSet != null) { - final int skipWords = this.skipWords.get(bs_idx); - final long[] itBits = itBitSet.getBits(); - for (int idx = 0; idx < itBits.length && skipWords + idx < allBits.length; ++idx) { - allBits[skipWords + idx] ^= itBits[idx]; - } - } - } - } - - @Override - public ScoreMode scoreMode() { - return ScoreMode.COMPLETE_NO_SCORES; - } - } - - static class SearchResult { - final ScoreMode scoreMode; - private final Object[] result; - - SearchResult(ScoreMode scoreMode, Object[] result) { - this.scoreMode = scoreMode; - this.result = result; - } - - public TopDocsResult getTopDocsResult() { - for (Object res : result) { - if (res instanceof TopDocsResult) { - return (TopDocsResult) res; - } - } - return null; - } - - public float getMaxScore(int totalHits) { - if (totalHits > 0) { - for (Object res : result) { - if (res instanceof MaxScoreResult) { - return ((MaxScoreResult) res).maxScore; - } - } - return Float.NaN; - } else { - return 0.0f; - } - } - - public FixedBitSet getFixedBitSet() { - for (Object res : result) { - if (res instanceof FixedBitSet) { - return (FixedBitSet) res; - } - } - return null; - } - } - - private static class MaxScoreCM implements CollectorManager { - private final Collector[] firstCollectors; - private final int firstMaxScoreCollectorIndex; - - public MaxScoreCM(Collector[] firstCollectors, int firstMaxScoreCollectorIndex) { - this.firstCollectors = firstCollectors; - this.firstMaxScoreCollectorIndex = firstMaxScoreCollectorIndex; - } - - @Override - public Collector newCollector() throws IOException { - MaxScoreCollector collector = new MaxScoreCollector(); - if (firstCollectors[firstMaxScoreCollectorIndex] == null) { - firstCollectors[firstMaxScoreCollectorIndex] = collector; - } - return collector; - } - - @Override - @SuppressWarnings("rawtypes") - public Object reduce(Collection collectors) throws IOException { - - MaxScoreCollector collector; - float maxScore = 0.0f; - for (Iterator collectorIterator = collectors.iterator(); - collectorIterator.hasNext(); - maxScore = Math.max(maxScore, collector.getMaxScore())) { - Collector next = (Collector) collectorIterator.next(); - if (next instanceof final EarlyTerminatingCollector earlyTerminatingCollector) { - next = earlyTerminatingCollector.getDelegate(); - } - collector = (MaxScoreCollector) next; - } - - return new MaxScoreResult(maxScore); - } - } - - private static class DocSetCM implements CollectorManager { - private final int maxDoc; - - public DocSetCM(int maxDoc) { - this.maxDoc = maxDoc; - } - - @Override - public Collector newCollector() throws IOException { - // TODO: add to firstCollectors here? or if not have comment w.r.t. why not adding - return new FixedBitSetCollector(); - } - - @Override - @SuppressWarnings({"rawtypes"}) - public Object reduce(Collection collectors) throws IOException { - final FixedBitSet reduced = new FixedBitSet(maxDoc); - for (Object collector : collectors) { - if (collector instanceof FixedBitSetCollector fixedBitSetCollector) { - fixedBitSetCollector.update(reduced); - } - } - return reduced; - } - } - - private class TopDocsCM implements CollectorManager { - private final int len; - private final QueryCommand cmd; - private final Collector[] firstCollectors; - private final int firstTopDocsCollectorIndex; - - public TopDocsCM( - int len, QueryCommand cmd, Collector[] firstCollectors, int firstTopDocsCollectorIndex) { - this.len = len; - this.cmd = cmd; - this.firstCollectors = firstCollectors; - this.firstTopDocsCollectorIndex = firstTopDocsCollectorIndex; - } - - @Override - public Collector newCollector() throws IOException { - @SuppressWarnings("rawtypes") - TopDocsCollector collector = searcher.buildTopDocsCollector(len, cmd); - if (firstCollectors[firstTopDocsCollectorIndex] == null) { - firstCollectors[firstTopDocsCollectorIndex] = collector; - } - return collector; - } - - @Override - @SuppressWarnings("rawtypes") - public Object reduce(Collection collectors) throws IOException { - - TopDocs[] topDocs = new TopDocs[collectors.size()]; - - int totalHits = -1; - int i = 0; - - Collector collector; - for (Object o : collectors) { - collector = (Collector) o; - if (collector instanceof final EarlyTerminatingCollector earlyTerminatingCollector) { - collector = earlyTerminatingCollector.getDelegate(); - } - if (collector instanceof TopDocsCollector) { - TopDocs td = ((TopDocsCollector) collector).topDocs(0, len); - assert td != null : Arrays.asList(topDocs); - topDocs[i++] = td; - } - } - - TopDocs mergedTopDocs = null; - - if (topDocs.length > 0 && topDocs[0] != null) { - if (Arrays.stream(topDocs).allMatch(td -> td instanceof TopFieldDocs)) { - TopFieldDocs[] topFieldDocs = - Arrays.copyOf(topDocs, topDocs.length, TopFieldDocs[].class); - mergedTopDocs = TopFieldDocs.merge(searcher.weightSort(cmd.getSort()), len, topFieldDocs); - } else { - mergedTopDocs = TopDocs.merge(0, len, topDocs); - } - totalHits = (int) mergedTopDocs.totalHits.value(); - } - return new TopDocsResult(mergedTopDocs, totalHits); - } - } - - static class TopDocsResult { - final TopDocs topDocs; - final int totalHits; - - public TopDocsResult(TopDocs topDocs, int totalHits) { - this.topDocs = topDocs; - this.totalHits = totalHits; - } - } -} diff --git a/solr/core/src/java/org/apache/solr/search/SolrCollectorManagers.java b/solr/core/src/java/org/apache/solr/search/SolrCollectorManagers.java new file mode 100644 index 000000000000..994365bf5625 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/SolrCollectorManagers.java @@ -0,0 +1,602 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.search; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.LongAdder; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopDocsCollector; +import org.apache.lucene.search.TopFieldDocs; +import org.apache.lucene.util.FixedBitSet; +import org.apache.solr.core.CancellableQueryTracker; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Holders for the {@link CollectorManager} implementations used by Solr's search paths. Shared + * across the single-slice and multi-slice (parallel-segment) execution paths. + */ +final class SolrCollectorManagers { + + private SolrCollectorManagers() {} + + static class MaxScoreResult { + final float maxScore; + + MaxScoreResult(float maxScore) { + this.maxScore = maxScore; + } + } + + static class TopDocsResult { + final TopDocs topDocs; + final int totalHits; + + TopDocsResult(TopDocs topDocs, int totalHits) { + this.topDocs = topDocs; + this.totalHits = totalHits; + } + } + + static class SearchResult { + final ScoreMode scoreMode; + private final Object[] result; + + SearchResult(ScoreMode scoreMode, Object[] result) { + this.scoreMode = scoreMode; + this.result = result; + } + + public TopDocsResult getTopDocsResult() { + for (Object res : result) { + if (res instanceof TopDocsResult) { + return (TopDocsResult) res; + } + } + return null; + } + + public float getMaxScore(int totalHits) { + if (totalHits > 0) { + for (Object res : result) { + if (res instanceof MaxScoreResult) { + return ((MaxScoreResult) res).maxScore; + } + } + return Float.NaN; + } else { + return 0.0f; + } + } + + public DocSet getDocSet() { + for (Object res : result) { + if (res instanceof DocSet) { + return (DocSet) res; + } + if (res instanceof FixedBitSet fbs) { + return new BitDocSet(fbs); + } + } + return null; + } + } + + static class FixedBitSetCollector extends SimpleCollector { + @SuppressWarnings("JdkObsolete") + private final LinkedList bitSets = new LinkedList<>(); + + @SuppressWarnings("JdkObsolete") + private final LinkedList skipWords = new LinkedList<>(); + + @SuppressWarnings("JdkObsolete") + private final LinkedList skipBits = new LinkedList<>(); + + FixedBitSetCollector() {} + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + this.bitSets.add(null); // lazy allocate when collecting document(s) + this.skipWords.add(context.docBase / 64); + this.skipBits.add(context.docBase % 64); + } + + @Override + public void collect(int doc) throws IOException { + FixedBitSet bitSet = this.bitSets.getLast(); + final int idx = this.skipBits.getLast() + doc; + + final int numWords = FixedBitSet.bits2words(idx + 1); // +1 to ensure minimum 1 word + + if (bitSet == null) { + this.bitSets.removeLast(); + bitSet = new FixedBitSet(numWords * 64); + this.bitSets.addLast(bitSet); + + } else if (bitSet.getBits().length < numWords) { + FixedBitSet smallerBitSet = this.bitSets.removeLast(); + bitSet = new FixedBitSet(numWords * 64); + bitSet.xor(smallerBitSet); + this.bitSets.addLast(bitSet); + } + + bitSet.set(idx); + } + + void update(FixedBitSet allBitSet) { + final long[] allBits = allBitSet.getBits(); + for (int bs_idx = 0; bs_idx < this.bitSets.size(); ++bs_idx) { + final FixedBitSet itBitSet = this.bitSets.get(bs_idx); + if (itBitSet != null) { + final int skipWords = this.skipWords.get(bs_idx); + final long[] itBits = itBitSet.getBits(); + for (int idx = 0; idx < itBits.length && skipWords + idx < allBits.length; ++idx) { + allBits[skipWords + idx] ^= itBits[idx]; + } + } + } + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + } + + static class MaxScoreCM implements CollectorManager { + private final Collector[] firstCollectors; + private final int firstMaxScoreCollectorIndex; + + MaxScoreCM(Collector[] firstCollectors, int firstMaxScoreCollectorIndex) { + this.firstCollectors = firstCollectors; + this.firstMaxScoreCollectorIndex = firstMaxScoreCollectorIndex; + } + + @Override + public Collector newCollector() throws IOException { + MaxScoreCollector collector = new MaxScoreCollector(); + if (firstCollectors[firstMaxScoreCollectorIndex] == null) { + firstCollectors[firstMaxScoreCollectorIndex] = collector; + } + return collector; + } + + @Override + @SuppressWarnings("rawtypes") + public Object reduce(Collection collectors) throws IOException { + float maxScore = 0.0f; + for (Object o : collectors) { + Collector next = (Collector) o; + if (next instanceof final EarlyTerminatingCollector etc) { + next = etc.getDelegate(); + } + maxScore = Math.max(maxScore, ((MaxScoreCollector) next).getMaxScore()); + } + return new MaxScoreResult(maxScore); + } + } + + static class DocSetCM implements CollectorManager { + private final int maxDoc; + + DocSetCM(int maxDoc) { + this.maxDoc = maxDoc; + } + + @Override + public Collector newCollector() throws IOException { + return new FixedBitSetCollector(); + } + + @Override + @SuppressWarnings({"rawtypes"}) + public Object reduce(Collection collectors) throws IOException { + final FixedBitSet reduced = new FixedBitSet(maxDoc); + for (Object collector : collectors) { + if (collector instanceof FixedBitSetCollector fixedBitSetCollector) { + fixedBitSetCollector.update(reduced); + } + } + return reduced; + } + } + + /** + * Single-slice {@link DocSet} producer. Wraps a {@link DocSetCollector} (which preserves Solr's + * sparse {@code SortedIntDocSet} optimization for small result sets) and reduces via {@link + * DocSetUtil#getDocSet}. Use this in single-slice paths where we want to keep the legacy DocSet + * shape; multi-slice paths use {@link DocSetCM} which produces a {@link FixedBitSet} (and thus + * always a {@code BitDocSet}). + */ + static class DocSetCollectorCM implements CollectorManager { + private final SolrIndexSearcher searcher; + private final int maxDoc; + + DocSetCollectorCM(SolrIndexSearcher searcher) { + this.searcher = searcher; + this.maxDoc = searcher.maxDoc(); + } + + @Override + public Collector newCollector() throws IOException { + return new DocSetCollector(maxDoc); + } + + @Override + public Object reduce(Collection collectors) throws IOException { + DocSetCollector dsc = null; + for (Collector c : collectors) { + if (c instanceof DocSetCollector inner) { + if (dsc != null) { + throw new IllegalStateException("DocSetCollectorCM is single-slice only"); + } + dsc = inner; + } + } + if (dsc == null) { + return null; + } + return DocSetUtil.getDocSet(dsc, searcher); + } + } + + static class TopDocsCM implements CollectorManager { + private final SolrIndexSearcher searcher; + private final int len; + private final QueryCommand cmd; + private final Collector[] firstCollectors; + private final int firstTopDocsCollectorIndex; + + TopDocsCM( + SolrIndexSearcher searcher, + int len, + QueryCommand cmd, + Collector[] firstCollectors, + int firstTopDocsCollectorIndex) { + this.searcher = searcher; + this.len = len; + this.cmd = cmd; + this.firstCollectors = firstCollectors; + this.firstTopDocsCollectorIndex = firstTopDocsCollectorIndex; + } + + @Override + public Collector newCollector() throws IOException { + @SuppressWarnings("rawtypes") + TopDocsCollector collector = searcher.buildTopDocsCollector(len, cmd); + if (firstCollectors[firstTopDocsCollectorIndex] == null) { + firstCollectors[firstTopDocsCollectorIndex] = collector; + } + return collector; + } + + @Override + @SuppressWarnings("rawtypes") + public Object reduce(Collection collectors) throws IOException { + + TopDocs[] topDocs = new TopDocs[collectors.size()]; + + int totalHits = -1; + int i = 0; + + Collector collector; + for (Object o : collectors) { + collector = (Collector) o; + if (collector instanceof final EarlyTerminatingCollector earlyTerminatingCollector) { + collector = earlyTerminatingCollector.getDelegate(); + } + if (collector instanceof TopDocsCollector) { + TopDocs td = ((TopDocsCollector) collector).topDocs(0, len); + assert td != null : Arrays.asList(topDocs); + topDocs[i++] = td; + } + } + + TopDocs mergedTopDocs = null; + + if (topDocs.length > 0 && topDocs[0] != null) { + if (i == 1) { + // Single-slice: skip merge. TopDocs.merge with one shard creates a fresh TotalHits and + // can lose the EQUAL_TO/GREATER_THAN_OR_EQUAL_TO distinction that the original + // TopDocsCollector recorded — single-slice callers (e.g. queries with a post-filter) + // expect the original relation to be preserved verbatim. + mergedTopDocs = topDocs[0]; + } else if (Arrays.stream(topDocs).allMatch(td -> td instanceof TopFieldDocs)) { + TopFieldDocs[] topFieldDocs = + Arrays.copyOf(topDocs, topDocs.length, TopFieldDocs[].class); + mergedTopDocs = TopFieldDocs.merge(searcher.weightSort(cmd.getSort()), len, topFieldDocs); + } else { + mergedTopDocs = TopDocs.merge(0, len, topDocs); + } + totalHits = (int) mergedTopDocs.totalHits.value(); + } + return new TopDocsResult(mergedTopDocs, totalHits); + } + } + + /** + * Wraps a single pre-built {@link Collector} as a single-slice {@link CollectorManager}. {@link + * CollectorManager#newCollector()} returns the wrapped collector exactly once; a second call + * throws. {@link CollectorManager#reduce} returns the same collector instance, letting callers + * read state out of it. + */ + static CollectorManager singleSlice(C collector) { + return new CollectorManager() { + private boolean given = false; + + @Override + public C newCollector() { + if (given) { + throw new IllegalStateException( + "singleSlice() collector manager only supports a single slice"); + } + given = true; + return collector; + } + + @Override + public C reduce(Collection collectors) { + return collector; + } + }; + } + + /** + * Wraps a single inner {@link CollectorManager} as a {@code CollectorManager} + * compatible with {@link SolrMultiCollectorManager}'s reduce shape, but without the per-doc + * fan-out overhead. + * + *

{@link SolrMultiCollectorManager} adds a per-document {@code LeafCollectors.collect()} loop + * on top of every inner collector — worth it when there are multiple inner CMs, but pure overhead + * when there is only one. {@link SolrIndexSearcher#searchAndCollect} routes single-inner-CM + * searches through this adapter so {@code LeafCollector.collect(doc)} dispatches directly to the + * inner collector. Mirrors the spirit of Lucene's {@code MultiCollectorManager.wrap(...)} + * short-circuit for the one-collector case. + */ + static CollectorManager wrapSingle( + CollectorManager inner) { + return new CollectorManager() { + @Override + public C newCollector() throws IOException { + return inner.newCollector(); + } + + @SuppressWarnings({"unchecked", "rawtypes"}) + @Override + public Object[] reduce(Collection collectors) throws IOException { + return new Object[] {((CollectorManager) inner).reduce(collectors)}; + } + }; + } + + /** + * Wraps an inner {@link CollectorManager} with the search-chain features that {@link + * SolrIndexSearcher} requires: {@code segmentTerminateEarly}, {@code maxHitsAllowed} early + * termination, optional Solr post-filter, and query cancellation via a shared {@link + * AtomicBoolean}. + * + *

Wrapping order from inside out: inner collector → {@link EarlyTerminatingSortingCollector} → + * {@link EarlyTerminatingCollector} → {@link DelegatingCollector} post-filter → {@link + * CancellableCollector}. + * + *

Per-slice cancellation works because every {@link CancellableCollector} created by {@link + * #newCollector()} polls the same shared flag — see {@link + * CancellableCollector#CancellableCollector(Collector, AtomicBoolean)}. The first cancellable + * created is registered with {@link CancellableQueryTracker} so the existing cancel-by-id API + * remains intact. + * + *

Post-filter handling is intentionally limited: a {@link DelegatingCollector} is a single + * per-search instance with cross-collect state, so {@link #requiresSingleSlice()} returns {@code + * true} when one is configured. The calling search path must honor that and execute as a single + * slice. + * + *

{@link #reduce} aggregates {@code segmentTerminatedEarly} across the per-slice {@link + * EarlyTerminatingSortingCollector} instances, calls {@link DelegatingCollector#complete()} once + * if a post-filter is present, then forwards to the inner manager's reduce on the inner + * collectors that were created. + */ + static class ChainCM implements CollectorManager { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private final CollectorManager inner; + private final QueryCommand cmd; + private final DelegatingCollector postFilter; + private final Sort mergePolicySort; + private final QueryResult qr; + private final AtomicBoolean cancellationFlag; + private final CancellableQueryTracker cancellableTracker; + + private final List innerCollectors = Collections.synchronizedList(new ArrayList<>()); + private final List segmentTermCollectors = + Collections.synchronizedList(new ArrayList<>()); + private final LongAdder earlyTerminatingHits; + private CancellableCollector trackerRegistration; + private Collector firstWrappedCollector; + private boolean newCollectorCalled = false; + private boolean reduced = false; + private T reducedResult; + + ChainCM( + CollectorManager inner, + QueryCommand cmd, + DelegatingCollector postFilter, + Sort mergePolicySort, + QueryResult qr, + AtomicBoolean cancellationFlag, + CancellableQueryTracker cancellableTracker, + boolean parallelEligible) { + this.inner = inner; + this.cmd = cmd; + this.postFilter = postFilter; + this.mergePolicySort = mergePolicySort; + this.qr = qr; + this.cancellationFlag = cancellationFlag; + this.cancellableTracker = cancellableTracker; + boolean singleSlice = (postFilter != null) || !parallelEligible; + this.earlyTerminatingHits = + (cmd.shouldEarlyTerminateSearch() && !singleSlice) ? new LongAdder() : null; + } + + @Override + public Collector newCollector() throws IOException { + if (postFilter != null && newCollectorCalled) { + throw new IllegalStateException( + "ChainCM with a post-filter requires single-slice execution"); + } + + C innerCollector = inner.newCollector(); + innerCollectors.add(innerCollector); + Collector c = innerCollector; + + if (cmd.getSegmentTerminateEarly()) { + Sort cmdSort = cmd.getSort(); + int cmdLen = cmd.getLen(); + if (cmdSort == null + || cmdLen <= 0 + || mergePolicySort == null + || !EarlyTerminatingSortingCollector.canEarlyTerminate(cmdSort, mergePolicySort)) { + log.warn( + "unsupported combination: segmentTerminateEarly=true cmdSort={} cmdLen={} mergeSort={}", + cmdSort, + cmdLen, + mergePolicySort); + } else { + EarlyTerminatingSortingCollector etsc = + new EarlyTerminatingSortingCollector(c, cmdSort, cmdLen); + segmentTermCollectors.add(etsc); + c = etsc; + } + } + + if (cmd.shouldEarlyTerminateSearch()) { + c = new EarlyTerminatingCollector(c, cmd.getMaxHitsAllowed(), earlyTerminatingHits); + } + + if (postFilter != null) { + postFilter.setLastDelegate(c); + c = postFilter; + } + + if (cmd.isQueryCancellable()) { + CancellableCollector cancellable = new CancellableCollector(c, cancellationFlag); + c = cancellable; + synchronized (this) { + if (trackerRegistration == null && cancellableTracker != null) { + trackerRegistration = cancellable; + cancellableTracker.addShardLevelActiveQuery(cmd.getQueryID(), cancellable); + } + } + } + + synchronized (this) { + newCollectorCalled = true; + if (firstWrappedCollector == null) { + firstWrappedCollector = c; + } + } + return c; + } + + /** + * Returns the {@link ScoreMode} of the first chain-wrapped collector built by {@link + * #newCollector()}. Used by callers (e.g. {@link SolrIndexSearcher#searchAndCollect}) that need + * the score mode after applying the chain wrappers — a post-filter, for example, may override + * its delegate's score mode (e.g. {@code CollapsingPostFilter} returns {@code COMPLETE}), and + * that override is invisible to the inner per-CM collectors. Returns {@code null} if {@link + * #newCollector()} has not been called. + */ + public ScoreMode getScoreMode() { + return firstWrappedCollector == null ? null : firstWrappedCollector.scoreMode(); + } + + @Override + public T reduce(Collection collectors) throws IOException { + if (reduced) { + return reducedResult; + } + reduced = true; + + boolean anyTerminatedEarly = false; + synchronized (segmentTermCollectors) { + for (EarlyTerminatingSortingCollector etsc : segmentTermCollectors) { + if (etsc.terminatedEarly()) { + anyTerminatedEarly = true; + break; + } + } + } + if (anyTerminatedEarly && qr != null) { + qr.setSegmentTerminatedEarly(true); + } + + // postFilter.complete() emits any buffered docs into the delegate collector chain, + // which may include EarlyTerminatingCollector. A throw from here must still leave + // reducedResult populated with whatever the inner collectors managed to gather, + // because the outer catch handler in SolrIndexSearcher.runChainAndCollect calls + // chain.reduce(List.of()) a second time to fetch the partial result -- and that + // second call short-circuits on `reduced == true` above. + EarlyTerminatingCollectorException pendingEtc = null; + if (postFilter != null) { + try { + postFilter.complete(); + } catch (EarlyTerminatingCollectorException etce) { + pendingEtc = etce; + } + } + + List innerList; + synchronized (innerCollectors) { + innerList = new ArrayList<>(innerCollectors); + } + reducedResult = inner.reduce(innerList); + + if (pendingEtc != null) { + throw pendingEtc; + } + return reducedResult; + } + + /** + * Removes the tracker registration created lazily on first {@link #newCollector()}. Callers + * must invoke this after the search completes, in a {@code finally} block, to avoid leaking + * registrations on exception. + */ + public void cleanup() { + if (trackerRegistration != null && cancellableTracker != null) { + cancellableTracker.removeCancellableQuery(cmd.getQueryID()); + } + } + + public boolean requiresSingleSlice() { + return postFilter != null; + } + } +} diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 8098b39f4a19..b96ed1017d8d 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -24,15 +24,18 @@ import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Comparator; import java.util.Date; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.LongAdder; import java.util.function.Function; @@ -55,6 +58,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.FieldDoc; @@ -96,6 +100,7 @@ import org.apache.solr.common.util.IOUtils; import org.apache.solr.common.util.ObjectReleaseTracker; import org.apache.solr.common.util.SolrNamedThreadFactory; +import org.apache.solr.core.CancellableQueryTracker; import org.apache.solr.core.DirectoryFactory; import org.apache.solr.core.DirectoryFactory.DirContext; import org.apache.solr.core.NodeConfig; @@ -270,88 +275,85 @@ protected void beforeExecute(Thread t, Runnable r) { } /** - * Builds the necessary collector chain (via delegate wrapping) and executes the query against it. - * This method takes into consideration both the explicitly provided collector and postFilter as - * well as any needed collector wrappers for dealing with options specified in the QueryCommand. - * - * @return The collector used for search + * Builds a {@link SolrCollectorManagers.ChainCM} wrapping {@code innerManager} with the Solr + * collector-chain features driven by {@code cmd} (segment-terminate-early, max-hits early + * termination, optional post-filter, optional cancellation). The returned chain is un-executed; + * pass it to {@link #runChainAndCollect}. */ - private Collector buildAndRunCollectorChain( + private SolrCollectorManagers.ChainCM buildChain( QueryResult qr, - Query query, - Collector collector, QueryCommand cmd, - DelegatingCollector postFilter) + DelegatingCollector postFilter, + CollectorManager innerManager, + boolean parallelEligible) throws IOException { + AtomicBoolean cancellationFlag = cmd.isQueryCancellable() ? new AtomicBoolean() : null; + Sort mergePolicySort = + cmd.getSegmentTerminateEarly() ? core.getSolrCoreState().getMergePolicySort() : null; + CancellableQueryTracker tracker = + cmd.isQueryCancellable() ? core.getCancellableQueryTracker() : null; + return new SolrCollectorManagers.ChainCM<>( + innerManager, + cmd, + postFilter, + mergePolicySort, + qr, + cancellationFlag, + tracker, + parallelEligible); + } - EarlyTerminatingSortingCollector earlyTerminatingSortingCollector = null; - if (cmd.getSegmentTerminateEarly()) { - final Sort cmdSort = cmd.getSort(); - final int cmdLen = cmd.getLen(); - final Sort mergeSort = core.getSolrCoreState().getMergePolicySort(); - - if (cmdSort == null - || cmdLen <= 0 - || mergeSort == null - || !EarlyTerminatingSortingCollector.canEarlyTerminate(cmdSort, mergeSort)) { - log.warn( - "unsupported combination: segmentTerminateEarly=true cmdSort={} cmdLen={} mergeSort={}", - cmdSort, - cmdLen, - mergeSort); - } else { - collector = - earlyTerminatingSortingCollector = - new EarlyTerminatingSortingCollector(collector, cmdSort, cmd.getLen()); - } - } - - if (cmd.shouldEarlyTerminateSearch()) { - collector = new EarlyTerminatingCollector(collector, cmd.getMaxHitsAllowed()); - } - - if (postFilter != null) { - postFilter.setLastDelegate(collector); - collector = postFilter; - } - - if (cmd.isQueryCancellable()) { - collector = new CancellableCollector(collector); + /** + * Executes a search using the supplied {@link SolrCollectorManagers.ChainCM}, returning the inner + * manager's reduced result. Selects between single-slice and multi-slice execution based on + * {@code chain.requiresSingleSlice()} and {@code parallelEligible}, and translates + * partial-results signaling onto {@link QueryResult}. + */ + private T runChainAndCollect( + QueryResult qr, + Query query, + SolrCollectorManagers.ChainCM chain, + boolean parallelEligible) + throws IOException { - // Add this to the local active queries map - core.getCancellableQueryTracker() - .addShardLevelActiveQuery(cmd.getQueryID(), (CancellableCollector) collector); - } + final boolean singleSlice = chain.requiresSingleSlice() || !parallelEligible; + T result = null; try { try { - search(query, collector); - } finally { - // The complete() method can use the collectors, so this needs to be surrounded by the same - // catch logic that limit collecting - if (collector instanceof DelegatingCollector) { - ((DelegatingCollector) collector).complete(); + if (singleSlice) { + Collector chainCollector = chain.newCollector(); + search(query, chainCollector); + result = chain.reduce(List.of(chainCollector)); + } else { + result = search(query, chain); + } + } catch (ExitableDirectoryReader.ExitingReaderException + | CancellableCollector.QueryCancelledException x) { + log.warn("Query: [{}]; ", query, x); + qr.setPartialResults(true); + result = chain.reduce(List.of()); + } catch (EarlyTerminatingCollectorException etce) { + qr.setPartialResults(true); + qr.setMaxHitsTerminatedEarly(true); + qr.setPartialResultsDetails(etce.getMessage()); + qr.setApproximateTotalHits(etce.getApproximateTotalHits(reader.maxDoc())); + result = chain.reduce(List.of()); + } catch (RuntimeException ex) { + // Lucene's CollectorManager fan-out wraps slice failures in ExecutionException, which + // is itself wrapped in a RuntimeException. Unwrap to surface the original cause so + // callers see the same exception they would in single-slice mode. + if (ex.getCause() instanceof ExecutionException + && ex.getCause().getCause() instanceof RuntimeException) { + throw (RuntimeException) ex.getCause().getCause(); } + throw ex; } - } catch (ExitableDirectoryReader.ExitingReaderException - | CancellableCollector.QueryCancelledException x) { - log.warn("Query: [{}]; ", query, x); - qr.setPartialResults(true); - } catch (EarlyTerminatingCollectorException etce) { - qr.setPartialResults(true); - qr.setMaxHitsTerminatedEarly(true); - qr.setPartialResultsDetails(etce.getMessage()); - qr.setApproximateTotalHits(etce.getApproximateTotalHits(reader.maxDoc())); } finally { - if (earlyTerminatingSortingCollector != null) { - qr.setSegmentTerminatedEarly(earlyTerminatingSortingCollector.terminatedEarly()); - } - if (cmd.isQueryCancellable()) { - core.getCancellableQueryTracker().removeCancellableQuery(cmd.getQueryID()); - } + chain.cleanup(); } - return collector; + return result; } public SolrIndexSearcher( @@ -1913,6 +1915,81 @@ TopDocsCollector buildTopDocsCollector(int len, QueryCommand } } + /** + * Builds and executes the collection phase of a search, returning a uniform {@link + * SolrCollectorManagers.SearchResult}. Constructs the inner per-segment-friendly collector + * managers (top-docs, max-score, doc-set) and delegates to {@link #runChainAndCollect} which + * handles the chain wrappers and selects between single-slice and multi-slice execution. + * + *

The {@link DocSet} shape is preserved from the legacy code paths: when single-slice + * execution is forced (post-filter, segment-terminate-early, or {@code multiThreaded=false}) the + * doc-set is built via {@link SolrCollectorManagers.DocSetCollectorCM} so sparse results keep the + * {@code SortedIntDocSet} optimization; multi-slice uses {@link SolrCollectorManagers.DocSetCM} + * which always produces a {@code BitDocSet}. + */ + private SolrCollectorManagers.SearchResult searchAndCollect( + QueryResult qr, + QueryCommand cmd, + ProcessedFilter pf, + Query query, + int len, + boolean needTopDocs, + boolean needMaxScore, + boolean needDocSet) + throws IOException { + final boolean parallelEligible = + pf.postFilter == null && !cmd.getSegmentTerminateEarly() && cmd.getMultiThreaded(); + + Collection> innerCMs = new ArrayList<>(); + int firstSize = 0; + final int firstTopDocsIdx = needTopDocs ? firstSize++ : -1; + final int firstMaxScoreIdx = needMaxScore ? firstSize++ : -1; + Collector[] firstCollectors = new Collector[firstSize]; + if (needTopDocs) { + innerCMs.add( + new SolrCollectorManagers.TopDocsCM(this, len, cmd, firstCollectors, firstTopDocsIdx)); + } + if (needMaxScore) { + innerCMs.add(new SolrCollectorManagers.MaxScoreCM(firstCollectors, firstMaxScoreIdx)); + } + if (needDocSet) { + if (parallelEligible) { + innerCMs.add(new SolrCollectorManagers.DocSetCM(getRawReader().maxDoc())); + } else { + innerCMs.add(new SolrCollectorManagers.DocSetCollectorCM(this)); + } + } + @SuppressWarnings({"unchecked", "rawtypes"}) + CollectorManager innerManager = + innerCMs.size() == 1 + // Single inner CM — skip SolrMultiCollectorManager to avoid the per-doc + // LeafCollectors fan-out loop. Adapt the inner CM's reduce result to the + // Object[] shape the rest of searchAndCollect expects. Mirrors Lucene's + // MultiCollectorManager.wrap() short-circuit for the one-collector case. + ? SolrCollectorManagers.wrapSingle((CollectorManager) innerCMs.iterator().next()) + : new SolrMultiCollectorManager(innerCMs.toArray(new CollectorManager[0])); + + SolrCollectorManagers.ChainCM chain = + buildChain(qr, cmd, pf.postFilter, innerManager, parallelEligible); + Object[] ret = runChainAndCollect(qr, query, chain, parallelEligible); + + // Use the chain-wrapped collector's score mode rather than the inner per-CM collectors': + // a Solr post-filter (e.g. CollapsingPostFilter) overrides scoreMode on the wrapped chain + // collector, and that override must propagate to populateScoresIfNeeded so the matches + // relation is reported correctly. + // + // chain.getScoreMode() is populated when newCollector() is first called. Lucene's + // IndexSearcher.search(Query, CollectorManager) calls newCollector() unconditionally + // before its empty-index short-circuit, so by the time runChainAndCollect returns this + // is always non-null. + return new SolrCollectorManagers.SearchResult( + Objects.requireNonNull( + chain.getScoreMode(), + "ChainCM.getScoreMode() returned null after runChainAndCollect; " + + "Lucene's IndexSearcher.search must call newCollector() at least once"), + ret); + } + private void getDocListNC(QueryResult qr, QueryCommand cmd) throws IOException { final int len = cmd.getSupersetMaxDoc(); int last = len; @@ -1962,9 +2039,12 @@ public ScoreMode scoreMode() { }; } - buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter); + runChainAndCollect( + qr, + query, + buildChain(qr, cmd, pf.postFilter, SolrCollectorManagers.singleSlice(collector), false), + false); - totalHits = numHits[0]; if (collector instanceof TotalHitCountCollector) { totalHits = ((TotalHitCountCollector) collector).getTotalHits(); } else { @@ -1977,42 +2057,13 @@ public ScoreMode scoreMode() { // no docs on this page, so cursor doesn't change qr.setNextCursorMark(cmd.getCursorMark()); } else { - if (log.isDebugEnabled()) { - log.debug("calling from 2, query: {}", query.getClass()); - } - final TopDocs topDocs; - final ScoreMode scoreModeUsed; - if (!MultiThreadedSearcher.allowMT(pf.postFilter, cmd)) { - log.trace("SINGLE THREADED search, skipping collector manager in getDocListNC"); - final TopDocsCollector topCollector = buildTopDocsCollector(len, cmd); - MaxScoreCollector maxScoreCollector = null; - Collector collector = topCollector; - if (needScores) { - maxScoreCollector = new MaxScoreCollector(); - collector = MultiCollector.wrap(topCollector, maxScoreCollector); - } - scoreModeUsed = - buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter).scoreMode(); - - totalHits = topCollector.getTotalHits(); - topDocs = topCollector.topDocs(0, len); - - maxScore = - totalHits > 0 - ? (maxScoreCollector == null ? Float.NaN : maxScoreCollector.getMaxScore()) - : 0.0f; - } else { - log.trace("MULTI-THREADED search, using CollectorManager int getDocListNC"); - final MultiThreadedSearcher.SearchResult searchResult = - new MultiThreadedSearcher(this) - .searchCollectorManagers(len, cmd, query, true, needScores, false, qr); - scoreModeUsed = searchResult.scoreMode; - - MultiThreadedSearcher.TopDocsResult topDocsResult = searchResult.getTopDocsResult(); - totalHits = topDocsResult.totalHits; - topDocs = topDocsResult.topDocs; - maxScore = searchResult.getMaxScore(totalHits); - } + final SolrCollectorManagers.SearchResult searchResult = + searchAndCollect(qr, cmd, pf, query, len, true, needScores, false); + final ScoreMode scoreModeUsed = searchResult.scoreMode; + final SolrCollectorManagers.TopDocsResult topDocsResult = searchResult.getTopDocsResult(); + totalHits = topDocsResult.totalHits; + final TopDocs topDocs = topDocsResult.topDocs; + maxScore = searchResult.getMaxScore(totalHits); hitsRelation = populateScoresIfNeeded(cmd, needScores, topDocs, query, scoreModeUsed); populateNextCursorMarkFromTopDocs(qr, cmd, topDocs); @@ -2081,7 +2132,11 @@ public ScoreMode scoreMode() { collector = MultiCollector.wrap(setCollector, topScoreCollector); } - buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter); + runChainAndCollect( + qr, + query, + buildChain(qr, cmd, pf.postFilter, SolrCollectorManagers.singleSlice(collector), false), + false); set = DocSetUtil.getDocSet(setCollector, this); @@ -2092,48 +2147,15 @@ public ScoreMode scoreMode() { // no docs on this page, so cursor doesn't change qr.setNextCursorMark(cmd.getCursorMark()); } else { - final TopDocs topDocs; - if (!MultiThreadedSearcher.allowMT(pf.postFilter, cmd)) { - log.trace("SINGLE THREADED search, skipping collector manager in getDocListAndSetNC"); - - @SuppressWarnings({"rawtypes"}) - final TopDocsCollector topCollector = buildTopDocsCollector(len, cmd); - final DocSetCollector setCollector = new DocSetCollector(maxDoc); - MaxScoreCollector maxScoreCollector = null; - List collectors = new ArrayList<>(Arrays.asList(topCollector, setCollector)); - - if (needScores) { - maxScoreCollector = new MaxScoreCollector(); - collectors.add(maxScoreCollector); - } - - Collector collector = MultiCollector.wrap(collectors); - - buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter); - - set = DocSetUtil.getDocSet(setCollector, this); - - totalHits = topCollector.getTotalHits(); - assert (totalHits == set.size()) || qr.isPartialResults(); - - topDocs = topCollector.topDocs(0, len); - maxScore = - totalHits > 0 - ? (maxScoreCollector == null ? Float.NaN : maxScoreCollector.getMaxScore()) - : 0.0f; - } else { - log.trace("MULTI-THREADED search, using CollectorManager in getDocListAndSetNC"); - - MultiThreadedSearcher.SearchResult searchResult = - new MultiThreadedSearcher(this) - .searchCollectorManagers(len, cmd, query, true, needScores, true, qr); - MultiThreadedSearcher.TopDocsResult topDocsResult = searchResult.getTopDocsResult(); - totalHits = topDocsResult.totalHits; - topDocs = topDocsResult.topDocs; - maxScore = searchResult.getMaxScore(totalHits); - set = new BitDocSet(searchResult.getFixedBitSet()); - // TODO: Think about using ScoreMode from searchResult down below - } + final SolrCollectorManagers.SearchResult searchResult = + searchAndCollect(qr, cmd, pf, query, len, true, needScores, true); + final SolrCollectorManagers.TopDocsResult topDocsResult = searchResult.getTopDocsResult(); + totalHits = topDocsResult.totalHits; + final TopDocs topDocs = topDocsResult.topDocs; + maxScore = searchResult.getMaxScore(totalHits); + set = searchResult.getDocSet(); + assert set != null; + assert (totalHits == set.size()) || qr.isPartialResults(); final Relation relation = populateScoresIfNeeded(cmd, needScores, topDocs, query, ScoreMode.COMPLETE); populateNextCursorMarkFromTopDocs(qr, cmd, topDocs); diff --git a/solr/core/src/java/org/apache/solr/search/SolrMultiCollectorManager.java b/solr/core/src/java/org/apache/solr/search/SolrMultiCollectorManager.java index 390f409616d4..6b9cf9352cb4 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrMultiCollectorManager.java +++ b/solr/core/src/java/org/apache/solr/search/SolrMultiCollectorManager.java @@ -20,8 +20,8 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; -import java.util.concurrent.atomic.LongAdder; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.FilterScorable; @@ -33,28 +33,24 @@ /** * A {@link CollectorManager} implements which wrap a set of {@link CollectorManager} as {@link * MultiCollector} acts for {@link Collector}. + * + *

Cross-cutting search-chain features (max-hits early termination, segment-terminate-early, + * post-filter, cancellation) are handled by {@link SolrCollectorManagers.ChainCM} which wraps this + * manager. This class only composes its child managers. */ public class SolrMultiCollectorManager implements CollectorManager { private final CollectorManager[] collectorManagers; - private LongAdder runningHits = null; - private int maxDocsToCollect; - private final List reducableCollectors = new ArrayList<>(); @SafeVarargs @SuppressWarnings({"varargs", "unchecked"}) public SolrMultiCollectorManager( - QueryCommand queryCommand, final CollectorManager... collectorManagers) { if (collectorManagers.length < 1) { throw new IllegalArgumentException("There must be at least one collector"); } this.collectorManagers = (CollectorManager[]) collectorManagers; - if (queryCommand.shouldEarlyTerminateSearch()) { - runningHits = new LongAdder(); - maxDocsToCollect = queryCommand.getMaxHitsAllowed(); - } } // TODO: could Lucene's MultiCollector permit reuse of its logic? @@ -72,9 +68,7 @@ public static ScoreMode scoreMode(Collector[] collectors) { @Override public Collectors newCollector() throws IOException { - final Collectors collector = new Collectors(); - reducableCollectors.add(collector); - return collector; + return new Collectors(); } @Override @@ -91,10 +85,6 @@ public Object[] reduce(Collection reducableCollectors) throws IOExce return results; } - public Object[] reduce() throws IOException { - return reduce(reducableCollectors); - } - /** Wraps multiple collectors for processing */ class Collectors implements Collector { @@ -103,11 +93,7 @@ class Collectors implements Collector { private Collectors() throws IOException { collectors = new Collector[collectorManagers.length]; for (int i = 0; i < collectors.length; i++) { - Collector collector = collectorManagers[i].newCollector(); - if (runningHits != null) { - collector = new EarlyTerminatingCollector(collector, maxDocsToCollect, runningHits); - } - collectors[i] = collector; + collectors[i] = collectorManagers[i].newCollector(); } } @@ -136,17 +122,44 @@ private LeafCollectors(final LeafReaderContext context, boolean skipNonCompetiti throws IOException { this.skipNonCompetitiveScores = skipNonCompetitiveScores; leafCollectors = new LeafCollector[collectors.length]; + int terminated = 0; for (int i = 0; i < collectors.length; i++) { - leafCollectors[i] = collectors[i].getLeafCollector(context); + try { + leafCollectors[i] = collectors[i].getLeafCollector(context); + } catch (CollectionTerminatedException e) { + // Per-child handling matches Lucene's MultiCollector: drop this child for the + // rest of the leaf and continue collecting for the others. Only when ALL children + // have terminated do we propagate the exception. + leafCollectors[i] = null; + terminated++; + } + } + if (terminated == leafCollectors.length && leafCollectors.length > 0) { + throw new CollectionTerminatedException(); } } @Override public final void setScorer(final Scorable scorer) throws IOException { if (skipNonCompetitiveScores) { - for (LeafCollector leafCollector : leafCollectors) { - if (leafCollector != null) { - leafCollector.setScorer(scorer); + // TOP_SCORES: aggregate per-child setMinCompetitiveScore so the underlying scorer + // only skips a doc when ALL active children agree it's non-competitive — mirroring + // Lucene's MultiCollector.MinCompetitiveScoreAwareScorable. Without this, the last + // child to call setMinCompetitiveScore overwrites previous values and can cause + // over-aggressive skipping (e.g. ReRankCollector + MaxScoreCollector with + // minExactCount tightly bounded). + if (leafCollectors.length == 1) { + LeafCollector lc = leafCollectors[0]; + if (lc != null) { + lc.setScorer(scorer); + } + } else { + float[] minScores = new float[leafCollectors.length]; + for (int i = 0; i < leafCollectors.length; i++) { + LeafCollector lc = leafCollectors[i]; + if (lc != null) { + lc.setScorer(new MinCompetitiveScoreAwareScorable(scorer, i, minScores)); + } } } } else { @@ -169,12 +182,78 @@ public void setMinCompetitiveScore(float minScore) throws IOException { @Override public final void collect(final int doc) throws IOException { + // Matches Lucene MultiCollector.MultiLeafCollector#collect exactly: keep the per-iteration + // body small (load slot, null-check, call) so the JIT can inline the child collect call, + // and only walk the array to detect "all terminated" inside the rare catch branch. + for (int i = 0; i < leafCollectors.length; i++) { + LeafCollector leafCollector = leafCollectors[i]; + if (leafCollector != null) { + try { + leafCollector.collect(doc); + } catch (CollectionTerminatedException e) { + leafCollectors[i].finish(); + leafCollectors[i] = null; + if (allLeavesTerminated()) { + throw new CollectionTerminatedException(); + } + } + } + } + } + + private boolean allLeavesTerminated() { + for (LeafCollector lc : leafCollectors) { + if (lc != null) { + return false; + } + } + return true; + } + + @Override + public final void finish() throws IOException { + // Matches Lucene MultiCollector.MultiLeafCollector#finish: forward end-of-leaf to + // every child that hasn't already been finished via per-doc termination above. for (LeafCollector leafCollector : leafCollectors) { if (leafCollector != null) { - leafCollector.collect(doc); + leafCollector.finish(); } } } } } + + /** + * Mirrors {@code Lucene MultiCollector.MinCompetitiveScoreAwareScorable}: tracks per-child + * minimum competitive scores and propagates the smallest of them to the underlying scorer, so the + * scorer only skips a doc when every wrapped collector agrees it's non-competitive. + */ + private static final class MinCompetitiveScoreAwareScorable extends FilterScorable { + private final int idx; + private final float[] minScores; + + MinCompetitiveScoreAwareScorable(Scorable in, int idx, float[] minScores) { + super(in); + this.idx = idx; + this.minScores = minScores; + } + + @Override + public void setMinCompetitiveScore(float minScore) throws IOException { + if (minScore > minScores[idx]) { + minScores[idx] = minScore; + in.setMinCompetitiveScore(minScore()); + } + } + + private float minScore() { + float min = Float.MAX_VALUE; + for (int i = 0; i < minScores.length; i++) { + if (minScores[i] < min) { + min = minScores[i]; + } + } + return min; + } + } } diff --git a/solr/core/src/test/org/apache/solr/search/TestMultiThreadedSearcher.java b/solr/core/src/test/org/apache/solr/search/TestMultiThreadedSearcher.java index 2b83390158f9..34e499b41f63 100644 --- a/solr/core/src/test/org/apache/solr/search/TestMultiThreadedSearcher.java +++ b/solr/core/src/test/org/apache/solr/search/TestMultiThreadedSearcher.java @@ -40,7 +40,7 @@ import org.junit.AfterClass; import org.junit.BeforeClass; -/** Tests for {@link MultiThreadedSearcher}. */ +/** Tests for parallel-segment search via {@link SolrIndexSearcher#search(QueryCommand)}. */ public class TestMultiThreadedSearcher extends SolrTestCaseJ4 { @BeforeClass