diff --git a/changelog/unreleased/SOLR-8127-distributed-luke.yml b/changelog/unreleased/SOLR-8127-distributed-luke.yml new file mode 100644 index 000000000000..c4654c30cc1f --- /dev/null +++ b/changelog/unreleased/SOLR-8127-distributed-luke.yml @@ -0,0 +1,8 @@ +# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc +title: Luke handler now aggregates results across multiple shards and does this by default in SolrCloud mode. +type: changed # added, changed, fixed, deprecated, removed, dependency_update, security, other +authors: + - name: Luke Kot-Zaniewski +links: + - name: SOLR-8127 + url: https://issues.apache.org/jira/browse/SOLR-8127 diff --git a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java index d59695da95f0..624f6bed6535 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java @@ -19,6 +19,8 @@ import static org.apache.lucene.index.IndexOptions.DOCS; import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS; import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; +import static org.apache.solr.common.params.CommonParams.DISTRIB; +import static org.apache.solr.common.params.CommonParams.PATH; import java.io.IOException; import java.lang.invoke.MethodHandles; @@ -28,11 +30,14 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Base64; +import java.util.Comparator; import java.util.Date; +import java.util.EnumMap; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; @@ -66,14 +71,24 @@ import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.PriorityQueue; import org.apache.solr.analysis.TokenizerChain; +import
org.apache.solr.client.api.util.SolrVersion; +import org.apache.solr.client.solrj.response.LukeResponse; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.ShardParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.core.SolrCore; import org.apache.solr.handler.RequestHandlerBase; +import org.apache.solr.handler.component.ResponseBuilder; +import org.apache.solr.handler.component.ShardHandler; +import org.apache.solr.handler.component.ShardHandlerFactory; +import org.apache.solr.handler.component.ShardRequest; +import org.apache.solr.handler.component.ShardResponse; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.CopyField; @@ -82,7 +97,9 @@ import org.apache.solr.schema.SchemaField; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.security.AuthorizationContext; +import org.apache.solr.servlet.HttpSolrCall; import org.apache.solr.update.SolrIndexWriter; +import org.apache.solr.util.plugin.SolrCoreAware; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -95,7 +112,7 @@ * @see SegmentsInfoRequestHandler * @since solr 1.2 */ -public class LukeRequestHandler extends RequestHandlerBase { +public class LukeRequestHandler extends RequestHandlerBase implements SolrCoreAware { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); public static final String NUMTERMS = "numTerms"; @@ -106,6 +123,35 @@ public class LukeRequestHandler extends RequestHandlerBase { static final int HIST_ARRAY_SIZE = 33; + // Response section keys + static final String RSP_INDEX = 
"index"; + static final String RSP_FIELDS = "fields"; + static final String RSP_SCHEMA = "schema"; + static final String RSP_INFO = "info"; + static final String RSP_DOC = "doc"; + static final String RSP_SHARDS = "shards"; + + // Field-level keys + static final String KEY_NUM_DOCS = "numDocs"; + static final String KEY_MAX_DOC = "maxDoc"; + static final String KEY_DELETED_DOCS = "deletedDocs"; + static final String KEY_SEGMENT_COUNT = "segmentCount"; + static final String KEY_TYPE = "type"; + static final String KEY_SCHEMA_FLAGS = "schema"; + static final String KEY_DOCS = "docs"; + static final String KEY_DISTINCT = "distinct"; + static final String KEY_TOP_TERMS = "topTerms"; + static final String KEY_DYNAMIC_BASE = "dynamicBase"; + static final String KEY_INDEX_FLAGS = "index"; + static final String KEY_HISTOGRAM = "histogram"; + + private ShardHandlerFactory shardHandlerFactory; + + @Override + public void inform(SolrCore core) { + this.shardHandlerFactory = core.getCoreContainer().getShardHandlerFactory(); + } + @Override public Name getPermissionName(AuthorizationContext request) { return Name.READ_PERM; @@ -121,7 +167,7 @@ public static ShowStyle get(String v) { if (v == null) return null; if ("schema".equalsIgnoreCase(v)) return SCHEMA; if ("index".equalsIgnoreCase(v)) return INDEX; - if ("doc".equalsIgnoreCase(v)) return DOC; + if (RSP_DOC.equalsIgnoreCase(v)) return DOC; if ("all".equalsIgnoreCase(v)) return ALL; throw new SolrException(ErrorCode.BAD_REQUEST, "Unknown Show Style: " + v); } @@ -129,15 +175,24 @@ public static ShowStyle get(String v) { @Override public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception { + SolrParams params = req.getParams(); + + boolean isDistrib = params.getBool(DISTRIB, req.getCoreContainer().isZooKeeperAware()); + if (!isDistrib) { + String shards = params.get(ShardParams.SHARDS); + isDistrib = shards != null && shards.indexOf('/') > 0; + } + if (isDistrib && handleDistributed(req, 
rsp)) { + return; + } + IndexSchema schema = req.getSchema(); SolrIndexSearcher searcher = req.getSearcher(); DirectoryReader reader = searcher.getIndexReader(); - SolrParams params = req.getParams(); ShowStyle style = ShowStyle.get(params.get("show")); // If no doc is given, show all fields and top terms - - rsp.add("index", getIndexInfo(reader)); + rsp.add(RSP_INDEX, getIndexInfo(reader)); if (ShowStyle.INDEX == style) { return; // that's all we need @@ -150,14 +205,10 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw String v = uniqueKey.getType().toInternal(params.get(ID)); Term t = new Term(uniqueKey.getName(), v); docId = searcher.getFirstMatch(t); - if (docId < 0) { - throw new SolrException( - SolrException.ErrorCode.NOT_FOUND, "Can't find document: " + params.get(ID)); - } } // Read the document from the index - if (docId != null) { + if (docId != null && docId > -1) { if (style != null && style != ShowStyle.DOC) { throw new SolrException(ErrorCode.BAD_REQUEST, "missing doc param for doc style"); } @@ -176,11 +227,11 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw docinfo.add("docId", docId); docinfo.add("lucene", info); docinfo.add("solr", doc); - rsp.add("doc", docinfo); + rsp.add(RSP_DOC, docinfo); } else if (ShowStyle.SCHEMA == style) { - rsp.add("schema", getSchemaInfo(req.getSchema())); + rsp.add(RSP_SCHEMA, getSchemaInfo(req.getSchema())); } else { - rsp.add("fields", getIndexedFieldsInfo(req)); + rsp.add(RSP_FIELDS, getIndexedFieldsInfo(req)); } // Add some generally helpful information @@ -189,8 +240,411 @@ public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throw info.add( "NOTE", "Document Frequency (df) is not updated when a document is marked for deletion. 
df values include deleted documents.");
-    rsp.add("info", info);
+    rsp.add(RSP_INFO, info);
+    rsp.setHttpCaching(false);
+  }
+
+  /**
+   * Field-level response keys, declared in the order they appear in the local (non-distributed)
+   * response. EnumMap iteration follows declaration order, giving deterministic output.
+   */
+  enum FieldDataKey {
+    TYPE(KEY_TYPE),
+    SCHEMA(KEY_SCHEMA_FLAGS),
+    DYNAMIC_BASE(KEY_DYNAMIC_BASE),
+    INDEX(KEY_INDEX_FLAGS),
+    DOCS(KEY_DOCS);
+
+    final String responseKey;
+
+    FieldDataKey(String responseKey) {
+      this.responseKey = responseKey;
+    }
+  }
+
+  /** Per-field accumulation state across shards: aggregated response data and field validation. */
+  private static class AggregatedFieldData {
+    final EnumMap<FieldDataKey, Object> properties = new EnumMap<>(FieldDataKey.class);
+    final String originalShardAddr;
+    final LukeResponse.FieldInfo originalFieldInfo;
+    private String indexFlagsShardAddr; // shard that supplied the stored "index" flags
+
+    AggregatedFieldData(String shardAddr, LukeResponse.FieldInfo fieldInfo) {
+      this.originalShardAddr = shardAddr;
+      this.originalFieldInfo = fieldInfo;
+      properties.put(FieldDataKey.TYPE, fieldInfo.getType());
+      properties.put(FieldDataKey.SCHEMA, fieldInfo.getSchema());
+      Object dynBase = fieldInfo.getExtras().get(KEY_DYNAMIC_BASE);
+      if (dynBase != null) {
+        properties.put(FieldDataKey.DYNAMIC_BASE, dynBase);
+      }
+      Object indexFlags = fieldInfo.getExtras().get(KEY_INDEX_FLAGS);
+      if (indexFlags != null) {
+        properties.put(FieldDataKey.INDEX, indexFlags);
+        this.indexFlagsShardAddr = shardAddr;
+      }
+    }
+
+    SimpleOrderedMap<Object> toResponse() { // keys come out in FieldDataKey declaration order
+      SimpleOrderedMap<Object> result = new SimpleOrderedMap<>();
+      for (Map.Entry<FieldDataKey, Object> entry : properties.entrySet()) {
+        result.add(entry.getKey().responseKey, entry.getValue());
+      }
+      return result;
+    }
+  }
+
+  private static class ShardData { // one shard's contribution to the "shards" section
+    final String shardAddr; // key in "shards" response map
+    final Map<String, LukeResponse.FieldInfo> shardFieldInfo; // keyed by field name
+    private NamedList<Object> indexInfo; // value for "index" key in per-shard entry
+    private SimpleOrderedMap<Object> detailedFields; // keyed by field name
+
+    ShardData(String shardAddr, Map<String, LukeResponse.FieldInfo> shardFieldInfo) {
+      this.shardAddr = shardAddr;
+      this.shardFieldInfo = shardFieldInfo;
+    }
+
+    void setIndexInfo(NamedList<Object> indexInfo) {
+      this.indexInfo = indexInfo;
+    }
+
+    void addDetailedFieldInfo(String fieldName, SimpleOrderedMap<Object> fieldStats) {
+      if (detailedFields == null) {
+        detailedFields = new SimpleOrderedMap<>();
+      }
+      detailedFields.add(fieldName, fieldStats);
+    }
+
+    SimpleOrderedMap<Object> toResponseEntry() { // empty when the shard sent neither section
+      SimpleOrderedMap<Object> entry = new SimpleOrderedMap<>();
+      if (indexInfo != null) {
+        entry.add(RSP_INDEX, indexInfo);
+      }
+      if (detailedFields != null) {
+        entry.add(RSP_FIELDS, detailedFields);
+      }
+      return entry;
+    }
+  }
+
+  /**
+   * @return true if the request was handled in distributed mode, false if prepDistributed
+   *     short-circuited (e.g. single-shard collection) and the caller should fall through to local
+   *     logic.
+   */
+  private boolean handleDistributed(SolrQueryRequest req, SolrQueryResponse rsp) {
+    SolrParams reqParams = req.getParams();
+
+    // docId is a Lucene-internal integer, not meaningful across shards
+    if (reqParams.getInt(DOC_ID) != null) {
+      throw new SolrException(
+          ErrorCode.BAD_REQUEST,
+          "docId parameter is not supported in distributed mode."
+              + " Use the id parameter to look up documents by their Solr unique key.");
+    }
+
+    ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
+    ResponseBuilder rb = new ResponseBuilder(req, rsp, List.of());
+    shardHandler.prepDistributed(rb);
+
+    String[] shards = rb.shards;
+    if (shards == null || shards.length == 0) {
+      return false;
+    }
+
+    ShardRequest sreq = new ShardRequest();
+    sreq.shards = shards;
+    sreq.actualShards = shards;
+    sreq.responses = new ArrayList<>(shards.length);
+
+    String reqPath = (String) req.getContext().get(PATH);
+
+    for (String shard : shards) { // one sub-request per shard, routed back to this handler's path
+      ModifiableSolrParams params = new ModifiableSolrParams(reqParams);
+      params.set(CommonParams.QT, reqPath);
+      ShardHandler.setShardAttributesToParams(params, sreq.purpose);
+      shardHandler.submit(sreq, shard, params);
+    }
+
+    ShardResponse lastSrsp = shardHandler.takeCompletedOrError();
+    if (lastSrsp == null) {
+      throw new SolrException(ErrorCode.SERVER_ERROR, "No responses received from shards");
+    }
+    List<ShardResponse> responses = sreq.responses;
+    for (ShardResponse srsp : responses) { // the first shard failure aborts the whole request
+      if (srsp.getException() != null) {
+        shardHandler.cancelAll();
+        if (srsp.getException() instanceof SolrException) {
+          throw (SolrException) srsp.getException();
+        }
+        throw new SolrException(ErrorCode.SERVER_ERROR, srsp.getException());
+      }
+    }
+
+    aggregateDistributedResponses(req, rsp, responses);
     rsp.setHttpCaching(false);
+    return true;
+  }
+
+  private static String shardAddress(ShardResponse srsp) {
+    return srsp.getShardAddress() != null ?
srsp.getShardAddress() : srsp.getShard();
+  }
+
+  private void aggregateDistributedResponses(
+      SolrQueryRequest req, SolrQueryResponse rsp, List<ShardResponse> responses) {
+    // Merges per-shard Luke responses into rsp; per-shard detail is reported under "shards".
+    if (!responses.isEmpty()) {
+      // A null first response is rejected by the per-shard loop below, like any other shard.
+      NamedList<Object> firstShardRsp = responses.get(0).getSolrResponse().getResponse();
+      Object schema = firstShardRsp == null ? null : firstShardRsp.get(RSP_SCHEMA);
+      if (schema != null) {
+        rsp.add(RSP_SCHEMA, schema);
+      }
+    }
+
+    long totalNumDocs = 0;
+    int totalMaxDoc = 0;
+    long totalDeletedDocs = 0;
+    int totalSegmentCount = 0;
+    Map<String, AggregatedFieldData> aggregatedFields = new TreeMap<>();
+    String firstDocShard = null;
+    Object firstDoc = null;
+    List<ShardData> shardDataList = new ArrayList<>();
+
+    for (ShardResponse srsp : responses) {
+      NamedList<Object> shardRsp = srsp.getSolrResponse().getResponse();
+      if (shardRsp == null) {
+        throw new SolrException(
+            ErrorCode.SERVER_ERROR,
+            "Unexpected empty response from shard: " + shardAddress(srsp));
+      }
+      LukeResponse lukeRsp = new LukeResponse();
+      lukeRsp.setResponse(shardRsp);
+      // Only process field info if the shard explicitly included it in its response.
+      // LukeResponse.getFieldInfo() falls back to schema.fields which has incomplete data.
+      Map<String, LukeResponse.FieldInfo> fieldInfo =
+          shardRsp.get(RSP_FIELDS) != null ? lukeRsp.getFieldInfo() : null;
+      ShardData shardData = new ShardData(shardAddress(srsp), fieldInfo);
+
+      NamedList<Object> shardIndex = lukeRsp.getIndexInfo();
+      if (shardIndex != null) {
+        totalNumDocs += Optional.ofNullable(lukeRsp.getNumDocs()).orElse(0L);
+        totalMaxDoc = Math.max(totalMaxDoc, Optional.ofNullable(lukeRsp.getMaxDoc()).orElse(0));
+        totalDeletedDocs += Optional.ofNullable(lukeRsp.getDeletedDocs()).orElse(0L);
+        Number segCount = (Number) shardIndex.get(KEY_SEGMENT_COUNT);
+        totalSegmentCount += segCount != null ? segCount.intValue() : 0;
+
+        shardData.setIndexInfo(shardIndex);
+      }
+
+      processShardFields(shardData, aggregatedFields);
+      Object doc = shardRsp.get(RSP_DOC);
+      if (doc != null) {
+        if (firstDoc != null) {
+          throw new SolrException(
+              ErrorCode.SERVER_ERROR,
+              "Solr Id of document "
+                  + firstDoc
+                  + " found on multiple shards ("
+                  + firstDocShard
+                  + " and "
+                  + shardAddress(srsp)
+                  + "). The index is corrupt: unique key constraint violated.");
+        }
+        firstDoc = doc;
+        firstDocShard = shardAddress(srsp);
+      }
+      shardDataList.add(shardData);
+    }
+
+    shardDataList.sort(Comparator.comparing(sd -> sd.shardAddr));
+    SimpleOrderedMap<Object> shardsInfo = new SimpleOrderedMap<>();
+    for (ShardData sd : shardDataList) {
+      SimpleOrderedMap<Object> entry = sd.toResponseEntry();
+      if (!entry.isEmpty()) {
+        shardsInfo.add(sd.shardAddr, entry);
+      }
+    }
+
+    SimpleOrderedMap<Object> aggregatedIndex = new SimpleOrderedMap<>();
+    aggregatedIndex.add(KEY_NUM_DOCS, totalNumDocs);
+    aggregatedIndex.add(KEY_MAX_DOC, totalMaxDoc);
+    aggregatedIndex.add(KEY_DELETED_DOCS, totalDeletedDocs);
+    aggregatedIndex.add(KEY_SEGMENT_COUNT, totalSegmentCount);
+    rsp.add(RSP_INDEX, aggregatedIndex);
+
+    if (firstDoc != null) {
+      rsp.add(RSP_DOC, firstDoc);
+    }
+    boolean narrowLongs = shouldNarrowLongsForOldClient(req);
+    if (narrowLongs) {
+      narrowLongToInt(aggregatedIndex, KEY_NUM_DOCS);
+      narrowLongToInt(aggregatedIndex, KEY_DELETED_DOCS);
+    }
+    if (!aggregatedFields.isEmpty()) {
+      SimpleOrderedMap<Object> aggregatedFieldsNL = new SimpleOrderedMap<>();
+      for (Map.Entry<String, AggregatedFieldData> entry : aggregatedFields.entrySet()) {
+        SimpleOrderedMap<Object> fieldResponse = entry.getValue().toResponse();
+        if (narrowLongs) {
+          narrowLongToInt(fieldResponse, KEY_DOCS);
+        }
+        aggregatedFieldsNL.add(entry.getKey(), fieldResponse);
+      }
+      rsp.add(RSP_FIELDS, aggregatedFieldsNL);
+    }
+
+    // Add info section last (before shards), matching the local-mode key order.
+    if (!responses.isEmpty()) {
+      NamedList<Object> firstShardRsp = responses.get(0).getSolrResponse().getResponse();
+      Object info = firstShardRsp == null ? null : firstShardRsp.get(RSP_INFO);
+      if (info != null) {
+        rsp.add(RSP_INFO, info);
+      }
+    }
+
+    if (req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
+      rsp.add(RSP_SHARDS, shardsInfo);
+    }
+  }
+
+  private void processShardFields(
+      ShardData shardData, Map<String, AggregatedFieldData> aggregatedFields) {
+    if (shardData.shardFieldInfo == null) {
+      return;
+    }
+    for (Map.Entry<String, LukeResponse.FieldInfo> entry : shardData.shardFieldInfo.entrySet()) {
+      String fieldName = entry.getKey();
+      LukeResponse.FieldInfo fi = entry.getValue();
+
+      aggregateShardField(shardData.shardAddr, fi, aggregatedFields);
+
+      // Detailed stats — kept per-shard, not aggregated
+      NamedList<?> topTerms = fi.getTopTerms();
+      if (topTerms != null) {
+        SimpleOrderedMap<Object> detailedFieldInfo = new SimpleOrderedMap<>();
+        detailedFieldInfo.add(KEY_TOP_TERMS, topTerms);
+        detailedFieldInfo.add(KEY_HISTOGRAM, fi.getExtras().get(KEY_HISTOGRAM));
+        detailedFieldInfo.add(KEY_DISTINCT, fi.getDistinct());
+        shardData.addDetailedFieldInfo(fieldName, detailedFieldInfo);
+      }
+    }
+  }
+
+  private void aggregateShardField(
+      String shardAddr,
+      LukeResponse.FieldInfo fi,
+      Map<String, AggregatedFieldData> aggregatedFields) {
+
+    String fieldName = fi.getName();
+
+    AggregatedFieldData fieldData = aggregatedFields.get(fieldName);
+    if (fieldData == null) {
+      fieldData = new AggregatedFieldData(shardAddr, fi);
+      aggregatedFields.put(fieldName, fieldData);
+    } else {
+      // Subsequent shards: validate that type, schema, and dynamicBase match
+      validateFieldAttr(
+          fieldName,
+          KEY_TYPE,
+          fi.getType(),
+          fieldData.originalFieldInfo.getType(),
+          shardAddr,
+          fieldData.originalShardAddr);
+      validateFieldAttr(
+          fieldName,
+          KEY_SCHEMA_FLAGS,
+          fi.getSchema(),
+          fieldData.originalFieldInfo.getSchema(),
+          shardAddr,
+          fieldData.originalShardAddr);
+      validateFieldAttr(
+          fieldName,
+          KEY_DYNAMIC_BASE,
+          fi.getExtras().get(KEY_DYNAMIC_BASE),
+
fieldData.originalFieldInfo.getExtras().get(KEY_DYNAMIC_BASE),
+          shardAddr,
+          fieldData.originalShardAddr);
+
+      Object indexFlags = fi.getExtras().get(KEY_INDEX_FLAGS);
+      if (indexFlags != null) {
+        Object existing = fieldData.properties.get(FieldDataKey.INDEX);
+        if (existing == null) {
+          fieldData.properties.put(FieldDataKey.INDEX, indexFlags);
+          fieldData.indexFlagsShardAddr = shardAddr;
+        } else {
+          validateFieldAttr(
+              fieldName,
+              KEY_INDEX_FLAGS,
+              indexFlags,
+              existing,
+              shardAddr,
+              fieldData.indexFlagsShardAddr);
+        }
+      }
+    }
+    // Sum per-shard doc counts; Map.merge rejects null, so a missing count is treated as 0.
+    Object shardDocs = Objects.requireNonNullElse(fi.getDocs(), 0L);
+    fieldData.properties.merge(FieldDataKey.DOCS, shardDocs, (a, b) -> (long) a + (long) b);
+  }
+
+  /**
+   * Minimum client version that understands Long values in distributed Luke responses. Distributed
+   * Luke aggregates counts across shards, which can overflow Integer. Older clients cast these
+   * values to Integer and would fail with a ClassCastException.
+   */
+  private static final SolrVersion DISTRIB_LONG_COUNTS_MIN_VERSION =
+      SolrVersion.forIntegers(9, 11, 0);
+
+  private static boolean shouldNarrowLongsForOldClient(SolrQueryRequest req) {
+    HttpSolrCall call = req.getHttpSolrCall();
+    if (call == null) return false; // no HTTP call attached to the request: nothing to narrow
+    SolrVersion clientVersion = call.getUserAgentSolrVersion();
+    return clientVersion != null && clientVersion.lessThan(DISTRIB_LONG_COUNTS_MIN_VERSION);
+  }
+
+  /** Narrows a Long value to Integer if it fits, for javabin backward compatibility. */
+  private static void narrowLongToInt(NamedList<Object> nl, String key) {
+    int idx = nl.indexOf(key, 0);
+    if (idx >= 0) {
+      Object val = nl.getVal(idx);
+      if (val instanceof Long) {
+        long l = (Long) val;
+        if (l >= Integer.MIN_VALUE && l <= Integer.MAX_VALUE) { // out-of-range values stay Long
+          nl.setVal(idx, (int) l);
+        }
+      }
+    }
+  }
+
+  /** Validates that a field attribute value is identical across shards.
*/ + private void validateFieldAttr( + String fieldName, + String attrName, + Object currentVal, + Object expectedVal, + String currentShardAddr, + String expectedShardAddr) { + String currentStr = currentVal != null ? currentVal.toString() : null; + String expectedStr = expectedVal != null ? expectedVal.toString() : null; + if (!Objects.equals(currentStr, expectedStr)) { + String error = + "FIELD CONFIGURATION MISMATCH! Field '" + + fieldName + + "' has inconsistent '" + + attrName + + "' across shards: '" + + expectedStr + + "' (from " + + expectedShardAddr + + ") vs '" + + currentStr + + "' (from " + + currentShardAddr + + "). Use distrib=false to query individual shards and compare field configurations."; + log.error(error); + throw new SolrException(ErrorCode.SERVER_ERROR, error); + } } /** @@ -326,8 +780,8 @@ private static SimpleOrderedMap getDocumentFieldsInfo( SchemaField sfield = schema.getFieldOrNull(field.name()); FieldType ftype = (sfield == null) ? null : sfield.getType(); - f.add("type", (ftype == null) ? null : ftype.getTypeName()); - f.add("schema", getFieldFlags(sfield)); + f.add(KEY_TYPE, (ftype == null) ? null : ftype.getTypeName()); + f.add(KEY_SCHEMA_FLAGS, getFieldFlags(sfield)); f.add("flags", getFieldFlags(field)); f.add("value", (ftype == null) ? null : ftype.toExternal(field)); @@ -414,17 +868,18 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re SchemaField sfield = schema.getFieldOrNull(fieldName); FieldType ftype = (sfield == null) ? null : sfield.getType(); - fieldMap.add("type", (ftype == null) ? null : ftype.getTypeName()); - fieldMap.add("schema", getFieldFlags(sfield)); + fieldMap.add(KEY_TYPE, (ftype == null) ? 
null : ftype.getTypeName()); + fieldMap.add(KEY_SCHEMA_FLAGS, getFieldFlags(sfield)); if (sfield != null && schema.isDynamicField(sfield.getName()) && schema.getDynamicPattern(sfield.getName()) != null) { - fieldMap.add("dynamicBase", schema.getDynamicPattern(sfield.getName())); + fieldMap.add(KEY_DYNAMIC_BASE, schema.getDynamicPattern(sfield.getName())); } Terms terms = reader.terms(fieldName); // Not indexed, so we need to report what we can (it made it through the fl param if // specified) if (terms == null) { + fieldMap.add(KEY_DOCS, 0); finfo.add(fieldName, fieldMap); continue; } @@ -438,17 +893,16 @@ private static SimpleOrderedMap getIndexedFieldsInfo(SolrQueryRequest re try { IndexableField fld = doc.getField(fieldName); if (fld != null) { - fieldMap.add("index", getFieldFlags(fld)); + fieldMap.add(KEY_INDEX_FLAGS, getFieldFlags(fld)); } else { - // it is a non-stored field... - fieldMap.add("index", "(unstored field)"); + fieldMap.add(KEY_INDEX_FLAGS, "(unstored field)"); } } catch (Exception ex) { log.warn("error reading field: {}", fieldName); } } } - fieldMap.add("docs", terms.getDocCount()); + fieldMap.add(KEY_DOCS, terms.getDocCount()); } if (fields != null && (fields.contains(fieldName) || fields.contains("*"))) { getDetailedFieldInfo(req, fieldName, fieldMap); @@ -469,7 +923,7 @@ private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws I StoredFields storedFields = reader.storedFields(); // Deal with the chance that the first bunch of terms are in deleted documents. Is there a // better way? - for (int idx = 0; idx < 1000 && postingsEnum == null; ++idx) { + for (int idx = 0; idx < 1000; ++idx) { text = termsEnum.next(); // Ran off the end of the terms enum without finding any live docs with that field in them. 
if (text == null) { @@ -478,7 +932,7 @@ private static Document getFirstLiveDoc(Terms terms, LeafReader reader) throws I postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); final Bits liveDocs = reader.getLiveDocs(); if (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - if (liveDocs != null && liveDocs.get(postingsEnum.docID())) { + if (liveDocs != null && !liveDocs.get(postingsEnum.docID())) { continue; } return storedFields.document(postingsEnum.docID()); @@ -726,13 +1180,13 @@ private static void getDetailedFieldInfo( } } tiq.histogram.add(buckets); - fieldMap.add("distinct", tiq.distinctTerms); + fieldMap.add(KEY_DISTINCT, tiq.distinctTerms); // Include top terms - fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema())); + fieldMap.add(KEY_TOP_TERMS, tiq.toNamedList(req.getSearcher().getSchema())); // Add a histogram - fieldMap.add("histogram", tiq.histogram.toNamedList()); + fieldMap.add(KEY_HISTOGRAM, tiq.histogram.toNamedList()); } private static List toListOfStrings(SchemaField[] raw) { diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java new file mode 100644 index 000000000000..601c5eeeac5e --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeHandlerCloudTest.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.admin; + +import static org.apache.solr.common.params.CommonParams.DISTRIB; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.client.solrj.request.LukeRequest; +import org.apache.solr.client.solrj.request.schema.SchemaRequest; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Replica; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.SolrParams; +import org.junit.BeforeClass; +import org.junit.Test; + +/** Cloud-specific Luke tests that require SolrCloud features like managed schema and Schema API. 
*/ +public class LukeHandlerCloudTest extends SolrCloudTestCase { + + private static final String DISTRIB_COLLECTION = "lukeDistribTests"; + private static final int NUM_DOCS = 20; + + @BeforeClass + public static void setupCluster() throws Exception { + System.setProperty("managed.schema.mutable", "true"); + configureCluster(2) + .addConfig("managed", configset("cloud-managed")) + .addConfig("dynamic", configset("cloud-dynamic")) + .configure(); + + CollectionAdminRequest.createCollection(DISTRIB_COLLECTION, "dynamic", 2, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + cluster.waitForActiveCollection(DISTRIB_COLLECTION, 2, 2); + + for (int i = 0; i < NUM_DOCS; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", String.valueOf(i)); + doc.addField("name", "name_" + i); + doc.addField("subject", "subject value " + (i % 5)); + cluster.getSolrClient().add(DISTRIB_COLLECTION, doc); + } + cluster.getSolrClient().commit(DISTRIB_COLLECTION); + } + + private void requestLuke(String collection, SolrParams extra) throws Exception { + LukeRequest req = new LukeRequest(extra); + req.setNumTerms(0); + req.process(cluster.getSolrClient(), collection); + } + + @Test + public void testDistributedShardError() { + SolrParams lukeParams = params("id", "0", "show", "schema"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(DISTRIB_COLLECTION, lukeParams)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention doc style mismatch: " + fullMessage, + fullMessage.contains("missing doc param for doc style")); + } + + @Test + public void testDistributedDocIdRejected() { + SolrParams lukeParams = params("docId", "0"); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(DISTRIB_COLLECTION, lukeParams)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention docId not supported: " + fullMessage, + 
fullMessage.contains("docId parameter is not supported in distributed mode")); + } + + /** + * Verifies that distributed Luke detects inconsistent index flags across shards. Uses Schema API + * to change a field's {@code stored} property between indexing on different shards, producing + * different Lucene FieldInfo (and thus different index flags strings) on each shard. + */ + @Test + public void testInconsistentIndexFlagsAcrossShards() throws Exception { + String collection = "lukeInconsistentFlags"; + CollectionAdminRequest.createCollection(collection, "managed", 2, 1) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + + cluster.waitForActiveCollection(collection, 2, 2); + + try { + // Add a field with stored=true, indexed=true + Map fieldAttrs = new LinkedHashMap<>(); + fieldAttrs.put("name", "test_flag_s"); + fieldAttrs.put("type", "string"); + fieldAttrs.put("stored", true); + fieldAttrs.put("indexed", true); + new SchemaRequest.AddField(fieldAttrs).process(cluster.getSolrClient(), collection); + + // Index a target doc WITH the field, plus seed docs without it + SolrInputDocument targetDoc = new SolrInputDocument(); + targetDoc.addField("id", "target"); + targetDoc.addField("test_flag_s", "has_indexed"); + cluster.getSolrClient().add(collection, targetDoc); + + List seedDocs = new ArrayList<>(); + for (int i = 0; i < 20; i++) { + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "seed_" + i); + seedDocs.add(doc); + } + cluster.getSolrClient().add(collection, seedDocs); + cluster.getSolrClient().commit(collection); + + // Find which shard has the target doc by querying each replica directly. + // Must use distrib=false — SolrCloud defaults distrib to true even on direct replica queries. 
+ DocCollection docColl = getCollectionState(collection); + String targetSliceName = null; + for (Slice slice : docColl.getSlices()) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("id:target"); + q.set(DISTRIB, "false"); + QueryResponse qr = client.query(q); + if (qr.getResults().getNumFound() > 0) { + targetSliceName = slice.getName(); + } + } + } + assertNotNull("target doc should exist on a shard", targetSliceName); + + // Find a seed doc on the other shard + String otherDocId = null; + for (Slice slice : docColl.getSlices()) { + if (!slice.getName().equals(targetSliceName)) { + Replica leader = slice.getLeader(); + try (SolrClient client = getHttpSolrClient(leader)) { + SolrQuery q = new SolrQuery("*:*"); + q.setRows(1); + q.set(DISTRIB, "false"); + QueryResponse qr = client.query(q); + assertTrue("other shard should have seed docs", qr.getResults().getNumFound() > 0); + otherDocId = (String) qr.getResults().get(0).getFieldValue("id"); + } + break; + } + } + assertNotNull("should find a seed doc on the other shard", otherDocId); + + // Change the field to stored=false via Schema API + fieldAttrs.put("stored", false); + new SchemaRequest.ReplaceField(fieldAttrs).process(cluster.getSolrClient(), collection); + + // Reload collection to pick up schema change + CollectionAdminRequest.reloadCollection(collection).process(cluster.getSolrClient()); + + // Update the other-shard doc to include the field (now unstored in the new segment) + SolrInputDocument updateDoc = new SolrInputDocument(); + updateDoc.addField("id", otherDocId); + updateDoc.addField("test_flag_s", "not_indexed"); + cluster.getSolrClient().add(collection, updateDoc); + cluster.getSolrClient().commit(collection); + + // Distributed Luke should detect inconsistent index flags between the two shards. + // One shard has stored=true segments, the other has stored=false segments for test_flag_s. 
+ // No need to set distrib=true — ZK-aware nodes default to distributed mode. + Exception ex = + expectThrows(Exception.class, () -> requestLuke(collection, params("fl", "test_flag_s"))); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception chain should mention inconsistent index flags: " + fullMessage, + fullMessage.contains("inconsistent")); + } finally { + CollectionAdminRequest.deleteCollection(collection) + .processAndWait(cluster.getSolrClient(), DEFAULT_TIMEOUT); + } + } +} diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java new file mode 100644 index 000000000000..b727114572f3 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeRequestHandlerDistribTest.java @@ -0,0 +1,422 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.handler.admin; + +import java.util.Map; +import org.apache.solr.BaseDistributedSearchTestCase; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.request.LukeRequest; +import org.apache.solr.client.solrj.response.LukeResponse; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.request.SolrQueryRequestBase; +import org.apache.solr.update.AddUpdateCommand; +import org.apache.solr.update.CommitUpdateCommand; +import org.junit.Test; + +public class LukeRequestHandlerDistribTest extends BaseDistributedSearchTestCase { + + private static final Long NUM_DOCS = 20L; + + public LukeRequestHandlerDistribTest() { + fixShardCount(2); + } + + private LukeResponse requestLuke() throws Exception { + return requestLuke(new ModifiableSolrParams()); + } + + private LukeResponse requestLuke(ModifiableSolrParams extra) throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("qt", "/admin/luke"); + params.set("numTerms", "0"); + params.set("shards.info", "true"); + params.add(extra); + + // query() sends to control and a random shard with shards param, compares responses + handle.put("QTime", SKIPVAL); + handle.put(LukeRequestHandler.RSP_INDEX, SKIP); + handle.put(LukeRequestHandler.RSP_SHARDS, SKIP); + // Detailed per-field stats (distinct, topTerms, histogram) are kept per-shard in + // distributed mode and intentionally excluded from the aggregated top-level fields. + // Local mode includes them inline, so skip them in the comparison. 
+ handle.put(LukeRequestHandler.KEY_DISTINCT, SKIP); + handle.put(LukeRequestHandler.KEY_TOP_TERMS, SKIP); + handle.put(LukeRequestHandler.KEY_HISTOGRAM, SKIP); + QueryResponse qr = query(params); + LukeResponse rsp = new LukeResponse(); + rsp.setResponse(qr.getResponse()); + + return rsp; + } + + private void assertLukeXPath(ModifiableSolrParams extra, String... xpaths) throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("shards", shards); + params.add(extra); + LukeTestUtil.assertLukeXPath(controlClient, null, params, xpaths); + } + + private void indexTestData() throws Exception { + for (int i = 0; i < NUM_DOCS; i++) { + index("id", String.valueOf(i), "name", "name_" + i, "subject", "subject value " + (i % 5)); + } + commit(); + } + + @Test + public void testDistributedAggregateAndFields() throws Exception { + indexTestData(); + + // --- Aggregate index stats --- + LukeResponse rsp = requestLuke(); + + assertEquals("aggregated numDocs should equal total docs", NUM_DOCS, rsp.getNumDocs()); + assertTrue("aggregated maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertNotNull("deletedDocs should be present", rsp.getDeletedDocs()); + + Map shardResponses = rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); + assertEquals("should have 2 shard entries", 2, shardResponses.size()); + + Long sumShardDocs = 0L; + for (Map.Entry entry : shardResponses.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); + assertNotNull("each shard should have maxDoc", shardLuke.getMaxDoc()); + sumShardDocs += shardLuke.getNumDocs(); + } + assertEquals( + "sum of per-shard numDocs should equal aggregated numDocs", rsp.getNumDocs(), sumShardDocs); + + // --- Field-level aggregation --- + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + LukeResponse.FieldInfo nameField = fields.get("name"); 
+ assertNotNull("'name' field should be present", nameField); + assertNotNull("field type should be present", nameField.getType()); + assertNotNull("schema flags should be present", nameField.getSchema()); + assertEquals( + "aggregated docs count for 'name' should equal total docs", + (long) NUM_DOCS, + nameField.getDocs()); + + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id field type should be string", "string", idField.getType()); + + assertLukeXPath( + new ModifiableSolrParams(), + "//lst[@name='index']/long[@name='numDocs'][.='20']", + "count(//lst[@name='shards']/lst)=2", + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema']", + "//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='20']", + "//lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='id']/long[@name='docs'][.='20']"); + + // --- Detailed per-shard stats (topTerms, histogram, distinct) --- + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("fl", "name"); + params.set("numTerms", "5"); + + rsp = requestLuke(params); + + // Top-level fields should NOT have topTerms, distinct, histogram + nameField = rsp.getFieldInfo().get("name"); + assertNotNull("'name' field should be present", nameField); + assertNull("topTerms should NOT be in top-level fields", nameField.getTopTerms()); + assertEquals("distinct should NOT be in top-level fields", 0, nameField.getDistinct()); + + // Per-shard entries should have detailed stats + shardResponses = rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); + + assertLukeXPath( + params, + "/response/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + 
"/response/lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='20']", + "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms'])", + "not(/response/lst[@name='fields']/lst[@name='name']/lst[@name='histogram'])", + "not(/response/lst[@name='fields']/lst[@name='name']/int[@name='distinct'])", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='topTerms']", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/lst[@name='histogram']/int[@name='1']", + "//lst[@name='shards']/lst/lst[@name='fields']/lst[@name='name']/int[@name='distinct']"); + + // Query a single client without the shards param — local mode + LukeRequest req = new LukeRequest(); + req.setNumTerms(0); + rsp = req.process(controlClient); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertNull("shards should NOT be present in local mode", rsp.getShardResponses()); + + // Query a single client with distrib=false — no shards param + req = new LukeRequest(params("distrib", "false")); + req.setNumTerms(0); + rsp = req.process(controlClient); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertNull("shards should NOT be present with distrib=false", rsp.getShardResponses()); + + // --- Schema view --- + params = new ModifiableSolrParams(); + params.set("show", "schema"); + + assertLukeXPath( + params, + "//lst[@name='schema']/lst[@name='fields']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='schema']/lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='schema']/lst[@name='dynamicFields']/lst[@name='*_s']", + "//lst[@name='schema']/str[@name='uniqueKeyField'][.='id']", + "//lst[@name='schema']/lst[@name='types']/lst[@name='string']", + "//lst[@name='schema']/lst[@name='types']/lst[@name='nametext']", + "//lst[@name='schema']/lst[@name='similarity']", + "not(/response/lst[@name='fields'])", + "count(//lst[@name='shards']/lst)=2"); + + // --- 
Doc lookup not found --- + params = new ModifiableSolrParams(); + params.set("id", "999888777"); + + rsp = requestLuke(params); + + NamedList raw = rsp.getResponse(); + assertNull("doc section should NOT be present for missing ID", raw.get("doc")); + + assertLukeXPath(params, "not(//lst[@name='doc'])"); + + // --- Doc lookup found --- + params = new ModifiableSolrParams(); + params.set("id", "0"); + + assertLukeXPath( + params, + "//lst[@name='doc']/int[@name='docId']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='type'][.='string']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='id']/str[@name='value'][.='0']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='doc']/lst[@name='lucene']/lst[@name='name']/str[@name='value'][.='name_0']", + "//lst[@name='doc']/arr[@name='solr']/str[.='0']", + "//lst[@name='doc']/arr[@name='solr']/str[.='name_0']", + "//lst[@name='index']", + "//lst[@name='info']"); + } + + @Test + @ShardsFixed(num = 4) + public void testSparseShardsAndDeferredIndexFlags() throws Exception { + // Index a single doc on shard 0 + index_specific( + 0, "id", "100", "name", "sparse test", "subject", "subject value", "cat_s", "category"); + commit(); + + LukeResponse rsp = requestLuke(); + + // Index-level stats + assertEquals("numDocs should be 1", 1, (long) rsp.getNumDocs()); + assertTrue("maxDoc should be > 0", rsp.getMaxDoc() > 0); + assertEquals("deletedDocs should be 0", 0L, (long) rsp.getDeletedDocs()); + + Map shardResponses = rsp.getShardResponses(); + assertNotNull("shards section should be present", shardResponses); + assertEquals( + "should have " + getShardCount() + " shard entries", + getShardCount(), + shardResponses.size()); + + long sumShardDocs = 0; + for (Map.Entry entry : shardResponses.entrySet()) { + LukeResponse shardLuke = entry.getValue(); + assertNotNull("each shard should have numDocs", shardLuke.getNumDocs()); + sumShardDocs += 
shardLuke.getNumDocs(); + } + assertEquals("sum of per-shard numDocs should be 1", 1, sumShardDocs); + + // Field-level checks + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + + LukeResponse.FieldInfo idField = fields.get("id"); + assertNotNull("'id' field should be present", idField); + assertEquals("id type", "string", idField.getType()); + assertNotNull("id schema flags", idField.getSchema()); + + LukeResponse.FieldInfo nameField = fields.get("name"); + assertNotNull("'name' field should be present", nameField); + assertNotNull("name type", nameField.getType()); + assertNotNull("name schema flags", nameField.getSchema()); + assertEquals("name docs should be 1", 1, nameField.getDocs()); + + // Dynamic field — should have dynamicBase in extras + LukeResponse.FieldInfo catField = fields.get("cat_s"); + assertNotNull("'cat_s' field should be present", catField); + assertNotNull("cat_s type", catField.getType()); + assertNotNull("cat_s dynamicBase", catField.getExtras().get("dynamicBase")); + + assertLukeXPath( + new ModifiableSolrParams(), + "//lst[@name='index']/long[@name='numDocs'][.='1']", + "//lst[@name='index']/long[@name='deletedDocs'][.='0']", + "count(//lst[@name='shards']/lst)=" + getShardCount(), + "//lst[@name='fields']/lst[@name='name']/str[@name='type'][.='nametext']", + "//lst[@name='fields']/lst[@name='name']/str[@name='schema']", + "//lst[@name='fields']/lst[@name='name']/str[@name='index']", + "//lst[@name='fields']/lst[@name='name']/long[@name='docs'][.='1']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='cat_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='cat_s']/long[@name='docs'][.='1']"); + + // Index docs with the target field across shards, plus anchor docs without it. + // Use numeric IDs (the default test schema copies id to integer fields). 
+ // Target docs get even IDs starting at 1000, anchor docs get odd IDs. + for (int i = 0; i < getShardCount() * 4; i++) { + index("id", String.valueOf(1000 + i * 2), "flag_target_s", "value_" + i); + index("id", String.valueOf(1001 + i * 2), "name", "anchor"); + } + commit(); + + // Delete all target docs except the first one, using per-shard deletes. + // Then optimize to force segment merge — expunges soft-deleted docs so + // Terms.getDocCount() (which backs docs) reflects only live docs. + for (SolrClient client : clients) { + client.deleteByQuery("flag_target_s:* AND -id:1000"); + client.optimize(); + } + controlClient.deleteByQuery("flag_target_s:* AND -id:1000"); + controlClient.optimize(); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("fl", "flag_target_s"); + + rsp = requestLuke(params); + + fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + LukeResponse.FieldInfo targetField = fields.get("flag_target_s"); + assertNotNull("'flag_target_s' field should be present", targetField); + + assertLukeXPath( + params, + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='type'][.='string']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='dynamicBase'][.='*_s']", + "//lst[@name='fields']/lst[@name='flag_target_s']/str[@name='index']", + "//lst[@name='fields']/lst[@name='flag_target_s']/long[@name='docs'][.='1']"); + } + + @Test + public void testDistributedDocLookupDuplicateId() throws Exception { + String dupId = "99999"; + + // Write the same document directly to two shard cores via UpdateHandler, + // completely bypassing the distributed update processor chain. 
+ for (int i = 0; i < 2; i++) { + try (SolrCore core = jettys.get(i).getCoreContainer().getCore("collection1")) { + SolrInputDocument solrDoc = new SolrInputDocument(); + solrDoc.addField("id", dupId); + solrDoc.addField("name", "dup_copy_" + i); + + AddUpdateCommand addCmd = + new AddUpdateCommand(new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}); + addCmd.solrDoc = solrDoc; + core.getUpdateHandler().addDoc(addCmd); + + CommitUpdateCommand commitCmd = + new CommitUpdateCommand( + new SolrQueryRequestBase(core, new ModifiableSolrParams()) {}, false); + commitCmd.waitSearcher = true; + core.getUpdateHandler().commit(commitCmd); + } + } + + // Distributed Luke doc lookup should detect the corruption + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("id", dupId); + + Exception ex = expectThrows(Exception.class, () -> requestLuke(params)); + String fullMessage = SolrException.getRootCause(ex).getMessage(); + assertTrue( + "exception should mention duplicate/corrupt index: " + fullMessage, + fullMessage.contains("found on multiple shards")); + } + + @Test + public void testShardsParamRoutesToSpecificShard() throws Exception { + // Index a doc with a dynamic field only to shard 0 + index_specific(0, "id", "700", "name", "shard0_only", "only_on_shard0_s", "present"); + // Index a plain doc to shard 1 (no dynamic field) + index_specific(1, "id", "701", "name", "shard1_only"); + commit(); + + // Query with shards= pointing only at shard 1 — the dynamic field should NOT appear. + // This also tests that a single remote shard is correctly fanned out to rather than + // falling through to local-mode on the coordinating node. 
+ LukeRequest req = new LukeRequest(params("shards", shardsArr[1])); + req.setNumTerms(0); + LukeResponse rsp = req.process(controlClient); + + Map fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + assertNull( + "only_on_shard0_s should NOT be present when querying only shard 1", + fields.get("only_on_shard0_s")); + assertNotNull("'name' field should still be present", fields.get("name")); + + // Now query with shards= pointing only at shard 0 — the dynamic field SHOULD appear + req = new LukeRequest(params("shards", shardsArr[0])); + req.setNumTerms(0); + rsp = req.process(controlClient); + + fields = rsp.getFieldInfo(); + assertNotNull("fields should be present", fields); + assertNotNull( + "only_on_shard0_s SHOULD be present when querying shard 0", fields.get("only_on_shard0_s")); + } + + @Test + @ShardsFixed(num = 1) + public void testSingleShardViaParamStillDistributes() throws Exception { + index("id", "500", "name", "test_name"); + commit(); + + // Pass the shards param with a single shard — should still fan out to it + // rather than incorrectly falling through to local mode + LukeRequest req = new LukeRequest(params("shards", shards, "shards.info", "true")); + req.setNumTerms(0); + LukeResponse rsp = req.process(controlClient); + + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertEquals("should see the 1 doc we indexed", 1, (long) rsp.getNumDocs()); + assertNotNull( + "shards section should be present when shards.info=true", rsp.getShardResponses()); + assertEquals("should have 1 shard entry", 1, rsp.getShardResponses().size()); + + // Without shards.info, shards section should be absent + req = new LukeRequest(params("shards", shards)); + req.setNumTerms(0); + rsp = req.process(controlClient); + assertNotNull("index info should be present", rsp.getIndexInfo()); + assertEquals("should see the 1 doc we indexed", 1, (long) rsp.getNumDocs()); + assertNull("shards section should be absent without 
shards.info", rsp.getShardResponses()); + } +} diff --git a/solr/core/src/test/org/apache/solr/handler/admin/LukeTestUtil.java b/solr/core/src/test/org/apache/solr/handler/admin/LukeTestUtil.java new file mode 100644 index 000000000000..6f646d7fa572 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/handler/admin/LukeTestUtil.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.handler.admin; + +import static org.junit.Assert.assertNull; + +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import org.apache.solr.client.solrj.SolrClient; +import org.apache.solr.client.solrj.impl.InputStreamResponseParser; +import org.apache.solr.client.solrj.request.LukeRequest; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.util.BaseTestHarness; + +final class LukeTestUtil { + + private LukeTestUtil() {} + + static void assertLukeXPath( + SolrClient client, String collection, SolrParams extra, String... 
xpaths) throws Exception { + ModifiableSolrParams params = new ModifiableSolrParams(); + params.set("shards.info", "true"); + params.add(extra); + LukeRequest req = new LukeRequest(params); + req.setNumTerms(0); + req.setResponseParser(new InputStreamResponseParser("xml")); + NamedList raw = + collection != null ? client.request(req, collection) : client.request(req); + String xml; + try (InputStream is = (InputStream) raw.get("stream")) { + xml = new String(is.readAllBytes(), StandardCharsets.UTF_8); + } + String failedXpath = BaseTestHarness.validateXPath(xml, xpaths); + assertNull("XPath validation failed: " + failedXpath + "\nResponse:\n" + xml, failedXpath); + } +} diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc index fb795f62cc17..42180716544a 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc +++ b/solr/solr-ref-guide/modules/indexing-guide/pages/luke-request-handler.adoc @@ -83,6 +83,18 @@ The number of top terms for each field. Choose whether `/luke` should return the index-flags for each field. Fetching and returning the index-flags for each field in the index has non-zero cost, and can slow down requests to `/luke`. +`distrib`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: `true` in SolrCloud mode, otherwise `false` +|=== ++ +When `true` (the default in SolrCloud mode), the handler aggregates results from all shards in the collection; set `distrib=false` to inspect only the core that receives the request. +Additive index metrics (`numDocs`, `deletedDocs`, `segmentCount`) are summed across shards; `maxDoc` is the maximum across shards. +Field types and schema flags are validated for consistency across shards. +Per-shard index details and per-field detailed statistics are returned under a `shards` key when `shards.info=true`.
+ == LukeRequestHandler Examples All of the examples in this section assume you are running the "techproducts" Solr example: @@ -118,3 +130,43 @@ Alternatively, to work through the Lucene native id: http://localhost:8983/solr/techproducts/admin/luke?fl=manu&docId=0 From SolrJ, you can access /luke using the {solr-javadocs}/solrj/org/apache/solr/client/solrj/request/LukeRequest.html[`LukeRequest`] object. + +== Distributed Mode (multiple shards) + +When running in SolrCloud, the Luke handler automatically distributes requests across all shards in the collection, the same as search requests. +To inspect only the receiving shard's index, set `distrib=false`. +In user-managed clusters, you can distribute across shards by passing the `shards` parameter with explicit shard URLs. + +To get a collection-wide view: + +[source,text] +http://localhost:8983/solr/techproducts/admin/luke + +To get detailed statistics for a specific field from every shard (reported per shard under the `shards` key, which requires `shards.info=true`): + +[source,text] +http://localhost:8983/solr/techproducts/admin/luke?fl=manu&shards.info=true + +=== Response Structure + +In distributed mode, the response contains: + +* `index` -- Aggregated metrics across all shards: `numDocs`, `deletedDocs`, `segmentCount` are summed; `maxDoc` is the maximum across shards. +* `fields` -- Aggregated field metadata. For each field: `type`, `schema` flags, and `dynamicBase` are validated to be consistent across shards; `index` flags use the first non-null value. The `docs` count is summed. Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are _not_ included at this level. +* `doc` -- Present when `id` is specified. Contains the document from whichever shard owns it, including a `lucene` section (per-field analysis with shard-local `docFreq` values) and a `solr` section (stored fields). Only `id` is supported for distributed doc lookup; `docId` is rejected because Lucene document IDs are shard-local.
+* `schema` -- Schema information from the first responding shard (identical across shards sharing the same configset). +* `info` -- Static info from the first responding shard. +* `shards` -- Only present when `shards.info=true`. Contains per-shard details, with each entry keyed by shard address: +** `index` -- Full index info for that shard (including `directory`, `segmentsFile`, `version`, `current`, `hasDeletions`, `lastModified`, `userData`). +** `fields` -- Only present when `fl` triggers detailed statistics. Contains per-field `topTerms`, `distinct`, and `histogram` from that shard. + +=== Aggregation Semantics + +Field `type`, `schema` flags, and `dynamicBase` are validated for consistency across shards. +If a mismatch is detected, the handler returns an error identifying the field, the conflicting values, and the shard addresses involved. +You can use `distrib=false` to query individual shards and compare their field configurations when troubleshooting mismatches. +The `index` flags are index-derived (not schema-derived) and may be absent on shards where the field has no indexed data; the first non-null value is used, and any subsequent non-null values are validated for consistency. + +Per-field detailed statistics (`topTerms`, `distinct`, `histogram`) are not aggregated across shards. +These statistics are shard-local and appear in each shard's entry under the `shards` key (requires `shards.info=true`). +For collection-wide term frequencies or cardinality estimates, Solr's xref:query-guide:faceting.adoc[faceting API] may cover some of these use cases. 
diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc index a9fde5d46967..697d4a7745b3 100644 --- a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc +++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc @@ -97,6 +97,24 @@ The project normally doesn't remove functionality in a minor release, but we mad +NOTE: The previous parse-context-based configuration (`parseContext.config`) is no longer supported. Tika parser-specific properties must now be configured directly on the Tika Server itself, rather than through Solr configuration. Please refer to the Tika Server documentation for details on how to set these properties. +=== Luke requests now distributed by default + +In SolrCloud mode, the xref:indexing-guide:luke-request-handler.adoc[Luke Request Handler] (`/admin/luke`) now automatically distributes requests across all shards in the collection and returns an aggregated view of the index. +Previously, Luke only reported on the local shard that received the request, giving an incomplete picture of multi-shard collections. +In user-managed clusters, you can opt into distributed behavior by passing the `shards` parameter with explicit shard URLs. +See xref:indexing-guide:luke-request-handler.adoc[] for more details. + +To revert to the previous local-only behavior, set `distrib=false` on individual requests, or configure it as a default in `solrconfig.xml`: + +[source,xml] +---- +<requestHandler name="/admin/luke" class="solr.LukeRequestHandler"> + <lst name="defaults"> + <bool name="distrib">false</bool> + </lst> +</requestHandler> +---- + === JWT Authentication The `blockUnknown` setting in the JWT Authentication plugin now defaults to `true`, meaning requests without a valid JWT token are blocked by default.
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java index a3225c4fc878..51c405c8d26f 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/LukeRequest.java @@ -33,11 +33,17 @@ public class LukeRequest extends CollectionRequiringSolrRequest { private int numTerms = -1; private boolean showSchema = false; private Boolean includeIndexFieldFlags = null; + private SolrParams extraParams; public LukeRequest() { super(METHOD.GET, "/admin/luke"); } + public LukeRequest(SolrParams params) { + this(); + this.extraParams = params; + } + public LukeRequest(String path) { super(METHOD.GET, path); } @@ -121,6 +127,9 @@ public SolrParams getParams() { if (includeIndexFieldFlags != null) { params.add("includeIndexFieldFlags", includeIndexFieldFlags.toString()); } + if (extraParams != null) { + params.add(extraParams); + } return params; } diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java index c38a2caf8300..9370a253ccf9 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/LukeResponse.java @@ -21,10 +21,13 @@ import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.TreeMap; import org.apache.solr.common.luke.FieldFlag; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.SimpleOrderedMap; /** * This is an incomplete representation of the data returned from Luke @@ -114,11 +117,12 @@ public static class FieldInfo implements Serializable { String name; String type; String schema; - int docs; + long docs; int distinct; 
EnumSet flags; boolean cacheableFaceting; NamedList topTerms; + Map extras = new TreeMap<>(); public FieldInfo(String n) { name = n; @@ -129,19 +133,20 @@ public void read(NamedList nl) { for (Map.Entry entry : nl) { if ("type".equals(entry.getKey())) { type = (String) entry.getValue(); - } - if ("flags".equals(entry.getKey())) { + } else if ("flags".equals(entry.getKey())) { flags = parseFlags((String) entry.getValue()); } else if ("schema".equals(entry.getKey())) { schema = (String) entry.getValue(); } else if ("docs".equals(entry.getKey())) { - docs = (Integer) entry.getValue(); + docs = ((Number) entry.getValue()).longValue(); } else if ("distinct".equals(entry.getKey())) { distinct = (Integer) entry.getValue(); } else if ("cacheableFaceting".equals(entry.getKey())) { cacheableFaceting = (Boolean) entry.getValue(); } else if ("topTerms".equals(entry.getKey())) { topTerms = (NamedList) entry.getValue(); + } else { + extras.put(entry.getKey(), entry.getValue()); } } } @@ -174,7 +179,7 @@ public int getDistinct() { return distinct; } - public int getDocs() { + public long getDocs() { return docs; } @@ -193,12 +198,17 @@ public EnumSet getSchemaFlags() { public NamedList getTopTerms() { return topTerms; } + + public Map getExtras() { + return extras; + } } private NamedList indexInfo; private Map fieldInfo; private Map dynamicFieldInfo; private Map fieldTypeInfo; + private Map shardResponses; @Override @SuppressWarnings("unchecked") @@ -247,6 +257,18 @@ public void setResponse(NamedList res) { } } } + + // Parse shards section (present in distributed responses) + SimpleOrderedMap> shardsNL = + (SimpleOrderedMap>) res.get("shards"); + if (shardsNL != null) { + shardResponses = new LinkedHashMap<>(); + for (Map.Entry> entry : shardsNL) { + LukeResponse shardRsp = new LukeResponse(); + shardRsp.setResponse(entry.getValue()); + shardResponses.put(entry.getKey(), shardRsp); + } + } } // ---------------------------------------------------------------- @@ -257,9 +279,14 
@@ public String getIndexDirectory() { return (String) indexInfo.get("directory"); } - public Integer getNumDocs() { + private Long getLong(String key) { if (indexInfo == null) return null; - return (Integer) indexInfo.get("numDocs"); + Number n = (Number) indexInfo.get(key); + return n != null ? n.longValue() : null; + } + + public Long getNumDocs() { + return getLong("numDocs"); } public Integer getMaxDoc() { @@ -267,6 +294,10 @@ public Integer getMaxDoc() { return (Integer) indexInfo.get("maxDoc"); } + public Long getDeletedDocs() { + return getLong("deletedDocs"); + } + public Integer getNumTerms() { if (indexInfo == null) return null; return (Integer) indexInfo.get("numTerms"); @@ -300,5 +331,9 @@ public FieldInfo getDynamicFieldInfo(String f) { return dynamicFieldInfo.get(f); } + public Map getShardResponses() { + return shardResponses; + } + // ---------------------------------------------------------------- } diff --git a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java index e804b41f3ee7..0d2723326564 100644 --- a/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java +++ b/solr/test-framework/src/java/org/apache/solr/BaseDistributedSearchTestCase.java @@ -94,6 +94,13 @@ * without annotations in that class hierarchy. Ideally this function should be retired in favour of * better annotations.. * + *

<p>WARNING: each test annotated with @Shards* will spin up its own set of Jetty servers, which + * can be a substantial performance hit. Therefore, one should be mindful of the total number of + * independent tests using such annotations. One approach is to pool assertions in a single test to + * minimize Jetty server construction overhead. If the test doesn't rely on the comparison features + * of this class, i.e., {@link #query}, it may be wise to make it a {@link + * org.apache.solr.cloud.SolrCloudTestCase} instead. + * * @since solr 1.5 */ public abstract class BaseDistributedSearchTestCase extends SolrTestCaseJ4 {