apache · CrownChu · May 22, 2026 · May 25, 2026 · May 25, 2026 · May 26, 2026
diff --git a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexMultiColumnWriter.java b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexMultiColumnWriter.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.globalindex;
+
+import org.apache.paimon.data.InternalRow;
+
+import javax.annotation.Nullable;
+
+/** Index writer for global index that accepts multiple column values per row. */
+public interface GlobalIndexMultiColumnWriter extends GlobalIndexWriter {
+
+    /**
+     * Write one record's indexed columns at the given relative row id.
+     *
+     * @param rowId the record's row id relative to the current shard (0 to rowCnt - 1)
+     * @param row a projected row containing only the indexed columns, whose layout matches the
+     *     fields order passed to {@link GlobalIndexerFactory#create(java.util.List,
+     *     org.apache.paimon.options.Options)}; a {@code null} row still advances the row id
+     *     without indexing a value
+     */
+    void write(long rowId, @Nullable InternalRow row);
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexer.java b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexer.java
@@ -41,4 +41,9 @@ static GlobalIndexer create(String type, DataField dataField, Options options) {
         GlobalIndexerFactory globalIndexerFactory = GlobalIndexerFactoryUtils.load(type);
         return globalIndexerFactory.create(dataField, options);
     }
+
+    static GlobalIndexer create(String type, List<DataField> fields, Options options) {
+        GlobalIndexerFactory globalIndexerFactory = GlobalIndexerFactoryUtils.load(type);
+        return globalIndexerFactory.create(fields, options);
+    }
 }
diff --git a/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexerFactory.java b/paimon-common/src/main/java/org/apache/paimon/globalindex/GlobalIndexerFactory.java
@@ -22,10 +22,30 @@
 import org.apache.paimon.options.Options;
 import org.apache.paimon.types.DataField;
 
+import java.util.List;
+
 /** File index factory to construct {@link FileIndexer}. */
 public interface GlobalIndexerFactory {
 
     String identifier();
 
     GlobalIndexer create(DataField dataField, Options options);
+
+    /**
+     * Whether this index type supports multi-column indexes. A factory that returns {@code true}
+     * must override {@link #create(List, Options)}, whose default rejects more than one column.
+     */
+    default boolean supportsMultiColumn() {
+        return false;
+    }
+
+    default GlobalIndexer create(List<DataField> fields, Options options) {
+        if (fields.size() > 1) { // default: single-column only; multi-column factories override
+            throw new UnsupportedOperationException(
+                    String.format(
+                            "Index type '%s' does not support multi-column index, got columns: %s",
+                            identifier(), fields));
+        }
+        return create(fields.get(0), options); // NOTE(review): an empty list throws here
+    }
 }
diff --git a/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexBuilderUtils.java b/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexBuilderUtils.java
@@ -24,18 +24,29 @@
 import org.apache.paimon.index.GlobalIndexMeta;
 import org.apache.paimon.index.IndexFileMeta;
 import org.apache.paimon.index.IndexPathFactory;
+import org.apache.paimon.manifest.ManifestEntry;
 import org.apache.paimon.options.Options;
+import org.apache.paimon.schema.SchemaManager;
 import org.apache.paimon.table.FileStoreTable;
 import org.apache.paimon.types.DataField;
 import org.apache.paimon.utils.Range;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.annotation.Nullable;
+
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 /** Utils for global index build. */
 public class GlobalIndexBuilderUtils {
 
+    private static final Logger LOG = LoggerFactory.getLogger(GlobalIndexBuilderUtils.class);
+
     public static List<IndexFileMeta> toIndexFileMetas(
             FileIO fileIO,
             IndexPathFactory indexPathFactory,
@@ -45,12 +56,62 @@ public static List<IndexFileMeta> toIndexFileMetas(
             String indexType,
             List<ResultEntry> entries)
             throws IOException {
+        return toIndexFileMetas(
+                fileIO, indexPathFactory, options, range, indexFieldId, null, indexType, entries);
+    }
+
+    /**
+     * Builds the index file metas. The first column in {@code fields} is treated as the primary
+     * index column (e.g. the first column in {@code CREATE ... INDEX ON (a, b, c)}) and is stored
+     * as {@code indexFieldId}; the remaining columns go into {@code extraFieldIds}. Callers must
+     * pass a non-empty {@code fields} in the intended column order.
+     */
+    public static List<IndexFileMeta> toIndexFileMetas(
+            FileIO fileIO,
+            IndexPathFactory indexPathFactory,
+            CoreOptions options,
+            Range range,
+            List<DataField> fields,
+            String indexType,
+            List<ResultEntry> entries)
+            throws IOException {
+        // get(0) is the primary column, so fields must not be empty. extraFieldIds stays null
+        // (rather than an empty array) when there is only one column.
+        int indexFieldId = fields.get(0).id();
+        int[] extraFieldIds =
+                fields.size() > 1
+                        ? fields.subList(1, fields.size()).stream()
+                                .mapToInt(DataField::id)
+                                .toArray()
+                        : null;
+        return toIndexFileMetas(
+                fileIO,
+                indexPathFactory,
+                options,
+                range,
+                indexFieldId,
+                extraFieldIds,
+                indexType,
+                entries);
+    }
+
+    private static List<IndexFileMeta> toIndexFileMetas(
+            FileIO fileIO,
+            IndexPathFactory indexPathFactory,
+            CoreOptions options,
+            Range range,
+            int indexFieldId,
+            @Nullable int[] extraFieldIds,
+            String indexType,
+            List<ResultEntry> entries)
+            throws IOException {
         List<IndexFileMeta> results = new ArrayList<>();
         for (ResultEntry entry : entries) {
             String fileName = entry.fileName();
             long fileSize = fileIO.getFileSize(indexPathFactory.toPath(fileName));
             GlobalIndexMeta globalIndexMeta =
-                    new GlobalIndexMeta(range.from, range.to, indexFieldId, null, entry.meta());
+                    new GlobalIndexMeta(
+                            range.from, range.to, indexFieldId, extraFieldIds, entry.meta());
 
             Path externalPathDir = options.globalIndexExternalPath();
             String externalPathString = null;
@@ -78,6 +139,77 @@ public static GlobalIndexWriter createIndexWriter(
         return globalIndexer.createWriter(createGlobalIndexFileReadWrite(table));
     }
 
+    public static GlobalIndexWriter createIndexWriter(
+            FileStoreTable table, String indexType, List<DataField> fields, Options options)
+            throws IOException {
+        GlobalIndexer globalIndexer = GlobalIndexer.create(indexType, fields, options);
+        return globalIndexer.createWriter(createGlobalIndexFileReadWrite(table));
+    }
+
+    /**
+     * Find the minimum firstRowId among files whose schema does not contain all index columns;
+     * files without a firstRowId are ignored. Files at or beyond this rowId cannot be indexed
+     * because the column was added later via ALTER TABLE.
+     *
+     * @return the boundary rowId, or {@link Long#MAX_VALUE} if no such file is found
+     */
+    public static long findMinNonIndexableRowId(
+            SchemaManager schemaManager, List<ManifestEntry> entries, List<String> indexColumns) {
+        Map<Long, Boolean> schemaContainsColumns = new HashMap<>(); // cached per schema id
+        long minRowId = Long.MAX_VALUE;
+        long minSchemaId = -1;
+        for (ManifestEntry entry : entries) {
+            long sid = entry.file().schemaId();
+            boolean contains =
+                    schemaContainsColumns.computeIfAbsent(
+                            sid,
+                            id -> schemaManager.schema(id).fieldNames().containsAll(indexColumns));
+            if (!contains && entry.file().firstRowId() != null) {
+                long rowId = entry.file().nonNullFirstRowId();
+                if (rowId < minRowId) {
+                    minRowId = rowId;
+                    minSchemaId = sid;
+                }
+            }
+        }
+        if (minRowId != Long.MAX_VALUE) { // re-derive the missing columns only for the log
+            List<String> schemaFields = schemaManager.schema(minSchemaId).fieldNames();
+            List<String> missingColumns = new ArrayList<>();
+            for (String col : indexColumns) {
+                if (!schemaFields.contains(col)) {
+                    missingColumns.add(col);
+                }
+            }
+            LOG.info(
+                    "Found non-indexable files: schemaId={} missing columns {}, boundaryRowId={}.",
+                    minSchemaId,
+                    missingColumns,
+                    minRowId);
+        }
+        return minRowId;
+    }
+
+    /** Keep entries whose non-null firstRowId is below the boundary; Long.MAX_VALUE keeps all. */
+    public static List<ManifestEntry> filterEntriesBefore(
+            List<ManifestEntry> entries, long boundaryRowId) {
+        if (boundaryRowId == Long.MAX_VALUE) {
+            return entries;
+        }
+        List<ManifestEntry> result = new ArrayList<>();
+        for (ManifestEntry entry : entries) {
+            if (entry.file().firstRowId() != null
+                    && entry.file().nonNullFirstRowId() < boundaryRowId) {
+                result.add(entry);
+            }
+        }
+        LOG.info(
+                "Filtered {} files to {} indexable files (boundaryRowId={}).",
+                entries.size(),
+                result.size(),
+                boundaryRowId);
+        return result;
+    }
+
     private static GlobalIndexFileReadWrite createGlobalIndexFileReadWrite(FileStoreTable table) {
         IndexPathFactory indexPathFactory = table.store().pathFactory().globalIndexFileFactory();
         return new GlobalIndexFileReadWrite(table.fileIO(), indexPathFactory);

diff --git a/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java b/paimon-core/src/main/java/org/apache/paimon/globalindex/GlobalIndexScanner.java
@@ -53,6 +53,7 @@
 import static org.apache.paimon.CoreOptions.GLOBAL_INDEX_THREAD_NUM;
 import static org.apache.paimon.predicate.PredicateVisitor.collectFieldNames;
 import static org.apache.paimon.table.source.snapshot.TimeTravelUtil.tryTravelOrLatest;
+import static org.apache.paimon.utils.Preconditions.checkArgument;
 import static org.apache.paimon.utils.Preconditions.checkNotNull;
 
 /** Scanner for shard-based global indexes. */
@@ -74,29 +75,73 @@ public GlobalIndexScanner(
                 GlobalIndexReadThreadPool.getExecutorService(options.get(GLOBAL_INDEX_THREAD_NUM));
         this.indexPathFactory = indexPathFactory;
         GlobalIndexFileReader indexFileReader = meta -> fileIO.newInputStream(meta.filePath());
-        Map<Integer, Map<String, Map<Range, List<IndexFileMeta>>>> indexMetas = new HashMap<>();
+        Map<Integer, IndexMetaFileGroup> indexMetas = new HashMap<>();
+        Map<Integer, List<IndexMetaFileGroup>> extraIndexMetas = new HashMap<>();
         for (IndexFileMeta indexFile : indexFiles) {
             GlobalIndexMeta meta = checkNotNull(indexFile.globalIndexMeta());
-            int fieldId = meta.indexFieldId();
             String indexType = indexFile.indexType();
-            indexMetas
-                    .computeIfAbsent(fieldId, k -> new HashMap<>())
-                    .computeIfAbsent(indexType, k -> new HashMap<>())
-                    .computeIfAbsent(
-                            new Range(meta.rowRangeStart(), meta.rowRangeEnd()),
-                            k -> new ArrayList<>())
-                    .add(indexFile);
+            Range range = new Range(meta.rowRangeStart(), meta.rowRangeEnd());
+            int indexFieldId = meta.indexFieldId();
+            List<Integer> fieldIds = meta.getIndexedFieldIds();
+            IndexMetaFileGroup group = indexMetas.get(indexFieldId);
+            if (group == null) {
+                group = new IndexMetaFileGroup(indexFieldId, fieldIds);
+                indexMetas.put(indexFieldId, group);
+                if (meta.extraFieldIds() != null) {
+                    for (int extra : meta.extraFieldIds()) {
+                        extraIndexMetas.computeIfAbsent(extra, k -> new ArrayList<>()).add(group);
+                    }
+                }
+            } else {
+                checkArgument(
+                        group.fieldIds.equals(fieldIds),
+                        "Primary field %s owns multiple indexes with different columns %s and %s; "
+                                + "a primary column can own at most one index.",
+                        indexFieldId,
+                        group.fieldIds,
+                        fieldIds);
+            }
+            group.addFile(indexType, range, indexFile);
         }
 
         IntFunction<Collection<GlobalIndexReader>> readersFunction =
-                fieldId ->
-                        createReaders(
-                                indexFileReader,
-                                indexMetas.get(fieldId),
-                                rowType.getField(fieldId));
+                fId -> {
+                    IndexMetaFileGroup group = indexMetas.get(fId);
+                    if (group == null) {
+                        List<IndexMetaFileGroup> extraGroups = extraIndexMetas.get(fId);
+                        if (extraGroups == null || extraGroups.isEmpty()) {
+                            return Collections.emptyList();
+                        }
+                        group = extraGroups.get(0);
+                    }
+                    List<DataField> fields =
+                            group.fieldIds.stream()
+                                    .map(rowType::getField)
+                                    .collect(Collectors.toList());
+                    return createReaders(indexFileReader, group.metas, fields);
+                };
         this.globalIndexEvaluator = new GlobalIndexEvaluator(rowType, readersFunction);
     }
 
+    /** Index files of one global index (single- or multi-column), keyed by type then range. */
+    private static class IndexMetaFileGroup {
+
+        private final int indexFieldId; // primary indexed column id
+        private final List<Integer> fieldIds; // as reported by getIndexedFieldIds()
+        private final Map<String, Map<Range, List<IndexFileMeta>>> metas = new HashMap<>();
+
+        IndexMetaFileGroup(int indexFieldId, List<Integer> fieldIds) {
+            this.indexFieldId = indexFieldId;
+            this.fieldIds = fieldIds;
+        }
+
+        void addFile(String indexType, Range range, IndexFileMeta indexFile) {
+            metas.computeIfAbsent(indexType, k -> new HashMap<>())
+                    .computeIfAbsent(range, k -> new ArrayList<>())
+                    .add(indexFile);
+        }
+    }
+
     public static Optional<GlobalIndexScanner> create(
             FileStoreTable table, Collection<IndexFileMeta> indexFiles) {
         if (indexFiles.isEmpty()) {
@@ -127,7 +172,19 @@ public static Optional<GlobalIndexScanner> create(
                     if (globalIndex == null) {
                         return false;
                     }
-                    return filterFieldIds.contains(globalIndex.indexFieldId());
+                    // Collect indexes whose primary column is filtered, and also multi-column
+                    // indexes that have a filtered column as an extra (used as a fallback).
+                    if (filterFieldIds.contains(globalIndex.indexFieldId())) {
+                        return true;
+                    }
+                    if (globalIndex.extraFieldIds() != null) {
+                        for (int id : globalIndex.extraFieldIds()) {
+                            if (filterFieldIds.contains(id)) {
+                                return true;
+                            }
+                        }
+                    }
+                    return false;
                 };
 
         List<IndexFileMeta> indexFiles =
@@ -145,7 +202,7 @@ public Optional<GlobalIndexResult> scan(Predicate predicate) {
     private Collection<GlobalIndexReader> createReaders(
             GlobalIndexFileReader indexFileReadWrite,
             Map<String, Map<Range, List<IndexFileMeta>>> indexMetas,
-            DataField dataField) {
+            List<DataField> fields) {
         if (indexMetas == null) {
             return Collections.emptyList();
         }
@@ -155,7 +212,7 @@ private Collection<GlobalIndexReader> createReaders(
             String indexType = entry.getKey();
             Map<Range, List<IndexFileMeta>> metas = entry.getValue();
             GlobalIndexerFactory globalIndexerFactory = GlobalIndexerFactoryUtils.load(indexType);
-            GlobalIndexer globalIndexer = globalIndexerFactory.create(dataField, options);
+            GlobalIndexer globalIndexer = globalIndexerFactory.create(fields, options);
 
             List<CompletableFuture<GlobalIndexReader>> futures = new ArrayList<>(metas.size());
             for (Map.Entry<Range, List<IndexFileMeta>> rangeMetas : metas.entrySet()) {