From ba4ff84944e8db6d3689e8c07f9e940a189e598a Mon Sep 17 00:00:00 2001 From: umi Date: Fri, 24 Apr 2026 15:57:52 +0800 Subject: [PATCH 01/51] proto batch externalSort fix add manifest sort to compact job addTest review mvMorax fix spi proto proto fix fix # Conflicts: # paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java --- .../java/org/apache/paimon/CoreOptions.java | 33 ++ .../paimon/operation/FileStoreCommitImpl.java | 17 +- .../paimon/operation/ManifestFileMerger.java | 537 +++++++++++++++++- .../operation/ManifestPickStrategy.java | 138 +++++ .../paimon/operation/ManifestSortedRun.java | 131 +++++ .../paimon/manifest/ManifestFileMetaTest.java | 75 ++- .../NoPartitionManifestFileMetaTest.java | 20 +- 7 files changed, 912 insertions(+), 39 deletions(-) create mode 100644 paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java create mode 100644 paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index f09a6edb4a74..aed7616d5707 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -471,6 +471,30 @@ public InlineElement getDescription() { "To avoid frequent manifest merges, this parameter specifies the minimum number " + "of ManifestFileMeta to merge."); + public static final ConfigOption MANIFEST_SORT_ENABLE = + key("manifest-sort.enable") + .booleanType() + .defaultValue(false) + .withDescription( + "Whether to invoke manifest sort rewrite right after manifest merge" + + " during commit. The sort rewrite implementation is provided" + + " by an external module (e.g. morax) and discovered via" + + " ServiceLoader. 
When no implementation is registered on the" + + " classpath, this flag has no effect (manifest sort is" + + " silently skipped)."); + + public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = + key("manifest-sort.partition-field") + .stringType() + .noDefaultValue() + .withDescription( + "Partition field name to sort manifest entries by. Validated by" + + " schema validation; resolved to a 0-based index by the" + + " caller (an external sort rewrite implementation). For" + + " single-partition tables, optional (defaults to the only" + + " partition field). For multi-partition tables, REQUIRED" + + " when 'manifest-sort.enable' is true."); + public static final ConfigOption UPSERT_KEY = key("upsert-key") .stringType() @@ -2603,6 +2627,15 @@ public MemorySize manifestFullCompactionThresholdSize() { return options.get(MANIFEST_FULL_COMPACTION_FILE_SIZE); } + public boolean manifestSortEnable() { + return options.get(MANIFEST_SORT_ENABLE); + } + + @Nullable + public String manifestSortPartitionField() { + return options.get(MANIFEST_SORT_PARTITION_FIELD); + } + public String partitionDefaultName() { return options.get(PARTITION_DEFAULT_NAME); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java index 3f9fdb9f1c0c..96833dddb11d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java @@ -55,6 +55,7 @@ import org.apache.paimon.operation.commit.SuccessCommitResult; import org.apache.paimon.operation.metrics.CommitMetrics; import org.apache.paimon.operation.metrics.CommitStats; +import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.partition.PartitionStatistics; import org.apache.paimon.predicate.Predicate; @@ -964,13 +965,7 @@ CommitResult tryCommitOnce( // 
try to merge old manifest files to create base manifest list mergeAfterManifests = ManifestFileMerger.merge( - mergeBeforeManifests, - manifestFile, - options.manifestTargetSize().getBytes(), - options.manifestMergeMinCount(), - options.manifestFullCompactionThresholdSize().getBytes(), - partitionType, - options.scanManifestParallelism()); + mergeBeforeManifests, manifestFile, partitionType, options); baseManifestList = manifestList.write(mergeAfterManifests); if (options.rowTrackingEnabled()) { @@ -1191,15 +1186,15 @@ private boolean compactManifestOnce() { List mergeAfterManifests; // the fist trial + Options tempOptions = options.toConfiguration(); + tempOptions.set("manifest.merge-min-count", "1"); + tempOptions.set("manifest.full-compaction-threshold-size", "1B"); mergeAfterManifests = ManifestFileMerger.merge( mergeBeforeManifests, manifestFile, - options.manifestTargetSize().getBytes(), - 1, - 1, partitionType, - options.scanManifestParallelism()); + CoreOptions.fromMap(tempOptions.toMap())); if (new HashSet<>(mergeBeforeManifests).equals(new HashSet<>(mergeAfterManifests))) { // no need to commit this snapshot, because no compact were happened diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index cdcad1ed3e84..3d7cccd5b34b 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -18,13 +18,17 @@ package org.apache.paimon.operation; +import org.apache.paimon.CoreOptions; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; +import org.apache.paimon.options.Options; import 
org.apache.paimon.partition.PartitionPredicate; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; @@ -34,6 +38,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; @@ -48,7 +53,7 @@ import static org.apache.paimon.utils.ManifestReadThreadPool.sequentialBatchedExecute; import static org.apache.paimon.utils.Preconditions.checkArgument; -/** Util for merging manifest files. */ +/** Manifest file merger with standard merge logic and optional sort rewrite. */ public class ManifestFileMerger { private static final Logger LOG = LoggerFactory.getLogger(ManifestFileMerger.class); @@ -62,25 +67,41 @@ public class ManifestFileMerger { public static List merge( List input, ManifestFile manifestFile, - long suggestedMetaSize, - int suggestedMinMetaCount, - long manifestFullCompactionSize, RowType partitionType, - @Nullable Integer manifestReadParallelism) { + CoreOptions options) { + // Extract configuration from options + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + int suggestedMinMetaCount = options.manifestMergeMinCount(); + long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + Options tableOptions = options.toConfiguration(); + // these are the newly created manifest files, clean them up if exception occurs List newFilesForAbort = new ArrayList<>(); try { - Optional> fullCompacted = - tryFullCompaction( - input, - newFilesForAbort, - manifestFile, - suggestedMetaSize, - manifestFullCompactionSize, - partitionType, - manifestReadParallelism); - return fullCompacted.orElseGet( + Optional> merged; + + // If manifest-sort.enable is enabled and there are partition fields, use trySortRewrite + if 
(tableOptions.getBoolean("manifest-sort.enable", false)
+                    && partitionType.getFieldCount() > 0) {
+                merged =
+                        trySortRewrite(
+                                input, newFilesForAbort, manifestFile, partitionType, options);
+            } else {
+                // Otherwise try full compaction first, then minor compaction if needed
+                merged =
+                        tryFullCompaction(
+                                input,
+                                newFilesForAbort,
+                                manifestFile,
+                                suggestedMetaSize,
+                                manifestFullCompactionSize,
+                                partitionType,
+                                manifestReadParallelism);
+            }
+
+            return merged.orElseGet(
                     () ->
                             tryMinorCompaction(
                                     input,
@@ -303,6 +324,492 @@ private static Set computeDeletePartitions(Set
         return partitions;
     }
 
+    // ==================== Manifest Sort Rewrite ====================
+
+    /**
+     * Sort-rewrites the manifest list on a single partition field.
+     *
+     * <p>Outcomes:
+     *
+     * <ul>
+     *   <li>{@code Optional.of(input)}: the sort field cannot be resolved, or the resolved name is
+     *       not a partition field. Nothing is rewritten.
+     *   <li>{@code Optional.empty()}: some manifest carries DELETE entries but the full compaction
+     *       threshold is not reached, so no delete set is available. The caller's {@code
+     *       orElseGet} then falls back to minor compaction.
+     *   <li>Otherwise the new manifest list: untouched files, then the rewritten (or size-capped
+     *       and therefore kept) sections, then the output of the full-compaction group.
+     * </ul>
+     *
+     * <p>Every manifest written by the section loop is added to {@code newFilesForAbort} as soon
+     * as it exists, so that a later failure can clean it up.
+     *
+     * @param input manifests of the snapshot being committed; not modified
+     * @param newFilesForAbort receives every newly written manifest
+     * @param manifestFile reader/writer for manifest files
+     * @param partitionType partition row type; the sort field is looked up in it by name
+     * @param options table options; the {@code manifest-sort.*} keys are read from them
+     */
+    private static Optional<List<ManifestFileMeta>> trySortRewrite(
+            List<ManifestFileMeta> input,
+            List<ManifestFileMeta> newFilesForAbort,
+            ManifestFile manifestFile,
+            RowType partitionType,
+            CoreOptions options)
+            throws Exception {
+        // Extract configuration from options
+        long suggestedMetaSize = options.manifestTargetSize().getBytes();
+        long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes();
+        Integer manifestReadParallelism = options.scanManifestParallelism();
+        Options tableOptions = options.toConfiguration();
+
+        // Step 1: Resolve sort field.
+        String sortField = resolveSortField(tableOptions.toMap(), partitionType);
+        if (sortField == null) {
+            LOG.warn(
+                    "Cannot resolve sort field for manifest sort rewrite. "
+                            + "Skipping sort. Configure 'manifest-sort.partition-field'"
+                            + " for multi-partition tables.");
+            return Optional.of(input);
+        }
+        int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField);
+        if (sortFieldIndex < 0) {
+            // indexOf yields -1 for an unknown name; getTypeAt(-1) would fail the whole commit.
+            LOG.warn(
+                    "Manifest sort field '{}' is not one of the partition fields {}. "
+                            + "Skipping sort.",
+                    sortField,
+                    partitionType.getFieldNames());
+            return Optional.of(input);
+        }
+        DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex);
+
+        // Step 2: Check full compact trigger.
+        Filter<ManifestFileMeta> mustChange =
+                file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize;
+
+        long totalDeltaFileSize = 0;
+        boolean hasDeletes = false;
+        for (ManifestFileMeta file : input) {
+            if (mustChange.test(file)) {
+                totalDeltaFileSize += file.fileSize();
+            }
+            hasDeletes |= file.numDeletedFiles() > 0;
+        }
+        boolean fullCompaction = totalDeltaFileSize >= manifestFullCompactionSize;
+        if (!fullCompaction && hasDeletes) {
+            // No delete set is read on this path, and sortAndRewriteSection writes ADD entries
+            // only: rewriting now would drop DELETE entries while keeping the ADD entries they
+            // cancel. Leave these manifests to minor compaction instead.
+            return Optional.empty();
+        }
+
+        List<ManifestFileMeta> fullCompactionManifests = new ArrayList<>();
+        List<ManifestFileMeta> lsmFiles = new ArrayList<>();
+        Set<FileEntry.Identifier> deleteEntries = null;
+        if (!fullCompaction) {
+            lsmFiles.addAll(input);
+        } else {
+            // Step 3: Read delete entries and build partition predicate.
+            deleteEntries =
+                    FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism);
+
+            PartitionPredicate predicate;
+            if (deleteEntries.isEmpty()) {
+                predicate = PartitionPredicate.ALWAYS_FALSE;
+            } else if (partitionType.getFieldCount() > 0) {
+                Set<BinaryRow> deletePartitions = computeDeletePartitions(deleteEntries);
+                predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions);
+            } else {
+                predicate = PartitionPredicate.ALWAYS_TRUE;
+            }
+
+            // Step 4: Classify input into the full-compaction group and LSM files.
+            for (ManifestFileMeta file : input) {
+                boolean mustRewrite =
+                        mustChange.test(file)
+                                || predicate.test(
+                                        file.numAddedFiles() + file.numDeletedFiles(),
+                                        file.partitionStats().minValues(),
+                                        file.partitionStats().maxValues(),
+                                        file.partitionStats().nullCounts());
+                if (mustRewrite) {
+                    fullCompactionManifests.add(file);
+                } else {
+                    lsmFiles.add(file);
+                }
+            }
+        }
+
+        // Process full compaction manifests separately: sort, deduplicate, and rewrite
+        List<ManifestFileMeta> fullCompactionRewritten = new ArrayList<>();
+        if (!fullCompactionManifests.isEmpty()) {
+            fullCompactionRewritten =
+                    sortAndRewriteFullCompaction(
+                            fullCompactionManifests,
+                            manifestFile,
+                            sortFieldIndex,
+                            sortFieldType,
+                            suggestedMetaSize,
+                            deleteEntries);
+            newFilesForAbort.addAll(fullCompactionRewritten);
+        }
+
+        // Step 5: Build LSM Tree and assign levels (only for lsmFiles).
+        List<ManifestSortedRun> levelRuns =
+                lsmFiles.isEmpty()
+                        ? new ArrayList<>()
+                        : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType);
+
+        // Step 6: Pick runs to compact.
+        int sizeAmpThreshold = tableOptions.getInteger("manifest-sort.size-amp-threshold", 2);
+        int sizeRatioThreshold = tableOptions.getInteger("manifest-sort.size-ratio-threshold", 10);
+        ManifestPickStrategy pickStrategy =
+                new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold);
+        List<ManifestSortedRun> pickedRuns = pickStrategy.pick(levelRuns);
+
+        Set<ManifestSortedRun> pickedSet = new HashSet<>(pickedRuns);
+        List<ManifestFileMeta> reusedFiles = new ArrayList<>();
+        for (ManifestSortedRun run : levelRuns) {
+            if (!pickedSet.contains(run)) {
+                reusedFiles.addAll(run.files());
+            }
+        }
+
+        if (pickedRuns.isEmpty()) {
+            if (fullCompactionManifests.isEmpty()) {
+                // Nothing was rewritten at all.
+                return Optional.of(new ArrayList<>(input));
+            }
+            // The full-compaction group has already been rewritten. The result must reference
+            // its output instead of the original input, otherwise the new files are orphaned
+            // and the applied deletes are lost.
+            List<ManifestFileMeta> compacted = new ArrayList<>(reusedFiles);
+            compacted.addAll(fullCompactionRewritten);
+            return Optional.of(compacted);
+        }
+
+        // Step 7: Split picked files into sections, sort and rewrite each.
+        List<ManifestFileMeta> pickedFiles = new ArrayList<>();
+        for (ManifestSortedRun run : pickedRuns) {
+            pickedFiles.addAll(run.files());
+        }
+
+        List<List<ManifestFileMeta>> sections =
+                splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType);
+        long maxRewriteSize =
+                parseLongOption(tableOptions, "manifest-sort.max-rewrite-size", Long.MAX_VALUE);
+        long processedSize = 0;
+
+        List<ManifestFileMeta> result = new ArrayList<>(reusedFiles);
+        for (List<ManifestFileMeta> section : sections) {
+            long sectionSize = 0;
+            for (ManifestFileMeta m : section) {
+                sectionSize += m.fileSize();
+            }
+            if (processedSize + sectionSize > maxRewriteSize) {
+                // Over the rewrite budget: keep the section's files unchanged.
+                result.addAll(section);
+                continue;
+            }
+            processedSize += sectionSize;
+
+            List<ManifestFileMeta> merged =
+                    sortAndRewriteSection(
+                            section, manifestFile, sortFieldIndex, sortFieldType, deleteEntries);
+            // Register right away so a failure in a later section can still clean these up.
+            newFilesForAbort.addAll(merged);
+            result.addAll(merged);
+        }
+        result.addAll(fullCompactionRewritten);
+        return Optional.of(result);
+    }
+
+    // ==================== Sort Rewrite Helpers ====================
+
+    /**
+     * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}.
+     *
+     * <p>A null field orders before any non-null value; two null fields compare as equal.
+     *
+     * @param a left row
+     * @param b right row
+     * @param k 0-based field position in both rows
+     * @param type declared type of field {@code k}
+     * @return negative, zero or positive as {@code a[k]} is less than, equal to or greater than
+     *     {@code b[k]}
+     * @throws UnsupportedOperationException if the type root is not one of the supported types
+     */
+    static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) {
+        // Check nullness first: the typed getters below must not be called on a null field.
+        boolean aIsNull = a.isNullAt(k);
+        boolean bIsNull = b.isNullAt(k);
+        if (aIsNull || bIsNull) {
+            return Boolean.compare(!aIsNull, !bIsNull);
+        }
+
+        switch (type.getTypeRoot()) {
+            case INTEGER:
+            case DATE:
+                return Integer.compare(a.getInt(k), b.getInt(k));
+            case BIGINT:
+                return Long.compare(a.getLong(k), b.getLong(k));
+            case SMALLINT:
+                return Short.compare(a.getShort(k), b.getShort(k));
+            case TINYINT:
+                return Byte.compare(a.getByte(k), b.getByte(k));
+            case FLOAT:
+                return Float.compare(a.getFloat(k), b.getFloat(k));
+            case DOUBLE:
+                return Double.compare(a.getDouble(k), b.getDouble(k));
+            case BOOLEAN:
+                return Boolean.compare(a.getBoolean(k), b.getBoolean(k));
+            case VARCHAR:
+            case CHAR:
+                return a.getString(k).compareTo(b.getString(k));
+            case TIMESTAMP_WITHOUT_TIME_ZONE:
+            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+                {
+                    // getTimestamp takes the declared precision; defaultSize() is a byte size
+                    // and selects the wrong storage layout for low-precision timestamps.
+                    int precision = org.apache.paimon.types.DataTypeChecks.getPrecision(type);
+                    return a.getTimestamp(k, precision).compareTo(b.getTimestamp(k, precision));
+                }
+            case DECIMAL:
+                {
+                    DecimalType dt = (DecimalType) type;
+                    return a.getDecimal(k, dt.getPrecision(), dt.getScale())
+                            .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale()));
+                }
+            default:
+                String errorMsg =
+                        String.format(
+                                "Unsupported partition field type '%s' for manifest sort rewrite. "
+                                        + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, "
+                                        + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, "
+                                        + "DECIMAL.",
+                                type.getTypeRoot());
+                LOG.error(errorMsg);
+                throw new UnsupportedOperationException(errorMsg);
+        }
+    }
+
+    /**
+     * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}.
+     * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field
+     * value AND the same data file are emitted contiguously.
+     *
+     * @param a left entry
+     * @param b right entry
+     * @param sortFieldIndex 0-based position of the sort field in the partition row
+     * @param sortFieldType declared type of the sort field
+     * @return the {@link #compareField} result on the two partitions, or the file name comparison
+     *     when the sort-field values are equal
+     */
+    static int compareSortKey(
+            ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) {
+        int bySortField =
+                compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType);
+        return bySortField != 0
+                ? bySortField
+                : a.file().fileName().compareTo(b.file().fileName());
+    }
+
+    /**
+     * Resolve the partition field to sort manifests by.
+     *
+     * <p>Resolution rules:
+     *
+     * <ol>
+     *   <li>If {@code manifest-sort.partition-field} is configured, return that value.
+     *   <li>Otherwise, if the table has exactly one partition field, return that field name.
+     *   <li>Otherwise return {@code null}.
+     * </ol>
+     */
+    @Nullable
+    static String resolveSortField(Map<String, String> tableOptions, RowType partitionType) {
+        String configured = tableOptions.get("manifest-sort.partition-field");
+        if (configured != null && !configured.isEmpty()) {
+            return configured;
+        }
+        if (partitionType.getFieldCount() == 1) {
+            return partitionType.getFieldNames().get(0);
+        }
+        return null;
+    }
+
+    /**
+     * Build level-sorted runs from a list of manifest files. Sorts files by min (then max)
+     * partition value, greedy-scans to build non-overlapping SortedRuns, then assigns levels by
+     * totalSize (Top-4 largest to level 1~4, rest to level 0).
+     *
+     * <p>A file joins the current run only when its min is strictly greater than the max of the
+     * run's last file; otherwise it starts a new run.
+     *
+     * @param input manifest files; sorted in place, may be empty
+     * @param sortFieldIndex 0-based position of the sort field in the partition row
+     * @param sortFieldType declared type of the sort field
+     * @return the runs ordered by ascending total size, each with its level set; empty when
+     *     {@code input} is empty
+     */
+    static List<ManifestSortedRun> buildLevelSortedRuns(
+            List<ManifestFileMeta> input, int sortFieldIndex, DataType sortFieldType) {
+        if (input.isEmpty()) {
+            // Nothing to group; the scan below needs at least one file.
+            return new ArrayList<>();
+        }
+
+        input.sort(
+                (a, b) -> {
+                    int byMin =
+                            compareField(
+                                    a.partitionStats().minValues(),
+                                    b.partitionStats().minValues(),
+                                    sortFieldIndex,
+                                    sortFieldType);
+                    if (byMin != 0) {
+                        return byMin;
+                    }
+                    return compareField(
+                            a.partitionStats().maxValues(),
+                            b.partitionStats().maxValues(),
+                            sortFieldIndex,
+                            sortFieldType);
+                });
+
+        List<ManifestSortedRun> runs = new ArrayList<>();
+        List<ManifestFileMeta> currentRun = new ArrayList<>();
+        for (ManifestFileMeta file : input) {
+            if (!currentRun.isEmpty()) {
+                ManifestFileMeta last = currentRun.get(currentRun.size() - 1);
+                boolean startsAfterLast =
+                        compareField(
+                                        file.partitionStats().minValues(),
+                                        last.partitionStats().maxValues(),
+                                        sortFieldIndex,
+                                        sortFieldType)
+                                > 0;
+                if (!startsAfterLast) {
+                    // Overlaps (or touches) the run's tail: close the run and start a new one.
+                    runs.add(ManifestSortedRun.fromSorted(currentRun));
+                    currentRun = new ArrayList<>();
+                }
+            }
+            currentRun.add(file);
+        }
+        runs.add(ManifestSortedRun.fromSorted(currentRun));
+
+        runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize));
+        int n = runs.size();
+        for (int i = 0; i < n; i++) {
+            // The four largest runs get levels 4..1 in ascending size order, so the largest run
+            // gets level 1.
+            // NOTE(review): ManifestPickStrategy treats the maximum level as the "highest" run;
+            // confirm that giving the largest run the lowest non-zero level is intended.
+            runs.get(i).setLevel(i >= n - 4 ? n - i : 0);
+        }
+        return runs;
+    }
+
+    /**
+     * Split picked
files into sections. Files with overlapping sort-key intervals go into the same
+     * section.
+     *
+     * <p>A file opens a new section only when its min is strictly greater than the largest max
+     * seen so far in the current section.
+     *
+     * @param pickedFiles files to split; sorted in place, may be empty
+     * @param sortFieldIndex 0-based position of the sort field in the partition row
+     * @param sortFieldType declared type of the sort field
+     * @return the sections in ascending order of their min value; empty when {@code pickedFiles}
+     *     is empty
+     */
+    static List<List<ManifestFileMeta>> splitIntoSections(
+            List<ManifestFileMeta> pickedFiles, int sortFieldIndex, DataType sortFieldType) {
+        List<List<ManifestFileMeta>> sections = new ArrayList<>();
+        if (pickedFiles.isEmpty()) {
+            // Nothing to split; the scan below needs at least one file.
+            return sections;
+        }
+
+        pickedFiles.sort(
+                (a, b) -> {
+                    int byMin =
+                            compareField(
+                                    a.partitionStats().minValues(),
+                                    b.partitionStats().minValues(),
+                                    sortFieldIndex,
+                                    sortFieldType);
+                    if (byMin != 0) {
+                        return byMin;
+                    }
+                    return compareField(
+                            a.partitionStats().maxValues(),
+                            b.partitionStats().maxValues(),
+                            sortFieldIndex,
+                            sortFieldType);
+                });
+
+        List<ManifestFileMeta> currentSection = new ArrayList<>();
+        BinaryRow sectionMaxBound = null;
+        for (ManifestFileMeta file : pickedFiles) {
+            BinaryRow fileMin = file.partitionStats().minValues();
+            BinaryRow fileMax = file.partitionStats().maxValues();
+
+            boolean startsNewSection =
+                    sectionMaxBound != null
+                            && compareField(fileMin, sectionMaxBound, sortFieldIndex, sortFieldType)
+                                    > 0;
+            if (startsNewSection) {
+                sections.add(currentSection);
+                currentSection = new ArrayList<>();
+                sectionMaxBound = null;
+            }
+
+            currentSection.add(file);
+            // Track the running upper bound of the section, not just the last file's max.
+            if (sectionMaxBound == null
+                    || compareField(fileMax, sectionMaxBound, sortFieldIndex, sortFieldType) > 0) {
+                sectionMaxBound = fileMax;
+            }
+        }
+        sections.add(currentSection);
+        return sections;
+    }
+
+    /**
+     * Sort and rewrite full compaction manifests. Files are sorted by min partition value, then
+     * processed in batches. A batch stops when total size reaches threshold or when current max
+     * doesn't overlap with next min. Each batch is sorted, deduplicated (DELETE entries removed),
+     * and written to new manifest files.
+     *
+     * <p>"Current max" is the largest max value of all files already in the batch, the same
+     * running bound {@code splitIntoSections} uses. A batch is only closed by the size or gap
+     * rule once it holds at least two files; the last file always closes the batch.
+     *
+     * <p>If a batch fails, the manifests already written by earlier batches are deleted before
+     * the exception is rethrown, because the caller never gets to see them.
+     *
+     * @param fullCompactionManifests files to rewrite; sorted in place
+     * @param manifestFile reader/writer for manifest files
+     * @param sortFieldIndex 0-based position of the sort field in the partition row
+     * @param sortFieldType declared type of the sort field
+     * @param suggestedMetaSize batch size threshold in bytes
+     * @param deletedIdentifiers identifiers whose ADD entries must be dropped, or {@code null}
+     * @return the newly written manifests, in batch order
+     */
+    private static List<ManifestFileMeta> sortAndRewriteFullCompaction(
+            List<ManifestFileMeta> fullCompactionManifests,
+            ManifestFile manifestFile,
+            int sortFieldIndex,
+            DataType sortFieldType,
+            long suggestedMetaSize,
+            @Nullable Set<FileEntry.Identifier> deletedIdentifiers)
+            throws Exception {
+
+        // Sort by min partition value
+        fullCompactionManifests.sort(
+                (a, b) ->
+                        compareField(
+                                a.partitionStats().minValues(),
+                                b.partitionStats().minValues(),
+                                sortFieldIndex,
+                                sortFieldType));
+
+        List<ManifestFileMeta> result = new ArrayList<>();
+        List<ManifestFileMeta> batch = new ArrayList<>();
+        long batchSize = 0;
+        BinaryRow batchMax = null;
+        int total = fullCompactionManifests.size();
+
+        try {
+            for (int i = 0; i < total; i++) {
+                ManifestFileMeta current = fullCompactionManifests.get(i);
+                boolean joinsExistingBatch = !batch.isEmpty();
+
+                batch.add(current);
+                batchSize += current.fileSize();
+                BinaryRow currentMax = current.partitionStats().maxValues();
+                if (batchMax == null
+                        || compareField(currentMax, batchMax, sortFieldIndex, sortFieldType) > 0) {
+                    batchMax = currentMax;
+                }
+
+                boolean isLast = i == total - 1;
+                // Size rule: the batch including the current file reaches the threshold.
+                boolean sizeReached = joinsExistingBatch && batchSize >= suggestedMetaSize;
+                // Gap rule: everything in the batch ends before the next file starts.
+                boolean gapToNext =
+                        joinsExistingBatch
+                                && !isLast
+                                && compareField(
+                                                batchMax,
+                                                fullCompactionManifests
+                                                        .get(i + 1)
+                                                        .partitionStats()
+                                                        .minValues(),
+                                                sortFieldIndex,
+                                                sortFieldType)
+                                        < 0;
+
+                if (isLast || sizeReached || gapToNext) {
+                    // Process batch: sort entries, remove DELETE, write out
+                    result.addAll(
+                            sortAndRewriteSection(
+                                    batch,
+                                    manifestFile,
+                                    sortFieldIndex,
+                                    sortFieldType,
+                                    deletedIdentifiers));
+                    batch.clear();
+                    batchSize = 0;
+                    batchMax = null;
+                }
+            }
+        } catch (Exception e) {
+            // Earlier batches are already on storage but unknown to the caller: remove them.
+            for (ManifestFileMeta written : result) {
+                manifestFile.delete(written.fileName());
+            }
+            throw e;
+        }
+
+        return result;
+    }
+
+    /**
+     * Read all entries from a section's manifest files, sort them in memory by the specified
+     * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving
+     * entries to the rolling writer.
+     *
+     * <p>Only ADD entries are ever written. With a {@code null} {@code deletedIdentifiers} no ADD
+     * entry is treated as cancelled, so DELETE entries in the section are dropped without being
+     * applied.
+     * NOTE(review): callers should pass {@code null} only for sections that contain no DELETE
+     * entries.
+     *
+     * <p>If reading, writing or closing fails, the writer is aborted before the exception is
+     * rethrown.
+     *
+     * @param section manifest files to merge into one sorted output
+     * @param manifestFile reader/writer for manifest files
+     * @param sortFieldIndex 0-based position of the sort field in the partition row
+     * @param sortFieldType declared type of the sort field
+     * @param deletedIdentifiers identifiers whose ADD entries must be dropped, or {@code null}
+     * @return the manifests produced by the rolling writer
+     */
+    private static List<ManifestFileMeta> sortAndRewriteSection(
+            List<ManifestFileMeta> section,
+            ManifestFile manifestFile,
+            int sortFieldIndex,
+            DataType sortFieldType,
+            @Nullable Set<FileEntry.Identifier> deletedIdentifiers)
+            throws Exception {
+
+        List<ManifestEntry> allEntries = new ArrayList<>();
+        for (ManifestFileMeta meta : section) {
+            allEntries.addAll(manifestFile.read(meta.fileName(), meta.fileSize()));
+        }
+
+        allEntries.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType));
+
+        Set<FileEntry.Identifier> safeDeletedIds =
+                deletedIdentifiers != null ? deletedIdentifiers : new HashSet<>();
+
+        RollingFileWriter<ManifestEntry, ManifestFileMeta> writer =
+                manifestFile.createRollingWriter();
+        try {
+            for (ManifestEntry entry : allEntries) {
+                // FileKind is not imported in this file (see the import hunk), so it is
+                // referenced by its fully qualified name.
+                boolean isAdd = entry.kind() == org.apache.paimon.manifest.FileKind.ADD;
+                if (isAdd && !safeDeletedIds.contains(entry.identifier())) {
+                    writer.write(entry);
+                }
+            }
+            writer.close();
+        } catch (Exception e) {
+            // Abort instead of leaving the partial output behind.
+            writer.abort();
+            throw e;
+        }
+        return writer.result();
+    }
+
+    /**
+     * Parse a long option from table options with a default value.
+     *
+     * @param options table options to read from
+     * @param key option key
+     * @param defaultValue value returned when the option is missing, empty or not a valid long
+     * @return the parsed value, or {@code defaultValue}
+     */
+    private static long parseLongOption(Options options, String key, long defaultValue) {
+        String value = options.get(key);
+        if (value == null || value.isEmpty()) {
+            return defaultValue;
+        }
+        try {
+            return Long.parseLong(value.trim());
+        } catch (NumberFormatException e) {
+            // A malformed value is treated as a misconfiguration, not a failure.
+            LOG.warn(
+                    "Invalid long value '{}' for option '{}', using default {}.",
+                    value,
+                    key,
+                    defaultValue);
+            return defaultValue;
+        }
+    }
+
     private static class FullCompactionReadResult {
 
         private final ManifestFileMeta file;
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java
new file mode 100644
index 000000000000..cc88417b2765
--- /dev/null
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.operation;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Pick strategy for manifest LSM Tree compaction.
+ *
+ * <p>Strategy priority:
+ *
+ * <ol>
+ *   <li>SizeAmp: if all lower-level runs' total size exceeds the highest-level run's size
+ *       times {@code sizeAmpThreshold}, trigger full compaction (pick all runs).
+ *   <li>SizeRatio: from low to high, pick every run above level 1 whose size is at most the
+ *       already picked size times {@code sizeRatioThreshold}. A run that is not picked does
+ *       not stop the scan.
+ *   <li>Forced pick: level0 and level1 runs are always picked.
+ *   <li>Delete pick: additionally pick runs containing manifest files with {@code
+ *       numDeletedFiles > 0}. NOTE(review): this rule is not implemented in this class.
+ * </ol>
+ */
+public class ManifestPickStrategy {
+
+    private final int sizeAmpThreshold;
+    private final int sizeRatioThreshold;
+
+    /**
+     * @param sizeAmpThreshold multiplier applied to the highest-level run's size in the SizeAmp
+     *     check
+     * @param sizeRatioThreshold multiplier applied to the already picked size in the SizeRatio
+     *     check
+     */
+    public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) {
+        this.sizeAmpThreshold = sizeAmpThreshold;
+        this.sizeRatioThreshold = sizeRatioThreshold;
+    }
+
+    /**
+     * Pick runs that need compaction from the given level runs.
+     *
+     * @param levelRuns runs with assigned levels (level 0~4)
+     * @return a new list of picked runs to compact: all runs in input order when SizeAmp
+     *     triggers, otherwise the forced and SizeRatio picks in ascending level order; empty
+     *     when {@code levelRuns} is empty
+     */
+    public List<ManifestSortedRun> pick(List<ManifestSortedRun> levelRuns) {
+        if (levelRuns.isEmpty()) {
+            return new ArrayList<>();
+        }
+
+        // Try SizeAmp first
+        if (sizeAmpTriggered(levelRuns)) {
+            return new ArrayList<>(levelRuns);
+        }
+
+        // SizeRatio + forced pick
+        return pickForSizeRatioAndForce(levelRuns);
+    }
+
+    /**
+     * SizeAmp check: true when the total size of all runs below the maximum level present is
+     * greater than the size of the (first) run at that maximum level times {@code
+     * sizeAmpThreshold}. Never true when the maximum level is 0 or lower.
+     */
+    private boolean sizeAmpTriggered(List<ManifestSortedRun> levelRuns) {
+        int maxLevel = -1;
+        ManifestSortedRun highestRun = null;
+        for (ManifestSortedRun run : levelRuns) {
+            if (run.level() > maxLevel) {
+                maxLevel = run.level();
+                highestRun = run;
+            }
+        }
+        if (highestRun == null || maxLevel <= 0) {
+            return false;
+        }
+
+        long lowerLevelTotalSize = 0;
+        for (ManifestSortedRun run : levelRuns) {
+            if (run.level() < maxLevel) {
+                lowerLevelTotalSize += run.totalSize();
+            }
+        }
+        return lowerLevelTotalSize > highestRun.totalSize() * sizeAmpThreshold;
+    }
+
+    /**
+     * SizeRatio + forced pick.
+     *
+     * <ul>
+     *   <li>Level0 and level1 are always picked.
+     *   <li>From low to high, a higher-level run is picked when something has been picked
+     *       already and the cumulative picked size * sizeRatioThreshold >= that run's size.
+     * </ul>
+     */
+    private List<ManifestSortedRun> pickForSizeRatioAndForce(List<ManifestSortedRun> levelRuns) {
+        // Sort by level ascending for low-to-high traversal
+        List<ManifestSortedRun> byLevel = new ArrayList<>(levelRuns);
+        byLevel.sort(Comparator.comparingInt(ManifestSortedRun::level));
+
+        // Insertion-ordered so the returned list is deterministic.
+        Set<ManifestSortedRun> picked = new LinkedHashSet<>();
+        long pickedSize = 0;
+
+        for (ManifestSortedRun run : byLevel) {
+            boolean forced = run.level() <= 1;
+            boolean withinRatio =
+                    pickedSize > 0 && pickedSize * sizeRatioThreshold >= run.totalSize();
+            if (forced || withinRatio) {
+                picked.add(run);
+                pickedSize += run.totalSize();
+            }
+        }
+
+        return new ArrayList<>(picked);
+    }
+}
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java
new file mode 100644
index 000000000000..49baabfe7161
--- /dev/null
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.operation;
+
+import org.apache.paimon.data.BinaryRow;
+import org.apache.paimon.manifest.ManifestFileMeta;
+import org.apache.paimon.utils.Preconditions;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * A {@code ManifestSortedRun} is a list of {@link ManifestFileMeta}s sorted by a single partition
+ * field (the configured manifest sort field). The intervals {@code [partitionStats.minValues[k],
+ * partitionStats.maxValues[k]]} of these manifests do not overlap on field {@code k}, where {@code
+ * k} is the configured sort field index.
+ *
+ * <p>The file list and total size are fixed at construction; only {@code level} is mutable.
+ */
+public class ManifestSortedRun {
+
+    /** Level assigned by the caller; {@code -1} until {@link #setLevel(int)} is called. */
+    private int level;
+
+    private final List<ManifestFileMeta> files;
+    private final long totalSize;
+
+    private ManifestSortedRun(List<ManifestFileMeta> files) {
+        this.level = -1;
+        // Copy so that later changes to the caller's list cannot alter this run.
+        this.files = Collections.unmodifiableList(new ArrayList<>(files));
+        long size = 0L;
+        for (ManifestFileMeta file : this.files) {
+            size += file.fileSize();
+        }
+        this.totalSize = size;
+    }
+
+    /** Creates a run without files. */
+    public static ManifestSortedRun empty() {
+        return new ManifestSortedRun(Collections.emptyList());
+    }
+
+    /** Creates a run holding exactly {@code file}. */
+    public static ManifestSortedRun fromSingle(ManifestFileMeta file) {
+        return new ManifestSortedRun(Collections.singletonList(file));
+    }
+
+    /**
+     * Build a {@code ManifestSortedRun} from an already-sorted list. The caller MUST guarantee that
+     * {@code sortedFiles} is sorted ascending on the configured sort field's min value, and that
+     * intervals do not overlap on that field.
+     */
+    public static ManifestSortedRun fromSorted(List<ManifestFileMeta> sortedFiles) {
+        return new ManifestSortedRun(sortedFiles);
+    }
+
+    /** Returns the files of this run as an unmodifiable list. */
+    public List<ManifestFileMeta> files() {
+        return files;
+    }
+
+    public boolean isEmpty() {
+        return files.isEmpty();
+    }
+
+    public boolean nonEmpty() {
+        return !isEmpty();
+    }
+
+    /** Returns the sum of {@code fileSize()} over all files of this run. */
+    public long totalSize() {
+        return totalSize;
+    }
+
+    public int level() {
+        return level;
+    }
+
+    public void setLevel(int level) {
+        this.level = level;
+    }
+
+    /**
+     * Validate that this run is monotonically non-overlapping on the sort field at {@code
+     * sortFieldIndex}. Used in tests and as an assertion in development.
+     *
+     * <p>Each file's max must be strictly less than the next file's min: the intervals are
+     * closed, so equal bounds already overlap.
+     *
+     * @param sortFieldIndex sort field position, used only in the failure message
+     * @param partitionComparator orders two partition rows on the sort field
+     * @throws IllegalStateException if two neighbouring files overlap
+     */
+    public void validate(int sortFieldIndex, Comparator<BinaryRow> partitionComparator) {
+        for (int i = 1; i < files.size(); i++) {
+            BinaryRow prevMax = files.get(i - 1).partitionStats().maxValues();
+            BinaryRow currMin = files.get(i).partitionStats().minValues();
+            Preconditions.checkState(
+                    partitionComparator.compare(prevMax, currMin) < 0,
+                    "ManifestSortedRun is not sorted on field %s; prev.max >= curr.min",
+                    sortFieldIndex);
+        }
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (!(o instanceof ManifestSortedRun)) {
+            return false;
+        }
+        ManifestSortedRun that = (ManifestSortedRun) o;
+        return level == that.level && files.equals(that.files);
+    }
+
+    @Override
+    public int hashCode() {
+        // The mutable level is left out on purpose: a run that already sits in a hash-based
+        // collection must keep its hash when setLevel is called. Equal runs have equal files,
+        // so this stays consistent with equals.
+        return Objects.hash(files);
+    }
+
+    @Override
+    public String toString() {
+        return "ManifestSortedRun{level="
+                + level
+                + ", files=["
+                + files.stream().map(ManifestFileMeta::fileName).collect(Collectors.joining(", "))
+                + "]}";
+    }
+}
diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java
index 36b0d15f114f..3b6bc379840b 100644
--- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java
+++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java
@@ -18,12 +18,14 @@ package
org.apache.paimon.manifest; +import org.apache.paimon.CoreOptions; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.fs.Path; import org.apache.paimon.fs.SeekableInputStream; import org.apache.paimon.fs.SeekableInputStreamWrapper; import org.apache.paimon.fs.local.LocalFileIO; import org.apache.paimon.operation.ManifestFileMerger; +import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; @@ -84,9 +86,16 @@ public void testMergeWithoutFullCompaction(int numLastBits) { createData(numLastBits, input, expected); // no trigger Full Compaction + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "9223372036854775807B"); List actual = ManifestFileMerger.merge( - input, manifestFile, 500, 3, Long.MAX_VALUE, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertThat(actual).hasSameSizeAs(expected); // these two manifest files are merged from the input @@ -118,14 +127,16 @@ private void testCleanUp(List input, long fullCompactionThresh ManifestFile failingManifestFile = createManifestFile(FailingFileIO.getFailingPath(failingName, tempDir.toString())); try { + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set( + "manifest.full-compaction-threshold-size", fullCompactionThreshold + "B"); ManifestFileMerger.merge( input, failingManifestFile, - 500, - 3, - fullCompactionThreshold, getPartitionType(), - null); + CoreOptions.fromMap(testOptions.toMap())); } catch (Throwable e) { assertThat(e).hasRootCauseExactlyInstanceOf(FailingFileIO.ArtificialException.class); // old files should be kept untouched, while new 
files should be cleaned up @@ -156,9 +167,16 @@ public void testMerge() { // delta with delete apply partition 1,2 addDeltaManifests(input, true); // trigger full compaction + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); // 1st Manifest don't need to Merge assertSameContent(input.get(0), merged.get(0), manifestFile); @@ -173,9 +191,16 @@ public void testMergeWithoutDelta() { // base List input = createBaseManifestFileMetas(true); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries(input, merged); assertThat(merged).hasSameElementsAs(input); @@ -186,9 +211,16 @@ public void testMergeWithoutDelta() { ManifestFileMeta delta = makeManifest(makeEntry(true, "A", 1), makeEntry(false, "A", 1)); input1.add(delta); + Options testOptions1 = new Options(); + testOptions1.set("manifest.target-file-size", "500B"); + testOptions1.set("manifest.merge-min-count", "3"); + testOptions1.set("manifest.full-compaction-threshold-size", "200B"); List merged1 = ManifestFileMerger.merge( - input1, manifestFile, 500, 3, 200, getPartitionType(), null); + input1, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions1.toMap())); assertThat(base).hasSameElementsAs(merged1); assertEquivalentEntries(input1, merged1); @@ -198,9 +230,16 @@ public 
void testMergeWithoutDelta() { public void testMergeWithoutBase() { List input = new ArrayList<>(); addDeltaManifests(input, true); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries(input, merged); } @@ -225,9 +264,16 @@ public void testMergeWithoutDeleteFile() { input.add(makeManifest(makeEntry(true, "F"))); input.add(makeManifest(makeEntry(true, "G"))); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries(input, merged); } @@ -489,9 +535,16 @@ public void testMergeFullCompactionWithoutDeleteFile() { input.add(makeManifest(makeEntry(true, "F"))); input.add(makeManifest(makeEntry(true, "G"))); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", threshold + "B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, threshold, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries( input.stream() .filter(f -> !baseFiles.contains(f.fileName())) diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/NoPartitionManifestFileMetaTest.java 
b/paimon-core/src/test/java/org/apache/paimon/manifest/NoPartitionManifestFileMetaTest.java index 591b3206518d..66465f1e7531 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/NoPartitionManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/NoPartitionManifestFileMetaTest.java @@ -18,7 +18,9 @@ package org.apache.paimon.manifest; +import org.apache.paimon.CoreOptions; import org.apache.paimon.operation.ManifestFileMerger; +import org.apache.paimon.options.Options; import org.apache.paimon.types.RowType; import org.junit.jupiter.api.BeforeEach; @@ -49,9 +51,16 @@ public void testMerge() { List input = createBaseManifestFileMetas(false); addDeltaManifests(input, false); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, 500, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries(input, merged); // the first one is not deleted, it should not be merged @@ -89,9 +98,16 @@ public void testMergeFullCompactionWithoutDeleteFile() { input.add(makeManifest(makeEntry(true, "F", null))); input.add(makeManifest(makeEntry(true, "G", null))); + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", threshold + "B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); List merged = ManifestFileMerger.merge( - input, manifestFile, threshold, 3, 200, getPartitionType(), null); + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); assertEquivalentEntries( input.stream() .filter(f -> !baseFiles.contains(f.fileName())) From 21635b4ecef9aacd08a7b7268216eababfa9aa92 Mon Sep 17 00:00:00 2001 
From: umi Date: Wed, 13 May 2026 19:39:24 +0800 Subject: [PATCH 02/51] fix --- .../java/org/apache/paimon/operation/ManifestFileMerger.java | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 3d7cccd5b34b..fba89c3b7626 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -22,6 +22,7 @@ import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; +import org.apache.paimon.manifest.FileKind; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; From 5298e7cb4d448313889a4de45268c800d2e3d8fc Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 13 May 2026 20:38:25 +0800 Subject: [PATCH 03/51] addTest --- .../paimon/operation/ManifestFileMerger.java | 9 +- .../paimon/manifest/ManifestFileMetaTest.java | 398 ++++++++++++++++++ 2 files changed, 403 insertions(+), 4 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index fba89c3b7626..17c14258599d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -441,9 +441,10 @@ private static Optional> trySortRewrite( reusedFiles.addAll(run.files()); } } - + List result = new ArrayList<>(reusedFiles); if (pickedRuns.isEmpty()) { - return Optional.of(new ArrayList<>(input)); + result.addAll(fullCompactionRewritten); + return Optional.of(new ArrayList<>(result)); } // Step 7: Split picked files into sections, sort and rewrite each. 
@@ -458,7 +459,7 @@ private static Optional> trySortRewrite( parseLongOption(tableOptions, "manifest-sort.max-rewrite-size", Long.MAX_VALUE); long processedSize = 0; - List result = new ArrayList<>(reusedFiles); + List sortNewFiles = new ArrayList<>(); for (List section : sections) { long sectionSize = 0; @@ -473,7 +474,7 @@ private static Optional> trySortRewrite( List merged = sortAndRewriteSection( - section, manifestFile, sortFieldIndex, sortFieldType, deleteEntries); + section, manifestFile, sortFieldIndex, sortFieldType, null); sortNewFiles.addAll(merged); result.addAll(merged); } diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index 3b6bc379840b..f4adf35802e4 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -872,4 +872,402 @@ private void beforeFirstRead() throws IOException { } } } + + // ==================== Manifest Sort Tests ==================== + + /** + * Test manifest sort with overlapping partition ranges. Each manifest contains entries spanning + * multiple partitions, creating overlapping intervals that require sort rewrite to resolve. + * + *

Input manifests (deliberately unordered and overlapping): + * + *

+     *   manifest-A: partitions [5, 9]  (entries in partition 5,6,7,8,9)
+     *   manifest-B: partitions [0, 4]  (entries in partition 0,1,2,3,4)
+     *   manifest-C: partitions [3, 7]  (entries in partition 3,4,5,6,7) -- overlaps A and B
+     *   manifest-D: partitions [8, 12] (entries in partition 8,9,10,11,12) -- overlaps A
+     *   manifest-E: partitions [1, 3]  (entries in partition 1,2,3) -- overlaps B and C
+     *   manifest-F: partitions [10, 14](entries in partition 10,11,12,13,14) -- overlaps D
+     * 
+ * + *

After sort rewrite, all surviving ADD entries should be sorted by partition field. + */ + @Test + public void testManifestSortWithOverlappingPartitions() { + List input = new ArrayList<>(); + + // manifest-A: partitions [5, 9] + List entriesA = new ArrayList<>(); + for (int p = 5; p <= 9; p++) { + entriesA.add(makeEntry(true, String.format("A-p%d", p), p)); + } + input.add(makeManifest(entriesA.toArray(new ManifestEntry[0]))); + + // manifest-B: partitions [0, 4] + List entriesB = new ArrayList<>(); + for (int p = 0; p <= 4; p++) { + entriesB.add(makeEntry(true, String.format("B-p%d", p), p)); + } + input.add(makeManifest(entriesB.toArray(new ManifestEntry[0]))); + + // manifest-C: partitions [3, 7] -- overlaps with A and B + List entriesC = new ArrayList<>(); + for (int p = 3; p <= 7; p++) { + entriesC.add(makeEntry(true, String.format("C-p%d", p), p)); + } + input.add(makeManifest(entriesC.toArray(new ManifestEntry[0]))); + + // manifest-D: partitions [8, 12] -- overlaps with A + List entriesD = new ArrayList<>(); + for (int p = 8; p <= 12; p++) { + entriesD.add(makeEntry(true, String.format("D-p%d", p), p)); + } + input.add(makeManifest(entriesD.toArray(new ManifestEntry[0]))); + + // manifest-E: partitions [1, 3] -- overlaps with B and C + List entriesE = new ArrayList<>(); + for (int p = 1; p <= 3; p++) { + entriesE.add(makeEntry(true, String.format("E-p%d", p), p)); + } + input.add(makeManifest(entriesE.toArray(new ManifestEntry[0]))); + + // manifest-F: partitions [10, 14] -- overlaps with D + List entriesF = new ArrayList<>(); + for (int p = 10; p <= 14; p++) { + entriesF.add(makeEntry(true, String.format("F-p%d", p), p)); + } + input.add(makeManifest(entriesF.toArray(new ManifestEntry[0]))); + + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "200B"); + testOptions.set("manifest-sort.enable", "true"); 
+ + List merged = + ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Verify entries are equivalent (no data loss) + assertEquivalentEntries(input, merged); + + // Verify all entries within each output manifest are sorted by partition + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + assertThat(currPartition) + .as("Entries within a manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevPartition); + } + } + + // Verify manifest files themselves are ordered by minValues + for (int i = 1; i < merged.size(); i++) { + int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); + int currMin = merged.get(i).partitionStats().minValues().getInt(0); + assertThat(currMin).isGreaterThanOrEqualTo(prevMin); + } + } + + /** + * Test manifest sort with more manifests having overlapping partition ranges. Creates a larger + * number of manifests in shuffled order to stress-test the sort rewrite logic. + * + *

Input manifests (shuffled, all ADD-only): + * + *

+     *   manifest-1: partitions [6, 10]
+     *   manifest-2: partitions [0, 3]
+     *   manifest-3: partitions [4, 8]  -- overlaps 1
+     *   manifest-4: partitions [9, 14] -- overlaps 1
+     *   manifest-5: partitions [2, 5]  -- overlaps 2 and 3
+     *   manifest-6: partitions [11, 15]-- overlaps 4
+     * 
+ */ + @Test + public void testManifestSortWithShuffledOverlappingPartitions() { + List input = new ArrayList<>(); + + // manifest-1: partitions [6, 10] + List entries1 = new ArrayList<>(); + for (int p = 6; p <= 10; p++) { + entries1.add(makeEntry(true, String.format("m1-p%d", p), p)); + } + input.add(makeManifest(entries1.toArray(new ManifestEntry[0]))); + + // manifest-2: partitions [0, 3] + List entries2 = new ArrayList<>(); + for (int p = 0; p <= 3; p++) { + entries2.add(makeEntry(true, String.format("m2-p%d", p), p)); + } + input.add(makeManifest(entries2.toArray(new ManifestEntry[0]))); + + // manifest-3: partitions [4, 8] -- overlaps manifest-1 and manifest-2 + List entries3 = new ArrayList<>(); + for (int p = 4; p <= 8; p++) { + entries3.add(makeEntry(true, String.format("m3-p%d", p), p)); + } + input.add(makeManifest(entries3.toArray(new ManifestEntry[0]))); + + // manifest-4: partitions [9, 14] -- overlaps manifest-1 + List entries4 = new ArrayList<>(); + for (int p = 9; p <= 14; p++) { + entries4.add(makeEntry(true, String.format("m4-p%d", p), p)); + } + input.add(makeManifest(entries4.toArray(new ManifestEntry[0]))); + + // manifest-5: partitions [2, 5] -- overlaps manifest-2 and manifest-3 + List entries5 = new ArrayList<>(); + for (int p = 2; p <= 5; p++) { + entries5.add(makeEntry(true, String.format("m5-p%d", p), p)); + } + input.add(makeManifest(entries5.toArray(new ManifestEntry[0]))); + + // manifest-6: partitions [11, 15] -- overlaps manifest-4 + List entries6 = new ArrayList<>(); + for (int p = 11; p <= 15; p++) { + entries6.add(makeEntry(true, String.format("m6-p%d", p), p)); + } + input.add(makeManifest(entries6.toArray(new ManifestEntry[0]))); + + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "100B"); + testOptions.set("manifest-sort.enable", "true"); + + List merged = + 
ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Verify no data loss + assertEquivalentEntries(input, merged); + + // Verify entries within each output manifest are sorted by partition + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + assertThat(currPartition) + .as("Entries within a manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevPartition); + } + } + + // Verify output manifests are ordered by minValues + for (int i = 1; i < merged.size(); i++) { + int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); + int currMin = merged.get(i).partitionStats().minValues().getInt(0); + assertThat(currMin).isGreaterThanOrEqualTo(prevMin); + } + } + + /** + * Test manifest sort with heavily overlapping manifests that form multiple sorted runs. This + * exercises buildLevelSortedRuns and the LSM level assignment logic. + * + *

Creates manifests whose partition ranges overlap in various ways: + * + *

+     *   run1 (non-overlapping): [0,2], [3,5], [6,8]
+     *   run2 (overlapping with run1): [1,4], [5,7]
+     *   run3 (overlapping with both): [0,9]
+     * 
+ */ + @Test + public void testManifestSortWithMultipleOverlappingRuns() { + List input = new ArrayList<>(); + + // Run1: non-overlapping within itself [0,2], [3,5], [6,8] + input.add( + makeManifest( + makeEntry(true, "r1a-p0", 0), + makeEntry(true, "r1a-p1", 1), + makeEntry(true, "r1a-p2", 2))); + input.add( + makeManifest( + makeEntry(true, "r1b-p3", 3), + makeEntry(true, "r1b-p4", 4), + makeEntry(true, "r1b-p5", 5))); + input.add( + makeManifest( + makeEntry(true, "r1c-p6", 6), + makeEntry(true, "r1c-p7", 7), + makeEntry(true, "r1c-p8", 8))); + + // Run2: overlaps with run1 [1,4], [5,7] + input.add( + makeManifest( + makeEntry(true, "r2a-p1", 1), + makeEntry(true, "r2a-p2", 2), + makeEntry(true, "r2a-p3", 3), + makeEntry(true, "r2a-p4", 4))); + input.add( + makeManifest( + makeEntry(true, "r2b-p5", 5), + makeEntry(true, "r2b-p6", 6), + makeEntry(true, "r2b-p7", 7))); + + // Run3: a large manifest overlapping everything [0,9] + List run3Entries = new ArrayList<>(); + for (int p = 0; p <= 9; p++) { + run3Entries.add(makeEntry(true, String.format("r3-p%d", p), p)); + } + input.add(makeManifest(run3Entries.toArray(new ManifestEntry[0]))); + + Options testOptions = new Options(); + testOptions.set("manifest.target-file-size", "500B"); + testOptions.set("manifest.merge-min-count", "3"); + testOptions.set("manifest.full-compaction-threshold-size", "100B"); + testOptions.set("manifest-sort.enable", "true"); + + List merged = + ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Verify no data loss + assertEquivalentEntries(input, merged); + + // Verify entries within each output manifest are sorted by partition + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + 
assertThat(currPartition) + .as( + "Entries within manifest should be sorted, but found %d after %d", + currPartition, prevPartition) + .isGreaterThanOrEqualTo(prevPartition); + } + } + + // Verify output manifests are ordered by minValues + for (int i = 1; i < merged.size(); i++) { + int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); + int currMin = merged.get(i).partitionStats().minValues().getInt(0); + assertThat(currMin).isGreaterThanOrEqualTo(prevMin); + } + } + + /** + * Test that sort rewrite correctly eliminates DELETE entries and their corresponding ADD + * entries. The key condition is that totalDeltaFileSize must reach manifestFullCompactionSize + * to trigger the full compaction path inside trySortRewrite, which reads deleteEntries and + * passes them to sortAndRewriteSection for elimination. + * + *

Design: + * + *

+     *   - Base manifests with overlapping partitions (all ADD, small enough to be "mustChange"
+     *     since fileSize < suggestedMetaSize):
+     *     manifest-A: partitions [0, 4] with entries A-p0..A-p4
+     *     manifest-B: partitions [2, 6] with entries B-p2..B-p6 (overlaps A)
+     *     manifest-C: partitions [5, 9] with entries C-p5..C-p9 (overlaps B)
+     *   - Delta manifests with DELETE entries (cancel some ADD entries):
+     *     manifest-D: DELETE A-p2, DELETE B-p4, ADD new-p2, ADD new-p4
+     *     manifest-E: DELETE C-p7, ADD new-p7
+     *   - After sort rewrite: A-p2, B-p4, C-p7 should be eliminated,
+     *     replaced by new-p2, new-p4, new-p7. Output should only contain ADD entries,
+     *     sorted by partition.
+     * 
+ */ + @Test + public void testManifestSortEliminatesDeleteEntries() { + List input = new ArrayList<>(); + + // manifest-A: partitions [0, 4] + List entriesA = new ArrayList<>(); + for (int p = 0; p <= 4; p++) { + entriesA.add(makeEntry(true, String.format("A-p%d", p), p)); + } + input.add(makeManifest(entriesA.toArray(new ManifestEntry[0]))); + + // manifest-B: partitions [2, 6] -- overlaps A + List entriesB = new ArrayList<>(); + for (int p = 2; p <= 6; p++) { + entriesB.add(makeEntry(true, String.format("B-p%d", p), p)); + } + input.add(makeManifest(entriesB.toArray(new ManifestEntry[0]))); + + // manifest-C: partitions [5, 9] -- overlaps B + List entriesC = new ArrayList<>(); + for (int p = 5; p <= 9; p++) { + entriesC.add(makeEntry(true, String.format("C-p%d", p), p)); + } + input.add(makeManifest(entriesC.toArray(new ManifestEntry[0]))); + + // manifest-D: DELETE A-p2, DELETE B-p4, ADD new-p2, ADD new-p4 + input.add( + makeManifest( + makeEntry(false, "A-p2", 2), + makeEntry(false, "B-p4", 4), + makeEntry(true, "new-p2", 2), + makeEntry(true, "new-p4", 4))); + + // manifest-E: DELETE C-p7, ADD new-p7 + input.add(makeManifest(makeEntry(false, "C-p7", 7), makeEntry(true, "new-p7", 7))); + + Options testOptions = new Options(); + // Set target file size very large so all input manifests are considered "small" + // (fileSize < suggestedMetaSize), which makes them all satisfy mustChange condition + testOptions.set("manifest.target-file-size", "16MB"); + testOptions.set("manifest.merge-min-count", "3"); + // Set full-compaction threshold very small to ensure it triggers + testOptions.set("manifest.full-compaction-threshold-size", "1B"); + testOptions.set("manifest-sort.enable", "true"); + + List merged = + ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Collect all output entries + List allOutputEntries = new ArrayList<>(); + for (ManifestFileMeta meta : merged) { + 
allOutputEntries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); + } + + // Verify: no DELETE entries in output (all DELETE pairs eliminated) + long deleteCount = + allOutputEntries.stream().filter(e -> e.kind() == FileKind.DELETE).count(); + assertThat(deleteCount).as("Sort rewrite should eliminate all DELETE entries").isEqualTo(0); + + // Verify: the deleted ADD entries (A-p2, B-p4, C-p7) are NOT in output + Set outputFileNames = + allOutputEntries.stream().map(e -> e.file().fileName()).collect(Collectors.toSet()); + assertThat(outputFileNames).doesNotContain("A-p2", "B-p4", "C-p7"); + + // Verify: the replacement entries (new-p2, new-p4, new-p7) ARE in output + assertThat(outputFileNames).contains("new-p2", "new-p4", "new-p7"); + + // Verify: all surviving entries match what FileEntry.mergeEntries would produce + assertEquivalentEntries(input, merged); + + // Verify entries within each output manifest are sorted by partition + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + assertThat(currPartition) + .as("Entries within manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevPartition); + } + } + } } From b0639b8e715ad8e920c3146653bc2dba571674b0 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 13 May 2026 20:45:00 +0800 Subject: [PATCH 04/51] spotless --- .../java/org/apache/paimon/operation/ManifestFileMerger.java | 1 - 1 file changed, 1 deletion(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 17c14258599d..26d1a405284a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ 
b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -459,7 +459,6 @@ private static Optional> trySortRewrite( parseLongOption(tableOptions, "manifest-sort.max-rewrite-size", Long.MAX_VALUE); long processedSize = 0; - List sortNewFiles = new ArrayList<>(); for (List section : sections) { long sectionSize = 0; From 290e19a84618397f1ca4989ee7f21b6874e2e063 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 13 May 2026 21:32:15 +0800 Subject: [PATCH 05/51] fix --- .../java/org/apache/paimon/CoreOptions.java | 28 +++++ .../paimon/operation/ManifestFileMerger.java | 114 +++++++++++++----- 2 files changed, 113 insertions(+), 29 deletions(-) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index aed7616d5707..e8f73dcb3bf6 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -495,6 +495,26 @@ public InlineElement getDescription() { + " partition field). For multi-partition tables, REQUIRED" + " when 'manifest-sort.enable' is true."); + public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = + key("manifest-sort.max-rewrite-size") + .memoryType() + .defaultValue(MemorySize.ofMebiBytes(256)) + .withDescription( + "Maximum total size of manifest files to rewrite in a single" + + " sort rewrite pass. Sections exceeding this limit are" + + " skipped. Set to a larger value to allow more aggressive" + + " sort rewriting."); + + public static final ConfigOption MANIFEST_SORT_OPEN_FILE_COST = + key("manifest-sort.open-file-cost") + .memoryType() + .defaultValue(MemorySize.ofMebiBytes(4)) + .withDescription( + "Open file cost of a manifest file during sort rewrite. 
" + + "It is added to each manifest file's size when computing " + + "section size, to avoid rewriting too many small manifest " + + "files in a single section."); + public static final ConfigOption UPSERT_KEY = key("upsert-key") .stringType() @@ -2636,6 +2656,14 @@ public String manifestSortPartitionField() { return options.get(MANIFEST_SORT_PARTITION_FIELD); } + public long manifestSortMaxRewriteSize() { + return options.get(MANIFEST_SORT_MAX_REWRITE_SIZE).getBytes(); + } + + public long manifestSortOpenFileCost() { + return options.get(MANIFEST_SORT_OPEN_FILE_COST).getBytes(); + } + public String partitionDefaultName() { return options.get(PARTITION_DEFAULT_NAME); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 26d1a405284a..74d887285d5a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -75,7 +75,6 @@ public static List merge( int suggestedMinMetaCount = options.manifestMergeMinCount(); long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); Integer manifestReadParallelism = options.scanManifestParallelism(); - Options tableOptions = options.toConfiguration(); // these are the newly created manifest files, clean them up if exception occurs List newFilesForAbort = new ArrayList<>(); @@ -84,8 +83,7 @@ public static List merge( Optional> merged; // If manifest-sort.enable is enabled and there are partition fields, use trySortRewrite - if (tableOptions.getBoolean("manifest-sort.enable", false) - && partitionType.getFieldCount() > 0) { + if (options.manifestSortEnable() && partitionType.getFieldCount() > 0) { merged = trySortRewrite( input, newFilesForAbort, manifestFile, partitionType, options); @@ -417,7 +415,8 @@ private static Optional> trySortRewrite( sortFieldIndex, 
sortFieldType, suggestedMetaSize, - deleteEntries); + deleteEntries, + manifestReadParallelism); newFilesForAbort.addAll(fullCompactionRewritten); } @@ -428,8 +427,8 @@ private static Optional> trySortRewrite( : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); // Step 6: Pick runs to compact. - int sizeAmpThreshold = tableOptions.getInteger("manifest-sort.size-amp-threshold", 2); - int sizeRatioThreshold = tableOptions.getInteger("manifest-sort.size-ratio-threshold", 10); + int sizeAmpThreshold = options.maxSizeAmplificationPercent(); + int sizeRatioThreshold = options.sortedRunSizeRatio(); ManifestPickStrategy pickStrategy = new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); List pickedRuns = pickStrategy.pick(levelRuns); @@ -455,15 +454,15 @@ private static Optional> trySortRewrite( List> sections = splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType); - long maxRewriteSize = - parseLongOption(tableOptions, "manifest-sort.max-rewrite-size", Long.MAX_VALUE); + long maxRewriteSize = options.manifestSortMaxRewriteSize(); long processedSize = 0; + long openFileCost = options.manifestSortOpenFileCost(); List sortNewFiles = new ArrayList<>(); for (List section : sections) { long sectionSize = 0; for (ManifestFileMeta m : section) { - sectionSize += m.fileSize(); + sectionSize += m.fileSize() + openFileCost; } if (processedSize + sectionSize > maxRewriteSize) { result.addAll(section); @@ -473,7 +472,12 @@ private static Optional> trySortRewrite( List merged = sortAndRewriteSection( - section, manifestFile, sortFieldIndex, sortFieldType, null); + section, + manifestFile, + sortFieldIndex, + sortFieldType, + null, + manifestReadParallelism); sortNewFiles.addAll(merged); result.addAll(merged); } @@ -696,7 +700,8 @@ private static List sortAndRewriteFullCompaction( int sortFieldIndex, DataType sortFieldType, long suggestedMetaSize, - @Nullable Set deletedIdentifiers) + @Nullable Set deletedIdentifiers, + @Nullable Integer 
manifestReadParallelism) throws Exception { // Sort by min partition value @@ -746,7 +751,8 @@ private static List sortAndRewriteFullCompaction( manifestFile, sortFieldIndex, sortFieldType, - deletedIdentifiers); + deletedIdentifiers, + manifestReadParallelism); result.addAll(rewritten); batch.clear(); batchSize = 0; @@ -759,38 +765,88 @@ private static List sortAndRewriteFullCompaction( /** * Read all entries from a section's manifest files, sort them in memory by the specified * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving - * entries to the rolling writer. + * entries to the rolling writer. Manifest files without delete entries and without cancelled + * ADD entries are kept as-is. + * + *

Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as + * {@link #tryFullCompaction}. */ private static List sortAndRewriteSection( List section, ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, - @Nullable Set deletedIdentifiers) + @Nullable Set deletedIdentifiers, + @Nullable Integer manifestReadParallelism) throws Exception { - List allEntries = new ArrayList<>(); - for (ManifestFileMeta meta : section) { - allEntries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); - } - - allEntries.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); - Set safeDeletedIds = deletedIdentifiers != null ? deletedIdentifiers : new HashSet<>(); - RollingFileWriter writer = - manifestFile.createRollingWriter(); - try { - for (ManifestEntry entry : allEntries) { - if (entry.kind() == FileKind.ADD && !safeDeletedIds.contains(entry.identifier())) { + // Parallel read: each meta is read independently + Function> reader = + meta -> singletonList(readForSortRewrite(meta, manifestFile, safeDeletedIds)); + + List result = new ArrayList<>(); + List entriesToRewrite = new ArrayList<>(); + + for (FullCompactionReadResult readResult : + sequentialBatchedExecute(reader, section, manifestReadParallelism)) { + if (readResult.requireChange) { + entriesToRewrite.addAll(readResult.entries); + } else { + result.add(readResult.file); + } + } + + if (!entriesToRewrite.isEmpty()) { + entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + + RollingFileWriter writer = + manifestFile.createRollingWriter(); + try { + for (ManifestEntry entry : entriesToRewrite) { writer.write(entry); } + } finally { + writer.close(); } - } finally { - writer.close(); + result.addAll(writer.result()); + } + + return result; + } + + /** + * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD + * entries not in {@code deletedIdentifiers} are returned. 
Otherwise, check if any ADD entry is + * cancelled; if not, the file is kept as-is ({@code requireChange = false}). + */ + private static FullCompactionReadResult readForSortRewrite( + ManifestFileMeta meta, + ManifestFile manifestFile, + Set deletedIdentifiers) { + if (meta.numDeletedFiles() > 0) { + List entries = new ArrayList<>(); + for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + if (entry.kind() == FileKind.ADD + && !deletedIdentifiers.contains(entry.identifier())) { + entries.add(entry); + } + } + return new FullCompactionReadResult(meta, true, entries); + } else { + boolean requireChange = false; + List entries = new ArrayList<>(); + for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + if (deletedIdentifiers.contains(entry.identifier())) { + requireChange = true; + } else { + entries.add(entry); + } + } + return new FullCompactionReadResult(meta, requireChange, entries); } - return writer.result(); } /** Parse a long option from table options with a default value. */ From b34f5678817779c30f80dd56e7d6ba68ceb04e53 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 00:01:24 +0800 Subject: [PATCH 06/51] fix --- docs/generated/core_configuration.html | 24 +++ .../paimon/operation/ManifestFileMerger.java | 190 ++++++++---------- .../operation/ManifestPickStrategy.java | 36 ++-- .../paimon/operation/ManifestSortedRun.java | 34 ---- 4 files changed, 120 insertions(+), 164 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index ebd8a5aca7d1..558367174df9 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -921,6 +921,30 @@ Integer To avoid frequent manifest merges, this parameter specifies the minimum number of ManifestFileMeta to merge. + +

manifest-sort.enable
+ false + Boolean + Whether to invoke manifest sort rewrite right after manifest merge during commit. The sort rewrite implementation is provided by an external module (e.g. morax) and discovered via ServiceLoader. When no implementation is registered on the classpath, this flag has no effect (manifest sort is silently skipped). + + +
manifest-sort.max-rewrite-size
+ 256 mb + MemorySize + Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. + + +
manifest-sort.open-file-cost
+ 4 mb + MemorySize + Open file cost of a manifest file during sort rewrite. It is added to each manifest file's size when computing section size, to avoid rewriting too many small manifest files in a single section. + + +
manifest-sort.partition-field
+ (none) + String + Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). For single-partition tables, optional (defaults to the only partition field). For multi-partition tables, REQUIRED when 'manifest-sort.enable' is true. +
manifest.target-file-size
8 mb diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 74d887285d5a..2363595d7168 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -26,7 +26,6 @@ import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; -import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.types.DataType; import org.apache.paimon.types.DecimalType; @@ -341,16 +340,12 @@ private static Optional> trySortRewrite( long suggestedMetaSize = options.manifestTargetSize().getBytes(); long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); Integer manifestReadParallelism = options.scanManifestParallelism(); - Options tableOptions = options.toConfiguration(); - + String sortPartitionField = options.manifestSortPartitionField(); // Step 1: Resolve sort field. - String sortField = resolveSortField(tableOptions.toMap(), partitionType); + String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { - LOG.warn( - "Cannot resolve sort field for manifest sort rewrite. " - + "Skipping sort. Configure 'manifest-sort.partition-field'" - + " for multi-partition tables."); - return Optional.of(input); + throw new IllegalArgumentException( + "Cannot resolve sort field for manifest sort rewrite. 
"); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); @@ -488,87 +483,6 @@ private static Optional> trySortRewrite( // ==================== Sort Rewrite Helpers ==================== - /** - * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. - */ - static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { - switch (type.getTypeRoot()) { - case INTEGER: - case DATE: - return Integer.compare(a.getInt(k), b.getInt(k)); - case BIGINT: - return Long.compare(a.getLong(k), b.getLong(k)); - case SMALLINT: - return Short.compare(a.getShort(k), b.getShort(k)); - case TINYINT: - return Byte.compare(a.getByte(k), b.getByte(k)); - case FLOAT: - return Float.compare(a.getFloat(k), b.getFloat(k)); - case DOUBLE: - return Double.compare(a.getDouble(k), b.getDouble(k)); - case BOOLEAN: - return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); - case VARCHAR: - case CHAR: - return a.getString(k).compareTo(b.getString(k)); - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return a.getTimestamp(k, type.defaultSize()) - .compareTo(b.getTimestamp(k, type.defaultSize())); - case DECIMAL: - DecimalType dt = (DecimalType) type; - return a.getDecimal(k, dt.getPrecision(), dt.getScale()) - .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); - default: - String errorMsg = - String.format( - "Unsupported partition field type '%s' for manifest sort rewrite. " - + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " - + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " - + "DECIMAL.", - type.getTypeRoot()); - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - } - - /** - * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. 
- * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field - * value AND the same data file are emitted contiguously. - */ - static int compareSortKey( - ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { - int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); - if (c != 0) { - return c; - } - return a.file().fileName().compareTo(b.file().fileName()); - } - - /** - * Resolve the partition field to sort manifests by. - * - *

Resolution rules: - * - *

    - *
  1. If {@code manifest-sort.partition-field} is configured, return that value. - *
  2. Otherwise, if the table has exactly one partition field, return that field name. - *
  3. Otherwise return {@code null}. - *
- */ - @Nullable - static String resolveSortField(Map tableOptions, RowType partitionType) { - String configured = tableOptions.get("manifest-sort.partition-field"); - if (configured != null && !configured.isEmpty()) { - return configured; - } - if (partitionType.getFieldCount() == 1) { - return partitionType.getFieldNames().get(0); - } - return null; - } - /** * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 @@ -816,7 +730,85 @@ private static List sortAndRewriteSection( return result; } + /** + * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. + */ + static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { + switch (type.getTypeRoot()) { + case INTEGER: + case DATE: + return Integer.compare(a.getInt(k), b.getInt(k)); + case BIGINT: + return Long.compare(a.getLong(k), b.getLong(k)); + case SMALLINT: + return Short.compare(a.getShort(k), b.getShort(k)); + case TINYINT: + return Byte.compare(a.getByte(k), b.getByte(k)); + case FLOAT: + return Float.compare(a.getFloat(k), b.getFloat(k)); + case DOUBLE: + return Double.compare(a.getDouble(k), b.getDouble(k)); + case BOOLEAN: + return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); + case VARCHAR: + case CHAR: + return a.getString(k).compareTo(b.getString(k)); + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return a.getTimestamp(k, type.defaultSize()) + .compareTo(b.getTimestamp(k, type.defaultSize())); + case DECIMAL: + DecimalType dt = (DecimalType) type; + return a.getDecimal(k, dt.getPrecision(), dt.getScale()) + .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); + default: + String errorMsg = + String.format( + "Unsupported partition field type '%s' for manifest sort rewrite. 
" + + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " + + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " + + "DECIMAL.", + type.getTypeRoot()); + LOG.error(errorMsg); + throw new UnsupportedOperationException(errorMsg); + } + } + + /** + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field + * value AND the same data file are emitted contiguously. + */ + static int compareSortKey( + ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { + int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + if (c != 0) { + return c; + } + return a.file().fileName().compareTo(b.file().fileName()); + } + /** + * Resolve the partition field to sort manifests by. + * + *

Resolution rules: + * + *

    + *
  1. If {@code manifest-sort.partition-field} is configured, return that value. + *
  2. Otherwise, if the table has exactly one partition field, return that field name. + *
  3. Otherwise return {@code null}. + *
+ */ + @Nullable + static String resolveSortField(String sortPartitionField, RowType partitionType) { + if (sortPartitionField != null && !sortPartitionField.isEmpty()) { + return sortPartitionField; + } + if (partitionType.getFieldCount() == 1) { + return partitionType.getFieldNames().get(0); + } + return null; + } /** * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD * entries not in {@code deletedIdentifiers} are returned. Otherwise, check if any ADD entry is @@ -849,24 +841,6 @@ private static FullCompactionReadResult readForSortRewrite( } } - /** Parse a long option from table options with a default value. */ - private static long parseLongOption(Options options, String key, long defaultValue) { - String value = options.get(key); - if (value == null || value.isEmpty()) { - return defaultValue; - } - try { - return Long.parseLong(value.trim()); - } catch (NumberFormatException e) { - LOG.warn( - "Invalid long value '{}' for option '{}', using default {}.", - value, - key, - defaultValue); - return defaultValue; - } - } - private static class FullCompactionReadResult { private final ManifestFileMeta file; diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index cc88417b2765..6421328550c9 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -19,10 +19,7 @@ package org.apache.paimon.operation; import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashSet; import java.util.List; -import java.util.Set; /** * Pick strategy for manifest LSM Tree compaction. @@ -75,21 +72,19 @@ public List pick(List levelRuns) { * * sizeAmpThreshold, pick all runs for full compaction. 
*/ private List pickForSizeAmp(List levelRuns) { - int maxLevel = -1; - ManifestSortedRun highestRun = null; - long lowerLevelTotalSize = 0; - - for (ManifestSortedRun run : levelRuns) { - if (run.level() > maxLevel) { - maxLevel = run.level(); - highestRun = run; - } + if (levelRuns.isEmpty()) { + return null; } - if (highestRun == null || maxLevel <= 0) { + // The last run has the highest level (set by buildLevelSortedRuns) + ManifestSortedRun highestRun = levelRuns.get(levelRuns.size() - 1); + int maxLevel = highestRun.level(); + + if (maxLevel <= 0) { return null; } + long lowerLevelTotalSize = 0; for (ManifestSortedRun run : levelRuns) { if (run.level() < maxLevel) { lowerLevelTotalSize += run.totalSize(); @@ -112,27 +107,24 @@ private List pickForSizeAmp(List levelRuns * */ private List pickForSizeRatioAndForce(List levelRuns) { - // Sort by level ascending for low-to-high traversal - List sorted = new ArrayList<>(levelRuns); - sorted.sort(Comparator.comparingInt(ManifestSortedRun::level)); - - Set pickedSet = new HashSet<>(); + // levelRuns is already sorted by level ascending (set by buildLevelSortedRuns) + List picked = new ArrayList<>(); long pickedSize = 0; // From low to high: forced pick level0/level1, then SizeRatio for the rest. 
- for (ManifestSortedRun run : sorted) { + for (ManifestSortedRun run : levelRuns) { if (run.level() <= 1) { - pickedSet.add(run); + picked.add(run); pickedSize += run.totalSize(); } else { long nextRunSize = run.totalSize(); if (pickedSize > 0 && pickedSize * sizeRatioThreshold >= nextRunSize) { - pickedSet.add(run); + picked.add(run); pickedSize += nextRunSize; } } } - return new ArrayList<>(pickedSet); + return picked; } } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java index 49baabfe7161..c270677e1f8d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java @@ -18,12 +18,9 @@ package org.apache.paimon.operation; -import org.apache.paimon.data.BinaryRow; import org.apache.paimon.manifest.ManifestFileMeta; -import org.apache.paimon.utils.Preconditions; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Objects; import java.util.stream.Collectors; @@ -50,14 +47,6 @@ private ManifestSortedRun(List files) { this.totalSize = size; } - public static ManifestSortedRun empty() { - return new ManifestSortedRun(Collections.emptyList()); - } - - public static ManifestSortedRun fromSingle(ManifestFileMeta file) { - return new ManifestSortedRun(Collections.singletonList(file)); - } - /** * Build a {@code ManifestSortedRun} from an already-sorted list. 
The caller MUST guarantee that * {@code sortedFiles} is sorted ascending on the configured sort field's min value, and that @@ -71,14 +60,6 @@ public List files() { return files; } - public boolean isEmpty() { - return files.isEmpty(); - } - - public boolean nonEmpty() { - return !isEmpty(); - } - public long totalSize() { return totalSize; } @@ -91,21 +72,6 @@ public void setLevel(int level) { this.level = level; } - /** - * Validate that this run is monotonically non-overlapping on the sort field at {@code - * sortFieldIndex}. Used in tests and as an assertion in development. - */ - public void validate(int sortFieldIndex, Comparator partitionComparator) { - for (int i = 1; i < files.size(); i++) { - BinaryRow prevMax = files.get(i - 1).partitionStats().maxValues(); - BinaryRow currMin = files.get(i).partitionStats().minValues(); - Preconditions.checkState( - partitionComparator.compare(prevMax, currMin) <= 0, - "ManifestSortedRun is not sorted on field %s; prev.max > curr.min", - sortFieldIndex); - } - } - @Override public boolean equals(Object o) { if (!(o instanceof ManifestSortedRun)) { From b9af75b4d137ded6484b9100a92568bc3d7f2226 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 14:21:38 +0800 Subject: [PATCH 07/51] fix --- .../paimon/operation/ManifestFileMerger.java | 564 +------------ .../paimon/operation/ManifestFileSorter.java | 745 ++++++++++++++++++ .../paimon/manifest/ManifestFileMetaTest.java | 10 - 3 files changed, 765 insertions(+), 554 deletions(-) create mode 100644 paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 2363595d7168..83ef75a0335a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -22,13 +22,10 @@ 
import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; -import org.apache.paimon.manifest.FileKind; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; import org.apache.paimon.partition.PartitionPredicate; -import org.apache.paimon.types.DataType; -import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; @@ -38,7 +35,6 @@ import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; @@ -79,26 +75,25 @@ public static List merge( List newFilesForAbort = new ArrayList<>(); try { - Optional> merged; - // If manifest-sort.enable is enabled and there are partition fields, use trySortRewrite if (options.manifestSortEnable() && partitionType.getFieldCount() > 0) { - merged = - trySortRewrite( + Optional> sorted = + ManifestFileSorter.trySortRewrite( input, newFilesForAbort, manifestFile, partitionType, options); - } else { - // Otherwise try full compaction first, then minor compaction if needed - merged = - tryFullCompaction( - input, - newFilesForAbort, - manifestFile, - suggestedMetaSize, - manifestFullCompactionSize, - partitionType, - manifestReadParallelism); + return sorted.orElse(input); } + // Otherwise try full compaction first, then minor compaction if needed + Optional> merged = + tryFullCompaction( + input, + newFilesForAbort, + manifestFile, + suggestedMetaSize, + manifestFullCompactionSize, + partitionType, + manifestReadParallelism); + return merged.orElseGet( () -> tryMinorCompaction( @@ -314,7 +309,7 @@ private static FullCompactionReadResult readForFullCompaction( return new FullCompactionReadResult(file, requireChange, entries); } - private static Set computeDeletePartitions(Set deleteEntries) { + 
static Set computeDeletePartitions(Set deleteEntries) { Set partitions = new HashSet<>(); for (FileEntry.Identifier identifier : deleteEntries) { partitions.add(identifier.partition); @@ -322,532 +317,13 @@ private static Set computeDeletePartitions(Set return partitions; } - // ==================== Manifest Sort Rewrite ==================== - - /** - * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort - * field cannot be resolved or the delta file size is below the full compaction threshold, the - * input is returned as-is. - */ - private static Optional> trySortRewrite( - List input, - List newFilesForAbort, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options) - throws Exception { - // Extract configuration from options - long suggestedMetaSize = options.manifestTargetSize().getBytes(); - long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); - Integer manifestReadParallelism = options.scanManifestParallelism(); - String sortPartitionField = options.manifestSortPartitionField(); - // Step 1: Resolve sort field. - String sortField = resolveSortField(sortPartitionField, partitionType); - if (sortField == null) { - throw new IllegalArgumentException( - "Cannot resolve sort field for manifest sort rewrite. "); - } - int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); - - // Step 2: Check full compact trigger. 
- Filter mustChange = - file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; - - long totalDeltaFileSize = 0; - for (ManifestFileMeta file : input) { - if (mustChange.test(file)) { - totalDeltaFileSize += file.fileSize(); - } - } - - List fullCompactionManifests = new ArrayList<>(); - List lsmFiles = new LinkedList<>(input); - Set deleteEntries = null; - if (totalDeltaFileSize >= manifestFullCompactionSize) { - // Step 3: Read delete entries and build partition predicate. - deleteEntries = - FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); - - PartitionPredicate predicate; - if (deleteEntries.isEmpty()) { - predicate = PartitionPredicate.ALWAYS_FALSE; - } else { - if (partitionType.getFieldCount() > 0) { - Set deletePartitions = computeDeletePartitions(deleteEntries); - predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); - } else { - predicate = PartitionPredicate.ALWAYS_TRUE; - } - } - - // Step 4: Classify input into level0 runs and LSM files. 
- Iterator iterator = lsmFiles.iterator(); - while (iterator.hasNext()) { - ManifestFileMeta file = iterator.next(); - if (mustChange.test(file)) { - iterator.remove(); - fullCompactionManifests.add(file); - } else if (predicate != null - && predicate.test( - file.numAddedFiles() + file.numDeletedFiles(), - file.partitionStats().minValues(), - file.partitionStats().maxValues(), - file.partitionStats().nullCounts())) { - iterator.remove(); - fullCompactionManifests.add(file); - } - } - } - - // Process full compaction manifests separately: sort, deduplicate, and rewrite - List fullCompactionRewritten = new ArrayList<>(); - if (!fullCompactionManifests.isEmpty()) { - fullCompactionRewritten = - sortAndRewriteFullCompaction( - fullCompactionManifests, - manifestFile, - sortFieldIndex, - sortFieldType, - suggestedMetaSize, - deleteEntries, - manifestReadParallelism); - newFilesForAbort.addAll(fullCompactionRewritten); - } - - // Step 5: Build LSM Tree and assign levels (only for lsmFiles). - List levelRuns = - lsmFiles.isEmpty() - ? new ArrayList<>() - : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); - - // Step 6: Pick runs to compact. - int sizeAmpThreshold = options.maxSizeAmplificationPercent(); - int sizeRatioThreshold = options.sortedRunSizeRatio(); - ManifestPickStrategy pickStrategy = - new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); - List pickedRuns = pickStrategy.pick(levelRuns); - - Set pickedSet = new HashSet<>(pickedRuns); - List reusedFiles = new ArrayList<>(); - for (ManifestSortedRun run : levelRuns) { - if (!pickedSet.contains(run)) { - reusedFiles.addAll(run.files()); - } - } - List result = new ArrayList<>(reusedFiles); - if (pickedRuns.isEmpty()) { - result.addAll(fullCompactionRewritten); - return Optional.of(new ArrayList<>(result)); - } - - // Step 7: Split picked files into sections, sort and rewrite each. 
- List pickedFiles = new ArrayList<>(); - for (ManifestSortedRun run : pickedRuns) { - pickedFiles.addAll(run.files()); - } - - List> sections = - splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType); - long maxRewriteSize = options.manifestSortMaxRewriteSize(); - long processedSize = 0; - - long openFileCost = options.manifestSortOpenFileCost(); - List sortNewFiles = new ArrayList<>(); - for (List section : sections) { - long sectionSize = 0; - for (ManifestFileMeta m : section) { - sectionSize += m.fileSize() + openFileCost; - } - if (processedSize + sectionSize > maxRewriteSize) { - result.addAll(section); - continue; - } - processedSize += sectionSize; - - List merged = - sortAndRewriteSection( - section, - manifestFile, - sortFieldIndex, - sortFieldType, - null, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } - newFilesForAbort.addAll(sortNewFiles); - result.addAll(fullCompactionRewritten); - return Optional.of(result); - } - - // ==================== Sort Rewrite Helpers ==================== - - /** - * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, - * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 - * largest to level 1~4, rest to level 0). 
- */ - static List buildLevelSortedRuns( - List input, int sortFieldIndex, DataType sortFieldType) { - input.sort( - (a, b) -> { - int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp != 0) { - return cmp; - } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - List> runFilesList = new ArrayList<>(); - List currentRun = new ArrayList<>(); - currentRun.add(input.get(0)); - for (int i = 1; i < input.size(); i++) { - ManifestFileMeta file = input.get(i); - ManifestFileMeta last = currentRun.get(currentRun.size() - 1); - if (compareField( - file.partitionStats().minValues(), - last.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType) - > 0) { - currentRun.add(file); - } else { - runFilesList.add(currentRun); - currentRun = new ArrayList<>(); - currentRun.add(file); - } - } - runFilesList.add(currentRun); - - List runs = new ArrayList<>(runFilesList.size()); - for (List rf : runFilesList) { - runs.add(ManifestSortedRun.fromSorted(rf)); - } - - runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); - int n = runs.size(); - for (int i = 0; i < n; i++) { - if (i >= n - 4) { - runs.get(i).setLevel(n - i); - } else { - runs.get(i).setLevel(0); - } - } - return runs; - } - - /** - * Split picked files into sections. Files with overlapping sort-key intervals go into the same - * section. 
- */ - static List> splitIntoSections( - List pickedFiles, int sortFieldIndex, DataType sortFieldType) { - pickedFiles.sort( - (a, b) -> { - int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp != 0) { - return cmp; - } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - List> sections = new ArrayList<>(); - List currentSection = new ArrayList<>(); - currentSection.add(pickedFiles.get(0)); - BinaryRow sectionMaxBound = pickedFiles.get(0).partitionStats().maxValues(); - for (int i = 1; i < pickedFiles.size(); i++) { - ManifestFileMeta file = pickedFiles.get(i); - if (compareField( - file.partitionStats().minValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - > 0) { - sections.add(currentSection); - currentSection = new ArrayList<>(); - currentSection.add(file); - sectionMaxBound = file.partitionStats().maxValues(); - } else { - currentSection.add(file); - if (compareField( - file.partitionStats().maxValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - > 0) { - sectionMaxBound = file.partitionStats().maxValues(); - } - } - } - sections.add(currentSection); - return sections; - } - - /** - * Sort and rewrite full compaction manifests. Files are sorted by min partition value, then - * processed in batches. A batch stops when total size reaches threshold or when current max - * doesn't overlap with next min. Each batch is sorted, deduplicated (DELETE entries removed), - * and written to new manifest files. 
- */ - private static List sortAndRewriteFullCompaction( - List fullCompactionManifests, - ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, - long suggestedMetaSize, - @Nullable Set deletedIdentifiers, - @Nullable Integer manifestReadParallelism) - throws Exception { - - // Sort by min partition value - fullCompactionManifests.sort( - (a, b) -> - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType)); - - List result = new ArrayList<>(); - List batch = new ArrayList<>(); - long batchSize = 0; - - for (int i = 0; i < fullCompactionManifests.size(); i++) { - ManifestFileMeta current = fullCompactionManifests.get(i); - boolean shouldFlush = false; - - // Check if batch size reaches threshold - if (batchSize + current.fileSize() >= suggestedMetaSize && !batch.isEmpty()) { - shouldFlush = true; - } - - // Check if current max overlaps with next min - if (i < fullCompactionManifests.size() - 1 && !batch.isEmpty()) { - ManifestFileMeta next = fullCompactionManifests.get(i + 1); - int cmp = - compareField( - current.partitionStats().maxValues(), - next.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp < 0) { - shouldFlush = true; - } - } - - batch.add(current); - batchSize += current.fileSize(); - - if (shouldFlush || i == fullCompactionManifests.size() - 1) { - // Process batch: sort entries, remove DELETE, write out - List rewritten = - sortAndRewriteSection( - batch, - manifestFile, - sortFieldIndex, - sortFieldType, - deletedIdentifiers, - manifestReadParallelism); - result.addAll(rewritten); - batch.clear(); - batchSize = 0; - } - } - - return result; - } - - /** - * Read all entries from a section's manifest files, sort them in memory by the specified - * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving - * entries to the rolling writer. 
Manifest files without delete entries and without cancelled - * ADD entries are kept as-is. - * - *

Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as - * {@link #tryFullCompaction}. - */ - private static List sortAndRewriteSection( - List section, - ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, - @Nullable Set deletedIdentifiers, - @Nullable Integer manifestReadParallelism) - throws Exception { - - Set safeDeletedIds = - deletedIdentifiers != null ? deletedIdentifiers : new HashSet<>(); - - // Parallel read: each meta is read independently - Function> reader = - meta -> singletonList(readForSortRewrite(meta, manifestFile, safeDeletedIds)); - - List result = new ArrayList<>(); - List entriesToRewrite = new ArrayList<>(); - - for (FullCompactionReadResult readResult : - sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - if (readResult.requireChange) { - entriesToRewrite.addAll(readResult.entries); - } else { - result.add(readResult.file); - } - } - - if (!entriesToRewrite.isEmpty()) { - entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); - - RollingFileWriter writer = - manifestFile.createRollingWriter(); - try { - for (ManifestEntry entry : entriesToRewrite) { - writer.write(entry); - } - } finally { - writer.close(); - } - result.addAll(writer.result()); - } - - return result; - } - /** - * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. 
- */ - static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { - switch (type.getTypeRoot()) { - case INTEGER: - case DATE: - return Integer.compare(a.getInt(k), b.getInt(k)); - case BIGINT: - return Long.compare(a.getLong(k), b.getLong(k)); - case SMALLINT: - return Short.compare(a.getShort(k), b.getShort(k)); - case TINYINT: - return Byte.compare(a.getByte(k), b.getByte(k)); - case FLOAT: - return Float.compare(a.getFloat(k), b.getFloat(k)); - case DOUBLE: - return Double.compare(a.getDouble(k), b.getDouble(k)); - case BOOLEAN: - return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); - case VARCHAR: - case CHAR: - return a.getString(k).compareTo(b.getString(k)); - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return a.getTimestamp(k, type.defaultSize()) - .compareTo(b.getTimestamp(k, type.defaultSize())); - case DECIMAL: - DecimalType dt = (DecimalType) type; - return a.getDecimal(k, dt.getPrecision(), dt.getScale()) - .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); - default: - String errorMsg = - String.format( - "Unsupported partition field type '%s' for manifest sort rewrite. " - + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " - + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " - + "DECIMAL.", - type.getTypeRoot()); - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - } - - /** - * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. - * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field - * value AND the same data file are emitted contiguously. 
- */ - static int compareSortKey( - ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { - int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); - if (c != 0) { - return c; - } - return a.file().fileName().compareTo(b.file().fileName()); - } - - /** - * Resolve the partition field to sort manifests by. - * - *

Resolution rules: - * - *

    - *
  1. If {@code manifest-sort.partition-field} is configured, return that value. - *
  2. Otherwise, if the table has exactly one partition field, return that field name. - *
  3. Otherwise return {@code null}. - *
- */ - @Nullable - static String resolveSortField(String sortPartitionField, RowType partitionType) { - if (sortPartitionField != null && !sortPartitionField.isEmpty()) { - return sortPartitionField; - } - if (partitionType.getFieldCount() == 1) { - return partitionType.getFieldNames().get(0); - } - return null; - } - /** - * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD - * entries not in {@code deletedIdentifiers} are returned. Otherwise, check if any ADD entry is - * cancelled; if not, the file is kept as-is ({@code requireChange = false}). - */ - private static FullCompactionReadResult readForSortRewrite( - ManifestFileMeta meta, - ManifestFile manifestFile, - Set deletedIdentifiers) { - if (meta.numDeletedFiles() > 0) { - List entries = new ArrayList<>(); - for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { - if (entry.kind() == FileKind.ADD - && !deletedIdentifiers.contains(entry.identifier())) { - entries.add(entry); - } - } - return new FullCompactionReadResult(meta, true, entries); - } else { - boolean requireChange = false; - List entries = new ArrayList<>(); - for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { - if (deletedIdentifiers.contains(entry.identifier())) { - requireChange = true; - } else { - entries.add(entry); - } - } - return new FullCompactionReadResult(meta, requireChange, entries); - } - } - - private static class FullCompactionReadResult { + static class FullCompactionReadResult { - private final ManifestFileMeta file; - private final boolean requireChange; - private final List entries; + final ManifestFileMeta file; + final boolean requireChange; + final List entries; - private FullCompactionReadResult( + FullCompactionReadResult( ManifestFileMeta file, boolean requireChange, List entries) { this.file = file; this.requireChange = requireChange; diff --git 
a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java new file mode 100644 index 000000000000..9f40d430be91 --- /dev/null +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -0,0 +1,745 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.paimon.operation; + +import org.apache.paimon.CoreOptions; +import org.apache.paimon.data.BinaryRow; +import org.apache.paimon.io.RollingFileWriter; +import org.apache.paimon.manifest.FileEntry; +import org.apache.paimon.manifest.FileKind; +import org.apache.paimon.manifest.ManifestEntry; +import org.apache.paimon.manifest.ManifestFile; +import org.apache.paimon.manifest.ManifestFileMeta; +import org.apache.paimon.operation.ManifestFileMerger.FullCompactionReadResult; +import org.apache.paimon.partition.PartitionPredicate; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DecimalType; +import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.Filter; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; + +import static java.util.Collections.singletonList; +import static org.apache.paimon.utils.ManifestReadThreadPool.sequentialBatchedExecute; + +/** Manifest file sorter that sorts and rewrites manifest files by a configured partition field. */ +public class ManifestFileSorter { + + private static final Logger LOG = LoggerFactory.getLogger(ManifestFileSorter.class); + + /** + * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort + * field cannot be resolved or the delta file size is below the full compaction threshold, the + * input is returned as-is. 
+ */ + static Optional> trySortRewrite( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) + throws Exception { + // Extract configuration from options + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + String sortPartitionField = options.manifestSortPartitionField(); + int mergeMinCount = options.manifestMergeMinCount(); + // Step 1: Resolve sort field. + String sortField = resolveSortField(sortPartitionField, partitionType); + if (sortField == null) { + throw new IllegalArgumentException( + "Cannot resolve sort field for manifest sort rewrite. "); + } + int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); + DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); + + // Step 2: Classify manifests into defaultCompaction and LSM groups. + ClassifyResult classified = + classifyManifests( + input, + suggestedMetaSize, + manifestFullCompactionSize, + mergeMinCount, + manifestFile, + partitionType, + manifestReadParallelism); + List defaultCompactionManifests = classified.defaultCompactionManifests; + List lsmFiles = classified.lsmFiles; + Set deleteEntries = classified.deleteEntries; + + // Step 3: Build LSM Tree and assign levels (only for lsmFiles). + List levelRuns = + lsmFiles.isEmpty() + ? new ArrayList<>() + : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); + + // Step 4: Pick runs to compact. 
+ int sizeAmpThreshold = options.maxSizeAmplificationPercent(); + int sizeRatioThreshold = options.sortedRunSizeRatio(); + ManifestPickStrategy pickStrategy = + new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); + List pickedRuns = pickStrategy.pick(levelRuns); + + if (pickedRuns.isEmpty() && defaultCompactionManifests.isEmpty()) { + return Optional.of(input); + } + + Set pickedSet = new HashSet<>(pickedRuns); + List reusedFiles = new ArrayList<>(); + for (ManifestSortedRun run : levelRuns) { + if (!pickedSet.contains(run)) { + reusedFiles.addAll(run.files()); + } + } + List result = new ArrayList<>(reusedFiles); + + // Step 5: Split picked files into sections, sort and rewrite each. + List pickedFiles = new ArrayList<>(); + for (ManifestSortedRun run : pickedRuns) { + pickedFiles.addAll(run.files()); + } + pickedFiles.addAll(defaultCompactionManifests); + + Set defaultCompactionSet = new HashSet<>(defaultCompactionManifests); + + List
sections = + splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionSet); + sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + long maxRewriteSize = options.manifestSortMaxRewriteSize(); + long openFileCost = options.manifestSortOpenFileCost(); + List sortNewFiles = new ArrayList<>(); + + List rewritten = + rewriteSections( + sections, + defaultCompactionSet, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + suggestedMetaSize, + maxRewriteSize, + openFileCost, + sortNewFiles, + manifestReadParallelism); + result.addAll(rewritten); + + newFilesForAbort.addAll(sortNewFiles); + return Optional.of(result); + } + + /** + * Classify manifest files into default-compaction group and LSM group. + * + *

When full compaction is triggered (totalDeltaFileSize >= threshold), files that must + * change or overlap with delete partitions go into defaultCompactionManifests; the rest stay as + * lsmFiles. + * + *

When full compaction is NOT triggered, adjacent small manifests whose cumulative size + * reaches suggestedMetaSize are grouped into defaultCompactionManifests (minor-style pick). + */ + private static ClassifyResult classifyManifests( + List input, + long suggestedMetaSize, + long manifestFullCompactionSize, + int mergeMinCount, + ManifestFile manifestFile, + RowType partitionType, + @Nullable Integer manifestReadParallelism) { + Filter mustChange = + file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; + + long totalDeltaFileSize = 0; + for (ManifestFileMeta file : input) { + if (mustChange.test(file)) { + totalDeltaFileSize += file.fileSize(); + } + } + + List defaultCompactionManifests = new ArrayList<>(); + List lsmFiles = new LinkedList<>(input); + Set deleteEntries = null; + + if (totalDeltaFileSize >= manifestFullCompactionSize) { + // Full compact triggered: read delete entries and classify by predicate. + deleteEntries = + FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + + PartitionPredicate predicate; + if (deleteEntries.isEmpty()) { + predicate = PartitionPredicate.ALWAYS_FALSE; + } else { + if (partitionType.getFieldCount() > 0) { + Set deletePartitions = + ManifestFileMerger.computeDeletePartitions(deleteEntries); + predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); + } else { + predicate = PartitionPredicate.ALWAYS_TRUE; + } + } + + Iterator iterator = lsmFiles.iterator(); + while (iterator.hasNext()) { + ManifestFileMeta file = iterator.next(); + if (mustChange.test(file)) { + iterator.remove(); + defaultCompactionManifests.add(file); + } else if (predicate != null + && predicate.test( + file.numAddedFiles() + file.numDeletedFiles(), + file.partitionStats().minValues(), + file.partitionStats().maxValues(), + file.partitionStats().nullCounts())) { + iterator.remove(); + defaultCompactionManifests.add(file); + } + } + } else { + // Minor-style pick: merge adjacent small 
manifests when no full compact triggered. + List candidates = new ArrayList<>(); + long candidateSize = 0; + for (ManifestFileMeta file : input) { + candidateSize += file.fileSize(); + candidates.add(file); + if (candidateSize >= suggestedMetaSize) { + if (candidates.size() > 1) { + defaultCompactionManifests.addAll(candidates); + lsmFiles.removeAll(candidates); + } + candidates.clear(); + candidateSize = 0; + } + } + if (candidates.size() >= mergeMinCount) { + defaultCompactionManifests.addAll(candidates); + lsmFiles.removeAll(candidates); + } + } + + return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); + } + + /** + * Iterate over sections, decide whether to rewrite each section fully or partially based on the + * maxRewriteSize threshold and whether the section contains defaultCompaction files. + * + *

Within threshold: read all metas, sort and rewrite the entire section. Exceeds threshold + * but contains defaultCompaction files: only rewrite sub-segments around those files. Exceeds + * threshold with no defaultCompaction files: skip (keep as-is). + * + * @return the list of result manifest files (both rewritten and kept-as-is) + */ + private static List rewriteSections( + List

sections, + Set defaultCompactionSet, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deleteEntries, + long suggestedMetaSize, + long maxRewriteSize, + long openFileCost, + List sortNewFiles, + @Nullable Integer manifestReadParallelism) + throws Exception { + List result = new ArrayList<>(); + long processedSize = 0; + + for (Section section : sections) { + // Single-file section without defaultCompaction: already sorted, skip rewrite. + if (section.files.size() == 1 && !section.hasDefaultCompactMeta) { + result.addAll(section.files); + continue; + } + + long sectionSize = section.totalSize + (long) section.files.size() * openFileCost; + + boolean exceedsThreshold = processedSize + sectionSize > maxRewriteSize; + if (exceedsThreshold && !section.hasDefaultCompactMeta) { + result.addAll(section.files); + continue; + } + + if (!exceedsThreshold) { + processedSize += sectionSize; + List merged = + sortAndRewriteSection( + section.files, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + } else { + rewriteSubSegments( + section.files, + defaultCompactionSet, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + suggestedMetaSize, + sortNewFiles, + result, + manifestReadParallelism); + } + } + return result; + } + + /** + * Rewrite sub-segments within a section that exceeds the rewrite threshold. Only sub-segments + * containing defaultCompaction files are rewritten; other files are kept as-is. 
+ */ + private static void rewriteSubSegments( + List section, + Set defaultCompactionSet, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deleteEntries, + long suggestedMetaSize, + List sortNewFiles, + List result, + @Nullable Integer manifestReadParallelism) + throws Exception { + List subSegment = new ArrayList<>(); + long subSegmentSize = 0; + for (ManifestFileMeta m : section) { + if (defaultCompactionSet.contains(m)) { + subSegment.add(m); + subSegmentSize += m.fileSize(); + } else if (!subSegment.isEmpty()) { + subSegment.add(m); + subSegmentSize += m.fileSize(); + if (subSegmentSize >= suggestedMetaSize) { + List merged = + sortAndRewriteSection( + subSegment, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + subSegment = new ArrayList<>(); + subSegmentSize = 0; + } + } else { + result.add(m); + } + } + // Flush remaining sub-segment + if (!subSegment.isEmpty()) { + List merged = + sortAndRewriteSection( + subSegment, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + } + } + + /** + * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, + * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 + * largest to level 1~4, rest to level 0). 
+ */ + static List buildLevelSortedRuns( + List input, int sortFieldIndex, DataType sortFieldType) { + input.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + List> runFilesList = new ArrayList<>(); + List currentRun = new ArrayList<>(); + currentRun.add(input.get(0)); + for (int i = 1; i < input.size(); i++) { + ManifestFileMeta file = input.get(i); + ManifestFileMeta last = currentRun.get(currentRun.size() - 1); + if (compareField( + file.partitionStats().minValues(), + last.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType) + >= 0) { + currentRun.add(file); + } else { + runFilesList.add(currentRun); + currentRun = new ArrayList<>(); + currentRun.add(file); + } + } + runFilesList.add(currentRun); + + List runs = new ArrayList<>(runFilesList.size()); + for (List rf : runFilesList) { + runs.add(ManifestSortedRun.fromSorted(rf)); + } + + runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + int n = runs.size(); + for (int i = 0; i < n; i++) { + if (i >= n - 4) { + runs.get(i).setLevel(n - i); + } else { + runs.get(i).setLevel(0); + } + } + return runs; + } + + /** + * Split picked files into sections. Files with overlapping sort-key intervals go into the same + * section. Each section is built with pre-computed totalSize and hasDefaultCompactMeta. + */ + static List
splitIntoSections( + List pickedFiles, + int sortFieldIndex, + DataType sortFieldType, + Set defaultCompactionSet) { + pickedFiles.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + List
sections = new ArrayList<>(); + List currentFiles = new ArrayList<>(); + long currentTotalSize = 0; + boolean currentHasDefault = false; + ManifestFileMeta first = pickedFiles.get(0); + currentFiles.add(first); + currentTotalSize += first.fileSize(); + currentHasDefault = defaultCompactionSet.contains(first); + BinaryRow sectionMaxBound = first.partitionStats().maxValues(); + + for (int i = 1; i < pickedFiles.size(); i++) { + ManifestFileMeta file = pickedFiles.get(i); + if (compareField( + file.partitionStats().minValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + >= 0) { + sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); + currentFiles = new ArrayList<>(); + currentTotalSize = 0; + currentFiles.add(file); + currentTotalSize += file.fileSize(); + currentHasDefault = defaultCompactionSet.contains(file); + sectionMaxBound = file.partitionStats().maxValues(); + } else { + currentFiles.add(file); + currentTotalSize += file.fileSize(); + if (!currentHasDefault && defaultCompactionSet.contains(file)) { + currentHasDefault = true; + } + if (compareField( + file.partitionStats().maxValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + > 0) { + sectionMaxBound = file.partitionStats().maxValues(); + } + } + } + sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); + return sections; + } + + /** + * Merge small adjacent sections to avoid producing too many small rewrite batches. If either + * the pending section or the current section total size is smaller than half of {@code + * suggestedMetaSize}, they are combined into a single section. + */ + private static List
mergeSmallAdjacentSections( + List
sections, long suggestedMetaSize) { + long smallThreshold = suggestedMetaSize / 2; + List
merged = new ArrayList<>(); + Section pending = null; + + for (Section section : sections) { + if (pending == null) { + pending = section; + } else { + if (pending.totalSize < smallThreshold || section.totalSize < smallThreshold) { + pending = Section.merge(pending, section); + } else { + merged.add(pending); + pending = section; + } + } + } + if (pending != null) { + merged.add(pending); + } + return merged; + } + + /** + * Read all entries from a section's manifest files, sort them in memory by the specified + * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving + * entries to new manifest files via the rolling writer. + * + *

All files participate in sorting, enabling full sort across the entire section. + * + *

Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as + * {@link ManifestFileMerger#tryFullCompaction}. + */ + private static List sortAndRewriteSection( + List section, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deletedIdentifiers, + @Nullable Integer manifestReadParallelism) + throws Exception { + + Set safeDeletedIds = + deletedIdentifiers != null ? deletedIdentifiers : new HashSet<>(); + + // Parallel read: each meta is read independently + Function> reader = + meta -> singletonList(readForSortRewrite(meta, manifestFile, safeDeletedIds)); + + List entriesToRewrite = new ArrayList<>(); + for (FullCompactionReadResult readResult : + sequentialBatchedExecute(reader, section, manifestReadParallelism)) { + entriesToRewrite.addAll(readResult.entries); + } + + List result = new ArrayList<>(); + if (!entriesToRewrite.isEmpty()) { + entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + + RollingFileWriter writer = + manifestFile.createRollingWriter(); + try { + for (ManifestEntry entry : entriesToRewrite) { + writer.write(entry); + } + } finally { + writer.close(); + } + result.addAll(writer.result()); + } + + return result; + } + + /** + * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. 
+ */ + static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { + switch (type.getTypeRoot()) { + case INTEGER: + case DATE: + return Integer.compare(a.getInt(k), b.getInt(k)); + case BIGINT: + return Long.compare(a.getLong(k), b.getLong(k)); + case SMALLINT: + return Short.compare(a.getShort(k), b.getShort(k)); + case TINYINT: + return Byte.compare(a.getByte(k), b.getByte(k)); + case FLOAT: + return Float.compare(a.getFloat(k), b.getFloat(k)); + case DOUBLE: + return Double.compare(a.getDouble(k), b.getDouble(k)); + case BOOLEAN: + return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); + case VARCHAR: + case CHAR: + return a.getString(k).compareTo(b.getString(k)); + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return a.getTimestamp(k, type.defaultSize()) + .compareTo(b.getTimestamp(k, type.defaultSize())); + case DECIMAL: + DecimalType dt = (DecimalType) type; + return a.getDecimal(k, dt.getPrecision(), dt.getScale()) + .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); + default: + String errorMsg = + String.format( + "Unsupported partition field type '%s' for manifest sort rewrite. " + + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " + + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " + + "DECIMAL.", + type.getTypeRoot()); + LOG.error(errorMsg); + throw new UnsupportedOperationException(errorMsg); + } + } + + /** + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field + * value AND the same data file are emitted contiguously. 
+ */ + static int compareSortKey( + ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { + int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + if (c != 0) { + return c; + } + return a.file().fileName().compareTo(b.file().fileName()); + } + + /** + * Resolve the partition field to sort manifests by. + * + *

Resolution rules: + * + *

    + *
  1. If {@code manifest-sort.partition-field} is configured, return that value. + *
  2. Otherwise, if the table has exactly one partition field, return that field name. + *
  3. Otherwise return {@code null}. + *
+ */ + @Nullable + static String resolveSortField(String sortPartitionField, RowType partitionType) { + if (sortPartitionField != null && !sortPartitionField.isEmpty()) { + return sortPartitionField; + } + if (partitionType.getFieldCount() == 1) { + return partitionType.getFieldNames().get(0); + } + return null; + } + + /** + * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD + * entries not in {@code deletedIdentifiers} are returned. Otherwise, check if any ADD entry is + * cancelled; if not, the file is kept as-is ({@code requireChange = false}). + */ + private static FullCompactionReadResult readForSortRewrite( + ManifestFileMeta meta, + ManifestFile manifestFile, + Set deletedIdentifiers) { + if (meta.numDeletedFiles() > 0) { + List entries = new ArrayList<>(); + for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + if (entry.kind() == FileKind.ADD + && !deletedIdentifiers.contains(entry.identifier())) { + entries.add(entry); + } + } + return new FullCompactionReadResult(meta, true, entries); + } else { + boolean requireChange = false; + List entries = new ArrayList<>(); + for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + if (deletedIdentifiers.contains(entry.identifier())) { + requireChange = true; + } else { + entries.add(entry); + } + } + return new FullCompactionReadResult(meta, requireChange, entries); + } + } + + /** A section of manifest files with pre-computed metadata. */ + static class Section { + final List files; + final long totalSize; + final boolean hasDefaultCompactMeta; + + Section(List files, long totalSize, boolean hasDefaultCompactMeta) { + this.files = files; + this.totalSize = totalSize; + this.hasDefaultCompactMeta = hasDefaultCompactMeta; + } + + /** Create a merged section from two sections. 
*/ + static Section merge(Section a, Section b) { + List merged = new ArrayList<>(a.files); + merged.addAll(b.files); + return new Section( + merged, + a.totalSize + b.totalSize, + a.hasDefaultCompactMeta || b.hasDefaultCompactMeta); + } + } + + /** Result of classifying manifest files into default-compaction and LSM groups. */ + private static class ClassifyResult { + final List defaultCompactionManifests; + final List lsmFiles; + @Nullable final Set deleteEntries; + + ClassifyResult( + List defaultCompactionManifests, + List lsmFiles, + @Nullable Set deleteEntries) { + this.defaultCompactionManifests = defaultCompactionManifests; + this.lsmFiles = lsmFiles; + this.deleteEntries = deleteEntries; + } + } +} diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index f4adf35802e4..6ded1beead21 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -939,9 +939,6 @@ public void testManifestSortWithOverlappingPartitions() { input.add(makeManifest(entriesF.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest.target-file-size", "500B"); - testOptions.set("manifest.merge-min-count", "3"); - testOptions.set("manifest.full-compaction-threshold-size", "200B"); testOptions.set("manifest-sort.enable", "true"); List merged = @@ -1036,9 +1033,6 @@ public void testManifestSortWithShuffledOverlappingPartitions() { input.add(makeManifest(entries6.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest.target-file-size", "500B"); - testOptions.set("manifest.merge-min-count", "3"); - testOptions.set("manifest.full-compaction-threshold-size", "100B"); testOptions.set("manifest-sort.enable", "true"); List merged = @@ -1125,9 +1119,6 @@ public void 
testManifestSortWithMultipleOverlappingRuns() { input.add(makeManifest(run3Entries.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest.target-file-size", "500B"); - testOptions.set("manifest.merge-min-count", "3"); - testOptions.set("manifest.full-compaction-threshold-size", "100B"); testOptions.set("manifest-sort.enable", "true"); List merged = @@ -1224,7 +1215,6 @@ public void testManifestSortEliminatesDeleteEntries() { // Set target file size very large so all input manifests are considered "small" // (fileSize < suggestedMetaSize), which makes them all satisfy mustChange condition testOptions.set("manifest.target-file-size", "16MB"); - testOptions.set("manifest.merge-min-count", "3"); // Set full-compaction threshold very small to ensure it triggers testOptions.set("manifest.full-compaction-threshold-size", "1B"); testOptions.set("manifest-sort.enable", "true"); From 04808377735e6d1894b928df923da5629cf64bc2 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 14:37:22 +0800 Subject: [PATCH 08/51] rm --- .../paimon/operation/ManifestFileSorter.java | 2 +- .../paimon/manifest/ManifestFileMetaTest.java | 93 ------------------- 2 files changed, 1 insertion(+), 94 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 9f40d430be91..53d04cefc5e4 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -78,7 +78,7 @@ static Optional> trySortRewrite( String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { throw new IllegalArgumentException( - "Cannot resolve sort field for manifest sort rewrite. 
"); + "Cannot resolve sort field for manifest sort rewrite."); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index 6ded1beead21..9c7bb03aa204 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -971,99 +971,6 @@ public void testManifestSortWithOverlappingPartitions() { } } - /** - * Test manifest sort with more manifests having overlapping partition ranges. Creates a larger - * number of manifests in shuffled order to stress-test the sort rewrite logic. - * - *

Input manifests (shuffled, all ADD-only): - * - *

-     *   manifest-1: partitions [6, 10]
-     *   manifest-2: partitions [0, 3]
-     *   manifest-3: partitions [4, 8]  -- overlaps 1 and 2
-     *   manifest-4: partitions [9, 14] -- overlaps 1
-     *   manifest-5: partitions [2, 5]  -- overlaps 2 and 3
-     *   manifest-6: partitions [11, 15]-- overlaps 4
-     * 
- */ - @Test - public void testManifestSortWithShuffledOverlappingPartitions() { - List input = new ArrayList<>(); - - // manifest-1: partitions [6, 10] - List entries1 = new ArrayList<>(); - for (int p = 6; p <= 10; p++) { - entries1.add(makeEntry(true, String.format("m1-p%d", p), p)); - } - input.add(makeManifest(entries1.toArray(new ManifestEntry[0]))); - - // manifest-2: partitions [0, 3] - List entries2 = new ArrayList<>(); - for (int p = 0; p <= 3; p++) { - entries2.add(makeEntry(true, String.format("m2-p%d", p), p)); - } - input.add(makeManifest(entries2.toArray(new ManifestEntry[0]))); - - // manifest-3: partitions [4, 8] -- overlaps manifest-1 and manifest-2 - List entries3 = new ArrayList<>(); - for (int p = 4; p <= 8; p++) { - entries3.add(makeEntry(true, String.format("m3-p%d", p), p)); - } - input.add(makeManifest(entries3.toArray(new ManifestEntry[0]))); - - // manifest-4: partitions [9, 14] -- overlaps manifest-1 - List entries4 = new ArrayList<>(); - for (int p = 9; p <= 14; p++) { - entries4.add(makeEntry(true, String.format("m4-p%d", p), p)); - } - input.add(makeManifest(entries4.toArray(new ManifestEntry[0]))); - - // manifest-5: partitions [2, 5] -- overlaps manifest-2 and manifest-3 - List entries5 = new ArrayList<>(); - for (int p = 2; p <= 5; p++) { - entries5.add(makeEntry(true, String.format("m5-p%d", p), p)); - } - input.add(makeManifest(entries5.toArray(new ManifestEntry[0]))); - - // manifest-6: partitions [11, 15] -- overlaps manifest-4 - List entries6 = new ArrayList<>(); - for (int p = 11; p <= 15; p++) { - entries6.add(makeEntry(true, String.format("m6-p%d", p), p)); - } - input.add(makeManifest(entries6.toArray(new ManifestEntry[0]))); - - Options testOptions = new Options(); - testOptions.set("manifest-sort.enable", "true"); - - List merged = - ManifestFileMerger.merge( - input, - manifestFile, - getPartitionType(), - CoreOptions.fromMap(testOptions.toMap())); - - // Verify no data loss - assertEquivalentEntries(input, merged); - - 
// Verify entries within each output manifest are sorted by partition - for (ManifestFileMeta meta : merged) { - List entries = manifestFile.read(meta.fileName(), meta.fileSize()); - for (int i = 1; i < entries.size(); i++) { - int prevPartition = entries.get(i - 1).partition().getInt(0); - int currPartition = entries.get(i).partition().getInt(0); - assertThat(currPartition) - .as("Entries within a manifest should be sorted by partition") - .isGreaterThanOrEqualTo(prevPartition); - } - } - - // Verify output manifests are ordered by minValues - for (int i = 1; i < merged.size(); i++) { - int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); - int currMin = merged.get(i).partitionStats().minValues().getInt(0); - assertThat(currMin).isGreaterThanOrEqualTo(prevMin); - } - } /** * Test manifest sort with heavily overlapping manifests that form multiple sorted runs. This From 1b7083960454bd52faee2faf8e6830f4b8a5d134 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 14:37:51 +0800 Subject: [PATCH 09/51] fx --- .../java/org/apache/paimon/manifest/ManifestFileMetaTest.java | 1 - 1 file changed, 1 deletion(-) diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index 9c7bb03aa204..cd3ff5180899 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -971,7 +971,6 @@ public void testManifestSortWithOverlappingPartitions() { } } - /** * Test manifest sort with heavily overlapping manifests that form multiple sorted runs. This * exercises buildLevelSortedRuns and the LSM level assignment logic. 
From 7f3996eb61226b45434228da580b512c6d2407c8 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 15:46:08 +0800 Subject: [PATCH 10/51] fix # Conflicts: # paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java # Conflicts: # paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java # Conflicts: # paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java --- docs/generated/core_configuration.html | 2 +- .../java/org/apache/paimon/CoreOptions.java | 16 ++-- .../paimon/operation/ManifestFileMerger.java | 5 +- .../paimon/operation/ManifestFileSorter.java | 40 ++++++---- .../paimon/schema/SchemaValidation.java | 21 +++++ .../paimon/manifest/ManifestFileMetaTest.java | 6 +- .../paimon/schema/SchemaValidationTest.java | 77 +++++++++++++++++++ 7 files changed, 134 insertions(+), 33 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index 558367174df9..c94ea0253635 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -925,7 +925,7 @@
manifest-sort.enable
false Boolean - Whether to invoke manifest sort rewrite right after manifest merge during commit. The sort rewrite implementation is provided by an external module (e.g. morax) and discovered via ServiceLoader. When no implementation is registered on the classpath, this flag has no effect (manifest sort is silently skipped). + Whether to invoke manifest sort rewrite right after manifest merge during commit.
manifest-sort.max-rewrite-size
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index e8f73dcb3bf6..bc6d81f1ec76 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -471,17 +471,13 @@ public InlineElement getDescription() { "To avoid frequent manifest merges, this parameter specifies the minimum number " + "of ManifestFileMeta to merge."); - public static final ConfigOption MANIFEST_SORT_ENABLE = - key("manifest-sort.enable") + public static final ConfigOption MANIFEST_SORT_ENABLED = + key("manifest-sort.enabled") .booleanType() .defaultValue(false) .withDescription( "Whether to invoke manifest sort rewrite right after manifest merge" - + " during commit. The sort rewrite implementation is provided" - + " by an external module (e.g. morax) and discovered via" - + " ServiceLoader. When no implementation is registered on the" - + " classpath, this flag has no effect (manifest sort is" - + " silently skipped)."); + + " during commit.)."); public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = key("manifest-sort.partition-field") @@ -493,7 +489,7 @@ public InlineElement getDescription() { + " caller (an external sort rewrite implementation). For" + " single-partition tables, optional (defaults to the only" + " partition field). 
For multi-partition tables, REQUIRED" - + " when 'manifest-sort.enable' is true."); + + " when 'manifest-sort.enabled' is true."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") @@ -2647,8 +2643,8 @@ public MemorySize manifestFullCompactionThresholdSize() { return options.get(MANIFEST_FULL_COMPACTION_FILE_SIZE); } - public boolean manifestSortEnable() { - return options.get(MANIFEST_SORT_ENABLE); + public boolean manifestSortEnabled() { + return options.get(MANIFEST_SORT_ENABLED); } @Nullable diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 83ef75a0335a..9c48d7d84508 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -75,8 +75,9 @@ public static List merge( List newFilesForAbort = new ArrayList<>(); try { - // If manifest-sort.enable is enabled and there are partition fields, use trySortRewrite - if (options.manifestSortEnable() && partitionType.getFieldCount() > 0) { + // If manifest-sort.enabled is enabled and there are partition fields, use + // trySortRewrite + if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { Optional> sorted = ManifestFileSorter.trySortRewrite( input, newFilesForAbort, manifestFile, partitionType, options); diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 53d04cefc5e4..c089b561fc5d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -244,6 +244,9 @@ private static ClassifyResult classifyManifests( defaultCompactionManifests.addAll(candidates); 
lsmFiles.removeAll(candidates); } + deleteEntries = + FileEntry.readDeletedEntries( + manifestFile, defaultCompactionManifests, manifestReadParallelism); } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); @@ -579,11 +582,18 @@ private static List sortAndRewriteSection( RollingFileWriter writer = manifestFile.createRollingWriter(); + Exception exception = null; try { for (ManifestEntry entry : entriesToRewrite) { writer.write(entry); } + } catch (Exception e) { + exception = e; } finally { + if (exception != null) { + writer.abort(); + throw exception; + } writer.close(); } result.addAll(writer.result()); @@ -673,35 +683,31 @@ static String resolveSortField(String sortPartitionField, RowType partitionType) } /** - * Read a single manifest file for sort rewrite. If the meta contains delete entries, only ADD - * entries not in {@code deletedIdentifiers} are returned. Otherwise, check if any ADD entry is - * cancelled; if not, the file is kept as-is ({@code requireChange = false}). + * Read a single manifest file for sort rewrite. + * + *

When {@code deletedIdentifiers} is non-empty (full compaction path), only surviving ADD + * entries (not cancelled by deletedIdentifiers) are kept, and DELETE entries are dropped + * because the full compaction has already resolved them. + * + *

When {@code deletedIdentifiers} is empty (non-full-compaction path), all entries (both ADD + * and DELETE) are preserved to avoid losing unresolved DELETE entries. */ private static FullCompactionReadResult readForSortRewrite( ManifestFileMeta meta, ManifestFile manifestFile, Set deletedIdentifiers) { - if (meta.numDeletedFiles() > 0) { - List entries = new ArrayList<>(); + List entries = new ArrayList<>(); + if (deletedIdentifiers.isEmpty()) { + entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); + } else { for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { if (entry.kind() == FileKind.ADD && !deletedIdentifiers.contains(entry.identifier())) { entries.add(entry); } } - return new FullCompactionReadResult(meta, true, entries); - } else { - boolean requireChange = false; - List entries = new ArrayList<>(); - for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { - if (deletedIdentifiers.contains(entry.identifier())) { - requireChange = true; - } else { - entries.add(entry); - } - } - return new FullCompactionReadResult(meta, requireChange, entries); } + return new FullCompactionReadResult(meta, true, entries); } /** A section of manifest files with pre-computed metadata. 
*/ diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java index d07ad2581944..128d33aa9649 100644 --- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java +++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java @@ -319,6 +319,8 @@ public static void validateTableSchema(TableSchema schema) { validateChangelogReadSequenceNumber(schema, options); validatePkClusteringOverride(options); + + validateManifestSort(schema, options); } public static void validateFallbackBranch(SchemaManager schemaManager, TableSchema schema) { @@ -1032,4 +1034,23 @@ public static void validatePkClusteringOverride(CoreOptions options) { } } } + + private static void validateManifestSort(TableSchema schema, CoreOptions options) { + if (options.manifestSortEnabled()) { + checkArgument( + !schema.partitionKeys().isEmpty(), + "Cannot enable '%s' for non-partition table.", + CoreOptions.MANIFEST_SORT_ENABLED.key()); + } + + String sortPartitionField = options.manifestSortPartitionField(); + if (sortPartitionField != null && !sortPartitionField.isEmpty()) { + checkArgument( + schema.partitionKeys().contains(sortPartitionField), + "'%s' = '%s' is not a partition field. 
Available partition fields: %s.", + CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), + sortPartitionField, + schema.partitionKeys()); + } + } } diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index cd3ff5180899..d1c15d412fad 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -939,7 +939,7 @@ public void testManifestSortWithOverlappingPartitions() { input.add(makeManifest(entriesF.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest-sort.enable", "true"); + testOptions.set("manifest-sort.enabled", "true"); List merged = ManifestFileMerger.merge( @@ -1025,7 +1025,7 @@ public void testManifestSortWithMultipleOverlappingRuns() { input.add(makeManifest(run3Entries.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); - testOptions.set("manifest-sort.enable", "true"); + testOptions.set("manifest-sort.enabled", "true"); List merged = ManifestFileMerger.merge( @@ -1123,7 +1123,7 @@ public void testManifestSortEliminatesDeleteEntries() { testOptions.set("manifest.target-file-size", "16MB"); // Set full-compaction threshold very small to ensure it triggers testOptions.set("manifest.full-compaction-threshold-size", "1B"); - testOptions.set("manifest-sort.enable", "true"); + testOptions.set("manifest-sort.enabled", "true"); List merged = ManifestFileMerger.merge( diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java index 14652ec883c2..9a6a768c7b65 100644 --- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java @@ -511,6 +511,83 @@ public void 
testFileFormatPerLevelAcceptsCompatibleSchema() { new TableSchema(1, fields, 10, emptyList(), singletonList("k"), options, "")); } + @Test + void testManifestSortEnableOnNonPartitionTable() { + Map options = new HashMap<>(); + options.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); + options.put(BUCKET.key(), String.valueOf(-1)); + + List fields = + Arrays.asList( + new DataField(0, "f0", DataTypes.INT()), + new DataField(1, "f1", DataTypes.INT())); + + assertThatThrownBy( + () -> + validateTableSchema( + new TableSchema( + 1, + fields, + 10, + emptyList(), + emptyList(), + options, + ""))) + .hasMessageContaining( + "Cannot enable 'manifest-sort.enabled' for non-partition table."); + } + + @Test + void testManifestSortPartitionFieldNotInPartitionKeys() { + Map options = new HashMap<>(); + options.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f1"); + options.put(BUCKET.key(), String.valueOf(-1)); + + List fields = + Arrays.asList( + new DataField(0, "f0", DataTypes.INT()), + new DataField(1, "f1", DataTypes.INT())); + + assertThatThrownBy( + () -> + validateTableSchema( + new TableSchema( + 1, + fields, + 10, + singletonList("f0"), + emptyList(), + options, + ""))) + .hasMessageContaining("is not a partition field"); + } + + @Test + void testManifestSortValidConfig() { + Map options = new HashMap<>(); + options.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); + options.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f0"); + options.put(BUCKET.key(), String.valueOf(-1)); + + List fields = + Arrays.asList( + new DataField(0, "f0", DataTypes.INT()), + new DataField(1, "f1", DataTypes.INT())); + + assertThatNoException() + .isThrownBy( + () -> + validateTableSchema( + new TableSchema( + 1, + fields, + 10, + singletonList("f0"), + emptyList(), + options, + ""))); + } + @Test public void testMergeOnReadCoexistsWithVisibilityCallback() { Map options = new HashMap<>(); From ce67f323f97af395eac4b8bcda3e98f5348e31b4 Mon Sep 17 00:00:00 2001 From: umi 
Date: Thu, 14 May 2026 16:35:01 +0800 Subject: [PATCH 11/51] fix --- docs/generated/core_configuration.html | 4 +- .../java/org/apache/paimon/CoreOptions.java | 2 +- .../paimon/operation/FileStoreCommitImpl.java | 11 ++-- .../paimon/operation/ManifestFileSorter.java | 59 ++++++++++++++----- .../operation/ManifestPickStrategy.java | 12 ++-- 5 files changed, 60 insertions(+), 28 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index c94ea0253635..6793223efa9a 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -922,7 +922,7 @@ To avoid frequent manifest merges, this parameter specifies the minimum number of ManifestFileMeta to merge. -

manifest-sort.enable
+
manifest-sort.enabled
false Boolean Whether to invoke manifest sort rewrite right after manifest merge during commit. @@ -943,7 +943,7 @@
manifest-sort.partition-field
(none) String - Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). For single-partition tables, optional (defaults to the only partition field). For multi-partition tables, REQUIRED when 'manifest-sort.enable' is true. + Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). For single-partition tables, optional (defaults to the only partition field). For multi-partition tables, REQUIRED when 'manifest-sort.enabled' is true.
manifest.target-file-size
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index bc6d81f1ec76..9eb445690769 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -477,7 +477,7 @@ public InlineElement getDescription() { .defaultValue(false) .withDescription( "Whether to invoke manifest sort rewrite right after manifest merge" - + " during commit.)."); + + " during commit."); public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = key("manifest-sort.partition-field") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java index 96833dddb11d..ee7fa5aed4da 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java @@ -55,6 +55,7 @@ import org.apache.paimon.operation.commit.SuccessCommitResult; import org.apache.paimon.operation.metrics.CommitMetrics; import org.apache.paimon.operation.metrics.CommitStats; +import org.apache.paimon.options.MemorySize; import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.partition.PartitionStatistics; @@ -1185,16 +1186,16 @@ private boolean compactManifestOnce() { manifestList.readDataManifests(latestSnapshot); List mergeAfterManifests; - // the fist trial - Options tempOptions = options.toConfiguration(); - tempOptions.set("manifest.merge-min-count", "1"); - tempOptions.set("manifest.full-compaction-threshold-size", "1B"); + // the fist trial: use a copied options with forced full compaction settings + Options compactOptions = Options.fromMap(options.toMap()); + compactOptions.set(CoreOptions.MANIFEST_MERGE_MIN_COUNT, 1); + 
compactOptions.set(CoreOptions.MANIFEST_FULL_COMPACTION_FILE_SIZE, MemorySize.ofBytes(1)); mergeAfterManifests = ManifestFileMerger.merge( mergeBeforeManifests, manifestFile, partitionType, - CoreOptions.fromMap(tempOptions.toMap())); + CoreOptions.fromMap(compactOptions.toMap())); if (new HashSet<>(mergeBeforeManifests).equals(new HashSet<>(mergeAfterManifests))) { // no need to commit this snapshot, because no compact were happened diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index c089b561fc5d..49a436a57ed9 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -22,7 +22,6 @@ import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; -import org.apache.paimon.manifest.FileKind; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; @@ -42,6 +41,7 @@ import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Optional; @@ -111,9 +111,19 @@ static Optional> trySortRewrite( List pickedRuns = pickStrategy.pick(levelRuns); if (pickedRuns.isEmpty() && defaultCompactionManifests.isEmpty()) { + LOG.debug( + "Manifest sort rewrite skipped: no runs picked and no defaultCompaction files."); return Optional.of(input); } + LOG.info( + "Manifest sort rewrite: input={} files, lsm={} runs, picked={} runs, " + + "defaultCompaction={} files.", + input.size(), + levelRuns.size(), + pickedRuns.size(), + defaultCompactionManifests.size()); + Set pickedSet = new HashSet<>(pickedRuns); List reusedFiles = new ArrayList<>(); for (ManifestSortedRun run : 
levelRuns) { @@ -155,6 +165,11 @@ static Optional> trySortRewrite( result.addAll(rewritten); newFilesForAbort.addAll(sortNewFiles); + LOG.info( + "Manifest sort rewrite completed: sections={}, newFiles={}, resultFiles={}.", + sections.size(), + sortNewFiles.size(), + result.size()); return Optional.of(result); } @@ -226,6 +241,7 @@ private static ClassifyResult classifyManifests( } } else { // Minor-style pick: merge adjacent small manifests when no full compact triggered. + Set toRemove = new HashSet<>(); List candidates = new ArrayList<>(); long candidateSize = 0; for (ManifestFileMeta file : input) { @@ -234,7 +250,7 @@ private static ClassifyResult classifyManifests( if (candidateSize >= suggestedMetaSize) { if (candidates.size() > 1) { defaultCompactionManifests.addAll(candidates); - lsmFiles.removeAll(candidates); + toRemove.addAll(candidates); } candidates.clear(); candidateSize = 0; @@ -242,11 +258,11 @@ private static ClassifyResult classifyManifests( } if (candidates.size() >= mergeMinCount) { defaultCompactionManifests.addAll(candidates); - lsmFiles.removeAll(candidates); + toRemove.addAll(candidates); + } + if (!toRemove.isEmpty()) { + lsmFiles.removeIf(toRemove::contains); } - deleteEntries = - FileEntry.readDeletedEntries( - manifestFile, defaultCompactionManifests, manifestReadParallelism); } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); @@ -435,7 +451,8 @@ static List buildLevelSortedRuns( int n = runs.size(); for (int i = 0; i < n; i++) { if (i >= n - 4) { - runs.get(i).setLevel(n - i); + // top-4 largest runs get level 4-1 + runs.get(i).setLevel(i - (n - 4) + 1); } else { runs.get(i).setLevel(0); } @@ -580,6 +597,17 @@ private static List sortAndRewriteSection( if (!entriesToRewrite.isEmpty()) { entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + // When non-full-compact (deletedIdentifiers is null, meaning delete entries + // were not read), entries may contain both ADD and 
DELETE. Merge them following + // FileEntry.mergeEntries logic to cancel paired ADD/DELETE and keep unresolved + // DELETE entries whose ADD is in a previous manifest file. + if (deletedIdentifiers == null) { + LinkedHashMap mergedMap = + new LinkedHashMap<>(); + FileEntry.mergeEntries(entriesToRewrite, mergedMap); + entriesToRewrite = new ArrayList<>(mergedMap.values()); + } + RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; @@ -657,6 +685,11 @@ static int compareSortKey( if (c != 0) { return c; } + // ADD before DELETE, so that mergeEntries can correctly cancel pairs + int kindCmp = a.kind().compareTo(b.kind()); + if (kindCmp != 0) { + return kindCmp; + } return a.file().fileName().compareTo(b.file().fileName()); } @@ -667,19 +700,14 @@ static int compareSortKey( * *
    *
  1. If {@code manifest-sort.partition-field} is configured, return that value. - *
  2. Otherwise, if the table has exactly one partition field, return that field name. - *
  3. Otherwise return {@code null}. + *
  4. Otherwise, default to the first partition field. *
*/ - @Nullable static String resolveSortField(String sortPartitionField, RowType partitionType) { if (sortPartitionField != null && !sortPartitionField.isEmpty()) { return sortPartitionField; } - if (partitionType.getFieldCount() == 1) { - return partitionType.getFieldNames().get(0); - } - return null; + return partitionType.getFieldNames().get(0); } /** @@ -701,8 +729,7 @@ private static FullCompactionReadResult readForSortRewrite( entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); } else { for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { - if (entry.kind() == FileKind.ADD - && !deletedIdentifiers.contains(entry.identifier())) { + if (!deletedIdentifiers.contains(entry.identifier())) { entries.add(entry); } } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 6421328550c9..736425bb4d6f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -109,16 +109,20 @@ private List pickForSizeAmp(List levelRuns private List pickForSizeRatioAndForce(List levelRuns) { // levelRuns is already sorted by level ascending (set by buildLevelSortedRuns) List picked = new ArrayList<>(); - long pickedSize = 0; - // From low to high: forced pick level0/level1, then SizeRatio for the rest. - for (ManifestSortedRun run : levelRuns) { + // Always pick the first run to guarantee a non-empty result. + picked.add(levelRuns.get(0)); + long pickedSize = levelRuns.get(0).totalSize(); + + // From the second run onward: forced pick level0/level1, then SizeRatio for the rest. 
+ for (int i = 1; i < levelRuns.size(); i++) { + ManifestSortedRun run = levelRuns.get(i); if (run.level() <= 1) { picked.add(run); pickedSize += run.totalSize(); } else { long nextRunSize = run.totalSize(); - if (pickedSize > 0 && pickedSize * sizeRatioThreshold >= nextRunSize) { + if (pickedSize * sizeRatioThreshold >= nextRunSize) { picked.add(run); pickedSize += nextRunSize; } From 01f3b447605ad7068fef4fea64c75385a9dd8d5f Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 16:54:39 +0800 Subject: [PATCH 12/51] fix --- docs/generated/core_configuration.html | 2 +- .../main/java/org/apache/paimon/CoreOptions.java | 6 ++---- .../apache/paimon/operation/ManifestFileSorter.java | 13 +++++-------- .../paimon/operation/ManifestPickStrategy.java | 8 +++++--- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index 6793223efa9a..ec6bffa3a99d 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -943,7 +943,7 @@
manifest-sort.partition-field
(none) String - Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). For single-partition tables, optional (defaults to the only partition field). For multi-partition tables, REQUIRED when 'manifest-sort.enabled' is true. + Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). If not configured, defaults to the first partition field.
manifest.target-file-size
diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 9eb445690769..f8612a856465 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -486,10 +486,8 @@ public InlineElement getDescription() { .withDescription( "Partition field name to sort manifest entries by. Validated by" + " schema validation; resolved to a 0-based index by the" - + " caller (an external sort rewrite implementation). For" - + " single-partition tables, optional (defaults to the only" - + " partition field). For multi-partition tables, REQUIRED" - + " when 'manifest-sort.enabled' is true."); + + " caller (an external sort rewrite implementation). If" + + " not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 49a436a57ed9..1475d9b13611 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -449,10 +449,10 @@ static List buildLevelSortedRuns( runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); int n = runs.size(); + int maxLevel = 4; for (int i = 0; i < n; i++) { - if (i >= n - 4) { - // top-4 largest runs get level 4-1 - runs.get(i).setLevel(i - (n - 4) + 1); + if (i >= n - maxLevel) { + runs.get(i).setLevel(i - (n - maxLevel) + 1); } else { runs.get(i).setLevel(0); } @@ -580,12 +580,9 @@ private static List sortAndRewriteSection( @Nullable Integer manifestReadParallelism) throws Exception { - Set safeDeletedIds = - deletedIdentifiers != null ? 
deletedIdentifiers : new HashSet<>(); - // Parallel read: each meta is read independently Function> reader = - meta -> singletonList(readForSortRewrite(meta, manifestFile, safeDeletedIds)); + meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); List entriesToRewrite = new ArrayList<>(); for (FullCompactionReadResult readResult : @@ -725,7 +722,7 @@ private static FullCompactionReadResult readForSortRewrite( ManifestFile manifestFile, Set deletedIdentifiers) { List entries = new ArrayList<>(); - if (deletedIdentifiers.isEmpty()) { + if (deletedIdentifiers == null || deletedIdentifiers.isEmpty()) { entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); } else { for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 736425bb4d6f..ffd10fcf5473 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -18,6 +18,8 @@ package org.apache.paimon.operation; +import org.apache.paimon.utils.Preconditions; + import java.util.ArrayList; import java.util.List; @@ -32,8 +34,6 @@ *
  • SizeRatio: from low to high, pick adjacent runs whose amplification factor is less * than {@code sizeRatioThreshold}. *
  • Forced pick: level0 and level1 runs are always picked. - *
  • Delete pick: additionally pick runs containing manifest files with {@code - * numDeletedFiles > 0}. * */ public class ManifestPickStrategy { @@ -42,6 +42,8 @@ public class ManifestPickStrategy { private final int sizeRatioThreshold; public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { + Preconditions.checkArgument(sizeAmpThreshold > 0, "sizeAmpThreshold must be positive"); + Preconditions.checkArgument(sizeRatioThreshold > 0, "sizeRatioThreshold must be positive"); this.sizeAmpThreshold = sizeAmpThreshold; this.sizeRatioThreshold = sizeRatioThreshold; } @@ -91,7 +93,7 @@ private List pickForSizeAmp(List levelRuns } } - if (lowerLevelTotalSize > highestRun.totalSize() * sizeAmpThreshold) { + if (lowerLevelTotalSize / sizeAmpThreshold > highestRun.totalSize()) { return new ArrayList<>(levelRuns); } return null; From 1e4f44fc26b3b9df3e834e37bf18c5d0a6339af8 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 17:09:32 +0800 Subject: [PATCH 13/51] fix --- .../operation/ManifestPickStrategy.java | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index ffd10fcf5473..524caed50dbf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -70,8 +70,11 @@ public List pick(List levelRuns) { } /** - * SizeAmp check: if all lower-level (0~3) runs' total size > highest-level (level4) run's size - * * sizeAmpThreshold, pick all runs for full compaction. + * SizeAmp check: if all lower-level (0~3) runs' total size exceeds the highest-level run's size + * by more than {@code sizeAmpThreshold} percent, pick all runs for full compaction. + * + *

    Formula (consistent with {@code UniversalCompaction#pickForSizeAmp}): {@code + * lowerLevelTotalSize * 100 > sizeAmpThreshold * highestRunSize} */ private List pickForSizeAmp(List levelRuns) { if (levelRuns.isEmpty()) { @@ -93,7 +96,8 @@ private List pickForSizeAmp(List levelRuns } } - if (lowerLevelTotalSize / sizeAmpThreshold > highestRun.totalSize()) { + // size amplification = percentage of additional size + if (lowerLevelTotalSize * 100 > (long) sizeAmpThreshold * highestRun.totalSize()) { return new ArrayList<>(levelRuns); } return null; @@ -104,9 +108,12 @@ private List pickForSizeAmp(List levelRuns * *

      *
    • Level0 and level1 are always picked. - *
    • From low to high, if the cumulative picked size * sizeRatioThreshold >= next run's - * size, continue picking. + *
    • From low to high, if the cumulative picked size with ratio amplification covers the + * next run's size, continue picking. *
    + * + *

    Formula (consistent with {@code UniversalCompaction#pickForSizeRatio}): {@code pickedSize + * * (100.0 + sizeRatioThreshold) / 100.0 >= nextRunSize} */ private List pickForSizeRatioAndForce(List levelRuns) { // levelRuns is already sorted by level ascending (set by buildLevelSortedRuns) @@ -124,13 +131,15 @@ private List pickForSizeRatioAndForce(List pickedSize += run.totalSize(); } else { long nextRunSize = run.totalSize(); - if (pickedSize * sizeRatioThreshold >= nextRunSize) { + if (pickedSize * (100.0 + sizeRatioThreshold) / 100.0 >= nextRunSize) { picked.add(run); pickedSize += nextRunSize; } } } - + if (picked.size() == 1) { + return new ArrayList<>(); + } return picked; } } From c4dee143c4c5953428cdb2770d1429d9139269a0 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 14 May 2026 17:32:38 +0800 Subject: [PATCH 14/51] rmMinorComp --- docs/generated/core_configuration.html | 2 +- .../java/org/apache/paimon/CoreOptions.java | 3 +- .../paimon/operation/ManifestFileSorter.java | 39 ------------------- 3 files changed, 2 insertions(+), 42 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index ec6bffa3a99d..17d2efeed612 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -943,7 +943,7 @@

    manifest-sort.partition-field
    (none) String - Partition field name to sort manifest entries by. Validated by schema validation; resolved to a 0-based index by the caller (an external sort rewrite implementation). If not configured, defaults to the first partition field. + Partition field name to sort manifest entries by. Validated by schema validation; If not configured, defaults to the first partition field.
    manifest.target-file-size
    diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index f8612a856465..b19b6d85c623 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -485,8 +485,7 @@ public InlineElement getDescription() { .noDefaultValue() .withDescription( "Partition field name to sort manifest entries by. Validated by" - + " schema validation; resolved to a 0-based index by the" - + " caller (an external sort rewrite implementation). If" + + " schema validation; If" + " not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 1475d9b13611..dfca7380250c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -41,7 +41,6 @@ import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; -import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; import java.util.Optional; @@ -73,7 +72,6 @@ static Optional> trySortRewrite( long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); Integer manifestReadParallelism = options.scanManifestParallelism(); String sortPartitionField = options.manifestSortPartitionField(); - int mergeMinCount = options.manifestMergeMinCount(); // Step 1: Resolve sort field. 
String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { @@ -89,7 +87,6 @@ static Optional> trySortRewrite( input, suggestedMetaSize, manifestFullCompactionSize, - mergeMinCount, manifestFile, partitionType, manifestReadParallelism); @@ -187,7 +184,6 @@ private static ClassifyResult classifyManifests( List input, long suggestedMetaSize, long manifestFullCompactionSize, - int mergeMinCount, ManifestFile manifestFile, RowType partitionType, @Nullable Integer manifestReadParallelism) { @@ -239,30 +235,6 @@ private static ClassifyResult classifyManifests( defaultCompactionManifests.add(file); } } - } else { - // Minor-style pick: merge adjacent small manifests when no full compact triggered. - Set toRemove = new HashSet<>(); - List candidates = new ArrayList<>(); - long candidateSize = 0; - for (ManifestFileMeta file : input) { - candidateSize += file.fileSize(); - candidates.add(file); - if (candidateSize >= suggestedMetaSize) { - if (candidates.size() > 1) { - defaultCompactionManifests.addAll(candidates); - toRemove.addAll(candidates); - } - candidates.clear(); - candidateSize = 0; - } - } - if (candidates.size() >= mergeMinCount) { - defaultCompactionManifests.addAll(candidates); - toRemove.addAll(candidates); - } - if (!toRemove.isEmpty()) { - lsmFiles.removeIf(toRemove::contains); - } } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); @@ -594,17 +566,6 @@ private static List sortAndRewriteSection( if (!entriesToRewrite.isEmpty()) { entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); - // When non-full-compact (deletedIdentifiers is null, meaning delete entries - // were not read), entries may contain both ADD and DELETE. Merge them following - // FileEntry.mergeEntries logic to cancel paired ADD/DELETE and keep unresolved - // DELETE entries whose ADD is in a previous manifest file. 
- if (deletedIdentifiers == null) { - LinkedHashMap mergedMap = - new LinkedHashMap<>(); - FileEntry.mergeEntries(entriesToRewrite, mergedMap); - entriesToRewrite = new ArrayList<>(mergedMap.values()); - } - RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; From ad0054f4752d95dc7f6029d59b70fc55791f2211 Mon Sep 17 00:00:00 2001 From: umi Date: Fri, 15 May 2026 16:32:34 +0800 Subject: [PATCH 15/51] test --- .../java/org/apache/paimon/CoreOptions.java | 5 +- paimon-core/pom.xml | 15 + .../paimon/operation/ManifestFileMerger.java | 1 - .../paimon/operation/ManifestFileSorter.java | 298 +++++++++++++++--- 4 files changed, 269 insertions(+), 50 deletions(-) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index b19b6d85c623..0e1792fb1158 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -485,8 +485,7 @@ public InlineElement getDescription() { .noDefaultValue() .withDescription( "Partition field name to sort manifest entries by. Validated by" - + " schema validation; If" - + " not configured, defaults to the first partition field."); + + " schema validation; If not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") @@ -501,7 +500,7 @@ public InlineElement getDescription() { public static final ConfigOption MANIFEST_SORT_OPEN_FILE_COST = key("manifest-sort.open-file-cost") .memoryType() - .defaultValue(MemorySize.ofMebiBytes(4)) + .defaultValue(MemorySize.ofKibiBytes(40)) .withDescription( "Open file cost of a manifest file during sort rewrite. 
" + "It is added to each manifest file's size when computing " diff --git a/paimon-core/pom.xml b/paimon-core/pom.xml index 9506bdf03959..e570324ee9e3 100644 --- a/paimon-core/pom.xml +++ b/paimon-core/pom.xml @@ -36,6 +36,21 @@ under the License. + + com.aliyun.jindodata + jindo-core-macos-11_0-aarch64 + 6.9.1 + + + + + + + + org.apache.paimon + paimon-ali-jindo + 1.4-ali-SNAPSHOT + org.apache.paimon paimon-common diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 9c48d7d84508..e197868849c1 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -249,7 +249,6 @@ public static Optional> tryFullCompaction( } // 2.2. merge - if (toBeMerged.size() <= 1) { return Optional.empty(); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index dfca7380250c..64919346256e 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -44,6 +44,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Optional; +import java.util.PriorityQueue; import java.util.Set; import java.util.function.Function; @@ -138,12 +139,24 @@ static Optional> trySortRewrite( pickedFiles.addAll(defaultCompactionManifests); Set defaultCompactionSet = new HashSet<>(defaultCompactionManifests); - - List
    sections = - splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionSet); - sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); long maxRewriteSize = options.manifestSortMaxRewriteSize(); long openFileCost = options.manifestSortOpenFileCost(); + + List
    sections = + splitIntoSections( + pickedFiles, + sortFieldIndex, + sortFieldType, + defaultCompactionSet, + openFileCost); +// sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + System.out.println( + "After splitIntoSections: sections=" + + sections.size() + + ", pickedFiles=" + + pickedFiles.size()); + LOG.info("After mergeSmallAdjacentSections: sections={}.", sections.size()); + List sortNewFiles = new ArrayList<>(); List rewritten = @@ -236,7 +249,6 @@ private static ClassifyResult classifyManifests( } } } - return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } @@ -266,20 +278,32 @@ private static List rewriteSections( List result = new ArrayList<>(); long processedSize = 0; - for (Section section : sections) { + boolean reachedLimit = false; + + for (int i = 0; i < sections.size(); i++) { + Section section = sections.get(i); // Single-file section without defaultCompaction: already sorted, skip rewrite. - if (section.files.size() == 1 && !section.hasDefaultCompactMeta) { - result.addAll(section.files); + if (section.files.size() == 1) { + if (!section.hasDefaultCompactMeta || deleteEntries == null) { + result.addAll(section.files); + } else { + processedSize = processedSize + section.totalSizeWithCost; + rewriteSubSegments( + section.files, + defaultCompactionSet, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + suggestedMetaSize, + sortNewFiles, + result, + manifestReadParallelism); + } continue; } - - long sectionSize = section.totalSize + (long) section.files.size() * openFileCost; - + long sectionSize = section.totalSizeWithCost; boolean exceedsThreshold = processedSize + sectionSize > maxRewriteSize; - if (exceedsThreshold && !section.hasDefaultCompactMeta) { - result.addAll(section.files); - continue; - } if (!exceedsThreshold) { processedSize += sectionSize; @@ -293,7 +317,63 @@ private static List rewriteSections( manifestReadParallelism); sortNewFiles.addAll(merged); 
result.addAll(merged); - } else { + } else if (!reachedLimit) { + // First time exceeding threshold without defaultCompaction: + // partial rewrite within remaining budget. + long remaining = maxRewriteSize - processedSize; + processedSize += sectionSize; + // Split section into two parts: files within budget and remaining files + List toRewrite = new ArrayList<>(); + List remainingFiles = new ArrayList<>(); + long rewriteSize = 0; + long remainingSize = 0; + long remainingSizeWithCost = 0; + boolean remainingHasDefault = false; + + for (ManifestFileMeta file : section.files) { + long fileCost = Math.max(file.fileSize(), openFileCost); + if (rewriteSize + fileCost <= remaining) { + toRewrite.add(file); + rewriteSize += fileCost; + } else { + remainingFiles.add(file); + remainingSize += file.fileSize(); + remainingSizeWithCost += fileCost; + if (defaultCompactionSet.contains(file)) { + remainingHasDefault = true; + } + } + } + + if (toRewrite.size() > 1) { + List merged = + sortAndRewriteSection( + toRewrite, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + } else if (toRewrite.size() == 1) { + sortNewFiles.addAll(toRewrite); + result.addAll(toRewrite); + } + + // Create new section for remaining files and append to sections list + if (!remainingFiles.isEmpty()) { + Section remainingSection = + new Section( + remainingFiles, + remainingSize, + remainingSizeWithCost, + remainingHasDefault); + // Append remaining section to the end of sections list + sections.add(remainingSection); + } + reachedLimit = true; + } else if (section.hasDefaultCompactMeta) { rewriteSubSegments( section.files, defaultCompactionSet, @@ -305,6 +385,8 @@ private static List rewriteSections( sortNewFiles, result, manifestReadParallelism); + } else { + result.addAll(section.files); } } return result; @@ -368,6 +450,53 @@ private static void rewriteSubSegments( } } + /** + * Partial rewrite of 
a section: only rewrite files that fit within the remaining budget. Files + * beyond the budget are kept as-is. + */ + private static void partialRewriteSection( + List sectionFiles, + long remaining, + long openFileCost, + ManifestFile manifestFile, + int sortFieldIndex, + DataType sortFieldType, + @Nullable Set deleteEntries, + List sortNewFiles, + List result, + @Nullable Integer manifestReadParallelism) + throws Exception { + List toRewrite = new ArrayList<>(); + int splitIndex = 0; + long partialSize = 0; + for (int i = 0; i < sectionFiles.size(); i++) { + long fileCost = Math.max(sectionFiles.get(i).fileSize(), openFileCost); + if (partialSize + fileCost > remaining) { + break; + } + toRewrite.add(sectionFiles.get(i)); + partialSize += fileCost; + splitIndex = i + 1; + } + if (toRewrite.size() > 1) { + List merged = + sortAndRewriteSection( + toRewrite, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); + } else { + result.addAll(toRewrite); + } + for (int i = splitIndex; i < sectionFiles.size(); i++) { + result.add(sectionFiles.get(i)); + } + } + /** * Build level-sorted runs from a list of manifest files. 
Sorts files by min partition value, * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 @@ -375,6 +504,7 @@ private static void rewriteSubSegments( */ static List buildLevelSortedRuns( List input, int sortFieldIndex, DataType sortFieldType) { + // Step 1: Sort by min value (if equal, then by max value) input.sort( (a, b) -> { int cmp = @@ -393,43 +523,69 @@ static List buildLevelSortedRuns( sortFieldType); }); - List> runFilesList = new ArrayList<>(); - List currentRun = new ArrayList<>(); - currentRun.add(input.get(0)); - for (int i = 1; i < input.size(); i++) { - ManifestFileMeta file = input.get(i); - ManifestFileMeta last = currentRun.get(currentRun.size() - 1); - if (compareField( - file.partitionStats().minValues(), - last.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType) - >= 0) { - currentRun.add(file); - } else { - runFilesList.add(currentRun); - currentRun = new ArrayList<>(); - currentRun.add(file); + // Step 2: Interval graph coloring algorithm - assign files to runs + // Use priority queue to track runs by their max values + PriorityQueue> runs = + new PriorityQueue<>( + (r1, r2) -> { + ManifestFileMeta last1 = r1.get(r1.size() - 1); + ManifestFileMeta last2 = r2.get(r2.size() - 1); + return compareField( + last1.partitionStats().maxValues(), + last2.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + for (ManifestFileMeta file : input) { + boolean addedToExisting = false; + + // Try to find a run where current file's min >= run's max + if (!runs.isEmpty()) { + List earliestRun = runs.peek(); + ManifestFileMeta last = earliestRun.get(earliestRun.size() - 1); + + if (compareField( + file.partitionStats().minValues(), + last.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType) + >= 0) { + // Current file can be added to this run + runs.poll(); + earliestRun.add(file); + runs.offer(earliestRun); + addedToExisting = true; + } + } + + if (!addedToExisting) { + // 
Create a new run + List newRun = new ArrayList<>(); + newRun.add(file); + runs.offer(newRun); } } - runFilesList.add(currentRun); - List runs = new ArrayList<>(runFilesList.size()); - for (List rf : runFilesList) { - runs.add(ManifestSortedRun.fromSorted(rf)); + // Step 3: Convert to ManifestSortedRun list + List result = new ArrayList<>(); + while (!runs.isEmpty()) { + result.add(ManifestSortedRun.fromSorted(runs.poll())); } - runs.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); - int n = runs.size(); + // Step 4: Sort by totalSize and assign levels + result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + int n = result.size(); int maxLevel = 4; for (int i = 0; i < n; i++) { if (i >= n - maxLevel) { - runs.get(i).setLevel(i - (n - maxLevel) + 1); + result.get(i).setLevel(i - (n - maxLevel) + 1); } else { - runs.get(i).setLevel(0); + result.get(i).setLevel(0); } } - return runs; + System.out.println("run num: " + result.size()); + return result; } /** @@ -440,7 +596,8 @@ static List
    splitIntoSections( List pickedFiles, int sortFieldIndex, DataType sortFieldType, - Set defaultCompactionSet) { + Set defaultCompactionSet, + long openFileCost) { pickedFiles.sort( (a, b) -> { int cmp = @@ -462,10 +619,12 @@ static List
    splitIntoSections( List
    sections = new ArrayList<>(); List currentFiles = new ArrayList<>(); long currentTotalSize = 0; + long currentTotalSizeWithCost = 0; boolean currentHasDefault = false; ManifestFileMeta first = pickedFiles.get(0); currentFiles.add(first); currentTotalSize += first.fileSize(); + currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); currentHasDefault = defaultCompactionSet.contains(first); BinaryRow sectionMaxBound = first.partitionStats().maxValues(); @@ -477,16 +636,24 @@ static List
    splitIntoSections( sortFieldIndex, sortFieldType) >= 0) { - sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); + sections.add( + new Section( + currentFiles, + currentTotalSize, + currentTotalSizeWithCost, + currentHasDefault)); currentFiles = new ArrayList<>(); currentTotalSize = 0; + currentTotalSizeWithCost = 0; currentFiles.add(file); currentTotalSize += file.fileSize(); + currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); currentHasDefault = defaultCompactionSet.contains(file); sectionMaxBound = file.partitionStats().maxValues(); } else { currentFiles.add(file); currentTotalSize += file.fileSize(); + currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); if (!currentHasDefault && defaultCompactionSet.contains(file)) { currentHasDefault = true; } @@ -500,7 +667,12 @@ static List
    splitIntoSections( } } } - sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); + sections.add( + new Section( + currentFiles, + currentTotalSize, + currentTotalSizeWithCost, + currentHasDefault)); return sections; } @@ -552,7 +724,13 @@ private static List sortAndRewriteSection( @Nullable Integer manifestReadParallelism) throws Exception { + long totalStart = System.currentTimeMillis(); + long readTime = 0; + long sortTime = 0; + long writeTime = 0; + // Parallel read: each meta is read independently + long readStart = System.currentTimeMillis(); Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); @@ -561,11 +739,15 @@ private static List sortAndRewriteSection( sequentialBatchedExecute(reader, section, manifestReadParallelism)) { entriesToRewrite.addAll(readResult.entries); } + readTime = System.currentTimeMillis() - readStart; List result = new ArrayList<>(); if (!entriesToRewrite.isEmpty()) { + long sortStart = System.currentTimeMillis(); entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + sortTime = System.currentTimeMillis() - sortStart; + long writeStart = System.currentTimeMillis(); RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; @@ -583,6 +765,23 @@ private static List sortAndRewriteSection( writer.close(); } result.addAll(writer.result()); + writeTime = System.currentTimeMillis() - writeStart; + } + + long totalTime = System.currentTimeMillis() - totalStart; + if (totalTime > 0) { + System.out.println( + String.format( + "[sortAndRewriteSection] Total: %d ms, Read: %d ms (%.1f%%), Sort: %d ms (%.1f%%), Write: %d ms (%.1f%%), Entries: %d, Files: %d", + totalTime, + readTime, + 100.0 * readTime / totalTime, + sortTime, + 100.0 * sortTime / totalTime, + writeTime, + 100.0 * writeTime / totalTime, + entriesToRewrite.size(), + result.size())); } return result; @@ -699,11 +898,17 @@ private static 
FullCompactionReadResult readForSortRewrite( static class Section { final List files; final long totalSize; + final long totalSizeWithCost; final boolean hasDefaultCompactMeta; - Section(List files, long totalSize, boolean hasDefaultCompactMeta) { + Section( + List files, + long totalSize, + long totalSizeWithCost, + boolean hasDefaultCompactMeta) { this.files = files; this.totalSize = totalSize; + this.totalSizeWithCost = totalSizeWithCost; this.hasDefaultCompactMeta = hasDefaultCompactMeta; } @@ -714,6 +919,7 @@ static Section merge(Section a, Section b) { return new Section( merged, a.totalSize + b.totalSize, + a.totalSizeWithCost + b.totalSizeWithCost, a.hasDefaultCompactMeta || b.hasDefaultCompactMeta); } } From b3ba51fdb405dd71672366d5cf8c66bab6ed401f Mon Sep 17 00:00:00 2001 From: umi Date: Sun, 17 May 2026 13:39:28 +0800 Subject: [PATCH 16/51] fix --- .../paimon/operation/ManifestFileMerger.java | 1 + .../paimon/operation/ManifestFileSorter.java | 49 +++++++++++-------- .../operation/ManifestPickStrategy.java | 2 +- 3 files changed, 31 insertions(+), 21 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index e197868849c1..b6bfedbfd6bf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -207,6 +207,7 @@ public static Optional> tryFullCompaction( totalManifestSize, deltaDeleteFileNum, totalDeltaFileSize); + System.out.println("Start Manifest File Full Compaction: totalManifestSize: " + totalManifestSize + ", deltaDeleteFileNum " + deltaDeleteFileNum + ", totalDeltaFileSize " + totalDeltaFileSize); // 2.1. 
read all delete entries diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 64919346256e..c4ef6aa802cf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -149,7 +149,7 @@ static Optional> trySortRewrite( sortFieldType, defaultCompactionSet, openFileCost); -// sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); System.out.println( "After splitIntoSections: sections=" + sections.size() @@ -167,7 +167,7 @@ static Optional> trySortRewrite( sortFieldIndex, sortFieldType, deleteEntries, - suggestedMetaSize, + manifestFullCompactionSize, maxRewriteSize, openFileCost, sortNewFiles, @@ -212,7 +212,7 @@ private static ClassifyResult classifyManifests( List defaultCompactionManifests = new ArrayList<>(); List lsmFiles = new LinkedList<>(input); - Set deleteEntries = null; + Set deleteEntries = new HashSet<>(); if (totalDeltaFileSize >= manifestFullCompactionSize) { // Full compact triggered: read delete entries and classify by predicate. @@ -268,8 +268,8 @@ private static List rewriteSections( ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, - @Nullable Set deleteEntries, - long suggestedMetaSize, + Set deleteEntries, + long manifestFullCompactionSize, long maxRewriteSize, long openFileCost, List sortNewFiles, @@ -284,7 +284,7 @@ private static List rewriteSections( Section section = sections.get(i); // Single-file section without defaultCompaction: already sorted, skip rewrite. 
if (section.files.size() == 1) { - if (!section.hasDefaultCompactMeta || deleteEntries == null) { + if (!section.hasDefaultCompactMeta || deleteEntries.isEmpty()) { result.addAll(section.files); } else { processedSize = processedSize + section.totalSizeWithCost; @@ -295,7 +295,7 @@ private static List rewriteSections( sortFieldIndex, sortFieldType, deleteEntries, - suggestedMetaSize, + manifestFullCompactionSize, sortNewFiles, result, manifestReadParallelism); @@ -381,7 +381,7 @@ private static List rewriteSections( sortFieldIndex, sortFieldType, deleteEntries, - suggestedMetaSize, + manifestFullCompactionSize, sortNewFiles, result, manifestReadParallelism); @@ -403,7 +403,7 @@ private static void rewriteSubSegments( int sortFieldIndex, DataType sortFieldType, @Nullable Set deleteEntries, - long suggestedMetaSize, + long manifestFullCompactionSize, List sortNewFiles, List result, @Nullable Integer manifestReadParallelism) @@ -411,13 +411,19 @@ private static void rewriteSubSegments( List subSegment = new ArrayList<>(); long subSegmentSize = 0; for (ManifestFileMeta m : section) { - if (defaultCompactionSet.contains(m)) { - subSegment.add(m); - subSegmentSize += m.fileSize(); - } else if (!subSegment.isEmpty()) { + boolean shouldAccumulate = + defaultCompactionSet.contains(m) + && subSegmentSize + m.fileSize() < manifestFullCompactionSize; + + if (shouldAccumulate) { + // Continue accumulating subSegment.add(m); subSegmentSize += m.fileSize(); - if (subSegmentSize >= suggestedMetaSize) { + } else { + // Need to break the segment + if (!subSegment.isEmpty()) { + // Process accumulated subSegment first + subSegment.add(m); List merged = sortAndRewriteSection( subSegment, @@ -428,11 +434,12 @@ private static void rewriteSubSegments( manifestReadParallelism); sortNewFiles.addAll(merged); result.addAll(merged); - subSegment = new ArrayList<>(); + subSegment.clear(); subSegmentSize = 0; + } else { + // Directly add to result + result.add(m); } - } else { - result.add(m); 
} } // Flush remaining sub-segment @@ -720,10 +727,12 @@ private static List sortAndRewriteSection( ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, - @Nullable Set deletedIdentifiers, + Set deletedIdentifiers, @Nullable Integer manifestReadParallelism) throws Exception { - + if (section.size() == 1 && deletedIdentifiers.isEmpty()) { + return section; + } long totalStart = System.currentTimeMillis(); long readTime = 0; long sortTime = 0; @@ -882,7 +891,7 @@ private static FullCompactionReadResult readForSortRewrite( ManifestFile manifestFile, Set deletedIdentifiers) { List entries = new ArrayList<>(); - if (deletedIdentifiers == null || deletedIdentifiers.isEmpty()) { + if (deletedIdentifiers.isEmpty()) { entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); } else { for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 524caed50dbf..9744df5d7f1c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -55,7 +55,7 @@ public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { * @return list of picked runs to compact */ public List pick(List levelRuns) { - if (levelRuns.isEmpty()) { + if (levelRuns.isEmpty() || levelRuns.size() < 5) { return new ArrayList<>(); } From 0f25c9e80f0e1e9662064b95e5115711628be2f7 Mon Sep 17 00:00:00 2001 From: umi Date: Sun, 17 May 2026 14:03:28 +0800 Subject: [PATCH 17/51] rmTrigger --- .../paimon/operation/ManifestFileSorter.java | 64 ++++++++----------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java 
b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index c4ef6aa802cf..0d85a4dd4ec1 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -196,57 +196,45 @@ static Optional> trySortRewrite( private static ClassifyResult classifyManifests( List input, long suggestedMetaSize, - long manifestFullCompactionSize, ManifestFile manifestFile, RowType partitionType, @Nullable Integer manifestReadParallelism) { Filter mustChange = file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; - long totalDeltaFileSize = 0; - for (ManifestFileMeta file : input) { - if (mustChange.test(file)) { - totalDeltaFileSize += file.fileSize(); - } - } List defaultCompactionManifests = new ArrayList<>(); List lsmFiles = new LinkedList<>(input); - Set deleteEntries = new HashSet<>(); - - if (totalDeltaFileSize >= manifestFullCompactionSize) { - // Full compact triggered: read delete entries and classify by predicate. 
- deleteEntries = - FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + Set deleteEntries = + FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); - PartitionPredicate predicate; - if (deleteEntries.isEmpty()) { - predicate = PartitionPredicate.ALWAYS_FALSE; + PartitionPredicate predicate; + if (deleteEntries.isEmpty()) { + predicate = PartitionPredicate.ALWAYS_FALSE; + } else { + if (partitionType.getFieldCount() > 0) { + Set deletePartitions = + ManifestFileMerger.computeDeletePartitions(deleteEntries); + predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); } else { - if (partitionType.getFieldCount() > 0) { - Set deletePartitions = - ManifestFileMerger.computeDeletePartitions(deleteEntries); - predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); - } else { - predicate = PartitionPredicate.ALWAYS_TRUE; - } + predicate = PartitionPredicate.ALWAYS_TRUE; } + } - Iterator iterator = lsmFiles.iterator(); - while (iterator.hasNext()) { - ManifestFileMeta file = iterator.next(); - if (mustChange.test(file)) { - iterator.remove(); - defaultCompactionManifests.add(file); - } else if (predicate != null - && predicate.test( - file.numAddedFiles() + file.numDeletedFiles(), - file.partitionStats().minValues(), - file.partitionStats().maxValues(), - file.partitionStats().nullCounts())) { - iterator.remove(); - defaultCompactionManifests.add(file); - } + Iterator iterator = lsmFiles.iterator(); + while (iterator.hasNext()) { + ManifestFileMeta file = iterator.next(); + if (mustChange.test(file)) { + iterator.remove(); + defaultCompactionManifests.add(file); + } else if (predicate != null + && predicate.test( + file.numAddedFiles() + file.numDeletedFiles(), + file.partitionStats().minValues(), + file.partitionStats().maxValues(), + file.partitionStats().nullCounts())) { + iterator.remove(); + defaultCompactionManifests.add(file); } } return new 
ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); From 46f064e4b4d896e7dbce0a0ffc182fa3253db1e9 Mon Sep 17 00:00:00 2001 From: umi Date: Sun, 17 May 2026 23:36:21 +0800 Subject: [PATCH 18/51] jili --- .../paimon/operation/ManifestFileMerger.java | 8 +- .../paimon/operation/ManifestFileSorter.java | 169 ++++++++++++------ 2 files changed, 125 insertions(+), 52 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index b6bfedbfd6bf..46371c698773 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -207,7 +207,13 @@ public static Optional> tryFullCompaction( totalManifestSize, deltaDeleteFileNum, totalDeltaFileSize); - System.out.println("Start Manifest File Full Compaction: totalManifestSize: " + totalManifestSize + ", deltaDeleteFileNum " + deltaDeleteFileNum + ", totalDeltaFileSize " + totalDeltaFileSize); + System.out.println( + "Start Manifest File Full Compaction: totalManifestSize: " + + totalManifestSize + + ", deltaDeleteFileNum " + + deltaDeleteFileNum + + ", totalDeltaFileSize " + + totalDeltaFileSize); // 2.1. 
read all delete entries diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 0d85a4dd4ec1..dfa3b37bb95d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -30,19 +30,18 @@ import org.apache.paimon.types.DataType; import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; -import org.apache.paimon.utils.Filter; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; - import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.PriorityQueue; import java.util.Set; @@ -87,14 +86,40 @@ static Optional> trySortRewrite( classifyManifests( input, suggestedMetaSize, - manifestFullCompactionSize, manifestFile, partitionType, manifestReadParallelism); - List defaultCompactionManifests = classified.defaultCompactionManifests; + Map defaultCompactionMap = + classified.defaultCompactionManifests; + List defaultCompactionManifests = + new ArrayList<>(defaultCompactionMap.keySet()); List lsmFiles = classified.lsmFiles; Set deleteEntries = classified.deleteEntries; + // Print classify reasons + int smallCount = 0; + int deleteRangeCount = 0; + int bothCount = 0; + for (boolean[] reasons : defaultCompactionMap.values()) { + if (reasons[0] && reasons[1]) { + bothCount++; + } else if (reasons[0]) { + smallCount++; + } else if (reasons[1]) { + deleteRangeCount++; + } + } + System.out.println( + "[classifyManifests] defaultCompaction=" + + defaultCompactionMap.size() + + " (small=" + + smallCount + + ", inDeleteRange=" + + deleteRangeCount + + ", both=" + + bothCount + + ")"); + 
// Step 3: Build LSM Tree and assign levels (only for lsmFiles). List levelRuns = lsmFiles.isEmpty() @@ -162,12 +187,12 @@ static Optional> trySortRewrite( List rewritten = rewriteSections( sections, - defaultCompactionSet, + defaultCompactionMap, manifestFile, sortFieldIndex, sortFieldType, deleteEntries, - manifestFullCompactionSize, + suggestedMetaSize, maxRewriteSize, openFileCost, sortNewFiles, @@ -199,11 +224,7 @@ private static ClassifyResult classifyManifests( ManifestFile manifestFile, RowType partitionType, @Nullable Integer manifestReadParallelism) { - Filter mustChange = - file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; - - - List defaultCompactionManifests = new ArrayList<>(); + Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); Set deleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); @@ -224,17 +245,17 @@ private static ClassifyResult classifyManifests( Iterator iterator = lsmFiles.iterator(); while (iterator.hasNext()) { ManifestFileMeta file = iterator.next(); - if (mustChange.test(file)) { + boolean small = file.fileSize() < suggestedMetaSize; + boolean inDeleteRange = + predicate != null + && predicate.test( + file.numAddedFiles() + file.numDeletedFiles(), + file.partitionStats().minValues(), + file.partitionStats().maxValues(), + file.partitionStats().nullCounts()); + if (small || inDeleteRange) { iterator.remove(); - defaultCompactionManifests.add(file); - } else if (predicate != null - && predicate.test( - file.numAddedFiles() + file.numDeletedFiles(), - file.partitionStats().minValues(), - file.partitionStats().maxValues(), - file.partitionStats().nullCounts())) { - iterator.remove(); - defaultCompactionManifests.add(file); + defaultCompactionManifests.put(file, new boolean[] {small, inDeleteRange}); } } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); @@ -252,12 +273,12 @@ private static ClassifyResult 
classifyManifests( */ private static List rewriteSections( List
    sections, - Set defaultCompactionSet, + Map defaultCompactionMap, ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, Set deleteEntries, - long manifestFullCompactionSize, + long suggestedMetaSize, long maxRewriteSize, long openFileCost, List sortNewFiles, @@ -267,6 +288,7 @@ private static List rewriteSections( long processedSize = 0; boolean reachedLimit = false; + long totalRewriteSubSegmentsMs = 0; for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); @@ -276,17 +298,19 @@ private static List rewriteSections( result.addAll(section.files); } else { processedSize = processedSize + section.totalSizeWithCost; + long t0 = System.currentTimeMillis(); rewriteSubSegments( section.files, - defaultCompactionSet, + defaultCompactionMap, manifestFile, sortFieldIndex, sortFieldType, deleteEntries, - manifestFullCompactionSize, + suggestedMetaSize, sortNewFiles, result, manifestReadParallelism); + totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } continue; } @@ -327,7 +351,7 @@ private static List rewriteSections( remainingFiles.add(file); remainingSize += file.fileSize(); remainingSizeWithCost += fileCost; - if (defaultCompactionSet.contains(file)) { + if (defaultCompactionMap.containsKey(file)) { remainingHasDefault = true; } } @@ -362,21 +386,27 @@ private static List rewriteSections( } reachedLimit = true; } else if (section.hasDefaultCompactMeta) { + long t0 = System.currentTimeMillis(); rewriteSubSegments( section.files, - defaultCompactionSet, + defaultCompactionMap, manifestFile, sortFieldIndex, sortFieldType, deleteEntries, - manifestFullCompactionSize, + suggestedMetaSize, sortNewFiles, result, manifestReadParallelism); + totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } else { result.addAll(section.files); } } + System.out.println( + "[rewriteSections] rewriteSubSegments total took " + + totalRewriteSubSegmentsMs + + " ms"); return result; } @@ -386,32 +416,29 @@ private static List 
rewriteSections( */ private static void rewriteSubSegments( List section, - Set defaultCompactionSet, + Map defaultCompactionMap, ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, @Nullable Set deleteEntries, - long manifestFullCompactionSize, + long manifestTargetSize, List sortNewFiles, List result, @Nullable Integer manifestReadParallelism) throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; + long totalSmallCount = 0; + int rewriteCount = 0; for (ManifestFileMeta m : section) { - boolean shouldAccumulate = - defaultCompactionSet.contains(m) - && subSegmentSize + m.fileSize() < manifestFullCompactionSize; - - if (shouldAccumulate) { - // Continue accumulating - subSegment.add(m); - subSegmentSize += m.fileSize(); - } else { - // Need to break the segment - if (!subSegment.isEmpty()) { - // Process accumulated subSegment first - subSegment.add(m); + subSegmentSize += m.fileSize(); + subSegment.add(m); + + if (subSegmentSize >= manifestTargetSize) { + if (subSegment.size() == 1 + && (!defaultCompactionMap.containsKey(m) + || !defaultCompactionMap.get(m)[1])) result.add(m); + else { List merged = sortAndRewriteSection( subSegment, @@ -420,14 +447,28 @@ private static void rewriteSubSegments( sortFieldType, deleteEntries, manifestReadParallelism); + long smallCount = 0; + for (ManifestFileMeta f : merged) { + if (f.fileSize() < manifestTargetSize) { + smallCount++; + } + } + rewriteCount++; + totalSmallCount += smallCount; + System.out.println( + "[rewriteSubSegments] merged " + + subSegment.size() + + " -> " + + merged.size() + + " files, small files(<" + + manifestTargetSize + + "): " + + smallCount); sortNewFiles.addAll(merged); result.addAll(merged); - subSegment.clear(); - subSegmentSize = 0; - } else { - // Directly add to result - result.add(m); } + subSegment.clear(); + subSegmentSize = 0; } } // Flush remaining sub-segment @@ -440,9 +481,33 @@ private static void rewriteSubSegments( sortFieldType, 
deleteEntries, manifestReadParallelism); + long smallCount = 0; + for (ManifestFileMeta f : merged) { + if (f.fileSize() < manifestTargetSize) { + smallCount++; + } + } + rewriteCount++; + totalSmallCount += smallCount; + System.out.println( + "[rewriteSubSegments-flush] merged " + + subSegment.size() + + " -> " + + merged.size() + + " files, small files(<" + + manifestTargetSize + + "): " + + smallCount); sortNewFiles.addAll(merged); result.addAll(merged); } + System.out.println( + "[rewriteSubSegments] sortAndRewriteSection called " + + rewriteCount + + " times, total small files: " + + totalSmallCount + + ", result size: " + + result.size()); } /** @@ -923,12 +988,14 @@ static Section merge(Section a, Section b) { /** Result of classifying manifest files into default-compaction and LSM groups. */ private static class ClassifyResult { - final List defaultCompactionManifests; + /** key: ManifestFileMeta, value: boolean[]{isSmall, isInDeleteRange}. */ + final Map defaultCompactionManifests; + final List lsmFiles; @Nullable final Set deleteEntries; ClassifyResult( - List defaultCompactionManifests, + Map defaultCompactionManifests, List lsmFiles, @Nullable Set deleteEntries) { this.defaultCompactionManifests = defaultCompactionManifests; From 6de59e0aca585cf2edeccdccb4629fdfc9209201 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 11:38:13 +0800 Subject: [PATCH 19/51] fix --- .../paimon/operation/ManifestFileSorter.java | 522 ++++++++---------- 1 file changed, 235 insertions(+), 287 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index dfa3b37bb95d..e4d87c4a91b8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -69,7 +69,6 @@ static Optional> trySortRewrite( throws Exception { // Extract 
configuration from options long suggestedMetaSize = options.manifestTargetSize().getBytes(); - long manifestFullCompactionSize = options.manifestFullCompactionThresholdSize().getBytes(); Integer manifestReadParallelism = options.scanManifestParallelism(); String sortPartitionField = options.manifestSortPartitionField(); // Step 1: Resolve sort field. @@ -260,7 +259,213 @@ private static ClassifyResult classifyManifests( } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } + /** + * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, + * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 + * largest to level 1~4, rest to level 0). + */ + static List buildLevelSortedRuns( + List input, int sortFieldIndex, DataType sortFieldType) { + // Step 1: Sort by min value (if equal, then by max value) + input.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + // Step 2: Interval graph coloring algorithm - assign files to runs + // Use priority queue to track runs by their max values + PriorityQueue> runs = + new PriorityQueue<>( + (r1, r2) -> { + ManifestFileMeta last1 = r1.get(r1.size() - 1); + ManifestFileMeta last2 = r2.get(r2.size() - 1); + return compareField( + last1.partitionStats().maxValues(), + last2.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + for (ManifestFileMeta file : input) { + boolean addedToExisting = false; + + // Try to find a run where current file's min >= run's max + if (!runs.isEmpty()) { + List earliestRun = runs.peek(); + ManifestFileMeta last = earliestRun.get(earliestRun.size() - 1); + + if (compareField( + file.partitionStats().minValues(), + 
last.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType) + >= 0) { + // Current file can be added to this run + runs.poll(); + earliestRun.add(file); + runs.offer(earliestRun); + addedToExisting = true; + } + } + + if (!addedToExisting) { + // Create a new run + List newRun = new ArrayList<>(); + newRun.add(file); + runs.offer(newRun); + } + } + + // Step 3: Convert to ManifestSortedRun list + List result = new ArrayList<>(); + while (!runs.isEmpty()) { + result.add(ManifestSortedRun.fromSorted(runs.poll())); + } + + // Step 4: Sort by totalSize and assign levels + result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + int n = result.size(); + int maxLevel = 4; + for (int i = 0; i < n; i++) { + if (i >= n - maxLevel) { + result.get(i).setLevel(i - (n - maxLevel) + 1); + } else { + result.get(i).setLevel(0); + } + } + System.out.println("run num: " + result.size()); + return result; + } + /** + * Split picked files into sections. Files with overlapping sort-key intervals go into the same + * section. Each section is built with pre-computed totalSize and hasDefaultCompactMeta. + */ + static List
    splitIntoSections( + List pickedFiles, + int sortFieldIndex, + DataType sortFieldType, + Set defaultCompactionSet, + long openFileCost) { + pickedFiles.sort( + (a, b) -> { + int cmp = + compareField( + a.partitionStats().minValues(), + b.partitionStats().minValues(), + sortFieldIndex, + sortFieldType); + if (cmp != 0) { + return cmp; + } + return compareField( + a.partitionStats().maxValues(), + b.partitionStats().maxValues(), + sortFieldIndex, + sortFieldType); + }); + + List
    sections = new ArrayList<>(); + List currentFiles = new ArrayList<>(); + long currentTotalSize = 0; + long currentTotalSizeWithCost = 0; + boolean currentHasDefault = false; + ManifestFileMeta first = pickedFiles.get(0); + currentFiles.add(first); + currentTotalSize += first.fileSize(); + currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); + currentHasDefault = defaultCompactionSet.contains(first); + BinaryRow sectionMaxBound = first.partitionStats().maxValues(); + + for (int i = 1; i < pickedFiles.size(); i++) { + ManifestFileMeta file = pickedFiles.get(i); + if (compareField( + file.partitionStats().minValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + >= 0) { + sections.add( + new Section( + currentFiles, + currentTotalSize, + currentTotalSizeWithCost, + currentHasDefault)); + currentFiles = new ArrayList<>(); + currentTotalSize = 0; + currentTotalSizeWithCost = 0; + currentFiles.add(file); + currentTotalSize += file.fileSize(); + currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); + currentHasDefault = defaultCompactionSet.contains(file); + sectionMaxBound = file.partitionStats().maxValues(); + } else { + currentFiles.add(file); + currentTotalSize += file.fileSize(); + currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); + if (!currentHasDefault && defaultCompactionSet.contains(file)) { + currentHasDefault = true; + } + if (compareField( + file.partitionStats().maxValues(), + sectionMaxBound, + sortFieldIndex, + sortFieldType) + > 0) { + sectionMaxBound = file.partitionStats().maxValues(); + } + } + } + sections.add( + new Section( + currentFiles, + currentTotalSize, + currentTotalSizeWithCost, + currentHasDefault)); + return sections; + } + + /** + * Merge small adjacent sections to avoid producing too many small rewrite batches. If either + * the pending section or the current section total size is smaller than half of {@code + * suggestedMetaSize}, they are combined into a single section. 
+ */ + private static List
    mergeSmallAdjacentSections( + List
    sections, long suggestedMetaSize) { + long smallThreshold = suggestedMetaSize / 2; + List
    merged = new ArrayList<>(); + Section pending = null; + + for (Section section : sections) { + if (pending == null) { + pending = section; + } else { + if (pending.totalSize < smallThreshold || section.totalSize < smallThreshold) { + pending = Section.merge(pending, section); + } else { + merged.add(pending); + pending = section; + } + } + } + if (pending != null) { + merged.add(pending); + } + return merged; + } /** * Iterate over sections, decide whether to rewrite each section fully or partially based on the * maxRewriteSize threshold and whether the section contains defaultCompaction files. @@ -297,19 +502,17 @@ private static List rewriteSections( if (!section.hasDefaultCompactMeta || deleteEntries.isEmpty()) { result.addAll(section.files); } else { - processedSize = processedSize + section.totalSizeWithCost; long t0 = System.currentTimeMillis(); - rewriteSubSegments( - section.files, - defaultCompactionMap, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - suggestedMetaSize, - sortNewFiles, - result, - manifestReadParallelism); + List merged = + sortAndRewriteSection( + section.files, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + manifestReadParallelism); + sortNewFiles.addAll(merged); + result.addAll(merged); totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } continue; @@ -510,261 +713,6 @@ private static void rewriteSubSegments( + result.size()); } - /** - * Partial rewrite of a section: only rewrite files that fit within the remaining budget. Files - * beyond the budget are kept as-is. 
- */ - private static void partialRewriteSection( - List sectionFiles, - long remaining, - long openFileCost, - ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, - @Nullable Set deleteEntries, - List sortNewFiles, - List result, - @Nullable Integer manifestReadParallelism) - throws Exception { - List toRewrite = new ArrayList<>(); - int splitIndex = 0; - long partialSize = 0; - for (int i = 0; i < sectionFiles.size(); i++) { - long fileCost = Math.max(sectionFiles.get(i).fileSize(), openFileCost); - if (partialSize + fileCost > remaining) { - break; - } - toRewrite.add(sectionFiles.get(i)); - partialSize += fileCost; - splitIndex = i + 1; - } - if (toRewrite.size() > 1) { - List merged = - sortAndRewriteSection( - toRewrite, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } else { - result.addAll(toRewrite); - } - for (int i = splitIndex; i < sectionFiles.size(); i++) { - result.add(sectionFiles.get(i)); - } - } - - /** - * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, - * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 - * largest to level 1~4, rest to level 0). 
- */ - static List buildLevelSortedRuns( - List input, int sortFieldIndex, DataType sortFieldType) { - // Step 1: Sort by min value (if equal, then by max value) - input.sort( - (a, b) -> { - int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp != 0) { - return cmp; - } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - // Step 2: Interval graph coloring algorithm - assign files to runs - // Use priority queue to track runs by their max values - PriorityQueue> runs = - new PriorityQueue<>( - (r1, r2) -> { - ManifestFileMeta last1 = r1.get(r1.size() - 1); - ManifestFileMeta last2 = r2.get(r2.size() - 1); - return compareField( - last1.partitionStats().maxValues(), - last2.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - for (ManifestFileMeta file : input) { - boolean addedToExisting = false; - - // Try to find a run where current file's min >= run's max - if (!runs.isEmpty()) { - List earliestRun = runs.peek(); - ManifestFileMeta last = earliestRun.get(earliestRun.size() - 1); - - if (compareField( - file.partitionStats().minValues(), - last.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType) - >= 0) { - // Current file can be added to this run - runs.poll(); - earliestRun.add(file); - runs.offer(earliestRun); - addedToExisting = true; - } - } - - if (!addedToExisting) { - // Create a new run - List newRun = new ArrayList<>(); - newRun.add(file); - runs.offer(newRun); - } - } - - // Step 3: Convert to ManifestSortedRun list - List result = new ArrayList<>(); - while (!runs.isEmpty()) { - result.add(ManifestSortedRun.fromSorted(runs.poll())); - } - - // Step 4: Sort by totalSize and assign levels - result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); - int n = result.size(); - int maxLevel = 4; - for (int i = 0; i < n; i++) { - if (i >= n - 
maxLevel) { - result.get(i).setLevel(i - (n - maxLevel) + 1); - } else { - result.get(i).setLevel(0); - } - } - System.out.println("run num: " + result.size()); - return result; - } - - /** - * Split picked files into sections. Files with overlapping sort-key intervals go into the same - * section. Each section is built with pre-computed totalSize and hasDefaultCompactMeta. - */ - static List
    splitIntoSections( - List pickedFiles, - int sortFieldIndex, - DataType sortFieldType, - Set defaultCompactionSet, - long openFileCost) { - pickedFiles.sort( - (a, b) -> { - int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); - if (cmp != 0) { - return cmp; - } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); - }); - - List
    sections = new ArrayList<>(); - List currentFiles = new ArrayList<>(); - long currentTotalSize = 0; - long currentTotalSizeWithCost = 0; - boolean currentHasDefault = false; - ManifestFileMeta first = pickedFiles.get(0); - currentFiles.add(first); - currentTotalSize += first.fileSize(); - currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); - currentHasDefault = defaultCompactionSet.contains(first); - BinaryRow sectionMaxBound = first.partitionStats().maxValues(); - - for (int i = 1; i < pickedFiles.size(); i++) { - ManifestFileMeta file = pickedFiles.get(i); - if (compareField( - file.partitionStats().minValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - >= 0) { - sections.add( - new Section( - currentFiles, - currentTotalSize, - currentTotalSizeWithCost, - currentHasDefault)); - currentFiles = new ArrayList<>(); - currentTotalSize = 0; - currentTotalSizeWithCost = 0; - currentFiles.add(file); - currentTotalSize += file.fileSize(); - currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); - currentHasDefault = defaultCompactionSet.contains(file); - sectionMaxBound = file.partitionStats().maxValues(); - } else { - currentFiles.add(file); - currentTotalSize += file.fileSize(); - currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); - if (!currentHasDefault && defaultCompactionSet.contains(file)) { - currentHasDefault = true; - } - if (compareField( - file.partitionStats().maxValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - > 0) { - sectionMaxBound = file.partitionStats().maxValues(); - } - } - } - sections.add( - new Section( - currentFiles, - currentTotalSize, - currentTotalSizeWithCost, - currentHasDefault)); - return sections; - } - - /** - * Merge small adjacent sections to avoid producing too many small rewrite batches. If either - * the pending section or the current section total size is smaller than half of {@code - * suggestedMetaSize}, they are combined into a single section. 
- */ - private static List
    mergeSmallAdjacentSections( - List
    sections, long suggestedMetaSize) { - long smallThreshold = suggestedMetaSize / 2; - List
    merged = new ArrayList<>(); - Section pending = null; - - for (Section section : sections) { - if (pending == null) { - pending = section; - } else { - if (pending.totalSize < smallThreshold || section.totalSize < smallThreshold) { - pending = Section.merge(pending, section); - } else { - merged.add(pending); - pending = section; - } - } - } - if (pending != null) { - merged.add(pending); - } - return merged; - } - /** * Read all entries from a section's manifest files, sort them in memory by the specified * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving @@ -849,6 +797,25 @@ private static List sortAndRewriteSection( return result; } + /** + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field + * value AND the same data file are emitted contiguously. + */ + static int compareSortKey( + ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { + int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + if (c != 0) { + return c; + } + // ADD before DELETE, so that mergeEntries can correctly cancel pairs + int kindCmp = a.kind().compareTo(b.kind()); + if (kindCmp != 0) { + return kindCmp; + } + return a.file().fileName().compareTo(b.file().fileName()); + } + /** * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. */ @@ -893,25 +860,6 @@ static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { } } - /** - * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. - * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field - * value AND the same data file are emitted contiguously. 
- */ - static int compareSortKey( - ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { - int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); - if (c != 0) { - return c; - } - // ADD before DELETE, so that mergeEntries can correctly cancel pairs - int kindCmp = a.kind().compareTo(b.kind()); - if (kindCmp != 0) { - return kindCmp; - } - return a.file().fileName().compareTo(b.file().fileName()); - } - /** * Resolve the partition field to sort manifests by. * From 6f4e3029ae021c54354d8e832963bfd30f9353ed Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 15:39:43 +0800 Subject: [PATCH 20/51] rmPrint --- .../java/org/apache/paimon/CoreOptions.java | 2 +- .../paimon/operation/ManifestFileSorter.java | 137 ++---------------- 2 files changed, 11 insertions(+), 128 deletions(-) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 0e1792fb1158..b668e8d7ef1d 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -500,7 +500,7 @@ public InlineElement getDescription() { public static final ConfigOption MANIFEST_SORT_OPEN_FILE_COST = key("manifest-sort.open-file-cost") .memoryType() - .defaultValue(MemorySize.ofKibiBytes(40)) + .defaultValue(MemorySize.ofMebiBytes(4)) .withDescription( "Open file cost of a manifest file during sort rewrite. 
" + "It is added to each manifest file's size when computing " diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index e4d87c4a91b8..b462796477de 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -90,34 +90,9 @@ static Optional> trySortRewrite( manifestReadParallelism); Map defaultCompactionMap = classified.defaultCompactionManifests; - List defaultCompactionManifests = - new ArrayList<>(defaultCompactionMap.keySet()); List lsmFiles = classified.lsmFiles; Set deleteEntries = classified.deleteEntries; - // Print classify reasons - int smallCount = 0; - int deleteRangeCount = 0; - int bothCount = 0; - for (boolean[] reasons : defaultCompactionMap.values()) { - if (reasons[0] && reasons[1]) { - bothCount++; - } else if (reasons[0]) { - smallCount++; - } else if (reasons[1]) { - deleteRangeCount++; - } - } - System.out.println( - "[classifyManifests] defaultCompaction=" - + defaultCompactionMap.size() - + " (small=" - + smallCount - + ", inDeleteRange=" - + deleteRangeCount - + ", both=" - + bothCount - + ")"); // Step 3: Build LSM Tree and assign levels (only for lsmFiles). 
List levelRuns = @@ -132,7 +107,7 @@ static Optional> trySortRewrite( new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); List pickedRuns = pickStrategy.pick(levelRuns); - if (pickedRuns.isEmpty() && defaultCompactionManifests.isEmpty()) { + if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort rewrite skipped: no runs picked and no defaultCompaction files."); return Optional.of(input); @@ -144,7 +119,7 @@ static Optional> trySortRewrite( input.size(), levelRuns.size(), pickedRuns.size(), - defaultCompactionManifests.size()); + defaultCompactionMap.size()); Set pickedSet = new HashSet<>(pickedRuns); List reusedFiles = new ArrayList<>(); @@ -160,9 +135,8 @@ static Optional> trySortRewrite( for (ManifestSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } - pickedFiles.addAll(defaultCompactionManifests); + pickedFiles.addAll(defaultCompactionMap.keySet()); - Set defaultCompactionSet = new HashSet<>(defaultCompactionManifests); long maxRewriteSize = options.manifestSortMaxRewriteSize(); long openFileCost = options.manifestSortOpenFileCost(); @@ -171,14 +145,9 @@ static Optional> trySortRewrite( pickedFiles, sortFieldIndex, sortFieldType, - defaultCompactionSet, + defaultCompactionMap, openFileCost); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); - System.out.println( - "After splitIntoSections: sections=" - + sections.size() - + ", pickedFiles=" - + pickedFiles.size()); LOG.info("After mergeSmallAdjacentSections: sections={}.", sections.size()); List sortNewFiles = new ArrayList<>(); @@ -358,7 +327,7 @@ static List
    splitIntoSections( List pickedFiles, int sortFieldIndex, DataType sortFieldType, - Set defaultCompactionSet, + Map defaultCompactionMap, long openFileCost) { pickedFiles.sort( (a, b) -> { @@ -387,7 +356,7 @@ static List
    splitIntoSections( currentFiles.add(first); currentTotalSize += first.fileSize(); currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); - currentHasDefault = defaultCompactionSet.contains(first); + currentHasDefault = defaultCompactionMap.containsKey(first); BinaryRow sectionMaxBound = first.partitionStats().maxValues(); for (int i = 1; i < pickedFiles.size(); i++) { @@ -410,13 +379,13 @@ static List
    splitIntoSections( currentFiles.add(file); currentTotalSize += file.fileSize(); currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); - currentHasDefault = defaultCompactionSet.contains(file); + currentHasDefault = defaultCompactionMap.containsKey(file); sectionMaxBound = file.partitionStats().maxValues(); } else { currentFiles.add(file); currentTotalSize += file.fileSize(); currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); - if (!currentHasDefault && defaultCompactionSet.contains(file)) { + if (!currentHasDefault && defaultCompactionMap.containsKey(file)) { currentHasDefault = true; } if (compareField( @@ -445,7 +414,6 @@ static List
    splitIntoSections( */ private static List
    mergeSmallAdjacentSections( List
    sections, long suggestedMetaSize) { - long smallThreshold = suggestedMetaSize / 2; List
    merged = new ArrayList<>(); Section pending = null; @@ -453,7 +421,7 @@ private static List
    mergeSmallAdjacentSections( if (pending == null) { pending = section; } else { - if (pending.totalSize < smallThreshold || section.totalSize < smallThreshold) { + if (pending.totalSize < suggestedMetaSize || section.totalSize < suggestedMetaSize) { pending = Section.merge(pending, section); } else { merged.add(pending); @@ -491,9 +459,7 @@ private static List rewriteSections( throws Exception { List result = new ArrayList<>(); long processedSize = 0; - boolean reachedLimit = false; - long totalRewriteSubSegmentsMs = 0; for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); @@ -502,7 +468,6 @@ private static List rewriteSections( if (!section.hasDefaultCompactMeta || deleteEntries.isEmpty()) { result.addAll(section.files); } else { - long t0 = System.currentTimeMillis(); List merged = sortAndRewriteSection( section.files, @@ -513,7 +478,6 @@ private static List rewriteSections( manifestReadParallelism); sortNewFiles.addAll(merged); result.addAll(merged); - totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } continue; } @@ -589,7 +553,6 @@ private static List rewriteSections( } reachedLimit = true; } else if (section.hasDefaultCompactMeta) { - long t0 = System.currentTimeMillis(); rewriteSubSegments( section.files, defaultCompactionMap, @@ -601,21 +564,15 @@ private static List rewriteSections( sortNewFiles, result, manifestReadParallelism); - totalRewriteSubSegmentsMs += System.currentTimeMillis() - t0; } else { result.addAll(section.files); } } - System.out.println( - "[rewriteSections] rewriteSubSegments total took " - + totalRewriteSubSegmentsMs - + " ms"); return result; } /** - * Rewrite sub-segments within a section that exceeds the rewrite threshold. Only sub-segments - * containing defaultCompaction files are rewritten; other files are kept as-is. + * Rewrite sub-segments within a section that exceeds the rewrite threshold. 
*/ private static void rewriteSubSegments( List section, @@ -631,8 +588,6 @@ private static void rewriteSubSegments( throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; - long totalSmallCount = 0; - int rewriteCount = 0; for (ManifestFileMeta m : section) { subSegmentSize += m.fileSize(); subSegment.add(m); @@ -650,23 +605,6 @@ private static void rewriteSubSegments( sortFieldType, deleteEntries, manifestReadParallelism); - long smallCount = 0; - for (ManifestFileMeta f : merged) { - if (f.fileSize() < manifestTargetSize) { - smallCount++; - } - } - rewriteCount++; - totalSmallCount += smallCount; - System.out.println( - "[rewriteSubSegments] merged " - + subSegment.size() - + " -> " - + merged.size() - + " files, small files(<" - + manifestTargetSize - + "): " - + smallCount); sortNewFiles.addAll(merged); result.addAll(merged); } @@ -684,33 +622,9 @@ private static void rewriteSubSegments( sortFieldType, deleteEntries, manifestReadParallelism); - long smallCount = 0; - for (ManifestFileMeta f : merged) { - if (f.fileSize() < manifestTargetSize) { - smallCount++; - } - } - rewriteCount++; - totalSmallCount += smallCount; - System.out.println( - "[rewriteSubSegments-flush] merged " - + subSegment.size() - + " -> " - + merged.size() - + " files, small files(<" - + manifestTargetSize - + "): " - + smallCount); sortNewFiles.addAll(merged); result.addAll(merged); } - System.out.println( - "[rewriteSubSegments] sortAndRewriteSection called " - + rewriteCount - + " times, total small files: " - + totalSmallCount - + ", result size: " - + result.size()); } /** @@ -731,16 +645,7 @@ private static List sortAndRewriteSection( Set deletedIdentifiers, @Nullable Integer manifestReadParallelism) throws Exception { - if (section.size() == 1 && deletedIdentifiers.isEmpty()) { - return section; - } - long totalStart = System.currentTimeMillis(); - long readTime = 0; - long sortTime = 0; - long writeTime = 0; - // Parallel read: each meta is read 
independently - long readStart = System.currentTimeMillis(); Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); @@ -749,15 +654,11 @@ private static List sortAndRewriteSection( sequentialBatchedExecute(reader, section, manifestReadParallelism)) { entriesToRewrite.addAll(readResult.entries); } - readTime = System.currentTimeMillis() - readStart; List result = new ArrayList<>(); if (!entriesToRewrite.isEmpty()) { - long sortStart = System.currentTimeMillis(); entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); - sortTime = System.currentTimeMillis() - sortStart; - long writeStart = System.currentTimeMillis(); RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; @@ -775,25 +676,7 @@ private static List sortAndRewriteSection( writer.close(); } result.addAll(writer.result()); - writeTime = System.currentTimeMillis() - writeStart; } - - long totalTime = System.currentTimeMillis() - totalStart; - if (totalTime > 0) { - System.out.println( - String.format( - "[sortAndRewriteSection] Total: %d ms, Read: %d ms (%.1f%%), Sort: %d ms (%.1f%%), Write: %d ms (%.1f%%), Entries: %d, Files: %d", - totalTime, - readTime, - 100.0 * readTime / totalTime, - sortTime, - 100.0 * sortTime / totalTime, - writeTime, - 100.0 * writeTime / totalTime, - entriesToRewrite.size(), - result.size())); - } - return result; } From dd8dcbf39f401e97fef707730bf26e9b03a59c80 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 16:58:26 +0800 Subject: [PATCH 21/51] simplied --- .../paimon/operation/ManifestFileSorter.java | 144 +++++++++--------- 1 file changed, 68 insertions(+), 76 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index b462796477de..b18f7298e0f1 100644 --- 
a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -93,7 +93,6 @@ static Optional> trySortRewrite( List lsmFiles = classified.lsmFiles; Set deleteEntries = classified.deleteEntries; - // Step 3: Build LSM Tree and assign levels (only for lsmFiles). List levelRuns = lsmFiles.isEmpty() @@ -315,7 +314,6 @@ static List buildLevelSortedRuns( result.get(i).setLevel(0); } } - System.out.println("run num: " + result.size()); return result; } @@ -421,7 +419,8 @@ private static List
    mergeSmallAdjacentSections( if (pending == null) { pending = section; } else { - if (pending.totalSize < suggestedMetaSize || section.totalSize < suggestedMetaSize) { + if (pending.totalSize < suggestedMetaSize + || section.totalSize < suggestedMetaSize) { pending = Section.merge(pending, section); } else { merged.add(pending); @@ -465,20 +464,16 @@ private static List rewriteSections( Section section = sections.get(i); // Single-file section without defaultCompaction: already sorted, skip rewrite. if (section.files.size() == 1) { - if (!section.hasDefaultCompactMeta || deleteEntries.isEmpty()) { - result.addAll(section.files); - } else { - List merged = - sortAndRewriteSection( - section.files, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } + sortAndRewriteSection( + section.files, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); continue; } long sectionSize = section.totalSizeWithCost; @@ -486,16 +481,16 @@ private static List rewriteSections( if (!exceedsThreshold) { processedSize += sectionSize; - List merged = - sortAndRewriteSection( - section.files, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); + sortAndRewriteSection( + section.files, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); } else if (!reachedLimit) { // First time exceeding threshold without defaultCompaction: // partial rewrite within remaining budget. 
@@ -524,21 +519,16 @@ private static List rewriteSections( } } - if (toRewrite.size() > 1) { - List merged = - sortAndRewriteSection( - toRewrite, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } else if (toRewrite.size() == 1) { - sortNewFiles.addAll(toRewrite); - result.addAll(toRewrite); - } + sortAndRewriteSection( + toRewrite, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); // Create new section for remaining files and append to sections list if (!remainingFiles.isEmpty()) { @@ -561,8 +551,8 @@ private static List rewriteSections( sortFieldType, deleteEntries, suggestedMetaSize, - sortNewFiles, result, + sortNewFiles, manifestReadParallelism); } else { result.addAll(section.files); @@ -571,9 +561,7 @@ private static List rewriteSections( return result; } - /** - * Rewrite sub-segments within a section that exceeds the rewrite threshold. - */ + /** Rewrite sub-segments within a section that exceeds the rewrite threshold. 
*/ private static void rewriteSubSegments( List section, Map defaultCompactionMap, @@ -582,8 +570,8 @@ private static void rewriteSubSegments( DataType sortFieldType, @Nullable Set deleteEntries, long manifestTargetSize, - List sortNewFiles, List result, + List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { List subSegment = new ArrayList<>(); @@ -593,37 +581,32 @@ private static void rewriteSubSegments( subSegment.add(m); if (subSegmentSize >= manifestTargetSize) { - if (subSegment.size() == 1 - && (!defaultCompactionMap.containsKey(m) - || !defaultCompactionMap.get(m)[1])) result.add(m); - else { - List merged = - sortAndRewriteSection( - subSegment, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); - } + sortAndRewriteSection( + subSegment, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); subSegment.clear(); subSegmentSize = 0; } } // Flush remaining sub-segment if (!subSegment.isEmpty()) { - List merged = - sortAndRewriteSection( - subSegment, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - manifestReadParallelism); - sortNewFiles.addAll(merged); - result.addAll(merged); + sortAndRewriteSection( + subSegment, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); } } @@ -637,14 +620,23 @@ private static void rewriteSubSegments( *

    Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as * {@link ManifestFileMerger#tryFullCompaction}. */ - private static List sortAndRewriteSection( + private static void sortAndRewriteSection( List section, ManifestFile manifestFile, int sortFieldIndex, DataType sortFieldType, Set deletedIdentifiers, + Map defaultCompactionMap, + List result, + List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { + if (section.size() == 1 + && (!defaultCompactionMap.containsKey(section.get(0)) + || !defaultCompactionMap.get(section.get(0))[1])) { + result.add(section.get(0)); + return; + } // Parallel read: each meta is read independently Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); @@ -655,7 +647,6 @@ private static List sortAndRewriteSection( entriesToRewrite.addAll(readResult.entries); } - List result = new ArrayList<>(); if (!entriesToRewrite.isEmpty()) { entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); @@ -675,9 +666,10 @@ private static List sortAndRewriteSection( } writer.close(); } - result.addAll(writer.result()); + List sorted = writer.result(); + result.addAll(sorted); + sortNewFiles.addAll(sorted); } - return result; } /** From d60a8e2b2a44012c2e2cf70ceade6b1fb47ac6c3 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 17:36:35 +0800 Subject: [PATCH 22/51] fix --- .../org/apache/paimon/operation/ManifestFileMerger.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 46371c698773..e197868849c1 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -207,13 +207,6 @@ public static Optional> tryFullCompaction( 
totalManifestSize, deltaDeleteFileNum, totalDeltaFileSize); - System.out.println( - "Start Manifest File Full Compaction: totalManifestSize: " - + totalManifestSize - + ", deltaDeleteFileNum " - + deltaDeleteFileNum - + ", totalDeltaFileSize " - + totalDeltaFileSize); // 2.1. read all delete entries From a81670b75f60222ef5672e74736f24891e96f347 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 19:05:19 +0800 Subject: [PATCH 23/51] rmOpenFileCost --- docs/generated/core_configuration.html | 7 +-- .../java/org/apache/paimon/CoreOptions.java | 14 ----- .../paimon/operation/ManifestFileSorter.java | 58 ++++--------------- 3 files changed, 12 insertions(+), 67 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index 17d2efeed612..f0a78f590e8d 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -933,12 +933,7 @@ MemorySize Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. - -

    manifest-sort.open-file-cost
    - 4 mb - MemorySize - Open file cost of a manifest file during sort rewrite. It is added to each manifest file's size when computing section size, to avoid rewriting too many small manifest files in a single section. - +
    manifest-sort.partition-field
    (none) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index b668e8d7ef1d..afa8e7080412 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -497,16 +497,6 @@ public InlineElement getDescription() { + " skipped. Set to a larger value to allow more aggressive" + " sort rewriting."); - public static final ConfigOption MANIFEST_SORT_OPEN_FILE_COST = - key("manifest-sort.open-file-cost") - .memoryType() - .defaultValue(MemorySize.ofMebiBytes(4)) - .withDescription( - "Open file cost of a manifest file during sort rewrite. " - + "It is added to each manifest file's size when computing " - + "section size, to avoid rewriting too many small manifest " - + "files in a single section."); - public static final ConfigOption UPSERT_KEY = key("upsert-key") .stringType() @@ -2652,10 +2642,6 @@ public long manifestSortMaxRewriteSize() { return options.get(MANIFEST_SORT_MAX_REWRITE_SIZE).getBytes(); } - public long manifestSortOpenFileCost() { - return options.get(MANIFEST_SORT_OPEN_FILE_COST).getBytes(); - } - public String partitionDefaultName() { return options.get(PARTITION_DEFAULT_NAME); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index b18f7298e0f1..445921dea943 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -30,10 +30,12 @@ import org.apache.paimon.types.DataType; import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.annotation.Nullable; + import java.util.ArrayList; import java.util.Comparator; import java.util.HashSet; @@ -137,15 +139,9 @@ 
static Optional> trySortRewrite( pickedFiles.addAll(defaultCompactionMap.keySet()); long maxRewriteSize = options.manifestSortMaxRewriteSize(); - long openFileCost = options.manifestSortOpenFileCost(); List
    sections = - splitIntoSections( - pickedFiles, - sortFieldIndex, - sortFieldType, - defaultCompactionMap, - openFileCost); + splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); LOG.info("After mergeSmallAdjacentSections: sections={}.", sections.size()); @@ -161,7 +157,6 @@ static Optional> trySortRewrite( deleteEntries, suggestedMetaSize, maxRewriteSize, - openFileCost, sortNewFiles, manifestReadParallelism); result.addAll(rewritten); @@ -325,8 +320,7 @@ static List
    splitIntoSections( List pickedFiles, int sortFieldIndex, DataType sortFieldType, - Map defaultCompactionMap, - long openFileCost) { + Map defaultCompactionMap) { pickedFiles.sort( (a, b) -> { int cmp = @@ -348,12 +342,10 @@ static List
    splitIntoSections( List
    sections = new ArrayList<>(); List currentFiles = new ArrayList<>(); long currentTotalSize = 0; - long currentTotalSizeWithCost = 0; boolean currentHasDefault = false; ManifestFileMeta first = pickedFiles.get(0); currentFiles.add(first); currentTotalSize += first.fileSize(); - currentTotalSizeWithCost += Math.max(first.fileSize(), openFileCost); currentHasDefault = defaultCompactionMap.containsKey(first); BinaryRow sectionMaxBound = first.partitionStats().maxValues(); @@ -365,24 +357,16 @@ static List
    splitIntoSections( sortFieldIndex, sortFieldType) >= 0) { - sections.add( - new Section( - currentFiles, - currentTotalSize, - currentTotalSizeWithCost, - currentHasDefault)); + sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); currentTotalSize = 0; - currentTotalSizeWithCost = 0; currentFiles.add(file); currentTotalSize += file.fileSize(); - currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); currentHasDefault = defaultCompactionMap.containsKey(file); sectionMaxBound = file.partitionStats().maxValues(); } else { currentFiles.add(file); currentTotalSize += file.fileSize(); - currentTotalSizeWithCost += Math.max(file.fileSize(), openFileCost); if (!currentHasDefault && defaultCompactionMap.containsKey(file)) { currentHasDefault = true; } @@ -396,12 +380,7 @@ static List
    splitIntoSections( } } } - sections.add( - new Section( - currentFiles, - currentTotalSize, - currentTotalSizeWithCost, - currentHasDefault)); + sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); return sections; } @@ -452,7 +431,6 @@ private static List rewriteSections( Set deleteEntries, long suggestedMetaSize, long maxRewriteSize, - long openFileCost, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { @@ -476,7 +454,7 @@ private static List rewriteSections( manifestReadParallelism); continue; } - long sectionSize = section.totalSizeWithCost; + long sectionSize = section.totalSize; boolean exceedsThreshold = processedSize + sectionSize > maxRewriteSize; if (!exceedsThreshold) { @@ -501,18 +479,15 @@ private static List rewriteSections( List remainingFiles = new ArrayList<>(); long rewriteSize = 0; long remainingSize = 0; - long remainingSizeWithCost = 0; boolean remainingHasDefault = false; for (ManifestFileMeta file : section.files) { - long fileCost = Math.max(file.fileSize(), openFileCost); - if (rewriteSize + fileCost <= remaining) { + if (rewriteSize + file.fileSize() <= remaining) { toRewrite.add(file); - rewriteSize += fileCost; + rewriteSize += file.fileSize(); } else { remainingFiles.add(file); remainingSize += file.fileSize(); - remainingSizeWithCost += fileCost; if (defaultCompactionMap.containsKey(file)) { remainingHasDefault = true; } @@ -533,11 +508,7 @@ private static List rewriteSections( // Create new section for remaining files and append to sections list if (!remainingFiles.isEmpty()) { Section remainingSection = - new Section( - remainingFiles, - remainingSize, - remainingSizeWithCost, - remainingHasDefault); + new Section(remainingFiles, remainingSize, remainingHasDefault); // Append remaining section to the end of sections list sections.add(remainingSection); } @@ -783,17 +754,11 @@ private static FullCompactionReadResult readForSortRewrite( static class Section { final List files; 
final long totalSize; - final long totalSizeWithCost; final boolean hasDefaultCompactMeta; - Section( - List files, - long totalSize, - long totalSizeWithCost, - boolean hasDefaultCompactMeta) { + Section(List files, long totalSize, boolean hasDefaultCompactMeta) { this.files = files; this.totalSize = totalSize; - this.totalSizeWithCost = totalSizeWithCost; this.hasDefaultCompactMeta = hasDefaultCompactMeta; } @@ -804,7 +769,6 @@ static Section merge(Section a, Section b) { return new Section( merged, a.totalSize + b.totalSize, - a.totalSizeWithCost + b.totalSizeWithCost, a.hasDefaultCompactMeta || b.hasDefaultCompactMeta); } } From d21e9b3168b0b771902047a53f4aea3130643297 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 20:52:46 +0800 Subject: [PATCH 24/51] spotless --- docs/generated/core_configuration.html | 2 +- .../java/org/apache/paimon/CoreOptions.java | 4 +- paimon-core/pom.xml | 15 ---- .../paimon/operation/FileStoreCommitImpl.java | 2 +- .../paimon/operation/ManifestFileMerger.java | 5 +- .../paimon/operation/ManifestFileSorter.java | 80 ++++++++----------- .../operation/ManifestPickStrategy.java | 5 +- 7 files changed, 43 insertions(+), 70 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index f0a78f590e8d..433a8d16d646 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -925,7 +925,7 @@
    manifest-sort.enabled
    false Boolean - Whether to invoke manifest sort rewrite right after manifest merge during commit. + Whether to invoke manifest sort rewrite during commit.
    manifest-sort.max-rewrite-size
    diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index afa8e7080412..e90d6fec4f18 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -475,9 +475,7 @@ public InlineElement getDescription() { key("manifest-sort.enabled") .booleanType() .defaultValue(false) - .withDescription( - "Whether to invoke manifest sort rewrite right after manifest merge" - + " during commit."); + .withDescription("Whether to invoke manifest sort rewrite during commit."); public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = key("manifest-sort.partition-field") diff --git a/paimon-core/pom.xml b/paimon-core/pom.xml index e570324ee9e3..9506bdf03959 100644 --- a/paimon-core/pom.xml +++ b/paimon-core/pom.xml @@ -36,21 +36,6 @@ under the License. - - com.aliyun.jindodata - jindo-core-macos-11_0-aarch64 - 6.9.1 - - - - - - - - org.apache.paimon - paimon-ali-jindo - 1.4-ali-SNAPSHOT - org.apache.paimon paimon-common diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java index ee7fa5aed4da..10c9b20a0467 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/FileStoreCommitImpl.java @@ -1195,7 +1195,7 @@ private boolean compactManifestOnce() { mergeBeforeManifests, manifestFile, partitionType, - CoreOptions.fromMap(compactOptions.toMap())); + new CoreOptions(compactOptions)); if (new HashSet<>(mergeBeforeManifests).equals(new HashSet<>(mergeAfterManifests))) { // no need to commit this snapshot, because no compact were happened diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 
e197868849c1..36de3d2ecdef 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -85,7 +85,7 @@ public static List merge( } // Otherwise try full compaction first, then minor compaction if needed - Optional> merged = + Optional> fullCompacted = tryFullCompaction( input, newFilesForAbort, @@ -94,8 +94,7 @@ public static List merge( manifestFullCompactionSize, partitionType, manifestReadParallelism); - - return merged.orElseGet( + return fullCompacted.orElseGet( () -> tryMinorCompaction( input, diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 445921dea943..fcc06a0591f8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -30,6 +30,7 @@ import org.apache.paimon.types.DataType; import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; +import org.apache.paimon.utils.Filter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -102,16 +103,15 @@ static Optional> trySortRewrite( : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); // Step 4: Pick runs to compact. 
- int sizeAmpThreshold = options.maxSizeAmplificationPercent(); - int sizeRatioThreshold = options.sortedRunSizeRatio(); ManifestPickStrategy pickStrategy = - new ManifestPickStrategy(sizeAmpThreshold, sizeRatioThreshold); + new ManifestPickStrategy( + options.maxSizeAmplificationPercent(), options.sortedRunSizeRatio()); List pickedRuns = pickStrategy.pick(levelRuns); if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort rewrite skipped: no runs picked and no defaultCompaction files."); - return Optional.of(input); + return Optional.empty(); } LOG.info( @@ -138,34 +138,27 @@ static Optional> trySortRewrite( } pickedFiles.addAll(defaultCompactionMap.keySet()); - long maxRewriteSize = options.manifestSortMaxRewriteSize(); - List
    sections = splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); - LOG.info("After mergeSmallAdjacentSections: sections={}.", sections.size()); - - List sortNewFiles = new ArrayList<>(); - List rewritten = - rewriteSections( - sections, - defaultCompactionMap, - manifestFile, - sortFieldIndex, - sortFieldType, - deleteEntries, - suggestedMetaSize, - maxRewriteSize, - sortNewFiles, - manifestReadParallelism); - result.addAll(rewritten); + rewriteSections( + sections, + defaultCompactionMap, + manifestFile, + sortFieldIndex, + sortFieldType, + deleteEntries, + suggestedMetaSize, + options.manifestSortMaxRewriteSize(), + result, + newFilesForAbort, + manifestReadParallelism); - newFilesForAbort.addAll(sortNewFiles); LOG.info( "Manifest sort rewrite completed: sections={}, newFiles={}, resultFiles={}.", sections.size(), - sortNewFiles.size(), + newFilesForAbort.size(), result.size()); return Optional.of(result); } @@ -419,10 +412,8 @@ private static List
    mergeSmallAdjacentSections( *

    Within threshold: read all metas, sort and rewrite the entire section. Exceeds threshold * but contains defaultCompaction files: only rewrite sub-segments around those files. Exceeds * threshold with no defaultCompaction files: skip (keep as-is). - * - * @return the list of result manifest files (both rewritten and kept-as-is) */ - private static List rewriteSections( + private static void rewriteSections( List

    sections, Map defaultCompactionMap, ManifestFile manifestFile, @@ -431,16 +422,15 @@ private static List rewriteSections( Set deleteEntries, long suggestedMetaSize, long maxRewriteSize, + List result, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { - List result = new ArrayList<>(); long processedSize = 0; boolean reachedLimit = false; for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); - // Single-file section without defaultCompaction: already sorted, skip rewrite. if (section.files.size() == 1) { sortAndRewriteSection( section.files, @@ -454,11 +444,9 @@ private static List rewriteSections( manifestReadParallelism); continue; } - long sectionSize = section.totalSize; - boolean exceedsThreshold = processedSize + sectionSize > maxRewriteSize; - if (!exceedsThreshold) { - processedSize += sectionSize; + if (processedSize + section.totalSize <= maxRewriteSize) { + processedSize += section.totalSize; sortAndRewriteSection( section.files, manifestFile, @@ -472,18 +460,18 @@ private static List rewriteSections( } else if (!reachedLimit) { // First time exceeding threshold without defaultCompaction: // partial rewrite within remaining budget. 
- long remaining = maxRewriteSize - processedSize; - processedSize += sectionSize; + long rewriteTotalSize = maxRewriteSize - processedSize; + processedSize += section.totalSize; // Split section into two parts: files within budget and remaining files - List toRewrite = new ArrayList<>(); + List rewriteFiles = new ArrayList<>(); List remainingFiles = new ArrayList<>(); long rewriteSize = 0; long remainingSize = 0; boolean remainingHasDefault = false; for (ManifestFileMeta file : section.files) { - if (rewriteSize + file.fileSize() <= remaining) { - toRewrite.add(file); + if (rewriteSize + file.fileSize() <= rewriteTotalSize) { + rewriteFiles.add(file); rewriteSize += file.fileSize(); } else { remainingFiles.add(file); @@ -495,7 +483,7 @@ private static List rewriteSections( } sortAndRewriteSection( - toRewrite, + rewriteFiles, manifestFile, sortFieldIndex, sortFieldType, @@ -529,7 +517,6 @@ private static List rewriteSections( result.addAll(section.files); } } - return result; } /** Rewrite sub-segments within a section that exceeds the rewrite threshold. 
*/ @@ -625,9 +612,7 @@ private static void sortAndRewriteSection( manifestFile.createRollingWriter(); Exception exception = null; try { - for (ManifestEntry entry : entriesToRewrite) { - writer.write(entry); - } + writer.write(entriesToRewrite); } catch (Exception e) { exception = e; } finally { @@ -654,7 +639,7 @@ static int compareSortKey( if (c != 0) { return c; } - // ADD before DELETE, so that mergeEntries can correctly cancel pairs + // ADD before DELETE int kindCmp = a.kind().compareTo(b.kind()); if (kindCmp != 0) { return kindCmp; @@ -741,7 +726,12 @@ private static FullCompactionReadResult readForSortRewrite( if (deletedIdentifiers.isEmpty()) { entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); } else { - for (ManifestEntry entry : manifestFile.read(meta.fileName(), meta.fileSize())) { + for (ManifestEntry entry : + manifestFile.read( + meta.fileName(), + meta.fileSize(), + FileEntry.addFilter(), + Filter.alwaysTrue())) { if (!deletedIdentifiers.contains(entry.identifier())) { entries.add(entry); } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 9744df5d7f1c..2cc9faf2ec26 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -40,6 +40,7 @@ public class ManifestPickStrategy { private final int sizeAmpThreshold; private final int sizeRatioThreshold; + private static final int MAX_LEVEL = 4; public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { Preconditions.checkArgument(sizeAmpThreshold > 0, "sizeAmpThreshold must be positive"); @@ -55,7 +56,7 @@ public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { * @return list of picked runs to compact */ public List pick(List levelRuns) { - if (levelRuns.isEmpty() || levelRuns.size() < 5) { + if 
(levelRuns.isEmpty() || levelRuns.size() <= MAX_LEVEL) { return new ArrayList<>(); } @@ -131,7 +132,7 @@ private List pickForSizeRatioAndForce(List pickedSize += run.totalSize(); } else { long nextRunSize = run.totalSize(); - if (pickedSize * (100.0 + sizeRatioThreshold) / 100.0 >= nextRunSize) { + if (pickedSize * (100 + sizeRatioThreshold) >= nextRunSize * 100L) { picked.add(run); pickedSize += nextRunSize; } From 17d4ead56c9e354a38e966600aacd19e347bbf97 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 22:19:52 +0800 Subject: [PATCH 25/51] fix --- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index fcc06a0591f8..4c3ef1af7a81 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -83,7 +83,7 @@ static Optional> trySortRewrite( int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); - // Step 2: Classify manifests into defaultCompaction and LSM groups. + // Step 2: Classify manifests into defaultCompaction and LSM. 
ClassifyResult classified = classifyManifests( input, From 8c5799cd06121e422799614fe1cec13c15a17f81 Mon Sep 17 00:00:00 2001 From: umi Date: Mon, 18 May 2026 23:14:58 +0800 Subject: [PATCH 26/51] fmt --- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 4c3ef1af7a81..64f93218342c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -215,6 +215,7 @@ private static ClassifyResult classifyManifests( } return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } + /** * Build level-sorted runs from a list of manifest files. Sorts files by min partition value, * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 @@ -405,6 +406,7 @@ private static List
    mergeSmallAdjacentSections( } return merged; } + /** * Iterate over sections, decide whether to rewrite each section fully or partially based on the * maxRewriteSize threshold and whether the section contains defaultCompaction files. From 112581377d4de4b96b64a275287f7038b8ed9c84 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 10:05:32 +0800 Subject: [PATCH 27/51] doc --- docs/generated/core_configuration.html | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index 433a8d16d646..4e41fe4be2cd 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -927,19 +927,18 @@ Boolean Whether to invoke manifest sort rewrite during commit. - -
    manifest-sort.max-rewrite-size
    - 256 mb - MemorySize - Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. - -
    manifest-sort.partition-field
    (none) String Partition field name to sort manifest entries by. Validated by schema validation; If not configured, defaults to the first partition field. + +
    manifest-sort.max-rewrite-size
    + 256 mb + MemorySize + Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. +
    manifest.target-file-size
    8 mb From 3c67843e31b61fd994c35a262a2265e486833006 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 12:57:19 +0800 Subject: [PATCH 28/51] comparator --- .../paimon/operation/ManifestFileSorter.java | 186 +++++------------- .../paimon/manifest/ManifestFileMetaTest.java | 128 ++---------- 2 files changed, 64 insertions(+), 250 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 64f93218342c..7220aad93acd 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -19,6 +19,8 @@ package org.apache.paimon.operation; import org.apache.paimon.CoreOptions; +import org.apache.paimon.codegen.CodeGenUtils; +import org.apache.paimon.codegen.RecordComparator; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; @@ -27,8 +29,6 @@ import org.apache.paimon.manifest.ManifestFileMeta; import org.apache.paimon.operation.ManifestFileMerger.FullCompactionReadResult; import org.apache.paimon.partition.PartitionPredicate; -import org.apache.paimon.types.DataType; -import org.apache.paimon.types.DecimalType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; @@ -81,7 +81,9 @@ static Optional> trySortRewrite( "Cannot resolve sort field for manifest sort rewrite."); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - DataType sortFieldType = partitionType.getTypeAt(sortFieldIndex); + RecordComparator fieldComparator = + CodeGenUtils.newRecordComparator( + partitionType.getFieldTypes(), new int[] {sortFieldIndex}); // Step 2: Classify manifests into defaultCompaction and LSM. ClassifyResult classified = @@ -100,7 +102,7 @@ static Optional> trySortRewrite( List levelRuns = lsmFiles.isEmpty() ? 
new ArrayList<>() - : buildLevelSortedRuns(lsmFiles, sortFieldIndex, sortFieldType); + : buildLevelSortedRuns(lsmFiles, fieldComparator); // Step 4: Pick runs to compact. ManifestPickStrategy pickStrategy = @@ -139,15 +141,14 @@ static Optional> trySortRewrite( pickedFiles.addAll(defaultCompactionMap.keySet()); List
    sections = - splitIntoSections(pickedFiles, sortFieldIndex, sortFieldType, defaultCompactionMap); + splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); rewriteSections( sections, defaultCompactionMap, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, suggestedMetaSize, options.manifestSortMaxRewriteSize(), @@ -222,24 +223,18 @@ private static ClassifyResult classifyManifests( * largest to level 1~4, rest to level 0). */ static List buildLevelSortedRuns( - List input, int sortFieldIndex, DataType sortFieldType) { + List input, RecordComparator fieldComparator) { // Step 1: Sort by min value (if equal, then by max value) input.sort( (a, b) -> { int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); + fieldComparator.compare( + a.partitionStats().minValues(), b.partitionStats().minValues()); if (cmp != 0) { return cmp; } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); + return fieldComparator.compare( + a.partitionStats().maxValues(), b.partitionStats().maxValues()); }); // Step 2: Interval graph coloring algorithm - assign files to runs @@ -249,37 +244,28 @@ static List buildLevelSortedRuns( (r1, r2) -> { ManifestFileMeta last1 = r1.get(r1.size() - 1); ManifestFileMeta last2 = r2.get(r2.size() - 1); - return compareField( + return fieldComparator.compare( last1.partitionStats().maxValues(), - last2.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); + last2.partitionStats().maxValues()); }); for (ManifestFileMeta file : input) { - boolean addedToExisting = false; - - // Try to find a run where current file's min >= run's max - if (!runs.isEmpty()) { - List earliestRun = runs.peek(); - ManifestFileMeta last = earliestRun.get(earliestRun.size() - 1); - - if (compareField( - 
file.partitionStats().minValues(), - last.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType) - >= 0) { - // Current file can be added to this run - runs.poll(); - earliestRun.add(file); - runs.offer(earliestRun); - addedToExisting = true; - } - } - - if (!addedToExisting) { - // Create a new run + List earliestRun = runs.poll(); + if (earliestRun == null) { + // No existing runs, create a new one + List newRun = new ArrayList<>(); + newRun.add(file); + runs.offer(newRun); + } else if (fieldComparator.compare( + file.partitionStats().minValues(), + earliestRun.get(earliestRun.size() - 1).partitionStats().maxValues()) + >= 0) { + // Current file's min >= run's max, append to this run + earliestRun.add(file); + runs.offer(earliestRun); + } else { + // Overlap detected, put the run back and create a new one + runs.offer(earliestRun); List newRun = new ArrayList<>(); newRun.add(file); runs.offer(newRun); @@ -312,25 +298,18 @@ static List buildLevelSortedRuns( */ static List
    splitIntoSections( List pickedFiles, - int sortFieldIndex, - DataType sortFieldType, + RecordComparator fieldComparator, Map defaultCompactionMap) { pickedFiles.sort( (a, b) -> { int cmp = - compareField( - a.partitionStats().minValues(), - b.partitionStats().minValues(), - sortFieldIndex, - sortFieldType); + fieldComparator.compare( + a.partitionStats().minValues(), b.partitionStats().minValues()); if (cmp != 0) { return cmp; } - return compareField( - a.partitionStats().maxValues(), - b.partitionStats().maxValues(), - sortFieldIndex, - sortFieldType); + return fieldComparator.compare( + a.partitionStats().maxValues(), b.partitionStats().maxValues()); }); List
    sections = new ArrayList<>(); @@ -345,12 +324,7 @@ static List
    splitIntoSections( for (int i = 1; i < pickedFiles.size(); i++) { ManifestFileMeta file = pickedFiles.get(i); - if (compareField( - file.partitionStats().minValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) - >= 0) { + if (fieldComparator.compare(file.partitionStats().minValues(), sectionMaxBound) >= 0) { sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); currentTotalSize = 0; @@ -364,11 +338,7 @@ static List
    splitIntoSections( if (!currentHasDefault && defaultCompactionMap.containsKey(file)) { currentHasDefault = true; } - if (compareField( - file.partitionStats().maxValues(), - sectionMaxBound, - sortFieldIndex, - sortFieldType) + if (fieldComparator.compare(file.partitionStats().maxValues(), sectionMaxBound) > 0) { sectionMaxBound = file.partitionStats().maxValues(); } @@ -419,8 +389,7 @@ private static void rewriteSections( List
    sections, Map defaultCompactionMap, ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, + RecordComparator fieldComparator, Set deleteEntries, long suggestedMetaSize, long maxRewriteSize, @@ -437,8 +406,7 @@ private static void rewriteSections( sortAndRewriteSection( section.files, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -452,8 +420,7 @@ private static void rewriteSections( sortAndRewriteSection( section.files, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -487,8 +454,7 @@ private static void rewriteSections( sortAndRewriteSection( rewriteFiles, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -508,8 +474,7 @@ private static void rewriteSections( section.files, defaultCompactionMap, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, suggestedMetaSize, result, @@ -526,8 +491,7 @@ private static void rewriteSubSegments( List section, Map defaultCompactionMap, ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, + RecordComparator fieldComparator, @Nullable Set deleteEntries, long manifestTargetSize, List result, @@ -544,8 +508,7 @@ private static void rewriteSubSegments( sortAndRewriteSection( subSegment, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -560,8 +523,7 @@ private static void rewriteSubSegments( sortAndRewriteSection( subSegment, manifestFile, - sortFieldIndex, - sortFieldType, + fieldComparator, deleteEntries, defaultCompactionMap, result, @@ -583,8 +545,7 @@ private static void rewriteSubSegments( private static void sortAndRewriteSection( List section, ManifestFile manifestFile, - int sortFieldIndex, - DataType sortFieldType, + RecordComparator fieldComparator, Set deletedIdentifiers, Map 
defaultCompactionMap, List result, @@ -608,7 +569,7 @@ private static void sortAndRewriteSection( } if (!entriesToRewrite.isEmpty()) { - entriesToRewrite.sort((a, b) -> compareSortKey(a, b, sortFieldIndex, sortFieldType)); + entriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = manifestFile.createRollingWriter(); @@ -631,13 +592,12 @@ private static void sortAndRewriteSection( } /** - * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, fileName)}. + * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, kind, fileName)}. * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field * value AND the same data file are emitted contiguously. */ - static int compareSortKey( - ManifestEntry a, ManifestEntry b, int sortFieldIndex, DataType sortFieldType) { - int c = compareField(a.partition(), b.partition(), sortFieldIndex, sortFieldType); + static int compareSortKey(ManifestEntry a, ManifestEntry b, RecordComparator fieldComparator) { + int c = fieldComparator.compare(a.partition(), b.partition()); if (c != 0) { return c; } @@ -649,50 +609,6 @@ static int compareSortKey( return a.file().fileName().compareTo(b.file().fileName()); } - /** - * Compares the value at field {@code k} of two {@link BinaryRow}s according to {@code type}. 
- */ - static int compareField(BinaryRow a, BinaryRow b, int k, DataType type) { - switch (type.getTypeRoot()) { - case INTEGER: - case DATE: - return Integer.compare(a.getInt(k), b.getInt(k)); - case BIGINT: - return Long.compare(a.getLong(k), b.getLong(k)); - case SMALLINT: - return Short.compare(a.getShort(k), b.getShort(k)); - case TINYINT: - return Byte.compare(a.getByte(k), b.getByte(k)); - case FLOAT: - return Float.compare(a.getFloat(k), b.getFloat(k)); - case DOUBLE: - return Double.compare(a.getDouble(k), b.getDouble(k)); - case BOOLEAN: - return Boolean.compare(a.getBoolean(k), b.getBoolean(k)); - case VARCHAR: - case CHAR: - return a.getString(k).compareTo(b.getString(k)); - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return a.getTimestamp(k, type.defaultSize()) - .compareTo(b.getTimestamp(k, type.defaultSize())); - case DECIMAL: - DecimalType dt = (DecimalType) type; - return a.getDecimal(k, dt.getPrecision(), dt.getScale()) - .compareTo(b.getDecimal(k, dt.getPrecision(), dt.getScale())); - default: - String errorMsg = - String.format( - "Unsupported partition field type '%s' for manifest sort rewrite. " - + "Supported types: TINYINT, SMALLINT, INTEGER, BIGINT, " - + "FLOAT, DOUBLE, BOOLEAN, CHAR, VARCHAR, DATE, TIMESTAMP, " - + "DECIMAL.", - type.getTypeRoot()); - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - } - /** * Resolve the partition field to sort manifests by. 
* diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index d1c15d412fad..e95f4cf21685 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -27,12 +27,10 @@ import org.apache.paimon.operation.ManifestFileMerger; import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; +import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.FailingFileIO; - -import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; - import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.RepeatedTest; @@ -882,12 +880,12 @@ private void beforeFirstRead() throws IOException { *

    Input manifests (deliberately unordered and overlapping): * *

    -     *   manifest-A: partitions [5, 9]  (entries in partition 5,6,7,8,9)
    -     *   manifest-B: partitions [0, 4]  (entries in partition 0,1,2,3,4)
    +     *   manifest-A: partitions [5, 13]  (entries in partition 5,6,7,8,9)
    +     *   manifest-B: partitions [0, 9]  (entries in partition 0,1,2,3,4)
          *   manifest-C: partitions [3, 7]  (entries in partition 3,4,5,6,7) -- overlaps A and B
          *   manifest-D: partitions [8, 12] (entries in partition 8,9,10,11,12) -- overlaps A
    -     *   manifest-E: partitions [1, 3]  (entries in partition 1,2,3) -- overlaps B and C
    -     *   manifest-F: partitions [10, 14](entries in partition 10,11,12,13,14) -- overlaps D
    +     *   manifest-E: partitions [1, 6]  (entries in partition 1,2,3) -- overlaps B and C
    +     *   manifest-F: partitions [4, 14](entries in partition 10,11,12,13,14) -- overlaps D
          * 
    * *

    After sort rewrite, all surviving ADD entries should be sorted by partition field. @@ -896,16 +894,16 @@ private void beforeFirstRead() throws IOException { public void testManifestSortWithOverlappingPartitions() { List input = new ArrayList<>(); - // manifest-A: partitions [5, 9] + // manifest-A: partitions [5, 13] List entriesA = new ArrayList<>(); - for (int p = 5; p <= 9; p++) { + for (int p = 5; p <= 13; p++) { entriesA.add(makeEntry(true, String.format("A-p%d", p), p)); } input.add(makeManifest(entriesA.toArray(new ManifestEntry[0]))); - // manifest-B: partitions [0, 4] + // manifest-B: partitions [0, 9] List entriesB = new ArrayList<>(); - for (int p = 0; p <= 4; p++) { + for (int p = 0; p <= 9; p++) { entriesB.add(makeEntry(true, String.format("B-p%d", p), p)); } input.add(makeManifest(entriesB.toArray(new ManifestEntry[0]))); @@ -924,23 +922,22 @@ public void testManifestSortWithOverlappingPartitions() { } input.add(makeManifest(entriesD.toArray(new ManifestEntry[0]))); - // manifest-E: partitions [1, 3] -- overlaps with B and C + // manifest-E: partitions [1, 6] -- overlaps with B and C List entriesE = new ArrayList<>(); - for (int p = 1; p <= 3; p++) { + for (int p = 1; p <= 6; p++) { entriesE.add(makeEntry(true, String.format("E-p%d", p), p)); } input.add(makeManifest(entriesE.toArray(new ManifestEntry[0]))); - // manifest-F: partitions [10, 14] -- overlaps with D + // manifest-F: partitions [4, 14] -- overlaps with D List entriesF = new ArrayList<>(); - for (int p = 10; p <= 14; p++) { + for (int p = 4; p <= 14; p++) { entriesF.add(makeEntry(true, String.format("F-p%d", p), p)); } input.add(makeManifest(entriesF.toArray(new ManifestEntry[0]))); Options testOptions = new Options(); testOptions.set("manifest-sort.enabled", "true"); - List merged = ManifestFileMerger.merge( input, @@ -963,100 +960,6 @@ public void testManifestSortWithOverlappingPartitions() { } } - // Verify manifest files themselves are ordered by minValues - for (int i = 1; i < 
merged.size(); i++) { - int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); - int currMin = merged.get(i).partitionStats().minValues().getInt(0); - assertThat(currMin).isGreaterThanOrEqualTo(prevMin); - } - } - - /** - * Test manifest sort with heavily overlapping manifests that form multiple sorted runs. This - * exercises buildLevelSortedRuns and the LSM level assignment logic. - * - *

    Creates manifests whose partition ranges overlap in various ways: - * - *

    -     *   run1 (non-overlapping): [0,2], [3,5], [6,8]
    -     *   run2 (overlapping with run1): [1,4], [5,7]
    -     *   run3 (overlapping with both): [0,9]
    -     * 
    - */ - @Test - public void testManifestSortWithMultipleOverlappingRuns() { - List input = new ArrayList<>(); - - // Run1: non-overlapping within itself [0,2], [3,5], [6,8] - input.add( - makeManifest( - makeEntry(true, "r1a-p0", 0), - makeEntry(true, "r1a-p1", 1), - makeEntry(true, "r1a-p2", 2))); - input.add( - makeManifest( - makeEntry(true, "r1b-p3", 3), - makeEntry(true, "r1b-p4", 4), - makeEntry(true, "r1b-p5", 5))); - input.add( - makeManifest( - makeEntry(true, "r1c-p6", 6), - makeEntry(true, "r1c-p7", 7), - makeEntry(true, "r1c-p8", 8))); - - // Run2: overlaps with run1 [1,4], [5,7] - input.add( - makeManifest( - makeEntry(true, "r2a-p1", 1), - makeEntry(true, "r2a-p2", 2), - makeEntry(true, "r2a-p3", 3), - makeEntry(true, "r2a-p4", 4))); - input.add( - makeManifest( - makeEntry(true, "r2b-p5", 5), - makeEntry(true, "r2b-p6", 6), - makeEntry(true, "r2b-p7", 7))); - - // Run3: a large manifest overlapping everything [0,9] - List run3Entries = new ArrayList<>(); - for (int p = 0; p <= 9; p++) { - run3Entries.add(makeEntry(true, String.format("r3-p%d", p), p)); - } - input.add(makeManifest(run3Entries.toArray(new ManifestEntry[0]))); - - Options testOptions = new Options(); - testOptions.set("manifest-sort.enabled", "true"); - - List merged = - ManifestFileMerger.merge( - input, - manifestFile, - getPartitionType(), - CoreOptions.fromMap(testOptions.toMap())); - - // Verify no data loss - assertEquivalentEntries(input, merged); - - // Verify entries within each output manifest are sorted by partition - for (ManifestFileMeta meta : merged) { - List entries = manifestFile.read(meta.fileName(), meta.fileSize()); - for (int i = 1; i < entries.size(); i++) { - int prevPartition = entries.get(i - 1).partition().getInt(0); - int currPartition = entries.get(i).partition().getInt(0); - assertThat(currPartition) - .as( - "Entries within manifest should be sorted, but found %d after %d", - currPartition, prevPartition) - .isGreaterThanOrEqualTo(prevPartition); - } - 
} - - // Verify output manifests are ordered by minValues - for (int i = 1; i < merged.size(); i++) { - int prevMin = merged.get(i - 1).partitionStats().minValues().getInt(0); - int currMin = merged.get(i).partitionStats().minValues().getInt(0); - assertThat(currMin).isGreaterThanOrEqualTo(prevMin); - } } /** @@ -1118,11 +1021,6 @@ public void testManifestSortEliminatesDeleteEntries() { input.add(makeManifest(makeEntry(false, "C-p7", 7), makeEntry(true, "new-p7", 7))); Options testOptions = new Options(); - // Set target file size very large so all input manifests are considered "small" - // (fileSize < suggestedMetaSize), which makes them all satisfy mustChange condition - testOptions.set("manifest.target-file-size", "16MB"); - // Set full-compaction threshold very small to ensure it triggers - testOptions.set("manifest.full-compaction-threshold-size", "1B"); testOptions.set("manifest-sort.enabled", "true"); List merged = From e5670cfd66e210e5bf2bf058597d686610917b38 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 16:23:48 +0800 Subject: [PATCH 29/51] deleteTrigger --- .../paimon/operation/ManifestFileSorter.java | 81 +++++++++++++------ .../paimon/manifest/ManifestFileMetaTest.java | 5 +- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 7220aad93acd..56d6075c77e6 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -38,6 +38,7 @@ import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; @@ -74,6 +75,8 @@ static Optional> trySortRewrite( long suggestedMetaSize = options.manifestTargetSize().getBytes(); Integer manifestReadParallelism = 
options.scanManifestParallelism(); String sortPartitionField = options.manifestSortPartitionField(); + long manifestFullCompactionThresholdSize = + options.manifestFullCompactionThresholdSize().getBytes(); // Step 1: Resolve sort field. String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { @@ -86,15 +89,17 @@ static Optional> trySortRewrite( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); // Step 2: Classify manifests into defaultCompaction and LSM. + List result = new ArrayList<>(); ClassifyResult classified = classifyManifests( input, + result, suggestedMetaSize, manifestFile, partitionType, + manifestFullCompactionThresholdSize, manifestReadParallelism); - Map defaultCompactionMap = - classified.defaultCompactionManifests; + Map defaultCompactionMap = classified.defaultCompactionManifests; List lsmFiles = classified.lsmFiles; Set deleteEntries = classified.deleteEntries; @@ -131,7 +136,7 @@ static Optional> trySortRewrite( reusedFiles.addAll(run.files()); } } - List result = new ArrayList<>(reusedFiles); + result.addAll(reusedFiles); // Step 5: Split picked files into sections, sort and rewrite each. 
List pickedFiles = new ArrayList<>(); @@ -151,6 +156,7 @@ static Optional> trySortRewrite( fieldComparator, deleteEntries, suggestedMetaSize, + options.manifestMergeMinCount(), options.manifestSortMaxRewriteSize(), result, newFilesForAbort, @@ -176,11 +182,23 @@ static Optional> trySortRewrite( */ private static ClassifyResult classifyManifests( List input, + List result, long suggestedMetaSize, ManifestFile manifestFile, RowType partitionType, + long sizeTrigger, @Nullable Integer manifestReadParallelism) { - Map defaultCompactionManifests = new LinkedHashMap<>(); + Filter mustChange = + file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; + long totalDeltaFileSize = 0; + for (ManifestFileMeta file : input) { + if (mustChange.test(file)) { + totalDeltaFileSize += file.fileSize(); + } + } + boolean removeAllDelete = totalDeltaFileSize >= sizeTrigger; + + Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); Set deleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); @@ -209,11 +227,19 @@ private static ClassifyResult classifyManifests( file.partitionStats().minValues(), file.partitionStats().maxValues(), file.partitionStats().nullCounts()); - if (small || inDeleteRange) { - iterator.remove(); - defaultCompactionManifests.put(file, new boolean[] {small, inDeleteRange}); + if (removeAllDelete) { + if (small || inDeleteRange) { + iterator.remove(); + defaultCompactionManifests.put(file, inDeleteRange); + } + } else { + if (inDeleteRange) { + iterator.remove(); + result.add(file); + } } } + deleteEntries = removeAllDelete ? deleteEntries : Collections.emptySet(); return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } @@ -299,7 +325,7 @@ static List buildLevelSortedRuns( static List
    splitIntoSections( List pickedFiles, RecordComparator fieldComparator, - Map defaultCompactionMap) { + Map defaultCompactionMap) { pickedFiles.sort( (a, b) -> { int cmp = @@ -387,11 +413,12 @@ private static List
    mergeSmallAdjacentSections( */ private static void rewriteSections( List
    sections, - Map defaultCompactionMap, + Map defaultCompactionMap, ManifestFile manifestFile, RecordComparator fieldComparator, Set deleteEntries, long suggestedMetaSize, + int suggestedMinMetaCount, long maxRewriteSize, List result, List sortNewFiles, @@ -477,6 +504,7 @@ private static void rewriteSections( fieldComparator, deleteEntries, suggestedMetaSize, + suggestedMinMetaCount, result, sortNewFiles, manifestReadParallelism); @@ -489,11 +517,12 @@ private static void rewriteSections( /** Rewrite sub-segments within a section that exceeds the rewrite threshold. */ private static void rewriteSubSegments( List section, - Map defaultCompactionMap, + Map defaultCompactionMap, ManifestFile manifestFile, RecordComparator fieldComparator, @Nullable Set deleteEntries, long manifestTargetSize, + int suggestedMinMetaCount, List result, List sortNewFiles, @Nullable Integer manifestReadParallelism) @@ -518,17 +547,21 @@ private static void rewriteSubSegments( subSegmentSize = 0; } } - // Flush remaining sub-segment + // Flush remaining sub-segment only if there are enough files to justify rewrite if (!subSegment.isEmpty()) { - sortAndRewriteSection( - subSegment, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - result, - sortNewFiles, - manifestReadParallelism); + if (!deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { + sortAndRewriteSection( + subSegment, + manifestFile, + fieldComparator, + deleteEntries, + defaultCompactionMap, + result, + sortNewFiles, + manifestReadParallelism); + } else { + result.addAll(subSegment); + } } } @@ -547,14 +580,14 @@ private static void sortAndRewriteSection( ManifestFile manifestFile, RecordComparator fieldComparator, Set deletedIdentifiers, - Map defaultCompactionMap, + Map defaultCompactionMap, List result, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { if (section.size() == 1 && (!defaultCompactionMap.containsKey(section.get(0)) - || 
!defaultCompactionMap.get(section.get(0))[1])) { + || !defaultCompactionMap.get(section.get(0)))) { result.add(section.get(0)); return; } @@ -684,13 +717,13 @@ static Section merge(Section a, Section b) { /** Result of classifying manifest files into default-compaction and LSM groups. */ private static class ClassifyResult { /** key: ManifestFileMeta, value: boolean[]{isSmall, isInDeleteRange}. */ - final Map defaultCompactionManifests; + final Map defaultCompactionManifests; final List lsmFiles; @Nullable final Set deleteEntries; ClassifyResult( - Map defaultCompactionManifests, + Map defaultCompactionManifests, List lsmFiles, @Nullable Set deleteEntries) { this.defaultCompactionManifests = defaultCompactionManifests; diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index e95f4cf21685..e3e4a29b1df0 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -27,10 +27,12 @@ import org.apache.paimon.operation.ManifestFileMerger; import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; -import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.FailingFileIO; + +import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.RepeatedTest; @@ -959,7 +961,6 @@ public void testManifestSortWithOverlappingPartitions() { .isGreaterThanOrEqualTo(prevPartition); } } - } /** From a7584f247f645193a3f65e56b2f117663be718ba Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 16:46:15 +0800 Subject: [PATCH 30/51] addSmall --- 
.../org/apache/paimon/operation/ManifestFileSorter.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 56d6075c77e6..ee13e80b3609 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -236,6 +236,9 @@ private static ClassifyResult classifyManifests( if (inDeleteRange) { iterator.remove(); result.add(file); + } else if (small) { + iterator.remove(); + defaultCompactionManifests.put(file, false); } } } @@ -520,7 +523,7 @@ private static void rewriteSubSegments( Map defaultCompactionMap, ManifestFile manifestFile, RecordComparator fieldComparator, - @Nullable Set deleteEntries, + Set deleteEntries, long manifestTargetSize, int suggestedMinMetaCount, List result, @@ -579,7 +582,7 @@ private static void sortAndRewriteSection( List section, ManifestFile manifestFile, RecordComparator fieldComparator, - Set deletedIdentifiers, + Set deleteEntries, Map defaultCompactionMap, List result, List sortNewFiles, @@ -593,7 +596,7 @@ private static void sortAndRewriteSection( } // Parallel read: each meta is read independently Function> reader = - meta -> singletonList(readForSortRewrite(meta, manifestFile, deletedIdentifiers)); + meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); List entriesToRewrite = new ArrayList<>(); for (FullCompactionReadResult readResult : From 52582b57e5121f0dca7bfdba93deb394257e78dc Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 16:58:50 +0800 Subject: [PATCH 31/51] test --- .../java/org/apache/paimon/manifest/ManifestFileMetaTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java 
b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index e3e4a29b1df0..a13132223e4f 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -1023,6 +1023,7 @@ public void testManifestSortEliminatesDeleteEntries() { Options testOptions = new Options(); testOptions.set("manifest-sort.enabled", "true"); + testOptions.set("manifest.full-compaction-threshold-size", "10B"); List merged = ManifestFileMerger.merge( From be16163a8845ad55606fc7e3925131de80b2a24d Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 17:09:53 +0800 Subject: [PATCH 32/51] doc --- docs/generated/core_configuration.html | 2 +- paimon-api/src/main/java/org/apache/paimon/CoreOptions.java | 2 +- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 5 ++--- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index 4e41fe4be2cd..012b2929aa85 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -931,7 +931,7 @@
    manifest-sort.partition-field
    (none) String - Partition field name to sort manifest entries by. Validated by schema validation; If not configured, defaults to the first partition field. + Partition field name to sort manifest entries by. Validated by schema validation, If not configured, defaults to the first partition field.
    manifest-sort.max-rewrite-size
    diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index e90d6fec4f18..5dabc4117ebc 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -483,7 +483,7 @@ public InlineElement getDescription() { .noDefaultValue() .withDescription( "Partition field name to sort manifest entries by. Validated by" - + " schema validation; If not configured, defaults to the first partition field."); + + " schema validation, If not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index ee13e80b3609..44440bb8b0c2 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -61,8 +61,7 @@ public class ManifestFileSorter { /** * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort - * field cannot be resolved or the delta file size is below the full compaction threshold, the - * input is returned as-is. + * field cannot be resolved, the input is returned as-is. */ static Optional> trySortRewrite( List input, @@ -379,7 +378,7 @@ static List
    splitIntoSections( /** * Merge small adjacent sections to avoid producing too many small rewrite batches. If either - * the pending section or the current section total size is smaller than half of {@code + * the pending section or the current section total size is smaller than {@code * suggestedMetaSize}, they are combined into a single section. */ private static List
    mergeSmallAdjacentSections( From 309521c4aba81a49412213e7d6377a5bd0ee9cea Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 18:07:27 +0800 Subject: [PATCH 33/51] comment --- .../paimon/operation/ManifestFileSorter.java | 63 ++++++++++--------- .../operation/ManifestPickStrategy.java | 3 +- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 44440bb8b0c2..f527c7b330bc 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -172,12 +172,11 @@ static Optional> trySortRewrite( /** * Classify manifest files into default-compaction group and LSM group. * - *

    When full compaction is triggered (totalDeltaFileSize >= threshold), files that must - * change or overlap with delete partitions go into defaultCompactionManifests; the rest stay as - * lsmFiles. + *

    Full compaction: small files and files overlapping delete partitions go into + * defaultCompactionManifests; the rest stay as lsmFiles. * - *

    When full compaction is NOT triggered, adjacent small manifests whose cumulative size - * reaches suggestedMetaSize are grouped into defaultCompactionManifests (minor-style pick). + *

    Non-full compaction: delete-overlapping files go to result, small files go to + * defaultCompactionManifests for minor-style merge. */ private static ClassifyResult classifyManifests( List input, @@ -187,6 +186,7 @@ private static ClassifyResult classifyManifests( RowType partitionType, long sizeTrigger, @Nullable Integer manifestReadParallelism) { + // Calculate total size of files that need compaction to determine full-compaction trigger Filter mustChange = file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; long totalDeltaFileSize = 0; @@ -197,11 +197,13 @@ private static ClassifyResult classifyManifests( } boolean removeAllDelete = totalDeltaFileSize >= sizeTrigger; + // Initialize classification containers and read delete entries Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); Set deleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + // Build partition predicate from delete entries for overlap detection PartitionPredicate predicate; if (deleteEntries.isEmpty()) { predicate = PartitionPredicate.ALWAYS_FALSE; @@ -215,6 +217,7 @@ private static ClassifyResult classifyManifests( } } + // Classify each file based on size and delete-partition overlap Iterator iterator = lsmFiles.iterator(); while (iterator.hasNext()) { ManifestFileMeta file = iterator.next(); @@ -227,11 +230,13 @@ private static ClassifyResult classifyManifests( file.partitionStats().maxValues(), file.partitionStats().nullCounts()); if (removeAllDelete) { + // Full compaction: collect small or delete-overlapping files if (small || inDeleteRange) { iterator.remove(); defaultCompactionManifests.put(file, inDeleteRange); } } else { + // Non-full: separate delete-overlapping into result, small into compaction group if (inDeleteRange) { iterator.remove(); result.add(file); @@ -309,7 +314,7 @@ static List buildLevelSortedRuns( // Step 4: Sort by totalSize and assign levels 
result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); int n = result.size(); - int maxLevel = 4; + int maxLevel = ManifestPickStrategy.MAX_LEVEL; for (int i = 0; i < n; i++) { if (i >= n - maxLevel) { result.get(i).setLevel(i - (n - maxLevel) + 1); @@ -406,12 +411,16 @@ private static List

    mergeSmallAdjacentSections( } /** - * Iterate over sections, decide whether to rewrite each section fully or partially based on the - * maxRewriteSize threshold and whether the section contains defaultCompaction files. + * Rewrite sections with a budget-controlled strategy. * - *

    Within threshold: read all metas, sort and rewrite the entire section. Exceeds threshold - * but contains defaultCompaction files: only rewrite sub-segments around those files. Exceeds - * threshold with no defaultCompaction files: skip (keep as-is). + *

      + *
    • 1. Single-file section: pass through (rewrite only if it has delete entries). + *
    • 2. Within budget: sort and rewrite the entire section. + *
    • 3. First time exceeding budget: partial rewrite within remaining budget, remaining files + * form a new section appended for later processing. + *
    • 4. After budget exhausted with defaultCompaction files: rewrite sub-segments only. + *
    • 5. After budget exhausted without defaultCompaction files: keep as-is. + *
    */ private static void rewriteSections( List
    sections, @@ -456,11 +465,9 @@ private static void rewriteSections( sortNewFiles, manifestReadParallelism); } else if (!reachedLimit) { - // First time exceeding threshold without defaultCompaction: - // partial rewrite within remaining budget. + // Partial rewrite: split section at the budget boundary. long rewriteTotalSize = maxRewriteSize - processedSize; processedSize += section.totalSize; - // Split section into two parts: files within budget and remaining files List rewriteFiles = new ArrayList<>(); List remainingFiles = new ArrayList<>(); long rewriteSize = 0; @@ -490,11 +497,10 @@ private static void rewriteSections( sortNewFiles, manifestReadParallelism); - // Create new section for remaining files and append to sections list + // Append remaining files as a new section for later processing. if (!remainingFiles.isEmpty()) { Section remainingSection = new Section(remainingFiles, remainingSize, remainingHasDefault); - // Append remaining section to the end of sections list sections.add(remainingSection); } reachedLimit = true; @@ -516,7 +522,11 @@ private static void rewriteSections( } } - /** Rewrite sub-segments within a section that exceeds the rewrite threshold. */ + /** + * Batch-rewrite files in a section by splitting them into sub-segments of {@code + * manifestTargetSize}. Tail sub-segment is only rewritten if it has delete entries or meets + * {@code suggestedMinMetaCount}. + */ private static void rewriteSubSegments( List section, Map defaultCompactionMap, @@ -549,7 +559,7 @@ private static void rewriteSubSegments( subSegmentSize = 0; } } - // Flush remaining sub-segment only if there are enough files to justify rewrite + // Flush tail only if delete entries exist or file count >= minCount. 
if (!subSegment.isEmpty()) { if (!deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { sortAndRewriteSection( @@ -568,14 +578,8 @@ private static void rewriteSubSegments( } /** - * Read all entries from a section's manifest files, sort them in memory by the specified - * partition field, filter out DELETE entries and cancelled ADD entries, then write surviving - * entries to new manifest files via the rolling writer. - * - *

    All files participate in sorting, enabling full sort across the entire section. - * - *

    Reading is parallelized via {@code sequentialBatchedExecute} following the same pattern as - * {@link ManifestFileMerger#tryFullCompaction}. + * Read entries from a section's manifest files, sort by partition field, and write to new + * manifests. Single non-delete-range files are passed through without rewrite. */ private static void sortAndRewriteSection( List section, @@ -587,13 +591,13 @@ private static void sortAndRewriteSection( List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { + // Skip rewrite for single file not in delete-range. if (section.size() == 1 - && (!defaultCompactionMap.containsKey(section.get(0)) - || !defaultCompactionMap.get(section.get(0)))) { + && !defaultCompactionMap.getOrDefault(section.get(0), false)) { result.add(section.get(0)); return; } - // Parallel read: each meta is read independently + // Read all entries in parallel. Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); @@ -604,6 +608,7 @@ private static void sortAndRewriteSection( } if (!entriesToRewrite.isEmpty()) { + // Sort and write to new manifest files. 
entriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 2cc9faf2ec26..3a8693d4dcf0 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -38,9 +38,10 @@ */ public class ManifestPickStrategy { + public static final int MAX_LEVEL = 4; + private final int sizeAmpThreshold; private final int sizeRatioThreshold; - private static final int MAX_LEVEL = 4; public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { Preconditions.checkArgument(sizeAmpThreshold > 0, "sizeAmpThreshold must be positive"); From 54656a992589395eba5ed0ef50591fc58afc6a6d Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 19:45:26 +0800 Subject: [PATCH 34/51] modifyTests --- .../paimon/operation/ManifestFileSorter.java | 7 +- .../paimon/manifest/ManifestFileMetaTest.java | 251 +++++++++++++++++- .../paimon/schema/SchemaValidationTest.java | 50 ++-- 3 files changed, 258 insertions(+), 50 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index f527c7b330bc..7f1a7a455fa7 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -416,8 +416,8 @@ private static List

    mergeSmallAdjacentSections( *
      *
    • 1. Single-file section: pass through (rewrite only if it has delete entries). *
    • 2. Within budget: sort and rewrite the entire section. - *
    • 3. First time exceeding budget: partial rewrite within remaining budget, remaining files - * form a new section appended for later processing. + *
    • 3. First time exceeding budget: partial rewrite within remaining budget, remaining + * files form a new section appended for later processing. *
    • 4. After budget exhausted with defaultCompaction files: rewrite sub-segments only. *
    • 5. After budget exhausted without defaultCompaction files: keep as-is. *
    @@ -592,8 +592,7 @@ private static void sortAndRewriteSection( @Nullable Integer manifestReadParallelism) throws Exception { // Skip rewrite for single file not in delete-range. - if (section.size() == 1 - && !defaultCompactionMap.getOrDefault(section.get(0), false)) { + if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { result.add(section.get(0)); return; } diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index a13132223e4f..d41cc1413b54 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -20,16 +20,24 @@ import org.apache.paimon.CoreOptions; import org.apache.paimon.data.BinaryRow; +import org.apache.paimon.data.BinaryRowWriter; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.fs.FileIO; +import org.apache.paimon.fs.FileIOFinder; import org.apache.paimon.fs.Path; import org.apache.paimon.fs.SeekableInputStream; import org.apache.paimon.fs.SeekableInputStreamWrapper; import org.apache.paimon.fs.local.LocalFileIO; +import org.apache.paimon.io.DataFileMeta; import org.apache.paimon.operation.ManifestFileMerger; import org.apache.paimon.options.Options; import org.apache.paimon.partition.PartitionPredicate; +import org.apache.paimon.schema.SchemaManager; +import org.apache.paimon.stats.StatsTestUtils; import org.apache.paimon.types.IntType; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.FailingFileIO; +import org.apache.paimon.utils.FileStorePathFactory; import org.apache.paimon.shade.guava30.com.google.common.collect.Lists; @@ -44,6 +52,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ 
-878,19 +887,7 @@ private void beforeFirstRead() throws IOException { /** * Test manifest sort with overlapping partition ranges. Each manifest contains entries spanning * multiple partitions, creating overlapping intervals that require sort rewrite to resolve. - * - *

    Input manifests (deliberately unordered and overlapping): - * - *

    -     *   manifest-A: partitions [5, 13]  (entries in partition 5,6,7,8,9)
    -     *   manifest-B: partitions [0, 9]  (entries in partition 0,1,2,3,4)
    -     *   manifest-C: partitions [3, 7]  (entries in partition 3,4,5,6,7) -- overlaps A and B
    -     *   manifest-D: partitions [8, 12] (entries in partition 8,9,10,11,12) -- overlaps A
    -     *   manifest-E: partitions [1, 6]  (entries in partition 1,2,3) -- overlaps B and C
    -     *   manifest-F: partitions [4, 14](entries in partition 10,11,12,13,14) -- overlaps D
    -     * 
    - * - *

    After sort rewrite, all surviving ADD entries should be sorted by partition field. + * After sort rewrite, all surviving ADD entries should be sorted by partition field. */ @Test public void testManifestSortWithOverlappingPartitions() { @@ -1066,4 +1063,232 @@ public void testManifestSortEliminatesDeleteEntries() { } } } + /** + * Test manifest sort with a multi-field partition type. + * + *

    Setup: partition=(region INT, dt INT, hour INT), sort by dt (field index=1). 9 manifest + * files form 6 overlapping sorted runs by dt range: + * + *

    +     *   Run1: 3 files, dt=[0,15],[3,5],[6,8]
    +     *   Run2: 2 files, dt=[1,8],[5,7]
    +     *   Run3: 1 file,  dt=[0,9]
    +     *   Run4: 1 file,  dt=[5,14]
    +     *   Run5: 1 file,  dt=[8,15]
    +     *   Run6: 1 file,  dt=[4,12]
    +     * 
    + * + *

    Verifies: 1) no data loss after sort-rewrite, 2) entries within each output manifest are + * sorted by dt. + */ + @Test + public void testManifestSortWithMultiplePartitions() { + // Use a 3-field partition type: (region INT, dt INT, hour INT) + RowType multiPartitionType = RowType.of(new IntType(), new IntType(), new IntType()); + + // Create a dedicated ManifestFile for the 3-field partition type + Path path = new Path(tempDir.toString()); + FileIO fileIO = FileIOFinder.find(path); + ManifestFile multiPartManifestFile = + new ManifestFile.Factory( + fileIO, + new SchemaManager(fileIO, path), + multiPartitionType, + avro, + "zstd", + new FileStorePathFactory( + path, + multiPartitionType, + "default", + CoreOptions.FILE_FORMAT.defaultValue(), + CoreOptions.DATA_FILE_PREFIX.defaultValue(), + CoreOptions.CHANGELOG_FILE_PREFIX.defaultValue(), + CoreOptions.PARTITION_GENERATE_LEGACY_NAME.defaultValue(), + CoreOptions.FILE_SUFFIX_INCLUDE_COMPRESSION.defaultValue(), + CoreOptions.FILE_COMPRESSION.defaultValue(), + null, + null, + CoreOptions.ExternalPathStrategy.NONE, + null, + false, + null), + Long.MAX_VALUE, + null) + .create(); + + List input = new ArrayList<>(); + + // Run1 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r1a-p0", 10, 0, 1), + makeMultiPartEntry(true, "r1a-p1", 20, 1, 2), + makeMultiPartEntry(true, "r1a-p2", 30, 15, 3))) + .get(0)); + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r1b-p3", 10, 3, 4), + makeMultiPartEntry(true, "r1b-p4", 20, 4, 5), + makeMultiPartEntry(true, "r1b-p5", 30, 5, 6))) + .get(0)); + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r1c-p6", 10, 6, 7), + makeMultiPartEntry(true, "r1c-p7", 20, 7, 8), + makeMultiPartEntry(true, "r1c-p8", 30, 8, 9))) + .get(0)); + + // Run2 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r2a-p1", 5, 1, 10), + 
makeMultiPartEntry(true, "r2a-p2", 15, 2, 11), + makeMultiPartEntry(true, "r2a-p3", 25, 3, 12), + makeMultiPartEntry(true, "r2a-p4", 35, 8, 13))) + .get(0)); + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r2b-p5", 5, 5, 14), + makeMultiPartEntry(true, "r2b-p6", 15, 6, 15), + makeMultiPartEntry(true, "r2b-p7", 25, 7, 16))) + .get(0)); + + // Run3 + List run3Entries = new ArrayList<>(); + for (int p = 0; p <= 9; p++) { + run3Entries.add(makeMultiPartEntry(true, String.format("r3-p%d", p), 99, p, p + 20)); + } + input.add(multiPartManifestFile.write(run3Entries).get(0)); + + // Run4 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r4a-p10", 10, 5, 30), + makeMultiPartEntry(true, "r4a-p11", 20, 11, 31), + makeMultiPartEntry(true, "r4a-p12", 30, 12, 32), + makeMultiPartEntry(true, "r4a-p13", 40, 13, 33), + makeMultiPartEntry(true, "r4a-p14", 50, 14, 34))) + .get(0)); + + // Run5 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r5a-p11", 11, 8, 40), + makeMultiPartEntry(true, "r5a-p12", 21, 12, 41), + makeMultiPartEntry(true, "r5a-p13", 31, 13, 42), + makeMultiPartEntry(true, "r5a-p14", 41, 14, 43), + makeMultiPartEntry(true, "r5a-p15", 51, 15, 44))) + .get(0)); + + // Run6 + input.add( + multiPartManifestFile + .write( + Arrays.asList( + makeMultiPartEntry(true, "r6a-p7", 7, 4, 50), + makeMultiPartEntry(true, "r6a-p8", 17, 8, 51), + makeMultiPartEntry(true, "r6a-p9", 27, 9, 52), + makeMultiPartEntry(true, "r6a-p10", 37, 10, 53), + makeMultiPartEntry(true, "r6a-p11", 47, 11, 54), + makeMultiPartEntry(true, "r6a-p12", 57, 12, 55))) + .get(0)); + + Options testOptions = new Options(); + testOptions.set("manifest-sort.enabled", "true"); + // Sort by the second partition field "f1" (dt) + testOptions.set("manifest-sort.partition-field", "f1"); + List merged = + ManifestFileMerger.merge( + input, + multiPartManifestFile, + multiPartitionType, + 
CoreOptions.fromMap(testOptions.toMap())); + + // Verify no data loss + List inputEntries = + input.stream() + .flatMap( + f -> + multiPartManifestFile.read(f.fileName(), f.fileSize()) + .stream()) + .collect(Collectors.toList()); + List entryBeforeMerge = + FileEntry.mergeEntries(inputEntries).stream() + .filter(entry -> entry.kind() == FileKind.ADD) + .map(entry -> entry.kind() + "-" + entry.file().fileName()) + .collect(Collectors.toList()); + List entryAfterMerge = new ArrayList<>(); + for (ManifestFileMeta meta : merged) { + for (ManifestEntry entry : + multiPartManifestFile.read(meta.fileName(), meta.fileSize())) { + entryAfterMerge.add(entry.kind() + "-" + entry.file().fileName()); + } + } + assertThat(entryBeforeMerge).hasSameElementsAs(entryAfterMerge); + + // Verify entries within each output manifest are sorted by the second field (dt) + for (ManifestFileMeta meta : merged) { + List entries = + multiPartManifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevDt = entries.get(i - 1).partition().getInt(1); + int currDt = entries.get(i).partition().getInt(1); + assertThat(currDt) + .as("Entries within manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevDt); + } + } + } + + /** Create a ManifestEntry with a 3-field partition row (region, dt, hour). */ + private ManifestEntry makeMultiPartEntry( + boolean isAdd, String fileName, int region, int dt, int hour) { + BinaryRow binaryRow = new BinaryRow(3); + BinaryRowWriter writer = new BinaryRowWriter(binaryRow); + writer.writeInt(0, region); + writer.writeInt(1, dt); + writer.writeInt(2, hour); + writer.complete(); + + return ManifestEntry.create( + isAdd ? 
FileKind.ADD : FileKind.DELETE, + binaryRow, + 0, + 0, + DataFileMeta.create( + fileName, + 0, + 0, + binaryRow, + binaryRow, + StatsTestUtils.newEmptySimpleStats(), + StatsTestUtils.newEmptySimpleStats(), + 0, + 0, + 0, + 0, + Collections.emptyList(), + Timestamp.fromEpochMillis(200000), + 0L, + null, + FileSource.APPEND, + null, + null, + null, + null)); + } } diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java index 9a6a768c7b65..28527f907d57 100644 --- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java @@ -512,16 +512,16 @@ public void testFileFormatPerLevelAcceptsCompatibleSchema() { } @Test - void testManifestSortEnableOnNonPartitionTable() { - Map options = new HashMap<>(); - options.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); - options.put(BUCKET.key(), String.valueOf(-1)); - + void testManifestSortValidation() { List fields = Arrays.asList( new DataField(0, "f0", DataTypes.INT()), new DataField(1, "f1", DataTypes.INT())); + // Test 1: manifest-sort.enabled on non-partition table should fail + Map options1 = new HashMap<>(); + options1.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); + options1.put(BUCKET.key(), String.valueOf(-1)); assertThatThrownBy( () -> validateTableSchema( @@ -531,23 +531,15 @@ void testManifestSortEnableOnNonPartitionTable() { 10, emptyList(), emptyList(), - options, + options1, ""))) .hasMessageContaining( "Cannot enable 'manifest-sort.enabled' for non-partition table."); - } - - @Test - void testManifestSortPartitionFieldNotInPartitionKeys() { - Map options = new HashMap<>(); - options.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f1"); - options.put(BUCKET.key(), String.valueOf(-1)); - - List fields = - Arrays.asList( - new DataField(0, "f0", DataTypes.INT()), - new DataField(1, "f1", 
DataTypes.INT())); + // Test 2: manifest-sort-partition-field not in partition keys should fail + Map options2 = new HashMap<>(); + options2.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f1"); + options2.put(BUCKET.key(), String.valueOf(-1)); assertThatThrownBy( () -> validateTableSchema( @@ -557,23 +549,15 @@ void testManifestSortPartitionFieldNotInPartitionKeys() { 10, singletonList("f0"), emptyList(), - options, + options2, ""))) .hasMessageContaining("is not a partition field"); - } - - @Test - void testManifestSortValidConfig() { - Map options = new HashMap<>(); - options.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); - options.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f0"); - options.put(BUCKET.key(), String.valueOf(-1)); - - List fields = - Arrays.asList( - new DataField(0, "f0", DataTypes.INT()), - new DataField(1, "f1", DataTypes.INT())); + // Test 3: valid manifest-sort config should pass + Map options3 = new HashMap<>(); + options3.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); + options3.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f0"); + options3.put(BUCKET.key(), String.valueOf(-1)); assertThatNoException() .isThrownBy( () -> @@ -584,7 +568,7 @@ void testManifestSortValidConfig() { 10, singletonList("f0"), emptyList(), - options, + options3, ""))); } From 3e3c851379a53aada55a68d77fad1a686c7dee18 Mon Sep 17 00:00:00 2001 From: umi Date: Tue, 19 May 2026 20:59:26 +0800 Subject: [PATCH 35/51] fmt --- .../java/org/apache/paimon/manifest/ManifestFileMetaTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index d41cc1413b54..462ab337ee73 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -1063,6 +1063,7 @@ public void 
testManifestSortEliminatesDeleteEntries() { } } } + /** * Test manifest sort with a multi-field partition type. * From 7a01b25a90e5e43c7f5b89ded879f3fab20c61c2 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 15:34:29 +0800 Subject: [PATCH 36/51] index --- .../paimon/operation/ManifestFileSorter.java | 169 ++++++++++++------ 1 file changed, 116 insertions(+), 53 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 7f1a7a455fa7..563e10110c9d 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -24,6 +24,7 @@ import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.RollingFileWriter; import org.apache.paimon.manifest.FileEntry; +import org.apache.paimon.manifest.FileKind; import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; @@ -31,6 +32,7 @@ import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; +import org.apache.paimon.utils.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,6 +42,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; @@ -87,12 +90,22 @@ static Optional> trySortRewrite( CodeGenUtils.newRecordComparator( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); + // Build fileName -> index mapping from input + Map fileNameToIndex = new HashMap<>(); + for (int i = 0; i < input.size(); i++) { + fileNameToIndex.put(input.get(i).fileName(), i); + } + + // Build result as 2D list with same size as input + List> result = new 
ArrayList<>(input.size()); + for (int i = 0; i < input.size(); i++) { + result.add(new ArrayList<>()); + } + // Step 2: Classify manifests into defaultCompaction and LSM. - List result = new ArrayList<>(); ClassifyResult classified = classifyManifests( input, - result, suggestedMetaSize, manifestFile, partitionType, @@ -135,7 +148,14 @@ static Optional> trySortRewrite( reusedFiles.addAll(run.files()); } } - result.addAll(reusedFiles); + + // Place reusedFiles at their original index positions + for (ManifestFileMeta file : reusedFiles) { + Integer idx = fileNameToIndex.get(file.fileName()); + if (idx != null) { + result.get(idx).add(file); + } + } // Step 5: Split picked files into sections, sort and rewrite each. List pickedFiles = new ArrayList<>(); @@ -144,10 +164,21 @@ static Optional> trySortRewrite( } pickedFiles.addAll(defaultCompactionMap.keySet()); + // Compute minIdx and maxIdx from pickedFiles + int minIdx = Integer.MAX_VALUE; + int maxIdx = Integer.MIN_VALUE; + for (ManifestFileMeta meta : pickedFiles) { + Integer idx = fileNameToIndex.get(meta.fileName()); + if (idx != null) { + minIdx = Math.min(minIdx, idx); + maxIdx = Math.max(maxIdx, idx); + } + } + Pair indexRange = Pair.of(minIdx, maxIdx); + List

    sections = splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); - rewriteSections( sections, defaultCompactionMap, @@ -158,15 +189,22 @@ static Optional> trySortRewrite( options.manifestMergeMinCount(), options.manifestSortMaxRewriteSize(), result, + indexRange, newFilesForAbort, manifestReadParallelism); + // Flatten 2D result into a single list + List flatResult = new ArrayList<>(); + for (List subList : result) { + flatResult.addAll(subList); + } + LOG.info( "Manifest sort rewrite completed: sections={}, newFiles={}, resultFiles={}.", sections.size(), newFilesForAbort.size(), - result.size()); - return Optional.of(result); + flatResult.size()); + return Optional.of(flatResult); } /** @@ -180,7 +218,6 @@ static Optional> trySortRewrite( */ private static ClassifyResult classifyManifests( List input, - List result, long suggestedMetaSize, ManifestFile manifestFile, RowType partitionType, @@ -195,25 +232,26 @@ private static ClassifyResult classifyManifests( totalDeltaFileSize += file.fileSize(); } } - boolean removeAllDelete = totalDeltaFileSize >= sizeTrigger; - // Initialize classification containers and read delete entries Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); - Set deleteEntries = - FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); - - // Build partition predicate from delete entries for overlap detection - PartitionPredicate predicate; - if (deleteEntries.isEmpty()) { - predicate = PartitionPredicate.ALWAYS_FALSE; - } else { - if (partitionType.getFieldCount() > 0) { - Set deletePartitions = - ManifestFileMerger.computeDeletePartitions(deleteEntries); - predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); + Set deleteEntries = Collections.emptySet(); + PartitionPredicate predicate = null; + if (totalDeltaFileSize >= sizeTrigger) { + deleteEntries = + 
FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); + + // Build partition predicate from delete entries for overlap detection + if (deleteEntries.isEmpty()) { + predicate = PartitionPredicate.ALWAYS_FALSE; } else { - predicate = PartitionPredicate.ALWAYS_TRUE; + if (partitionType.getFieldCount() > 0) { + Set deletePartitions = + ManifestFileMerger.computeDeletePartitions(deleteEntries); + predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); + } else { + predicate = PartitionPredicate.ALWAYS_TRUE; + } } } @@ -229,24 +267,11 @@ private static ClassifyResult classifyManifests( file.partitionStats().minValues(), file.partitionStats().maxValues(), file.partitionStats().nullCounts()); - if (removeAllDelete) { - // Full compaction: collect small or delete-overlapping files - if (small || inDeleteRange) { - iterator.remove(); - defaultCompactionManifests.put(file, inDeleteRange); - } - } else { - // Non-full: separate delete-overlapping into result, small into compaction group - if (inDeleteRange) { - iterator.remove(); - result.add(file); - } else if (small) { - iterator.remove(); - defaultCompactionManifests.put(file, false); - } + if (small || inDeleteRange) { + iterator.remove(); + defaultCompactionManifests.put(file, inDeleteRange); } } - deleteEntries = removeAllDelete ? 
deleteEntries : Collections.emptySet(); return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); } @@ -431,7 +456,8 @@ private static void rewriteSections( long suggestedMetaSize, int suggestedMinMetaCount, long maxRewriteSize, - List result, + List> result, + Pair indexRange, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { @@ -448,6 +474,7 @@ private static void rewriteSections( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); continue; @@ -462,6 +489,7 @@ private static void rewriteSections( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); } else if (!reachedLimit) { @@ -494,6 +522,7 @@ private static void rewriteSections( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); @@ -514,10 +543,11 @@ private static void rewriteSections( suggestedMetaSize, suggestedMinMetaCount, result, + indexRange, sortNewFiles, manifestReadParallelism); } else { - result.addAll(section.files); + result.get(indexRange.getLeft()).addAll(section.files); } } } @@ -535,7 +565,8 @@ private static void rewriteSubSegments( Set deleteEntries, long manifestTargetSize, int suggestedMinMetaCount, - List result, + List> result, + Pair indexRange, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { @@ -553,6 +584,7 @@ private static void rewriteSubSegments( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); subSegment.clear(); @@ -569,17 +601,19 @@ private static void rewriteSubSegments( deleteEntries, defaultCompactionMap, result, + indexRange, sortNewFiles, manifestReadParallelism); } else { - result.addAll(subSegment); + result.get(indexRange.getLeft()).addAll(subSegment); } } } /** - * Read entries from a section's manifest files, sort by partition field, and write to new - * manifests. 
Single non-delete-range files are passed through without rewrite. + * Read entries from a section's manifest files, split into ADD and DELETE entries, sort each + * group separately, write to new manifests, and place ADD meta at result[minIdx] and DELETE + * meta at result[maxIdx]. */ private static void sortAndRewriteSection( List section, @@ -587,34 +621,63 @@ private static void sortAndRewriteSection( RecordComparator fieldComparator, Set deleteEntries, Map defaultCompactionMap, - List result, + List> result, + Pair indexRange, List sortNewFiles, @Nullable Integer manifestReadParallelism) throws Exception { // Skip rewrite for single file not in delete-range. if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { - result.add(section.get(0)); + result.get(indexRange.getLeft()).add(section.get(0)); return; } // Read all entries in parallel. Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); - List entriesToRewrite = new ArrayList<>(); + List addEntriesToRewrite = new ArrayList<>(); + List deleteEntriesToRewrite = new ArrayList<>(); for (FullCompactionReadResult readResult : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - entriesToRewrite.addAll(readResult.entries); + for (ManifestEntry entry : readResult.entries) { + if (entry.kind() == FileKind.ADD) { + addEntriesToRewrite.add(entry); + } else { + deleteEntriesToRewrite.add(entry); + } + } } - if (!entriesToRewrite.isEmpty()) { - // Sort and write to new manifest files. 
- entriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); + // Sort and write ADD entries + if (!addEntriesToRewrite.isEmpty()) { + addEntriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); + RollingFileWriter writer = + manifestFile.createRollingWriter(); + Exception exception = null; + try { + writer.write(addEntriesToRewrite); + } catch (Exception e) { + exception = e; + } finally { + if (exception != null) { + writer.abort(); + throw exception; + } + writer.close(); + } + List sorted = writer.result(); + result.get(indexRange.getLeft()).addAll(sorted); + sortNewFiles.addAll(sorted); + } + // Sort and write DELETE entries + if (!deleteEntriesToRewrite.isEmpty()) { + deleteEntriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = manifestFile.createRollingWriter(); Exception exception = null; try { - writer.write(entriesToRewrite); + writer.write(deleteEntriesToRewrite); } catch (Exception e) { exception = e; } finally { @@ -625,7 +688,7 @@ private static void sortAndRewriteSection( writer.close(); } List sorted = writer.result(); - result.addAll(sorted); + result.get(indexRange.getRight()).addAll(sorted); sortNewFiles.addAll(sorted); } } From 90eedb9cec2b573d37659499b7509b19dbecd15e Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 17:54:18 +0800 Subject: [PATCH 37/51] split --- .../paimon/operation/ManifestFileMerger.java | 46 +- .../paimon/operation/ManifestFileSorter.java | 547 ++++++++++++------ 2 files changed, 398 insertions(+), 195 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index 36de3d2ecdef..fad84521f5fb 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -78,31 +78,29 @@ public static List merge( // If 
manifest-sort.enabled is enabled and there are partition fields, use // trySortRewrite if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { - Optional> sorted = - ManifestFileSorter.trySortRewrite( - input, newFilesForAbort, manifestFile, partitionType, options); - return sorted.orElse(input); + return ManifestFileSorter.trySortRewrite( + input, newFilesForAbort, manifestFile, partitionType, options); + } else { + // Otherwise try full compaction first, then minor compaction if needed + Optional> fullCompacted = + tryFullCompaction( + input, + newFilesForAbort, + manifestFile, + suggestedMetaSize, + manifestFullCompactionSize, + partitionType, + manifestReadParallelism); + return fullCompacted.orElseGet( + () -> + tryMinorCompaction( + input, + newFilesForAbort, + manifestFile, + suggestedMetaSize, + suggestedMinMetaCount, + manifestReadParallelism)); } - - // Otherwise try full compaction first, then minor compaction if needed - Optional> fullCompacted = - tryFullCompaction( - input, - newFilesForAbort, - manifestFile, - suggestedMetaSize, - manifestFullCompactionSize, - partitionType, - manifestReadParallelism); - return fullCompacted.orElseGet( - () -> - tryMinorCompaction( - input, - newFilesForAbort, - manifestFile, - suggestedMetaSize, - suggestedMinMetaCount, - manifestReadParallelism)); } catch (Throwable e) { // exception occurs, clean up and rethrow for (ManifestFileMeta manifest : newFilesForAbort) { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 563e10110c9d..b351fc960316 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -65,106 +65,186 @@ public class ManifestFileSorter { /** * Try to sort-rewrite the merged manifest list by a configured partition field. 
If the sort * field cannot be resolved, the input is returned as-is. + * + *

    Dispatches to {@link #tryFullCompact} when totalDeltaFileSize >= sizeTrigger, or {@link + * #tryMinorCompact} otherwise. */ - static Optional> trySortRewrite( + static List trySortRewrite( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) + throws Exception { + // Try full compaction first, then minor compaction if full compact is skipped + Optional> fullCompacted = + tryFullCompact(input, newFilesForAbort, manifestFile, partitionType, options); + if (fullCompacted.isPresent()) { + return fullCompacted.get(); + } + return tryMinorCompact(input, newFilesForAbort, manifestFile, partitionType, options); + } + + /** + * Full compaction path: totalDeltaFileSize >= sizeTrigger. + * + *

    Does not build index mapping. sortAndRewriteSection writes all entries (ADD+DELETE merged) + * together without separating them. + */ + private static Optional> tryFullCompact( List input, List newFilesForAbort, ManifestFile manifestFile, RowType partitionType, CoreOptions options) throws Exception { - // Extract configuration from options long suggestedMetaSize = options.manifestTargetSize().getBytes(); - Integer manifestReadParallelism = options.scanManifestParallelism(); - String sortPartitionField = options.manifestSortPartitionField(); - long manifestFullCompactionThresholdSize = - options.manifestFullCompactionThresholdSize().getBytes(); - // Step 1: Resolve sort field. - String sortField = resolveSortField(sortPartitionField, partitionType); - if (sortField == null) { - throw new IllegalArgumentException( - "Cannot resolve sort field for manifest sort rewrite."); + + // Step 1: Check if full compaction threshold is met + long totalDeltaFileSize = 0; + for (ManifestFileMeta file : input) { + if (file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize) { + totalDeltaFileSize += file.fileSize(); + } } - int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - RecordComparator fieldComparator = - CodeGenUtils.newRecordComparator( - partitionType.getFieldTypes(), new int[] {sortFieldIndex}); + if (totalDeltaFileSize < options.manifestFullCompactionThresholdSize().getBytes()) { + return Optional.empty(); + } + + // Step 2: Prepare compaction context + CompactionContext ctx = + prepareCompaction(input, manifestFile, partitionType, options, true); + Map defaultCompactionMap = ctx.defaultCompactionManifests; + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + RecordComparator fieldComparator = ctx.fieldComparator; + Set deleteEntries = ctx.deleteEntries; + + if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { + LOG.debug( + "Manifest sort full compact skipped: no runs picked and no defaultCompaction 
files."); + return Optional.empty(); + } + + LOG.info( + "Manifest sort full compact: input={} files, lsm={} runs, picked={} runs, " + + "defaultCompaction={} files.", + input.size(), + levelRuns.size(), + pickedRuns.size(), + defaultCompactionMap.size()); + + // Step 3: Collect reused files (not picked) and picked files + Set pickedSet = new HashSet<>(pickedRuns); + List result = new ArrayList<>(); + for (ManifestSortedRun run : levelRuns) { + if (!pickedSet.contains(run)) { + result.addAll(run.files()); + } + } + List pickedFiles = new ArrayList<>(); + for (ManifestSortedRun run : pickedRuns) { + pickedFiles.addAll(run.files()); + } + pickedFiles.addAll(defaultCompactionMap.keySet()); + + // Step 4: Split into sections and merge small adjacent sections + List

    sections = + splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); + sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + + // Step 5: Rewrite sections + FullCompactOutput output = new FullCompactOutput(result); + rewriteSections( + sections, + defaultCompactionMap, + manifestFile, + fieldComparator, + deleteEntries, + suggestedMetaSize, + options.manifestMergeMinCount(), + options.manifestSortMaxRewriteSize(), + output, + newFilesForAbort, + options.scanManifestParallelism(), + true); + + LOG.info( + "Manifest sort full compact completed: sections={}, newFiles={}, resultFiles={}.", + sections.size(), + newFilesForAbort.size(), + result.size()); + return Optional.of(result); + } + + /** + * Minor compaction path: totalDeltaFileSize < sizeTrigger. + * + *

    Builds index mapping to preserve original positions. sortAndRewriteSection separates ADD + * and DELETE entries, placing ADD at result[minIdx] and DELETE at result[maxIdx]. + */ + private static List tryMinorCompact( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) + throws Exception { + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + + // Step 1: Prepare compaction context + CompactionContext ctx = + prepareCompaction(input, manifestFile, partitionType, options, false); + Map defaultCompactionMap = ctx.defaultCompactionManifests; + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + RecordComparator fieldComparator = ctx.fieldComparator; + Set deleteEntries = ctx.deleteEntries; - // Build fileName -> index mapping from input + // Step 2: Build fileName -> index mapping and initialize 2D result Map fileNameToIndex = new HashMap<>(); for (int i = 0; i < input.size(); i++) { fileNameToIndex.put(input.get(i).fileName(), i); } - - // Build result as 2D list with same size as input List> result = new ArrayList<>(input.size()); for (int i = 0; i < input.size(); i++) { result.add(new ArrayList<>()); } - // Step 2: Classify manifests into defaultCompaction and LSM. - ClassifyResult classified = - classifyManifests( - input, - suggestedMetaSize, - manifestFile, - partitionType, - manifestFullCompactionThresholdSize, - manifestReadParallelism); - Map defaultCompactionMap = classified.defaultCompactionManifests; - List lsmFiles = classified.lsmFiles; - Set deleteEntries = classified.deleteEntries; - - // Step 3: Build LSM Tree and assign levels (only for lsmFiles). - List levelRuns = - lsmFiles.isEmpty() - ? new ArrayList<>() - : buildLevelSortedRuns(lsmFiles, fieldComparator); - - // Step 4: Pick runs to compact. 
- ManifestPickStrategy pickStrategy = - new ManifestPickStrategy( - options.maxSizeAmplificationPercent(), options.sortedRunSizeRatio()); - List pickedRuns = pickStrategy.pick(levelRuns); - if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( - "Manifest sort rewrite skipped: no runs picked and no defaultCompaction files."); - return Optional.empty(); + "Manifest sort minor compact skipped: no runs picked and no defaultCompaction files."); + return input; } LOG.info( - "Manifest sort rewrite: input={} files, lsm={} runs, picked={} runs, " + "Manifest sort minor compact: input={} files, lsm={} runs, picked={} runs, " + "defaultCompaction={} files.", input.size(), levelRuns.size(), pickedRuns.size(), defaultCompactionMap.size()); + // Step 3: Collect reused files at their original index positions Set pickedSet = new HashSet<>(pickedRuns); - List reusedFiles = new ArrayList<>(); for (ManifestSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { - reusedFiles.addAll(run.files()); - } - } - - // Place reusedFiles at their original index positions - for (ManifestFileMeta file : reusedFiles) { - Integer idx = fileNameToIndex.get(file.fileName()); - if (idx != null) { - result.get(idx).add(file); + for (ManifestFileMeta file : run.files()) { + Integer idx = fileNameToIndex.get(file.fileName()); + if (idx != null) { + result.get(idx).add(file); + } + } } } - // Step 5: Split picked files into sections, sort and rewrite each. 
+ // Step 4: Collect picked files and compute index range List pickedFiles = new ArrayList<>(); for (ManifestSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } pickedFiles.addAll(defaultCompactionMap.keySet()); - // Compute minIdx and maxIdx from pickedFiles int minIdx = Integer.MAX_VALUE; int maxIdx = Integer.MIN_VALUE; for (ManifestFileMeta meta : pickedFiles) { @@ -176,9 +256,13 @@ static Optional> trySortRewrite( } Pair indexRange = Pair.of(minIdx, maxIdx); + // Step 5: Split into sections and merge small adjacent sections List

    sections = splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + + // Step 6: Rewrite sections + MinorCompactOutput output = new MinorCompactOutput(result, indexRange); rewriteSections( sections, defaultCompactionMap, @@ -188,23 +272,76 @@ static Optional> trySortRewrite( suggestedMetaSize, options.manifestMergeMinCount(), options.manifestSortMaxRewriteSize(), - result, - indexRange, + output, newFilesForAbort, - manifestReadParallelism); + options.scanManifestParallelism(), + false); - // Flatten 2D result into a single list + // Step 7: Flatten 2D result into a single list List flatResult = new ArrayList<>(); for (List subList : result) { flatResult.addAll(subList); } LOG.info( - "Manifest sort rewrite completed: sections={}, newFiles={}, resultFiles={}.", + "Manifest sort minor compact completed: sections={}, newFiles={}, resultFiles={}.", sections.size(), newFilesForAbort.size(), flatResult.size()); - return Optional.of(flatResult); + return flatResult; + } + + /** + * Prepare compaction context by extracting common logic from tryFullCompact and + * tryMinorCompact. 
+ */ + private static CompactionContext prepareCompaction( + List input, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options, + boolean fullCompaction) { + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + String sortPartitionField = options.manifestSortPartitionField(); + + String sortField = resolveSortField(sortPartitionField, partitionType); + if (sortField == null) { + throw new IllegalArgumentException( + "Cannot resolve sort field for manifest sort rewrite."); + } + int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); + RecordComparator fieldComparator = + CodeGenUtils.newRecordComparator( + partitionType.getFieldTypes(), new int[] {sortFieldIndex}); + + ClassifyResult classified = + classifyManifests( + input, + suggestedMetaSize, + manifestFile, + partitionType, + fullCompaction, + manifestReadParallelism); + + List lsmFiles = classified.lsmFiles; + List levelRuns = + lsmFiles.isEmpty() + ? 
new ArrayList<>() + : buildLevelSortedRuns(lsmFiles, fieldComparator); + + ManifestPickStrategy pickStrategy = + new ManifestPickStrategy( + options.maxSizeAmplificationPercent(), options.sortedRunSizeRatio()); + List pickedRuns = pickStrategy.pick(levelRuns); + + return new CompactionContext( + fieldComparator, + classified.defaultCompactionManifests, + classified.lsmFiles, + classified.deleteEntries, + levelRuns, + pickedRuns); } /** @@ -221,23 +358,14 @@ private static ClassifyResult classifyManifests( long suggestedMetaSize, ManifestFile manifestFile, RowType partitionType, - long sizeTrigger, + boolean fullCompaction, @Nullable Integer manifestReadParallelism) { - // Calculate total size of files that need compaction to determine full-compaction trigger - Filter mustChange = - file -> file.numDeletedFiles() > 0 || file.fileSize() < suggestedMetaSize; - long totalDeltaFileSize = 0; - for (ManifestFileMeta file : input) { - if (mustChange.test(file)) { - totalDeltaFileSize += file.fileSize(); - } - } // Initialize classification containers and read delete entries Map defaultCompactionManifests = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); Set deleteEntries = Collections.emptySet(); PartitionPredicate predicate = null; - if (totalDeltaFileSize >= sizeTrigger) { + if (fullCompaction) { deleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); @@ -435,18 +563,7 @@ private static List
    mergeSmallAdjacentSections( return merged; } - /** - * Rewrite sections with a budget-controlled strategy. - * - *
      - *
    • 1. Single-file section: pass through (rewrite only if it has delete entries). - *
    • 2. Within budget: sort and rewrite the entire section. - *
    • 3. First time exceeding budget: partial rewrite within remaining budget, remaining - * files form a new section appended for later processing. - *
    • 4. After budget exhausted with defaultCompaction files: rewrite sub-segments only. - *
    • 5. After budget exhausted without defaultCompaction files: keep as-is. - *
    - */ + /** Unified method to rewrite sections with budget control. */ private static void rewriteSections( List
    sections, Map defaultCompactionMap, @@ -456,10 +573,10 @@ private static void rewriteSections( long suggestedMetaSize, int suggestedMinMetaCount, long maxRewriteSize, - List> result, - Pair indexRange, + RewriteOutput output, List sortNewFiles, - @Nullable Integer manifestReadParallelism) + @Nullable Integer manifestReadParallelism, + boolean fullCompaction) throws Exception { long processedSize = 0; boolean reachedLimit = false; @@ -473,10 +590,10 @@ private static void rewriteSections( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); continue; } @@ -488,12 +605,11 @@ private static void rewriteSections( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); } else if (!reachedLimit) { - // Partial rewrite: split section at the budget boundary. long rewriteTotalSize = maxRewriteSize - processedSize; processedSize += section.totalSize; List rewriteFiles = new ArrayList<>(); @@ -521,12 +637,11 @@ private static void rewriteSections( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); - // Append remaining files as a new section for later processing. 
if (!remainingFiles.isEmpty()) { Section remainingSection = new Section(remainingFiles, remainingSize, remainingHasDefault); @@ -542,21 +657,17 @@ private static void rewriteSections( deleteEntries, suggestedMetaSize, suggestedMinMetaCount, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); } else { - result.get(indexRange.getLeft()).addAll(section.files); + output.addAllUnchanged(section.files); } } } - /** - * Batch-rewrite files in a section by splitting them into sub-segments of {@code - * manifestTargetSize}. Tail sub-segment is only rewritten if it has delete entries or meets - * {@code suggestedMinMetaCount}. - */ + /** Unified method to rewrite sub-segments with budget control. */ private static void rewriteSubSegments( List section, Map defaultCompactionMap, @@ -565,10 +676,10 @@ private static void rewriteSubSegments( Set deleteEntries, long manifestTargetSize, int suggestedMinMetaCount, - List> result, - Pair indexRange, + RewriteOutput output, List sortNewFiles, - @Nullable Integer manifestReadParallelism) + @Nullable Integer manifestReadParallelism, + boolean fullCompaction) throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; @@ -583,10 +694,10 @@ private static void rewriteSubSegments( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); subSegment.clear(); subSegmentSize = 0; } @@ -600,20 +711,20 @@ private static void rewriteSubSegments( fieldComparator, deleteEntries, defaultCompactionMap, - result, - indexRange, + output, sortNewFiles, - manifestReadParallelism); + manifestReadParallelism, + fullCompaction); } else { - result.get(indexRange.getLeft()).addAll(subSegment); + output.addAllUnchanged(subSegment); } } } /** - * Read entries from a section's manifest files, split into ADD and DELETE entries, sort each - * group 
separately, write to new manifests, and place ADD meta at result[minIdx] and DELETE - * meta at result[maxIdx]. + * Unified method to sort and rewrite a section. + * + * @param fullCompaction if true, merge ADD+DELETE entries together; if false, separate them */ private static void sortAndRewriteSection( List section, @@ -621,78 +732,79 @@ private static void sortAndRewriteSection( RecordComparator fieldComparator, Set deleteEntries, Map defaultCompactionMap, - List> result, - Pair indexRange, + RewriteOutput output, List sortNewFiles, - @Nullable Integer manifestReadParallelism) + @Nullable Integer manifestReadParallelism, + boolean fullCompaction) throws Exception { // Skip rewrite for single file not in delete-range. if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { - result.get(indexRange.getLeft()).add(section.get(0)); + output.addUnchanged(section.get(0)); return; } - // Read all entries in parallel. + + // Read all entries in parallel (common for both paths). 
Function> reader = meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); - List addEntriesToRewrite = new ArrayList<>(); + List addEntries = new ArrayList<>(); List deleteEntriesToRewrite = new ArrayList<>(); for (FullCompactionReadResult readResult : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - for (ManifestEntry entry : readResult.entries) { - if (entry.kind() == FileKind.ADD) { - addEntriesToRewrite.add(entry); - } else { - deleteEntriesToRewrite.add(entry); + if (fullCompaction) { + addEntries.addAll(readResult.entries); + } else { + for (ManifestEntry entry : readResult.entries) { + if (entry.kind() == FileKind.ADD) { + addEntries.add(entry); + } else { + deleteEntriesToRewrite.add(entry); + } } } } - // Sort and write ADD entries - if (!addEntriesToRewrite.isEmpty()) { - addEntriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); - RollingFileWriter writer = - manifestFile.createRollingWriter(); - Exception exception = null; - try { - writer.write(addEntriesToRewrite); - } catch (Exception e) { - exception = e; - } finally { - if (exception != null) { - writer.abort(); - throw exception; - } - writer.close(); - } - List sorted = writer.result(); - result.get(indexRange.getLeft()).addAll(sorted); + // Write ADD (or all) entries + if (!addEntries.isEmpty()) { + List sorted = + sortAndWriteEntries(addEntries, manifestFile, fieldComparator); + output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } - // Sort and write DELETE entries + // Write DELETE entries (minor compact only) if (!deleteEntriesToRewrite.isEmpty()) { - deleteEntriesToRewrite.sort((a, b) -> compareSortKey(a, b, fieldComparator)); - RollingFileWriter writer = - manifestFile.createRollingWriter(); - Exception exception = null; - try { - writer.write(deleteEntriesToRewrite); - } catch (Exception e) { - exception = e; - } finally { - if (exception != null) { - writer.abort(); - throw exception; - } - writer.close(); - } - List 
sorted = writer.result(); - result.get(indexRange.getRight()).addAll(sorted); + List sorted = + sortAndWriteEntries(deleteEntriesToRewrite, manifestFile, fieldComparator); + output.addDeleteFiles(sorted); sortNewFiles.addAll(sorted); } } + /** Sort entries and write them to a new manifest file with proper error handling. */ + private static List sortAndWriteEntries( + List entries, + ManifestFile manifestFile, + RecordComparator fieldComparator) + throws Exception { + entries.sort((a, b) -> compareSortKey(a, b, fieldComparator)); + RollingFileWriter writer = + manifestFile.createRollingWriter(); + Exception exception = null; + try { + writer.write(entries); + } catch (Exception e) { + exception = e; + } finally { + if (exception != null) { + writer.abort(); + throw exception; + } + writer.close(); + } + return writer.result(); + } + /** * Compare two {@link ManifestEntry}s by the composite key {@code (sort-field, kind, fileName)}. * {@code fileName} is used as the tie-breaker so that all entries sharing the same sort-field @@ -760,6 +872,99 @@ private static FullCompactionReadResult readForSortRewrite( return new FullCompactionReadResult(meta, true, entries); } + /** Strategy interface for writing compaction results. 
*/ + interface RewriteOutput { + void addUnchanged(ManifestFileMeta file); + + void addAllUnchanged(List files); + + void addSortedFiles(List files); + + void addDeleteFiles(List files); + } + + private static class FullCompactOutput implements RewriteOutput { + private final List result; + + FullCompactOutput(List result) { + this.result = result; + } + + @Override + public void addUnchanged(ManifestFileMeta file) { + result.add(file); + } + + @Override + public void addAllUnchanged(List files) { + result.addAll(files); + } + + @Override + public void addSortedFiles(List files) { + result.addAll(files); + } + + @Override + public void addDeleteFiles(List files) { + result.addAll(files); + } + } + + private static class MinorCompactOutput implements RewriteOutput { + private final List> result; + private final Pair indexRange; + + MinorCompactOutput(List> result, Pair indexRange) { + this.result = result; + this.indexRange = indexRange; + } + + @Override + public void addUnchanged(ManifestFileMeta file) { + result.get(indexRange.getLeft()).add(file); + } + + @Override + public void addAllUnchanged(List files) { + result.get(indexRange.getLeft()).addAll(files); + } + + @Override + public void addSortedFiles(List files) { + result.get(indexRange.getLeft()).addAll(files); + } + + @Override + public void addDeleteFiles(List files) { + result.get(indexRange.getRight()).addAll(files); + } + } + + private static class CompactionContext { + final RecordComparator fieldComparator; + final Map defaultCompactionManifests; + final List lsmFiles; + @Nullable final Set deleteEntries; + final List levelRuns; + final List pickedRuns; + + CompactionContext( + RecordComparator fieldComparator, + Map defaultCompactionManifests, + List lsmFiles, + @Nullable Set deleteEntries, + List levelRuns, + List pickedRuns) { + this.fieldComparator = fieldComparator; + this.defaultCompactionManifests = defaultCompactionManifests; + this.lsmFiles = lsmFiles; + this.deleteEntries = deleteEntries; + 
this.levelRuns = levelRuns; + this.pickedRuns = pickedRuns; + } + } + /** A section of manifest files with pre-computed metadata. */ static class Section { final List files; From 99fc3260bd8ba107c460e54c75988ec2e4a49be5 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 18:35:18 +0800 Subject: [PATCH 38/51] fix --- .../java/org/apache/paimon/CoreOptions.java | 2 +- .../paimon/operation/ManifestFileSorter.java | 21 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 5dabc4117ebc..f5819c82182f 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -483,7 +483,7 @@ public InlineElement getDescription() { .noDefaultValue() .withDescription( "Partition field name to sort manifest entries by. Validated by" - + " schema validation, If not configured, defaults to the first partition field."); + + " schema validation, if not configured, defaults to the first partition field."); public static final ConfigOption MANIFEST_SORT_MAX_REWRITE_SIZE = key("manifest-sort.max-rewrite-size") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index b351fc960316..7038dee65c5c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -192,16 +192,7 @@ private static List tryMinorCompact( throws Exception { long suggestedMetaSize = options.manifestTargetSize().getBytes(); - // Step 1: Prepare compaction context - CompactionContext ctx = - prepareCompaction(input, manifestFile, partitionType, options, false); - Map defaultCompactionMap = ctx.defaultCompactionManifests; - List levelRuns = ctx.levelRuns; - 
List pickedRuns = ctx.pickedRuns; - RecordComparator fieldComparator = ctx.fieldComparator; - Set deleteEntries = ctx.deleteEntries; - - // Step 2: Build fileName -> index mapping and initialize 2D result + // Step 1: Build fileName -> index mapping and initialize 2D result Map fileNameToIndex = new HashMap<>(); for (int i = 0; i < input.size(); i++) { fileNameToIndex.put(input.get(i).fileName(), i); @@ -211,6 +202,15 @@ private static List tryMinorCompact( result.add(new ArrayList<>()); } + // Step 2: Prepare compaction context + CompactionContext ctx = + prepareCompaction(input, manifestFile, partitionType, options, false); + Map defaultCompactionMap = ctx.defaultCompactionManifests; + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + RecordComparator fieldComparator = ctx.fieldComparator; + Set deleteEntries = ctx.deleteEntries; + if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort minor compact skipped: no runs picked and no defaultCompaction files."); @@ -990,7 +990,6 @@ static Section merge(Section a, Section b) { /** Result of classifying manifest files into default-compaction and LSM groups. */ private static class ClassifyResult { - /** key: ManifestFileMeta, value: boolean[]{isSmall, isInDeleteRange}. 
*/ final Map defaultCompactionManifests; final List lsmFiles; From 398fd017ddf52aead874aaf4ccbbe14a80628307 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 20:40:33 +0800 Subject: [PATCH 39/51] comment --- .../paimon/operation/ManifestFileSorter.java | 11 +++++++++-- .../paimon/schema/SchemaValidation.java | 19 +++++++++---------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 7038dee65c5c..1e724f925a9b 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -225,7 +225,7 @@ private static List tryMinorCompact( pickedRuns.size(), defaultCompactionMap.size()); - // Step 3: Collect reused files at their original index positions + // Step 3: Collect reused files at their original index positions and collect picked files Set pickedSet = new HashSet<>(pickedRuns); for (ManifestSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { @@ -238,13 +238,13 @@ private static List tryMinorCompact( } } - // Step 4: Collect picked files and compute index range List pickedFiles = new ArrayList<>(); for (ManifestSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } pickedFiles.addAll(defaultCompactionMap.keySet()); + // Step 4: Compute index range int minIdx = Integer.MAX_VALUE; int maxIdx = Integer.MIN_VALUE; for (ManifestFileMeta meta : pickedFiles) { @@ -447,6 +447,9 @@ static List buildLevelSortedRuns( earliestRun.get(earliestRun.size() - 1).partitionStats().maxValues()) >= 0) { // Current file's min >= run's max, append to this run + // Note: When min == max (boundary equality), files are considered non-overlapping + // and can be placed in the same SortedRun. 
This allows building fewer SortedRuns, + // improving compaction efficiency while maintaining correct sort order. earliestRun.add(file); runs.offer(earliestRun); } else { @@ -510,6 +513,10 @@ static List
    splitIntoSections( for (int i = 1; i < pickedFiles.size(); i++) { ManifestFileMeta file = pickedFiles.get(i); + // Note: Boundary equality (file.min == sectionMaxBound) results in separate sections. + // This avoids merge-sort overhead while maintaining partition filtering capability. + // Files with non-overlapping boundaries (including equal boundaries) can be processed + // independently without significantly impacting partition pruning efficiency. if (fieldComparator.compare(file.partitionStats().minValues(), sectionMaxBound) >= 0) { sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java index 128d33aa9649..50228385a90a 100644 --- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java +++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaValidation.java @@ -1041,16 +1041,15 @@ private static void validateManifestSort(TableSchema schema, CoreOptions options !schema.partitionKeys().isEmpty(), "Cannot enable '%s' for non-partition table.", CoreOptions.MANIFEST_SORT_ENABLED.key()); - } - - String sortPartitionField = options.manifestSortPartitionField(); - if (sortPartitionField != null && !sortPartitionField.isEmpty()) { - checkArgument( - schema.partitionKeys().contains(sortPartitionField), - "'%s' = '%s' is not a partition field. Available partition fields: %s.", - CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), - sortPartitionField, - schema.partitionKeys()); + String sortPartitionField = options.manifestSortPartitionField(); + if (sortPartitionField != null && !sortPartitionField.isEmpty()) { + checkArgument( + schema.partitionKeys().contains(sortPartitionField), + "'%s' = '%s' is not a partition field. 
Available partition fields: %s.", + CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), + sortPartitionField, + schema.partitionKeys()); + } } } } From 9a91b7f61a0f9e2a5596c239451323b0526bbf4a Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 21:15:34 +0800 Subject: [PATCH 40/51] splitSortAndRewriteSection --- .../paimon/operation/ManifestFileSorter.java | 163 ++++++++++++------ 1 file changed, 106 insertions(+), 57 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 1e724f925a9b..834d5d5ff11e 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -28,7 +28,6 @@ import org.apache.paimon.manifest.ManifestEntry; import org.apache.paimon.manifest.ManifestFile; import org.apache.paimon.manifest.ManifestFileMeta; -import org.apache.paimon.operation.ManifestFileMerger.FullCompactionReadResult; import org.apache.paimon.partition.PartitionPredicate; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.Filter; @@ -729,15 +728,16 @@ private static void rewriteSubSegments( } /** - * Unified method to sort and rewrite a section. + * Sort and rewrite a section. Dispatches to full or minor compact path. * - * @param fullCompaction if true, merge ADD+DELETE entries together; if false, separate them + *

    sortNewFiles is the same reference as newFilesForAbort, ensuring newly written files are + * cleaned up on exception by the caller's catch block. */ private static void sortAndRewriteSection( List section, ManifestFile manifestFile, RecordComparator fieldComparator, - Set deleteEntries, + Set deletedIdentifiers, Map defaultCompactionMap, RewriteOutput output, List sortNewFiles, @@ -750,28 +750,111 @@ private static void sortAndRewriteSection( return; } - // Read all entries in parallel (common for both paths). - Function> reader = - meta -> singletonList(readForSortRewrite(meta, manifestFile, deleteEntries)); + if (fullCompaction) { + sortAndRewriteFull( + section, + manifestFile, + fieldComparator, + deletedIdentifiers, + output, + sortNewFiles, + manifestReadParallelism); + } else { + sortAndRewriteMinor( + section, + manifestFile, + fieldComparator, + output, + sortNewFiles, + manifestReadParallelism); + } + } - List addEntries = new ArrayList<>(); - List deleteEntriesToRewrite = new ArrayList<>(); - for (FullCompactionReadResult readResult : + /** + * Full compaction path: read all surviving entries (ADD merged with DELETE), sort them + * together, and write to output as a single sorted stream. + */ + private static void sortAndRewriteFull( + List section, + ManifestFile manifestFile, + RecordComparator fieldComparator, + Set deletedIdentifiers, + RewriteOutput output, + List sortNewFiles, + @Nullable Integer manifestReadParallelism) + throws Exception { + // Read surviving ADD entries: filter out entries cancelled by deletedIdentifiers. 
+ Function> reader = + meta -> { + List batch = new ArrayList<>(); + for (ManifestEntry entry : + manifestFile.read( + meta.fileName(), + meta.fileSize(), + FileEntry.addFilter(), + Filter.alwaysTrue())) { + if (!deletedIdentifiers.contains(entry.identifier())) { + batch.add(entry); + } + } + return batch; + }; + + List entries = new ArrayList<>(); + for (ManifestEntry entry : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - if (fullCompaction) { - addEntries.addAll(readResult.entries); - } else { - for (ManifestEntry entry : readResult.entries) { - if (entry.kind() == FileKind.ADD) { - addEntries.add(entry); - } else { - deleteEntriesToRewrite.add(entry); + entries.add(entry); + } + + if (!entries.isEmpty()) { + List sorted = + sortAndWriteEntries(entries, manifestFile, fieldComparator); + output.addSortedFiles(sorted); + sortNewFiles.addAll(sorted); + } + } + + /** + * Minor compaction path: read entries with ADD/DELETE classified in a single pass per file, + * then sort each group independently and write them to output. + * + *

    Each file is read in parallel (via sequentialBatchedExecute). The reader classifies + * entries into ADD and DELETE within each file, returning a Pair. Results are merged in the + * main thread. + */ + private static void sortAndRewriteMinor( + List section, + ManifestFile manifestFile, + RecordComparator fieldComparator, + RewriteOutput output, + List sortNewFiles, + @Nullable Integer manifestReadParallelism) + throws Exception { + // Read and classify ADD/DELETE in one pass per file. + // Returns Pair packed as a singleton list of a wrapper. + Function, List>>> reader = + meta -> { + List addBatch = new ArrayList<>(); + List deleteBatch = new ArrayList<>(); + for (ManifestEntry entry : + manifestFile.read(meta.fileName(), meta.fileSize())) { + if (entry.kind() == FileKind.ADD) { + addBatch.add(entry); + } else { + deleteBatch.add(entry); + } } - } - } + return singletonList(Pair.of(addBatch, deleteBatch)); + }; + + List addEntries = new ArrayList<>(); + List deleteEntries = new ArrayList<>(); + for (Pair, List> pair : + sequentialBatchedExecute(reader, section, manifestReadParallelism)) { + addEntries.addAll(pair.getLeft()); + deleteEntries.addAll(pair.getRight()); } - // Write ADD (or all) entries if (!addEntries.isEmpty()) { List sorted = sortAndWriteEntries(addEntries, manifestFile, fieldComparator); @@ -779,10 +862,9 @@ private static void sortAndRewriteSection( sortNewFiles.addAll(sorted); } - // Write DELETE entries (minor compact only) - if (!deleteEntriesToRewrite.isEmpty()) { + if (!deleteEntries.isEmpty()) { List sorted = - sortAndWriteEntries(deleteEntriesToRewrite, manifestFile, fieldComparator); + sortAndWriteEntries(deleteEntries, manifestFile, fieldComparator); output.addDeleteFiles(sorted); sortNewFiles.addAll(sorted); } @@ -847,38 +929,6 @@ static String resolveSortField(String sortPartitionField, RowType partitionType) return partitionType.getFieldNames().get(0); } - /** - * Read a single manifest file for sort rewrite. - * - *

    When {@code deletedIdentifiers} is non-empty (full compaction path), only surviving ADD - * entries (not cancelled by deletedIdentifiers) are kept, and DELETE entries are dropped - * because the full compaction has already resolved them. - * - *

    When {@code deletedIdentifiers} is empty (non-full-compaction path), all entries (both ADD - * and DELETE) are preserved to avoid losing unresolved DELETE entries. - */ - private static FullCompactionReadResult readForSortRewrite( - ManifestFileMeta meta, - ManifestFile manifestFile, - Set deletedIdentifiers) { - List entries = new ArrayList<>(); - if (deletedIdentifiers.isEmpty()) { - entries.addAll(manifestFile.read(meta.fileName(), meta.fileSize())); - } else { - for (ManifestEntry entry : - manifestFile.read( - meta.fileName(), - meta.fileSize(), - FileEntry.addFilter(), - Filter.alwaysTrue())) { - if (!deletedIdentifiers.contains(entry.identifier())) { - entries.add(entry); - } - } - } - return new FullCompactionReadResult(meta, true, entries); - } - /** Strategy interface for writing compaction results. */ interface RewriteOutput { void addUnchanged(ManifestFileMeta file); @@ -998,7 +1048,6 @@ static Section merge(Section a, Section b) { /** Result of classifying manifest files into default-compaction and LSM groups. 
*/ private static class ClassifyResult { final Map defaultCompactionManifests; - final List lsmFiles; @Nullable final Set deleteEntries; From 5a6c04392a96757319f7e8c20d44a2469f6057d2 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 21:18:04 +0800 Subject: [PATCH 41/51] fix --- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 834d5d5ff11e..c0d9c4302dbf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -1049,12 +1049,12 @@ static Section merge(Section a, Section b) { private static class ClassifyResult { final Map defaultCompactionManifests; final List lsmFiles; - @Nullable final Set deleteEntries; + final Set deleteEntries; ClassifyResult( Map defaultCompactionManifests, List lsmFiles, - @Nullable Set deleteEntries) { + Set deleteEntries) { this.defaultCompactionManifests = defaultCompactionManifests; this.lsmFiles = lsmFiles; this.deleteEntries = deleteEntries; From e74c4746b2a5b16beed6bc545539a3c3fcba5ba6 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 21:35:04 +0800 Subject: [PATCH 42/51] fix --- .../paimon/operation/ManifestFileSorter.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index c0d9c4302dbf..bea1da2b28a8 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -191,17 +191,7 @@ private static List tryMinorCompact( throws Exception { long 
suggestedMetaSize = options.manifestTargetSize().getBytes(); - // Step 1: Build fileName -> index mapping and initialize 2D result - Map fileNameToIndex = new HashMap<>(); - for (int i = 0; i < input.size(); i++) { - fileNameToIndex.put(input.get(i).fileName(), i); - } - List> result = new ArrayList<>(input.size()); - for (int i = 0; i < input.size(); i++) { - result.add(new ArrayList<>()); - } - - // Step 2: Prepare compaction context + // Step 1: Prepare compaction context (early-return if nothing to compact) CompactionContext ctx = prepareCompaction(input, manifestFile, partitionType, options, false); Map defaultCompactionMap = ctx.defaultCompactionManifests; @@ -224,7 +214,15 @@ private static List tryMinorCompact( pickedRuns.size(), defaultCompactionMap.size()); - // Step 3: Collect reused files at their original index positions and collect picked files + // Step 2: Build fileName -> index mapping and initialize 2D result + Map fileNameToIndex = new HashMap<>(); + List> result = new ArrayList<>(input.size()); + for (int i = 0; i < input.size(); i++) { + fileNameToIndex.put(input.get(i).fileName(), i); + result.add(new ArrayList<>()); + } + + // Step 3: Collect reused files and picked files Set pickedSet = new HashSet<>(pickedRuns); for (ManifestSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { From 6cffd0ff5f4ead2c63e5d43bd121b489de5af103 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 20 May 2026 21:38:15 +0800 Subject: [PATCH 43/51] fix --- .../test/java/org/apache/paimon/schema/SchemaValidationTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java index 28527f907d57..beb4bfd37680 100644 --- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaValidationTest.java @@ -538,6 +538,7 @@ void 
testManifestSortValidation() { // Test 2: manifest-sort-partition-field not in partition keys should fail Map options2 = new HashMap<>(); + options2.put(CoreOptions.MANIFEST_SORT_ENABLED.key(), "true"); options2.put(CoreOptions.MANIFEST_SORT_PARTITION_FIELD.key(), "f1"); options2.put(BUCKET.key(), String.valueOf(-1)); assertThatThrownBy( From 6cc462ff2da5d6bbd9e71baba8e7b25c1923d945 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 21 May 2026 00:38:33 +0800 Subject: [PATCH 44/51] refactor --- ...un.java => ManifestAdjacentSortedRun.java} | 28 +- .../paimon/operation/ManifestFileMerger.java | 5 +- .../paimon/operation/ManifestFileSorter.java | 468 ++++++------------ .../operation/ManifestPickStrategy.java | 18 +- 4 files changed, 175 insertions(+), 344 deletions(-) rename paimon-core/src/main/java/org/apache/paimon/operation/{ManifestSortedRun.java => ManifestAdjacentSortedRun.java} (67%) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java similarity index 67% rename from paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java rename to paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java index c270677e1f8d..4e1db69fb6dd 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestSortedRun.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java @@ -26,18 +26,18 @@ import java.util.stream.Collectors; /** - * A {@code ManifestSortedRun} is a list of {@link ManifestFileMeta}s sorted by a single partition - * field (the configured manifest sort field). The intervals {@code [partitionStats.minValues[k], - * partitionStats.maxValues[k]]} of these manifests do not overlap on field {@code k}, where {@code - * k} is the configured sort field index. 
+ * A {@code ManifestAdjacentSortedRun} is a list of {@link ManifestFileMeta}s sorted by a single + * partition field (the configured manifest sort field). The intervals {@code + * [partitionStats.minValues[k], partitionStats.maxValues[k]]} of these manifests do not overlap on + * field {@code k}, where {@code k} is the configured sort field index. */ -public class ManifestSortedRun { +public class ManifestAdjacentSortedRun { private int level; private final List files; private final long totalSize; - private ManifestSortedRun(List files) { + private ManifestAdjacentSortedRun(List files) { this.level = -1; this.files = Collections.unmodifiableList(files); long size = 0L; @@ -48,12 +48,12 @@ private ManifestSortedRun(List files) { } /** - * Build a {@code ManifestSortedRun} from an already-sorted list. The caller MUST guarantee that - * {@code sortedFiles} is sorted ascending on the configured sort field's min value, and that - * intervals do not overlap on that field. + * Build a {@code ManifestAdjacentSortedRun} from an already-sorted list. The caller MUST + * guarantee that {@code sortedFiles} is sorted ascending on the configured sort field's min + * value, and that intervals do not overlap on that field. 
*/ - public static ManifestSortedRun fromSorted(List sortedFiles) { - return new ManifestSortedRun(sortedFiles); + public static ManifestAdjacentSortedRun fromSorted(List sortedFiles) { + return new ManifestAdjacentSortedRun(sortedFiles); } public List files() { @@ -74,10 +74,10 @@ public void setLevel(int level) { @Override public boolean equals(Object o) { - if (!(o instanceof ManifestSortedRun)) { + if (!(o instanceof ManifestAdjacentSortedRun)) { return false; } - ManifestSortedRun that = (ManifestSortedRun) o; + ManifestAdjacentSortedRun that = (ManifestAdjacentSortedRun) o; return level == that.level && files.equals(that.files); } @@ -88,7 +88,7 @@ public int hashCode() { @Override public String toString() { - return "ManifestSortedRun{level=" + return "ManifestAdjacentSortedRun{level=" + level + ", files=[" + files.stream().map(ManifestFileMeta::fileName).collect(Collectors.joining(", ")) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index fad84521f5fb..b10505570baf 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -78,8 +78,9 @@ public static List merge( // If manifest-sort.enabled is enabled and there are partition fields, use // trySortRewrite if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { - return ManifestFileSorter.trySortRewrite( - input, newFilesForAbort, manifestFile, partitionType, options); + ManifestFileSorter sorter = + new ManifestFileSorter(manifestFile, partitionType, options); + return sorter.trySortRewrite(input, newFilesForAbort); } else { // Otherwise try full compaction first, then minor compaction if needed Optional> fullCompacted = diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java 
b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index bea1da2b28a8..d71d9f901502 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -61,6 +61,38 @@ public class ManifestFileSorter { private static final Logger LOG = LoggerFactory.getLogger(ManifestFileSorter.class); + // Immutable fields set at construction time + private final ManifestFile manifestFile; + private final RowType partitionType; + private final String sortPartitionField; + + private final long suggestedMetaSize; + private final int suggestedMinMetaCount; + private final long fullCompactionThreshold; + private final long maxRewriteSize; + private final int maxSizeAmplificationPercent; + private final int sortedRunSizeRatio; + @Nullable private final Integer manifestReadParallelism; + + // Mutable fields set during prepareCompaction + private boolean fullCompaction; + private RecordComparator fieldComparator; + private Set deleteEntries; + private Map defaultCompactionMap; + + ManifestFileSorter(ManifestFile manifestFile, RowType partitionType, CoreOptions options) { + this.manifestFile = manifestFile; + this.partitionType = partitionType; + this.sortPartitionField = options.manifestSortPartitionField(); + this.suggestedMetaSize = options.manifestTargetSize().getBytes(); + this.suggestedMinMetaCount = options.manifestMergeMinCount(); + this.fullCompactionThreshold = options.manifestFullCompactionThresholdSize().getBytes(); + this.maxRewriteSize = options.manifestSortMaxRewriteSize(); + this.maxSizeAmplificationPercent = options.maxSizeAmplificationPercent(); + this.sortedRunSizeRatio = options.sortedRunSizeRatio(); + this.manifestReadParallelism = options.scanManifestParallelism(); + } + /** * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort * field cannot be resolved, the input is returned as-is. 
@@ -68,20 +100,14 @@ public class ManifestFileSorter { *

    Dispatches to {@link #tryFullCompact} when totalDeltaFileSize >= sizeTrigger, or {@link * #tryMinorCompact} otherwise. */ - static List trySortRewrite( - List input, - List newFilesForAbort, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options) + List trySortRewrite( + List input, List newFilesForAbort) throws Exception { - // Try full compaction first, then minor compaction if full compact is skipped - Optional> fullCompacted = - tryFullCompact(input, newFilesForAbort, manifestFile, partitionType, options); + Optional> fullCompacted = tryFullCompact(input, newFilesForAbort); if (fullCompacted.isPresent()) { return fullCompacted.get(); } - return tryMinorCompact(input, newFilesForAbort, manifestFile, partitionType, options); + return tryMinorCompact(input, newFilesForAbort); } /** @@ -90,15 +116,9 @@ static List trySortRewrite( *

    Does not build index mapping. sortAndRewriteSection writes all entries (ADD+DELETE merged) * together without separating them. */ - private static Optional> tryFullCompact( - List input, - List newFilesForAbort, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options) + private Optional> tryFullCompact( + List input, List newFilesForAbort) throws Exception { - long suggestedMetaSize = options.manifestTargetSize().getBytes(); - // Step 1: Check if full compaction threshold is met long totalDeltaFileSize = 0; for (ManifestFileMeta file : input) { @@ -106,18 +126,16 @@ private static Optional> tryFullCompact( totalDeltaFileSize += file.fileSize(); } } - if (totalDeltaFileSize < options.manifestFullCompactionThresholdSize().getBytes()) { + if (totalDeltaFileSize < fullCompactionThreshold) { + this.fullCompaction = false; return Optional.empty(); } - + this.fullCompaction = true; // Step 2: Prepare compaction context - CompactionContext ctx = - prepareCompaction(input, manifestFile, partitionType, options, true); - Map defaultCompactionMap = ctx.defaultCompactionManifests; - List levelRuns = ctx.levelRuns; - List pickedRuns = ctx.pickedRuns; - RecordComparator fieldComparator = ctx.fieldComparator; - Set deleteEntries = ctx.deleteEntries; + Pair, List> runsPair = + prepareCompaction(input); + List levelRuns = runsPair.getLeft(); + List pickedRuns = runsPair.getRight(); if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( @@ -134,15 +152,15 @@ private static Optional> tryFullCompact( defaultCompactionMap.size()); // Step 3: Collect reused files (not picked) and picked files - Set pickedSet = new HashSet<>(pickedRuns); + Set pickedSet = new HashSet<>(pickedRuns); List result = new ArrayList<>(); - for (ManifestSortedRun run : levelRuns) { + for (ManifestAdjacentSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { result.addAll(run.files()); } } List pickedFiles = new ArrayList<>(); - for (ManifestSortedRun run : 
pickedRuns) { + for (ManifestAdjacentSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } pickedFiles.addAll(defaultCompactionMap.keySet()); @@ -154,19 +172,7 @@ private static Optional> tryFullCompact( // Step 5: Rewrite sections FullCompactOutput output = new FullCompactOutput(result); - rewriteSections( - sections, - defaultCompactionMap, - manifestFile, - fieldComparator, - deleteEntries, - suggestedMetaSize, - options.manifestMergeMinCount(), - options.manifestSortMaxRewriteSize(), - output, - newFilesForAbort, - options.scanManifestParallelism(), - true); + rewriteSections(sections, output, newFilesForAbort); LOG.info( "Manifest sort full compact completed: sections={}, newFiles={}, resultFiles={}.", @@ -182,23 +188,14 @@ private static Optional> tryFullCompact( *

    Builds index mapping to preserve original positions. sortAndRewriteSection separates ADD * and DELETE entries, placing ADD at result[minIdx] and DELETE at result[maxIdx]. */ - private static List tryMinorCompact( - List input, - List newFilesForAbort, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options) + private List tryMinorCompact( + List input, List newFilesForAbort) throws Exception { - long suggestedMetaSize = options.manifestTargetSize().getBytes(); - // Step 1: Prepare compaction context (early-return if nothing to compact) - CompactionContext ctx = - prepareCompaction(input, manifestFile, partitionType, options, false); - Map defaultCompactionMap = ctx.defaultCompactionManifests; - List levelRuns = ctx.levelRuns; - List pickedRuns = ctx.pickedRuns; - RecordComparator fieldComparator = ctx.fieldComparator; - Set deleteEntries = ctx.deleteEntries; + Pair, List> runsPair = + prepareCompaction(input); + List levelRuns = runsPair.getLeft(); + List pickedRuns = runsPair.getRight(); if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { LOG.debug( @@ -223,8 +220,8 @@ private static List tryMinorCompact( } // Step 3: Collect reused files and picked files - Set pickedSet = new HashSet<>(pickedRuns); - for (ManifestSortedRun run : levelRuns) { + Set pickedSet = new HashSet<>(pickedRuns); + for (ManifestAdjacentSortedRun run : levelRuns) { if (!pickedSet.contains(run)) { for (ManifestFileMeta file : run.files()) { Integer idx = fileNameToIndex.get(file.fileName()); @@ -236,7 +233,7 @@ private static List tryMinorCompact( } List pickedFiles = new ArrayList<>(); - for (ManifestSortedRun run : pickedRuns) { + for (ManifestAdjacentSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } pickedFiles.addAll(defaultCompactionMap.keySet()); @@ -259,20 +256,8 @@ private static List tryMinorCompact( sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); // Step 6: Rewrite sections - MinorCompactOutput output = new 
MinorCompactOutput(result, indexRange); - rewriteSections( - sections, - defaultCompactionMap, - manifestFile, - fieldComparator, - deleteEntries, - suggestedMetaSize, - options.manifestMergeMinCount(), - options.manifestSortMaxRewriteSize(), - output, - newFilesForAbort, - options.scanManifestParallelism(), - false); + MinorCompactOutput output = new MinorCompactOutput(result, indexRange, fileNameToIndex); + rewriteSections(sections, output, newFilesForAbort); // Step 7: Flatten 2D result into a single list List flatResult = new ArrayList<>(); @@ -289,18 +274,14 @@ private static List tryMinorCompact( } /** - * Prepare compaction context by extracting common logic from tryFullCompact and - * tryMinorCompact. + * Prepare compaction context: resolve sort field, classify manifests, build level runs, and + * pick runs for compaction. Sets instance fields: fullCompaction, fieldComparator, + * deleteEntries, defaultCompactionMap. + * + * @return Pair of (levelRuns, pickedRuns) */ - private static CompactionContext prepareCompaction( - List input, - ManifestFile manifestFile, - RowType partitionType, - CoreOptions options, - boolean fullCompaction) { - long suggestedMetaSize = options.manifestTargetSize().getBytes(); - Integer manifestReadParallelism = options.scanManifestParallelism(); - String sortPartitionField = options.manifestSortPartitionField(); + private Pair, List> + prepareCompaction(List input) { String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { @@ -308,71 +289,52 @@ private static CompactionContext prepareCompaction( "Cannot resolve sort field for manifest sort rewrite."); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - RecordComparator fieldComparator = + this.fieldComparator = CodeGenUtils.newRecordComparator( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); - ClassifyResult classified = - classifyManifests( - input, - suggestedMetaSize, - manifestFile, - partitionType, - 
fullCompaction, - manifestReadParallelism); - - List lsmFiles = classified.lsmFiles; - List levelRuns = + List lsmFiles = classifyManifests(input); + List levelRuns = lsmFiles.isEmpty() ? new ArrayList<>() : buildLevelSortedRuns(lsmFiles, fieldComparator); ManifestPickStrategy pickStrategy = - new ManifestPickStrategy( - options.maxSizeAmplificationPercent(), options.sortedRunSizeRatio()); - List pickedRuns = pickStrategy.pick(levelRuns); - - return new CompactionContext( - fieldComparator, - classified.defaultCompactionManifests, - classified.lsmFiles, - classified.deleteEntries, - levelRuns, - pickedRuns); + new ManifestPickStrategy(maxSizeAmplificationPercent, sortedRunSizeRatio); + List pickedRuns = pickStrategy.pick(levelRuns); + + return Pair.of(levelRuns, pickedRuns); } /** - * Classify manifest files into default-compaction group and LSM group. + * Classify manifest files into default-compaction group and LSM group. Sets instance fields + * {@link #deleteEntries} and {@link #defaultCompactionMap}. * *

    Full compaction: small files and files overlapping delete partitions go into - * defaultCompactionManifests; the rest stay as lsmFiles. + * defaultCompactionMap; the rest are returned as lsmFiles. + * + *

    Non-full compaction: small files go to defaultCompactionMap for minor-style merge; the + * rest are returned as lsmFiles. * - *

    Non-full compaction: delete-overlapping files go to result, small files go to - * defaultCompactionManifests for minor-style merge. + * @return lsmFiles that should participate in LSM-tree compaction */ - private static ClassifyResult classifyManifests( - List input, - long suggestedMetaSize, - ManifestFile manifestFile, - RowType partitionType, - boolean fullCompaction, - @Nullable Integer manifestReadParallelism) { + private List classifyManifests(List input) { // Initialize classification containers and read delete entries - Map defaultCompactionManifests = new LinkedHashMap<>(); + Map classifiedDefaultMap = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); - Set deleteEntries = Collections.emptySet(); + Set classifiedDeleteEntries = Collections.emptySet(); PartitionPredicate predicate = null; if (fullCompaction) { - deleteEntries = + classifiedDeleteEntries = FileEntry.readDeletedEntries(manifestFile, input, manifestReadParallelism); // Build partition predicate from delete entries for overlap detection - if (deleteEntries.isEmpty()) { + if (classifiedDeleteEntries.isEmpty()) { predicate = PartitionPredicate.ALWAYS_FALSE; } else { if (partitionType.getFieldCount() > 0) { Set deletePartitions = - ManifestFileMerger.computeDeletePartitions(deleteEntries); + ManifestFileMerger.computeDeletePartitions(classifiedDeleteEntries); predicate = PartitionPredicate.fromMultiple(partitionType, deletePartitions); } else { predicate = PartitionPredicate.ALWAYS_TRUE; @@ -394,10 +356,14 @@ private static ClassifyResult classifyManifests( file.partitionStats().nullCounts()); if (small || inDeleteRange) { iterator.remove(); - defaultCompactionManifests.put(file, inDeleteRange); + classifiedDefaultMap.put(file, inDeleteRange); } } - return new ClassifyResult(defaultCompactionManifests, lsmFiles, deleteEntries); + + // Set instance fields + this.deleteEntries = classifiedDeleteEntries; + this.defaultCompactionMap = classifiedDefaultMap; + return lsmFiles; } /** @@ 
-405,7 +371,7 @@ private static ClassifyResult classifyManifests( * greedy-scans to build non-overlapping SortedRuns, then assigns levels by totalSize (Top-4 * largest to level 1~4, rest to level 0). */ - static List buildLevelSortedRuns( + static List buildLevelSortedRuns( List input, RecordComparator fieldComparator) { // Step 1: Sort by min value (if equal, then by max value) input.sort( @@ -444,9 +410,10 @@ static List buildLevelSortedRuns( earliestRun.get(earliestRun.size() - 1).partitionStats().maxValues()) >= 0) { // Current file's min >= run's max, append to this run - // Note: When min == max (boundary equality), files are considered non-overlapping - // and can be placed in the same SortedRun. This allows building fewer SortedRuns, - // improving compaction efficiency while maintaining correct sort order. + // Note: When min == max (boundary equality), files are considered + // non-overlapping and can be placed in the same SortedRun. This allows + // building fewer SortedRuns, improving compaction efficiency while + // maintaining correct sort order. earliestRun.add(file); runs.offer(earliestRun); } else { @@ -458,14 +425,14 @@ static List buildLevelSortedRuns( } } - // Step 3: Convert to ManifestSortedRun list - List result = new ArrayList<>(); + // Step 3: Convert to ManifestAdjacentSortedRun list + List result = new ArrayList<>(); while (!runs.isEmpty()) { - result.add(ManifestSortedRun.fromSorted(runs.poll())); + result.add(ManifestAdjacentSortedRun.fromSorted(runs.poll())); } // Step 4: Sort by totalSize and assign levels - result.sort(Comparator.comparingLong(ManifestSortedRun::totalSize)); + result.sort(Comparator.comparingLong(ManifestAdjacentSortedRun::totalSize)); int n = result.size(); int maxLevel = ManifestPickStrategy.MAX_LEVEL; for (int i = 0; i < n; i++) { @@ -510,10 +477,11 @@ static List

    splitIntoSections( for (int i = 1; i < pickedFiles.size(); i++) { ManifestFileMeta file = pickedFiles.get(i); - // Note: Boundary equality (file.min == sectionMaxBound) results in separate sections. - // This avoids merge-sort overhead while maintaining partition filtering capability. - // Files with non-overlapping boundaries (including equal boundaries) can be processed - // independently without significantly impacting partition pruning efficiency. + // Note: Boundary equality (file.min == sectionMaxBound) results in separate + // sections. This avoids merge-sort overhead while maintaining partition filtering + // capability. Files with non-overlapping boundaries (including equal boundaries) + // can be processed independently without significantly impacting partition pruning + // efficiency. if (fieldComparator.compare(file.partitionStats().minValues(), sectionMaxBound) >= 0) { sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); @@ -567,20 +535,9 @@ private static List
    mergeSmallAdjacentSections( return merged; } - /** Unified method to rewrite sections with budget control. */ - private static void rewriteSections( - List
    sections, - Map defaultCompactionMap, - ManifestFile manifestFile, - RecordComparator fieldComparator, - Set deleteEntries, - long suggestedMetaSize, - int suggestedMinMetaCount, - long maxRewriteSize, - RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism, - boolean fullCompaction) + /** Rewrite sections with budget control. */ + private void rewriteSections( + List
    sections, RewriteOutput output, List sortNewFiles) throws Exception { long processedSize = 0; boolean reachedLimit = false; @@ -588,31 +545,13 @@ private static void rewriteSections( for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); if (section.files.size() == 1) { - sortAndRewriteSection( - section.files, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + sortAndRewriteSection(section.files, output, sortNewFiles); continue; } if (processedSize + section.totalSize <= maxRewriteSize) { processedSize += section.totalSize; - sortAndRewriteSection( - section.files, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + sortAndRewriteSection(section.files, output, sortNewFiles); } else if (!reachedLimit) { long rewriteTotalSize = maxRewriteSize - processedSize; processedSize += section.totalSize; @@ -635,16 +574,7 @@ private static void rewriteSections( } } - sortAndRewriteSection( - rewriteFiles, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + sortAndRewriteSection(rewriteFiles, output, sortNewFiles); if (!remainingFiles.isEmpty()) { Section remainingSection = @@ -653,37 +583,18 @@ private static void rewriteSections( } reachedLimit = true; } else if (section.hasDefaultCompactMeta) { - rewriteSubSegments( - section.files, - defaultCompactionMap, - manifestFile, - fieldComparator, - deleteEntries, - suggestedMetaSize, - suggestedMinMetaCount, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + rewriteSubSegments(section.files, output, sortNewFiles); } else { output.addAllUnchanged(section.files); } } } - /** Unified method to rewrite sub-segments with budget control. 
*/ - private static void rewriteSubSegments( + /** Rewrite sub-segments within a section that exceeded the budget. */ + private void rewriteSubSegments( List section, - Map defaultCompactionMap, - ManifestFile manifestFile, - RecordComparator fieldComparator, - Set deleteEntries, - long manifestTargetSize, - int suggestedMinMetaCount, RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism, - boolean fullCompaction) + List sortNewFiles) throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; @@ -691,17 +602,8 @@ private static void rewriteSubSegments( subSegmentSize += m.fileSize(); subSegment.add(m); - if (subSegmentSize >= manifestTargetSize) { - sortAndRewriteSection( - subSegment, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + if (subSegmentSize >= suggestedMetaSize) { + sortAndRewriteSection(subSegment, output, sortNewFiles); subSegment.clear(); subSegmentSize = 0; } @@ -709,16 +611,7 @@ private static void rewriteSubSegments( // Flush tail only if delete entries exist or file count >= minCount. if (!subSegment.isEmpty()) { if (!deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { - sortAndRewriteSection( - subSegment, - manifestFile, - fieldComparator, - deleteEntries, - defaultCompactionMap, - output, - sortNewFiles, - manifestReadParallelism, - fullCompaction); + sortAndRewriteSection(subSegment, output, sortNewFiles); } else { output.addAllUnchanged(subSegment); } @@ -731,16 +624,10 @@ private static void rewriteSubSegments( *

    sortNewFiles is the same reference as newFilesForAbort, ensuring newly written files are * cleaned up on exception by the caller's catch block. */ - private static void sortAndRewriteSection( + private void sortAndRewriteSection( List section, - ManifestFile manifestFile, - RecordComparator fieldComparator, - Set deletedIdentifiers, - Map defaultCompactionMap, RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism, - boolean fullCompaction) + List sortNewFiles) throws Exception { // Skip rewrite for single file not in delete-range. if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { @@ -749,22 +636,9 @@ private static void sortAndRewriteSection( } if (fullCompaction) { - sortAndRewriteFull( - section, - manifestFile, - fieldComparator, - deletedIdentifiers, - output, - sortNewFiles, - manifestReadParallelism); + sortAndRewriteFull(section, output, sortNewFiles); } else { - sortAndRewriteMinor( - section, - manifestFile, - fieldComparator, - output, - sortNewFiles, - manifestReadParallelism); + sortAndRewriteMinor(section, output, sortNewFiles); } } @@ -772,16 +646,12 @@ private static void sortAndRewriteSection( * Full compaction path: read all surviving entries (ADD merged with DELETE), sort them * together, and write to output as a single sorted stream. */ - private static void sortAndRewriteFull( + private void sortAndRewriteFull( List section, - ManifestFile manifestFile, - RecordComparator fieldComparator, - Set deletedIdentifiers, RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism) + List sortNewFiles) throws Exception { - // Read surviving ADD entries: filter out entries cancelled by deletedIdentifiers. + // Read surviving ADD entries: filter out entries cancelled by deleteEntries. 
Function> reader = meta -> { List batch = new ArrayList<>(); @@ -791,7 +661,7 @@ private static void sortAndRewriteFull( meta.fileSize(), FileEntry.addFilter(), Filter.alwaysTrue())) { - if (!deletedIdentifiers.contains(entry.identifier())) { + if (!deleteEntries.contains(entry.identifier())) { batch.add(entry); } } @@ -805,8 +675,7 @@ private static void sortAndRewriteFull( } if (!entries.isEmpty()) { - List sorted = - sortAndWriteEntries(entries, manifestFile, fieldComparator); + List sorted = sortAndWriteEntries(entries); output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } @@ -820,16 +689,12 @@ private static void sortAndRewriteFull( * entries into ADD and DELETE within each file, returning a Pair. Results are merged in the * main thread. */ - private static void sortAndRewriteMinor( + private void sortAndRewriteMinor( List section, - ManifestFile manifestFile, - RecordComparator fieldComparator, RewriteOutput output, - List sortNewFiles, - @Nullable Integer manifestReadParallelism) + List sortNewFiles) throws Exception { // Read and classify ADD/DELETE in one pass per file. - // Returns Pair packed as a singleton list of a wrapper. 
Function, List>>> reader = meta -> { List addBatch = new ArrayList<>(); @@ -846,33 +711,28 @@ private static void sortAndRewriteMinor( }; List addEntries = new ArrayList<>(); - List deleteEntries = new ArrayList<>(); + List minorDeleteEntries = new ArrayList<>(); for (Pair, List> pair : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { addEntries.addAll(pair.getLeft()); - deleteEntries.addAll(pair.getRight()); + minorDeleteEntries.addAll(pair.getRight()); } if (!addEntries.isEmpty()) { - List sorted = - sortAndWriteEntries(addEntries, manifestFile, fieldComparator); + List sorted = sortAndWriteEntries(addEntries); output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } - if (!deleteEntries.isEmpty()) { - List sorted = - sortAndWriteEntries(deleteEntries, manifestFile, fieldComparator); + if (!minorDeleteEntries.isEmpty()) { + List sorted = sortAndWriteEntries(minorDeleteEntries); output.addDeleteFiles(sorted); sortNewFiles.addAll(sorted); } } /** Sort entries and write them to a new manifest file with proper error handling. 
*/ - private static List sortAndWriteEntries( - List entries, - ManifestFile manifestFile, - RecordComparator fieldComparator) + private List sortAndWriteEntries(List entries) throws Exception { entries.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = @@ -969,20 +829,28 @@ public void addDeleteFiles(List files) { private static class MinorCompactOutput implements RewriteOutput { private final List> result; private final Pair indexRange; + private final Map fileNameToIndex; - MinorCompactOutput(List> result, Pair indexRange) { + MinorCompactOutput( + List> result, + Pair indexRange, + Map fileNameToIndex) { this.result = result; this.indexRange = indexRange; + this.fileNameToIndex = fileNameToIndex; } @Override public void addUnchanged(ManifestFileMeta file) { - result.get(indexRange.getLeft()).add(file); + Integer idx = fileNameToIndex.get(file.fileName()); + result.get(idx).add(file); } @Override public void addAllUnchanged(List files) { - result.get(indexRange.getLeft()).addAll(files); + for (ManifestFileMeta file : files) { + addUnchanged(file); + } } @Override @@ -996,30 +864,6 @@ public void addDeleteFiles(List files) { } } - private static class CompactionContext { - final RecordComparator fieldComparator; - final Map defaultCompactionManifests; - final List lsmFiles; - @Nullable final Set deleteEntries; - final List levelRuns; - final List pickedRuns; - - CompactionContext( - RecordComparator fieldComparator, - Map defaultCompactionManifests, - List lsmFiles, - @Nullable Set deleteEntries, - List levelRuns, - List pickedRuns) { - this.fieldComparator = fieldComparator; - this.defaultCompactionManifests = defaultCompactionManifests; - this.lsmFiles = lsmFiles; - this.deleteEntries = deleteEntries; - this.levelRuns = levelRuns; - this.pickedRuns = pickedRuns; - } - } - /** A section of manifest files with pre-computed metadata. 
*/ static class Section { final List files; @@ -1042,20 +886,4 @@ static Section merge(Section a, Section b) { a.hasDefaultCompactMeta || b.hasDefaultCompactMeta); } } - - /** Result of classifying manifest files into default-compaction and LSM groups. */ - private static class ClassifyResult { - final Map defaultCompactionManifests; - final List lsmFiles; - final Set deleteEntries; - - ClassifyResult( - Map defaultCompactionManifests, - List lsmFiles, - Set deleteEntries) { - this.defaultCompactionManifests = defaultCompactionManifests; - this.lsmFiles = lsmFiles; - this.deleteEntries = deleteEntries; - } - } } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java index 3a8693d4dcf0..519c49676ce3 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestPickStrategy.java @@ -56,13 +56,13 @@ public ManifestPickStrategy(int sizeAmpThreshold, int sizeRatioThreshold) { * @param levelRuns runs with assigned levels (level 0~4) * @return list of picked runs to compact */ - public List pick(List levelRuns) { + public List pick(List levelRuns) { if (levelRuns.isEmpty() || levelRuns.size() <= MAX_LEVEL) { return new ArrayList<>(); } // Try SizeAmp first - List sizeAmpResult = pickForSizeAmp(levelRuns); + List sizeAmpResult = pickForSizeAmp(levelRuns); if (sizeAmpResult != null) { return sizeAmpResult; } @@ -78,13 +78,14 @@ public List pick(List levelRuns) { *

    Formula (consistent with {@code UniversalCompaction#pickForSizeAmp}): {@code * lowerLevelTotalSize * 100 > sizeAmpThreshold * highestRunSize} */ - private List pickForSizeAmp(List levelRuns) { + private List pickForSizeAmp( + List levelRuns) { if (levelRuns.isEmpty()) { return null; } // The last run has the highest level (set by buildLevelSortedRuns) - ManifestSortedRun highestRun = levelRuns.get(levelRuns.size() - 1); + ManifestAdjacentSortedRun highestRun = levelRuns.get(levelRuns.size() - 1); int maxLevel = highestRun.level(); if (maxLevel <= 0) { @@ -92,7 +93,7 @@ private List pickForSizeAmp(List levelRuns } long lowerLevelTotalSize = 0; - for (ManifestSortedRun run : levelRuns) { + for (ManifestAdjacentSortedRun run : levelRuns) { if (run.level() < maxLevel) { lowerLevelTotalSize += run.totalSize(); } @@ -117,9 +118,10 @@ private List pickForSizeAmp(List levelRuns *

    Formula (consistent with {@code UniversalCompaction#pickForSizeRatio}): {@code pickedSize * * (100.0 + sizeRatioThreshold) / 100.0 >= nextRunSize} */ - private List pickForSizeRatioAndForce(List levelRuns) { + private List pickForSizeRatioAndForce( + List levelRuns) { // levelRuns is already sorted by level ascending (set by buildLevelSortedRuns) - List picked = new ArrayList<>(); + List picked = new ArrayList<>(); // Always pick the first run to guarantee a non-empty result. picked.add(levelRuns.get(0)); @@ -127,7 +129,7 @@ private List pickForSizeRatioAndForce(List // From the second run onward: forced pick level0/level1, then SizeRatio for the rest. for (int i = 1; i < levelRuns.size(); i++) { - ManifestSortedRun run = levelRuns.get(i); + ManifestAdjacentSortedRun run = levelRuns.get(i); if (run.level() <= 1) { picked.add(run); pickedSize += run.totalSize(); From 869bf333b9c1bca7e6e5798fad65c7b3a09d5492 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 21 May 2026 00:43:09 +0800 Subject: [PATCH 45/51] fix --- docs/generated/core_configuration.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index 012b2929aa85..2a98d2a96b0e 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -931,7 +931,7 @@

    manifest-sort.partition-field
    (none) String - Partition field name to sort manifest entries by. Validated by schema validation, If not configured, defaults to the first partition field. + Partition field name to sort manifest entries by. Validated by schema validation, if not configured, defaults to the first partition field.
    manifest-sort.max-rewrite-size
    From 0b77757d345c973199d4750a876587da23e22969 Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 21 May 2026 16:10:07 +0800 Subject: [PATCH 46/51] static --- .../paimon/operation/ManifestFileMerger.java | 5 +- .../paimon/operation/ManifestFileSorter.java | 401 +++++++++++++----- 2 files changed, 300 insertions(+), 106 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index b10505570baf..fad84521f5fb 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -78,9 +78,8 @@ public static List merge( // If manifest-sort.enabled is enabled and there are partition fields, use // trySortRewrite if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { - ManifestFileSorter sorter = - new ManifestFileSorter(manifestFile, partitionType, options); - return sorter.trySortRewrite(input, newFilesForAbort); + return ManifestFileSorter.trySortRewrite( + input, newFilesForAbort, manifestFile, partitionType, options); } else { // Otherwise try full compaction first, then minor compaction if needed Optional> fullCompacted = diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index d71d9f901502..1ed980856c07 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -61,36 +61,47 @@ public class ManifestFileSorter { private static final Logger LOG = LoggerFactory.getLogger(ManifestFileSorter.class); - // Immutable fields set at construction time - private final ManifestFile manifestFile; - private final RowType partitionType; - private final String sortPartitionField; - - private final 
long suggestedMetaSize; - private final int suggestedMinMetaCount; - private final long fullCompactionThreshold; - private final long maxRewriteSize; - private final int maxSizeAmplificationPercent; - private final int sortedRunSizeRatio; - @Nullable private final Integer manifestReadParallelism; - - // Mutable fields set during prepareCompaction - private boolean fullCompaction; - private RecordComparator fieldComparator; - private Set deleteEntries; - private Map defaultCompactionMap; - - ManifestFileSorter(ManifestFile manifestFile, RowType partitionType, CoreOptions options) { - this.manifestFile = manifestFile; - this.partitionType = partitionType; - this.sortPartitionField = options.manifestSortPartitionField(); - this.suggestedMetaSize = options.manifestTargetSize().getBytes(); - this.suggestedMinMetaCount = options.manifestMergeMinCount(); - this.fullCompactionThreshold = options.manifestFullCompactionThresholdSize().getBytes(); - this.maxRewriteSize = options.manifestSortMaxRewriteSize(); - this.maxSizeAmplificationPercent = options.maxSizeAmplificationPercent(); - this.sortedRunSizeRatio = options.sortedRunSizeRatio(); - this.manifestReadParallelism = options.scanManifestParallelism(); + private ManifestFileSorter() {} + + /** Context object that carries shared state across compaction methods. 
*/ + static class CompactionContext { + final boolean fullCompaction; + final RecordComparator fieldComparator; + final Set deleteEntries; + final Map defaultCompactionMap; + final List levelRuns; + final List pickedRuns; + + CompactionContext( + boolean fullCompaction, + RecordComparator fieldComparator, + Set deleteEntries, + Map defaultCompactionMap, + List levelRuns, + List pickedRuns) { + this.fullCompaction = fullCompaction; + this.fieldComparator = fieldComparator; + this.deleteEntries = deleteEntries; + this.defaultCompactionMap = defaultCompactionMap; + this.levelRuns = levelRuns; + this.pickedRuns = pickedRuns; + } + } + + /** Result of classifying manifest files. */ + private static class ClassifyResult { + final List lsmFiles; + final Set deleteEntries; + final Map defaultCompactionMap; + + ClassifyResult( + List lsmFiles, + Set deleteEntries, + Map defaultCompactionMap) { + this.lsmFiles = lsmFiles; + this.deleteEntries = deleteEntries; + this.defaultCompactionMap = defaultCompactionMap; + } } /** @@ -100,14 +111,51 @@ public class ManifestFileSorter { *

    Dispatches to {@link #tryFullCompact} when totalDeltaFileSize >= sizeTrigger, or {@link * #tryMinorCompact} otherwise. */ - List trySortRewrite( - List input, List newFilesForAbort) + static List trySortRewrite( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + CoreOptions options) throws Exception { - Optional> fullCompacted = tryFullCompact(input, newFilesForAbort); + String sortPartitionField = options.manifestSortPartitionField(); + long suggestedMetaSize = options.manifestTargetSize().getBytes(); + int suggestedMinMetaCount = options.manifestMergeMinCount(); + long fullCompactionThreshold = options.manifestFullCompactionThresholdSize().getBytes(); + long maxRewriteSize = options.manifestSortMaxRewriteSize(); + int maxSizeAmplificationPercent = options.maxSizeAmplificationPercent(); + int sortedRunSizeRatio = options.sortedRunSizeRatio(); + Integer manifestReadParallelism = options.scanManifestParallelism(); + + Optional> fullCompacted = + tryFullCompact( + input, + newFilesForAbort, + manifestFile, + partitionType, + sortPartitionField, + suggestedMetaSize, + suggestedMinMetaCount, + fullCompactionThreshold, + maxRewriteSize, + maxSizeAmplificationPercent, + sortedRunSizeRatio, + manifestReadParallelism); if (fullCompacted.isPresent()) { return fullCompacted.get(); } - return tryMinorCompact(input, newFilesForAbort); + return tryMinorCompact( + input, + newFilesForAbort, + manifestFile, + partitionType, + sortPartitionField, + suggestedMetaSize, + suggestedMinMetaCount, + maxRewriteSize, + maxSizeAmplificationPercent, + sortedRunSizeRatio, + manifestReadParallelism); } /** @@ -116,8 +164,19 @@ List trySortRewrite( *

    Does not build index mapping. sortAndRewriteSection writes all entries (ADD+DELETE merged) * together without separating them. */ - private Optional> tryFullCompact( - List input, List newFilesForAbort) + private static Optional> tryFullCompact( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + String sortPartitionField, + long suggestedMetaSize, + int suggestedMinMetaCount, + long fullCompactionThreshold, + long maxRewriteSize, + int maxSizeAmplificationPercent, + int sortedRunSizeRatio, + @Nullable Integer manifestReadParallelism) throws Exception { // Step 1: Check if full compaction threshold is met long totalDeltaFileSize = 0; @@ -127,17 +186,24 @@ private Optional> tryFullCompact( } } if (totalDeltaFileSize < fullCompactionThreshold) { - this.fullCompaction = false; return Optional.empty(); } - this.fullCompaction = true; // Step 2: Prepare compaction context - Pair, List> runsPair = - prepareCompaction(input); - List levelRuns = runsPair.getLeft(); - List pickedRuns = runsPair.getRight(); - - if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { + CompactionContext ctx = + prepareCompaction( + input, + true, + manifestFile, + partitionType, + sortPartitionField, + suggestedMetaSize, + maxSizeAmplificationPercent, + sortedRunSizeRatio, + manifestReadParallelism); + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + + if (pickedRuns.isEmpty() && ctx.defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort full compact skipped: no runs picked and no defaultCompaction files."); return Optional.empty(); @@ -149,7 +215,7 @@ private Optional> tryFullCompact( input.size(), levelRuns.size(), pickedRuns.size(), - defaultCompactionMap.size()); + ctx.defaultCompactionMap.size()); // Step 3: Collect reused files (not picked) and picked files Set pickedSet = new HashSet<>(pickedRuns); @@ -163,16 +229,25 @@ private Optional> tryFullCompact( for (ManifestAdjacentSortedRun run : pickedRuns) { 
pickedFiles.addAll(run.files()); } - pickedFiles.addAll(defaultCompactionMap.keySet()); + pickedFiles.addAll(ctx.defaultCompactionMap.keySet()); // Step 4: Split into sections and merge small adjacent sections List

    sections = - splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); + splitIntoSections(pickedFiles, ctx.fieldComparator, ctx.defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); // Step 5: Rewrite sections FullCompactOutput output = new FullCompactOutput(result); - rewriteSections(sections, output, newFilesForAbort); + rewriteSections( + sections, + output, + newFilesForAbort, + ctx, + manifestFile, + suggestedMetaSize, + suggestedMinMetaCount, + maxRewriteSize, + manifestReadParallelism); LOG.info( "Manifest sort full compact completed: sections={}, newFiles={}, resultFiles={}.", @@ -188,16 +263,35 @@ private Optional> tryFullCompact( *

    Builds index mapping to preserve original positions. sortAndRewriteSection separates ADD * and DELETE entries, placing ADD at result[minIdx] and DELETE at result[maxIdx]. */ - private List tryMinorCompact( - List input, List newFilesForAbort) + private static List tryMinorCompact( + List input, + List newFilesForAbort, + ManifestFile manifestFile, + RowType partitionType, + String sortPartitionField, + long suggestedMetaSize, + int suggestedMinMetaCount, + long maxRewriteSize, + int maxSizeAmplificationPercent, + int sortedRunSizeRatio, + @Nullable Integer manifestReadParallelism) throws Exception { // Step 1: Prepare compaction context (early-return if nothing to compact) - Pair, List> runsPair = - prepareCompaction(input); - List levelRuns = runsPair.getLeft(); - List pickedRuns = runsPair.getRight(); - - if (pickedRuns.isEmpty() && defaultCompactionMap.isEmpty()) { + CompactionContext ctx = + prepareCompaction( + input, + false, + manifestFile, + partitionType, + sortPartitionField, + suggestedMetaSize, + maxSizeAmplificationPercent, + sortedRunSizeRatio, + manifestReadParallelism); + List levelRuns = ctx.levelRuns; + List pickedRuns = ctx.pickedRuns; + + if (pickedRuns.isEmpty() && ctx.defaultCompactionMap.isEmpty()) { LOG.debug( "Manifest sort minor compact skipped: no runs picked and no defaultCompaction files."); return input; @@ -209,7 +303,7 @@ private List tryMinorCompact( input.size(), levelRuns.size(), pickedRuns.size(), - defaultCompactionMap.size()); + ctx.defaultCompactionMap.size()); // Step 2: Build fileName -> index mapping and initialize 2D result Map fileNameToIndex = new HashMap<>(); @@ -236,7 +330,7 @@ private List tryMinorCompact( for (ManifestAdjacentSortedRun run : pickedRuns) { pickedFiles.addAll(run.files()); } - pickedFiles.addAll(defaultCompactionMap.keySet()); + pickedFiles.addAll(ctx.defaultCompactionMap.keySet()); // Step 4: Compute index range int minIdx = Integer.MAX_VALUE; @@ -252,12 +346,21 @@ private List tryMinorCompact( // 
Step 5: Split into sections and merge small adjacent sections List

    sections = - splitIntoSections(pickedFiles, fieldComparator, defaultCompactionMap); + splitIntoSections(pickedFiles, ctx.fieldComparator, ctx.defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); // Step 6: Rewrite sections MinorCompactOutput output = new MinorCompactOutput(result, indexRange, fileNameToIndex); - rewriteSections(sections, output, newFilesForAbort); + rewriteSections( + sections, + output, + newFilesForAbort, + ctx, + manifestFile, + suggestedMetaSize, + suggestedMinMetaCount, + maxRewriteSize, + manifestReadParallelism); // Step 7: Flatten 2D result into a single list List flatResult = new ArrayList<>(); @@ -275,13 +378,20 @@ private List tryMinorCompact( /** * Prepare compaction context: resolve sort field, classify manifests, build level runs, and - * pick runs for compaction. Sets instance fields: fullCompaction, fieldComparator, - * deleteEntries, defaultCompactionMap. + * pick runs for compaction. * - * @return Pair of (levelRuns, pickedRuns) + * @return CompactionContext containing all shared state */ - private Pair, List> - prepareCompaction(List input) { + private static CompactionContext prepareCompaction( + List input, + boolean fullCompaction, + ManifestFile manifestFile, + RowType partitionType, + String sortPartitionField, + long suggestedMetaSize, + int maxSizeAmplificationPercent, + int sortedRunSizeRatio, + @Nullable Integer manifestReadParallelism) { String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { @@ -289,11 +399,20 @@ private List tryMinorCompact( "Cannot resolve sort field for manifest sort rewrite."); } int sortFieldIndex = partitionType.getFieldNames().indexOf(sortField); - this.fieldComparator = + RecordComparator fieldComparator = CodeGenUtils.newRecordComparator( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); - List lsmFiles = classifyManifests(input); + ClassifyResult classifyResult = + classifyManifests( + input, + 
fullCompaction, + manifestFile, + partitionType, + suggestedMetaSize, + manifestReadParallelism); + List lsmFiles = classifyResult.lsmFiles; + List levelRuns = lsmFiles.isEmpty() ? new ArrayList<>() @@ -303,12 +422,17 @@ private List tryMinorCompact( new ManifestPickStrategy(maxSizeAmplificationPercent, sortedRunSizeRatio); List pickedRuns = pickStrategy.pick(levelRuns); - return Pair.of(levelRuns, pickedRuns); + return new CompactionContext( + fullCompaction, + fieldComparator, + classifyResult.deleteEntries, + classifyResult.defaultCompactionMap, + levelRuns, + pickedRuns); } /** - * Classify manifest files into default-compaction group and LSM group. Sets instance fields - * {@link #deleteEntries} and {@link #defaultCompactionMap}. + * Classify manifest files into default-compaction group and LSM group. * *

    Full compaction: small files and files overlapping delete partitions go into * defaultCompactionMap; the rest are returned as lsmFiles. @@ -316,9 +440,15 @@ private List tryMinorCompact( *

    Non-full compaction: small files go to defaultCompactionMap for minor-style merge; the * rest are returned as lsmFiles. * - * @return lsmFiles that should participate in LSM-tree compaction + * @return ClassifyResult containing lsmFiles, deleteEntries, and defaultCompactionMap */ - private List classifyManifests(List input) { + private static ClassifyResult classifyManifests( + List input, + boolean fullCompaction, + ManifestFile manifestFile, + RowType partitionType, + long suggestedMetaSize, + @Nullable Integer manifestReadParallelism) { // Initialize classification containers and read delete entries Map classifiedDefaultMap = new LinkedHashMap<>(); List lsmFiles = new LinkedList<>(input); @@ -360,10 +490,7 @@ private List classifyManifests(List input) { } } - // Set instance fields - this.deleteEntries = classifiedDeleteEntries; - this.defaultCompactionMap = classifiedDefaultMap; - return lsmFiles; + return new ClassifyResult(lsmFiles, classifiedDeleteEntries, classifiedDefaultMap); } /** @@ -536,8 +663,16 @@ private static List

    mergeSmallAdjacentSections( } /** Rewrite sections with budget control. */ - private void rewriteSections( - List
    sections, RewriteOutput output, List sortNewFiles) + private static void rewriteSections( + List
    sections, + RewriteOutput output, + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + long suggestedMetaSize, + int suggestedMinMetaCount, + long maxRewriteSize, + @Nullable Integer manifestReadParallelism) throws Exception { long processedSize = 0; boolean reachedLimit = false; @@ -545,13 +680,25 @@ private void rewriteSections( for (int i = 0; i < sections.size(); i++) { Section section = sections.get(i); if (section.files.size() == 1) { - sortAndRewriteSection(section.files, output, sortNewFiles); + sortAndRewriteSection( + section.files, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); continue; } if (processedSize + section.totalSize <= maxRewriteSize) { processedSize += section.totalSize; - sortAndRewriteSection(section.files, output, sortNewFiles); + sortAndRewriteSection( + section.files, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); } else if (!reachedLimit) { long rewriteTotalSize = maxRewriteSize - processedSize; processedSize += section.totalSize; @@ -568,13 +715,19 @@ private void rewriteSections( } else { remainingFiles.add(file); remainingSize += file.fileSize(); - if (defaultCompactionMap.containsKey(file)) { + if (ctx.defaultCompactionMap.containsKey(file)) { remainingHasDefault = true; } } } - sortAndRewriteSection(rewriteFiles, output, sortNewFiles); + sortAndRewriteSection( + rewriteFiles, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); if (!remainingFiles.isEmpty()) { Section remainingSection = @@ -583,7 +736,15 @@ private void rewriteSections( } reachedLimit = true; } else if (section.hasDefaultCompactMeta) { - rewriteSubSegments(section.files, output, sortNewFiles); + rewriteSubSegments( + section.files, + output, + sortNewFiles, + ctx, + manifestFile, + suggestedMetaSize, + suggestedMinMetaCount, + manifestReadParallelism); } else { output.addAllUnchanged(section.files); } @@ -591,10 +752,15 @@ private void rewriteSections( } 
/** Rewrite sub-segments within a section that exceeded the budget. */ - private void rewriteSubSegments( + private static void rewriteSubSegments( List section, RewriteOutput output, - List sortNewFiles) + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + long suggestedMetaSize, + int suggestedMinMetaCount, + @Nullable Integer manifestReadParallelism) throws Exception { List subSegment = new ArrayList<>(); long subSegmentSize = 0; @@ -603,15 +769,27 @@ private void rewriteSubSegments( subSegment.add(m); if (subSegmentSize >= suggestedMetaSize) { - sortAndRewriteSection(subSegment, output, sortNewFiles); + sortAndRewriteSection( + subSegment, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); subSegment.clear(); subSegmentSize = 0; } } // Flush tail only if delete entries exist or file count >= minCount. if (!subSegment.isEmpty()) { - if (!deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { - sortAndRewriteSection(subSegment, output, sortNewFiles); + if (!ctx.deleteEntries.isEmpty() || subSegment.size() >= suggestedMinMetaCount) { + sortAndRewriteSection( + subSegment, + output, + sortNewFiles, + ctx, + manifestFile, + manifestReadParallelism); } else { output.addAllUnchanged(subSegment); } @@ -624,21 +802,26 @@ private void rewriteSubSegments( *

    sortNewFiles is the same reference as newFilesForAbort, ensuring newly written files are * cleaned up on exception by the caller's catch block. */ - private void sortAndRewriteSection( + private static void sortAndRewriteSection( List section, RewriteOutput output, - List sortNewFiles) + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + @Nullable Integer manifestReadParallelism) throws Exception { // Skip rewrite for single file not in delete-range. - if (section.size() == 1 && !defaultCompactionMap.getOrDefault(section.get(0), false)) { + if (section.size() == 1 && !ctx.defaultCompactionMap.getOrDefault(section.get(0), false)) { output.addUnchanged(section.get(0)); return; } - if (fullCompaction) { - sortAndRewriteFull(section, output, sortNewFiles); + if (ctx.fullCompaction) { + sortAndRewriteFull( + section, output, sortNewFiles, ctx, manifestFile, manifestReadParallelism); } else { - sortAndRewriteMinor(section, output, sortNewFiles); + sortAndRewriteMinor( + section, output, sortNewFiles, ctx, manifestFile, manifestReadParallelism); } } @@ -646,10 +829,13 @@ private void sortAndRewriteSection( * Full compaction path: read all surviving entries (ADD merged with DELETE), sort them * together, and write to output as a single sorted stream. */ - private void sortAndRewriteFull( + private static void sortAndRewriteFull( List section, RewriteOutput output, - List sortNewFiles) + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + @Nullable Integer manifestReadParallelism) throws Exception { // Read surviving ADD entries: filter out entries cancelled by deleteEntries. 
Function> reader = @@ -661,7 +847,7 @@ private void sortAndRewriteFull( meta.fileSize(), FileEntry.addFilter(), Filter.alwaysTrue())) { - if (!deleteEntries.contains(entry.identifier())) { + if (!ctx.deleteEntries.contains(entry.identifier())) { batch.add(entry); } } @@ -675,7 +861,8 @@ private void sortAndRewriteFull( } if (!entries.isEmpty()) { - List sorted = sortAndWriteEntries(entries); + List sorted = + sortAndWriteEntries(entries, ctx.fieldComparator, manifestFile); output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } @@ -689,10 +876,13 @@ private void sortAndRewriteFull( * entries into ADD and DELETE within each file, returning a Pair. Results are merged in the * main thread. */ - private void sortAndRewriteMinor( + private static void sortAndRewriteMinor( List section, RewriteOutput output, - List sortNewFiles) + List sortNewFiles, + CompactionContext ctx, + ManifestFile manifestFile, + @Nullable Integer manifestReadParallelism) throws Exception { // Read and classify ADD/DELETE in one pass per file. Function, List>>> reader = @@ -719,20 +909,25 @@ private void sortAndRewriteMinor( } if (!addEntries.isEmpty()) { - List sorted = sortAndWriteEntries(addEntries); + List sorted = + sortAndWriteEntries(addEntries, ctx.fieldComparator, manifestFile); output.addSortedFiles(sorted); sortNewFiles.addAll(sorted); } if (!minorDeleteEntries.isEmpty()) { - List sorted = sortAndWriteEntries(minorDeleteEntries); + List sorted = + sortAndWriteEntries(minorDeleteEntries, ctx.fieldComparator, manifestFile); output.addDeleteFiles(sorted); sortNewFiles.addAll(sorted); } } /** Sort entries and write them to a new manifest file with proper error handling. 
*/ - private List sortAndWriteEntries(List entries) + private static List sortAndWriteEntries( + List entries, + RecordComparator fieldComparator, + ManifestFile manifestFile) throws Exception { entries.sort((a, b) -> compareSortKey(a, b, fieldComparator)); RollingFileWriter writer = From 07c4f606c3123e66ee849fd2846199217787747d Mon Sep 17 00:00:00 2001 From: umi Date: Thu, 21 May 2026 16:13:41 +0800 Subject: [PATCH 47/51] fix --- .../java/org/apache/paimon/operation/ManifestFileSorter.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 1ed980856c07..f0dfc1600126 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -61,8 +61,6 @@ public class ManifestFileSorter { private static final Logger LOG = LoggerFactory.getLogger(ManifestFileSorter.class); - private ManifestFileSorter() {} - /** Context object that carries shared state across compaction methods. 
*/ static class CompactionContext { final boolean fullCompaction; From 9da444d0daee1b3c1aa7799abdec7164d0f31291 Mon Sep 17 00:00:00 2001 From: umi Date: Fri, 22 May 2026 11:03:27 +0800 Subject: [PATCH 48/51] minorDelete --- .../paimon/operation/ManifestFileMerger.java | 2 +- .../paimon/operation/ManifestFileSorter.java | 49 +++++++++++++------ 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java index fad84521f5fb..f899aa71786f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileMerger.java @@ -78,7 +78,7 @@ public static List merge( // If manifest-sort.enabled is enabled and there are partition fields, use // trySortRewrite if (options.manifestSortEnabled() && partitionType.getFieldCount() > 0) { - return ManifestFileSorter.trySortRewrite( + return ManifestFileSorter.trySortCompaction( input, newFilesForAbort, manifestFile, partitionType, options); } else { // Otherwise try full compaction first, then minor compaction if needed diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index f0dfc1600126..87a2474b0431 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -106,10 +106,10 @@ private static class ClassifyResult { * Try to sort-rewrite the merged manifest list by a configured partition field. If the sort * field cannot be resolved, the input is returned as-is. * - *

    Dispatches to {@link #tryFullCompact} when totalDeltaFileSize >= sizeTrigger, or {@link - * #tryMinorCompact} otherwise. + *

    Dispatches to {@link #tryFullCompaction} when totalDeltaFileSize >= sizeTrigger, or {@link + * #tryMinorCompaction} otherwise. */ - static List trySortRewrite( + static List trySortCompaction( List input, List newFilesForAbort, ManifestFile manifestFile, @@ -126,7 +126,7 @@ static List trySortRewrite( Integer manifestReadParallelism = options.scanManifestParallelism(); Optional> fullCompacted = - tryFullCompact( + tryFullCompaction( input, newFilesForAbort, manifestFile, @@ -142,7 +142,7 @@ static List trySortRewrite( if (fullCompacted.isPresent()) { return fullCompacted.get(); } - return tryMinorCompact( + return tryMinorCompaction( input, newFilesForAbort, manifestFile, @@ -162,7 +162,7 @@ static List trySortRewrite( *

    Does not build index mapping. sortAndRewriteSection writes all entries (ADD+DELETE merged) * together without separating them. */ - private static Optional> tryFullCompact( + private static Optional> tryFullCompaction( List input, List newFilesForAbort, ManifestFile manifestFile, @@ -234,6 +234,11 @@ private static Optional> tryFullCompact( splitIntoSections(pickedFiles, ctx.fieldComparator, ctx.defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + LOG.info( + "Manifest sort full compact: pickedFiles={}, sections={}.", + pickedFiles.size(), + sections.size()); + // Step 5: Rewrite sections FullCompactOutput output = new FullCompactOutput(result); rewriteSections( @@ -248,9 +253,8 @@ private static Optional> tryFullCompact( manifestReadParallelism); LOG.info( - "Manifest sort full compact completed: sections={}, newFiles={}, resultFiles={}.", - sections.size(), - newFilesForAbort.size(), + "Manifest sort full compact completed: input={}, resultFiles={}.", + input.size(), result.size()); return Optional.of(result); } @@ -261,7 +265,7 @@ private static Optional> tryFullCompact( *

    Builds index mapping to preserve original positions. sortAndRewriteSection separates ADD * and DELETE entries, placing ADD at result[minIdx] and DELETE at result[maxIdx]. */ - private static List tryMinorCompact( + private static List tryMinorCompaction( List input, List newFilesForAbort, ManifestFile manifestFile, @@ -347,6 +351,11 @@ private static List tryMinorCompact( splitIntoSections(pickedFiles, ctx.fieldComparator, ctx.defaultCompactionMap); sections = mergeSmallAdjacentSections(sections, suggestedMetaSize); + LOG.info( + "Manifest sort minor compact: pickedFiles={}, sections={}.", + pickedFiles.size(), + sections.size()); + // Step 6: Rewrite sections MinorCompactOutput output = new MinorCompactOutput(result, indexRange, fileNameToIndex); rewriteSections( @@ -367,9 +376,8 @@ private static List tryMinorCompact( } LOG.info( - "Manifest sort minor compact completed: sections={}, newFiles={}, resultFiles={}.", - sections.size(), - newFilesForAbort.size(), + "Manifest sort minor compact completed: input={}, resultFiles={}.", + input.size(), flatResult.size()); return flatResult; } @@ -391,6 +399,7 @@ private static CompactionContext prepareCompaction( int sortedRunSizeRatio, @Nullable Integer manifestReadParallelism) { + // Step 1: Resolve sort field and build comparator for partition ordering. String sortField = resolveSortField(sortPartitionField, partitionType); if (sortField == null) { throw new IllegalArgumentException( @@ -401,6 +410,7 @@ private static CompactionContext prepareCompaction( CodeGenUtils.newRecordComparator( partitionType.getFieldTypes(), new int[] {sortFieldIndex}); + // Step 2: Classify manifests into LSM files and collect delete entries. ClassifyResult classifyResult = classifyManifests( input, @@ -411,11 +421,13 @@ private static CompactionContext prepareCompaction( manifestReadParallelism); List lsmFiles = classifyResult.lsmFiles; + // Step 3: Build level-sorted runs from LSM files based on partition order. 
List levelRuns = lsmFiles.isEmpty() ? new ArrayList<>() : buildLevelSortedRuns(lsmFiles, fieldComparator); + // Step 4: Pick runs for compaction using size amplification and ratio strategy. ManifestPickStrategy pickStrategy = new ManifestPickStrategy(maxSizeAmplificationPercent, sortedRunSizeRatio); List pickedRuns = pickStrategy.pick(levelRuns); @@ -898,14 +910,21 @@ private static void sortAndRewriteMinor( return singletonList(Pair.of(addBatch, deleteBatch)); }; - List addEntries = new ArrayList<>(); + Map addMap = new HashMap<>(); List minorDeleteEntries = new ArrayList<>(); for (Pair, List> pair : sequentialBatchedExecute(reader, section, manifestReadParallelism)) { - addEntries.addAll(pair.getLeft()); + for (ManifestEntry entry : pair.getLeft()) { + addMap.put(entry.identifier(), entry); + } minorDeleteEntries.addAll(pair.getRight()); } + // Cancel out ADD+DELETE pairs with the same identifier within the section. + minorDeleteEntries.removeIf( + manifestEntry -> addMap.remove(manifestEntry.identifier()) != null); + List addEntries = new ArrayList<>(addMap.values()); + if (!addEntries.isEmpty()) { List sorted = sortAndWriteEntries(addEntries, ctx.fieldComparator, manifestFile); From 26d0e1629aacc23e76b5f30d9654480dd159ecbd Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 3 Jun 2026 20:12:36 +0800 Subject: [PATCH 49/51] addComment --- .../operation/ManifestAdjacentSortedRun.java | 12 + .../paimon/operation/ManifestFileSorter.java | 61 +++++- .../paimon/manifest/ManifestFileMetaTest.java | 206 ++++++++++++++++++ 3 files changed, 272 insertions(+), 7 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java index 4e1db69fb6dd..ad584e9356a7 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java @@ 
-30,6 +30,18 @@ * partition field (the configured manifest sort field). The intervals {@code * [partitionStats.minValues[k], partitionStats.maxValues[k]]} of these manifests do not overlap on * field {@code k}, where {@code k} is the configured sort field index. + * + *

    Ordering Invariant: The global order of manifests across all SortedRuns must be + * maintained by the sort key (partition field). When a section is split due to budget limits + * (manifest-sort.max-rewrite-size), the remaining files are appended to the processing queue and + * will be handled in subsequent iterations. This tail-appending mechanism preserves the global sort + * order because the remaining files naturally have larger key values than the currently processed + * ones. + * + *

    Boundary Equality: Files with boundary-touching intervals (min == previous.max) are + * considered non-overlapping and can be placed in the same SortedRun. This reduces the number of + * runs and improves compaction efficiency. However, such files may be separated into different + * Sections during splitIntoSections to avoid merge-sort overhead. */ public class ManifestAdjacentSortedRun { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 87a2474b0431..6bd2fb4a4cfb 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -550,7 +550,11 @@ static List buildLevelSortedRuns( // Note: When min == max (boundary equality), files are considered // non-overlapping and can be placed in the same SortedRun. This allows // building fewer SortedRuns, improving compaction efficiency while - // maintaining correct sort order. + // maintaining correct sort order. However, these files may later be separated + // into different Sections during splitIntoSections to avoid merge-sort overhead. + // + // See ManifestAdjacentSortedRun class comment for the full boundary equality + // semantics. earliestRun.add(file); runs.offer(earliestRun); } else { @@ -615,10 +619,19 @@ static List

    splitIntoSections( for (int i = 1; i < pickedFiles.size(); i++) { ManifestFileMeta file = pickedFiles.get(i); // Note: Boundary equality (file.min == sectionMaxBound) results in separate - // sections. This avoids merge-sort overhead while maintaining partition filtering - // capability. Files with non-overlapping boundaries (including equal boundaries) - // can be processed independently without significantly impacting partition pruning - // efficiency. + // sections. This design choice balances three factors: + // 1. Avoid merge-sort overhead: Files with non-overlapping boundaries can be processed + // independently without merge-sort, improving performance. + // 2. Maintain partition filtering capability: Each section has a distinct key range, + // enabling efficient partition pruning during queries. + // 3. Preserve ordering invariant: Separating boundary-touching files into different + // sections + // does not break the global sort order, as they are still processed in ascending + // order. + // + // IMPORTANT: While boundary-touching files are separated into different Sections here, + // they may be placed in the same SortedRun during buildLevelSortedRuns (which uses >= 0 + // comparison). This dual behavior is intentional and documented in class comments. if (fieldComparator.compare(file.partitionStats().minValues(), sectionMaxBound) >= 0) { sections.add(new Section(currentFiles, currentTotalSize, currentHasDefault)); currentFiles = new ArrayList<>(); @@ -672,7 +685,25 @@ private static List
    mergeSmallAdjacentSections( return merged; } - /** Rewrite sections with budget control. */ + /** + * Rewrite sections with budget control. + * + *

    Semantics of manifest-sort.max-rewrite-size: This budget applies only to the sorted + * rewrite portion. When the cumulative size reaches the limit: + * + *

      + *
    • First overflow: The current section is split. The rewritable part is sorted and + * rewritten. The remaining part is appended back to the sections queue for later + * processing. + *
    • Subsequent overflows: If the section has files in defaultCompactionMap (needs default + * compaction), rewriteSubSegments is called to process it in smaller chunks. Otherwise, + * the section is skipped. + *
    + * + *

    This design ensures that the budget only limits the aggressive sort rewrite, while still + * allowing necessary cleanup operations (delete entry elimination, small file merge) through + * the rewriteSubSegments fallback path. + */ private static void rewriteSections( List

    sections, RewriteOutput output, @@ -761,7 +792,23 @@ private static void rewriteSections( } } - /** Rewrite sub-segments within a section that exceeded the budget. */ + /** + * Rewrite a section in smaller sub-segments when it exceeds the sort rewrite budget. + * + *

    Semantics difference from old minor merge: In the old ManifestFileMerger path, the + * trailing candidates are kept unchanged when their count is below manifest.merge-min-count. In + * this sort path, rewriteSubSegments is triggered when defaultCompactionMap is non-empty, + * regardless of the manifest count. This is because files in defaultCompactionMap either: + * + *

      + *
    • Are small files needing consolidation + *
    • Contain delete entries that must be eliminated + *
    + * + *

    The manifest.merge-min-count threshold is still applied to the final sub-segment's tail, + * acting as a conservative gate to avoid unnecessary rewrite when there are no delete entries + * and the tail is too small. + */ private static void rewriteSubSegments( List section, RewriteOutput output, diff --git a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java index 462ab337ee73..75a1ab0a84df 100644 --- a/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/manifest/ManifestFileMetaTest.java @@ -1255,6 +1255,212 @@ public void testManifestSortWithMultiplePartitions() { } } + /** + * Test that when manifest-sort.max-rewrite-size budget is exceeded in the middle of a section, + * the remaining files are appended to the tail and the final manifest order is preserved. + * + *

    Design: + * + *

    +     *   - Create a large section with overlapping partition ranges that exceeds the budget
    +     *   - Set a small manifest-sort.max-rewrite-size to force budget split
    +     *   - Verify that after merge, all manifests are globally sorted by partition field
    +     *   - Verify that entries are equivalent (no data loss)
    +     * 
    + */ + @Test + public void testManifestSortBudgetSplitPreservesOrder() { + // Create manifests with overlapping ranges, large enough to exceed budget + List input = new ArrayList<>(); + + // Manifest A: partitions [0, 10] - large size + List entriesA = new ArrayList<>(); + for (int p = 0; p <= 10; p++) { + entriesA.add(makeEntry(true, String.format("A-p%d", p), p)); + } + ManifestFileMeta manifestA = makeManifest(entriesA.toArray(new ManifestEntry[0])); + // Manually increase file size to simulate large manifest + input.add( + new ManifestFileMeta( + manifestA.fileName(), + 100, + manifestA.numAddedFiles(), + manifestA.numDeletedFiles(), + manifestA.partitionStats(), + manifestA.schemaId(), + manifestA.minBucket(), + manifestA.maxBucket(), + manifestA.minLevel(), + manifestA.maxLevel(), + manifestA.minRowId(), + manifestA.maxRowId())); + + // Manifest B: partitions [5, 15] - overlaps with A + List entriesB = new ArrayList<>(); + for (int p = 5; p <= 15; p++) { + entriesB.add(makeEntry(true, String.format("B-p%d", p), p)); + } + ManifestFileMeta manifestB = makeManifest(entriesB.toArray(new ManifestEntry[0])); + input.add( + new ManifestFileMeta( + manifestB.fileName(), + 100, + manifestB.numAddedFiles(), + manifestB.numDeletedFiles(), + manifestB.partitionStats(), + manifestB.schemaId(), + manifestB.minBucket(), + manifestB.maxBucket(), + manifestB.minLevel(), + manifestB.maxLevel(), + manifestB.minRowId(), + manifestB.maxRowId())); + + // Manifest C: partitions [10, 20] - overlaps with B + List entriesC = new ArrayList<>(); + for (int p = 10; p <= 20; p++) { + entriesC.add(makeEntry(true, String.format("C-p%d", p), p)); + } + ManifestFileMeta manifestC = makeManifest(entriesC.toArray(new ManifestEntry[0])); + input.add( + new ManifestFileMeta( + manifestC.fileName(), + 100, + manifestC.numAddedFiles(), + manifestC.numDeletedFiles(), + manifestC.partitionStats(), + manifestC.schemaId(), + manifestC.minBucket(), + manifestC.maxBucket(), + manifestC.minLevel(), + 
manifestC.maxLevel(), + manifestC.minRowId(), + manifestC.maxRowId())); + + // Set small budget to force split + Options testOptions = new Options(); + testOptions.set("manifest-sort.enabled", "true"); + testOptions.set("manifest-sort.max-rewrite-size", "150B"); // Total input size is 300B + + List merged = + ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Verify entries are equivalent + assertEquivalentEntries(input, merged); + + // Verify global ordering: all manifests sorted by partition min value + for (int i = 1; i < merged.size(); i++) { + BinaryRow prevMin = merged.get(i - 1).partitionStats().minValues(); + BinaryRow currMin = merged.get(i).partitionStats().minValues(); + assertThat(currMin.getInt(0)) + .as("Manifests should be globally sorted by partition field") + .isGreaterThanOrEqualTo(prevMin.getInt(0)); + } + + // Verify entries within each manifest are sorted + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + assertThat(currPartition) + .as("Entries within manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevPartition); + } + } + } + + /** + * Test boundary equality (min == previous.max) handling in both SortedRun construction and + * Section splitting. Boundary-touching files should be allowed in the same SortedRun but may be + * separated into different Sections. + * + *

    Design: + * + *

    +     *   - Create manifests with boundary-touching partition ranges
    +     *   - Manifest A: [0, 5]
    +     *   - Manifest B: [5, 10] (min == A.max, boundary touching)
    +     *   - Manifest C: [10, 15] (min == B.max, boundary touching)
    +     *   - Verify they can be in the same SortedRun (>= comparison)
    +     *   - Verify they may be split into different Sections (new Section when min >= section max)
    +     * 
    + */ + @Test + public void testBoundaryEqualityHandling() { + List input = new ArrayList<>(); + + // Manifest A: partitions [0, 5] + List entriesA = new ArrayList<>(); + for (int p = 0; p <= 5; p++) { + entriesA.add(makeEntry(true, String.format("A-p%d", p), p)); + } + input.add(makeManifest(entriesA.toArray(new ManifestEntry[0]))); + + // Manifest B: partitions [5, 10] - boundary touches A (min == A.max) + List entriesB = new ArrayList<>(); + for (int p = 5; p <= 10; p++) { + entriesB.add(makeEntry(true, String.format("B-p%d", p), p)); + } + input.add(makeManifest(entriesB.toArray(new ManifestEntry[0]))); + + // Manifest C: partitions [10, 15] - boundary touches B (min == B.max) + List entriesC = new ArrayList<>(); + for (int p = 10; p <= 15; p++) { + entriesC.add(makeEntry(true, String.format("C-p%d", p), p)); + } + input.add(makeManifest(entriesC.toArray(new ManifestEntry[0]))); + + Options testOptions = new Options(); + testOptions.set("manifest-sort.enabled", "true"); + + List merged = + ManifestFileMerger.merge( + input, + manifestFile, + getPartitionType(), + CoreOptions.fromMap(testOptions.toMap())); + + // Verify entries are equivalent + assertEquivalentEntries(input, merged); + + // Verify all manifests maintain global sort order + for (int i = 1; i < merged.size(); i++) { + BinaryRow prevMin = merged.get(i - 1).partitionStats().minValues(); + BinaryRow prevMax = merged.get(i - 1).partitionStats().maxValues(); + BinaryRow currMin = merged.get(i).partitionStats().minValues(); + + // Boundary-touching is allowed: currMin >= prevMin + assertThat(currMin.getInt(0)) + .as("Global order should be maintained with boundary-touching allowed") + .isGreaterThanOrEqualTo(prevMin.getInt(0)); + + // Log boundary equality cases for documentation + if (currMin.getInt(0) == prevMax.getInt(0)) { + System.out.println( + String.format( + "Boundary equality detected: manifest[%d].min=%d == manifest[%d].max=%d", + i, currMin.getInt(0), i - 1, prevMax.getInt(0))); + } + } + 
+ // Verify entries within each manifest are sorted + for (ManifestFileMeta meta : merged) { + List entries = manifestFile.read(meta.fileName(), meta.fileSize()); + for (int i = 1; i < entries.size(); i++) { + int prevPartition = entries.get(i - 1).partition().getInt(0); + int currPartition = entries.get(i).partition().getInt(0); + assertThat(currPartition) + .as("Entries within manifest should be sorted by partition") + .isGreaterThanOrEqualTo(prevPartition); + } + } + } + /** Create a ManifestEntry with a 3-field partition row (region, dt, hour). */ private ManifestEntry makeMultiPartEntry( boolean isAdd, String fileName, int region, int dt, int hour) { From d39293b4f2c5bc62aba211781991eaaae613b99f Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 3 Jun 2026 21:13:53 +0800 Subject: [PATCH 50/51] fix --- docs/generated/core_configuration.html | 2 +- .../src/main/java/org/apache/paimon/CoreOptions.java | 2 +- .../apache/paimon/operation/ManifestAdjacentSortedRun.java | 7 ------- .../org/apache/paimon/operation/ManifestFileSorter.java | 1 + 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index 2a98d2a96b0e..14585d66c72c 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -937,7 +937,7 @@
    manifest-sort.max-rewrite-size
    256 mb MemorySize - Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. + Maximum total size of manifest files to rewrite in a single sort rewrite pass. Sections exceeding this limit are skipped. Set to a larger value to allow more aggressive sort rewriting. The cap only limits the sorted rewrite portion and full/minor cleanup may still happen beyond it.
    manifest.target-file-size
    diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index f5819c82182f..0d9abc82fbdf 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -493,7 +493,7 @@ public InlineElement getDescription() { "Maximum total size of manifest files to rewrite in a single" + " sort rewrite pass. Sections exceeding this limit are" + " skipped. Set to a larger value to allow more aggressive" - + " sort rewriting."); + + " sort rewriting. The cap only limits the sorted rewrite portion and full/minor cleanup may still happen beyond it."); public static final ConfigOption UPSERT_KEY = key("upsert-key") diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java index ad584e9356a7..ca0797c2139c 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestAdjacentSortedRun.java @@ -31,13 +31,6 @@ * [partitionStats.minValues[k], partitionStats.maxValues[k]]} of these manifests do not overlap on * field {@code k}, where {@code k} is the configured sort field index. * - *

    Ordering Invariant: The global order of manifests across all SortedRuns must be - * maintained by the sort key (partition field). When a section is split due to budget limits - * (manifest-sort.max-rewrite-size), the remaining files are appended to the processing queue and - * will be handled in subsequent iterations. This tail-appending mechanism preserves the global sort - * order because the remaining files naturally have larger key values than the currently processed - * ones. - * *

    Boundary Equality: Files with boundary-touching intervals (min == previous.max) are * considered non-overlapping and can be placed in the same SortedRun. This reduces the number of * runs and improves compaction efficiency. However, such files may be separated into different diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java index 6bd2fb4a4cfb..39ef0bab5299 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/ManifestFileSorter.java @@ -773,6 +773,7 @@ private static void rewriteSections( if (!remainingFiles.isEmpty()) { Section remainingSection = new Section(remainingFiles, remainingSize, remainingHasDefault); + // global manifest file metas order by sort key is not a required invariant sections.add(remainingSection); } reachedLimit = true; From 0a96fae1741f808f0e9a55e16b371f4502295b65 Mon Sep 17 00:00:00 2001 From: umi Date: Wed, 3 Jun 2026 23:00:28 +0800 Subject: [PATCH 51/51] addComments --- docs/generated/core_configuration.html | 4 +-- .../java/org/apache/paimon/CoreOptions.java | 35 +++++++++++++++++-- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/docs/generated/core_configuration.html b/docs/generated/core_configuration.html index 14585d66c72c..ac105664faea 100644 --- a/docs/generated/core_configuration.html +++ b/docs/generated/core_configuration.html @@ -919,13 +919,13 @@

    manifest.merge-min-count
    30 Integer - To avoid frequent manifest merges, this parameter specifies the minimum number of ManifestFileMeta to merge. + To avoid frequent manifest merges, this parameter specifies the minimum number of ManifestFileMeta to merge.
    Note: when 'manifest-sort.enabled' is true, this minimum-count gate is only applied to the trailing sub-segment of a section that exceeds 'manifest-sort.max-rewrite-size'. Small under-budget sections are sorted and rewritten directly, so two small manifest files may be merged into one even when their count is below this threshold and full compaction is not triggered.
    manifest-sort.enabled
    false Boolean - Whether to invoke manifest sort rewrite during commit. + Whether to invoke manifest sort rewrite during commit.
    Note: enabling this changes the semantics of 'manifest.merge-min-count'. In the sort rewrite path, small manifest files within the rewrite budget are sorted and merged directly, so the minimum-count gate no longer prevents merging a small number of under-budget manifest files when full compaction is not triggered.
    manifest-sort.partition-field
    diff --git a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java index 0d9abc82fbdf..2518fc75643f 100644 --- a/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java +++ b/paimon-api/src/main/java/org/apache/paimon/CoreOptions.java @@ -468,14 +468,43 @@ public InlineElement getDescription() { .intType() .defaultValue(30) .withDescription( - "To avoid frequent manifest merges, this parameter specifies the minimum number " - + "of ManifestFileMeta to merge."); + Description.builder() + .text( + "To avoid frequent manifest merges, this parameter specifies the minimum number " + + "of ManifestFileMeta to merge.") + .linebreak() + .text( + "Note: when '" + + "manifest-sort.enabled" + + "' is true, this minimum-count gate is only " + + "applied to the trailing sub-segment of a " + + "section that exceeds '" + + "manifest-sort.max-rewrite-size" + + "'. Small under-budget sections are sorted " + + "and rewritten directly, so two small manifest " + + "files may be merged into one even when their " + + "count is below this threshold and full " + + "compaction is not triggered.") + .build()); public static final ConfigOption MANIFEST_SORT_ENABLED = key("manifest-sort.enabled") .booleanType() .defaultValue(false) - .withDescription("Whether to invoke manifest sort rewrite during commit."); + .withDescription( + Description.builder() + .text("Whether to invoke manifest sort rewrite during commit.") + .linebreak() + .text( + "Note: enabling this changes the semantics of '" + + "manifest.merge-min-count" + + "'. In the sort rewrite path, small manifest " + + "files within the rewrite budget are sorted " + + "and merged directly, so the minimum-count " + + "gate no longer prevents merging a small " + + "number of under-budget manifest files when " + + "full compaction is not triggered.") + .build()); public static final ConfigOption MANIFEST_SORT_PARTITION_FIELD = key("manifest-sort.partition-field")