Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ paimon-python/dist/
paimon-python/*.egg-info/
paimon-python/dev/log
paimon-spark/paimon-spark-ut/PaimonLambdaFunctionfunction_test.java
paimon-python/build/

### Misc ###
*.swp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,13 @@ public boolean intersects(long start, long end) {
return candidate < starts.length && starts[candidate] <= end;
}

/**
 * Tests whether the given range lies entirely inside one range of this index.
 *
 * <p>Only the single entry at the position returned by {@code lowerBound(ends, range.from)} is
 * inspected, so a range that spans several separately stored entries is reported as not
 * contained. NOTE(review): presumably adjacent or overlapping input ranges are merged when the
 * index is created - confirm against {@code create}.
 *
 * @param range the range to look up; its {@code from} and {@code to} bounds are read
 * @return {@code true} if the inspected entry starts at or before {@code range.from} and ends
 *     at or after {@code range.to}; {@code false} otherwise, including when the lookup position
 *     falls outside the index
 */
public boolean contains(Range range) {
    int pos = lowerBound(ends, range.from);
    if (pos >= starts.length) {
        // The lookup position is past the last stored range: nothing can cover the request.
        return false;
    }
    boolean coversStart = starts[pos] <= range.from;
    return coversStart && ends[pos] >= range.to;
}

public List<Range> intersectedRanges(long start, long end) {
int left = lowerBound(ends, start);
if (left >= ranges.size()) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.utils;

import org.junit.jupiter.api.Test;

import java.util.Arrays;

import static org.assertj.core.api.Assertions.assertThat;

/** Unit tests for the range lookups offered by {@link RowRangeIndex}. */
class RowRangeIndexTest {

    @Test
    void testContains() {
        RowRangeIndex index = indexOf(new Range(0, 99), new Range(100, 149), new Range(200, 299));

        // Spans the two adjacent input ranges [0, 99] and [100, 149]; expected to be covered.
        Range acrossAdjacentRanges = new Range(0, 149);
        // Starts inside [0, 99] and ends inside [100, 149]; expected to be covered.
        Range straddlingBoundary = new Range(50, 120);
        // Lies completely in the gap between 149 and 200; expected not to be covered.
        Range insideGap = new Range(150, 199);
        // Starts in [100, 149] but crosses the gap into [200, 299]; expected not to be covered.
        Range crossingGap = new Range(100, 200);

        assertThat(index.contains(acrossAdjacentRanges)).isTrue();
        assertThat(index.contains(straddlingBoundary)).isTrue();
        assertThat(index.contains(insideGap)).isFalse();
        assertThat(index.contains(crossingGap)).isFalse();
    }

    /** Builds an index from the given ranges, in the order supplied. */
    private static RowRangeIndex indexOf(Range... ranges) {
        return RowRangeIndex.create(Arrays.asList(ranges));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ public class BTreeGlobalIndexBuilder implements Serializable {
// readRowType is composed by partition fields, indexed field and _ROW_ID field
private RowType readRowType;
@Nullable private Snapshot snapshot;
@Nullable private Long scanSnapshotId;

@Nullable private PartitionPredicate partitionPredicate;

Expand Down Expand Up @@ -133,6 +134,10 @@ public BTreeGlobalIndexBuilder withSnapshot(Snapshot snapshot) {
return this;
}

/**
 * Returns the snapshot id recorded by the most recent {@code scan()} or {@code
 * incrementalScan()} call.
 *
 * <p>Both scan methods store the id of the snapshot they resolved, or reset the stored value to
 * {@code null} when no snapshot was available.
 *
 * @return the recorded snapshot id, or an empty {@link Optional} when none is recorded
 */
public Optional<Long> scanSnapshotId() {
    if (scanSnapshotId == null) {
        return Optional.empty();
    }
    return Optional.of(scanSnapshotId);
}

public Optional<Pair<RowRangeIndex, List<DataSplit>>> scan() {
SnapshotReader snapshotReader = table.newSnapshotReader();
if (partitionPredicate != null) {
Expand All @@ -143,8 +148,10 @@ public Optional<Pair<RowRangeIndex, List<DataSplit>>> scan() {
? this.snapshot
: snapshotReader.snapshotManager().latestSnapshot();
if (snapshot == null) {
scanSnapshotId = null;
return Optional.empty();
}
scanSnapshotId = snapshot.id();
snapshotReader = withManifestEntryFilter(snapshotReader.withSnapshot(snapshot));
Range dataRange = new Range(0, snapshot.nextRowId() - 1);

Expand All @@ -164,8 +171,10 @@ public Optional<Pair<RowRangeIndex, List<DataSplit>>> incrementalScan() {
? this.snapshot
: snapshotReader.snapshotManager().latestSnapshot();
if (snapshot == null) {
scanSnapshotId = null;
return Optional.empty();
}
scanSnapshotId = snapshot.id();
snapshotReader = withManifestEntryFilter(snapshotReader.withSnapshot(snapshot));

Preconditions.checkArgument(indexField != null, "indexField must be set before scan.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,13 @@ public int commit(ManifestCommittable committable, boolean checkAppendFiles) {
checkAppendFiles = true;
allowRollback = true;
}
if (changes.appendIndexFiles.stream()
.anyMatch(
entry ->
entry.kind() == FileKind.ADD
&& entry.indexFile().globalIndexMeta() != null)) {
checkAppendFiles = true;
}

attempts +=
tryCommit(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.paimon.data.BinaryRow;
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.index.DeletionVectorMeta;
import org.apache.paimon.index.GlobalIndexMeta;
import org.apache.paimon.index.IndexFileHandler;
import org.apache.paimon.index.IndexFileMeta;
import org.apache.paimon.io.DataFileMeta;
Expand All @@ -37,7 +38,9 @@
import org.apache.paimon.types.RowType;
import org.apache.paimon.utils.FileStorePathFactory;
import org.apache.paimon.utils.Pair;
import org.apache.paimon.utils.Range;
import org.apache.paimon.utils.RangeHelper;
import org.apache.paimon.utils.RowRangeIndex;
import org.apache.paimon.utils.SnapshotManager;

import org.slf4j.Logger;
Expand Down Expand Up @@ -237,6 +240,11 @@ public Optional<RuntimeException> checkConflicts(
return exception;
}

exception = checkGlobalIndexRowIdExistence(baseEntries, deltaIndexEntries);
if (exception.isPresent()) {
return exception;
}

return checkForRowIdFromSnapshot(
latestSnapshot, deltaEntries, deltaIndexEntries, rowIdColumnConflictChecker);
}
Expand Down Expand Up @@ -544,6 +552,55 @@ private Optional<RuntimeException> checkForRowIdFromSnapshot(
return Optional.empty();
}

/**
 * Verifies that each global index file added by the commit only references row IDs that are
 * covered by the row ranges of the base data files.
 *
 * <p>The check is skipped when data evolution is disabled or when the index changes contain no
 * added global index file.
 *
 * @param baseEntries data file entries; only {@code ADD} entries with a non-null first row ID
 *     contribute a row range
 * @param deltaIndexEntries index manifest changes of the commit being checked
 * @return a conflict exception for the first added global index whose row range is not
 *     contained in the index built from the base row ranges, or empty if every one is contained
 */
private Optional<RuntimeException> checkGlobalIndexRowIdExistence(
        List<SimpleFileEntry> baseEntries, List<IndexManifestEntry> deltaIndexEntries) {
    if (!dataEvolutionEnabled) {
        return Optional.empty();
    }

    List<IndexManifestEntry> addedGlobalIndexes = globalIndexFileAdditions(deltaIndexEntries);
    if (addedGlobalIndexes.isEmpty()) {
        return Optional.empty();
    }

    // Collect the row ID range of every base data file that carries row IDs.
    List<Range> knownRowRanges = new ArrayList<>();
    for (SimpleFileEntry dataFile : baseEntries) {
        if (dataFile.kind() != FileKind.ADD || dataFile.firstRowId() == null) {
            continue;
        }
        knownRowRanges.add(dataFile.nonNullRowIdRange());
    }
    RowRangeIndex coverage = RowRangeIndex.create(knownRowRanges);

    for (IndexManifestEntry added : addedGlobalIndexes) {
        IndexFileMeta indexFile = added.indexFile();
        GlobalIndexMeta meta = indexFile.globalIndexMeta();
        checkState(meta != null, "Global index meta must not be null.");

        Range referenced = meta.rowRange();
        if (coverage.contains(referenced)) {
            continue;
        }

        // Report the first uncovered range; later index files are not inspected.
        String message =
                String.format(
                        "Global index row ID existence conflict: index file '%s' references "
                                + "row range %s, but this range is not fully covered by current "
                                + "data files. The referenced row IDs may have been reassigned or "
                                + "removed by a concurrent commit.",
                        indexFile.fileName(), referenced);
        return Optional.of(new RuntimeException(message));
    }
    return Optional.empty();
}

/**
 * Selects the index manifest entries that add a global index file.
 *
 * @param indexFileChanges index manifest entries to filter
 * @return a new list, in input order, of the entries whose kind is {@code ADD} and whose index
 *     file has non-null global index metadata
 */
private List<IndexManifestEntry> globalIndexFileAdditions(
        List<IndexManifestEntry> indexFileChanges) {
    List<IndexManifestEntry> additions = new ArrayList<>();
    for (IndexManifestEntry change : indexFileChanges) {
        if (change.kind() != FileKind.ADD) {
            continue;
        }
        if (change.indexFile().globalIndexMeta() == null) {
            continue;
        }
        additions.add(change);
    }
    return additions;
}

Optional<RuntimeException> checkRowIdExistence(
List<SimpleFileEntry> baseEntries,
List<SimpleFileEntry> deltaEntries,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ public static List<BinaryRow> changedPartitions(
changedPartitions.add(file.partition());
}
for (IndexManifestEntry file : indexFileChanges) {
if (file.indexFile().indexType().equals(DELETION_VECTORS_INDEX)) {
if (file.indexFile().indexType().equals(DELETION_VECTORS_INDEX)
|| file.indexFile().globalIndexMeta() != null) {
changedPartitions.add(file.partition());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,11 @@
import org.apache.paimon.deletionvectors.DeletionVector;
import org.apache.paimon.fs.Path;
import org.apache.paimon.fs.local.LocalFileIO;
import org.apache.paimon.index.GlobalIndexMeta;
import org.apache.paimon.index.IndexFileHandler;
import org.apache.paimon.index.IndexFileMeta;
import org.apache.paimon.io.CompactIncrement;
import org.apache.paimon.io.DataIncrement;
import org.apache.paimon.manifest.FileKind;
import org.apache.paimon.manifest.IndexManifestEntry;
import org.apache.paimon.manifest.ManifestCommittable;
Expand Down Expand Up @@ -1008,6 +1011,48 @@ public void testCommitManifestWithProperties() throws Exception {
}
}

/**
 * Commits one data row, then a global index file covering row range [0, 0], and expects the
 * index commit to go through and the latest snapshot to carry an index manifest.
 */
@Test
public void testGlobalIndexCommitChecksExistingRowIds() throws Exception {
    TestFileStore store = createRowTrackingDataEvolutionStore();

    List<KeyValue> rows = generateDataList(1);
    BinaryRow partition = gen.getPartition(rows.get(0));
    Snapshot dataSnapshot = store.commitData(rows, row -> partition, row -> 0).get(0);
    // A single committed row means the next row ID to hand out is 1.
    assertThat(dataSnapshot.nextRowId()).isEqualTo(1L);

    ManifestCommittable indexOnly = indexCommittable(partition, "existing-index", 0, 0);
    try (FileStoreCommitImpl commit = store.newCommit()) {
        commit.commit(indexOnly, false);
    }

    Snapshot latest = checkNotNull(store.snapshotManager().latestSnapshot());
    assertThat(latest.indexManifest()).isNotNull();
}

/**
 * Commits one data row, then tries to commit a global index file whose row range starts at the
 * snapshot's next row ID, and expects the row ID existence conflict to be raised.
 */
@Test
public void testGlobalIndexCommitFailsForMissingRowIds() throws Exception {
    TestFileStore store = createRowTrackingDataEvolutionStore();

    List<KeyValue> rows = generateDataList(1);
    BinaryRow partition = gen.getPartition(rows.get(0));
    Snapshot dataSnapshot = store.commitData(rows, row -> partition, row -> 0).get(0);

    // The snapshot's next row ID is used as a row ID that no data file covers.
    long missingRowId = checkNotNull(dataSnapshot.nextRowId());
    String expectedRangeText = "[" + missingRowId + ", " + missingRowId + "]";
    ManifestCommittable indexOnly =
            indexCommittable(partition, "missing-index", missingRowId, missingRowId);

    try (FileStoreCommitImpl commit = store.newCommit()) {
        assertThatThrownBy(() -> commit.commit(indexOnly, false))
                .hasMessageContaining("Global index row ID existence conflict")
                .hasMessageContaining("missing-index")
                .hasMessageContaining(expectedRangeText);
    }
}

@Test
public void testCommitTwiceWithDifferentKind() throws Exception {
TestFileStore store = createStore(false);
Expand Down Expand Up @@ -1082,6 +1127,20 @@ public void testCommitRetryAfterFalseSuccessDoesNotCleanManifest() throws Except

/**
 * Creates a commit for the store using the store's own options and its own data-evolution
 * setting, delegating to the overload that accepts both explicitly.
 */
private FileStoreCommitImpl newCommitWithSnapshotCommit(
        TestFileStore store, String commitUser, SnapshotCommit snapshotCommit) {
    CoreOptions storeOptions = store.options();
    boolean dataEvolutionEnabled = storeOptions.dataEvolutionEnabled();
    return newCommitWithSnapshotCommit(
            store, commitUser, snapshotCommit, storeOptions, dataEvolutionEnabled);
}

private FileStoreCommitImpl newCommitWithSnapshotCommit(
TestFileStore store,
String commitUser,
SnapshotCommit snapshotCommit,
CoreOptions options,
boolean dataEvolutionEnabled) {
String tableName = store.options().path().getName();
return new FileStoreCommitImpl(
snapshotCommit,
Expand All @@ -1090,7 +1149,7 @@ private FileStoreCommitImpl newCommitWithSnapshotCommit(
tableName,
commitUser,
store.partitionType(),
store.options(),
options,
store.pathFactory(),
store.snapshotManager(),
store.manifestFileFactory(),
Expand All @@ -1109,15 +1168,37 @@ private FileStoreCommitImpl newCommitWithSnapshotCommit(
store.pathFactory(),
store.newKeyComparator(),
store.bucketMode(),
store.options().deletionVectorsEnabled(),
store.options().dataEvolutionEnabled(),
store.options().pkClusteringOverride(),
options.deletionVectorsEnabled(),
dataEvolutionEnabled,
options.pkClusteringOverride(),
store.newIndexFileHandler(),
store.snapshotManager(),
scanner),
null);
}

/**
 * Builds a committable holding one commit message whose only content is a single "btree" index
 * file carrying global index metadata for the given row range.
 *
 * @param partition partition the commit message is created for
 * @param fileName name given to the index file
 * @param rowRangeStart first argument passed to the global index metadata
 * @param rowRangeEnd second argument passed to the global index metadata
 * @return a committable with identifier 0 containing that one commit message
 */
private ManifestCommittable indexCommittable(
        BinaryRow partition, String fileName, long rowRangeStart, long rowRangeEnd) {
    GlobalIndexMeta globalIndexMeta =
            new GlobalIndexMeta(rowRangeStart, rowRangeEnd, 0, null, null);
    IndexFileMeta indexFile = new IndexFileMeta("btree", fileName, 1, 1, globalIndexMeta, null);

    // The message carries the index file only; the compact increment is empty.
    CommitMessageImpl message =
            new CommitMessageImpl(
                    partition,
                    0,
                    null,
                    DataIncrement.indexIncrement(Collections.singletonList(indexFile)),
                    CompactIncrement.emptyIncrement());

    ManifestCommittable committable = new ManifestCommittable(0);
    committable.addFileCommittable(message);
    return committable;
}

private static class FalseSuccessSnapshotCommit implements SnapshotCommit {

private final SnapshotCommit delegate;
Expand Down Expand Up @@ -1153,6 +1234,13 @@ private TestFileStore createStore(boolean failing, Map<String, String> options)
return createStore(failing, 1, CoreOptions.ChangelogProducer.NONE, options);
}

/**
 * Creates a non-failing test store with both row tracking and data evolution switched on and
 * no changelog producer.
 */
private TestFileStore createRowTrackingDataEvolutionStore() throws Exception {
    String enabled = "true";
    Map<String, String> tableOptions = new HashMap<>();
    tableOptions.put(CoreOptions.DATA_EVOLUTION_ENABLED.key(), enabled);
    tableOptions.put(CoreOptions.ROW_TRACKING_ENABLED.key(), enabled);
    // NOTE(review): the second argument (-1) is presumably the bucket count - confirm.
    return createStore(false, -1, CoreOptions.ChangelogProducer.NONE, tableOptions);
}

/**
 * Convenience overload that delegates to {@code createStore(failing, 1)}.
 *
 * <p>NOTE(review): the fixed second argument is presumably the bucket count - confirm against
 * the two-argument overload.
 */
private TestFileStore createStore(boolean failing) throws Exception {
return createStore(failing, 1);
}
Expand All @@ -1179,14 +1267,18 @@ private TestFileStore createStore(
? FailingFileIO.getFailingPath(failingName, tempDir.toString())
: TraceableFileIO.SCHEME + "://" + tempDir.toString();
Path path = new Path(tempDir.toUri());
List<String> primaryKeys =
Boolean.parseBoolean(options.get(CoreOptions.ROW_TRACKING_ENABLED.key()))
? Collections.emptyList()
: TestKeyValueGenerator.getPrimaryKeys(
TestKeyValueGenerator.GeneratorMode.MULTI_PARTITIONED);
TableSchema tableSchema =
SchemaUtils.forceCommit(
new SchemaManager(new LocalFileIO(), path),
new Schema(
TestKeyValueGenerator.DEFAULT_ROW_TYPE.getFields(),
TestKeyValueGenerator.DEFAULT_PART_TYPE.getFieldNames(),
TestKeyValueGenerator.getPrimaryKeys(
TestKeyValueGenerator.GeneratorMode.MULTI_PARTITIONED),
primaryKeys,
options,
null));
return new TestFileStore.Builder(
Expand Down
Loading
Loading