diff --git a/fluss-common/src/main/java/org/apache/fluss/fs/FileStatus.java b/fluss-common/src/main/java/org/apache/fluss/fs/FileStatus.java index ad5708e3e9..74b51571ee 100644 --- a/fluss-common/src/main/java/org/apache/fluss/fs/FileStatus.java +++ b/fluss-common/src/main/java/org/apache/fluss/fs/FileStatus.java @@ -46,4 +46,18 @@ public interface FileStatus { * @return the corresponding Path to the FileStatus */ FsPath getPath(); + + /** + * Returns the modification time of the file in milliseconds since the epoch. + * + *

The default implementation returns {@link Long#MAX_VALUE}, which is interpreted by + * time-based filters (e.g. orphan-files cleanup) as "always fresh" - effectively a fail-closed + * default that prevents deletion when modification time is unavailable. File system + * implementations that can expose modification time SHOULD override this. + * + * @return the modification time in epoch millis, or {@link Long#MAX_VALUE} when unavailable + */ + default long getModificationTime() { + return Long.MAX_VALUE; + } } diff --git a/fluss-common/src/main/java/org/apache/fluss/fs/local/LocalFileStatus.java b/fluss-common/src/main/java/org/apache/fluss/fs/local/LocalFileStatus.java index 09184a9756..b8b04aa63b 100644 --- a/fluss-common/src/main/java/org/apache/fluss/fs/local/LocalFileStatus.java +++ b/fluss-common/src/main/java/org/apache/fluss/fs/local/LocalFileStatus.java @@ -67,6 +67,14 @@ public FsPath getPath() { return this.path; } + @Override + public long getModificationTime() { + // File#lastModified() yields 0L when the file is missing or an I/O error occurs; report + // that as unavailable so time-based cleanup fails closed instead of deleting the file. + long lastModified = this.file.lastModified(); + return lastModified == 0L ? Long.MAX_VALUE : lastModified; + } + public File getFile() { return this.file; } diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManifest.java b/fluss-common/src/main/java/org/apache/fluss/remote/RemoteLogManifest.java similarity index 96% rename from fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManifest.java rename to fluss-common/src/main/java/org/apache/fluss/remote/RemoteLogManifest.java index b255b8718d..bc856e361d 100644 --- a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManifest.java +++ b/fluss-common/src/main/java/org/apache/fluss/remote/RemoteLogManifest.java @@ -15,12 +15,10 @@ * limitations under the License. 
*/ -package org.apache.fluss.server.log.remote; +package org.apache.fluss.remote; -import org.apache.fluss.annotation.VisibleForTesting; import org.apache.fluss.metadata.PhysicalTablePath; import org.apache.fluss.metadata.TableBucket; -import org.apache.fluss.remote.RemoteLogSegment; import java.util.ArrayList; import java.util.Collections; @@ -33,7 +31,7 @@ /** * A remote log manifest is an immutable list of current {@link RemoteLogSegment} which can - * represent a snapshot of {@link RemoteLogTablet}. + * represent a snapshot of a remote log tablet. */ public class RemoteLogManifest { private final PhysicalTablePath physicalTablePath; @@ -122,7 +120,6 @@ public TableBucket getTableBucket() { return tableBucket; } - @VisibleForTesting public List getRemoteLogSegmentList() { return remoteLogSegmentList; } diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManifestJsonSerde.java b/fluss-common/src/main/java/org/apache/fluss/remote/RemoteLogManifestJsonSerde.java similarity index 98% rename from fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManifestJsonSerde.java rename to fluss-common/src/main/java/org/apache/fluss/remote/RemoteLogManifestJsonSerde.java index 27c5488490..c90a85ea02 100644 --- a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManifestJsonSerde.java +++ b/fluss-common/src/main/java/org/apache/fluss/remote/RemoteLogManifestJsonSerde.java @@ -15,11 +15,10 @@ * limitations under the License. 
*/ -package org.apache.fluss.server.log.remote; +package org.apache.fluss.remote; import org.apache.fluss.metadata.PhysicalTablePath; import org.apache.fluss.metadata.TableBucket; -import org.apache.fluss.remote.RemoteLogSegment; import org.apache.fluss.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerator; import org.apache.fluss.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; import org.apache.fluss.utils.json.JsonDeserializer; diff --git a/fluss-common/src/main/java/org/apache/fluss/utils/FlussPaths.java b/fluss-common/src/main/java/org/apache/fluss/utils/FlussPaths.java index 9a0659f180..1c75663ba3 100644 --- a/fluss-common/src/main/java/org/apache/fluss/utils/FlussPaths.java +++ b/fluss-common/src/main/java/org/apache/fluss/utils/FlussPaths.java @@ -74,7 +74,7 @@ public class FlussPaths { public static final String REMOTE_LOG_DIR_NAME = "log"; /** The directory name for storing metadata files (e.g., manifest) for a log tablet. */ - private static final String REMOTE_LOG_METADATA_DIR_NAME = "metadata"; + public static final String REMOTE_LOG_METADATA_DIR_NAME = "metadata"; /** Suffix of a manifest file. */ private static final String REMOTE_LOG_MANIFEST_FILE_SUFFIX = ".manifest"; diff --git a/fluss-common/src/test/java/org/apache/fluss/fs/FileStatusTest.java b/fluss-common/src/test/java/org/apache/fluss/fs/FileStatusTest.java new file mode 100644 index 0000000000..f491a56b60 --- /dev/null +++ b/fluss-common/src/test/java/org/apache/fluss/fs/FileStatusTest.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.fs; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for default methods of {@link FileStatus}. */ +class FileStatusTest { + + /** + * An implementation that does not override {@link FileStatus#getModificationTime()} must + * inherit the fail-safe default of {@link Long#MAX_VALUE}, so time-based filters treat the file + * as "always fresh" and never delete it when modification time is unavailable. + */ + @Test + void defaultModificationTimeIsMaxValueFailSafe() { + FileStatus status = + new FileStatus() { + @Override + public long getLen() { + return 0L; + } + + @Override + public boolean isDir() { + return false; + } + + @Override + public FsPath getPath() { + return new FsPath("/tmp/x"); + } + }; + + assertThat(status.getModificationTime()).isEqualTo(Long.MAX_VALUE); + } +} diff --git a/fluss-server/src/test/java/org/apache/fluss/server/log/remote/RemoteLogManifestJsonSerdeTest.java b/fluss-common/src/test/java/org/apache/fluss/remote/RemoteLogManifestJsonSerdeTest.java similarity index 97% rename from fluss-server/src/test/java/org/apache/fluss/server/log/remote/RemoteLogManifestJsonSerdeTest.java rename to fluss-common/src/test/java/org/apache/fluss/remote/RemoteLogManifestJsonSerdeTest.java index da4024ffc4..e095132158 100644 --- a/fluss-server/src/test/java/org/apache/fluss/server/log/remote/RemoteLogManifestJsonSerdeTest.java +++ b/fluss-common/src/test/java/org/apache/fluss/remote/RemoteLogManifestJsonSerdeTest.java @@ -15,18 +15,17 @@ * limitations 
under the License. */ -package org.apache.fluss.server.log.remote; +package org.apache.fluss.remote; import org.apache.fluss.metadata.PhysicalTablePath; import org.apache.fluss.metadata.TableBucket; import org.apache.fluss.metadata.TablePath; -import org.apache.fluss.remote.RemoteLogSegment; import org.apache.fluss.utils.json.JsonSerdeTestBase; import java.util.Arrays; import java.util.UUID; -/** Tests of {@link org.apache.fluss.server.log.remote.RemoteLogManifestJsonSerde}. */ +/** Tests of {@link RemoteLogManifestJsonSerde}. */ class RemoteLogManifestJsonSerdeTest extends JsonSerdeTestBase { private static final PhysicalTablePath TABLE_PATH1 = PhysicalTablePath.of(TablePath.of("db", "mytable")); diff --git a/fluss-filesystems/fluss-fs-hadoop/src/main/java/org/apache/fluss/fs/hdfs/HadoopFileStatus.java b/fluss-filesystems/fluss-fs-hadoop/src/main/java/org/apache/fluss/fs/hdfs/HadoopFileStatus.java index f54033a693..47c9febcfe 100644 --- a/fluss-filesystems/fluss-fs-hadoop/src/main/java/org/apache/fluss/fs/hdfs/HadoopFileStatus.java +++ b/fluss-filesystems/fluss-fs-hadoop/src/main/java/org/apache/fluss/fs/hdfs/HadoopFileStatus.java @@ -52,6 +52,11 @@ public boolean isDir() { return fileStatus.isDirectory(); } + @Override + public long getModificationTime() { + return fileStatus.getModificationTime(); + } + // ------------------------------------------------------------------------ /** diff --git a/fluss-flink/fluss-flink-1.18/pom.xml b/fluss-flink/fluss-flink-1.18/pom.xml index 1636f25569..9f67b6ce9b 100644 --- a/fluss-flink/fluss-flink-1.18/pom.xml +++ b/fluss-flink/fluss-flink-1.18/pom.xml @@ -219,6 +219,14 @@ org.apache.fluss:fluss-client + + + org.apache.fluss:fluss-flink-common + + org/apache/fluss/flink/action/** + + + @@ -226,4 +234,4 @@ - \ No newline at end of file + diff --git a/fluss-flink/fluss-flink-1.19/pom.xml b/fluss-flink/fluss-flink-1.19/pom.xml index a9df2c830a..d16e6e46a8 100644 --- a/fluss-flink/fluss-flink-1.19/pom.xml +++ 
b/fluss-flink/fluss-flink-1.19/pom.xml @@ -213,6 +213,11 @@ org.apache.fluss:fluss-client + + + org.apache.fluss.flink.action.FlussActionEntrypoint + + @@ -220,4 +225,4 @@ - \ No newline at end of file + diff --git a/fluss-flink/fluss-flink-1.19/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory b/fluss-flink/fluss-flink-1.19/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory new file mode 100644 index 0000000000..c30c9dd5ab --- /dev/null +++ b/fluss-flink/fluss-flink-1.19/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +org.apache.fluss.flink.action.orphan.OrphanFilesCleanActionFactory diff --git a/fluss-flink/fluss-flink-1.19/src/test/java/org/apache/fluss/flink/action/orphan/Flink119OrphanFilesCleanITCase.java b/fluss-flink/fluss-flink-1.19/src/test/java/org/apache/fluss/flink/action/orphan/Flink119OrphanFilesCleanITCase.java new file mode 100644 index 0000000000..d775605170 --- /dev/null +++ b/fluss-flink/fluss-flink-1.19/src/test/java/org/apache/fluss/flink/action/orphan/Flink119OrphanFilesCleanITCase.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan; + +/** The IT case for orphan files cleanup in Flink 1.19. 
*/ +class Flink119OrphanFilesCleanITCase extends OrphanFilesCleanITCase {} diff --git a/fluss-flink/fluss-flink-1.20/pom.xml b/fluss-flink/fluss-flink-1.20/pom.xml index 25d867b398..ab0915f6e8 100644 --- a/fluss-flink/fluss-flink-1.20/pom.xml +++ b/fluss-flink/fluss-flink-1.20/pom.xml @@ -234,6 +234,11 @@ org.apache.fluss:fluss-client + + + org.apache.fluss.flink.action.FlussActionEntrypoint + + @@ -241,4 +246,4 @@ - \ No newline at end of file + diff --git a/fluss-flink/fluss-flink-1.20/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory b/fluss-flink/fluss-flink-1.20/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory new file mode 100644 index 0000000000..c30c9dd5ab --- /dev/null +++ b/fluss-flink/fluss-flink-1.20/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +org.apache.fluss.flink.action.orphan.OrphanFilesCleanActionFactory diff --git a/fluss-flink/fluss-flink-1.20/src/test/java/org/apache/fluss/flink/action/orphan/Flink120OrphanFilesCleanITCase.java b/fluss-flink/fluss-flink-1.20/src/test/java/org/apache/fluss/flink/action/orphan/Flink120OrphanFilesCleanITCase.java new file mode 100644 index 0000000000..0dc35613f9 --- /dev/null +++ b/fluss-flink/fluss-flink-1.20/src/test/java/org/apache/fluss/flink/action/orphan/Flink120OrphanFilesCleanITCase.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan; + +/** The IT case for orphan files cleanup in Flink 1.20. 
*/ +class Flink120OrphanFilesCleanITCase extends OrphanFilesCleanITCase {} diff --git a/fluss-flink/fluss-flink-2.2/pom.xml b/fluss-flink/fluss-flink-2.2/pom.xml index f2ea4cb597..3337797d4c 100644 --- a/fluss-flink/fluss-flink-2.2/pom.xml +++ b/fluss-flink/fluss-flink-2.2/pom.xml @@ -258,6 +258,11 @@ org.apache.fluss:fluss-client + + + org.apache.fluss.flink.action.FlussActionEntrypoint + + diff --git a/fluss-flink/fluss-flink-2.2/src/main/java/org/apache/fluss/flink/adapter/MultipleParameterToolAdapter.java b/fluss-flink/fluss-flink-2.2/src/main/java/org/apache/fluss/flink/adapter/MultipleParameterToolAdapter.java index 076dcb86c8..999cd40a8e 100644 --- a/fluss-flink/fluss-flink-2.2/src/main/java/org/apache/fluss/flink/adapter/MultipleParameterToolAdapter.java +++ b/fluss-flink/fluss-flink-2.2/src/main/java/org/apache/fluss/flink/adapter/MultipleParameterToolAdapter.java @@ -19,6 +19,9 @@ import org.apache.flink.util.MultipleParameterTool; +import javax.annotation.Nullable; + +import java.util.Collection; import java.util.Map; /** @@ -43,4 +46,23 @@ public static MultipleParameterToolAdapter fromArgs(String[] args) { public Map toMap() { return this.multipleParameterTool.toMap(); } + + /** Returns whether the given key is present in the parsed arguments. */ + public boolean has(String key) { + return this.multipleParameterTool.has(key); + } + + /** Returns the value for the given key, or {@code null} if the key is not found. */ + @Nullable + public String get(String key) { + return this.multipleParameterTool.get(key); + } + + /** + * Returns all values associated with the given key, or {@code null} if the key is not found. 
+ */ + @Nullable + public Collection getMultiParameter(String key) { + return this.multipleParameterTool.getMultiParameter(key); + } } diff --git a/fluss-flink/fluss-flink-2.2/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory b/fluss-flink/fluss-flink-2.2/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory new file mode 100644 index 0000000000..c30c9dd5ab --- /dev/null +++ b/fluss-flink/fluss-flink-2.2/src/main/resources/META-INF/services/org.apache.fluss.flink.action.ActionFactory @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +org.apache.fluss.flink.action.orphan.OrphanFilesCleanActionFactory diff --git a/fluss-flink/fluss-flink-2.2/src/test/java/org/apache/fluss/flink/action/orphan/Flink22OrphanFilesCleanITCase.java b/fluss-flink/fluss-flink-2.2/src/test/java/org/apache/fluss/flink/action/orphan/Flink22OrphanFilesCleanITCase.java new file mode 100644 index 0000000000..79f15997cc --- /dev/null +++ b/fluss-flink/fluss-flink-2.2/src/test/java/org/apache/fluss/flink/action/orphan/Flink22OrphanFilesCleanITCase.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan; + +/** The IT case for orphan files cleanup in Flink 2.2. */ +class Flink22OrphanFilesCleanITCase extends OrphanFilesCleanITCase {} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/Action.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/Action.java new file mode 100644 index 0000000000..98af1da48a --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/Action.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action; + +import org.apache.fluss.annotation.Internal; + +/** Pluggable Flink action invoked from CLI via {@link FlussActionEntrypoint}. */ +@Internal +public interface Action { + + /** Optional setup hook called once before {@link #run()}. */ + default void build() throws Exception {} + + /** Execute the action. */ + void run() throws Exception; +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionFactory.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionFactory.java new file mode 100644 index 0000000000..d68c07ca8f --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionFactory.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.adapter.MultipleParameterToolAdapter; + +import java.util.Optional; + +/** SPI for {@link Action} factories, registered via JDK {@link java.util.ServiceLoader}. 
*/ +@Internal +public interface ActionFactory { + + /** + * Identifier matched against the first CLI argument after lowercasing and replacing {@code -} + * with {@code _}. + */ + String identifier(); + + /** Construct the action from parsed CLI parameters. Empty when {@code --help} is requested. */ + Optional<Action> create(MultipleParameterToolAdapter params); + + /** Help text printed when {@code --help} is passed. */ + default String help() { + return ""; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionLoader.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionLoader.java new file mode 100644 index 0000000000..91599e7510 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/ActionLoader.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.adapter.MultipleParameterToolAdapter; + +import java.util.Arrays; +import java.util.Optional; +import java.util.ServiceLoader; + +/** + * Discovers {@link ActionFactory} implementations via {@link ServiceLoader} and dispatches CLI + * arguments to the appropriate {@link Action}. + */ +@Internal +public final class ActionLoader { + + private ActionLoader() {} + + /** + * Resolve and create an action from CLI arguments. + * + *

Returns {@link Optional#empty()} when no arguments are provided or when {@code --help} is + * requested. Throws {@link IllegalArgumentException} when the requested identifier does not + * resolve to a known factory. + */ + public static Optional<Action> createAction(String[] args) { + if (args.length < 1) { + printDefaultHelp(); + return Optional.empty(); + } + if (isHelp(args[0])) { + printDefaultHelp(); + return Optional.empty(); + } + String name = args[0].toLowerCase(java.util.Locale.ROOT).replace('-', '_'); + ActionFactory factory = + findFactory(name) + .orElseThrow( + () -> + new IllegalArgumentException( + "Unknown action: " + + args[0] + + ". Run with --help for available actions.")); + String[] remaining = Arrays.copyOfRange(args, 1, args.length); + if (hasHelp(remaining)) { + System.out.println(factory.help()); + return Optional.empty(); + } + MultipleParameterToolAdapter params = MultipleParameterToolAdapter.fromArgs(remaining); + return factory.create(params); + } + + private static boolean isHelp(String arg) { + return "--help".equals(arg) || "-h".equals(arg); + } + + private static boolean hasHelp(String[] args) { + for (String arg : args) { + if (isHelp(arg)) { + return true; + } + } + return false; + } + + private static Optional<ActionFactory> findFactory(String identifier) { + for (ActionFactory f : ServiceLoader.load(ActionFactory.class)) { + if (f.identifier().equals(identifier)) { + return Optional.of(f); + } + } + return Optional.empty(); + } + + private static void printDefaultHelp() { + System.out.println("Usage: [options]"); + System.out.println("Available actions:"); + for (ActionFactory f : ServiceLoader.load(ActionFactory.class)) { + System.out.println(" " + f.identifier()); + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/FlussActionEntrypoint.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/FlussActionEntrypoint.java new file mode 100644 index 0000000000..dda7d4cf93 --- /dev/null +++ 
b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/FlussActionEntrypoint.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action; + +import java.util.Optional; + +/** Main entrypoint for Fluss Flink action jars. Delegates to {@link ActionLoader}. */ +public class FlussActionEntrypoint { + + public static void main(String[] args) throws Exception { + Optional<Action> action; + try { + action = ActionLoader.createAction(args); + } catch (IllegalArgumentException e) { + System.err.println(e.getMessage()); + System.exit(1); + return; + } + if (!action.isPresent()) { + return; + } + action.get().build(); + action.get().run(); + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanCleanUtils.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanCleanUtils.java new file mode 100644 index 0000000000..24381ab752 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanCleanUtils.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.client.admin.Admin;
+import org.apache.fluss.config.ConfigOptions;
+import org.apache.fluss.config.Configuration;
+import org.apache.fluss.config.cluster.ConfigEntry;
+import org.apache.fluss.fs.FsPath;
+import org.apache.fluss.metadata.PartitionInfo;
+import org.apache.fluss.metadata.PhysicalTablePath;
+import org.apache.fluss.metadata.TableBucket;
+import org.apache.fluss.metadata.TableInfo;
+import org.apache.fluss.metadata.TablePath;
+
+import javax.annotation.Nullable;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.fluss.utils.Preconditions.checkNotNull;
+
+/** Shared utility methods for the orphan files cleanup action. */
+@Internal
+public final class OrphanCleanUtils {
+
+    private OrphanCleanUtils() {}
+
+    /**
+     * Constructs a {@link PhysicalTablePath} from a table path and an optional partition. Returns
+     * the non-partitioned form when {@code partitionInfo} is null.
+     *
+     * @param tablePath the logical table path
+     * @param partitionInfo the partition to address, or {@code null} for the table itself
+     */
+    public static PhysicalTablePath physicalPath(
+            TablePath tablePath, @Nullable PartitionInfo partitionInfo) {
+        if (partitionInfo == null) {
+            return PhysicalTablePath.of(tablePath);
+        }
+        return PhysicalTablePath.of(tablePath, partitionInfo.getPartitionName());
+    }
+
+    /**
+     * Enumerates all {@link TableBucket} instances for a table (or a single partition of that
+     * table). One bucket is produced per id in {@code [0, tableInfo.getNumBuckets())}.
+     *
+     * @param tableInfo supplies the table id and the bucket count
+     * @param partitionInfo when non-null, its partition id is attached to every bucket
+     * @return a new mutable list, in ascending bucket-id order
+     */
+    public static List<TableBucket> enumerateBuckets(
+            TableInfo tableInfo, @Nullable PartitionInfo partitionInfo) {
+        int n = tableInfo.getNumBuckets();
+        List<TableBucket> buckets = new ArrayList<>(n);
+        long tableId = tableInfo.getTableId();
+        for (int b = 0; b < n; b++) {
+            if (partitionInfo == null) {
+                buckets.add(new TableBucket(tableId, b));
+            } else {
+                buckets.add(new TableBucket(tableId, partitionInfo.getPartitionId(), b));
+            }
+        }
+        return buckets;
+    }
+
+    /**
+     * Resolves the effective remote data directory for a table/partition target using the
+     * three-level fallback: partition-level → table-level → cluster-level. At least one level is
+     * always set because the coordinator assigns a {@code remoteDataDir} to every table at creation
+     * time via {@code RemoteDirSelector.nextDataDir()}.
+     *
+     * @throws NullPointerException (via {@code checkNotNull}) when all three levels are null
+     */
+    public static String resolveRemoteDataDir(
+            TableInfo tableInfo,
+            @Nullable PartitionInfo partitionInfo,
+            @Nullable String clusterRemoteDataDir) {
+        if (partitionInfo != null && partitionInfo.getRemoteDataDir() != null) {
+            return partitionInfo.getRemoteDataDir();
+        }
+        if (tableInfo.getRemoteDataDir() != null) {
+            return tableInfo.getRemoteDataDir();
+        }
+        return checkNotNull(
+                clusterRemoteDataDir,
+                "No remote data directory resolvable: partition, table, "
+                        + "and cluster levels are all null. This should not happen because the "
+                        + "coordinator requires remote.data.dir or remote.data.dirs at startup.");
+    }
+
+    /**
+     * Resolves the cluster-level {@code remote.data.dir} by querying the coordinator's runtime
+     * configuration. Returns {@code null} when the cluster uses {@code remote.data.dirs}
+     * (multi-directory mode) without the legacy single {@code remote.data.dir}.
+     */
+    @Nullable
+    public static String resolveClusterRemoteDataDir(Admin admin) throws Exception {
+        return resolveClusterRemoteDataDir(fetchClusterConfigMap(admin));
+    }
+
+    /**
+     * Extracts the single-root {@code remote.data.dir} from a pre-fetched config map.
+     *
+     * @return the raw configured value (not normalized), or {@code null} when the key is absent
+     */
+    @Nullable
+    public static String resolveClusterRemoteDataDir(Map<String, String> configMap) {
+        return configMap.get(ConfigOptions.REMOTE_DATA_DIR.key());
+    }
+
+    /**
+     * Resolves all cluster-level remote data directories by querying the coordinator's runtime
+     * configuration. Reads both the single-root {@code remote.data.dir} and the multi-root {@code
+     * remote.data.dirs}, deduplicates by normalized form, and returns the union as the canonical
+     * root list.
+     *
+     * <p>This is the authoritative source for determining what storage roots the cleanup action is
+     * allowed to touch.
+     *
+     * @return list of normalized roots (no trailing slash); never {@code null}, may be empty if the
+     *     cluster has neither config set (which should not happen because the coordinator requires
+     *     at least one remote data dir at startup).
+     */
+    public static List<String> resolveClusterRemoteDataDirs(Admin admin) throws Exception {
+        return resolveClusterRemoteDataDirs(fetchClusterConfigMap(admin));
+    }
+
+    /**
+     * Extracts all remote data roots from a pre-fetched config map. The single-root entry (if any)
+     * comes first, followed by the multi-root entries in configured order; null/empty entries are
+     * ignored and duplicates (after {@link #normalizeRoot(String)}) are dropped.
+     */
+    public static List<String> resolveClusterRemoteDataDirs(Map<String, String> configMap) {
+        Configuration conf = Configuration.fromMap(configMap);
+        // LinkedHashSet: de-duplicates while keeping first-seen order.
+        LinkedHashSet<String> roots = new LinkedHashSet<>();
+        String singleDir = conf.get(ConfigOptions.REMOTE_DATA_DIR);
+        if (singleDir != null && !singleDir.isEmpty()) {
+            roots.add(normalizeRoot(singleDir));
+        }
+        List<String> multiDirs = conf.get(ConfigOptions.REMOTE_DATA_DIRS);
+        if (multiDirs != null) {
+            for (String dir : multiDirs) {
+                if (dir != null && !dir.isEmpty()) {
+                    roots.add(normalizeRoot(dir));
+                }
+            }
+        }
+        return new ArrayList<>(roots);
+    }
+
+    /**
+     * Fetches the coordinator's runtime configuration as a key-value map. Use this once and pass
+     * the result to the map-based overloads of {@link #resolveClusterRemoteDataDir(Map)} and {@link
+     * #resolveClusterRemoteDataDirs(Map)} to avoid duplicate RPCs. Entries whose value is {@code
+     * null} are omitted from the returned map.
+     */
+    public static Map<String, String> fetchClusterConfigMap(Admin admin) throws Exception {
+        Collection<ConfigEntry> entries = admin.describeClusterConfigs().get();
+        Map<String, String> map = new HashMap<>();
+        for (ConfigEntry entry : entries) {
+            if (entry.value() != null) {
+                map.put(entry.key(), entry.value());
+            }
+        }
+        return map;
+    }
+
+    /** Constructs a remote sub-directory path, normalizing trailing slashes on the root. */
+    public static FsPath remoteSubDir(String remoteDataDir, String subDir) {
+        return new FsPath(normalizeRoot(remoteDataDir) + "/" + subDir);
+    }
+
+    /**
+     * Strips a trailing slash from a remote data directory string. Only a single trailing {@code
+     * '/'} is removed; the input is returned unchanged when it does not end with one.
+     */
+    public static String normalizeRoot(String remoteDataDir) {
+        return remoteDataDir.endsWith("/")
+                ? remoteDataDir.substring(0, remoteDataDir.length() - 1)
+                : remoteDataDir;
+    }
+
+    /**
+     * Formats a bucket-scope key for audit/logging purposes as {@code
+     * tableId:partitionId:bucketId}. A {@code null} partition id is rendered as the literal text
+     * {@code null}.
+     */
+    public static String bucketScopeKey(long tableId, Long partitionId, int bucketId) {
+        return tableId + ":" + partitionId + ":" + bucketId;
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanAction.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanAction.java
new file mode 100644
index 0000000000..1f12090783
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanAction.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.flink.action.Action;
+import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig;
+import org.apache.fluss.flink.action.orphan.job.CleanStats;
+import org.apache.fluss.flink.action.orphan.job.OrphanFilesCleanJob;
+
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Orphan files cleanup action. Delegates to a distributed Flink Batch job ({@link
+ * OrphanFilesCleanJob}) that executes a 3-stage DAG:
+ *
+ * <ol>
+ *   <li>ScopeEnumerator (p=1): coordinator RPCs to enumerate scope and emit per-bucket work items.
+ *   <li>ScanAndClean (p=N): parallel FS scan + rate-limited delete.
+ *   <li>StatsAggregate (p=1): merge per-task stats into final summary.
+ * </ol>
+ */
+@Internal
+public class OrphanFilesCleanAction implements Action {
+
+    private static final Logger LOG = LoggerFactory.getLogger(OrphanFilesCleanAction.class);
+
+    private final OrphanCleanConfig config;
+
+    /** Creates the action for an already-parsed cleanup configuration. */
+    public OrphanFilesCleanAction(OrphanCleanConfig config) {
+        this.config = config;
+    }
+
+    /**
+     * Runs the cleanup job on the ambient Flink execution environment and logs a one-line summary
+     * of the returned {@link CleanStats}. The configured parallelism is forwarded when present,
+     * {@code null} otherwise.
+     */
+    @Override
+    public void run() throws Exception {
+        CleanStats result =
+                OrphanFilesCleanJob.execute(
+                        StreamExecutionEnvironment.getExecutionEnvironment(),
+                        config,
+                        config.parallelism().orElse(null));
+        LOG.info(
+                "remove_orphan_files done: scope={} scanned={} deletedTotal={}"
+                        + " emptyDirsRemoved={} failures={} bytesReclaimed={} dryRun={}",
+                scopeDescription(),
+                result.scanned(),
+                result.deleted(),
+                result.emptyDirsRemoved(),
+                result.deleteFailures(),
+                result.bytesReclaimed(),
+                config.dryRun());
+    }
+
+    /**
+     * Renders the configured scope for the summary log line: {@code all-databases} or the database
+     * name ({@code unknown} when none is set), with {@code .<table>} appended when a table is
+     * configured.
+     */
+    private String scopeDescription() {
+        final String base =
+                config.allDatabases() ? "all-databases" : config.database().orElse("unknown");
+        return config.table().map(table -> base + "." + table).orElse(base);
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanActionFactory.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanActionFactory.java
new file mode 100644
index 0000000000..ef6dc7bdc6
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanActionFactory.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.flink.action.Action;
+import org.apache.fluss.flink.action.ActionFactory;
+import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig;
+import org.apache.fluss.flink.adapter.MultipleParameterToolAdapter;
+
+import java.util.Optional;
+
+/** Factory for the shell-mode orphan files cleanup action. */
+@Internal
+public class OrphanFilesCleanActionFactory implements ActionFactory {
+
+    /** Returns the action name used to select this factory: {@code remove_orphan_files}. */
+    @Override
+    public String identifier() {
+        return "remove_orphan_files";
+    }
+
+    /**
+     * Builds the cleanup action from parsed command-line parameters. The result is always
+     * present; parameter parsing is delegated to {@link OrphanCleanConfig#fromParams}.
+     */
+    @Override
+    public Optional<Action> create(MultipleParameterToolAdapter params) {
+        return Optional.of(
+                new OrphanFilesCleanAction(OrphanCleanConfig.fromParams(params)));
+    }
+
+    /** Returns the multi-line usage text (synopsis, notes and an example invocation). */
+    @Override
+    public String help() {
+        // Value placeholders are written in angle brackets so the synopsis shows which options
+        // take an argument.
+        return "Usage: remove_orphan_files --bootstrap-server <host:port>\n"
+                + " (--database <db> [--table <table>] | --all-databases)\n"
+                + " [--older-than '<timestamp>']\n"
+                + " [--remote-fs-op-rate-limit-per-second 100]\n"
+                + " [--dry-run]\n"
+                + " [--allow-delete-manifest]\n"
+                + " [--allow-clean-orphan-tables]\n"
+                + " [--allow-clean-orphan-partitions]\n"
+                + " [--conf <key>=<value>]...\n"
+                + "\n"
+                + "Notes:\n"
+                + " --older-than is an absolute wall-clock cutoff in ISO-8601 with explicit\n"
+                + " offset (e.g. '2024-01-01T00:00:00+08:00' or '2024-01-01T00:00:00Z').\n"
+                + " Files with mtime strictly less than the cutoff are deletion-eligible.\n"
+                + " Default: now - 3d, computed once at startup. The cutoff is frozen for the\n"
+                + " run, so a long scan cannot accidentally pull in files written after the\n"
+                + " action started. The cutoff must be at least 1d before now (closer cutoffs\n"
+                + " would race with mid-write files).\n"
+                + " Orphan directory detection (table/partition) relies solely on ID guards\n"
+                + " (maxKnownTableId / maxKnownPartitionId), not mtime.\n"
+                + " --table also disables the orphan-table scan (no sibling orphan-table scan in\n"
+                + " the db).\n"
+                + " --conf passes filesystem configuration for remote storage authentication.\n"
+                + " Keys use the same format as server.yaml (e.g. fs.oss.accessKeyId,\n"
+                + " fs.oss.accessKeySecret, fs.oss.endpoint, fs.oss.region). Repeatable.\n"
+                + "\n"
+                + "Examples:\n"
+                + " remove_orphan_files --bootstrap-server host:9123 --all-databases\n"
+                + " --conf fs.oss.accessKeyId=XXXX --conf fs.oss.accessKeySecret=YYYY\n"
+                + " --conf fs.oss.endpoint=oss-cn-hangzhou-internal.aliyuncs.com\n"
+                + " --conf fs.oss.region=cn-hangzhou";
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifier.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifier.java
new file mode 100644
index 0000000000..8f0994213f
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifier.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.exception.FlussRuntimeException;
+import org.apache.fluss.exception.PartitionNotExistException;
+import org.apache.fluss.exception.TableNotExistException;
+
+import java.io.IOException;
+import java.util.concurrent.CompletionException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Classifies RPC exceptions raised during scope enumeration and per-target active-set fetch into a
+ * small, audit-stable vocabulary. The category name is what surfaces as the {@code reason=} field
+ * of {@code skip_log_target} / {@code skip_kv_target} audit events, so operators triage by exact
+ * string and the enum must not be widened lightly.
+ *
+ * <ul>
+ *   <li>{@link Category#NOT_FOUND} — legitimate "object does not exist"; the enumerator treats it
+ *       as the target having disappeared concurrently and silently skips it without alarm.
+ *   <li>{@link Category#TRANSIENT} — IO / timeout / ZK connection loss; the target is skipped this
+ *       round and naturally retried in the next cleanup round.
+ *   <li>{@link Category#SERVER_ERROR} — server-side failure; same skip, but audited at higher
+ *       severity so an operator can investigate.
+ *   <li>{@link Category#UNKNOWN} — anything not matched above; conservatively skipped + audited.
+ * </ul>
+ */
+@Internal
+public final class RpcErrorClassifier {
+
+    private RpcErrorClassifier() {}
+
+    /** Categories of RPC errors. */
+    public enum Category {
+        NOT_FOUND,
+        TRANSIENT,
+        SERVER_ERROR,
+        UNKNOWN
+    }
+
+    /**
+     * Maps a thrown exception to its {@link Category}. {@link CompletionException} / {@link
+     * ExecutionException} wrappers are peeled off first; the checks then run in a fixed order
+     * (not-found, transient, server error) and the first match wins.
+     */
+    public static Category classify(Throwable t) {
+        final Throwable root = unwrap(t);
+
+        final boolean notFound =
+                root instanceof TableNotExistException
+                        || root instanceof PartitionNotExistException;
+        if (notFound) {
+            return Category.NOT_FOUND;
+        }
+
+        final boolean transientFailure =
+                root instanceof IOException || root instanceof TimeoutException;
+        if (transientFailure) {
+            return Category.TRANSIENT;
+        }
+
+        return root instanceof FlussRuntimeException ? Category.SERVER_ERROR : Category.UNKNOWN;
+    }
+
+    /**
+     * Descends through async wrapper exceptions and returns the first throwable that is not a
+     * wrapper, or the innermost wrapper itself when it carries no cause.
+     */
+    private static Throwable unwrap(Throwable t) {
+        Throwable current = t;
+        while ((current instanceof CompletionException || current instanceof ExecutionException)
+                && current.getCause() != null) {
+            current = current.getCause();
+        }
+        return current;
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/audit/AuditLogger.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/audit/AuditLogger.java
new file mode 100644
index 0000000000..26adf5f00e
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/audit/AuditLogger.java
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.flink.action.orphan.audit;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.flink.action.orphan.rule.RuleId;
+import org.apache.fluss.fs.FsPath;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+
+/**
+ * Structured audit log writer for the orphan files cleanup action.
+ *
+ * <p>The dedicated logger name {@code fluss.orphan.audit} can be routed to a separate sink (e.g.
+ * SLS) by deployment-specific log4j configuration.
+ *
+ * <p>Every line is a flat sequence of {@code key=value} pairs that starts with {@code action=} and
+ * ends with a {@code ts=} field taken from {@link Instant#now()}.
+ */
+@Internal
+public final class AuditLogger {
+
+    private static final Logger AUDIT = LoggerFactory.getLogger("fluss.orphan.audit");
+
+    /**
+     * Formats cutoff epoch-ms as ISO-8601 with an explicit offset, in the JVM's default zone (e.g.
+     * {@code 2024-01-01T00:00:00+08:00}). This matches the {@code --older-than} CLI grammar and the
+     * {@code older_than_iso} field name; a zone-less local timestamp would be ambiguous once the
+     * audit line leaves the host that produced it.
+     */
+    private static final DateTimeFormatter CUTOFF_FORMATTER =
+            DateTimeFormatter.ISO_OFFSET_DATE_TIME.withZone(ZoneId.systemDefault());
+
+    /**
+     * One-shot startup event recording the frozen file cutoff that drives this run's deletion
+     * decisions. Emitted before any other audit line so log readers can recover the exact threshold
+     * without having to re-parse the original CLI arguments.
+     *
+     * @param olderThanMillis the cutoff in epoch milliseconds; logged both formatted and raw
+     */
+    public void logCutoff(long olderThanMillis) {
+        AUDIT.info(
+                "action=cutoff older_than_iso={} older_than_ms={} ts={}",
+                CUTOFF_FORMATTER.format(Instant.ofEpochMilli(olderThanMillis)),
+                olderThanMillis,
+                Instant.now());
+    }
+
+    /** Emits an {@code action=deleted} line with the matching rule, the path and the outcome. */
+    public void logDeleted(FsPath path, RuleId ruleId, boolean ok) {
+        AUDIT.info("action=deleted rule={} path={} ok={} ts={}", ruleId, path, ok, Instant.now());
+    }
+
+    /** Emits an {@code action=would_delete} line with the matching rule and the path. */
+    public void logWouldDelete(FsPath path, RuleId ruleId) {
+        AUDIT.info("action=would_delete rule={} path={} ts={}", ruleId, path, Instant.now());
+    }
+
+    /** Emits an {@code action=dir_deleted} line for a directory. */
+    public void logDirDeleted(FsPath dir) {
+        AUDIT.info("action=dir_deleted path={} ts={}", dir, Instant.now());
+    }
+
+    /** Emits an {@code action=would_delete_dir} line for a directory. */
+    public void logWouldDeleteDir(FsPath dir) {
+        AUDIT.info("action=would_delete_dir path={} ts={}", dir, Instant.now());
+    }
+
+    /** Emits an {@code action=skip_unknown} line at WARN with the rule and the path. */
+    public void logSkipUnknown(FsPath path, RuleId ruleId) {
+        AUDIT.warn("action=skip_unknown rule={} path={} ts={}", ruleId, path, Instant.now());
+    }
+
+    /** Emits an {@code action=bucket_aborted} line at ERROR with the bucket and the reason. */
+    public void logBucketAborted(String bucketStr, String reason) {
+        AUDIT.error(
+                "action=bucket_aborted bucket={} reason={} ts={}",
+                bucketStr,
+                reason,
+                Instant.now());
+    }
+
+    /** Skip an entire database during scope enumeration due to listTables failure. */
+    public void logSkipDb(String dbName, String reason) {
+        AUDIT.warn("action=skip_db reason={} db={} ts={}", reason, dbName, Instant.now());
+    }
+
+    /** Skip a single table during scope enumeration due to getTableInfo or RPC failure. */
+    public void logSkipTable(String dbName, String tableName, String reason) {
+        AUDIT.warn(
+                "action=skip_table reason={} db={} table={} ts={}",
+                reason,
+                dbName,
+                tableName,
+                Instant.now());
+    }
+
+    /**
+     * Skip listPartitionInfos for a table due to RPC failure (both active-partition cleanup and
+     * orphan-partition scan are suppressed for this table).
+     */
+    public void logSkipPartitionList(String dbName, String tableName, String reason) {
+        AUDIT.warn(
+                "action=skip_partition_list reason={} db={} table={} ts={}",
+                reason,
+                dbName,
+                tableName,
+                Instant.now());
+    }
+
+    /**
+     * Skip KV cleanup for one (tableId, partitionId) target — emitted when {@code ListKvSnapshots}
+     * fails after retries. {@code partitionId} is null for non-partitioned tables.
+     */
+    public void logSkipKvTarget(long tableId, Long partitionId, String reason) {
+        AUDIT.warn(
+                "action=skip_kv_target reason={} table_id={} partition_id={} ts={}",
+                reason,
+                tableId,
+                partitionId,
+                Instant.now());
+    }
+
+    /**
+     * Skip KV cleanup for a single bucket whose {@code ListKvSnapshots} response carried no
+     * active-snapshot entries. Empty per-bucket active set is treated as "cannot prove what is
+     * active" and the bucket is skipped to avoid mis-deletion.
+     */
+    public void logSkipKvBucket(long tableId, Long partitionId, int bucketId, String reason) {
+        AUDIT.warn(
+                "action=skip_kv_bucket reason={} table_id={} partition_id={} bucket_id={} ts={}",
+                reason,
+                tableId,
+                partitionId,
+                bucketId,
+                Instant.now());
+    }
+
+    /**
+     * Skip log cleanup for one (tableId, partitionId) target — emitted when {@code
+     * ListRemoteLogManifests} fails after retries. {@code partitionId} is null for non-partitioned
+     * tables.
+     */
+    public void logSkipLogTarget(long tableId, Long partitionId, String reason) {
+        AUDIT.warn(
+                "action=skip_log_target reason={} table_id={} partition_id={} ts={}",
+                reason,
+                tableId,
+                partitionId,
+                Instant.now());
+    }
+
+    /**
+     * Skip log cleanup for a single bucket whose remote manifest was not returned by the {@code
+     * ListRemoteLogManifests} RPC (the bucket has not yet committed any remote manifest).
+     */
+    public void logSkipLogBucket(long tableId, Long partitionId, int bucketId, String reason) {
+        AUDIT.warn(
+                "action=skip_log_bucket reason={} table_id={} partition_id={} bucket_id={} ts={}",
+                reason,
+                tableId,
+                partitionId,
+                bucketId,
+                Instant.now());
+    }
+
+    /** Default-conservative skip of an orphan-table dir (opt-in flag not set). */
+    public void logSkipOrphanTable(FsPath dir, String reason) {
+        AUDIT.info("action=skip_orphan_table reason={} path={} ts={}", reason, dir, Instant.now());
+    }
+
+    /**
+     * Skip the orphan-table scan for a database whose table-info set is incomplete (e.g. {@code
+     * --table} single-table mode, or {@code listTables}/{@code getTableInfo} failures left holes in
+     * the active table id set). Distinct from {@link #logSkipDb}, which means the whole database
+     * scope is dropped.
+     */
+    public void logSkipOrphanTableScan(String dbName, String reason) {
+        AUDIT.warn(
+                "action=skip_orphan_table_scan reason={} db={} ts={}",
+                reason,
+                dbName,
+                Instant.now());
+    }
+
+    /** Default-conservative skip of an orphan-partition dir (opt-in flag not set). */
+    public void logSkipOrphanPartition(FsPath dir, String reason) {
+        AUDIT.info(
+                "action=skip_orphan_partition reason={} path={} ts={}", reason, dir, Instant.now());
+    }
+
+    /** Skip a bucket target because its metadata-resolved root is outside cluster config. */
+    public void logSkipBucketOutOfScope(long tableId, Long partitionId, String resolvedRoot) {
+        AUDIT.info(
+                "action=skip_bucket_target reason=out-of-scope-root table_id={} partition_id={}"
+                        + " resolved_root={} ts={}",
+                tableId,
+                partitionId,
+                resolvedRoot,
+                Instant.now());
+    }
+
+    /**
+     * Final summary event emitted once at the end of a run, carrying the headline counters that
+     * operators query most often ("how many files were removed and how much space was reclaimed").
+     * Routed through the dedicated audit logger so the result is queryable from the same sink as
+     * the per-file {@code action=deleted} / {@code action=skip_*} lines.
+     *
+     * <p>{@code deleted_total} is derived here as {@code deletedFiles + emptyDirsRemoved}; the two
+     * components are also logged individually.
+     */
+    public void logSummary(
+            long scanned,
+            long deletedFiles,
+            long emptyDirsRemoved,
+            long deleteFailures,
+            long bytesReclaimed,
+            boolean dryRun) {
+        AUDIT.info(
+                "action=summary scanned={} deleted_total={} deleted_files={} empty_dirs_removed={}"
+                        + " delete_failures={} bytes_reclaimed={} dry_run={} ts={}",
+                scanned,
+                deletedFiles + emptyDirsRemoved,
+                deletedFiles,
+                emptyDirsRemoved,
+                deleteFailures,
+                bytesReclaimed,
+                dryRun,
+                Instant.now());
+    }
+}
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcher.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcher.java
new file mode 100644
index 0000000000..223c6b97c4
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcher.java
@@ -0,0 +1,359 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.annotation.VisibleForTesting; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.client.metadata.ActiveKvSnapshots; +import org.apache.fluss.client.metadata.RemoteLogManifestInfo; +import org.apache.fluss.flink.action.orphan.RpcErrorClassifier; +import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs; +import org.apache.fluss.fs.FSDataInputStream; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.remote.RemoteLogManifest; +import org.apache.fluss.remote.RemoteLogSegment; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; +import org.apache.fluss.utils.FlussPaths; +import org.apache.fluss.utils.IOUtils; +import org.apache.fluss.utils.RetryUtils; + +import javax.annotation.Nullable; + +import java.io.ByteArrayOutputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; + +import static 
org.apache.fluss.utils.Preconditions.checkArgument; + +/** + * Builds the active reference set for a single {@code (tableId, partitionId|null)} target, sourced + * from coordinator metadata via RPC (not from filesystem listing). + * + *

Log path: discovers each bucket's current remote log manifest path via {@code + * LIST_REMOTE_LOG_MANIFESTS}, then second-reads the manifest file from object storage. The + * per-target RPC is retried with exponential backoff via {@link RetryUtils}; per-bucket + * second-reads make a single attempt — a {@link FileNotFoundException} (manifest upserted between + * RPC and read) or any other IO failure immediately marks the bucket as {@link + * LogActiveRefsFetchResult.ManifestReadStatus#READ_FAILED} and recovery is left to the next cleanup + * round, avoiding {@code N × retries × IO} blow-up on cluster-wide turbulence. + * + *

KV path: {@code LIST_KV_SNAPSHOTS} returns snapshot ids directly (no second-read), so the + * per-target RPC retry alone is sufficient symmetry with the log path. + */ +@Internal +public final class ActiveRefsFetcher { + + /** + * Retry backoff base used by {@link RetryUtils} for per-target RPCs. With the default 3 retries + * and exponential backoff (200 → 400 → cap) this caps total retry delay at ~600ms — negligible + * vs the smoothing it gives over server jitter. + */ + private static final long DEFAULT_BACKOFF_MILLIS = 200L; + + private static final long MAX_BACKOFF_MILLIS = 2000L; + + private static final MetadataReader DEFAULT_METADATA_READER = + new MetadataReader() { + @Override + public byte[] read(FsPath path) throws IOException { + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + try (FSDataInputStream inputStream = path.getFileSystem().open(path)) { + IOUtils.copyBytes(inputStream, outputStream); + } + return outputStream.toByteArray(); + } + }; + + private final AdminFacade admin; + private final MetadataReader metadataReader; + private final int maxRetries; + private final long backoffMillis; + private final RateLimiter remoteFsOpRateLimiter; + + public ActiveRefsFetcher(Admin admin, int maxRetries, RateLimiter remoteFsOpRateLimiter) { + this( + wrap(admin), + DEFAULT_METADATA_READER, + maxRetries, + DEFAULT_BACKOFF_MILLIS, + remoteFsOpRateLimiter); + } + + /** Test constructor: defaults backoff to 0 so unit tests don't pay retry sleep. 
*/ + @VisibleForTesting + ActiveRefsFetcher(AdminFacade admin, MetadataReader metadataReader, int maxRetries) { + this(admin, metadataReader, maxRetries, 0L); + } + + @VisibleForTesting + ActiveRefsFetcher( + AdminFacade admin, MetadataReader metadataReader, int maxRetries, long backoffMillis) { + this(admin, metadataReader, maxRetries, backoffMillis, RateLimiter.create(1000.0)); + } + + @VisibleForTesting + ActiveRefsFetcher( + AdminFacade admin, + MetadataReader metadataReader, + int maxRetries, + long backoffMillis, + RateLimiter remoteFsOpRateLimiter) { + checkArgument(maxRetries >= 1, "maxRetries must be >= 1, got %s", maxRetries); + checkArgument(backoffMillis >= 0L, "backoffMillis must be >= 0, got %s", backoffMillis); + this.admin = admin; + this.metadataReader = metadataReader; + this.maxRetries = maxRetries; + this.backoffMillis = backoffMillis; + this.remoteFsOpRateLimiter = remoteFsOpRateLimiter; + } + + private static AdminFacade wrap(Admin admin) { + return new AdminFacade() { + @Override + public CompletableFuture> listRemoteLogManifests( + long tableId, @Nullable Long partitionId) { + return admin.listRemoteLogManifests(tableId, partitionId); + } + + @Override + public CompletableFuture listKvSnapshots( + long tableId, @Nullable Long partitionId) { + return admin.listKvSnapshots(tableId, partitionId); + } + }; + } + + /** + * Fetches per-bucket log active refs for a single {@code (tableId, partitionId|null)} target. + * Each bucket whose remote manifest is returned by the RPC is second-read in a single attempt; + * a {@link FileNotFoundException} or any other IO failure marks the bucket as {@link + * LogActiveRefsFetchResult.ManifestReadStatus#READ_FAILED} without affecting siblings. + * Per-target RPC failure (after retries) is reported via {@link + * LogActiveRefsFetchResult#listOk()}. 
+ */ + public LogActiveRefsFetchResult fetchLogActiveRefsByBucket( + long tableId, @Nullable Long partitionId) { + List manifests; + try { + manifests = + RetryUtils.executeWithRetry( + () -> admin.listRemoteLogManifests(tableId, partitionId).get(), + "listRemoteLogManifests", + maxRetries, + backoffMillis, + MAX_BACKOFF_MILLIS, + e -> + RpcErrorClassifier.classify(e) + != RpcErrorClassifier.Category.NOT_FOUND); + } catch (IOException e) { + return LogActiveRefsFetchResult.listFailed( + formatRpcFailureReason(tableId, partitionId, e.getCause())); + } + + Map> entriesByBucket = new HashMap<>(); + for (RemoteLogManifestInfo entry : manifests) { + int bucketId = entry.getTableBucket().getBucket(); + entriesByBucket.computeIfAbsent(bucketId, id -> new ArrayList<>()).add(entry); + } + + Map resolved = new HashMap<>(); + Map readFailures = new HashMap<>(); + for (Map.Entry> bucketEntries : + entriesByBucket.entrySet()) { + int bucketId = bucketEntries.getKey(); + try { + resolved.put(bucketId, buildBucketActiveRefs(bucketEntries.getValue())); + } catch (FileNotFoundException e) { + readFailures.put( + bucketId, + formatBucketReadFailureReason( + "Manifest not found (likely upserted concurrently)", + tableId, + partitionId, + bucketId, + e)); + } catch (ManifestParseException e) { + // Manifest payload is unreadable or violates the shared manifest serde schema. + // Distinct reason so operators triage separately from transient FS hiccups. 
+ readFailures.put( + bucketId, + formatBucketReadFailureReason( + "Manifest parse failure (corrupt or unexpected schema)", + tableId, + partitionId, + bucketId, + e)); + } catch (IOException e) { + readFailures.put( + bucketId, + formatBucketReadFailureReason( + "IO error reading manifest", tableId, partitionId, bucketId, e)); + } + } + return LogActiveRefsFetchResult.ofPerBucket(resolved, readFailures); + } + + /** + * Fetches the per-bucket active snapshot directories ({@code snap-{id}} names) for one {@code + * (tableId, partitionId|null)} target. The set per bucket is the union of RETAINED and + * STILL_IN_USE entries returned by {@link Admin#listKvSnapshots(long, Long)}. Per-target RPC + * failure (after retries) is reported via {@link KvActiveRefsFetchResult#listOk()}, symmetric + * with the log path. + */ + public KvActiveRefsFetchResult fetchKvActiveSnapDirs(long tableId, @Nullable Long partitionId) { + ActiveKvSnapshots activeKvSnapshots; + try { + activeKvSnapshots = + RetryUtils.executeWithRetry( + () -> admin.listKvSnapshots(tableId, partitionId).get(), + "listKvSnapshots", + maxRetries, + backoffMillis, + MAX_BACKOFF_MILLIS, + e -> + RpcErrorClassifier.classify(e) + != RpcErrorClassifier.Category.NOT_FOUND); + } catch (IOException e) { + return KvActiveRefsFetchResult.listFailed( + formatRpcFailureReason(tableId, partitionId, e.getCause())); + } + Map> dirsByBucket = new HashMap<>(); + for (Map.Entry> entry : + activeKvSnapshots.getSnapshotIdsByBucket().entrySet()) { + int bucketId = entry.getKey(); + Set dirNames = new HashSet<>(); + for (Long snapshotId : entry.getValue()) { + dirNames.add(FlussPaths.REMOTE_KV_SNAPSHOT_DIR_PREFIX + snapshotId); + } + dirsByBucket.put(bucketId, dirNames); + } + return KvActiveRefsFetchResult.ok(dirsByBucket); + } + + private static String formatRpcFailureReason( + long tableId, @Nullable Long partitionId, @Nullable Throwable cause) { + String reason = + String.format("RPC failure for tableId=%s partitionId=%s", 
tableId, partitionId); + if (cause != null && cause.getMessage() != null) { + reason = reason + ": " + cause.getMessage(); + } + return reason; + } + + private static String formatBucketReadFailureReason( + String prefix, + long tableId, + @Nullable Long partitionId, + int bucketId, + Throwable cause) { + String reason = + String.format( + "%s for tableId=%s partitionId=%s bucketId=%s", + prefix, tableId, partitionId, bucketId); + if (cause != null && cause.getMessage() != null) { + reason = reason + ": " + cause.getMessage(); + } + return reason; + } + + private BucketActiveRefs buildBucketActiveRefs(List entries) + throws IOException { + Set manifestPaths = new HashSet<>(); + Set segmentRelpaths = new HashSet<>(); + for (RemoteLogManifestInfo entry : entries) { + String path = entry.getRemoteLogManifestPath(); + manifestPaths.add(path); + remoteFsOpRateLimiter.acquire(); + byte[] manifestBytes = metadataReader.read(new FsPath(path)); + segmentRelpaths.addAll(parseLogSegmentRelativePaths(manifestBytes)); + } + return new BucketActiveRefs(segmentRelpaths, Collections.emptySet(), manifestPaths); + } + + private Set parseLogSegmentRelativePaths(byte[] manifestBytes) + throws ManifestParseException { + RemoteLogManifest manifest; + try { + manifest = RemoteLogManifest.fromJsonBytes(manifestBytes); + } catch (RuntimeException e) { + throw new ManifestParseException("Failed to parse remote log manifest", e); + } + + Set relativePaths = new HashSet<>(); + for (RemoteLogSegment segment : manifest.getRemoteLogSegmentList()) { + String segmentId = segment.remoteLogSegmentId().toString(); + long startOffset = segment.remoteLogStartOffset(); + long endOffset = segment.remoteLogEndOffset(); + String baseOffset = FlussPaths.filenamePrefixFromOffset(startOffset); + String writerOffset = FlussPaths.filenamePrefixFromOffset(endOffset); + + relativePaths.add(segmentId + "/" + baseOffset + FlussPaths.LOG_FILE_SUFFIX); + relativePaths.add(segmentId + "/" + baseOffset + 
FlussPaths.INDEX_FILE_SUFFIX); + relativePaths.add(segmentId + "/" + baseOffset + FlussPaths.TIME_INDEX_FILE_SUFFIX); + relativePaths.add( + segmentId + "/" + writerOffset + FlussPaths.WRITER_SNAPSHOT_FILE_SUFFIX); + } + return relativePaths; + } + + /** + * Thrown when a remote-log manifest payload is structurally invalid (missing required field, + * wrong shape). Distinct from {@link IOException} so the bucket-read failure handler can route + * it to the {@code "Manifest parse failure"} reason instead of the generic {@code "IO error"} + * bucket — same skip-this-round outcome, different operator triage. + */ + static final class ManifestParseException extends IOException { + ManifestParseException(String message, Throwable cause) { + super(message, cause); + } + } + + /** + * Thin abstraction over the {@link FlussAdmin} read-only RPCs the builder depends on ({@code + * listRemoteLogManifests} for the log active manifest, {@code listKvSnapshots} for the KV + * active snapshot dirs). Exposed for test injection. + */ + @VisibleForTesting + interface AdminFacade { + CompletableFuture> listRemoteLogManifests( + long tableId, @Nullable Long partitionId); + + CompletableFuture listKvSnapshots( + long tableId, @Nullable Long partitionId); + } + + /** + * Abstraction for reading manifest files from object storage. Must throw {@link + * FileNotFoundException} (and not a wrapped variant) when the path is absent, so the caller can + * distinguish "manifest pointer upserted concurrently" from genuine IO failures and surface + * each with a distinct failure reason. 
+ */ + @VisibleForTesting + interface MetadataReader { + byte[] read(FsPath path) throws IOException; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/KvActiveRefsFetchResult.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/KvActiveRefsFetchResult.java new file mode 100644 index 0000000000..7b1c6c7873 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/KvActiveRefsFetchResult.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.annotation.Internal; + +import javax.annotation.Nullable; + +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Result of KV active-snapshot-dir fetch for one {@code (tableId, partitionId|null)} target. + * + *

Mirrors the per-target {@code listOk + listFailureReason} axis of {@link + * LogActiveRefsFetchResult}. KV has no per-bucket failure dimension because the {@code + * LIST_KV_SNAPSHOTS} RPC returns snapshot ids directly (no second-read of an external file), so the + * per-bucket payload is just {@code Map>} of {@code snap-{id}} directory + * names. Buckets absent from the map are treated by the consumer as "empty active set → skip". + */ +@Internal +public final class KvActiveRefsFetchResult { + + private final RpcListStatus list; + private final Map> activeSnapDirsByBucket; + + private KvActiveRefsFetchResult( + RpcListStatus list, Map> activeSnapDirsByBucket) { + this.list = list; + Map> copy = new HashMap<>(); + for (Map.Entry> e : activeSnapDirsByBucket.entrySet()) { + copy.put(e.getKey(), Collections.unmodifiableSet(new HashSet<>(e.getValue()))); + } + this.activeSnapDirsByBucket = Collections.unmodifiableMap(copy); + } + + /** Result for a target whose {@code LIST_KV_SNAPSHOTS} RPC failed and exhausted retries. */ + public static KvActiveRefsFetchResult listFailed(String reason) { + return new KvActiveRefsFetchResult( + RpcListStatus.listFailed(reason), Collections.emptyMap()); + } + + /** Result for a target whose {@code LIST_KV_SNAPSHOTS} RPC succeeded. */ + static KvActiveRefsFetchResult ok(Map> activeSnapDirsByBucket) { + return new KvActiveRefsFetchResult(RpcListStatus.ok(), activeSnapDirsByBucket); + } + + /** Whether the per-target {@code LIST_KV_SNAPSHOTS} RPC succeeded. */ + public boolean listOk() { + return list.isOk(); + } + + /** Reason the per-target RPC failed; {@code null} when {@link #listOk()} is true. */ + @Nullable + public String listFailureReason() { + return list.reason(); + } + + /** + * Per-bucket active snapshot directory names ({@code snap-{id}}). Empty map when {@link + * #listOk()} is false. + * + *

Bucket absent from the map means "the RPC returned no active-snapshot entries for this + * bucket", which the consumer must treat as "cannot prove what is active here → skip KV + * cleanup for this bucket and emit {@code skip_kv_bucket reason=empty_active_set}". Empty does + * not mean "no active snapshots exist": the server enumerates buckets from ZK and that path can + * transiently underreport (partial reads, znode creation lag, stale historical bucket counts), + * so treating empty as no-op-skip is the only response compatible with the action's "may leak, + * must not mis-delete" hard constraint. + */ + public Map> activeSnapDirsByBucket() { + return activeSnapDirsByBucket; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/LogActiveRefsFetchResult.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/LogActiveRefsFetchResult.java new file mode 100644 index 0000000000..44c1227694 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/LogActiveRefsFetchResult.java @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs; + +import javax.annotation.Nullable; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * Result of log active-refs fetch for one {@code (tableId, partitionId|null)} target. + * + *

<p>The result is split along two orthogonal axes so each axis can be queried independently: + * + * <ul> + * <li>Per-target: {@link #listOk()} reports whether the {@code LIST_REMOTE_LOG_MANIFESTS} + * RPC succeeded. When it fails the per-bucket axis is meaningless and the caller should emit + * a single per-target skip and bypass the per-bucket loop entirely. + * <li>Per-bucket: {@link #statusFor(int)} reports one of {@link + * ManifestReadStatus#RESOLVED}, {@link ManifestReadStatus#READ_FAILED}, or {@link + * ManifestReadStatus#NOT_LISTED} for every bucket enumerated from table metadata. Only + * meaningful when {@link #listOk()} is true. + * </ul>
+ */ +@Internal +public final class LogActiveRefsFetchResult { + + /** Per-bucket outcome (only meaningful when {@link #listOk()} is true). */ + public enum ManifestReadStatus { + /** The RPC returned an entry for this bucket and its manifest was read successfully. */ + RESOLVED, + /** + * Per-bucket manifest second-read failed (FileNotFound from manifest upsert race, or other + * IO failure). The failing bucket is skipped for this round; recovery is by the next + * cleanup round. + */ + READ_FAILED, + /** + * Table metadata enumerates the bucket, but the {@code LIST_REMOTE_LOG_MANIFESTS} response + * did not include an entry for it — typically because the bucket has not yet committed any + * remote manifest (e.g. log tiering has not produced one), or an occasional server-side + * underreport (e.g. partial ZK read). Cleanup has nothing to clean for this bucket. + */ + NOT_LISTED + } + + private final RpcListStatus list; + private final Map resolved; + private final Map readFailures; + + private LogActiveRefsFetchResult( + RpcListStatus list, + Map resolved, + Map readFailures) { + this.list = list; + this.resolved = Collections.unmodifiableMap(new HashMap<>(resolved)); + this.readFailures = Collections.unmodifiableMap(new HashMap<>(readFailures)); + } + + /** + * Result for a target whose {@code LIST_REMOTE_LOG_MANIFESTS} RPC failed and exhausted retries. + */ + public static LogActiveRefsFetchResult listFailed(String reason) { + return new LogActiveRefsFetchResult( + RpcListStatus.listFailed(reason), Collections.emptyMap(), Collections.emptyMap()); + } + + /** + * Result for a target whose {@code LIST_REMOTE_LOG_MANIFESTS} RPC succeeded. {@code resolved} + * carries the per-bucket active refs for RESOLVED buckets; {@code readFailures} carries the + * per-bucket failure reasons for READ_FAILED buckets. Any bucket not present in either map is + * reported as {@link ManifestReadStatus#NOT_LISTED}. 
+ */ + static LogActiveRefsFetchResult ofPerBucket( + Map resolved, Map readFailures) { + return new LogActiveRefsFetchResult(RpcListStatus.ok(), resolved, readFailures); + } + + /** Whether the per-target {@code LIST_REMOTE_LOG_MANIFESTS} RPC succeeded. */ + public boolean listOk() { + return list.isOk(); + } + + /** Reason the per-target RPC failed; {@code null} when {@link #listOk()} is true. */ + @Nullable + public String listFailureReason() { + return list.reason(); + } + + /** + * Per-bucket manifest read status for a bucket enumerated from table metadata. Callers must + * first check {@link #listOk()} and skip the per-bucket loop entirely when it is false. + */ + public ManifestReadStatus statusFor(int bucketId) { + if (!list.isOk()) { + throw new IllegalStateException("Per-bucket status is not available when listOk=false"); + } + if (resolved.containsKey(bucketId)) { + return ManifestReadStatus.RESOLVED; + } + if (readFailures.containsKey(bucketId)) { + return ManifestReadStatus.READ_FAILED; + } + return ManifestReadStatus.NOT_LISTED; + } + + /** Active refs for a RESOLVED bucket. */ + public BucketActiveRefs activeRefsOf(int bucketId) { + BucketActiveRefs activeRefs = resolved.get(bucketId); + if (activeRefs == null) { + throw new IllegalStateException("Bucket " + bucketId + " is not RESOLVED"); + } + return activeRefs; + } + + /** Failure reason for a READ_FAILED bucket. 
*/ + public String readFailureReason(int bucketId) { + String reason = readFailures.get(bucketId); + if (reason == null) { + throw new IllegalStateException("Bucket " + bucketId + " is not READ_FAILED"); + } + return reason; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTracker.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTracker.java new file mode 100644 index 0000000000..c77d03323b --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/MaxKnownIdsTracker.java @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.annotation.Internal; + +/** + * Accumulates {@code maxKnownTableId} and {@code maxKnownPartitionId} during a single cleanup run. + * + *

<p>Values are updated from the successful scope-enumeration metadata lookups that already + * materialize concrete ids for cleanup orchestration: {@code getTableInfo()} for tables and {@code + * listPartitionInfos()} for partitions. The tracker is therefore purely RPC-derived and never sourced + * from FS dir-name parsing. + * + *

The tracked maximums serve as ID guards for orphan directory detection: only + * directories whose parsed ID is {@code <=} the observed maximum can be classified as orphan + * candidates. Directories with higher IDs are conservatively skipped as potentially freshly + * allocated. Because RPC failures cause the tracker to observe fewer IDs, the maximums are always a + * lower bound of the true cluster-wide maximum — making the guard strictly more conservative (safe + * direction) under partial failures. + */ +@Internal +public final class MaxKnownIdsTracker { + + private long maxKnownTableId = -1L; + private long maxKnownPartitionId = -1L; + + public void observeTableId(long tableId) { + if (tableId > maxKnownTableId) { + maxKnownTableId = tableId; + } + } + + public void observePartitionId(long partitionId) { + if (partitionId > maxKnownPartitionId) { + maxKnownPartitionId = partitionId; + } + } + + public long maxKnownTableId() { + return maxKnownTableId; + } + + public long maxKnownPartitionId() { + return maxKnownPartitionId; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/RpcListStatus.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/RpcListStatus.java new file mode 100644 index 0000000000..4113dd500c --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/build/RpcListStatus.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.build; + +import javax.annotation.Nullable; + +/** + * Per-target status of a list RPC (target = one {@code (tableId, partitionId|null)} pair), shared + * by {@link LogActiveRefsFetchResult} and {@link KvActiveRefsFetchResult}. + * + *

Captures the {@code listOk + listFailureReason} pair so both result types can delegate the + * per-target axis to a single value and surface identical {@code listOk()} / {@code + * listFailureReason()} APIs to consumers. + */ +final class RpcListStatus { + + private static final RpcListStatus OK = new RpcListStatus(true, null); + + private final boolean ok; + @Nullable private final String reason; + + private RpcListStatus(boolean ok, @Nullable String reason) { + this.ok = ok; + this.reason = reason; + } + + static RpcListStatus ok() { + return OK; + } + + static RpcListStatus listFailed(String reason) { + return new RpcListStatus(false, reason); + } + + boolean isOk() { + return ok; + } + + @Nullable + String reason() { + return reason; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfig.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfig.java new file mode 100644 index 0000000000..839ca7ccc1 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfig.java @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.config; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.adapter.MultipleParameterToolAdapter; +import org.apache.fluss.utils.StringUtils; + +import javax.annotation.Nullable; + +import java.io.Serializable; +import java.time.Duration; +import java.time.Instant; +import java.time.OffsetDateTime; +import java.time.format.DateTimeParseException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +/** Parsed command-line options for the orphan files cleanup action. */ +@Internal +public final class OrphanCleanConfig implements Serializable { + + private static final long serialVersionUID = 1L; + + /** + * Minimum gap between any user-supplied cutoff and {@code now}. A cutoff closer to {@code now} + * would risk classifying files that are mid-write (committed file written, snapshot/manifest + * not yet visible to {@code ListRemoteLogManifests} / {@code ListKvSnapshots}) as orphan and + * deleting them. + */ + private static final Duration HARD_LOWER_BOUND = Duration.ofDays(1); + + /** Default file-level cutoff: files written before {@code now - 3d} are deletion-eligible. 
*/ + private static final Duration DEFAULT_OLDER_THAN = Duration.ofDays(3); + + private static final long DEFAULT_REMOTE_FS_OP_RATE_LIMIT_PER_SECOND = 100L; + + private final String bootstrapServer; + private final boolean allDatabases; + private final @Nullable String database; + private final @Nullable String table; + private final long olderThanMillis; + private final boolean dryRun; + private final long remoteFsOpRateLimitPerSecond; + private final @Nullable Integer parallelism; + private final boolean allowDeleteManifest; + private final boolean allowCleanOrphanTables; + private final boolean allowCleanOrphanPartitions; + private final Map extraConfigs; + + private OrphanCleanConfig( + String bootstrapServer, + boolean allDatabases, + @Nullable String database, + @Nullable String table, + long olderThanMillis, + boolean dryRun, + long remoteFsOpRateLimitPerSecond, + @Nullable Integer parallelism, + boolean allowDeleteManifest, + boolean allowCleanOrphanTables, + boolean allowCleanOrphanPartitions, + Map extraConfigs) { + this.bootstrapServer = bootstrapServer; + this.allDatabases = allDatabases; + this.database = database; + this.table = table; + this.olderThanMillis = olderThanMillis; + this.dryRun = dryRun; + this.remoteFsOpRateLimitPerSecond = remoteFsOpRateLimitPerSecond; + this.parallelism = parallelism; + this.allowDeleteManifest = allowDeleteManifest; + this.allowCleanOrphanTables = allowCleanOrphanTables; + this.allowCleanOrphanPartitions = allowCleanOrphanPartitions; + this.extraConfigs = Collections.unmodifiableMap(new HashMap<>(extraConfigs)); + } + + /** Parses a cleanup config from CLI parameters. 
*/ + public static OrphanCleanConfig fromParams(MultipleParameterToolAdapter params) { + String bootstrapServer = params.get("bootstrap-server"); + if (StringUtils.isNullOrWhitespaceOnly(bootstrapServer)) { + throw new IllegalArgumentException("--bootstrap-server is required"); + } + + boolean allDatabases = params.has("all-databases"); + String database = params.get("database"); + if (allDatabases && !StringUtils.isNullOrWhitespaceOnly(database)) { + throw new IllegalArgumentException( + "--database and --all-databases are mutually exclusive"); + } + if (!allDatabases && StringUtils.isNullOrWhitespaceOnly(database)) { + throw new IllegalArgumentException( + "Either --database or --all-databases must be provided"); + } + if (allDatabases && !StringUtils.isNullOrWhitespaceOnly(params.get("table"))) { + throw new IllegalArgumentException( + "--table requires --database and cannot be used with --all-databases"); + } + + long now = System.currentTimeMillis(); + long olderThanMillis = + parseCutoff("--older-than", params.get("older-than"), now, DEFAULT_OLDER_THAN); + long remoteFsOpRateLimitPerSecond = + parsePositiveRateLimit( + "--remote-fs-op-rate-limit-per-second", + params.get("remote-fs-op-rate-limit-per-second"), + DEFAULT_REMOTE_FS_OP_RATE_LIMIT_PER_SECOND); + Integer parallelism = parseParallelism(params.get("parallelism")); + boolean allowDeleteManifest = params.has("allow-delete-manifest"); + boolean allowCleanOrphanTables = params.has("allow-clean-orphan-tables"); + boolean allowCleanOrphanPartitions = params.has("allow-clean-orphan-partitions"); + + return new OrphanCleanConfig( + bootstrapServer, + allDatabases, + database, + params.get("table"), + olderThanMillis, + params.has("dry-run"), + remoteFsOpRateLimitPerSecond, + parallelism, + allowDeleteManifest, + allowCleanOrphanTables, + allowCleanOrphanPartitions, + parseExtraConfigs(params.getMultiParameter("conf"))); + } + + /** + * Parses a CLI cutoff value into an absolute epoch-ms timestamp. 
Empty input falls back to + * {@code now - defaultGap}. Explicit input must be ISO-8601 with an explicit offset (e.g. + * {@code 2024-01-01T00:00:00+08:00} or {@code 2024-01-01T00:00:00Z}) and must be at least + * {@link #HARD_LOWER_BOUND} earlier than {@code now} — closer-to-now cutoffs would race with + * active writes (see {@code HARD_LOWER_BOUND} javadoc). + */ + private static long parseCutoff( + String flag, @Nullable String value, long now, Duration defaultGap) { + if (StringUtils.isNullOrWhitespaceOnly(value)) { + return now - defaultGap.toMillis(); + } + OffsetDateTime parsed; + try { + parsed = OffsetDateTime.parse(value); + } catch (DateTimeParseException e) { + throw new IllegalArgumentException( + flag + + " must be an ISO-8601 timestamp with an explicit offset (e.g." + + " '2024-01-01T00:00:00+08:00' or '2024-01-01T00:00:00Z'); got: " + + value, + e); + } + long parsedMillis = parsed.toInstant().toEpochMilli(); + long maxAllowed = now - HARD_LOWER_BOUND.toMillis(); + if (parsedMillis > maxAllowed) { + throw new IllegalArgumentException( + flag + + " must be at least 1d before now (got " + + Instant.ofEpochMilli(parsedMillis) + + ", now is " + + Instant.ofEpochMilli(now) + + "); a closer cutoff would race with mid-write files"); + } + return parsedMillis; + } + + private static long parsePositiveRateLimit( + String flag, @Nullable String value, long defaultValue) { + if (StringUtils.isNullOrWhitespaceOnly(value)) { + return defaultValue; + } + long rate = Long.parseLong(value); + if (rate <= 0) { + throw new IllegalArgumentException(flag + " must be positive"); + } + return rate; + } + + @Nullable + private static Integer parseParallelism(@Nullable String value) { + if (StringUtils.isNullOrWhitespaceOnly(value)) { + return null; + } + int p = Integer.parseInt(value); + if (p <= 0) { + throw new IllegalArgumentException("--parallelism must be positive"); + } + return p; + } + + private static Map parseExtraConfigs(@Nullable Collection values) { + if 
(values == null || values.isEmpty()) { + return Collections.emptyMap(); + } + Map configs = new HashMap(); + for (String kv : values) { + int eqIdx = kv.indexOf('='); + if (eqIdx <= 0) { + throw new IllegalArgumentException( + "--conf must be in key=value format, got: " + kv); + } + configs.put(kv.substring(0, eqIdx), kv.substring(eqIdx + 1)); + } + return configs; + } + + /** Returns the bootstrap server list used to connect to Fluss. */ + public String bootstrapServer() { + return bootstrapServer; + } + + /** Returns whether the cleanup targets all databases. */ + public boolean allDatabases() { + return allDatabases; + } + + /** Returns the single targeted database when the action is not scoped to all databases. */ + public Optional database() { + return Optional.ofNullable(database); + } + + /** Returns the optional targeted table name. */ + public Optional table() { + return Optional.ofNullable(table); + } + + /** + * Returns the file-level cutoff as an absolute epoch-millis timestamp, frozen at action + * startup. A candidate file is deletion-eligible iff its mtime is strictly less than this + * value. The cutoff does not slide during the run — long scans cannot accidentally pull in + * files written after startup. + */ + public long olderThanMillis() { + return olderThanMillis; + } + + /** Returns whether the action runs in dry-run mode. */ + public boolean dryRun() { + return dryRun; + } + + /** + * Returns the best-effort job-level target rate for remote filesystem operations per second. + * + *

The budget is shared by remote filesystem metadata reads, manifest reads, and deletes. + * Scan subtasks split this value by operator parallelism because Flink does not provide a + * cross-JVM limiter for this action. + */ + public long remoteFsOpRateLimitPerSecond() { + return remoteFsOpRateLimitPerSecond; + } + + /** Returns the optional parallelism for the ScanAndClean stage. */ + public Optional parallelism() { + return Optional.ofNullable(parallelism); + } + + /** + * Opt-in to delete {@code .manifest} files. Default {@code false}: mis-deleting an active + * manifest leaves the coordinator's manifest pointer dangling and breaks the bucket's metadata + * chain — the failure mode is catastrophic and asymmetric vs the trivial space cost of keeping + * orphan manifests (KB-sized files), so deletion is gated behind an explicit operator flag. + */ + public boolean allowDeleteManifest() { + return allowDeleteManifest; + } + + /** + * Opt-in to recursively clean files inside an orphan-table directory. Default {@code false}: + * the action only audits the detected orphan dir and leaves its contents untouched, because an + * id-based misclassification of a freshly-created table as orphan would otherwise be + * unrecoverable. Operators flip this on once they have reviewed the audit log. + */ + public boolean allowCleanOrphanTables() { + return allowCleanOrphanTables; + } + + /** + * Opt-in to recursively clean files inside an orphan-partition directory. Same default-audit + * rationale as {@link #allowCleanOrphanTables()}. + */ + public boolean allowCleanOrphanPartitions() { + return allowCleanOrphanPartitions; + } + + /** + * Returns extra configuration entries passed via {@code --conf key=value}. These are propagated + * to {@link org.apache.fluss.fs.FileSystem#initialize} for remote filesystem authentication + * (e.g. {@code fs.oss.accessKeyId}, {@code fs.oss.accessKeySecret}). 
+ */ + public Map extraConfigs() { + return extraConfigs; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleter.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleter.java new file mode 100644 index 0000000000..9b52fa43d8 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleter.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.fs; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.rule.Decision; +import org.apache.fluss.flink.action.orphan.rule.RuleId; +import org.apache.fluss.fs.FileStatus; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +import static org.apache.fluss.utils.Preconditions.checkArgument; + +/** + * Sole entry point for filesystem deletion within the orphan cleanup package. + * + *

Only two operations are exposed: + * + *

    + *
  • {@link #deleteFile} - delete a single file (never recursive). + *
  • {@link #deleteEmptyDir} - delete a directory only if it is currently empty. + *
+ * + *

By design there is no recursive-delete API; any caller that needs deletion under {@code + * fluss-flink-common/.../action/orphan/} should go through this class. The single-entry-point + * invariant is currently enforced only by convention — there is no Checkstyle rule guarding it. + */ +@Internal +public final class SafeDeleter { + + private static final Logger LOG = LoggerFactory.getLogger(SafeDeleter.class); + + private final FileSystem fs; + private final boolean dryRun; + private final AuditLogger audit; + private final RateLimiter remoteFsOpRateLimiter; + + public SafeDeleter( + FileSystem fs, boolean dryRun, AuditLogger audit, RateLimiter remoteFsOpRateLimiter) { + this.fs = fs; + this.dryRun = dryRun; + this.audit = audit; + this.remoteFsOpRateLimiter = remoteFsOpRateLimiter; + } + + /** + * Delete a single file. + * + * @return {@code true} if the file was actually deleted (or recorded as would-be-deleted under + * {@code dryRun}); {@code false} if {@link FileSystem#delete} returned {@code false} + * (deletion silently failed — e.g. permissions, transient remote-store error). Callers + * should track {@code false} returns as delete failures in their run summary. + */ + public boolean deleteFile(FsPath file, Decision decision, RuleId ruleId) { + checkArgument( + decision == Decision.DELETE, + "deleteFile must only be called for Decision.DELETE, got %s", + decision); + if (dryRun) { + audit.logWouldDelete(file, ruleId); + return true; + } + remoteFsOpRateLimiter.acquire(); + try { + boolean ok = fs.delete(file, false); + audit.logDeleted(file, ruleId, ok); + return ok; + } catch (IOException e) { + LOG.warn("Failed to delete file: {}", file, e); + audit.logDeleted(file, ruleId, false); + return false; + } + } + + /** + * Delete a directory only if it is currently empty. 
+ * + * @return {@code true} if the directory was actually deleted (or recorded as would-be-deleted + * under {@code dryRun}); {@code false} if the directory was non-empty / unreadable, or if + * {@link FileSystem#delete} returned {@code false}. Callers should not increment a "deleted + * directory" counter when this returns {@code false}. + */ + public boolean deleteEmptyDir(FsPath dir) { + FileStatus[] children = listChildrenSilently(dir); + if (children == null || children.length > 0) { + return false; + } + if (dryRun) { + audit.logWouldDeleteDir(dir); + return true; + } + remoteFsOpRateLimiter.acquire(); + try { + boolean ok = fs.delete(dir, false); + if (ok) { + audit.logDirDeleted(dir); + } + return ok; + } catch (IOException e) { + LOG.warn("Failed to delete empty directory: {}", dir, e); + return false; + } + } + + private FileStatus[] listChildrenSilently(FsPath dir) { + try { + remoteFsOpRateLimiter.acquire(); + return fs.listStatus(dir); + } catch (IOException ignored) { + return null; + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleanTask.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleanTask.java new file mode 100644 index 0000000000..70499fd285 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleanTask.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; + +import javax.annotation.Nullable; + +import java.util.HashSet; +import java.util.Set; + +/** + * Work item for a single bucket's file-level cleanup. Carries everything needed to execute cleanup + * without coordinator interaction: FS paths, manifest locations for second-read, and the + * already-resolved KV active snapshot directory names. + */ +@Internal +public final class BucketCleanTask implements CleanTask { + + private static final long serialVersionUID = 1L; + + @Nullable private final String logTabletDir; + @Nullable private final String kvTabletDir; + private final Set logSegmentRelativePaths; + private final Set logActiveManifestPaths; + private final Set kvActiveSnapDirs; + private final long cutoffMillis; + private final boolean dryRun; + private final boolean allowDeleteManifest; + + public BucketCleanTask( + @Nullable String logTabletDir, + @Nullable String kvTabletDir, + Set logSegmentRelativePaths, + Set logActiveManifestPaths, + Set kvActiveSnapDirs, + long cutoffMillis, + boolean dryRun, + boolean allowDeleteManifest) { + this.logTabletDir = logTabletDir; + this.kvTabletDir = kvTabletDir; + this.logSegmentRelativePaths = new HashSet<>(logSegmentRelativePaths); + this.logActiveManifestPaths = new HashSet<>(logActiveManifestPaths); + this.kvActiveSnapDirs = new HashSet<>(kvActiveSnapDirs); + this.cutoffMillis = cutoffMillis; + this.dryRun = dryRun; + this.allowDeleteManifest = allowDeleteManifest; + } + + @Nullable + public 
String logTabletDir() { + return logTabletDir; + } + + @Nullable + public String kvTabletDir() { + return kvTabletDir; + } + + /** Active log segment relative paths (already resolved from manifests in Stage 1). */ + public Set logSegmentRelativePaths() { + return logSegmentRelativePaths; + } + + /** Active manifest paths (already resolved from RPC in Stage 1). */ + public Set logActiveManifestPaths() { + return logActiveManifestPaths; + } + + /** + * KV active snapshot directory names (already resolved from RPC, no further FS read needed). + */ + public Set kvActiveSnapDirs() { + return kvActiveSnapDirs; + } + + public long cutoffMillis() { + return cutoffMillis; + } + + public boolean dryRun() { + return dryRun; + } + + public boolean allowDeleteManifest() { + return allowDeleteManifest; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleaner.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleaner.java new file mode 100644 index 0000000000..a1e13cf424 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/BucketCleaner.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.fs.SafeDeleter; +import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs; +import org.apache.fluss.flink.action.orphan.rule.Decision; +import org.apache.fluss.flink.action.orphan.rule.FileMeta; +import org.apache.fluss.flink.action.orphan.rule.FileRule; +import org.apache.fluss.flink.action.orphan.rule.RuleDispatcher; +import org.apache.fluss.fs.FileStatus; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; +import org.apache.fluss.utils.FlussPaths; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; + +/** + * Per-bucket orphan cleanup for live buckets: walks the provided bucket directories and dispatches + * each file to the appropriate {@link FileRule} using the caller-supplied active reference set. + * + *

All deletions go through {@link SafeDeleter} (no recursive deletes). Unknown file types are + * skipped with an audit warning per the design's "unknown-types-not-deleted" principle. + */ +@Internal +public final class BucketCleaner { + + private static final Logger LOG = LoggerFactory.getLogger(BucketCleaner.class); + + private final RuleDispatcher dispatcher; + private final SafeDeleter safeDeleter; + private final AuditLogger audit; + private final long cutoffMillis; + private final RateLimiter remoteFsOpRateLimiter; + + public BucketCleaner( + RuleDispatcher dispatcher, + SafeDeleter safeDeleter, + AuditLogger audit, + long cutoffMillis, + RateLimiter remoteFsOpRateLimiter) { + this.dispatcher = dispatcher; + this.safeDeleter = safeDeleter; + this.audit = audit; + this.cutoffMillis = cutoffMillis; + this.remoteFsOpRateLimiter = remoteFsOpRateLimiter; + } + + /** Cleans one bucket's log/kv subtrees using the caller-supplied active reference set. */ + public BucketCleanStats clean(BucketActiveRefs activeRefs, FsPath... 
bucketDirs) + throws IOException { + BucketCleanStats stats = BucketCleanStats.empty(); + for (FsPath bucketDir : bucketDirs) { + if (bucketDir != null) { + walkAndCleanDir(bucketDir, activeRefs, stats); + } + } + return stats; + } + + private void walkAndCleanDir(FsPath root, BucketActiveRefs activeRefs, BucketCleanStats stats) + throws IOException { + FileSystem fs = root.getFileSystem(); + remoteFsOpRateLimiter.acquire(); + if (!fs.exists(root)) { + return; + } + Deque stack = new ArrayDeque(); + stack.push(new DirVisit(root, false, false)); + while (!stack.isEmpty()) { + DirVisit visit = stack.pop(); + if (visit.postOrder) { + if (visit.oldEnough && safeDeleter.deleteEmptyDir(visit.dir)) { + stats.deleted++; + stats.emptyDirsRemoved++; + } + continue; + } + FileStatus[] children; + try { + remoteFsOpRateLimiter.acquire(); + children = fs.listStatus(visit.dir); + } catch (IOException e) { + LOG.warn("Failed to list directory: {}", visit.dir, e); + continue; + } + if (children == null) { + continue; + } + if (!visit.dir.toString().equals(root.toString())) { + stack.push(new DirVisit(visit.dir, true, visit.oldEnough)); + } + for (FileStatus child : children) { + FsPath childPath = child.getPath(); + if (child.isDir()) { + if (FlussPaths.REMOTE_KV_SNAPSHOT_SHARED_DIR.equals(childPath.getName())) { + continue; + } + stack.push( + new DirVisit( + childPath, false, child.getModificationTime() < cutoffMillis)); + continue; + } + FileMeta meta = + new FileMeta(childPath, child.getLen(), child.getModificationTime()); + FileRule rule = dispatcher.dispatch(meta); + Decision decision = rule.evaluate(meta, activeRefs, cutoffMillis); + stats.scanned++; + switch (decision) { + case DELETE: + if (safeDeleter.deleteFile(meta.path(), decision, rule.id())) { + stats.deleted++; + stats.bytesReclaimed += meta.size(); + } else { + stats.deleteFailures++; + } + break; + case SKIP_UNKNOWN: + audit.logSkipUnknown(meta.path(), rule.id()); + break; + case KEEP_ACTIVE: + case DEFER: + // 
no-op + break; + default: + // unknown decision — skip defensively + break; + } + } + } + } + + /** Per-bucket cleanup statistics. */ + public static final class BucketCleanStats { + public long scanned; + public long deleted; + public long emptyDirsRemoved; + public long deleteFailures; + public long bytesReclaimed; + + public static BucketCleanStats empty() { + return new BucketCleanStats(); + } + } + + private static final class DirVisit { + private final FsPath dir; + private final boolean postOrder; + private final boolean oldEnough; + + private DirVisit(FsPath dir, boolean postOrder, boolean oldEnough) { + this.dir = dir; + this.postOrder = postOrder; + this.oldEnough = oldEnough; + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanStats.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanStats.java new file mode 100644 index 0000000000..cfecb0096e --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanStats.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; + +import java.io.Serializable; + +/** + * Per-task cleanup statistics emitted by each {@link ScanAndCleanFunction} subtask. The scalar + * counters are accumulated by {@link StatsAggregateOperator} via simple addition. + */ +@Internal +public final class CleanStats implements Serializable { + + private static final long serialVersionUID = 1L; + + private final long scanned; + private final long deleted; + private final long emptyDirsRemoved; + private final long deleteFailures; + private final long bytesReclaimed; + + public CleanStats(long scanned, long deleted, long deleteFailures, long bytesReclaimed) { + this(scanned, deleted, 0L, deleteFailures, bytesReclaimed); + } + + public CleanStats( + long scanned, + long deleted, + long emptyDirsRemoved, + long deleteFailures, + long bytesReclaimed) { + this.scanned = scanned; + this.deleted = deleted; + this.emptyDirsRemoved = emptyDirsRemoved; + this.deleteFailures = deleteFailures; + this.bytesReclaimed = bytesReclaimed; + } + + public static CleanStats empty() { + return new CleanStats(0L, 0L, 0L, 0L); + } + + public long scanned() { + return scanned; + } + + public long deleted() { + return deleted; + } + + public long emptyDirsRemoved() { + return emptyDirsRemoved; + } + + public long deleteFailures() { + return deleteFailures; + } + + public long bytesReclaimed() { + return bytesReclaimed; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanTask.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanTask.java new file mode 100644 index 0000000000..69f691ce99 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/CleanTask.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
/**
 * Marker interface for work items emitted by {@link ScopeEnumeratorFunction} and consumed by {@link
 * ScanAndCleanFunction}. Implementations carry enough context for a single subtask to execute
 * cleanup independently (no further coordinator interaction needed).
 *
 * <p>The interface declares no methods; it only requires implementations to be {@link
 * Serializable}.
 */
@Internal
public interface CleanTask extends Serializable {}
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; + +/** + * Work item for cleaning an orphan table or partition directory. The directory has already been + * identified as an orphan candidate by {@link ScopeEnumeratorFunction} (ID guard satisfied). + */ +@Internal +public final class OrphanDirCleanTask implements CleanTask { + + private static final long serialVersionUID = 1L; + + private final String dirPath; + private final long cutoffMillis; + private final boolean dryRun; + private final boolean allowDeleteManifest; + + public OrphanDirCleanTask( + String dirPath, long cutoffMillis, boolean dryRun, boolean allowDeleteManifest) { + this.dirPath = dirPath; + this.cutoffMillis = cutoffMillis; + this.dryRun = dryRun; + this.allowDeleteManifest = allowDeleteManifest; + } + + public String dirPath() { + return dirPath; + } + + public long cutoffMillis() { + return cutoffMillis; + } + + public boolean dryRun() { + return dryRun; + } + + public boolean allowDeleteManifest() { + return allowDeleteManifest; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanFilesCleanJob.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanFilesCleanJob.java new file mode 100644 index 0000000000..3008715c14 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/OrphanFilesCleanJob.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig; + +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.typeinfo.TypeHint; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +/** + * Builds and executes the 3-stage Flink Batch DAG for orphan files cleanup. + * + *

+ * Stage 1: ScopeEnumerator (p=1)   — coordinator RPCs, emits CleanTask
+ * Stage 2: ScanAndClean (p=N)      — FS scan + rate-limited delete, emits CleanStats
+ * Stage 3: StatsAggregate (p=1)    — merge stats, emits final CleanStats
+ * 
+ */ +@Internal +public final class OrphanFilesCleanJob { + + private OrphanFilesCleanJob() {} + + /** + * Builds the DAG, executes it in batch mode, and returns the final aggregated cleanup + * statistics. + * + * @param env the Flink execution environment (caller configures classpath, etc.) + * @param config parsed orphan cleanup configuration + * @param parallelism the parallelism for Stage 2 (ScanAndClean); null uses env default + * @return the final cleanup statistics + */ + public static CleanStats execute( + StreamExecutionEnvironment env, OrphanCleanConfig config, Integer parallelism) + throws Exception { + env.setRuntimeMode(RuntimeExecutionMode.BATCH); + + // Stage 1: ScopeEnumerator (parallelism=1) + DataStream trigger = + env.fromCollection(Collections.singletonList(1), TypeInformation.of(Integer.class)); + + SingleOutputStreamOperator tasks = + trigger.process(new ScopeEnumeratorFunction(config)) + .returns(TypeInformation.of(new TypeHint() {})) + .setParallelism(1) + .setMaxParallelism(1) + .name("ScopeEnumerator"); + + // Stage 2: ScanAndClean (parallelism=N) + SingleOutputStreamOperator stats = + tasks.rebalance() + .process( + new ScanAndCleanFunction( + config.remoteFsOpRateLimitPerSecond(), + config.extraConfigs())) + .returns(TypeInformation.of(new TypeHint() {})) + .name("ScanAndClean"); + if (parallelism != null) { + stats = stats.setParallelism(parallelism); + } + + // Stage 3: StatsAggregate (parallelism=1) + SingleOutputStreamOperator result = + stats.transform( + "StatsAggregate", + TypeInformation.of(new TypeHint() {}), + new StatsAggregateOperator(config.dryRun())) + .setParallelism(1) + .setMaxParallelism(1); + + // Execute and collect the single result + List collected = collectResults(result); + if (collected.isEmpty()) { + return CleanStats.empty(); + } + return collected.get(0); + } + + @SuppressWarnings("deprecation") + private static List collectResults(DataStream result) throws Exception { + Iterator iterator = 
result.executeAndCollect("OrphanFilesClean"); + List results = new java.util.ArrayList(); + while (iterator.hasNext()) { + results.add(iterator.next()); + } + return results; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScanAndCleanFunction.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScanAndCleanFunction.java new file mode 100644 index 0000000000..85a64d349b --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScanAndCleanFunction.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.config.Configuration; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.fs.SafeDeleter; +import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs; +import org.apache.fluss.flink.action.orphan.rule.Decision; +import org.apache.fluss.flink.action.orphan.rule.FileMeta; +import org.apache.fluss.flink.action.orphan.rule.FileRule; +import org.apache.fluss.flink.action.orphan.rule.RuleDispatcher; +import org.apache.fluss.fs.FileStatus; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; + +import org.apache.flink.streaming.api.functions.ProcessFunction; +import org.apache.flink.util.Collector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.Map; + +/** + * Stage 2 of the orphan files cleanup job. Runs at user-configured parallelism (N) and performs + * pure FS operations — no coordinator RPC interaction. + * + *

Each subtask processes assigned {@link CleanTask} items serially: + * + *

    + *
  • {@link BucketCleanTask}: second-reads manifests from object storage to build the active + * reference set, then walks log/kv directories and deletes orphan files and old empty child + * directories. + *
  • {@link OrphanDirCleanTask}: recursively walks the orphan directory and deletes all files + * older than the cutoff, then removes old empty directories bottom-up. + *
+ * + *

Each task emits a single {@link CleanStats} containing scalar counters. Remote filesystem + * operation rate is limited per-subtask: {@code configuredRate / runtimeParallelism}. The serial + * processing within each subtask guarantees no concurrent throttler access. + */ +@Internal +public final class ScanAndCleanFunction extends ProcessFunction { + + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(ScanAndCleanFunction.class); + + private final long remoteFsOpRateLimitPerSecond; + private final Map extraConfigs; + + private transient AuditLogger audit; + private transient RateLimiter remoteFsOpRateLimiter; + + public ScanAndCleanFunction( + long remoteFsOpRateLimitPerSecond, Map extraConfigs) { + this.remoteFsOpRateLimitPerSecond = remoteFsOpRateLimitPerSecond; + this.extraConfigs = extraConfigs; + } + + @Override + public void open(org.apache.flink.api.common.functions.OpenContext openContext) + throws Exception { + super.open(openContext); + if (!extraConfigs.isEmpty()) { + FileSystem.initialize(Configuration.fromMap(extraConfigs), null); + } + audit = new AuditLogger(); + int parallelism = getRuntimeContext().getTaskInfo().getNumberOfParallelSubtasks(); + int subtaskIndex = getRuntimeContext().getTaskInfo().getIndexOfThisSubtask(); + // Distribute the configured rate as base + 1 extra for the first `remainder` subtasks. + // Flink does not provide a cross-JVM limiter here, so this is a best-effort job-level + // target. Each subtask gets at least 1/s; if parallelism exceeds the configured rate, the + // effective aggregate can exceed the target by that floor. 
+ remoteFsOpRateLimiter = + RateLimiter.create( + perSubtaskRate(remoteFsOpRateLimitPerSecond, parallelism, subtaskIndex)); + } + + @Override + public void processElement(CleanTask task, Context ctx, Collector out) + throws Exception { + if (task instanceof BucketCleanTask) { + out.collect(processBucketTask((BucketCleanTask) task)); + } else if (task instanceof OrphanDirCleanTask) { + out.collect(processOrphanDirTask((OrphanDirCleanTask) task)); + } + } + + // ------------------------------------------------------------------------- + // BucketCleanTask processing + // ------------------------------------------------------------------------- + + private CleanStats processBucketTask(BucketCleanTask task) throws IOException { + FsPath logDir = task.logTabletDir() != null ? new FsPath(task.logTabletDir()) : null; + FsPath kvDir = task.kvTabletDir() != null ? new FsPath(task.kvTabletDir()) : null; + + FsPath anyDir = logDir != null ? logDir : kvDir; + if (anyDir == null) { + return CleanStats.empty(); + } + + BucketActiveRefs activeRefs = + new BucketActiveRefs( + task.logSegmentRelativePaths(), + task.kvActiveSnapDirs(), + task.logActiveManifestPaths()); + RuleDispatcher dispatcher = new RuleDispatcher(task.allowDeleteManifest()); + SafeDeleter safeDeleter = createSafeDeleter(anyDir.getFileSystem(), task.dryRun()); + BucketCleaner cleaner = + new BucketCleaner( + dispatcher, safeDeleter, audit, task.cutoffMillis(), remoteFsOpRateLimiter); + + BucketCleaner.BucketCleanStats bucketStats = cleaner.clean(activeRefs, logDir, kvDir); + + return new CleanStats( + bucketStats.scanned, + bucketStats.deleted, + bucketStats.emptyDirsRemoved, + bucketStats.deleteFailures, + bucketStats.bytesReclaimed); + } + + // ------------------------------------------------------------------------- + // OrphanDirCleanTask processing + // ------------------------------------------------------------------------- + + private CleanStats processOrphanDirTask(OrphanDirCleanTask task) throws 
IOException { + FsPath dirPath = new FsPath(task.dirPath()); + FileSystem fs = dirPath.getFileSystem(); + remoteFsOpRateLimiter.acquire(); + if (!fs.exists(dirPath)) { + return CleanStats.empty(); + } + + SafeDeleter safeDeleter = createSafeDeleter(fs, task.dryRun()); + RuleDispatcher dispatcher = new RuleDispatcher(task.allowDeleteManifest(), true); + + long scanned = 0L; + long deleted = 0L; + long emptyDirsRemoved = 0L; + long deleteFailures = 0L; + long bytesReclaimed = 0L; + + remoteFsOpRateLimiter.acquire(); + FileStatus rootStatus = fs.getFileStatus(dirPath); + Deque stack = new ArrayDeque(); + stack.push( + new DirVisit( + dirPath, + false, + rootStatus.isDir() + && rootStatus.getModificationTime() < task.cutoffMillis())); + while (!stack.isEmpty()) { + DirVisit visit = stack.pop(); + if (visit.postOrder) { + if (visit.oldEnough && safeDeleter.deleteEmptyDir(visit.dir)) { + deleted++; + emptyDirsRemoved++; + } + continue; + } + FileStatus[] children; + try { + remoteFsOpRateLimiter.acquire(); + children = fs.listStatus(visit.dir); + } catch (IOException e) { + LOG.warn("Failed to list directory: {}", visit.dir, e); + continue; + } + if (children == null) { + continue; + } + stack.push(new DirVisit(visit.dir, true, visit.oldEnough)); + for (FileStatus child : children) { + FsPath childPath = child.getPath(); + if (child.isDir()) { + stack.push( + new DirVisit( + childPath, + false, + child.getModificationTime() < task.cutoffMillis())); + continue; + } + scanned++; + if (child.getModificationTime() >= task.cutoffMillis()) { + continue; + } + FileMeta meta = + new FileMeta(childPath, child.getLen(), child.getModificationTime()); + FileRule rule = dispatcher.dispatch(meta); + Decision decision = + rule.evaluate(meta, BucketActiveRefs.empty(), task.cutoffMillis()); + switch (decision) { + case DELETE: + if (safeDeleter.deleteFile(meta.path(), decision, rule.id())) { + deleted++; + bytesReclaimed += meta.size(); + } else { + deleteFailures++; + } + break; + case 
SKIP_UNKNOWN: + audit.logSkipUnknown(meta.path(), rule.id()); + break; + case KEEP_ACTIVE: + case DEFER: + default: + break; + } + } + } + + return new CleanStats(scanned, deleted, emptyDirsRemoved, deleteFailures, bytesReclaimed); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + /** Creates the {@link SafeDeleter} used by this operator: wraps {@code fs} and forwards {@code dryRun} together with this instance's {@code audit} and {@code remoteFsOpRateLimiter}. */ private SafeDeleter createSafeDeleter(FileSystem fs, boolean dryRun) { + return new SafeDeleter(fs, dryRun, audit, remoteFsOpRateLimiter); + } + + /** Returns this subtask's share of {@code totalRate}: {@code totalRate / parallelism}, plus 1 for subtasks whose index is below {@code totalRate % parallelism}, and never less than 1.0. NOTE(review): because of the 1.0 floor the shares add up to more than {@code totalRate} whenever {@code totalRate} is smaller than {@code parallelism}; {@code parallelism == 0} would throw ArithmeticException - presumably callers pass the positive runtime parallelism, confirm. */ private static double perSubtaskRate(long totalRate, int parallelism, int subtaskIndex) { + long base = totalRate / parallelism; + long remainder = totalRate % parallelism; + long quota = base + (subtaskIndex < remainder ? 1L : 0L); + return Math.max(1.0, (double) quota); + } + + /** One entry on the directory-walk stack. {@code postOrder == false} is the first visit (list the directory and push its children); {@code postOrder == true} is the revisit pushed beneath the children, at which point an empty-directory delete is attempted if {@code oldEnough}. {@code oldEnough} is computed by the walker as: modification time earlier than the task cutoff. */ private static final class DirVisit { + private final FsPath dir; + private final boolean postOrder; + private final boolean oldEnough; + + private DirVisit(FsPath dir, boolean postOrder, boolean oldEnough) { + this.dir = dir; + this.postOrder = postOrder; + this.oldEnough = oldEnough; + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScopeEnumeratorFunction.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScopeEnumeratorFunction.java new file mode 100644 index 0000000000..eede19cc68 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/ScopeEnumeratorFunction.java @@ -0,0 +1,662 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.config.ConfigOptions; +import org.apache.fluss.config.Configuration; +import org.apache.fluss.exception.DisconnectException; +import org.apache.fluss.exception.NetworkException; +import org.apache.fluss.exception.UnsupportedVersionException; +import org.apache.fluss.flink.action.orphan.OrphanCleanUtils; +import org.apache.fluss.flink.action.orphan.RpcErrorClassifier; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.build.ActiveRefsFetcher; +import org.apache.fluss.flink.action.orphan.build.KvActiveRefsFetchResult; +import org.apache.fluss.flink.action.orphan.build.LogActiveRefsFetchResult; +import org.apache.fluss.flink.action.orphan.build.MaxKnownIdsTracker; +import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig; +import org.apache.fluss.flink.action.orphan.rule.OrphanDirDetector; +import org.apache.fluss.fs.FileStatus; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.metadata.PartitionInfo; +import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.metadata.TableInfo; +import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; +import org.apache.fluss.utils.ExceptionUtils; 
+import org.apache.fluss.utils.FlussPaths; + +import org.apache.flink.streaming.api.functions.ProcessFunction; +import org.apache.flink.util.Collector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Consumer; +import java.util.function.Predicate; + +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.enumerateBuckets; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.fetchClusterConfigMap; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.normalizeRoot; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.physicalPath; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.remoteSubDir; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.resolveClusterRemoteDataDir; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.resolveClusterRemoteDataDirs; +import static org.apache.fluss.flink.action.orphan.OrphanCleanUtils.resolveRemoteDataDir; + +/** + * Stage 1 of the orphan files cleanup job. Runs at parallelism=1 and concentrates all coordinator + * RPC interaction in a single subtask. + * + *

For each live bucket, emits a {@link BucketCleanTask} containing the FS paths and manifest + * locations needed for Stage 2 to execute cleanup without coordinator access. For each detected + * orphan directory, emits an {@link OrphanDirCleanTask}. + */ +@Internal +public final class ScopeEnumeratorFunction extends ProcessFunction { + + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(ScopeEnumeratorFunction.class); + /** Names of the two top-level remote directories (log and kv) scanned under every cluster root when looking for orphan table and partition directories. */ private static final String[] TOP_LEVEL_DIRS = { + FlussPaths.REMOTE_LOG_DIR_NAME, FlussPaths.REMOTE_KV_DIR_NAME + }; + + /** Job configuration; this class only reads it. */ private final OrphanCleanConfig config; + + /** Stores {@code config}; no other work is done at construction time. */ public ScopeEnumeratorFunction(OrphanCleanConfig config) { + this.config = config; + } + + /** Runs the complete enumeration once per input element. Steps: initialize file systems from {@code config.extraConfigs()} when that map is non-empty; open a Fluss connection using the bootstrap server plus every extra config whose key starts with {@code client.}; verify the required RPCs; enumerate databases, tables and partitions; then, per live table, emit bucket tasks and orphan-partition tasks, and per database emit orphan-table tasks. The values of {@code trigger} and {@code ctx} are not read. */ @Override + public void processElement(Integer trigger, Context ctx, Collector out) + throws Exception { + if (!config.extraConfigs().isEmpty()) { + FileSystem.initialize(Configuration.fromMap(config.extraConfigs()), null); + } + + Configuration flussConfig = new Configuration(); + flussConfig.setString(ConfigOptions.BOOTSTRAP_SERVERS.key(), config.bootstrapServer()); + // Pass through client-related extra configs (e.g. security/auth). + for (Map.Entry entry : config.extraConfigs().entrySet()) { + if (entry.getKey().startsWith("client.")) { + flussConfig.setString(entry.getKey(), entry.getValue()); + } + } + + try (Connection connection = ConnectionFactory.createConnection(flussConfig); + Admin admin = connection.getAdmin()) { + // Fail fast on incompatible servers: the action jar may be deployed against an + // older cluster that does not implement ListRemoteLogManifests / ListKvSnapshots. + // Without this guard, every per-target fetch would degrade to skip_log_target / + // skip_kv_target audit events and the job would exit "successfully" with + // deleted=0, masking the incompatibility. 
+ verifyServerSupportsRequiredApis(admin); + + AuditLogger audit = new AuditLogger(); + audit.logCutoff(config.olderThanMillis()); + + RateLimiter remoteFsOpRateLimiter = + RateLimiter.create((double) config.remoteFsOpRateLimitPerSecond()); + /* The fetcher shares the rate limiter that is also used for the orphan-directory listings below. NOTE(review): the second constructor argument (3) is interpreted by ActiveRefsFetcher, which is not visible in this chunk - possibly a retry count; consider a named constant. */ ActiveRefsFetcher fetcher = new ActiveRefsFetcher(admin, 3, remoteFsOpRateLimiter); + MaxKnownIdsTracker tracker = new MaxKnownIdsTracker(); + Map clusterConfigMap = fetchClusterConfigMap(admin); + String clusterRemoteDataDir = resolveClusterRemoteDataDir(clusterConfigMap); + /* Normalized, de-duplicated remote data roots: used as the in-scope check for bucket targets and as the roots scanned for orphan directories. */ List clusterRoots = + normalizeRoots(resolveClusterRemoteDataDirs(clusterConfigMap)); + + Map dbStates = enumerateActiveScope(admin, audit, tracker); + + for (DbScanState dbState : dbStates.values()) { + for (LiveTableScope liveTable : dbState.liveTables) { + emitBucketTasks( + liveTable, fetcher, audit, clusterRemoteDataDir, clusterRoots, out); + emitOrphanPartitionDirTasks( + liveTable, tracker, clusterRoots, audit, remoteFsOpRateLimiter, out); + } + emitOrphanTableDirTasks( + dbState, tracker, clusterRoots, audit, remoteFsOpRateLimiter, out); + } + } + } + + /** Normalizes each root in the list and returns a deduplicated ordered list. */ + private static List normalizeRoots(List roots) { + LinkedHashSet normalized = new LinkedHashSet(); + for (String root : roots) { + normalized.add(normalizeRoot(root)); + } + return new ArrayList(normalized); + } + + /** + * Probes the two RPCs this action depends on and throws if the connected server does not + * implement them. A sentinel {@code tableId} of {@link Long#MAX_VALUE} is used so that on a + * compatible server the call simply fails with a benign error (typically table-not-found), + * whereas an incompatible server raises {@link UnsupportedVersionException} during ApiVersions + * negotiation. Any non-{@code UnsupportedVersionException} outcome is treated as proof that the + * RPC is recognized. 
+ */ + private static void verifyServerSupportsRequiredApis(Admin admin) { + long sentinelTableId = Long.MAX_VALUE; + probeApi( + "ListRemoteLogManifests", + () -> admin.listRemoteLogManifests(sentinelTableId, null).get()); + probeApi("ListKvSnapshots", () -> admin.listKvSnapshots(sentinelTableId, null).get()); + } + + /** Executes {@code probe} and classifies any throwable it raises: an UnsupportedVersionException anywhere in the cause chain is rethrown as UnsupportedOperationException naming {@code apiName}; a connection-type failure (see isConnectionFailure) is rethrown as IllegalStateException; every other outcome, including normal completion, returns silently. NOTE(review): catching Throwable would also absorb an InterruptedException (if the probe's get() is a blocking future call) without restoring the thread's interrupt flag, as well as Errors - consider handling those explicitly. */ private static void probeApi(String apiName, ThrowingProbe probe) { + try { + probe.run(); + } catch (Throwable t) { + if (isUnsupportedVersion(t)) { + throw new UnsupportedOperationException( + "Orphan files cleanup requires the Fluss server to support the " + + apiName + + " RPC, which the connected cluster does not. Upgrade the" + + " cluster to a version that exposes this RPC, or run an" + + " older orphan-files-cleanup action that targets this server.", + t); + } + if (isConnectionFailure(t)) { + throw new IllegalStateException( + "Failed to connect to Fluss cluster while probing " + + apiName + + " RPC. The bootstrap server may be unreachable.", + t); + } + // Any other failure means the RPC is recognized; the call merely failed because of + // the sentinel target id. Compatibility is satisfied. 
+ } + } + + /** Returns true when, starting from the result of {@code ExceptionUtils.stripExecutionException(t)}, the throwable or any of its causes is a NetworkException, DisconnectException or IOException. */ private static boolean isConnectionFailure(Throwable t) { + Throwable cause = ExceptionUtils.stripExecutionException(t); + while (cause != null) { + if (cause instanceof NetworkException + || cause instanceof DisconnectException + || cause instanceof IOException) { + return true; + } + cause = cause.getCause(); + } + return false; + } + + /** Returns true when {@code t} or any throwable in its cause chain is an {@link UnsupportedVersionException}. */ private static boolean isUnsupportedVersion(Throwable t) { + Throwable cause = t; + while (cause != null) { + if (cause instanceof UnsupportedVersionException) { + return true; + } + cause = cause.getCause(); + } + return false; + } + + /** A probe action that may throw any checked exception, so the probe lambdas can call {@code get()} without their own try/catch. */ @FunctionalInterface + private interface ThrowingProbe { + void run() throws Exception; + } + + // ------------------------------------------------------------------------- + // Scope enumeration (coordinator RPCs only) + // ------------------------------------------------------------------------- + + /** Builds one DbScanState per database to scan, keyed by database name in iteration order. When a single table is configured only that table is resolved and tableInfosComplete is cleared, which makes emitOrphanTableDirTasks skip the database. When listing tables fails the database is audited as skipped and likewise marked incomplete. Each table goes through resolveTable, which on success records the table id in the tracker and in activeTableIds and appends a LiveTableScope (with its partitions, for partitioned tables); a lookup failure other than NOT_FOUND for a listed table - or any failure for the explicitly configured table - is audited and clears tableInfosComplete. */ private Map enumerateActiveScope( + Admin admin, AuditLogger audit, MaxKnownIdsTracker tracker) { + List dbs = resolveDatabasesToScan(admin, audit); + Map result = new LinkedHashMap(); + for (String dbName : dbs) { + DbScanState dbState = new DbScanState(dbName); + result.put(dbName, dbState); + if (config.table().isPresent()) { + dbState.tableInfosComplete = false; + resolveTable(admin, audit, tracker, dbState, config.table().get(), true); + continue; + } + List tableNames; + try { + tableNames = admin.listTables(dbName).get(); + } catch (Exception e) { + audit.logSkipDb(dbName, classifyName(e)); + dbState.tableInfosComplete = false; + continue; + } + for (String tableName : tableNames) { + resolveTable(admin, audit, tracker, dbState, tableName, false); + } + } + return result; + } + + /** Returns the databases to scan: every database when {@code config.allDatabases()} is set, otherwise the single configured database if it exists, or an empty list (with a NOT_FOUND audit entry) if it does not. A failing RPC is audited and rethrown as IllegalStateException. NOTE(review): {@code config.database().get()} is called without a presence check - presumably config validation guarantees a database whenever allDatabases is false; confirm. */ private List resolveDatabasesToScan(Admin admin, AuditLogger audit) { + if (config.allDatabases()) { + try { + return admin.listDatabases().get(); + } catch (Exception e) { + audit.logSkipDb("*", classifyName(e)); + throw new IllegalStateException( + "Failed to list databases from Fluss cluster. 
" + "The coordinator server may be unreachable.", + e); + } + } + String databaseName = config.database().get(); + try { + if (admin.databaseExists(databaseName).get()) { + return Collections.singletonList(databaseName); + } + } catch (Exception e) { + audit.logSkipDb(databaseName, classifyName(e)); + throw new IllegalStateException( + "Failed to check existence of database '" + + databaseName + + "'. " + + "The coordinator server may be unreachable.", + e); + } + audit.logSkipDb(databaseName, RpcErrorClassifier.Category.NOT_FOUND.name()); + return Collections.emptyList(); + } + + private void resolveTable( + Admin admin, + AuditLogger audit, + MaxKnownIdsTracker tracker, + DbScanState dbState, + String tableName, + boolean explicitTableTarget) { + TablePath tablePath = TablePath.of(dbState.dbName, tableName); + TableInfo tableInfo; + try { + tableInfo = admin.getTableInfo(tablePath).get(); + } catch (Exception e) { + RpcErrorClassifier.Category category = RpcErrorClassifier.classify(e); + if (category != RpcErrorClassifier.Category.NOT_FOUND || explicitTableTarget) { + audit.logSkipTable(dbState.dbName, tableName, category.name()); + dbState.tableInfosComplete = false; + } + return; + } + tracker.observeTableId(tableInfo.getTableId()); + dbState.activeTableIds.add(tableInfo.getTableId()); + + LiveTableScope liveTable = new LiveTableScope(dbState.dbName, tableName, tableInfo); + dbState.liveTables.add(liveTable); + if (!tableInfo.isPartitioned()) { + return; + } + try { + List partitions = admin.listPartitionInfos(tablePath).get(); + TableInfo confirm = admin.getTableInfo(tablePath).get(); + if (confirm.getTableId() != tableInfo.getTableId()) { + audit.logSkipTable(dbState.dbName, tableName, "table-recreated-during-enumeration"); + liveTable.partitionInfosComplete = false; + return; + } + for (PartitionInfo partition : partitions) { + liveTable.partitions.add(partition); + liveTable.activePartitionIds.add(partition.getPartitionId()); + 
tracker.observePartitionId(partition.getPartitionId()); + } + } catch (Exception e) { + audit.logSkipPartitionList(dbState.dbName, tableName, classifyName(e)); + /* Partition data for this table is unknown: emitBucketTasks and emitOrphanPartitionDirTasks both return early for a partitioned table whose flag is cleared. */ liveTable.partitionInfosComplete = false; + } + } + + // ------------------------------------------------------------------------- + // Emit BucketCleanTasks (per-target RPC + per-bucket task emission) + // ------------------------------------------------------------------------- + + /** Emits bucket tasks for every target of the table: each listed partition of a partitioned table, or a single null target for a non-partitioned table. A partitioned table whose partition listing was incomplete is skipped entirely. */ private void emitBucketTasks( + LiveTableScope liveTable, + ActiveRefsFetcher fetcher, + AuditLogger audit, + @Nullable String clusterRemoteDataDir, + List clusterRoots, + Collector out) { + if (liveTable.partitioned && !liveTable.partitionInfosComplete) { + return; + } + List partitionTargets = + liveTable.partitioned + ? liveTable.partitions + : Collections.singletonList(null); + for (PartitionInfo partitionInfo : partitionTargets) { + emitBucketTasksForTarget( + liveTable, + partitionInfo, + fetcher, + audit, + clusterRemoteDataDir, + clusterRoots, + out); + } + } + + /** Handles one target (a partition, or the whole table when {@code partitionInfo} is null). Resolves the target's remote data dir and skips it, with an audit entry, when the normalized dir is not one of {@code clusterRoots}. Otherwise fetches the log active refs and, for primary-key tables, the KV active snapshot dirs, then emits one BucketCleanTask per bucket that has a log tablet dir and/or a KV tablet dir to clean. A bucket's log side is included only when the log listing succeeded and the bucket's status is RESOLVED; its KV side only when the KV listing succeeded and contains the bucket. */ private void emitBucketTasksForTarget( + LiveTableScope liveTable, + @Nullable PartitionInfo partitionInfo, + ActiveRefsFetcher fetcher, + AuditLogger audit, + @Nullable String clusterRemoteDataDir, + List clusterRoots, + Collector out) { + Long partitionId = partitionInfo == null ? null : partitionInfo.getPartitionId(); + + String remoteDataDir = + resolveRemoteDataDir(liveTable.tableInfo, partitionInfo, clusterRemoteDataDir); + + // Scope guard: skip this target if its metadata-resolved root is not part of the + // cluster's configured remote data directories. 
+ if (!clusterRoots.contains(normalizeRoot(remoteDataDir))) { + audit.logSkipBucketOutOfScope(liveTable.tableId, partitionId, remoteDataDir); + return; + } + + LogActiveRefsFetchResult logResult = + fetcher.fetchLogActiveRefsByBucket(liveTable.tableId, partitionId); + if (!logResult.listOk()) { + audit.logSkipLogTarget(liveTable.tableId, partitionId, logResult.listFailureReason()); + } + + Map> kvActiveByBucket = Collections.emptyMap(); + boolean kvTargetOk = false; + if (liveTable.tableInfo.hasPrimaryKey()) { + KvActiveRefsFetchResult kvResult = + fetcher.fetchKvActiveSnapDirs(liveTable.tableId, partitionId); + if (kvResult.listOk()) { + kvActiveByBucket = kvResult.activeSnapDirsByBucket(); + kvTargetOk = true; + } else { + audit.logSkipKvTarget(liveTable.tableId, partitionId, kvResult.listFailureReason()); + } + } + + FsPath remoteLogDir = remoteSubDir(remoteDataDir, FlussPaths.REMOTE_LOG_DIR_NAME); + FsPath remoteKvDir = remoteSubDir(remoteDataDir, FlussPaths.REMOTE_KV_DIR_NAME); + + for (TableBucket tableBucket : enumerateBuckets(liveTable.tableInfo, partitionInfo)) { + int bucketId = tableBucket.getBucket(); + + String logTabletDir = null; + + Set logSegmentRelativePaths = Collections.emptySet(); + Set logActiveManifestPaths = Collections.emptySet(); + + if (logResult.listOk()) { + switch (logResult.statusFor(bucketId)) { + case RESOLVED: + logTabletDir = + FlussPaths.remoteLogTabletDir( + remoteLogDir, + physicalPath(liveTable.tablePath, partitionInfo), + tableBucket) + .toString(); + logSegmentRelativePaths = + logResult.activeRefsOf(bucketId).logSegmentRelativePaths(); + logActiveManifestPaths = + logResult.activeRefsOf(bucketId).logActiveManifestPaths(); + break; + case READ_FAILED: + audit.logBucketAborted( + OrphanCleanUtils.bucketScopeKey( + liveTable.tableId, partitionId, bucketId), + logResult.readFailureReason(bucketId)); + break; + case NOT_LISTED: + audit.logSkipLogBucket( + liveTable.tableId, partitionId, bucketId, "no_remote_manifest"); + break; 
+ default: + break; + } + } + + String kvTabletDir = null; + Set kvActiveSnaps = Collections.emptySet(); + if (kvTargetOk && kvActiveByBucket.containsKey(bucketId)) { + kvTabletDir = + FlussPaths.remoteKvTabletDir( + remoteKvDir, + physicalPath(liveTable.tablePath, partitionInfo), + tableBucket) + .toString(); + kvActiveSnaps = kvActiveByBucket.get(bucketId); + } else if (kvTargetOk) { + audit.logSkipKvBucket(liveTable.tableId, partitionId, bucketId, "empty_active_set"); + } + + /* Neither side produced a directory with a known active set, so no task is emitted for this bucket. */ if (logTabletDir == null && kvTabletDir == null) { + continue; + } + + out.collect( + new BucketCleanTask( + logTabletDir, + kvTabletDir, + logSegmentRelativePaths, + logActiveManifestPaths, + kvActiveSnaps, + config.olderThanMillis(), + config.dryRun(), + config.allowDeleteManifest())); + } + } + + // ------------------------------------------------------------------------- + // Emit OrphanDirCleanTasks + // ------------------------------------------------------------------------- + + /** For each cluster root and each top-level directory, scans the database directory (root/topLevel/dbName) for child directories that OrphanDirDetector.isOrphanTable flags. With {@code config.allowCleanOrphanTables()} an OrphanDirCleanTask is emitted per flagged directory; otherwise each flagged directory is only audited as skipped. The whole database is skipped, with an audit entry, when its table enumeration was incomplete. */ private void emitOrphanTableDirTasks( + DbScanState dbState, + MaxKnownIdsTracker tracker, + List clusterRoots, + AuditLogger audit, + RateLimiter remoteFsOpRateLimiter, + Collector out) + throws IOException { + if (!dbState.tableInfosComplete) { + audit.logSkipOrphanTableScan(dbState.dbName, "tableInfos-incomplete"); + return; + } + Set activeTableIds = dbState.activeTableIds; + long maxKnownTableId = tracker.maxKnownTableId(); + boolean emit = config.allowCleanOrphanTables(); + for (String root : clusterRoots) { + for (String topLevel : TOP_LEVEL_DIRS) { + FsPath dbDir = remoteSubDir(root, topLevel + "/" + dbState.dbName); + if (emit) { + forEachOrphanDirUnderParent( + dbDir, + dirName -> + OrphanDirDetector.isOrphanTable( + dirName, activeTableIds, maxKnownTableId), + remoteFsOpRateLimiter, + dir -> + out.collect( + new OrphanDirCleanTask( + dir.toString(), + config.olderThanMillis(), + config.dryRun(), + config.allowDeleteManifest()))); + } else { + forEachOrphanDirUnderParent( + dbDir, + dirName -> + 
OrphanDirDetector.isOrphanTable( + dirName, activeTableIds, maxKnownTableId), + remoteFsOpRateLimiter, + dir -> audit.logSkipOrphanTable(dir, "default-conservative")); + } + } + } + } + + /** Partition-level counterpart of emitOrphanTableDirTasks: for each cluster root and top-level directory, scans the table directory for child directories that OrphanDirDetector.isOrphanPartition flags. With {@code config.allowCleanOrphanPartitions()} an OrphanDirCleanTask is emitted per flagged directory; otherwise it is only audited as skipped. Returns immediately for non-partitioned tables and for tables whose partition listing was incomplete. */ private void emitOrphanPartitionDirTasks( + LiveTableScope liveTable, + MaxKnownIdsTracker tracker, + List clusterRoots, + AuditLogger audit, + RateLimiter remoteFsOpRateLimiter, + Collector out) + throws IOException { + if (!liveTable.partitioned || !liveTable.partitionInfosComplete) { + return; + } + Set activePartitionIds = liveTable.activePartitionIds; + long maxKnownPartitionId = tracker.maxKnownPartitionId(); + boolean emit = config.allowCleanOrphanPartitions(); + for (String root : clusterRoots) { + for (String topLevel : TOP_LEVEL_DIRS) { + FsPath tableDir = + FlussPaths.remoteTableDir( + remoteSubDir(root, topLevel), + liveTable.tablePath, + liveTable.tableId); + if (emit) { + forEachOrphanDirUnderParent( + tableDir, + dirName -> + OrphanDirDetector.isOrphanPartition( + dirName, activePartitionIds, maxKnownPartitionId), + remoteFsOpRateLimiter, + dir -> + out.collect( + new OrphanDirCleanTask( + dir.toString(), + config.olderThanMillis(), + config.dryRun(), + config.allowDeleteManifest()))); + } else { + forEachOrphanDirUnderParent( + tableDir, + dirName -> + OrphanDirDetector.isOrphanPartition( + dirName, activePartitionIds, maxKnownPartitionId), + remoteFsOpRateLimiter, + dir -> audit.logSkipOrphanPartition(dir, "default-conservative")); + } + } + } + } + + /** Lists the direct children of {@code parentDir} and invokes {@code action} on every child directory whose name satisfies {@code isOrphan}. Does nothing when the parent does not exist or cannot be listed (the listing failure is logged by listStatuses). Children that are not directories are ignored. */ private void forEachOrphanDirUnderParent( + FsPath parentDir, + Predicate isOrphan, + RateLimiter remoteFsOpRateLimiter, + Consumer action) + throws IOException { + FileSystem fs = getFileSystemIfExists(parentDir, remoteFsOpRateLimiter); + if (fs == null) { + return; + } + FileStatus[] entries = listStatuses(fs, parentDir, remoteFsOpRateLimiter); + if (entries == null) { + return; + } + for (FileStatus entry : entries) { + if (!entry.isDir()) { + continue; + } + if (!isOrphan.test(entry.getPath().getName())) { + 
continue; + } + action.accept(entry.getPath()); + } + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + /** Returns the name of the {@link RpcErrorClassifier} category for {@code e}; used as the reason string in audit entries. */ private static String classifyName(Throwable e) { + return RpcErrorClassifier.classify(e).name(); + } + + /** Returns the file system of {@code dir} when {@code dir} exists, otherwise null. One rate-limiter permit is taken before the {@code exists} call. */ @Nullable + private static FileSystem getFileSystemIfExists(FsPath dir, RateLimiter remoteFsOpRateLimiter) + throws IOException { + FileSystem fs = dir.getFileSystem(); + remoteFsOpRateLimiter.acquire(); + return fs.exists(dir) ? fs : null; + } + + /** Lists {@code dir} after taking one rate-limiter permit; an IOException is logged at WARN and reported as null instead of being thrown. */ @Nullable + private static FileStatus[] listStatuses( + FileSystem fs, FsPath dir, RateLimiter remoteFsOpRateLimiter) { + try { + remoteFsOpRateLimiter.acquire(); + return fs.listStatus(dir); + } catch (IOException e) { + LOG.warn("Failed to list directory: {}", dir, e); + return null; + } + } + + // ------------------------------------------------------------------------- + // Internal state classes + // ------------------------------------------------------------------------- + + /** Mutable per-database enumeration result: the live tables found, their ids, and whether the table listing can be trusted as complete (orphan-table detection is skipped when it is not). NOTE(review): resolveTable leaves tableInfosComplete true when it detects that a table was recreated during enumeration, although the new table id is then neither in activeTableIds nor observed by the tracker - confirm that OrphanDirDetector cannot flag the new table's directory as orphan. */ private static final class DbScanState { + final String dbName; + boolean tableInfosComplete = true; + final Set activeTableIds = new LinkedHashSet(); + final List liveTables = new ArrayList(); + + DbScanState(String dbName) { + this.dbName = dbName; + } + } + + /** Mutable per-table enumeration result. {@code tablePath}, {@code tableId} and {@code partitioned} are copied from {@code tableInfo}; the {@code dbName} and {@code tableName} arguments are stored as given. {@code partitionInfosComplete} is cleared when the partition list could not be fetched or the table was recreated mid-enumeration, which disables bucket and orphan-partition emission for the table. */ private static final class LiveTableScope { + final String dbName; + final String tableName; + final TablePath tablePath; + final long tableId; + final TableInfo tableInfo; + final boolean partitioned; + boolean partitionInfosComplete = true; + final List partitions = new ArrayList(); + final Set activePartitionIds = new LinkedHashSet(); + + LiveTableScope(String dbName, String tableName, TableInfo tableInfo) { + this.dbName = dbName; + this.tableName = tableName; + this.tablePath = tableInfo.getTablePath(); + this.tableId = tableInfo.getTableId(); + this.tableInfo = tableInfo; + this.partitioned = tableInfo.isPartitioned(); + } + } +} + diff --git 
a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/StatsAggregateOperator.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/StatsAggregateOperator.java new file mode 100644 index 0000000000..2e1686ddbf --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/job/StatsAggregateOperator.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; + +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.BoundedOneInput; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; + +/** + * Stage 3 of the orphan files cleanup job. Runs at parallelism=1 to aggregate per-subtask {@link + * CleanStats} records. + * + *

Implemented as a custom operator (not ProcessFunction) because {@code ProcessOperator} does + * not implement {@link BoundedOneInput} — the {@code endInput()} callback would never fire. + * + *

Scalar counters are accumulated into longs and the final summary is emitted in {@link + * #endInput()}. + */ +@Internal +public final class StatsAggregateOperator extends AbstractStreamOperator + implements OneInputStreamOperator, BoundedOneInput { + + private static final long serialVersionUID = 2L; + + /** Passed through to the audit summary; not otherwise used by this operator. */ private final boolean dryRun; + + /** Running totals of the CleanStats fields seen so far; all five counters are transient and are zeroed in {@link #open()}. */ private transient long scanned; + private transient long deleted; + private transient long emptyDirsRemoved; + private transient long deleteFailures; + private transient long bytesReclaimed; + + /** Stores the dry-run flag that is later reported in the summary. */ public StatsAggregateOperator(boolean dryRun) { + this.dryRun = dryRun; + } + + /** Calls the superclass {@code open()} and resets every counter to zero. */ @Override + public void open() throws Exception { + super.open(); + scanned = 0L; + deleted = 0L; + emptyDirsRemoved = 0L; + deleteFailures = 0L; + bytesReclaimed = 0L; + } + + /** Adds every counter of the incoming {@code CleanStats} to the running totals; emits nothing. */ @Override + public void processElement(StreamRecord element) { + CleanStats stats = element.getValue(); + scanned += stats.scanned(); + deleted += stats.deleted(); + emptyDirsRemoved += stats.emptyDirsRemoved(); + deleteFailures += stats.deleteFailures(); + bytesReclaimed += stats.bytesReclaimed(); + } + + /** Logs the aggregated summary and emits a single {@code CleanStats} carrying the raw totals. The summary's second argument is {@code deleted - emptyDirsRemoved}: the orphan-directory walk increments both counters when it removes an empty directory, so the difference there is the number of deleted files. NOTE(review): assumes every producer of CleanStats counts removed directories in {@code deleted} as well - confirm for the bucket-clean path. */ @Override + public void endInput() { + AuditLogger audit = new AuditLogger(); + CleanStats finalStats = + new CleanStats(scanned, deleted, emptyDirsRemoved, deleteFailures, bytesReclaimed); + + audit.logSummary( + scanned, + deleted - emptyDirsRemoved, + emptyDirsRemoved, + deleteFailures, + bytesReclaimed, + dryRun); + + output.collect(new StreamRecord<>(finalStats)); + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/BucketActiveRefs.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/BucketActiveRefs.java new file mode 100644 index 0000000000..73a847dd75 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/BucketActiveRefs.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** Immutable view of all active references for a single bucket / table partition. */ +@Internal +public final class BucketActiveRefs { + + /** Shared instance whose three sets are all empty; handed out by {@link #empty()}. */ private static final BucketActiveRefs EMPTY = + new BucketActiveRefs( + Collections.emptySet(), Collections.emptySet(), Collections.emptySet()); + + private final Set logSegmentRelativePaths; + private final Set kvActiveSnapDirs; + private final Set logActiveManifestPaths; + + /** Copies each argument into a new HashSet and wraps the copy as unmodifiable, so later changes to the caller's sets are not visible through this object. Parameter order is: log segment paths, KV snapshot dirs, log manifest paths. A null argument fails inside the HashSet copy constructor. */ public BucketActiveRefs( + Set logSegmentRelativePaths, + Set kvActiveSnapDirs, + Set logActiveManifestPaths) { + this.logSegmentRelativePaths = + Collections.unmodifiableSet(new HashSet<>(logSegmentRelativePaths)); + this.kvActiveSnapDirs = Collections.unmodifiableSet(new HashSet<>(kvActiveSnapDirs)); + this.logActiveManifestPaths = + Collections.unmodifiableSet(new HashSet<>(logActiveManifestPaths)); + } + + /** Returns the shared all-empty instance. */ public static BucketActiveRefs empty() { + return EMPTY; + } + + /** Returns the unmodifiable set of active log segment relative paths. NOTE(review): what the paths are relative to is not visible in this class - presumably the bucket's remote log tablet dir; confirm. */ public Set logSegmentRelativePaths() { + return logSegmentRelativePaths; + } + + /** + * Returns the set of active {@code snap-} directory names for the bucket. + * + *

The set is the union of two server-side categories the {@code ListKvSnapshots} RPC emits + * as one flat list (client does not distinguish): + * + *

    + *
  • RETAINED — the most recent N completed snapshots kept per the retention window. + *
  • STILL_IN_USE — snapshots pinned by an active lease; emitted unconditionally even when + * the corresponding ZK znode has been removed, on the principle "may over-count active, + * must never under-count." + *
+ * + *

A KV snap-private file is preserved iff its parent directory's name is in this set. + */ + public Set kvActiveSnapDirs() { + return kvActiveSnapDirs; + } + + /** + * Returns the set of active log manifest paths reported by {@code ListRemoteLogManifests}. The + * "current" manifest for a bucket is always also a member of this set, so {@link + * LogManifestRule} only needs to check this single collection. + */ + public Set logActiveManifestPaths() { + return logActiveManifestPaths; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/Decision.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/Decision.java new file mode 100644 index 0000000000..491281a22e --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/Decision.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; + +/** Decision returned by a {@link FileRule} for a given file. */ +@Internal +public enum Decision { + + /** File is orphan and should be deleted. 
*/ + DELETE, + + /** File is referenced by an active object (manifest, snapshot, etc.). */ + KEEP_ACTIVE, + + /** + * File is not in the active set but its age is under the {@code --older-than} threshold; the + * deletion verdict is deferred to a future cleanup round, by which time the file will either + * have entered the active set (KEEP_ACTIVE) or aged past the threshold (DELETE). The grace + * window prevents racing in-flight writes whose manifest entry has not yet been committed. + */ + DEFER, + + /** File path or extension is not recognized; skip without deletion. */ + SKIP_UNKNOWN +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileMeta.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileMeta.java new file mode 100644 index 0000000000..74072de4fa --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileMeta.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; + +/** Immutable metadata describing a candidate file evaluated by {@link FileRule}. */ +@Internal +public final class FileMeta { + + private final FsPath path; + private final long size; + private final long modificationTime; + + public FileMeta(FsPath path, long size, long modificationTime) { + this.path = path; + this.size = size; + this.modificationTime = modificationTime; + } + + public FsPath path() { + return path; + } + + public long size() { + return size; + } + + public long modificationTime() { + return modificationTime; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileRule.java new file mode 100644 index 0000000000..af9a01468a --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/FileRule.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; + +/** Rule that decides whether a single file is orphan. */ +@Internal +public interface FileRule { + + /** Stable identifier used in audit logs. */ + RuleId id(); + + /** + * Decide what to do with the given file. + * + * @param cutoffMillis absolute epoch-ms cutoff: a file whose mtime is {@code < cutoffMillis} is + * age-eligible for deletion (a {@link Decision#DELETE}); a file whose mtime is {@code >= + * cutoffMillis} is {@link Decision#DEFER}red. Pre-frozen at action start; does not slide + * during a run. + */ + Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis); +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRule.java new file mode 100644 index 0000000000..8fc1e5b2c0 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRule.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.utils.FlussPaths; + +/** + * Rule for shared SST files under the {@code shared/} KV directory. + * + *

Always returns {@link Decision#KEEP_ACTIVE}. The true active set for shared SSTs lives inside + * the engine's {@code SharedKvFileRegistry}; orphan cleanup has no read path into that registry, so + * any deletion here would be a guess. Per the action's hard constraint "prefer leak over + * mis-delete," the rule never deletes, and as a consequence orphan PK-table / orphan-partition + * directories permanently retain their {@code shared/} subtree as accepted residue (recovering that + * residue would require a registry-backed GC channel that is out of scope for this action). + */ +@Internal +public final class KvSharedSstRule implements FileRule { + + @Override + public RuleId id() { + return RuleId.KV_SHARED_SST; + } + + @Override + public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) { + FsPath parent = file.path().getParent(); + if (parent == null || !FlussPaths.REMOTE_KV_SNAPSHOT_SHARED_DIR.equals(parent.getName())) { + return Decision.SKIP_UNKNOWN; + } + if (!file.path().getName().endsWith(".sst")) { + return Decision.SKIP_UNKNOWN; + } + return Decision.KEEP_ACTIVE; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRule.java new file mode 100644 index 0000000000..0700b9563f --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRule.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.utils.FlussPaths; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +/** + * Rule for files under a {@code snap-<id>/} KV snapshot directory. + * + *

Match key is the file's parent {@code snap-<id>} directory name: if that name is in {@link + * BucketActiveRefs#kvActiveSnapDirs()} (which carries the per-bucket union of RETAINED + + * STILL_IN_USE entries from {@code ListKvSnapshots}, see that getter's javadoc) the file is {@link + * Decision#KEEP_ACTIVE}. + * + *

The set-based check is what prevents retained non-latest snapshots from being misclassified as + * orphan — e.g. with {@code kv.snapshot.num-retained=2}, {@code snap-9} is still active while + * {@code snap-10} is the latest. + */ +@Internal +public final class KvSnapshotFileRule implements FileRule { + + private static final String SNAP_DIR_PREFIX = FlussPaths.REMOTE_KV_SNAPSHOT_DIR_PREFIX; + + private static final Set KNOWN_FIXED_NAMES = + new HashSet(Arrays.asList("_METADATA", "CURRENT", "LOG", "IDENTITY")); + + @Override + public RuleId id() { + return RuleId.KV_SNAPSHOT_FILE; + } + + @Override + public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) { + FsPath parent = file.path().getParent(); + if (parent == null) { + return Decision.SKIP_UNKNOWN; + } + + String parentName = parent.getName(); + if (!parentName.startsWith(SNAP_DIR_PREFIX)) { + return Decision.SKIP_UNKNOWN; + } + + // Parent must be snap-; reject e.g. snap-, snap-abc. + String snapIdPart = parentName.substring(SNAP_DIR_PREFIX.length()); + if (snapIdPart.isEmpty()) { + return Decision.SKIP_UNKNOWN; + } + for (int i = 0; i < snapIdPart.length(); i++) { + if (!Character.isDigit(snapIdPart.charAt(i))) { + return Decision.SKIP_UNKNOWN; + } + } + + if (!isKnownSnapshotFile(file.path().getName())) { + return Decision.SKIP_UNKNOWN; + } + + if (activeRefs.kvActiveSnapDirs().contains(parentName)) { + return Decision.KEEP_ACTIVE; + } + + return file.modificationTime() < cutoffMillis ? 
Decision.DELETE : Decision.DEFER; + } + + private static boolean isKnownSnapshotFile(String fileName) { + if (KNOWN_FIXED_NAMES.contains(fileName)) { + return true; + } + if (fileName.startsWith("MANIFEST-") || fileName.startsWith("OPTIONS-")) { + return true; + } + return fileName.endsWith(".sst") || fileName.endsWith(".log"); + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRule.java new file mode 100644 index 0000000000..23fb5d5edd --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRule.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.utils.FlussPaths; + +/** + * Rule for manifest files under the {@code metadata/} directory of a log bucket. + * + *

Default behavior is to return {@link Decision#KEEP_ACTIVE} for every manifest. The asymmetry + * is the reason: mis-deleting an active manifest leaves the coordinator's manifest pointer dangling + * and breaks the bucket's metadata chain entirely, while keeping orphan manifests is structurally + * harmless (KB-sized files). Operators opt into the destructive path via {@code + * allowDeleteManifest=true} (driven by the {@code --allow-delete-manifest} CLI flag); only then + * does the rule consult the active-manifest set and apply the file-level age threshold. + */ +@Internal +public final class LogManifestRule implements FileRule { + + private final boolean allowDeleteManifest; + + /** Default-conservative constructor: {@code allowDeleteManifest=false}. */ + public LogManifestRule() { + this(false); + } + + public LogManifestRule(boolean allowDeleteManifest) { + this.allowDeleteManifest = allowDeleteManifest; + } + + @Override + public RuleId id() { + return RuleId.LOG_MANIFEST; + } + + @Override + public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) { + FsPath path = file.path(); + FsPath parent = path.getParent(); + if (parent == null + || !FlussPaths.REMOTE_LOG_METADATA_DIR_NAME.equals(parent.getName()) + || !path.getName().endsWith(".manifest")) { + return Decision.SKIP_UNKNOWN; + } + + // Default-conservative: never delete a manifest. Keeping orphans is harmless; deleting an + // active manifest leaves the coordinator's manifest pointer dangling and breaks the + // bucket's metadata chain. + if (!allowDeleteManifest) { + return Decision.KEEP_ACTIVE; + } + + // Opt-in path: preserve the original active-set + cutoff semantics. The "current" bucket + // manifest is always present in logActiveManifestPaths (the server emits one path per + // bucket in ListRemoteLogManifests), so a single set lookup suffices. 
+ String pathString = path.toString(); + if (activeRefs.logActiveManifestPaths().contains(pathString)) { + return Decision.KEEP_ACTIVE; + } + + return file.modificationTime() < cutoffMillis ? Decision.DELETE : Decision.DEFER; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRule.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRule.java new file mode 100644 index 0000000000..1ac4156e8f --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRule.java @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.utils.FlussPaths; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; +import java.util.regex.Pattern; + +/** + * Rule for log-segment files under a remote log bucket. + * + *

{@code .writer_snapshot} files are only eligible for deletion in orphan-directory mode. In + * active-bucket mode the engine's own TTL cleanup handles them; the orphan tool conservatively + * keeps them to avoid any risk of racing a concurrent write. + */ +@Internal +public final class LogSegmentRule implements FileRule { + + private static final Pattern SEGMENT_DIR_PATTERN = + Pattern.compile( + "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}" + + "-[0-9a-fA-F]{12}"); + + private static final Set KNOWN_SUFFIXES = + new HashSet(Arrays.asList(".log", ".index", ".timeindex", ".writer_snapshot")); + + private final boolean orphanDirMode; + + public LogSegmentRule() { + this(false); + } + + public LogSegmentRule(boolean orphanDirMode) { + this.orphanDirMode = orphanDirMode; + } + + @Override + public RuleId id() { + return RuleId.LOG_SEGMENT; + } + + @Override + public Decision evaluate(FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) { + FsPath path = file.path(); + FsPath parent = path.getParent(); + if (parent == null || !isSegmentDir(parent.getName()) || !hasKnownSuffix(path.getName())) { + return Decision.SKIP_UNKNOWN; + } + + String relativePath = parent.getName() + "/" + path.getName(); + if (activeRefs.logSegmentRelativePaths().contains(relativePath)) { + return Decision.KEEP_ACTIVE; + } + + if (path.getName().endsWith(FlussPaths.WRITER_SNAPSHOT_FILE_SUFFIX) && !orphanDirMode) { + return Decision.KEEP_ACTIVE; + } + + return file.modificationTime() < cutoffMillis ? 
Decision.DELETE : Decision.DEFER; + } + + static boolean isSegmentDir(String dirName) { + return SEGMENT_DIR_PATTERN.matcher(dirName).matches(); + } + + private static boolean hasKnownSuffix(String fileName) { + String name = fileName; + if (name.endsWith(FlussPaths.DELETED_FILE_SUFFIX)) { + name = name.substring(0, name.length() - FlussPaths.DELETED_FILE_SUFFIX.length()); + } + for (String suffix : KNOWN_SUFFIXES) { + if (name.endsWith(suffix)) { + return true; + } + } + return false; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetector.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetector.java new file mode 100644 index 0000000000..5762ff51c2 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/OrphanDirDetector.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.annotation.VisibleForTesting; + +import javax.annotation.Nullable; + +import java.util.Set; + +/** + * Detects orphan table and partition directories by ID guard. + * + *

A directory is an orphan candidate iff its parsed ID is not in the active set and does not + * exceed the last-known maximum (conservatively treating IDs above the max as freshly allocated). + * Unrecognizable directory names are never flagged. + */ +@Internal +public final class OrphanDirDetector { + + private OrphanDirDetector() {} + + /** + * Returns {@code true} if the directory name matches {@code {name}-{tableId}} and the parsed ID + * is not in {@code activeTableIds} and is {@code <= maxKnownTableId}. + */ + public static boolean isOrphanTable( + String dirName, Set activeTableIds, long maxKnownTableId) { + Long parsed = parseTableId(dirName); + if (parsed == null) { + return false; + } + if (activeTableIds.contains(parsed)) { + return false; + } + return parsed <= maxKnownTableId; + } + + /** + * Returns {@code true} if the directory name matches {@code {name}-p{partitionId}} and the + * parsed ID is not in {@code activePartitionIds} and is {@code <= maxKnownPartitionId}. + */ + public static boolean isOrphanPartition( + String dirName, Set activePartitionIds, long maxKnownPartitionId) { + Long parsed = parsePartitionId(dirName); + if (parsed == null) { + return false; + } + if (activePartitionIds.contains(parsed)) { + return false; + } + return parsed <= maxKnownPartitionId; + } + + @VisibleForTesting + @Nullable + static Long parseTableId(String dirName) { + int dash = dirName.lastIndexOf('-'); + if (dash <= 0 || dash == dirName.length() - 1) { + return null; + } + String idPart = dirName.substring(dash + 1); + for (int i = 0; i < idPart.length(); i++) { + if (!Character.isDigit(idPart.charAt(i))) { + return null; + } + } + try { + return Long.parseLong(idPart); + } catch (NumberFormatException e) { + return null; + } + } + + @VisibleForTesting + @Nullable + static Long parsePartitionId(String dirName) { + int dashP = dirName.lastIndexOf("-p"); + if (dashP <= 0 || dashP == dirName.length() - 2) { + return null; + } + String idPart = 
dirName.substring(dashP + 2); + for (int i = 0; i < idPart.length(); i++) { + if (!Character.isDigit(idPart.charAt(i))) { + return null; + } + } + try { + return Long.parseLong(idPart); + } catch (NumberFormatException e) { + return null; + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcher.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcher.java new file mode 100644 index 0000000000..9880c6e64d --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleDispatcher.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.utils.FlussPaths; + +/** Dispatches a candidate file to the matching orphan-cleanup rule. 
*/ +@Internal +public final class RuleDispatcher { + + private static final FileRule UNKNOWN_RULE = + new FileRule() { + @Override + public RuleId id() { + return RuleId.UNKNOWN; + } + + @Override + public Decision evaluate( + FileMeta file, BucketActiveRefs activeRefs, long cutoffMillis) { + return Decision.SKIP_UNKNOWN; + } + }; + + private final FileRule logSegmentRule; + private final FileRule logManifestRule; + private final FileRule kvSnapshotFileRule = new KvSnapshotFileRule(); + private final FileRule kvSharedSstRule = new KvSharedSstRule(); + + public RuleDispatcher() { + this(false, false); + } + + public RuleDispatcher(boolean allowDeleteManifest) { + this(allowDeleteManifest, false); + } + + public RuleDispatcher(boolean allowDeleteManifest, boolean orphanDirMode) { + this.logSegmentRule = new LogSegmentRule(orphanDirMode); + this.logManifestRule = new LogManifestRule(allowDeleteManifest); + } + + public FileRule dispatch(FileMeta file) { + FsPath path = file.path(); + FsPath parent = path.getParent(); + if (parent == null) { + return UNKNOWN_RULE; + } + + String parentName = parent.getName(); + if (FlussPaths.REMOTE_LOG_METADATA_DIR_NAME.equals(parentName)) { + return logManifestRule; + } + if (FlussPaths.REMOTE_KV_SNAPSHOT_SHARED_DIR.equals(parentName)) { + return kvSharedSstRule; + } + if (parentName.startsWith(FlussPaths.REMOTE_KV_SNAPSHOT_DIR_PREFIX)) { + return kvSnapshotFileRule; + } + if (LogSegmentRule.isSegmentDir(parentName)) { + return logSegmentRule; + } + return UNKNOWN_RULE; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleId.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleId.java new file mode 100644 index 0000000000..a27ef07624 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/action/orphan/rule/RuleId.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one 
or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.annotation.Internal; + +/** Enumeration of all file-level rule identifiers used in orphan cleanup audit logs. */ +@Internal +public enum RuleId { + LOG_SEGMENT("log-segment"), + LOG_MANIFEST("log-manifest"), + KV_SNAPSHOT_FILE("kv-snapshot-file"), + KV_SHARED_SST("kv-shared-sst"), + UNKNOWN("unknown"); + + private final String auditTag; + + RuleId(String auditTag) { + this.auditTag = auditTag; + } + + @Override + public String toString() { + return auditTag; + } +} diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/adapter/MultipleParameterToolAdapter.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/adapter/MultipleParameterToolAdapter.java index 9c1972ce6c..21a8fb3afc 100644 --- a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/adapter/MultipleParameterToolAdapter.java +++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/adapter/MultipleParameterToolAdapter.java @@ -19,6 +19,9 @@ import org.apache.flink.api.java.utils.MultipleParameterTool; +import javax.annotation.Nullable; + +import java.util.Collection; import java.util.Map; /** @@ -43,4 +46,23 @@ public 
static MultipleParameterToolAdapter fromArgs(String[] args) {
     public Map toMap() {
         return this.multipleParameterTool.toMap();
     }
+
+    /**
+     * Returns whether the given key is present in the parsed arguments.
+     *
+     * @param key the argument name to look up
+     * @return the result of the wrapped tool's {@code has(key)}
+     */
+    public boolean has(String key) {
+        return this.multipleParameterTool.has(key);
+    }
+
+    /**
+     * Returns the value for the given key, or {@code null} if the key is not found.
+     *
+     * @param key the argument name to look up
+     * @return the result of the wrapped tool's {@code get(key)}
+     */
+    @Nullable
+    public String get(String key) {
+        return this.multipleParameterTool.get(key);
+    }
+
+    /**
+     * Returns all values associated with the given key, or {@code null} if the key is not found.
+     *
+     * <p>The element type is spelled out so callers get {@code String} elements without an
+     * unchecked cast; a raw {@code Collection} would erase it.
+     *
+     * @param key the argument name to look up
+     * @return the result of the wrapped tool's {@code getMultiParameter(key)}
+     */
+    @Nullable
+    public Collection<String> getMultiParameter(String key) {
+        return this.multipleParameterTool.getMultiParameter(key);
+    }
 }
diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanITCase.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanITCase.java
new file mode 100644
index 0000000000..6128495b2f
--- /dev/null
+++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/OrphanFilesCleanITCase.java
@@ -0,0 +1,1209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.fluss.flink.action.orphan; + +import org.apache.fluss.client.Connection; +import org.apache.fluss.client.ConnectionFactory; +import org.apache.fluss.client.admin.Admin; +import org.apache.fluss.config.ConfigOptions; +import org.apache.fluss.config.Configuration; +import org.apache.fluss.flink.action.orphan.config.OrphanCleanConfig; +import org.apache.fluss.flink.adapter.MultipleParameterToolAdapter; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.metadata.DatabaseDescriptor; +import org.apache.fluss.metadata.PartitionInfo; +import org.apache.fluss.metadata.PartitionSpec; +import org.apache.fluss.metadata.PhysicalTablePath; +import org.apache.fluss.metadata.Schema; +import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.metadata.TableDescriptor; +import org.apache.fluss.metadata.TableInfo; +import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.server.testutils.FlussClusterExtension; +import org.apache.fluss.server.zk.ZooKeeperClient; +import org.apache.fluss.server.zk.data.BucketSnapshot; +import org.apache.fluss.server.zk.data.RemoteLogManifestHandle; +import org.apache.fluss.server.zk.data.ZkData.BucketSnapshotsZNode; +import org.apache.fluss.server.zk.data.ZkData.PartitionZNode; +import org.apache.fluss.types.DataTypes; +import org.apache.fluss.utils.FlussPaths; + +import org.apache.flink.test.util.AbstractTestBase; +import org.apache.logging.log4j.Level; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.core.LogEvent; +import org.apache.logging.log4j.core.LoggerContext; +import org.apache.logging.log4j.core.appender.AbstractAppender; +import org.apache.logging.log4j.core.config.LoggerConfig; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; + +import 
java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.FileTime;
+import java.time.Duration;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.CopyOnWriteArrayList;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/** End-to-end tests for orphan files cleanup safety scenarios. */
+abstract class OrphanFilesCleanITCase extends AbstractTestBase {
+
+    /** Cluster with one tablet server, configured by {@link #buildClusterConf()}. */
+    @RegisterExtension
+    static final FlussClusterExtension FLUSS_CLUSTER_EXTENSION =
+            FlussClusterExtension.builder()
+                    .setClusterConf(buildClusterConf())
+                    .setNumOfTabletServers(1)
+                    .build();
+
+    /** Builds the cluster configuration; the only override is retaining 2 KV snapshots. */
+    private static Configuration buildClusterConf() {
+        Configuration clusterConf = new Configuration();
+        clusterConf.set(ConfigOptions.KV_MAX_RETAINED_SNAPSHOTS, 2);
+        return clusterConf;
+    }
+
+    // Class-wide client state: assigned in beforeAll(), closed and nulled in afterAll().
+    private static Connection connection;
+    private static Admin admin;
+    private static String bootstrapServers;
+
+    // Per-test audit-log capture state; setUp() and tearDown() delegate to the attach/detach
+    // helpers that manage these fields.
+    private CapturingAppender auditAppender;
+    private LoggerConfig auditLoggerConfig;
+    private Level previousAuditLevel;
+
+    /** Opens one client connection and admin handle against the cluster's bootstrap servers. */
+    @BeforeAll
+    static void beforeAll() {
+        bootstrapServers = FLUSS_CLUSTER_EXTENSION.getBootstrapServers();
+        Configuration clientConfig = new Configuration();
+        clientConfig.setString(ConfigOptions.BOOTSTRAP_SERVERS.key(), bootstrapServers);
+        connection = ConnectionFactory.createConnection(clientConfig);
+        admin = connection.getAdmin();
+    }
+
+    /** Closes the admin handle first, then the connection, and clears both references. */
+    @AfterAll
+    static void afterAll() throws Exception {
+        if (admin != null) {
+            admin.close();
+            admin = null;
+        }
+        if (connection != null) {
+            connection.close();
+            connection = null;
+        }
+    }
+
+    /** Starts capturing audit log events before each test. */
+    @BeforeEach
+    void setUp() {
+        attachAuditAppender();
+    }
+
+    /** Stops capturing audit log events after each test. */
+    @AfterEach
+    void tearDown() {
+        detachAuditAppender();
+    }
+
+    /** Converts the cluster's remote data directory URI string into a local {@link Path}. */
+    private Path remoteDataRoot() {
+        return Paths.get(URI.create(FLUSS_CLUSTER_EXTENSION.getRemoteDataDir()));
+    }
+
+    /**
+     * Returns the messages collected by the audit appender for the current test.
+     *
+     * <p>The element type is declared as {@code String} because the assertions in this class
+     * filter the result with {@code m -> m.contains(...)}, which does not compile against a raw
+     * {@code List}.
+     */
+    private List<String> auditMessages() {
+        return auditAppender.messages();
+    }
+
+    /**
+     * Installs a fresh capturing appender on the logger config resolved for {@code
+     * fluss.orphan.audit}, remembers that config's current level and lowers it to DEBUG.
+     */
+    private void attachAuditAppender() {
+        LoggerContext context = (LoggerContext) LogManager.getContext(false);
+        org.apache.logging.log4j.core.config.Configuration config = context.getConfiguration();
+        auditAppender = new CapturingAppender("orphan-clean-it-audit");
+        auditAppender.start();
+        // NOTE(review): Log4j resolves an unconfigured logger name to its nearest configured
+        // ancestor (ultimately root), so the level change below may land on a shared config --
+        // confirm the test log4j2 configuration declares "fluss.orphan.audit" explicitly.
+        auditLoggerConfig = config.getLoggerConfig("fluss.orphan.audit");
+        previousAuditLevel = auditLoggerConfig.getLevel();
+        auditLoggerConfig.setLevel(Level.DEBUG);
+        auditLoggerConfig.addAppender(auditAppender, Level.DEBUG, null);
+        context.updateLoggers();
+    }
+
+    /**
+     * Undoes {@link #attachAuditAppender()}: removes the appender, restores the saved level,
+     * republishes the logger configuration and stops the appender. Does nothing when attach
+     * never ran.
+     */
+    private void detachAuditAppender() {
+        if (auditLoggerConfig != null && auditAppender != null) {
+            auditLoggerConfig.removeAppender(auditAppender.getName());
+            auditLoggerConfig.setLevel(previousAuditLevel);
+            ((LoggerContext) LogManager.getContext(false)).updateLoggers();
+            auditAppender.stop();
+        }
+    }
+
+    // Two days. Presumably the age the makeOld(...) helper back-dates files by -- TODO confirm,
+    // its definition is not visible here.
+    private static final Duration OLD_ENOUGH = Duration.ofDays(2);
+
+    @Test
+    void mixedOrphanAndActiveFilesInSameBucket() throws Exception {
+        String dbName = newDatabaseName("mixed");
+        TablePath tablePath = createLogTable(dbName, "mixed_bucket");
+        TableInfo tableInfo = admin.getTableInfo(tablePath).get();
+        TableBucket tableBucket = new TableBucket(tableInfo.getTableId(), 0);
+        FsPath remoteLogTabletDir =
+                FlussPaths.remoteLogTabletDir(
+                        new FsPath(remoteDataRoot().resolve("log").toUri().toString()),
+                        PhysicalTablePath.of(tablePath),
+                        tableBucket);
+
+        // Two active segments registered in manifest
+        String activeId1 = UUID.randomUUID().toString();
+        String activeId2 = UUID.randomUUID().toString();
+        FsPath manifestPath =
+                new FsPath(
+                        localPath(remoteLogTabletDir)
+                                .resolve("metadata/p0.manifest")
+                                .toUri()
+                                .toString());
+        Path manifest = localPath(manifestPath);
+        Files.createDirectories(manifest.getParent());
+
String manifestContent = + "{\"version\":1," + + "\"database\":\"db\"," + + "\"table\":\"t\"," + + "\"table_id\":0," + + "\"bucket_id\":0," + + "\"remote_log_segments\":[" + + "{\"segment_id\":\"" + + activeId1 + + "\",\"start_offset\":0,\"end_offset\":99," + + "\"max_timestamp\":0,\"size_in_bytes\":1}," + + "{\"segment_id\":\"" + + activeId2 + + "\",\"start_offset\":100,\"end_offset\":199," + + "\"max_timestamp\":0,\"size_in_bytes\":1}" + + "]}"; + Files.write(manifest, manifestContent.getBytes(StandardCharsets.UTF_8)); + makeOld(manifest); + upsertManifest(tableBucket, manifestPath, 199L); + + Path activeFile1 = writeSegmentFile(remoteLogTabletDir, activeId1, 0L); + Path activeFile2 = writeSegmentFile(remoteLogTabletDir, activeId2, 100L); + + // Two orphan segments NOT in manifest + String orphanId1 = UUID.randomUUID().toString(); + String orphanId2 = UUID.randomUUID().toString(); + Path orphanFile1 = writeSegmentFile(remoteLogTabletDir, orphanId1, 500L); + Path orphanFile2 = writeSegmentFile(remoteLogTabletDir, orphanId2, 600L); + + runCleanerForDatabase(false, dbName); + + // Active files must survive + assertThat(Files.exists(activeFile1)).as("active segment 1 must survive cleanup").isTrue(); + assertThat(Files.exists(activeFile2)).as("active segment 2 must survive cleanup").isTrue(); + + // Orphan files must be deleted + assertThat(Files.exists(orphanFile1)).as("orphan segment 1 must be deleted").isFalse(); + assertThat(Files.exists(orphanFile2)).as("orphan segment 2 must be deleted").isFalse(); + + // Audit confirms deletions for both orphans + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(orphanFile1.toString())); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(orphanFile2.toString())); + + // No deletion audit for active files + assertThat(auditMessages()) + .noneMatch(m -> 
m.contains("action=deleted") && m.contains(activeFile1.toString())); + assertThat(auditMessages()) + .noneMatch(m -> m.contains("action=deleted") && m.contains(activeFile2.toString())); + } + + @Test + void dryRunDoesNotDeleteFiles() throws Exception { + String dbName = newDatabaseName("dryrun"); + TablePath tablePath = createLogTable(dbName, "dry_run"); + Path activeSegment = seedActiveBucketManifest(tablePath); + Path orphan = createOldSegmentFile(tablePath, "99999999999999999999.log"); + + runCleanerForDatabase(true, dbName); + + assertThat(Files.exists(orphan)).isTrue(); + assertThat(Files.exists(activeSegment)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=would_delete") + && m.contains("rule=log-segment") + && m.contains(orphan.toString())); + assertThat(auditMessages()).noneMatch(m -> m.contains("action=deleted")); + // Catch a regression that targets the active segment with a would_delete intent: the + // file-existence checks above would silently pass under dry-run even if the planner + // mis-marked the active segment, because dry-run never touches disk. + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("action=would_delete") + && m.contains(activeSegment.toString())); + } + + /** + * Seeds a remote log manifest + matching active segment under a freshly-allocated UUID so the + * active-file cleanup reaches {@code ManifestReadStatus.RESOLVED} for bucket 0 of the given log + * table. Returns the active segment's {@code .log} path so callers can assert it survives + * cleanup. + * + *
<p>
Without a manifest the bucket falls back to {@code ManifestReadStatus.NOT_LISTED} and the + * active-file cleanup skips the entire bucket (see §4.3.1 of the design doc) — which would + * prevent any orphan file under the bucket from being visited at all. + */ + private Path seedActiveBucketManifest(TablePath tablePath) throws Exception { + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + TableBucket tableBucket = new TableBucket(tableInfo.getTableId(), 0); + FsPath remoteLogTabletDir = + FlussPaths.remoteLogTabletDir( + new FsPath(remoteDataRoot().resolve("log").toUri().toString()), + PhysicalTablePath.of(tablePath), + tableBucket); + FsPath manifestPath = + new FsPath( + localPath(remoteLogTabletDir) + .resolve("metadata/p0.manifest") + .toUri() + .toString()); + String activeSegmentId = UUID.randomUUID().toString(); + Path activeSegment = + seedManifestAndSegment(remoteLogTabletDir, manifestPath, activeSegmentId, 0L, 0L); + upsertManifest(tableBucket, manifestPath, 0L); + return activeSegment; + } + + @Test + void defaultDoesNotEnterOrphanTableDir() throws Exception { + String dbName = newDatabaseName("defaultskip"); + long tableId = allocateDroppedTableId(dbName, "seed_table"); + createLogTable(dbName, "live_anchor"); + OrphanTableLayout layout = + createOldOrphanTableLayout( + remoteDataRoot(), + dbName, + tableId, + "ghost_table", + "99999999999999999999.log"); + + runCleanerForAllDatabases(false); + + assertThat(Files.exists(layout.orphanFile)).isTrue(); + assertThat(Files.exists(layout.tableDir)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=skip_orphan_table") + && m.contains("default-conservative") + && m.contains(layout.tableDir.toString())); + } + + @Test + void optInCleansOrphanTableDirWhenEnabled() throws Exception { + String dbName = newDatabaseName("optin"); + long tableId = allocateDroppedTableId(dbName, "seed_table"); + createLogTable(dbName, "live_anchor"); + OrphanTableLayout layout = + 
createOldOrphanTableLayout( + remoteDataRoot(), + dbName, + tableId, + "ghost_table", + "99999999999999999999.log"); + + runCleanerForAllDatabases(false, "--allow-clean-orphan-tables"); + + assertThat(Files.exists(layout.orphanFile)).isFalse(); + assertThat(Files.exists(layout.tableDir)).isFalse(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(layout.orphanFile.toString())); + } + + @Test + void pkOrphanTableRetainsSharedSstEvenWithOptIn() throws Exception { + String dbName = newDatabaseName("orphankv"); + long tableId = allocateDroppedPrimaryKeyTableId(dbName, "seed_pk_table"); + createLogTable(dbName, "live_anchor"); + OrphanTableLayout layout = + createOldOrphanKvTableLayout( + remoteDataRoot(), + dbName, + tableId, + "ghost_pk_table", + "aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa-orphan.sst"); + + runCleanerForDatabase(false, dbName, "--allow-clean-orphan-tables"); + + assertThat(Files.exists(layout.orphanFile)).isTrue(); + assertThat(Files.exists(layout.tableDir)).isTrue(); + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("rule=kv-shared-sst") + && m.contains(layout.orphanFile.toString())); + } + + @Test + void manifestPreservedByDefault() throws Exception { + String dbName = newDatabaseName("manifest"); + TablePath tablePath = createLogTable(dbName, "manifest_default"); + Path orphanManifest = createOldLogManifestFile(tablePath, "orphan.manifest"); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(orphanManifest)).isTrue(); + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("rule=log-manifest") + && m.contains(orphanManifest.toString())); + } + + @Test + void retainedNonLatestSnapshotPreserved() throws Exception { + String dbName = newDatabaseName("retained"); + TablePath tablePath = createPrimaryKeyTable(dbName, "retained_pk"); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + TableBucket tableBucket = new 
TableBucket(tableInfo.getTableId(), 0); + FsPath remoteKvTabletDir = + FlussPaths.remoteKvTabletDir( + new FsPath(remoteDataRoot().resolve("kv").toUri().toString()), + PhysicalTablePath.of(tablePath), + tableBucket); + + seedKvSnapshots(tableBucket, remoteKvTabletDir, new long[] {1L, 2L, 3L, 4L}); + + // Drop a snapshot directory locally without registering it in ZK to model a + // crash-leftover. The active set is derived from ZK references, so this + // unreferenced snapshot must still be cleaned — guarding the assertions below + // from passing trivially when the cleaner fails to scan at all. + long unreferencedSnapshotId = 99L; + Path unreferencedSnapshotDir = + localPath( + FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, unreferencedSnapshotId)); + Files.createDirectories(unreferencedSnapshotDir); + Path unreferencedMeta = unreferencedSnapshotDir.resolve("_METADATA"); + Files.write(unreferencedMeta, new byte[] {0x33}); + makeOld(unreferencedMeta); + makeOld(unreferencedSnapshotDir); + + runCleanerForDatabase(false, dbName); + + // Every snapshot still referenced in ZK is preserved, regardless of recency. 
+ assertThat(Files.exists(localPath(FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, 1L)))) + .isTrue(); + assertThat(Files.exists(localPath(FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, 2L)))) + .isTrue(); + assertThat(Files.exists(localPath(FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, 3L)))) + .isTrue(); + assertThat(Files.exists(localPath(FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, 4L)))) + .isTrue(); + assertThat(Files.exists(unreferencedSnapshotDir)).isFalse(); + } + + @Test + void listPartitionInfosFailureScopesToSingleTable() throws Exception { + String dbName = newDatabaseName("partfail"); + PartitionedTableLayout tableA = createPartitionedLogTable(dbName, "table_a", "pa"); + PartitionedTableLayout tableB = createPartitionedLogTable(dbName, "table_b", "pb"); + + long orphanPartitionIdForA = + Math.max( + tableA.partitionInfo.getPartitionId(), + tableB.partitionInfo.getPartitionId()); + long orphanPartitionIdForB = + Math.min( + tableA.partitionInfo.getPartitionId(), + tableB.partitionInfo.getPartitionId()); + + OrphanPartitionLayout orphanA = + createOldOrphanPartitionLayout( + remoteDataRoot(), + tableA.tablePath, + tableA.tableId, + "ghost-a", + orphanPartitionIdForA, + "99999999999999999999.log"); + OrphanPartitionLayout orphanB = + createOldOrphanPartitionLayout( + remoteDataRoot(), + tableB.tablePath, + tableB.tableId, + "ghost-b", + orphanPartitionIdForB, + "99999999999999999999.log"); + + ZooKeeperClient zk = FLUSS_CLUSTER_EXTENSION.getZooKeeperClient(); + String brokenPartitionPath = + PartitionZNode.path(tableA.tablePath, tableA.partitionInfo.getPartitionName()); + byte[] originalPartitionBytes = + zk.getCuratorClient().getData().forPath(brokenPartitionPath); + zk.getCuratorClient() + .setData() + .forPath(brokenPartitionPath, "not-json".getBytes(StandardCharsets.UTF_8)); + try { + runCleanerForDatabase(false, dbName, "--allow-clean-orphan-partitions"); + } finally { + zk.getCuratorClient().setData().forPath(brokenPartitionPath, 
originalPartitionBytes); + } + + assertThat(Files.exists(orphanA.partitionDir)).isTrue(); + assertThat(Files.exists(orphanA.orphanFile)).isTrue(); + assertThat(Files.exists(orphanB.partitionDir)).isFalse(); + assertThat(Files.exists(orphanB.orphanFile)).isFalse(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=skip_partition_list") + && m.contains("table=" + tableA.tablePath.getTableName())); + } + + @Test + void multipleRoundsConvergeAfterManifestUpsert() throws Exception { + String dbName = newDatabaseName("converge"); + TablePath tablePath = createLogTable(dbName, "converge_log"); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + TableBucket tableBucket = new TableBucket(tableInfo.getTableId(), 0); + FsPath remoteLogTabletDir = + FlussPaths.remoteLogTabletDir( + new FsPath(remoteDataRoot().resolve("log").toUri().toString()), + PhysicalTablePath.of(tablePath), + tableBucket); + + String segmentId = UUID.randomUUID().toString(); + FsPath manifest0 = + new FsPath( + localPath(remoteLogTabletDir) + .resolve("metadata/p0.manifest") + .toUri() + .toString()); + Path oldSegment = seedManifestAndSegment(remoteLogTabletDir, manifest0, segmentId, 0L, 0L); + upsertManifest(tableBucket, manifest0, 0L); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(oldSegment)).isTrue(); + + FsPath manifest1 = + new FsPath( + localPath(remoteLogTabletDir) + .resolve("metadata/p1.manifest") + .toUri() + .toString()); + Path newSegment = + seedManifestAndSegment(remoteLogTabletDir, manifest1, segmentId, 100L, 100L); + upsertManifest(tableBucket, manifest1, 100L); + + runCleanerForDatabase(false, dbName); + + assertThat(Files.exists(oldSegment)).isFalse(); + assertThat(Files.exists(newSegment)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(oldSegment.toString())); + } + + @Test + void singleTableModeSkipsOrphanTableScan() throws Exception 
{ + String dbName = newDatabaseName("singletable"); + long orphanTableId = allocateDroppedTableId(dbName, "orphan_seed"); + TablePath liveTable = createLogTable(dbName, "live_target"); + OrphanTableLayout layout = + createOldOrphanTableLayout( + remoteDataRoot(), + dbName, + orphanTableId, + "ghost_table", + "99999999999999999999.log"); + + runCleanerForDatabase( + false, dbName, "--table", liveTable.getTableName(), "--allow-clean-orphan-tables"); + + // The orphan-table scan must skip because tableInfosComplete=false in --table + // single-table mode. + // Sibling orphan must be preserved even with --allow-clean-orphan-tables set. + assertThat(Files.exists(layout.orphanFile)).isTrue(); + assertThat(Files.exists(layout.tableDir)).isTrue(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=skip_orphan_table_scan") + && m.contains("reason=tableInfos-incomplete") + && m.contains("db=" + dbName)); + // Must use the dedicated event, not the older skip_db. + assertThat(auditMessages()) + .noneMatch(m -> m.contains("action=skip_db") && m.contains("db=" + dbName)); + } + + @Test + void kvUnitFailureDoesNotBlockLogCleanup() throws Exception { + String dbName = newDatabaseName("crossflow"); + TablePath tablePath = createPrimaryKeyTable(dbName, "fail_kv_keep_log"); + TableInfo tableInfo = admin.getTableInfo(tablePath).get(); + TableBucket tableBucket = new TableBucket(tableInfo.getTableId(), 0); + + // Seed a valid KV snapshot in ZK so listBucketSnapshots returns a child to decode. + FsPath remoteKvTabletDir = + FlussPaths.remoteKvTabletDir( + new FsPath(remoteDataRoot().resolve("kv").toUri().toString()), + PhysicalTablePath.of(tablePath), + tableBucket); + long activeSnapshotId = 1L; + seedKvSnapshots(tableBucket, remoteKvTabletDir, new long[] {activeSnapshotId}); + + // Seed a log manifest + active segment so the log bucket reaches RESOLVED in the + // active-file cleanup. 
+ Path activeLogSegment = seedActiveBucketManifest(tablePath); + + // ----------------------------------------------------------------- + // Step 1 — baseline (no fault injection) + // Plant an orphan KV snapshot dir under snap-99 (NOT registered in ZK) plus an + // orphan log segment. With the cluster wired normally, cleanup MUST delete them: + // this establishes the negative control that proves the phase-2 preservation + // claim is meaningful and not just an accidental no-op. + // ----------------------------------------------------------------- + long baselineOrphanSnapshotId = 99L; + FsPath baselineOrphanKvDir = + FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, baselineOrphanSnapshotId); + Path baselineOrphanKvMetadata = localPath(baselineOrphanKvDir).resolve("_METADATA"); + Path baselineOrphanKvSst = + localPath(baselineOrphanKvDir).resolve(baselineOrphanSnapshotId + ".sst"); + Files.createDirectories(localPath(baselineOrphanKvDir)); + Files.write(baselineOrphanKvMetadata, new byte[] {0x55}); + Files.write(baselineOrphanKvSst, new byte[] {0x66}); + makeOld(baselineOrphanKvMetadata); + makeOld(baselineOrphanKvSst); + + Path baselineOrphanLogSegment = createOldSegmentFile(tablePath, "99999999999999999999.log"); + + runCleanerForDatabase(false, dbName); + + // Baseline: snap-99 files were DELETED, proving normal cleanup would have killed + // them. Path-specific assertions guarantee these audit events refer to phase 1. 
+ assertThat(Files.exists(baselineOrphanKvMetadata)) + .as( + "phase 1 baseline: snap-99/_METADATA must be DELETED " + + "(cleanup would normally remove orphan KV files)") + .isFalse(); + assertThat(Files.exists(baselineOrphanKvSst)) + .as("phase 1 baseline: snap-99/.sst must be DELETED") + .isFalse(); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=kv-snapshot-file") + && m.contains(baselineOrphanKvMetadata.toString())); + assertThat(auditMessages()) + .anyMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=kv-snapshot-file") + && m.contains(baselineOrphanKvSst.toString())); + // Baseline: orphan log segment was DELETED and the active segment survived. Phase 1's + // log deletion is asserted both via Files.exists and via the audit stream so the final + // phase-2 assertion can require TWO deletion events on the same path (one per phase). + assertThat(Files.exists(baselineOrphanLogSegment)) + .as("phase 1 baseline: orphan log segment must be DELETED") + .isFalse(); + assertThat(Files.exists(activeLogSegment)) + .as("phase 1: active log segment must survive cleanup") + .isTrue(); + assertThat(auditMessages()) + .filteredOn( + m -> + m.contains("action=deleted") + && m.contains("rule=log-segment") + && m.contains(baselineOrphanLogSegment.toString())) + .as("phase 1 baseline: orphan log segment deletion must appear in audit stream") + .hasSizeGreaterThanOrEqualTo(1); + + // ----------------------------------------------------------------- + // Step 2 — fault injection + // Re-plant orphan KV files under a DIFFERENT snap-77 dir so path-specific audit + // assertions are unambiguous (phase-1 audits target snap-99, phase-2 audits + // target snap-77). Re-plant the orphan log segment at its original path (phase 1 + // deleted it) so we can verify log cleanup still proceeds when the KV unit fails. 
+ // ----------------------------------------------------------------- + long faultInjectionOrphanSnapshotId = 77L; + FsPath faultInjectionOrphanKvDir = + FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, faultInjectionOrphanSnapshotId); + Path faultInjectionOrphanKvMetadata = + localPath(faultInjectionOrphanKvDir).resolve("_METADATA"); + Path faultInjectionOrphanKvSst = + localPath(faultInjectionOrphanKvDir) + .resolve(faultInjectionOrphanSnapshotId + ".sst"); + Files.createDirectories(localPath(faultInjectionOrphanKvDir)); + Files.write(faultInjectionOrphanKvMetadata, new byte[] {0x55}); + Files.write(faultInjectionOrphanKvSst, new byte[] {0x66}); + makeOld(faultInjectionOrphanKvMetadata); + makeOld(faultInjectionOrphanKvSst); + + // Re-planted at the SAME path as baselineOrphanLogSegment (createOldSegmentFile uses a + // fixed UUID + filename), so the audit stream will contain TWO delete events targeting + // this path -- one from each phase. The final + // filteredOn(...).hasSizeGreaterThanOrEqualTo(2) + // assertion below verifies both. + Path faultInjectionOrphanLogSegment = + createOldSegmentFile(tablePath, "99999999999999999999.log"); + + // Inject a non-numeric child znode under BucketSnapshotsZNode so server-side + // listBucketSnapshotIds throws NumberFormatException on Long.parseLong. Client-side + // fetchKvActiveSnapDirs propagates the exception and cleanActiveTableFiles catches it + // to emit skip_kv_target. + ZooKeeperClient zk = FLUSS_CLUSTER_EXTENSION.getZooKeeperClient(); + String invalidChildPath = BucketSnapshotsZNode.path(tableBucket) + "/not-a-long"; + zk.getCuratorClient().create().forPath(invalidChildPath, new byte[0]); + try { + runCleanerForDatabase(false, dbName); + } finally { + zk.getCuratorClient().delete().forPath(invalidChildPath); + } + + // KV target was skipped: skip_kv_target audit fires AND snap-77 orphan files preserved. 
+ assertThat(auditMessages()) + .as("phase 2: skip_kv_target audit must fire when LIST_KV_SNAPSHOTS RPC fails") + .anyMatch( + m -> + m.contains("action=skip_kv_target") + && m.contains("table_id=" + tableInfo.getTableId())); + assertThat(Files.exists(faultInjectionOrphanKvMetadata)) + .as( + "phase 2: snap-77/_METADATA must be PRESERVED " + + "(KV target failure must short-circuit cleanup)") + .isTrue(); + assertThat(Files.exists(faultInjectionOrphanKvSst)) + .as("phase 2: snap-77/.sst must be PRESERVED") + .isTrue(); + // Defensive: nothing in the audit stream ever marked snap-77 files for deletion. + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=kv-snapshot-file") + && m.contains(faultInjectionOrphanKvMetadata.toString())); + assertThat(auditMessages()) + .noneMatch( + m -> + m.contains("action=deleted") + && m.contains("rule=kv-snapshot-file") + && m.contains(faultInjectionOrphanKvSst.toString())); + + // Log cleanup proceeded independently: orphan log segment DELETED, active preserved. + // The re-planted segment lives at the same path as baselineOrphanLogSegment, so the audit + // stream must contain >=2 deletion events for this path: one from phase 1, one from + // phase 2. anyMatch alone could be satisfied by phase 1's event in isolation, which is + // why we count instead. 
+        assertThat(Files.exists(faultInjectionOrphanLogSegment))
+                .as("phase 2: orphan log segment must be re-deleted (log cleanup is independent)")
+                .isFalse();
+        assertThat(Files.exists(activeLogSegment))
+                .as("phase 2: active log segment must still survive cleanup")
+                .isTrue();
+        assertThat(auditMessages())
+                .filteredOn(
+                        m ->
+                                m.contains("action=deleted")
+                                        && m.contains("rule=log-segment")
+                                        && m.contains(faultInjectionOrphanLogSegment.toString()))
+                .as(
+                        "orphan log segment must be deleted in both phase 1 (baseline) and "
+                                + "phase 2 (with KV fault) -- two events on the same path")
+                .hasSizeGreaterThanOrEqualTo(2);
+    }
+
+    /**
+     * With {@code --allow-clean-orphan-partitions}, a back-dated partition directory that is not
+     * registered for its table is removed together with the file inside it.
+     */
+    @Test
+    void optInCleansOrphanPartitionDir() throws Exception {
+        String dbName = newDatabaseName("orphanpart");
+        // Create two partitioned tables so the tracker observes both partition IDs.
+        // The second table's partition ID is higher. We plant an orphan under the second
+        // table using the first table's (lower) ID so the guard passes:
+        // orphanId <= maxKnownPartitionId.
+        PartitionedTableLayout tableA = createPartitionedLogTable(dbName, "table_a", "pa");
+        PartitionedTableLayout tableB = createPartitionedLogTable(dbName, "table_b", "pb");
+
+        long orphanPartitionId =
+                Math.min(
+                        tableA.partitionInfo.getPartitionId(),
+                        tableB.partitionInfo.getPartitionId());
+        // Plant orphan under whichever table does NOT own the lower-ID partition.
+        PartitionedTableLayout targetTable =
+                (tableA.partitionInfo.getPartitionId() == orphanPartitionId) ? tableB : tableA;
+
+        OrphanPartitionLayout orphan =
+                createOldOrphanPartitionLayout(
+                        remoteDataRoot(),
+                        targetTable.tablePath,
+                        targetTable.tableId,
+                        "ghost",
+                        orphanPartitionId,
+                        "99999999999999999999.log");
+
+        runCleanerForDatabase(false, dbName, "--allow-clean-orphan-partitions");
+
+        assertThat(Files.exists(orphan.orphanFile))
+                .as("orphan partition file must be deleted")
+                .isFalse();
+        assertThat(Files.exists(orphan.partitionDir))
+                .as("orphan partition dir must be removed")
+                .isFalse();
+        assertThat(auditMessages())
+                .anyMatch(
+                        m ->
+                                m.contains("action=deleted")
+                                        && m.contains("rule=log-segment")
+                                        && m.contains(orphan.orphanFile.toString()));
+    }
+
+    /**
+     * After the only file of a segment directory is deleted as an orphan, the emptied directory
+     * itself must be removed while the active segment stays in place.
+     */
+    @Test
+    void emptyDirsSweptAfterOrphanFileDeletion() throws Exception {
+        String dbName = newDatabaseName("emptydir");
+        TablePath tablePath = createLogTable(dbName, "emptydir_table");
+        Path activeSegment = seedActiveBucketManifest(tablePath);
+
+        // Create an orphan file as the sole content of its UUID directory.
+        Path orphan = createOldSegmentFile(tablePath, "99999999999999999999.log");
+        Path orphanSegmentDir = orphan.getParent();
+
+        // Pre-condition: the segment directory exists before cleanup.
+        assertThat(Files.exists(orphanSegmentDir)).isTrue();
+
+        runCleanerForDatabase(false, dbName);
+
+        // The orphan file must be deleted.
+        assertThat(Files.exists(orphan)).as("orphan file must be deleted").isFalse();
+        // The now-empty UUID directory must also be swept.
+        assertThat(Files.exists(orphanSegmentDir))
+                .as("empty segment dir must be swept after cleanup")
+                .isFalse();
+        // Active segment and its directory survive.
+        assertThat(Files.exists(activeSegment)).as("active segment must survive").isTrue();
+        assertThat(auditMessages())
+                .anyMatch(
+                        m ->
+                                m.contains("action=deleted")
+                                        && m.contains("rule=log-segment")
+                                        && m.contains(orphan.toString()));
+    }
+
+    /** Creates the database if needed and a single-bucket table with columns (id, value). */
+    private TablePath createLogTable(String databaseName, String tableName) throws Exception {
+        Schema schema =
+                Schema.newBuilder()
+                        .column("id", DataTypes.INT())
+                        .column("value", DataTypes.STRING())
+                        .build();
+        return createSingleBucketTable(databaseName, tableName, schema);
+    }
+
+    /** Same as {@link #createLogTable} but declares {@code id} as the primary key. */
+    private TablePath createPrimaryKeyTable(String databaseName, String tableName)
+            throws Exception {
+        Schema schema =
+                Schema.newBuilder()
+                        .column("id", DataTypes.INT())
+                        .column("value", DataTypes.STRING())
+                        .primaryKey("id")
+                        .build();
+        return createSingleBucketTable(databaseName, tableName, schema);
+    }
+
+    /** Creates the database if needed and a table with one bucket distributed by {@code id}. */
+    private TablePath createSingleBucketTable(
+            String databaseName, String tableName, Schema schema) throws Exception {
+        admin.createDatabase(databaseName, DatabaseDescriptor.EMPTY, true).get();
+        TablePath tablePath = TablePath.of(databaseName, tableName);
+        TableDescriptor descriptor =
+                TableDescriptor.builder().schema(schema).distributedBy(1, "id").build();
+        admin.createTable(tablePath, descriptor, true).get();
+        return tablePath;
+    }
+
+    /** Creates a log table, records its table id, drops the table and returns the id. */
+    private long allocateDroppedTableId(String databaseName, String tableName) throws Exception {
+        return dropAndReturnTableId(createLogTable(databaseName, tableName));
+    }
+
+    /** Creates a primary-key table, records its table id, drops the table and returns the id. */
+    private long allocateDroppedPrimaryKeyTableId(String databaseName, String tableName)
+            throws Exception {
+        return dropAndReturnTableId(createPrimaryKeyTable(databaseName, tableName));
+    }
+
+    /** Reads the id of an existing table, then drops the table. */
+    private long dropAndReturnTableId(TablePath tablePath) throws Exception {
+        long tableId = admin.getTableInfo(tablePath).get().getTableId();
+        admin.dropTable(tablePath, false).get();
+        return tableId;
+    }
+
+    /** Resolves the remote log tablet directory of bucket 0 of the given table. */
+    private FsPath firstBucketRemoteLogTabletDir(TablePath tablePath) throws Exception {
+        TableInfo tableInfo = admin.getTableInfo(tablePath).get();
+        FsPath remoteLogRoot =
+                new FsPath(
+                        FLUSS_CLUSTER_EXTENSION.getRemoteDataDir()
+                                + "/"
+                                + FlussPaths.REMOTE_LOG_DIR_NAME);
+        return FlussPaths.remoteLogTabletDir(
+                remoteLogRoot,
+                PhysicalTablePath.of(tablePath),
+                new TableBucket(tableInfo.getTableId(), 0));
+    }
+
+    /**
+     * Writes a one-byte, back-dated file into a fixed-UUID segment directory under bucket 0 of the
+     * table's remote log tablet directory and returns the file.
+     */
+    private Path createOldSegmentFile(TablePath tablePath, String fileName) throws Exception {
+        Path segmentDir =
+                localPath(firstBucketRemoteLogTabletDir(tablePath))
+                        .resolve(
+                                UUID.fromString("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa").toString());
+        Files.createDirectories(segmentDir);
+        Path file = segmentDir.resolve(fileName);
+        Files.write(file, new byte[] {0x42});
+        makeOld(file);
+        makeOld(segmentDir);
+        return file;
+    }
+
+    /**
+     * Writes a one-byte, back-dated file into the {@code metadata} directory under bucket 0 of the
+     * table's remote log tablet directory and returns the file.
+     */
+    private Path createOldLogManifestFile(TablePath tablePath, String fileName) throws Exception {
+        Path metadataDir = localPath(firstBucketRemoteLogTabletDir(tablePath)).resolve("metadata");
+        Files.createDirectories(metadataDir);
+        Path file = metadataDir.resolve(fileName);
+        Files.write(file, new byte[] {0x11});
+        makeOld(file);
+        return file;
+    }
+
+    /**
+     * Creates a single-bucket table partitioned by {@code pt}, adds one partition with the given
+     * value, waits until it is ready and returns the table path, table id and partition info.
+     */
+    private PartitionedTableLayout createPartitionedLogTable(
+            String databaseName, String tableName, String partitionValue) throws Exception {
+        admin.createDatabase(databaseName, DatabaseDescriptor.EMPTY, true).get();
+        TablePath tablePath = TablePath.of(databaseName, tableName);
+        Schema schema =
+                Schema.newBuilder()
+                        .column("id", DataTypes.INT())
+                        .column("value", DataTypes.STRING())
+                        .column("pt", DataTypes.STRING())
+                        .build();
+        TableDescriptor descriptor =
+                TableDescriptor.builder()
+                        .schema(schema)
+                        .distributedBy(1, "id")
+                        .partitionedBy("pt")
+                        .build();
+        admin.createTable(tablePath, descriptor, true).get();
+        admin.createPartition(tablePath, partitionSpec("pt", partitionValue), false).get();
+
+        // Keyed by partition value: the lookup below uses the partition value as key.
+        Map<String, Long> partitionIds =
+                FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(tablePath, 1);
+        TableInfo tableInfo = admin.getTableInfo(tablePath).get();
+        long partitionId = partitionIds.get(partitionValue);
+        FLUSS_CLUSTER_EXTENSION.waitUntilTablePartitionReady(tableInfo.getTableId(), partitionId);
+        List<PartitionInfo> partitionInfos = admin.listPartitionInfos(tablePath).get();
+        assertThat(partitionInfos).hasSize(1);
+        return new PartitionedTableLayout(tablePath, tableInfo.getTableId(), partitionInfos.get(0));
+    }
+
+    /**
+     * For every snapshot id, writes back-dated {@code _METADATA} and {@code <id>.sst} files into
+     * the snapshot directory and registers the snapshot for the bucket in ZooKeeper.
+     */
+    private void seedKvSnapshots(
+            TableBucket tableBucket, FsPath remoteKvTabletDir, long[] snapshotIds)
+            throws Exception {
+        ZooKeeperClient zk = FLUSS_CLUSTER_EXTENSION.getZooKeeperClient();
+        for (long snapshotId : snapshotIds) {
+            FsPath snapshotDir = FlussPaths.remoteKvSnapshotDir(remoteKvTabletDir, snapshotId);
+            Path localSnapshotDir = localPath(snapshotDir);
+            Files.createDirectories(localSnapshotDir);
+
+            Path metadataFile = localSnapshotDir.resolve("_METADATA");
+            Files.write(metadataFile, new byte[] {0x33});
+            makeOld(metadataFile);
+
+            Path dataFile = localSnapshotDir.resolve(snapshotId + ".sst");
+            Files.write(dataFile, new byte[] {0x44});
+            makeOld(dataFile);
+
+            makeOld(localSnapshotDir);
+
+            zk.registerTableBucketSnapshot(
+                    tableBucket,
+                    new BucketSnapshot(
+                            snapshotId, snapshotId, snapshotDir.toString() + "/_METADATA"));
+        }
+    }
+
+    /**
+     * Writes a back-dated manifest describing one segment, then writes that segment's back-dated
+     * {@code .log} file and returns it.
+     */
+    private Path seedManifestAndSegment(
+            FsPath remoteLogTabletDir,
+            FsPath manifestPath,
+            String segmentId,
+            long startOffset,
+            long endOffset)
+            throws Exception {
+        Path manifest = localPath(manifestPath);
+        Files.createDirectories(manifest.getParent());
+        Files.write(
+                manifest,
+                manifestJson(segmentId, startOffset, endOffset).getBytes(StandardCharsets.UTF_8));
+        makeOld(manifest);
+
+        return writeSegmentFile(remoteLogTabletDir, segmentId, startOffset);
+    }
+
+    /** Writes a one-byte, back-dated {@code .log} file for the segment and returns it. */
+    private Path writeSegmentFile(FsPath remoteLogTabletDir, String segmentId, long startOffset)
+            throws Exception {
+        FsPath segmentDir = new FsPath(remoteLogTabletDir, segmentId);
+        Path localSegmentDir = localPath(segmentDir);
+        Files.createDirectories(localSegmentDir);
+        Path logFile =
+                localSegmentDir.resolve(FlussPaths.filenamePrefixFromOffset(startOffset) + ".log");
+        Files.write(logFile, new byte[] {0x55});
+        makeOld(logFile);
+        return logFile;
+    }
+
+    /** Points the bucket's remote log manifest handle in ZooKeeper at the given manifest. */
+    private void upsertManifest(TableBucket tableBucket, FsPath manifestPath, long endOffset)
+            throws Exception {
+        FLUSS_CLUSTER_EXTENSION
+                .getZooKeeperClient()
+                .upsertRemoteLogManifestHandle(
+                        tableBucket, new RemoteLogManifestHandle(manifestPath, endOffset));
+    }
+
+    /** Runs the cleaner action scoped to one database. */
+    private void runCleanerForDatabase(boolean dryRun, String databaseName, String... extraArgs)
+            throws Exception {
+        runCleaner(dryRun, extraArgs, "--database", databaseName);
+    }
+
+    /** Runs the cleaner action with {@code --all-databases}. */
+    private void runCleanerForAllDatabases(boolean dryRun, String... extraArgs) throws Exception {
+        runCleaner(dryRun, extraArgs, "--all-databases");
+    }
+
+    /**
+     * Assembles the command line (bootstrap server, scope arguments, then the common arguments)
+     * and runs the cleaner action with it.
+     */
+    private void runCleaner(boolean dryRun, String[] extraArgs, String... scopeArgs)
+            throws Exception {
+        List<String> args = new ArrayList<>();
+        args.add("--bootstrap-server");
+        args.add(bootstrapServers);
+        Collections.addAll(args, scopeArgs);
+        appendCommonArgs(args, dryRun, extraArgs);
+        OrphanCleanConfig config =
+                OrphanCleanConfig.fromParams(
+                        MultipleParameterToolAdapter.fromArgs(args.toArray(new String[0])));
+        new OrphanFilesCleanAction(config).run();
+    }
+
+    private static final DateTimeFormatter CUTOFF_FORMATTER =
+            DateTimeFormatter.ISO_OFFSET_DATE_TIME;
+
+    /** Appends {@code --older-than <now - 1d>}, the extra arguments and optionally --dry-run. */
+    private static void appendCommonArgs(List<String> args, boolean dryRun, String... extraArgs) {
+        // Tests back-date their orphan files to now - 2d via makeOld(); a cutoff at now - 1d
+        // safely puts those files strictly before the cutoff (mtime < cutoff → DELETE-eligible).
+        String cutoff = OffsetDateTime.now(ZoneOffset.UTC).minusDays(1).format(CUTOFF_FORMATTER);
+        args.add("--older-than");
+        args.add(cutoff);
+        Collections.addAll(args, extraArgs);
+        if (dryRun) {
+            args.add("--dry-run");
+        }
+    }
+
+    /**
+     * Builds {@code log/<db>/<table>-<tableId>/<partitionName>-p<partitionId>/0/<uuid>/<fileName>}
+     * under the remote root and back-dates the file and every directory up to the partition dir.
+     */
+    private OrphanPartitionLayout createOldOrphanPartitionLayout(
+            Path remoteRoot,
+            TablePath tablePath,
+            long tableId,
+            String partitionName,
+            long partitionId,
+            String fileName)
+            throws Exception {
+        Path tableDir =
+                remoteRoot
+                        .resolve("log")
+                        .resolve(tablePath.getDatabaseName())
+                        .resolve(tablePath.getTableName() + "-" + tableId);
+        Path partitionDir = tableDir.resolve(partitionName + "-p" + partitionId);
+        Path segmentDir =
+                partitionDir
+                        .resolve("0")
+                        .resolve(
+                                UUID.fromString("bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb").toString());
+        Files.createDirectories(segmentDir);
+        Path orphanFile = segmentDir.resolve(fileName);
+        Files.write(orphanFile, new byte[] {0x66});
+        makeOld(orphanFile);
+        makeOld(segmentDir);
+        makeOld(segmentDir.getParent());
+        makeOld(partitionDir);
+        return new OrphanPartitionLayout(partitionDir, orphanFile);
+    }
+
+    /**
+     * Builds {@code log/<db>/<table>-<tableId>/0/<uuid>/<fileName>} under the remote root and
+     * back-dates the file and every directory up to the table dir.
+     */
+    private OrphanTableLayout createOldOrphanTableLayout(
+            Path remoteRoot, String dbName, long tableId, String tableName, String fileName)
+            throws Exception {
+        Path tableDir =
+                remoteRoot.resolve("log").resolve(dbName).resolve(tableName + "-" + tableId);
+        Path segmentDir =
+                tableDir.resolve("0")
+                        .resolve(
+                                UUID.fromString("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa").toString());
+        Files.createDirectories(segmentDir);
+        Path orphanFile = segmentDir.resolve(fileName);
+        Files.write(orphanFile, new byte[] {0x42});
+        makeOld(orphanFile);
+        makeOld(segmentDir);
+        makeOld(segmentDir.getParent());
+        makeOld(tableDir);
+        return new OrphanTableLayout(tableDir, orphanFile);
+    }
+
+    /**
+     * Builds {@code kv/<db>/<table>-<tableId>/0/shared/<fileName>} under the remote root and
+     * back-dates the file and every directory up to the table dir.
+     */
+    private OrphanTableLayout createOldOrphanKvTableLayout(
+            Path remoteRoot, String dbName, long tableId, String tableName, String fileName)
+            throws Exception {
+        Path tableDir = remoteRoot.resolve("kv").resolve(dbName).resolve(tableName + "-" + tableId);
+        Path sharedDir = tableDir.resolve("0").resolve("shared");
+        Files.createDirectories(sharedDir);
+        Path orphanFile = sharedDir.resolve(fileName);
+        Files.write(orphanFile, new byte[] {0x24});
+        makeOld(orphanFile);
+        makeOld(sharedDir);
+        makeOld(sharedDir.getParent());
+        makeOld(tableDir);
+        return new OrphanTableLayout(tableDir, orphanFile);
+    }
+
+    /** Returns the prefix followed by the current {@link System#nanoTime()} value. */
+    private static String newDatabaseName(String prefix) {
+        return prefix + System.nanoTime();
+    }
+
+    /** Builds a single-key partition spec. */
+    private static PartitionSpec partitionSpec(String key, String value) {
+        return new PartitionSpec(Collections.singletonMap(key, value));
+    }
+
+    /** Converts an {@link FsPath} whose string form is a URI into a local {@link Path}. */
+    private static Path localPath(FsPath path) {
+        return Paths.get(java.net.URI.create(path.toString()));
+    }
+
+    /** Renders a version-1 manifest JSON document that lists exactly one remote log segment. */
+    private static String manifestJson(String segmentId, long startOffset, long endOffset) {
+        return "{\"version\":1,"
+                + "\"database\":\"db\","
+                + "\"table\":\"t\","
+                + "\"table_id\":0,"
+                + "\"bucket_id\":0,"
+                + "\"remote_log_segments\":[{"
+                + "\"segment_id\":\""
+                + segmentId
+                + "\",\"start_offset\":"
+                + startOffset
+                + ",\"end_offset\":"
+                + endOffset
+                + ",\"max_timestamp\":0,"
+                + "\"size_in_bytes\":1"
+                + "}]}";
+    }
+
+
private void makeOld(Path path) throws Exception { + Files.setLastModifiedTime( + path, FileTime.fromMillis(System.currentTimeMillis() - OLD_ENOUGH.toMillis())); + } + + private static final class PartitionedTableLayout { + private final TablePath tablePath; + private final long tableId; + private final PartitionInfo partitionInfo; + + private PartitionedTableLayout( + TablePath tablePath, long tableId, PartitionInfo partitionInfo) { + this.tablePath = tablePath; + this.tableId = tableId; + this.partitionInfo = partitionInfo; + } + } + + private static final class OrphanPartitionLayout { + private final Path partitionDir; + private final Path orphanFile; + + private OrphanPartitionLayout(Path partitionDir, Path orphanFile) { + this.partitionDir = partitionDir; + this.orphanFile = orphanFile; + } + } + + private static final class OrphanTableLayout { + private final Path tableDir; + private final Path orphanFile; + + private OrphanTableLayout(Path tableDir, Path orphanFile) { + this.tableDir = tableDir; + this.orphanFile = orphanFile; + } + } + + private static final class CapturingAppender extends AbstractAppender { + + private final List messages = new CopyOnWriteArrayList(); + + CapturingAppender(String name) { + super( + name, + null, + null, + true, + org.apache.logging.log4j.core.config.Property.EMPTY_ARRAY); + } + + @Override + public void append(LogEvent event) { + messages.add(event.getMessage().getFormattedMessage()); + } + + List messages() { + return new ArrayList(messages); + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifierTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifierTest.java new file mode 100644 index 0000000000..8746be4ae5 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/RpcErrorClassifierTest.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan; + +import org.apache.fluss.exception.FlussRuntimeException; +import org.apache.fluss.exception.PartitionNotExistException; +import org.apache.fluss.exception.TableNotExistException; + +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.concurrent.CompletionException; +import java.util.concurrent.TimeoutException; + +import static org.apache.fluss.flink.action.orphan.RpcErrorClassifier.Category.NOT_FOUND; +import static org.apache.fluss.flink.action.orphan.RpcErrorClassifier.Category.SERVER_ERROR; +import static org.apache.fluss.flink.action.orphan.RpcErrorClassifier.Category.TRANSIENT; +import static org.apache.fluss.flink.action.orphan.RpcErrorClassifier.Category.UNKNOWN; +import static org.assertj.core.api.Assertions.assertThat; + +class RpcErrorClassifierTest { + + @Test + void tableNotExistIsNotFound() { + assertThat(RpcErrorClassifier.classify(new TableNotExistException("x"))) + .isEqualTo(NOT_FOUND); + } + + @Test + void partitionNotExistIsNotFound() { + assertThat(RpcErrorClassifier.classify(new PartitionNotExistException("x"))) + .isEqualTo(NOT_FOUND); + } + + @Test + void ioExceptionIsTransient() { + 
assertThat(RpcErrorClassifier.classify(new IOException("conn reset"))).isEqualTo(TRANSIENT); + } + + @Test + void timeoutIsTransient() { + assertThat(RpcErrorClassifier.classify(new TimeoutException("rpc"))).isEqualTo(TRANSIENT); + } + + @Test + void unwrapsCompletionException() { + assertThat( + RpcErrorClassifier.classify( + new CompletionException(new TableNotExistException("x")))) + .isEqualTo(NOT_FOUND); + } + + @Test + void flussServerErrorIsServerError() { + assertThat(RpcErrorClassifier.classify(new FlussRuntimeException("internal"))) + .isEqualTo(SERVER_ERROR); + } + + @Test + void otherRuntimeIsUnknown() { + assertThat(RpcErrorClassifier.classify(new IllegalStateException("?"))).isEqualTo(UNKNOWN); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcherTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcherTest.java new file mode 100644 index 0000000000..7144b4f031 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/build/ActiveRefsFetcherTest.java @@ -0,0 +1,440 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.build; + +import org.apache.fluss.client.metadata.ActiveKvSnapshots; +import org.apache.fluss.client.metadata.RemoteLogManifestInfo; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.utils.FlussPaths; + +import org.junit.jupiter.api.Test; + +import javax.annotation.Nullable; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Tests for {@link ActiveRefsFetcher} — log active set sourced from coordinator metadata. */ +class ActiveRefsFetcherTest { + + @Test + void emptyManifestListReturnsEmptyResult() { + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueEmptyResponse(); + + StubManifestReader reader = new StubManifestReader(); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.NOT_LISTED); + // Empty success must NOT trigger a retry — lock down call count. 
+ assertThat(rpcCalls.get()).isEqualTo(1); + } + + @Test + void fileNotFoundMarksBucketReadFailedWithoutRetry() { + // Locks down "no per-bucket retry": a single FileNotFound on the manifest second-read + // immediately marks the bucket READ_FAILED; recovery is left to the next cleanup round. + // This prevents N × retries × IO blow-up during cluster-wide manifest upsert turbulence. + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueResponse(p0); + + StubManifestReader reader = new StubManifestReader(); + reader.failWithNotFound(p0); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.READ_FAILED); + assertThat(result.readFailureReason(0)) + .contains("Manifest not found (likely upserted concurrently)") + .contains("bucketId=0"); + // Per-target RPC issued exactly once; no per-bucket retry burst. 
+ assertThat(rpcCalls.get()).isEqualTo(1); + } + + @Test + void fetchLogActiveRefsByBucket_abortsOnlyFailedBucket() throws Exception { + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + FsPath p1 = new FsPath("oss://b/log/db/t-7/1/metadata/p1.manifest"); + String manifestJson = manifestJson("11111111-1111-1111-1111-111111111111", 7L, 9L); + + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueMultiBucketResponse(p0, p1); + + StubManifestReader reader = new StubManifestReader(); + reader.returnBytes(p0, manifestJson.getBytes(StandardCharsets.UTF_8)); + reader.failWithNotFound(p1); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.RESOLVED); + assertThat(result.statusFor(1)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.READ_FAILED); + assertThat(result.activeRefsOf(0).logSegmentRelativePaths()) + .containsExactlyInAnyOrder( + "11111111-1111-1111-1111-111111111111/" + + FlussPaths.filenamePrefixFromOffset(7L) + + ".log", + "11111111-1111-1111-1111-111111111111/" + + FlussPaths.filenamePrefixFromOffset(7L) + + ".index", + "11111111-1111-1111-1111-111111111111/" + + FlussPaths.filenamePrefixFromOffset(7L) + + ".timeindex", + "11111111-1111-1111-1111-111111111111/" + + FlussPaths.filenamePrefixFromOffset(9L) + + ".writer_snapshot"); + assertThat(result.readFailureReason(1)) + .contains("Manifest not found (likely upserted concurrently)") + .contains("bucketId=1"); + assertThat(result.statusFor(2)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.NOT_LISTED); + // Per-target RPC issued exactly once; per-bucket failure does not trigger any extra RPC. 
+ assertThat(rpcCalls.get()).isEqualTo(1); + } + + @Test + void fetchLogActiveRefsByBucket_targetRpcFailure() { + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + + ActiveRefsFetcher builder = + new ActiveRefsFetcher(admin, new StubManifestReader(), /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isFalse(); + assertThat(result.listFailureReason()).contains("RPC failure for tableId=7"); + // Per-bucket queries are not meaningful when listOk=false. + assertThatThrownBy(() -> result.statusFor(0)).isInstanceOf(IllegalStateException.class); + // Per-target RPC is retried up to maxRetries times before giving up. + assertThat(rpcCalls.get()).isEqualTo(3); + } + + @Test + void manifestParseFailureMarksBucketReadFailed() { + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + StubAdmin admin = new StubAdmin(new AtomicInteger()); + admin.queueResponse(p0); + + StubManifestReader reader = new StubManifestReader(); + reader.returnBytes(p0, "{}".getBytes(StandardCharsets.UTF_8)); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.READ_FAILED); + assertThat(result.readFailureReason(0)) + .contains("Manifest parse failure") + .contains("bucketId=0"); + } + + @Test + void ioErrorMarksBucketReadFailed() { + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + StubAdmin admin = new StubAdmin(new AtomicInteger()); + admin.queueResponse(p0); + + StubManifestReader reader = new StubManifestReader(); + reader.failWithIo(p0, new IOException("disk fault")); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + 
LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, null); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.READ_FAILED); + assertThat(result.readFailureReason(0)).contains("IO error reading manifest"); + } + + @Test + void fetchKvActiveSnapDirsAggregatesPerBucket() { + StubAdmin admin = new StubAdmin(new AtomicInteger()); + Map> snapshotIds = new HashMap<>(); + snapshotIds.put(0, new HashSet<>(Arrays.asList(9L, 10L))); + snapshotIds.put(1, new HashSet<>(Arrays.asList(5L))); + admin.queueKvResponseMultiBucket(snapshotIds); + + ActiveRefsFetcher builder = + new ActiveRefsFetcher(admin, /* metadataReader */ null, /* maxRetries= */ 3); + KvActiveRefsFetchResult result = builder.fetchKvActiveSnapDirs(7L, null); + + assertThat(result.listOk()).isTrue(); + Map> perBucket = result.activeSnapDirsByBucket(); + assertThat(perBucket.get(0)).containsExactlyInAnyOrder("snap-9", "snap-10"); + assertThat(perBucket.get(1)).containsExactly("snap-5"); + } + + /** + * Symmetric with {@link #fetchLogActiveRefsByBucket_targetRpcFailure}: the KV per-target RPC + * retries up to {@code maxRetries} times and reports {@code listOk=false} on exhaustion. + */ + @Test + void fetchKvActiveSnapDirsRetriesThenReportsListFailure() { + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + // No queued KV response → StubAdmin returns failed CompletableFutures on every attempt. + + ActiveRefsFetcher builder = + new ActiveRefsFetcher(admin, /* metadataReader */ null, /* maxRetries= */ 3); + KvActiveRefsFetchResult result = builder.fetchKvActiveSnapDirs(7L, null); + + assertThat(result.listOk()).isFalse(); + // Reason is classified via RpcErrorClassifier for audit compatibility. + assertThat(result.listFailureReason()).isNotEmpty(); + // Per-target RPC is retried up to maxRetries times before giving up. 
+ assertThat(rpcCalls.get()).isEqualTo(3); + } + + /** + * Verifies that a non-null {@code partitionId} is forwarded to the underlying {@code + * listRemoteLogManifests} RPC by {@link ActiveRefsFetcher#fetchLogActiveRefsByBucket}. + */ + @Test + void fetchLogActiveRefsByBucketWithPartitionIdRoutesCorrectly() throws Exception { + FsPath p0 = new FsPath("oss://b/log/db/t-7/0/metadata/p0.manifest"); + String manifestJson = manifestJson("11111111-1111-1111-1111-111111111111", 7L, 9L); + + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueResponse(p0); + + StubManifestReader reader = new StubManifestReader(); + reader.returnBytes(p0, manifestJson.getBytes(StandardCharsets.UTF_8)); + + ActiveRefsFetcher builder = new ActiveRefsFetcher(admin, reader, /* maxRetries= */ 3); + LogActiveRefsFetchResult result = builder.fetchLogActiveRefsByBucket(7L, 42L); + + assertThat(result.listOk()).isTrue(); + assertThat(result.statusFor(0)) + .isEqualTo(LogActiveRefsFetchResult.ManifestReadStatus.RESOLVED); + // Proves partitionId=42 was forwarded to the RPC (sentinel Long.MIN_VALUE would mean + // the stub was never invoked). + assertThat(admin.lastLogPartitionId.get()) + .as("partitionId must be forwarded to listRemoteLogManifests RPC") + .isEqualTo(42L); + assertThat(rpcCalls.get()) + .as("happy path must issue exactly one listRemoteLogManifests RPC") + .isEqualTo(1); + } + + /** + * Verifies that a non-null {@code partitionId} is forwarded to the underlying {@code + * listKvSnapshots} RPC by {@link ActiveRefsFetcher#fetchKvActiveSnapDirs}. 
+ */ + @Test + void fetchKvActiveSnapDirsWithPartitionIdRoutesCorrectly() { + AtomicInteger rpcCalls = new AtomicInteger(0); + StubAdmin admin = new StubAdmin(rpcCalls); + admin.queueKvResponse(0, 5L); + + ActiveRefsFetcher builder = + new ActiveRefsFetcher(admin, /* metadataReader */ null, /* maxRetries= */ 3); + KvActiveRefsFetchResult result = builder.fetchKvActiveSnapDirs(7L, 99L); + + assertThat(result.listOk()).isTrue(); + Map> perBucket = result.activeSnapDirsByBucket(); + assertThat(perBucket).containsOnlyKeys(0); + assertThat(perBucket.get(0)).containsExactly("snap-5"); + // Proves partitionId=99 was forwarded to the RPC. + assertThat(admin.lastKvPartitionId.get()) + .as("partitionId must be forwarded to listKvSnapshots RPC") + .isEqualTo(99L); + assertThat(rpcCalls.get()) + .as("happy path must issue exactly one listKvSnapshots RPC") + .isEqualTo(1); + } + + // ------------------------------------------------------------------------- + // Test fixtures + // ------------------------------------------------------------------------- + + private static String manifestJson(String segmentId, long startOffset, long endOffset) { + return "{\"version\":1," + + "\"database\":\"db\"," + + "\"table\":\"t\"," + + "\"table_id\":7," + + "\"bucket_id\":0," + + "\"remote_log_segments\":[{" + + "\"segment_id\":\"" + + segmentId + + "\",\"start_offset\":" + + startOffset + + ",\"end_offset\":" + + endOffset + + ",\"max_timestamp\":0," + + "\"size_in_bytes\":1" + + "}]}"; + } + + /** Queues per-call responses for ListRemoteLogManifests / ListKvSnapshots and tracks calls. 
*/ + private static final class StubAdmin implements ActiveRefsFetcher.AdminFacade { + + private final Deque> responses = new ArrayDeque<>(); + private final Deque kvResponses = new ArrayDeque<>(); + private final AtomicInteger callCounter; + private final AtomicReference lastLogPartitionId = + new AtomicReference<>(Long.MIN_VALUE); + private final AtomicReference lastKvPartitionId = + new AtomicReference<>(Long.MIN_VALUE); + + StubAdmin(AtomicInteger callCounter) { + this.callCounter = callCounter; + } + + void queueResponse(FsPath manifestPath) { + queueResponse(manifestPath, 0); + } + + void queueResponse(FsPath manifestPath, int bucketId) { + List list = new ArrayList<>(); + list.add( + new RemoteLogManifestInfo( + new TableBucket(7L, bucketId), manifestPath.toString(), 0L)); + responses.add(list); + } + + void queueMultiBucketResponse(FsPath manifestPath0, FsPath manifestPath1) { + List list = new ArrayList<>(); + list.add( + new RemoteLogManifestInfo( + new TableBucket(7L, 0), manifestPath0.toString(), 0L)); + list.add( + new RemoteLogManifestInfo( + new TableBucket(7L, 1), manifestPath1.toString(), 0L)); + responses.add(list); + } + + void queueEmptyResponse() { + responses.add(Collections.emptyList()); + } + + void queueKvResponse(int bucketId, long... 
snapshotIds) { + Map> snapshotIdsByBucket = new HashMap<>(); + Set ids = new HashSet<>(); + for (long id : snapshotIds) { + ids.add(id); + } + snapshotIdsByBucket.put(bucketId, ids); + kvResponses.add(new ActiveKvSnapshots(7L, null, snapshotIdsByBucket)); + } + + void queueKvResponseMultiBucket(Map> snapshotIdsByBucket) { + kvResponses.add(new ActiveKvSnapshots(7L, null, snapshotIdsByBucket)); + } + + @Override + public CompletableFuture> listRemoteLogManifests( + long tableId, @Nullable Long partitionId) { + callCounter.incrementAndGet(); + lastLogPartitionId.set(partitionId); + List next = responses.poll(); + if (next == null) { + CompletableFuture> failed = new CompletableFuture<>(); + failed.completeExceptionally( + new IllegalStateException("StubAdmin: no more queued responses")); + return failed; + } + return CompletableFuture.completedFuture(next); + } + + @Override + public CompletableFuture listKvSnapshots( + long tableId, @Nullable Long partitionId) { + callCounter.incrementAndGet(); + lastKvPartitionId.set(partitionId); + ActiveKvSnapshots next = kvResponses.poll(); + if (next == null) { + CompletableFuture failed = new CompletableFuture<>(); + failed.completeExceptionally( + new IllegalStateException("StubAdmin: no more queued kv responses")); + return failed; + } + return CompletableFuture.completedFuture(next); + } + } + + /** Per-path file-content / failure registry for the second-read step. 
*/ + private static final class StubManifestReader implements ActiveRefsFetcher.MetadataReader { + + private final Map bytesByPath = new HashMap<>(); + private final Set notFoundPaths = new HashSet<>(); + private final Map ioFailuresByPath = new HashMap<>(); + + void returnBytes(FsPath path, byte[] data) { + bytesByPath.put(path.toString(), data); + } + + void failWithNotFound(FsPath path) { + notFoundPaths.add(path.toString()); + } + + void failWithIo(FsPath path, IOException e) { + ioFailuresByPath.put(path.toString(), e); + } + + @Override + public byte[] read(FsPath path) throws IOException { + String key = path.toString(); + if (notFoundPaths.contains(key)) { + throw new FileNotFoundException(key); + } + IOException io = ioFailuresByPath.get(key); + if (io != null) { + throw io; + } + byte[] data = bytesByPath.get(key); + if (data == null) { + throw new FileNotFoundException(key); + } + return data; + } + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfigTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfigTest.java new file mode 100644 index 0000000000..222d4743e2 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/config/OrphanCleanConfigTest.java @@ -0,0 +1,253 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.config; + +import org.apache.fluss.flink.adapter.MultipleParameterToolAdapter; + +import org.junit.jupiter.api.Test; + +import java.time.Duration; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Tests for {@link OrphanCleanConfig}. */ +class OrphanCleanConfigTest { + + private static final DateTimeFormatter CUTOFF_FORMATTER = + DateTimeFormatter.ISO_OFFSET_DATE_TIME; + + @Test + void parsesAllDatabasesWithDefaults() { + long beforeParse = System.currentTimeMillis(); + OrphanCleanConfig config = + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] {"--bootstrap-server", "h:9123", "--all-databases"})); + long afterParse = System.currentTimeMillis(); + + assertThat(config.allDatabases()).isTrue(); + assertThat(config.database()).isEmpty(); + long olderThanLow = beforeParse - Duration.ofDays(3).toMillis(); + long olderThanHigh = afterParse - Duration.ofDays(3).toMillis(); + assertThat(config.olderThanMillis()).isBetween(olderThanLow, olderThanHigh); + assertThat(config.dryRun()).isFalse(); + assertThat(config.remoteFsOpRateLimitPerSecond()).isEqualTo(100L); + assertThat(config.allowDeleteManifest()).isFalse(); + assertThat(config.allowCleanOrphanTables()).isFalse(); + assertThat(config.allowCleanOrphanPartitions()).isFalse(); + } + + @Test + void remoteFsOpRateLimitParsed() { + 
OrphanCleanConfig cfg = + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--remote-fs-op-rate-limit-per-second", + "42" + })); + assertThat(cfg.remoteFsOpRateLimitPerSecond()).isEqualTo(42L); + } + + @Test + void remoteFsOpRateLimitMustBePositive() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--remote-fs-op-rate-limit-per-second", + "0" + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("--remote-fs-op-rate-limit-per-second must be positive"); + } + + @Test + void databaseAndAllDatabasesAreMutuallyExclusive() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--database", + "x", + "--all-databases" + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("mutually exclusive"); + } + + @Test + void cutoffCloserThanOneDayRejected() { + OffsetDateTime tooClose = OffsetDateTime.now(ZoneOffset.UTC).minusMinutes(30); + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--older-than", + tooClose.format(CUTOFF_FORMATTER) + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("at least 1d before now"); + } + + @Test + void cutoffWithoutExplicitOffsetRejected() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--older-than", + "2024-01-01 00:00:00" + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("ISO-8601"); + } + + @Test + void cutoffWithExplicitOffsetParsed() { + 
OffsetDateTime cutoff = OffsetDateTime.now(ZoneOffset.UTC).minusDays(2).withNano(0); + OrphanCleanConfig cfg = + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--older-than", + cutoff.format(CUTOFF_FORMATTER) + })); + assertThat(cfg.olderThanMillis()).isEqualTo(cutoff.toInstant().toEpochMilli()); + } + + @Test + void tableCannotBeUsedWithAllDatabases() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--table", + "t1" + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("--table requires --database"); + } + + @Test + void bootstrapServerRequired() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] {"--all-databases"}))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("bootstrap-server"); + } + + @Test + void optInFlagsParsed() { + OrphanCleanConfig cfg = + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "x:1", + "--all-databases", + "--allow-delete-manifest", + "--allow-clean-orphan-tables", + "--allow-clean-orphan-partitions" + })); + assertThat(cfg.allowDeleteManifest()).isTrue(); + assertThat(cfg.allowCleanOrphanTables()).isTrue(); + assertThat(cfg.allowCleanOrphanPartitions()).isTrue(); + } + + @Test + void extraConfigsParsed() { + OrphanCleanConfig cfg = + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--conf", + "fs.oss.accessKeyId=myKey", + "--conf", + "fs.oss.accessKeySecret=mySecret", + "--conf", + "fs.oss.endpoint=oss-cn-hangzhou.aliyuncs.com" + })); + assertThat(cfg.extraConfigs()).hasSize(3); + 
assertThat(cfg.extraConfigs().get("fs.oss.accessKeyId")).isEqualTo("myKey"); + assertThat(cfg.extraConfigs().get("fs.oss.accessKeySecret")).isEqualTo("mySecret"); + assertThat(cfg.extraConfigs().get("fs.oss.endpoint")) + .isEqualTo("oss-cn-hangzhou.aliyuncs.com"); + } + + @Test + void extraConfigsEmptyWhenNotProvided() { + OrphanCleanConfig cfg = + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] {"--bootstrap-server", "h:9123", "--all-databases"})); + assertThat(cfg.extraConfigs()).isEmpty(); + } + + @Test + void extraConfigsRejectsMalformedEntry() { + assertThatThrownBy( + () -> + OrphanCleanConfig.fromParams( + MultipleParameterToolAdapter.fromArgs( + new String[] { + "--bootstrap-server", + "h:9123", + "--all-databases", + "--conf", + "noEqualsSign" + }))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("key=value"); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleterTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleterTest.java new file mode 100644 index 0000000000..8be4bd3d11 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/fs/SafeDeleterTest.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.fs; + +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.rule.Decision; +import org.apache.fluss.flink.action.orphan.rule.RuleId; +import org.apache.fluss.fs.FileSystem; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.fs.local.LocalFileSystem; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** Tests for {@link SafeDeleter} against the local filesystem. 
*/ +class SafeDeleterTest { + + @TempDir Path tmp; + + @Test + void deleteFileRespectsDryRun() throws IOException { + Path target = Files.createFile(tmp.resolve("orphan.log")); + SafeDeleter d = newDeleter(localFs(), true); + d.deleteFile(new FsPath(target.toString()), Decision.DELETE, RuleId.LOG_SEGMENT); + assertThat(Files.exists(target)).isTrue(); + } + + @Test + void deleteFileActuallyDeletesWhenNotDryRun() throws IOException { + Path target = Files.createFile(tmp.resolve("orphan.log")); + SafeDeleter d = newDeleter(localFs(), false); + d.deleteFile(new FsPath(target.toString()), Decision.DELETE, RuleId.LOG_SEGMENT); + assertThat(Files.exists(target)).isFalse(); + } + + @Test + void deleteFileRejectsNonDeleteDecision() { + SafeDeleter d = newDeleter(null, false); + assertThatThrownBy( + () -> + d.deleteFile( + new FsPath("/tmp/x"), Decision.KEEP_ACTIVE, RuleId.UNKNOWN)) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + void deleteEmptyDirNoOpsOnNonEmpty() throws IOException { + Path dir = Files.createDirectory(tmp.resolve("d")); + Files.createFile(dir.resolve("child")); + SafeDeleter d = newDeleter(localFs(), false); + d.deleteEmptyDir(new FsPath(dir.toString())); + assertThat(Files.exists(dir)).isTrue(); + } + + @Test + void deleteEmptyDirActuallyDeletes() throws IOException { + Path dir = Files.createDirectory(tmp.resolve("d")); + SafeDeleter d = newDeleter(localFs(), false); + d.deleteEmptyDir(new FsPath(dir.toString())); + assertThat(Files.exists(dir)).isFalse(); + } + + private static SafeDeleter newDeleter(FileSystem fs, boolean dryRun) { + return new SafeDeleter(fs, dryRun, new AuditLogger(), RateLimiter.create(1000.0)); + } + + private static FileSystem localFs() { + return LocalFileSystem.getSharedInstance(); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/job/BucketCleanerTest.java 
b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/job/BucketCleanerTest.java new file mode 100644 index 0000000000..b0fc5484f5 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/job/BucketCleanerTest.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.job; + +import org.apache.fluss.flink.action.orphan.audit.AuditLogger; +import org.apache.fluss.flink.action.orphan.fs.SafeDeleter; +import org.apache.fluss.flink.action.orphan.rule.BucketActiveRefs; +import org.apache.fluss.flink.action.orphan.rule.RuleDispatcher; +import org.apache.fluss.fs.FsPath; +import org.apache.fluss.shaded.guava32.com.google.common.util.concurrent.RateLimiter; +import org.apache.fluss.utils.FlussPaths; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.attribute.FileTime; +import java.util.Collections; + +import static org.assertj.core.api.Assertions.assertThat; + +class BucketCleanerTest { + + @Test + void removesOldEmptySegmentDirAfterDeletingExpiredFiles(@TempDir Path tmp) throws IOException { + Path bucketRoot = Files.createDirectories(tmp.resolve("bucket")); + Path segmentDir = + Files.createDirectories(bucketRoot.resolve("11111111-1111-1111-1111-111111111111")); + Path logFile = + Files.write( + segmentDir.resolve( + FlussPaths.filenamePrefixFromOffset(0L) + + FlussPaths.LOG_FILE_SUFFIX), + new byte[] {0x42}); + long cutoff = System.currentTimeMillis() - 1000L; + makeOld(logFile, cutoff - 1000L); + makeOld(segmentDir, cutoff - 1000L); + makeOld(bucketRoot, cutoff - 1000L); + + BucketCleaner cleaner = createCleaner(bucketRoot, cutoff); + + BucketCleaner.BucketCleanStats stats = + cleaner.clean(BucketActiveRefs.empty(), new FsPath(bucketRoot.toString())); + + assertThat(stats.scanned).isEqualTo(1L); + assertThat(stats.deleted).isEqualTo(2L); + assertThat(stats.emptyDirsRemoved).isEqualTo(1L); + assertThat(Files.exists(logFile)).isFalse(); + assertThat(Files.exists(segmentDir)).isFalse(); + assertThat(Files.exists(bucketRoot)).isTrue(); + } + + @Test + void keepsFreshEmptySegmentDir(@TempDir Path tmp) throws IOException { + Path bucketRoot = 
Files.createDirectories(tmp.resolve("bucket")); + Path segmentDir = + Files.createDirectories(bucketRoot.resolve("11111111-1111-1111-1111-111111111111")); + long cutoff = System.currentTimeMillis() - 1000L; + + BucketCleaner cleaner = createCleaner(bucketRoot, cutoff); + + BucketCleaner.BucketCleanStats stats = + cleaner.clean( + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + Collections.emptySet()), + new FsPath(bucketRoot.toString())); + + assertThat(stats.deleted).isEqualTo(0L); + assertThat(stats.emptyDirsRemoved).isEqualTo(0L); + assertThat(Files.exists(segmentDir)).isTrue(); + } + + @Test + void scansButDoesNotDeleteUnknownDotFiles(@TempDir Path tmp) throws IOException { + Path bucketRoot = Files.createDirectories(tmp.resolve("bucket")); + Path segmentDir = + Files.createDirectories(bucketRoot.resolve("11111111-1111-1111-1111-111111111111")); + Path dotFile = Files.write(segmentDir.resolve(".unknown"), new byte[] {0x42}); + long cutoff = System.currentTimeMillis() - 1000L; + makeOld(dotFile, cutoff - 1000L); + makeOld(segmentDir, cutoff - 1000L); + makeOld(bucketRoot, cutoff - 1000L); + + BucketCleaner cleaner = createCleaner(bucketRoot, cutoff); + + BucketCleaner.BucketCleanStats stats = + cleaner.clean(BucketActiveRefs.empty(), new FsPath(bucketRoot.toString())); + + assertThat(stats.scanned).isEqualTo(1L); + assertThat(stats.deleted).isEqualTo(0L); + assertThat(stats.emptyDirsRemoved).isEqualTo(0L); + assertThat(Files.exists(dotFile)).isTrue(); + assertThat(Files.exists(segmentDir)).isTrue(); + } + + private static void makeOld(Path path, long timestampMillis) throws IOException { + Files.setLastModifiedTime(path, FileTime.fromMillis(timestampMillis)); + } + + private static BucketCleaner createCleaner(Path bucketRoot, long cutoff) throws IOException { + RateLimiter remoteFsOpRateLimiter = RateLimiter.create(1000.0); + return new BucketCleaner( + new RuleDispatcher(), + new SafeDeleter( + new 
FsPath(bucketRoot.toString()).getFileSystem(), + false, + new AuditLogger(), + remoteFsOpRateLimiter), + new AuditLogger(), + cutoff, + remoteFsOpRateLimiter); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRuleTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRuleTest.java new file mode 100644 index 0000000000..c6267d31c8 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSharedSstRuleTest.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link KvSharedSstRule}. 
*/ +class KvSharedSstRuleTest { + + private static final long NOW = 1_700_000_000_000L; + private static final long DAY_MS = 24L * 60L * 60L * 1000L; + private static final long CUTOFF_MS = NOW - DAY_MS; + + private final KvSharedSstRule rule = new KvSharedSstRule(); + + @Test + void keepsExpiredUnreferencedSharedSst() { + FileMeta file = file("/kv/db/t-1/0/shared/abc-001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void keepsReferencedSharedSst() { + FileMeta file = file("/kv/db/t-1/0/shared/abc-001.sst", NOW - 2 * DAY_MS); + Set sharedFiles = new HashSet(); + sharedFiles.add("abc-001.sst"); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + sharedFiles); + + assertThat(rule.evaluate(file, activeRefs, CUTOFF_MS)).isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void skipsUnknownNonSstFileUnderSharedDirectory() { + FileMeta file = file("/kv/db/t-1/0/shared/abc-001.meta", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void skipsSstOutsideSharedDirectory() { + FileMeta file = file("/kv/db/t-1/0/snap-5/abc-001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + private static FileMeta file(String path, long modificationTime) { + return new FileMeta(new FsPath(path), 1L, modificationTime); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRuleTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRuleTest.java new file mode 100644 index 0000000000..c056d8e538 --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/KvSnapshotFileRuleTest.java @@ 
-0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link KvSnapshotFileRule}. */ +class KvSnapshotFileRuleTest { + + private static final long NOW = 1_700_000_000_000L; + private static final long DAY_MS = 24L * 60L * 60L * 1000L; + + /** Absolute cutoff = NOW - 1d. Files with mtime strictly less are deletion-eligible. 
*/ + private static final long CUTOFF_MS = NOW - DAY_MS; + + private final KvSnapshotFileRule rule = new KvSnapshotFileRule(); + + @Test + void deletesExpiredSnapshotFileOutsideBucketActiveRefs() { + FileMeta file = file("/kv/db/t-1/0/snap-5/001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, kvActiveSnapDirs("snap-7", "snap-9"), CUTOFF_MS)) + .isEqualTo(Decision.DELETE); + } + + @Test + void keepsActiveSnapshotFile() { + FileMeta file = file("/kv/db/t-1/0/snap-5/001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, kvActiveSnapDirs("snap-5"), CUTOFF_MS)) + .isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void defersSnapshotWhenMtimeAtOrAfterCutoff() { + FileMeta file = file("/kv/db/t-1/0/snap-5/001.sst", NOW - DAY_MS / 2); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.DEFER); + } + + @Test + void skipsUnknownFileNameInsideSnapshotDirectory() { + FileMeta file = file("/kv/db/t-1/0/snap-5/data.bloom", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void skipsUnknownWhenParentIsNotSnapshotDirectory() { + FileMeta file = file("/kv/db/t-1/0/random/001.sst", NOW - 2 * DAY_MS); + + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void recognizesExactPrefixAndSuffixBasedSnapshotFiles() { + String[] fileNames = { + "_METADATA", "MANIFEST-001", "OPTIONS-002", "CURRENT", "LOG", "IDENTITY", "001.log" + }; + + for (String fileName : fileNames) { + FileMeta file = file("/kv/db/t-1/0/snap-5/" + fileName, NOW - 2 * DAY_MS); + assertThat(rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .as("file=%s", fileName) + .isEqualTo(Decision.DELETE); + } + } + + @Test + void retainedNonLatestSnapshotIsActive() { + // Simulates kv.snapshot.num-retained=2, latest snapId=10, retained={9,10}: the active set + // is the full retained set (server 
emits RETAINED ∪ STILL_IN_USE), so a file under snap-9 + // MUST be classified as KEEP_ACTIVE even if it's old enough to clear the cutoff. Cutoff is + // set to NOW (an aggressive value) to prove the active-set check short-circuits before the + // age check. + FileMeta file = + new FileMeta(new FsPath("oss://b/kv/db/t-7/0/snap-9/_METADATA"), 1024L, 200L); + + Decision decision = rule.evaluate(file, kvActiveSnapDirs("snap-9", "snap-10"), NOW); + + assertThat(decision).isEqualTo(Decision.KEEP_ACTIVE); + } + + private static BucketActiveRefs kvActiveSnapDirs(String... snapDirs) { + Set activeDirs = new HashSet(Arrays.asList(snapDirs)); + return new BucketActiveRefs( + Collections.emptySet(), activeDirs, Collections.emptySet()); + } + + private static FileMeta file(String path, long modificationTime) { + return new FileMeta(new FsPath(path), 1L, modificationTime); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRuleTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRuleTest.java new file mode 100644 index 0000000000..b8d166059a --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogManifestRuleTest.java @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import java.util.Collections; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link LogManifestRule}. */ +class LogManifestRuleTest { + + private static final long NOW = 1_700_000_000_000L; + private static final long DAY_MS = 24L * 60L * 60L * 1000L; + private static final long CUTOFF_MS = NOW - DAY_MS; + + /** Default-conservative rule (allowDeleteManifest=false): never deletes manifests. */ + private final LogManifestRule defaultRule = new LogManifestRule(); + + /** Opt-in rule (allowDeleteManifest=true): uses active-set + cutoff semantics. 
*/ + private final LogManifestRule optInRule = new LogManifestRule(true); + + @Test + void deletesExpiredNonActiveManifest() { + FileMeta file = file("/log/db/t-1/0/metadata/old.manifest", NOW - 2 * DAY_MS); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + Collections.singleton("/log/db/t-1/0/metadata/current.manifest")); + + assertThat(optInRule.evaluate(file, activeRefs, CUTOFF_MS)).isEqualTo(Decision.DELETE); + } + + @Test + void keepsManifestListedInActiveManifestPaths() { + FileMeta file = file("/log/db/t-1/0/metadata/active.manifest", NOW - 2 * DAY_MS); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + Collections.singleton("/log/db/t-1/0/metadata/active.manifest")); + + assertThat(optInRule.evaluate(file, activeRefs, CUTOFF_MS)).isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void defersManifestWhenMtimeAtOrAfterCutoff() { + FileMeta file = file("/log/db/t-1/0/metadata/fresh.manifest", NOW - DAY_MS / 2); + + assertThat(optInRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.DEFER); + } + + @Test + void skipsUnknownFileInMetadataDirectory() { + FileMeta file = file("/log/db/t-1/0/metadata/readme.txt", NOW - 2 * DAY_MS); + + assertThat(defaultRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + assertThat(optInRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void skipsManifestOutsideMetadataDirectory() { + FileMeta file = + file( + "/log/db/t-1/0/11111111-1111-1111-1111-111111111111/file.manifest", + NOW - 2 * DAY_MS); + + assertThat(defaultRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + assertThat(optInRule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS)) + .isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void 
defaultRuleNeverDeletesEvenWhenStaleAndOrphan() { + // mtime=0L (very old); active-set lists a different manifest as active; under the + // default-conservative branch the rule MUST still return KEEP_ACTIVE rather than DELETE. + FileMeta file = file("/log/db/t-1/0/metadata/orphan.manifest", 0L); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + Collections.emptySet(), + Collections.emptySet(), + Collections.singleton("/log/db/t-1/0/metadata/current.manifest")); + + assertThat(defaultRule.evaluate(file, activeRefs, CUTOFF_MS)) + .isEqualTo(Decision.KEEP_ACTIVE); + } + + private static FileMeta file(String path, long modificationTime) { + return new FileMeta(new FsPath(path), 1L, modificationTime); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRuleTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRuleTest.java new file mode 100644 index 0000000000..bb8249e55d --- /dev/null +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/action/orphan/rule/LogSegmentRuleTest.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.fluss.flink.action.orphan.rule; + +import org.apache.fluss.fs.FsPath; + +import org.junit.jupiter.api.Test; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; + +/** Tests for {@link LogSegmentRule}. */ +class LogSegmentRuleTest { + + private static final String SEGMENT_ID = "11111111-1111-1111-1111-111111111111"; + private static final long NOW = 1_700_000_000_000L; + private static final long DAY_MS = 24L * 60L * 60L * 1000L; + + /** + * Absolute cutoff = NOW - 1d. Files with mtime strictly less than this are deletion-eligible. + */ + private static final long CUTOFF_MS = NOW - DAY_MS; + + private final LogSegmentRule rule = new LogSegmentRule(); + + @Test + void deleteWhenKnownExpiredAndNotInBucketActiveRefs() { + FileMeta file = + file("/log/db/t-1/0/" + SEGMENT_ID + "/00000000000000000000.log", NOW - 2 * DAY_MS); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.DELETE); + } + + @Test + void keepActiveWhenInBucketActiveRefs() { + FileMeta file = + file("/log/db/t-1/0/" + SEGMENT_ID + "/00000000000000000000.log", NOW - 2 * DAY_MS); + Set liveFiles = new HashSet(); + liveFiles.add(SEGMENT_ID + "/00000000000000000000.log"); + BucketActiveRefs activeRefs = + new BucketActiveRefs( + liveFiles, Collections.emptySet(), Collections.emptySet()); + + Decision decision = rule.evaluate(file, activeRefs, CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.KEEP_ACTIVE); + } + + @Test + void deferWhenMtimeAtOrAfterCutoff() { + FileMeta file = + file("/log/db/t-1/0/" + SEGMENT_ID + "/00000000000000000000.log", NOW - DAY_MS / 2); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.DEFER); + } + + @Test + void skipUnknownExtension() { + FileMeta file = + file( + "/log/db/t-1/0/" + SEGMENT_ID + 
"/00000000000000000000.bloom", + NOW - 2 * DAY_MS); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void skipUnknownWhenParentIsNotSegmentUuid() { + FileMeta file = file("/log/db/t-1/0/not-a-uuid/00000000000000000000.log", NOW - 2 * DAY_MS); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.SKIP_UNKNOWN); + } + + @Test + void deletedSuffixIsRecognizedAsKnownType() { + FileMeta file = + file( + "/log/db/t-1/0/" + SEGMENT_ID + "/00000000000000000000.log.deleted", + NOW - 2 * DAY_MS); + + Decision decision = rule.evaluate(file, BucketActiveRefs.empty(), CUTOFF_MS); + + assertThat(decision).isEqualTo(Decision.DELETE); + } + + private static FileMeta file(String path, long modificationTime) { + return new FileMeta(new FsPath(path), 100L, modificationTime); + } +} diff --git a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/adapter/FlinkMultipleParameterToolTest.java b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/adapter/FlinkMultipleParameterToolTest.java index 2cbf282352..321bc177c0 100644 --- a/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/adapter/FlinkMultipleParameterToolTest.java +++ b/fluss-flink/fluss-flink-common/src/test/java/org/apache/fluss/flink/adapter/FlinkMultipleParameterToolTest.java @@ -37,4 +37,33 @@ public void testToMap() { assertThat(adapter.toMap()).containsEntry("multi1", "multiValue3"); assertThat(adapter.toMap()).containsEntry("multi2", "multiValue2"); } + + @Test + public void testHas() { + String[] args = new String[] {"--key1", "value1", "--key2", "value2"}; + MultipleParameterToolAdapter adapter = MultipleParameterToolAdapter.fromArgs(args); + + assertThat(adapter.has("key1")).isTrue(); + assertThat(adapter.has("key2")).isTrue(); + assertThat(adapter.has("nonexistent")).isFalse(); + } + + @Test 
+ public void testGet() { + String[] args = new String[] {"--key1", "value1", "--key2", "value2"}; + MultipleParameterToolAdapter adapter = MultipleParameterToolAdapter.fromArgs(args); + + assertThat(adapter.get("key1")).isEqualTo("value1"); + assertThat(adapter.get("key2")).isEqualTo("value2"); + assertThat(adapter.get("nonexistent")).isNull(); + } + + @Test + public void testGetMultiParameter() { + String[] args = new String[] {"--multi", "val1", "--multi", "val2", "--single", "only"}; + MultipleParameterToolAdapter adapter = MultipleParameterToolAdapter.fromArgs(args); + + assertThat(adapter.getMultiParameter("multi")).containsExactly("val1", "val2"); + assertThat(adapter.getMultiParameter("single")).containsExactly("only"); + } } diff --git a/fluss-flink/pom.xml b/fluss-flink/pom.xml index 4f65374352..a0fae789b9 100644 --- a/fluss-flink/pom.xml +++ b/fluss-flink/pom.xml @@ -76,4 +76,4 @@ - \ No newline at end of file + diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/DefaultRemoteLogStorage.java b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/DefaultRemoteLogStorage.java index 33f29f2830..56e8e24091 100644 --- a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/DefaultRemoteLogStorage.java +++ b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/DefaultRemoteLogStorage.java @@ -25,6 +25,7 @@ import org.apache.fluss.fs.FsPath; import org.apache.fluss.metadata.PhysicalTablePath; import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.remote.RemoteLogSegment; import org.apache.fluss.utils.CloseableRegistry; import org.apache.fluss.utils.ExceptionUtils; diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/LogTieringTask.java b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/LogTieringTask.java index 8c7d0d8832..c9cb32215e 100644 --- 
a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/LogTieringTask.java +++ b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/LogTieringTask.java @@ -22,6 +22,7 @@ import org.apache.fluss.fs.FsPath; import org.apache.fluss.metadata.PhysicalTablePath; import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.remote.RemoteLogSegment; import org.apache.fluss.rpc.gateway.CoordinatorGateway; import org.apache.fluss.rpc.messages.CommitRemoteLogManifestRequest; diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManager.java b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManager.java index 143ae251a7..f57a6ea81c 100644 --- a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManager.java +++ b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogManager.java @@ -25,6 +25,7 @@ import org.apache.fluss.fs.FsPath; import org.apache.fluss.metadata.PhysicalTablePath; import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.remote.RemoteLogSegment; import org.apache.fluss.rpc.gateway.CoordinatorGateway; import org.apache.fluss.server.log.LogManager; diff --git a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogStorage.java b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogStorage.java index 1a410fcb2c..6e1de16cf3 100644 --- a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogStorage.java +++ b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogStorage.java @@ -22,6 +22,7 @@ import org.apache.fluss.fs.FsPath; import org.apache.fluss.metadata.PhysicalTablePath; import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.remote.RemoteLogSegment; import java.io.Closeable; diff --git 
a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogTablet.java b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogTablet.java index c840a0a028..9f0ae6e949 100644 --- a/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogTablet.java +++ b/fluss-server/src/main/java/org/apache/fluss/server/log/remote/RemoteLogTablet.java @@ -22,6 +22,7 @@ import org.apache.fluss.metadata.TableBucket; import org.apache.fluss.metrics.MetricNames; import org.apache.fluss.metrics.groups.MetricGroup; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.remote.RemoteLogSegment; import org.apache.fluss.server.metrics.group.BucketMetricGroup; diff --git a/fluss-server/src/test/java/org/apache/fluss/server/coordinator/rebalance/RebalanceManagerITCase.java b/fluss-server/src/test/java/org/apache/fluss/server/coordinator/rebalance/RebalanceManagerITCase.java index a99370908b..425b5463e1 100644 --- a/fluss-server/src/test/java/org/apache/fluss/server/coordinator/rebalance/RebalanceManagerITCase.java +++ b/fluss-server/src/test/java/org/apache/fluss/server/coordinator/rebalance/RebalanceManagerITCase.java @@ -28,6 +28,7 @@ import org.apache.fluss.metadata.TableBucketReplica; import org.apache.fluss.metadata.TableDescriptor; import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.rpc.gateway.TabletServerGateway; import org.apache.fluss.rpc.messages.AddServerTagRequest; import org.apache.fluss.server.coordinator.CoordinatorEventProcessor; @@ -35,7 +36,6 @@ import org.apache.fluss.server.coordinator.rebalance.model.ClusterModel; import org.apache.fluss.server.coordinator.statemachine.ReplicaState; import org.apache.fluss.server.log.remote.RemoteLogManager; -import org.apache.fluss.server.log.remote.RemoteLogManifest; import org.apache.fluss.server.log.remote.RemoteLogTablet; import org.apache.fluss.server.replica.ReplicaManager; import 
org.apache.fluss.server.tablet.TabletServer; diff --git a/fluss-server/src/test/java/org/apache/fluss/server/log/remote/DefaultRemoteLogStorageTest.java b/fluss-server/src/test/java/org/apache/fluss/server/log/remote/DefaultRemoteLogStorageTest.java index a450295a7f..a8ec544510 100644 --- a/fluss-server/src/test/java/org/apache/fluss/server/log/remote/DefaultRemoteLogStorageTest.java +++ b/fluss-server/src/test/java/org/apache/fluss/server/log/remote/DefaultRemoteLogStorageTest.java @@ -21,6 +21,7 @@ import org.apache.fluss.fs.FsPath; import org.apache.fluss.metadata.PhysicalTablePath; import org.apache.fluss.metadata.TableBucket; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.remote.RemoteLogSegment; import org.apache.fluss.server.log.LogTablet; import org.apache.fluss.server.log.remote.RemoteLogStorage.IndexType; diff --git a/fluss-server/src/test/java/org/apache/fluss/server/log/remote/RemoteLogITCase.java b/fluss-server/src/test/java/org/apache/fluss/server/log/remote/RemoteLogITCase.java index 90dfa0914e..872682ad26 100644 --- a/fluss-server/src/test/java/org/apache/fluss/server/log/remote/RemoteLogITCase.java +++ b/fluss-server/src/test/java/org/apache/fluss/server/log/remote/RemoteLogITCase.java @@ -27,6 +27,7 @@ import org.apache.fluss.metadata.TableBucket; import org.apache.fluss.metadata.TableDescriptor; import org.apache.fluss.metadata.TablePath; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.remote.RemoteLogSegment; import org.apache.fluss.rpc.entity.FetchLogResultForBucket; import org.apache.fluss.rpc.gateway.CoordinatorGateway; diff --git a/fluss-server/src/test/java/org/apache/fluss/server/log/remote/TestingRemoteLogStorage.java b/fluss-server/src/test/java/org/apache/fluss/server/log/remote/TestingRemoteLogStorage.java index a946e9dd0c..eeba26a54c 100644 --- a/fluss-server/src/test/java/org/apache/fluss/server/log/remote/TestingRemoteLogStorage.java +++ 
b/fluss-server/src/test/java/org/apache/fluss/server/log/remote/TestingRemoteLogStorage.java @@ -20,6 +20,7 @@ import org.apache.fluss.config.Configuration; import org.apache.fluss.exception.RemoteStorageException; import org.apache.fluss.fs.FsPath; +import org.apache.fluss.remote.RemoteLogManifest; import org.apache.fluss.remote.RemoteLogSegment; import java.io.IOException; diff --git a/fluss-server/src/test/java/org/apache/fluss/server/testutils/FlussClusterExtension.java b/fluss-server/src/test/java/org/apache/fluss/server/testutils/FlussClusterExtension.java index ddf52b8bab..aeb8a848df 100644 --- a/fluss-server/src/test/java/org/apache/fluss/server/testutils/FlussClusterExtension.java +++ b/fluss-server/src/test/java/org/apache/fluss/server/testutils/FlussClusterExtension.java @@ -772,33 +772,28 @@ public CompletedSnapshot triggerAndWaitSnapshot(TableBucket tableBucket) { } private Long triggerSnapshot(TableBucket tableBucket) { - Long snapshotId = null; - Long nextSnapshotId = null; for (TabletServer ts : tabletServers.values()) { ReplicaManager.HostedReplica replica = ts.getReplicaManager().getReplica(tableBucket); if (replica instanceof ReplicaManager.OnlineReplica) { Replica r = ((ReplicaManager.OnlineReplica) replica).getReplica(); PeriodicSnapshotManager kvSnapshotManager = r.getKvSnapshotManager(); if (r.isLeader() && kvSnapshotManager != null) { - snapshotId = kvSnapshotManager.currentSnapshotId(); + long snapshotId = kvSnapshotManager.currentSnapshotId(); + // KvTablet#getGuardedExecutor runs the submitted task synchronously + // on the calling thread inside the kv write lock, so initSnapshot() + // has already completed by the time triggerSnapshot() returns. The + // counter is either bumped (a new snapshot was scheduled) or left + // unchanged (no new data since the last snapshot — legitimate no-op). 
kvSnapshotManager.triggerSnapshot(); - nextSnapshotId = kvSnapshotManager.currentSnapshotId(); - break; + if (kvSnapshotManager.currentSnapshotId() > snapshotId) { + return snapshotId; + } + return null; } } } - - if (snapshotId != null) { - if (nextSnapshotId > snapshotId) { - // only there is a new snapshot triggered, we return the snapshot id - return snapshotId; - } else { - return null; - } - } else { - fail("No KV snapshot manager found for table bucket " + tableBucket); - return null; - } + fail("No KV snapshot manager found for table bucket " + tableBucket); + return null; } public CompletedSnapshot waitUntilSnapshotFinished(TableBucket tableBucket, long snapshotId) { diff --git a/fluss-test-coverage/pom.xml b/fluss-test-coverage/pom.xml index d5e91d302d..357b3e98a2 100644 --- a/fluss-test-coverage/pom.xml +++ b/fluss-test-coverage/pom.xml @@ -502,6 +502,13 @@ org.apache.fluss.flink.functions.bitmap.RbAndAggFunction.AccumulatorSerializer.AccumulatorSerializerSnapshot + + org.apache.fluss.flink.action.Action + org.apache.fluss.flink.action.ActionFactory + org.apache.fluss.flink.action.ActionLoader + org.apache.fluss.flink.action.FlussActionEntrypoint + org.apache.fluss.flink.action.orphan.* + org.apache.flink.table.catalog.*