diff --git a/FLUSS_IDEA_RUN_CONFIGS.md b/FLUSS_IDEA_RUN_CONFIGS.md
new file mode 100644
index 0000000000..752a1d1296
--- /dev/null
+++ b/FLUSS_IDEA_RUN_CONFIGS.md
@@ -0,0 +1,92 @@
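+# Fluss IDEA Run Configs (Coordinator + Tablet)
+
+> **Note:** The absolute paths below (`/Users/litiliu/IdeaProjects/fluss/...`) come from one developer's machine. Replace them with the path of your own Fluss checkout.
+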
+## 1) CoordinatorServer (Application)
+
+- Main class:
+`org.apache.fluss.server.coordinator.CoordinatorServer`
+
+- Use classpath of module:
+`fluss-dist`
+
+- Program arguments:
+```text
+--configDir /Users/litiliu/IdeaProjects/fluss/fluss-dist/src/main/resources-coordinator
+```
+
+- VM options:
+```text
+-Dlog.file=/tmp/fluss-coordinator-idea.log -Dconsole.log.level=INFO -Dlog4j.configurationFile=file:/Users/litiliu/IdeaProjects/fluss/fluss-dist/src/main/resources/conf/log4j-console.properties
+```
+
+- Working directory:
+`/Users/litiliu/IdeaProjects/fluss`
+
+- Environment variables:
+```text
+ROOT_LOG_LEVEL=INFO
+```
+
+---
+
+## 2) TabletServer (Application)
+
+- Main class:
+`org.apache.fluss.server.tablet.TabletServer`
+
+- Use classpath of module:
+`fluss-dist`
+
+- Program arguments:
+```text
+--configDir /Users/litiliu/IdeaProjects/fluss/fluss-dist/src/main/resources-tablet
+```
+
+- VM options:
+```text
+-Dlog.file=/tmp/fluss-tablet-idea.log -Dconsole.log.level=INFO -Dlog4j.configurationFile=file:/Users/litiliu/IdeaProjects/fluss/fluss-dist/src/main/resources/conf/log4j-console.properties
+```
+
+- Working directory:
+`/Users/litiliu/IdeaProjects/fluss`
+
+- Environment variables:
+```text
+ROOT_LOG_LEVEL=INFO
+```
+
+---
+
+## Related config files
+
+- Coordinator config:
+`/Users/litiliu/IdeaProjects/fluss/fluss-dist/src/main/resources-coordinator/server.yaml`
+
+- Tablet config:
+`/Users/litiliu/IdeaProjects/fluss/fluss-dist/src/main/resources-tablet/server.yaml`
+
+---
+
+## 3) Flink SQL test (in Docker)
+
+### Start SQL Client container
+```bash
+docker run -it apache/fluss-quickstart-flink:1.20-0.9.1-incubating bash
+```
+
+### Enter Flink SQL Client
+```bash
+bash sql-client
+```
+
+### Create Fluss catalog
+```sql
+CREATE CATALOG fluss_catalog WITH (
+ 'type' = 'fluss',
+ 'bootstrap.servers' = 'host.docker.internal:9123'
+);
+```
+
+### Optional: quick connectivity check from inside the container
+```bash
+curl -v telnet://host.docker.internal:9123
+```
diff --git a/Jenkinsfile b/Jenkinsfile
new file mode 100644
index 0000000000..1d9f87609c
--- /dev/null
+++ b/Jenkinsfile
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@Library(['meetPaasJenkinsLib','ciHelper@master']) _
+
+/**
+ * Computes the image tag for the current build: the fixed prefix "0.8-" followed by
+ * the abbreviated hash of the most recent commit in the workspace.
+ */
+def imageTag() {
+    def shortCommit = sh(script: "git log -n 1 --pretty=format:'%h'", returnStdout: true).trim()
+    return "0.8-${shortCommit}"
+}
+
+/**
+ * Constants
+ *
+ * Pipeline-wide settings: the pieces of the image reference and the identifiers
+ * handed to the registry login helper in the 'Push Image' stage.
+ */
+class Constants {
+ // Directory passed to dir(...) in the 'Push Image' stage; an empty string here.
+ static final String DIRECTORY_BACKEND = ''
+
+ // TODO: update repo paths for fluss
+ // Repository path; appended to AWS_CONTAINER_REGISTRY when the image reference is composed.
+ static final String FLUSS_ECR_REPO = '/wap-dataprocessor/fluss'
+ // Registry prefix of the image reference: <AWS_CONTAINER_REGISTRY><FLUSS_ECR_REPO>:<tag>.
+ static final String AWS_CONTAINER_REGISTRY = '527856644868.dkr.ecr.us-east-2.amazonaws.com/webex-wap'
+
+ // The three values below are passed, in this order, to ecr.withRegistry(...) in 'Push Image'.
+ static final String VAULT_CRED_ID = 'c3b8bd92-f54c-4ed5-be88-f438dfa066e6'
+ static final String VAULT_NAMESPACE = 'meetpaas'
+ static final String AWS_ROLE = 'wap-ecr-readwrite'
+
+ // Optional maven settings, if needed
+ // NOTE(review): not referenced anywhere in the visible part of this Jenkinsfile.
+ static final String MAVEN_SETTING = 'bdb92e53-4de9-4335-aa45-065b69a0d624'
+}
+
+/**
+ * Anchore scan
+ *
+ * Generates an SBOM for the image with syft, uploads it to Anchore through anchorectl
+ * (waiting for the analysis to finish), then runs the policy check and aborts the build
+ * unless the evaluation is reported as 'pass'.
+ *
+ * @param imageTag full image reference (registry/repository:tag) to scan
+ */
+def scan(imageTag) {
+    sh """
+        ${env.WORKSPACE}/bin/syft -o json ${imageTag} | \
+        ${env.WORKSPACE}/bin/anchorectl image add ${imageTag} --wait --from -
+    """
+
+    // Second column of the "Evaluation: <result>" line printed by `anchorectl image check`.
+    def status = sh(
+        returnStdout: true,
+        script: """${env.WORKSPACE}/bin/anchorectl image check ${imageTag} \
+            | grep '^Evaluation' | awk '{print \$2}'"""
+    ).trim()
+
+    if (status == 'fail') {
+        error("Anchore scan failed for image: ${imageTag}")
+    }
+    // Fail closed: the exit status of the pipeline above is awk's, so a failing or
+    // unreachable anchorectl yields an empty status. Anything other than an explicit
+    // 'pass' must not let an unscanned image through.
+    if (status != 'pass') {
+        error("Anchore scan returned no usable evaluation for image: ${imageTag} (status: '${status}')")
+    }
+}
+
+pipeline {
+ agent { label 'wap-agents' }
+
+ // JDK is set via withMaven in Build Package
+
+ environment {
+ ENABLE_ANCHORE = "false"
+ ENABLE_IMAGE_PIPELINE = "false"
+ ANCHORECTL_ACCOUNT = "webex-wap"
+ ANCHORECTL_URL = "https://anchore.int.acmhwxt-prd-1.prod.infra.webex.com"
+ ANCHORECTL_USERNAME = credentials("UDP_ANCHORE_USERNAME")
+ ANCHORECTL_PASSWORD = credentials("UDP_ANCHORE_PASSWORD")
+ }
+
+ stages {
+ /**
+ * Install Anchore tools
+ *
+ * Runs only when ENABLE_ANCHORE is 'true'. Installs anchorectl and syft into
+ * ${WORKSPACE}/bin; each download is wrapped in retry(3).
+ */
+ stage('Setup Anchore') {
+ when {
+ expression { env.ENABLE_ANCHORE == 'true' }
+ }
+ steps {
+ sh "echo 'Setup Anchore tools...'"
+ sh "mkdir -p ${env.WORKSPACE}/bin"
+
+ // anchorectl is installed at the pinned version v5.3.0.
+ retry(3) {
+ sh "curl -sSfL https://anchorectl-releases.anchore.io/anchorectl/install.sh \
+ | sh -s -- -b ${env.WORKSPACE}/bin v5.3.0"
+ }
+
+ // The syft version to install is read from the SyftVersion line of `anchorectl version`,
+ // so this step depends on the anchorectl install above having succeeded.
+ // NOTE(review): the syft install script itself is fetched from the `main` branch (unpinned).
+ retry(3) {
+ sh """
+ curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh \
+ | sh -s -- -b ${env.WORKSPACE}/bin \
+ \$(${env.WORKSPACE}/bin/anchorectl version | grep SyftVersion | awk '{print \$2}')
+ """
+ }
+ }
+ }
+
+    /**
+     * Check out the revision this build was triggered for.
+     */
+    stage('Checkout') {
+        steps {
+            checkout scm
+        }
+    }
+
+ /**
+ * Build Package (Java 17, tests skipped)
+ */
+ stage('Build Package') {
+ steps {
+ withMaven(
+ jdk: 'OpenJDK-17',
+ maven: 'Maven 3.6.1',
+ options: [
+ artifactsPublisher(disabled: true),
+ junitPublisher(disabled: true)
+ ]) {
+
+ sh '''
+ set -euxo pipefail
+
+ # Jenkins-managed temp directory: outside the repo tree (so RAT does not scan it) and normally not mounted noexec
+ CI_TMP="${WORKSPACE}@tmp/fluss-tmp-${BUILD_NUMBER}"
+ NETTY_NATIVE="${CI_TMP}/netty-native"
+ trap 'rm -rf "${CI_TMP}" || true' EXIT
+
+ rm -rf "${CI_TMP}" || true
+ mkdir -p "${NETTY_NATIVE}"
+ chmod 755 "${WORKSPACE}@tmp" "${CI_TMP}" "${NETTY_NATIVE}" || true
+
+ # Exported so that every Java process inherits these options
+ export JAVA_TOOL_OPTIONS="${JAVA_TOOL_OPTIONS:-} \
+ -Djava.io.tmpdir=${CI_TMP} \
+ -Dorg.apache.fluss.shaded.netty4.io.netty.native.workdir=${NETTY_NATIVE} \
+ -Dcom.github.luben.zstd.tmpdir=${CI_TMP} \
+ --add-exports=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
+ --add-exports=java.rmi/sun.rmi.registry=ALL-UNNAMED"
+
+ MAVEN_REACTOR_EXCLUDES="\
+!fluss-metrics/fluss-metrics-influxdb,\
+!fluss-flink/fluss-flink-1.19,\
+!fluss-flink/fluss-flink-1.20,\
+!fluss-flink/fluss-flink-tiering,\
+!fluss-spark/fluss-spark-common,\
+!fluss-spark/fluss-spark-ut,\
+!fluss-spark/fluss-spark-3.5,\
+!fluss-spark/fluss-spark-3.4,\
+!fluss-spark,\
+!fluss-lake/fluss-lake-paimon,\
+!fluss-lake/fluss-lake-iceberg,\
+!fluss-lake/fluss-lake-lance,\
+!fluss-lake/fluss-lake-hudi,\
+!fluss-lake,\
+!fluss-kafka,\
+!fluss-jmh,\
+!fluss-filesystems/fluss-fs-oss,\
+!fluss-filesystems/fluss-fs-gs,\
+!fluss-filesystems/fluss-fs-azure,\
+!fluss-filesystems/fluss-fs-obs,\
+!fluss-filesystems/fluss-fs-cos,\
+!fluss-filesystems/fluss-fs-hdfs,\
+!fluss-test-coverage"
+
+ ./mvnw -B -V -T 1C \
+ -DskipTests \
+ -DskipOptionalDistPlugins \
+ -Ddist.plugins.descriptor=src/main/assemblies/plugins-slim.xml \
+ -Ddist.package.classifier=-slim \
+ -pl "${MAVEN_REACTOR_EXCLUDES}" \
+ clean package
+ '''
+ }
+ }
+ }
+
+    /**
+     * Archive Package
+     *
+     * Lists the slim binary tarball(s) produced by the build, then archives them
+     * with fingerprinting, only if the build is successful so far.
+     */
+    stage('Archive Package') {
+        steps {
+            sh "ls -lh fluss-dist/target/*-slim-bin.tgz"
+            archiveArtifacts(
+                artifacts: 'fluss-dist/target/*-slim-bin.tgz',
+                fingerprint: true,
+                onlyIfSuccessful: true
+            )
+        }
+    }
+
+
+    /**
+     * Build Image
+     *
+     * Active only when ENABLE_IMAGE_PIPELINE is 'true'. Builds docker/fluss/Dockerfile
+     * with the legacy builder and tags the result with the full registry reference.
+     */
+    stage('Build Image') {
+        when {
+            expression { env.ENABLE_IMAGE_PIPELINE == 'true' }
+        }
+        steps {
+            script {
+                def buildTag = imageTag()
+                def imageRef = "${Constants.AWS_CONTAINER_REGISTRY}${Constants.FLUSS_ECR_REPO}:${buildTag}"
+
+                dir("docker/fluss") {
+                    sh """
+                        DOCKER_BUILDKIT=0 docker image build . \
+                            -f Dockerfile \
+                            -t ${imageRef}
+                    """
+                }
+            }
+        }
+    }
+
+    /**
+     * Scan Image
+     *
+     * Runs the Anchore scan against the image reference built for this commit.
+     * Active only when both ENABLE_ANCHORE and ENABLE_IMAGE_PIPELINE are 'true'.
+     */
+    stage('Scan Image') {
+        when {
+            expression { env.ENABLE_IMAGE_PIPELINE == 'true' && env.ENABLE_ANCHORE == 'true' }
+        }
+        steps {
+            script {
+                def buildTag = imageTag()
+                scan("${Constants.AWS_CONTAINER_REGISTRY}${Constants.FLUSS_ECR_REPO}:${buildTag}")
+            }
+        }
+    }
+
+    /**
+     * Push Image
+     *
+     * Active only when ENABLE_IMAGE_PIPELINE is 'true'. Logs in to the registry and
+     * pushes the image that was built (and optionally scanned) for this commit.
+     */
+    stage('Push Image') {
+        when {
+            expression { env.ENABLE_IMAGE_PIPELINE == 'true' }
+        }
+        steps {
+            script {
+                def tag = imageTag()
+                def ecrPath = "${Constants.AWS_CONTAINER_REGISTRY}${Constants.FLUSS_ECR_REPO}:${tag}"
+
+                dir(Constants.DIRECTORY_BACKEND) {
+                    ecr.withRegistry(Constants.VAULT_CRED_ID, Constants.VAULT_NAMESPACE, Constants.AWS_ROLE) {
+                        // Login Artifactory/ECR if needed. Replace credentialsId accordingly.
+                        withCredentials([usernamePassword(credentialsId: 'cloud9-password-xiaohzho',
+                                usernameVariable: 'USERNAME', passwordVariable: 'PASSWORD')]) {
+                            // \$USERNAME / \$PASSWORD are escaped so the shell expands them from the
+                            // environment; Groovy must not interpolate secrets into the script text.
+                            // The password goes through stdin so it never appears on the command
+                            // line or in the process list.
+                            sh """
+                                printf '%s' "\$PASSWORD" | docker login --username "\$USERNAME" --password-stdin artifactory.devhub-cloud.cisco.com
+                                docker push ${ecrPath}
+                            """
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    }
+
+    /**
+     * Cleanup
+     *
+     * Declared as a post/always section instead of a final stage: a stage is skipped
+     * once an earlier stage has failed, which would leave the workspace behind on
+     * exactly the builds that fail.
+     */
+    post {
+        always {
+            sh "echo 'Cleanup workspace...'"
+            deleteDir()
+        }
+    }
+}
diff --git a/JenkinsfileUT b/JenkinsfileUT
new file mode 100644
index 0000000000..3cedebd836
--- /dev/null
+++ b/JenkinsfileUT
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@Library(['meetPaasJenkinsLib','ciHelper@master']) _
+
+pipeline {
+ agent { label 'wap-agents' }
+
+ stages {
+    /**
+     * Check out the revision this build was triggered for.
+     */
+    stage('Checkout') {
+        steps {
+            checkout scm
+        }
+    }
+
+ /**
+ * Unit Tests (Java 17)
+ */
+ stage('Unit Tests') {
+ steps {
+ withMaven(
+ jdk: 'OpenJDK-17',
+ maven: 'Maven 3.6.1',
+ options: [
+ artifactsPublisher(disabled: true),
+ junitPublisher(disabled: true)
+ ]) {
+
+ sh '''
+ set -euxo pipefail
+
+ # Jenkins-managed temp directory: outside the repo tree (so RAT does not scan it) and normally not mounted noexec
+ CI_TMP="${WORKSPACE}@tmp/fluss-ut-tmp-${BUILD_NUMBER}"
+ NETTY_NATIVE="${CI_TMP}/netty-native"
+ trap 'rm -rf "${CI_TMP}" || true' EXIT
+
+ rm -rf "${CI_TMP}" || true
+ mkdir -p "${NETTY_NATIVE}"
+ chmod 755 "${WORKSPACE}@tmp" "${CI_TMP}" "${NETTY_NATIVE}" || true
+
+ # Exported so that every Java process inherits these options
+ export JAVA_TOOL_OPTIONS="${JAVA_TOOL_OPTIONS:-} \
+ -Djava.io.tmpdir=${CI_TMP} \
+ -Dorg.apache.fluss.shaded.netty4.io.netty.native.workdir=${NETTY_NATIVE} \
+ -Dcom.github.luben.zstd.tmpdir=${CI_TMP} \
+ --add-exports=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
+ --add-exports=java.rmi/sun.rmi.registry=ALL-UNNAMED"
+
+ MAVEN_REACTOR_EXCLUDES="\
+!fluss-metrics/fluss-metrics-influxdb,\
+!fluss-flink/fluss-flink-1.19,\
+!fluss-flink/fluss-flink-1.20,\
+!fluss-flink/fluss-flink-tiering,\
+!fluss-spark/fluss-spark-common,\
+!fluss-spark/fluss-spark-ut,\
+!fluss-spark/fluss-spark-3.5,\
+!fluss-spark/fluss-spark-3.4,\
+!fluss-spark,\
+!fluss-lake/fluss-lake-paimon,\
+!fluss-lake/fluss-lake-iceberg,\
+!fluss-lake/fluss-lake-lance,\
+!fluss-lake/fluss-lake-hudi,\
+!fluss-lake,\
+!fluss-kafka,\
+!fluss-jmh,\
+!fluss-filesystems/fluss-fs-oss,\
+!fluss-filesystems/fluss-fs-gs,\
+!fluss-filesystems/fluss-fs-azure,\
+!fluss-filesystems/fluss-fs-obs,\
+!fluss-filesystems/fluss-fs-cos,\
+!fluss-filesystems/fluss-fs-hdfs,\
+!fluss-test-coverage"
+
+ ./mvnw -B -V -T 1C \
+ -DskipOptionalDistPlugins \
+ -Ddist.plugins.descriptor=src/main/assemblies/plugins-slim.xml \
+ -pl "${MAVEN_REACTOR_EXCLUDES}" \
+ clean test
+ '''
+ }
+ }
+ }
+ }
+
+    // Runs whatever the build result: publish any surefire reports that exist
+    // (an empty result set is allowed), then wipe the workspace.
+    post {
+        always {
+            junit(
+                testResults: '**/target/surefire-reports/*.xml',
+                allowEmptyResults: true
+            )
+            sh "echo 'Cleanup workspace...'"
+            deleteDir()
+        }
+    }
+}
diff --git a/docker/fluss/Dockerfile2 b/docker/fluss/Dockerfile2
new file mode 100644
index 0000000000..21914bc7e4
--- /dev/null
+++ b/docker/fluss/Dockerfile2
@@ -0,0 +1,27 @@
+##
+## Licensed to the Apache Software Foundation (ASF) under one or more
+## contributor license agreements. See the NOTICE file distributed with
+## this work for additional information regarding copyright ownership.
+## The ASF licenses this file to You under the Apache License, Version 2.0
+## (the "License"); you may not use this file except in compliance with
+## the License. You may obtain a copy of the License at
+##
+## http://www.apache.org/licenses/LICENSE-2.0
+##
+## Unless required by applicable law or agreed to in writing, software
+## distributed under the License is distributed on an "AS IS" BASIS,
+## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+## See the License for the specific language governing permissions and
+## limitations under the License.
+##
+
+## Base image: the published Fluss 0.9.0-incubating image, pinned to linux/amd64.
+FROM --platform=linux/amd64 apache/fluss:0.9.0-incubating
+
+## Copy the contents of the local aws-sdk/ directory into the iceberg plugin directory.
+## NOTE(review): aws-sdk/ is presumably the output of build.sh (mvn -f aws-sdk-pom.xml) - confirm.
+COPY aws-sdk/ /opt/fluss/plugins/iceberg/
+
+## Download the Trino hadoop-apache 3.3.5-2 jar from Maven Central into the plugin directory.
+## The mkdir -p is a no-op if the COPY above already created the directory.
+RUN set -eux; \
+ mkdir -p /opt/fluss/plugins/iceberg/; \
+ wget -O /opt/fluss/plugins/iceberg/hadoop-apache-3.3.5-2.jar \
+ https://repo1.maven.org/maven2/io/trino/hadoop/hadoop-apache/3.3.5-2/hadoop-apache-3.3.5-2.jar
+
+## Download the iceberg-flink-runtime-2.0 1.10.1 jar from Maven Central into the same directory.
+## NOTE(review): neither download is checksum-verified.
+RUN wget https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-flink-runtime-2.0/1.10.1/iceberg-flink-runtime-2.0-1.10.1.jar -P /opt/fluss/plugins/iceberg/
diff --git a/docker/fluss/aws-sdk-pom.xml b/docker/fluss/aws-sdk-pom.xml
new file mode 100644
index 0000000000..5bec1a1062
--- /dev/null
+++ b/docker/fluss/aws-sdk-pom.xml
@@ -0,0 +1,100 @@
+
+
+
+ 4.0.0
+
+ com.example
+ aws-sdk-downloader
+ 1.0-SNAPSHOT
+
+ 2.42.29
+
+
+
+
+
+ software.amazon.awssdk
+ aws-core
+ ${aws.sdk.version}
+
+
+
+
+ software.amazon.awssdk
+ s3
+ ${aws.sdk.version}
+
+
+
+
+ software.amazon.awssdk
+ glue
+ ${aws.sdk.version}
+
+
+ software.amazon.awssdk
+ dynamodb
+ ${aws.sdk.version}
+
+
+
+
+ software.amazon.awssdk
+ sts
+ ${aws.sdk.version}
+
+
+
+ software.amazon.awssdk
+ kms
+ ${aws.sdk.version}
+
+
+
+
+ software.amazon.awssdk
+ secretsmanager
+ ${aws.sdk.version}
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+ 3.1.2
+
+
+ copy-dependencies
+ package
+
+ copy-dependencies
+
+
+ aws-sdk
+ runtime
+
+
+
+
+
+
+
diff --git a/docker/fluss/build.sh b/docker/fluss/build.sh
new file mode 100644
index 0000000000..21fdc62923
--- /dev/null
+++ b/docker/fluss/build.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+rm -rf aws-sdk && mvn -f aws-sdk-pom.xml clean package
+docker build --platform=linux/x86_64 -f Dockerfile2 -t "rmc-registry-qa.webex.com/wap-dataprocessor/flink:fluss-0.9-v1" .
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/lookup/PrefixKeyLookuper.java b/fluss-client/src/main/java/org/apache/fluss/client/lookup/PrefixKeyLookuper.java
index 85b9aa5ecd..1b449beb75 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/lookup/PrefixKeyLookuper.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/lookup/PrefixKeyLookuper.java
@@ -19,7 +19,6 @@
import org.apache.fluss.bucketing.BucketingFunction;
import org.apache.fluss.client.metadata.MetadataUpdater;
-import org.apache.fluss.client.table.getter.PartitionGetter;
import org.apache.fluss.exception.PartitionNotExistException;
import org.apache.fluss.metadata.DataLakeFormat;
import org.apache.fluss.metadata.SchemaGetter;
@@ -28,6 +27,7 @@
import org.apache.fluss.row.InternalRow;
import org.apache.fluss.row.encode.KeyEncoder;
import org.apache.fluss.types.RowType;
+import org.apache.fluss.utils.PartitionComputer;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
@@ -60,7 +60,7 @@ class PrefixKeyLookuper extends AbstractLookuper implements Lookuper {
/**
* a getter to extract partition from prefix lookup key row, null when it's not a partitioned.
*/
- private @Nullable final PartitionGetter partitionGetter;
+ private @Nullable final PartitionComputer partitionComputer;
public PrefixKeyLookuper(
TableInfo tableInfo,
@@ -95,10 +95,8 @@ public PrefixKeyLookuper(
lookupRowType, tableInfo.getBucketKeys(), lakeFormat);
this.bucketingFunction = BucketingFunction.of(lakeFormat);
- this.partitionGetter =
- tableInfo.isPartitioned()
- ? new PartitionGetter(lookupRowType, tableInfo.getPartitionKeys())
- : null;
+ this.partitionComputer =
+ tableInfo.isPartitioned() ? new PartitionComputer(tableInfo, lookupRowType) : null;
}
private void validatePrefixLookup(TableInfo tableInfo, List lookupColumns) {
@@ -126,7 +124,7 @@ private void validatePrefixLookup(TableInfo tableInfo, List lookupColumn
// verify the lookup columns must contain all partition fields if this is partitioned table
if (tableInfo.isPartitioned()) {
- List partitionKeys = tableInfo.getPartitionKeys();
+ List partitionKeys = tableInfo.getPartitionInputColumns();
Set lookupColumnsSet = new HashSet<>(lookupColumns);
if (!lookupColumnsSet.containsAll(partitionKeys)) {
throw new IllegalArgumentException(
@@ -138,9 +136,9 @@ private void validatePrefixLookup(TableInfo tableInfo, List lookupColumn
}
// verify the lookup columns must contain all bucket keys **in order**
- List physicalLookupColumns = new ArrayList<>(lookupColumns);
- physicalLookupColumns.removeAll(tableInfo.getPartitionKeys());
- if (!physicalLookupColumns.equals(bucketKeys)) {
+ List bucketLookupColumns =
+ removePartitionOnlyInputColumns(tableInfo, lookupColumns);
+ if (!bucketLookupColumns.equals(bucketKeys)) {
throw new IllegalArgumentException(
String.format(
"Can not perform prefix lookup on table '%s', "
@@ -158,6 +156,21 @@ private void validatePrefixLookup(TableInfo tableInfo, List lookupColumn
}
}
+ /**
+ * Returns the lookup columns, in their original order, without the columns that are
+ * partition input columns but not bucket keys.
+ *
+ * @param tableInfo supplies the bucket keys and the partition input columns
+ * @param lookupColumns the prefix lookup columns, in lookup order
+ * @return a new list holding the remaining lookup columns; the input list is not modified
+ */
+ private List removePartitionOnlyInputColumns(
+ TableInfo tableInfo, List lookupColumns) {
+ Set bucketKeySet = new HashSet<>(tableInfo.getBucketKeys());
+ Set partitionInputColumnSet = new HashSet<>(tableInfo.getPartitionInputColumns());
+ List bucketLookupColumns = new ArrayList<>();
+ for (String lookupColumn : lookupColumns) {
+ // Skip columns used only for partitioning; a partition input column that is
+ // also a bucket key is kept.
+ if (partitionInputColumnSet.contains(lookupColumn)
+ && !bucketKeySet.contains(lookupColumn)) {
+ continue;
+ }
+ bucketLookupColumns.add(lookupColumn);
+ }
+ return bucketLookupColumns;
+ }
+
@Override
public CompletableFuture lookup(InternalRow prefixKey) {
byte[] prefixKeyBytes = prefixKeyEncoder.encodeKey(prefixKey);
@@ -168,12 +181,12 @@ public CompletableFuture lookup(InternalRow prefixKey) {
int bucketId = bucketingFunction.bucketing(bucketKeyBytes, numBuckets);
Long partitionId = null;
- if (partitionGetter != null) {
+ if (partitionComputer != null) {
try {
partitionId =
getPartitionId(
prefixKey,
- partitionGetter,
+ partitionComputer,
tableInfo.getTablePath(),
metadataUpdater);
} catch (PartitionNotExistException e) {
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/lookup/PrimaryKeyLookuper.java b/fluss-client/src/main/java/org/apache/fluss/client/lookup/PrimaryKeyLookuper.java
index 26a03c47c5..d4205b6b39 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/lookup/PrimaryKeyLookuper.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/lookup/PrimaryKeyLookuper.java
@@ -19,7 +19,6 @@
import org.apache.fluss.bucketing.BucketingFunction;
import org.apache.fluss.client.metadata.MetadataUpdater;
-import org.apache.fluss.client.table.getter.PartitionGetter;
import org.apache.fluss.exception.PartitionNotExistException;
import org.apache.fluss.metadata.DataLakeFormat;
import org.apache.fluss.metadata.SchemaGetter;
@@ -28,6 +27,7 @@
import org.apache.fluss.row.InternalRow;
import org.apache.fluss.row.encode.KeyEncoder;
import org.apache.fluss.types.RowType;
+import org.apache.fluss.utils.PartitionComputer;
import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;
@@ -55,7 +55,7 @@ class PrimaryKeyLookuper extends AbstractLookuper implements Lookuper {
private final boolean insertIfNotExists;
/** a getter to extract partition from lookup key row, null when it's not a partitioned. */
- private @Nullable final PartitionGetter partitionGetter;
+ private @Nullable final PartitionComputer partitionComputer;
public PrimaryKeyLookuper(
TableInfo tableInfo,
@@ -88,10 +88,8 @@ public PrimaryKeyLookuper(
this.bucketingFunction = BucketingFunction.of(lakeFormat);
- this.partitionGetter =
- tableInfo.isPartitioned()
- ? new PartitionGetter(lookupRowType, tableInfo.getPartitionKeys())
- : null;
+ this.partitionComputer =
+ tableInfo.isPartitioned() ? new PartitionComputer(tableInfo, lookupRowType) : null;
}
@Override
@@ -104,12 +102,12 @@ public CompletableFuture lookup(InternalRow lookupKey) {
? pkBytes
: bucketKeyEncoder.encodeKey(lookupKey);
Long partitionId = null;
- if (partitionGetter != null) {
+ if (partitionComputer != null) {
try {
partitionId =
getPartitionId(
lookupKey,
- partitionGetter,
+ partitionComputer,
tableInfo.getTablePath(),
metadataUpdater);
} catch (PartitionNotExistException e) {
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/metadata/MetadataUpdater.java b/fluss-client/src/main/java/org/apache/fluss/client/metadata/MetadataUpdater.java
index eb65f9f91d..e9274e0d58 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/metadata/MetadataUpdater.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/metadata/MetadataUpdater.java
@@ -182,7 +182,10 @@ public void checkAndUpdateTableMetadata(Set tablePaths) {
public boolean checkAndUpdatePartitionMetadata(PhysicalTablePath physicalTablePath)
throws PartitionNotExistException {
if (!cluster.getPartitionId(physicalTablePath).isPresent()) {
- updateMetadata(null, Collections.singleton(physicalTablePath), null);
+ updateMetadata(
+ Collections.singleton(physicalTablePath.getTablePath()),
+ Collections.singleton(physicalTablePath),
+ null);
}
return cluster.getPartitionId(physicalTablePath).isPresent();
}
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/table/writer/AbstractTableWriter.java b/fluss-client/src/main/java/org/apache/fluss/client/table/writer/AbstractTableWriter.java
index 96739d3271..5e3e899788 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/table/writer/AbstractTableWriter.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/table/writer/AbstractTableWriter.java
@@ -17,7 +17,6 @@
package org.apache.fluss.client.table.writer;
-import org.apache.fluss.client.table.getter.PartitionGetter;
import org.apache.fluss.client.write.WriteRecord;
import org.apache.fluss.client.write.WriterClient;
import org.apache.fluss.config.ConfigOptions;
@@ -26,6 +25,7 @@
import org.apache.fluss.metadata.TableInfo;
import org.apache.fluss.metadata.TablePath;
import org.apache.fluss.row.InternalRow;
+import org.apache.fluss.utils.PartitionComputer;
import javax.annotation.Nullable;
@@ -38,16 +38,16 @@ public abstract class AbstractTableWriter implements TableWriter {
protected final TablePath tablePath;
protected final WriterClient writerClient;
protected final int fieldCount;
- private final @Nullable PartitionGetter partitionFieldGetter;
+ private final @Nullable PartitionComputer partitionComputer;
protected AbstractTableWriter(
TablePath tablePath, TableInfo tableInfo, WriterClient writerClient) {
this.tablePath = tablePath;
this.writerClient = writerClient;
this.fieldCount = tableInfo.getRowType().getFieldCount();
- this.partitionFieldGetter =
+ this.partitionComputer =
tableInfo.isPartitioned()
- ? new PartitionGetter(tableInfo.getRowType(), tableInfo.getPartitionKeys())
+ ? new PartitionComputer(tableInfo, tableInfo.getRowType())
: null;
}
@@ -108,11 +108,11 @@ protected interface ResultFactory {
protected PhysicalTablePath getPhysicalPath(InternalRow row) {
// not partitioned table, return the original physical path
- if (partitionFieldGetter == null) {
+ if (partitionComputer == null) {
return PhysicalTablePath.of(tablePath);
} else {
// partitioned table, extract partition from the row
- String partition = partitionFieldGetter.getPartition(row);
+ String partition = partitionComputer.getPartition(row);
return PhysicalTablePath.of(tablePath, partition);
}
}
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/table/writer/TypedUpsertWriterImpl.java b/fluss-client/src/main/java/org/apache/fluss/client/table/writer/TypedUpsertWriterImpl.java
index 45af9f0ef6..736bc95ac2 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/table/writer/TypedUpsertWriterImpl.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/table/writer/TypedUpsertWriterImpl.java
@@ -34,7 +34,6 @@
class TypedUpsertWriterImpl implements TypedUpsertWriter {
private final UpsertWriter delegate;
- private final TableInfo tableInfo;
private final RowType tableSchema;
@Nullable private final int[] targetColumns;
@@ -48,12 +47,11 @@ class TypedUpsertWriterImpl implements TypedUpsertWriter {
TypedUpsertWriterImpl(
UpsertWriter delegate, Class pojoClass, TableInfo tableInfo, int[] targetColumns) {
this.delegate = delegate;
- this.tableInfo = tableInfo;
this.tableSchema = tableInfo.getRowType();
this.targetColumns = targetColumns;
// Precompute projections
- this.pkProjection = this.tableSchema.project(tableInfo.getPhysicalPrimaryKeys());
+ this.pkProjection = this.tableSchema.project(tableInfo.getPrimaryKeys());
this.targetProjection =
(targetColumns == null) ? null : this.tableSchema.project(targetColumns);
@@ -92,7 +90,10 @@ public CompletableFuture delete(T record) {
private InternalRow convertPojo(T pojo, boolean forDelete) {
final RowType projection;
final PojoToRowConverter converter;
- if (forDelete) {
+ if (forDelete && pkProjection.getFieldCount() == tableSchema.getFieldCount()) {
+ projection = tableSchema;
+ converter = pojoToRowConverter;
+ } else if (forDelete) {
projection = pkProjection;
converter = pkConverter;
} else if (targetProjection != null && targetConverter != null) {
@@ -104,22 +105,12 @@ private InternalRow convertPojo(T pojo, boolean forDelete) {
}
GenericRow projected = converter.toRow(pojo);
- if (projection == tableSchema) {
+ if (projection == tableSchema || forDelete) {
return projected;
}
// expand projected row to full row if needed
GenericRow full = new GenericRow(tableSchema.getFieldCount());
- if (forDelete) {
- // set PK fields, others null
- for (String pk : tableInfo.getPhysicalPrimaryKeys()) {
- int projIndex = projection.getFieldIndex(pk);
-
- // TODO: this can be optimized by pre-computing
- // the index mapping in the constructor?
- int fullIndex = tableSchema.getFieldIndex(pk);
- full.setField(fullIndex, projected.getField(projIndex));
- }
- } else if (targetColumns != null) {
+ if (targetColumns != null) {
for (int i = 0; i < projection.getFieldCount(); i++) {
String name = projection.getFieldNames().get(i);
int fullIdx = tableSchema.getFieldIndex(name);
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/table/writer/UpsertWriter.java b/fluss-client/src/main/java/org/apache/fluss/client/table/writer/UpsertWriter.java
index e4d751747d..40e8ddc543 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/table/writer/UpsertWriter.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/table/writer/UpsertWriter.java
@@ -40,6 +40,8 @@ public interface UpsertWriter extends TableWriter {
/**
* Delete a certain record from the Fluss table. The input must contain the primary key fields.
+ * When the full table row and primary-key row have the same field count, the input is treated
+ * as a full table row in table schema order.
*
* @param record the record to delete.
* @return A {@link CompletableFuture} that always delete result when complete normally.
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/table/writer/UpsertWriterImpl.java b/fluss-client/src/main/java/org/apache/fluss/client/table/writer/UpsertWriterImpl.java
index 6b7f821a1e..4658f30374 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/table/writer/UpsertWriterImpl.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/table/writer/UpsertWriterImpl.java
@@ -21,6 +21,7 @@
import org.apache.fluss.client.write.WriteRecord;
import org.apache.fluss.client.write.WriterClient;
import org.apache.fluss.metadata.KvFormat;
+import org.apache.fluss.metadata.PhysicalTablePath;
import org.apache.fluss.metadata.TableInfo;
import org.apache.fluss.metadata.TablePath;
import org.apache.fluss.row.BinaryRow;
@@ -32,6 +33,7 @@
import org.apache.fluss.row.indexed.IndexedRow;
import org.apache.fluss.rpc.protocol.MergeMode;
import org.apache.fluss.types.RowType;
+import org.apache.fluss.utils.PartitionComputer;
import javax.annotation.Nullable;
@@ -45,6 +47,10 @@ class UpsertWriterImpl extends AbstractTableWriter implements UpsertWriter {
private final TableInfo tableInfo;
private final KeyEncoder primaryKeyEncoder;
private final @Nullable int[] targetColumns;
+ private final RowType primaryKeyRowType;
+ private final KeyEncoder primaryKeyDeleteEncoder;
+ private final KeyEncoder bucketKeyDeleteEncoder;
+ private final @Nullable PartitionComputer deletePartitionComputer;
// same to primaryKeyEncoder if the bucket key is the same to the primary key
private final KeyEncoder bucketKeyEncoder;
@@ -94,6 +100,24 @@ class UpsertWriterImpl extends AbstractTableWriter implements UpsertWriter {
tableInfo.getRowType(),
tableInfo.getBucketKeys(),
tableInfo.getTableConfig().getDataLakeFormat().orElse(null));
+ this.primaryKeyRowType = rowType.project(tableInfo.getPrimaryKeys());
+ this.primaryKeyDeleteEncoder =
+ KeyEncoder.ofPrimaryKeyEncoder(
+ primaryKeyRowType,
+ tableInfo.getPhysicalPrimaryKeys(),
+ tableInfo.getTableConfig(),
+ tableInfo.isDefaultBucketKey());
+ this.bucketKeyDeleteEncoder =
+ tableInfo.isDefaultBucketKey()
+ ? primaryKeyDeleteEncoder
+ : KeyEncoder.ofBucketKeyEncoder(
+ primaryKeyRowType,
+ tableInfo.getBucketKeys(),
+ tableInfo.getTableConfig().getDataLakeFormat().orElse(null));
+ this.deletePartitionComputer =
+ tableInfo.isPartitioned()
+ ? new PartitionComputer(tableInfo, primaryKeyRowType)
+ : null;
this.kvFormat = tableInfo.getTableConfig().getKvFormat();
this.writeFormat = WriteFormat.fromKvFormat(this.kvFormat);
@@ -201,7 +225,25 @@ public CompletableFuture upsert(InternalRow row) {
*/
@Override
public CompletableFuture delete(InternalRow row) {
- checkFieldCount(row);
+        final int actualFieldCount = row.getFieldCount();
+        final int primaryKeyFieldCount = primaryKeyRowType.getFieldCount();
+        // A row as wide as the table is always read as a full table row, even when the primary
+        // key spans every column; this keeps table-schema ordering for all-column primary keys.
+        if (actualFieldCount == fieldCount) {
+            return deleteFullRow(row);
+        }
+        if (actualFieldCount != primaryKeyFieldCount) {
+            throw new IllegalArgumentException(
+                    "The field count of the row does not match the table schema or primary key schema. "
+                            + "Expected full table row: "
+                            + fieldCount
+                            + ", expected primary key row: "
+                            + primaryKeyFieldCount
+                            + ", Actual: "
+                            + actualFieldCount);
+        }
+        // Otherwise the row carries exactly the primary-key columns.
+        return deletePrimaryKeyRow(row);
+    }
+
+ private CompletableFuture deleteFullRow(InternalRow row) {
byte[] key = primaryKeyEncoder.encodeKey(row);
byte[] bucketKey =
bucketKeyEncoder == primaryKeyEncoder ? key : bucketKeyEncoder.encodeKey(row);
@@ -217,6 +259,31 @@ public CompletableFuture delete(InternalRow row) {
return sendWithResult(record, DeleteResult::new);
}
+    /**
+     * Deletes the record addressed by a row that contains only the primary-key columns.
+     *
+     * <p>The key, the bucket key and the physical table path are all derived from the key-only
+     * row through the dedicated delete encoders rather than the full-row encoders.
+     *
+     * @param row the primary-key-only row identifying the record to delete
+     * @return the future produced by {@code sendWithResult} for the delete record
+     */
+    private CompletableFuture<DeleteResult> deletePrimaryKeyRow(InternalRow row) {
+        byte[] key = primaryKeyDeleteEncoder.encodeKey(row);
+        // Reuse the encoded key when both delete encoders are the very same instance.
+        byte[] bucketKey =
+                bucketKeyDeleteEncoder == primaryKeyDeleteEncoder
+                        ? key
+                        : bucketKeyDeleteEncoder.encodeKey(row);
+        WriteRecord record =
+                WriteRecord.forDelete(
+                        tableInfo,
+                        getDeletePhysicalPath(row),
+                        key,
+                        bucketKey,
+                        writeFormat,
+                        targetColumns,
+                        mergeMode);
+        return sendWithResult(record, DeleteResult::new);
+    }
+
+    /**
+     * Resolves the physical table path for a primary-key-only delete row: the bare table path
+     * when no delete partition computer is configured, otherwise the table path combined with
+     * the partition computed from the row.
+     */
+    private PhysicalTablePath getDeletePhysicalPath(InternalRow row) {
+        return deletePartitionComputer != null
+                ? PhysicalTablePath.of(tablePath, deletePartitionComputer.getPartition(row))
+                : PhysicalTablePath.of(tablePath);
+    }
+
private BinaryRow encodeRow(InternalRow row) {
if (kvFormat == KvFormat.INDEXED && row instanceof IndexedRow) {
return (IndexedRow) row;
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/utils/ClientUtils.java b/fluss-client/src/main/java/org/apache/fluss/client/utils/ClientUtils.java
index 8629c6f95a..ba716270c2 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/utils/ClientUtils.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/utils/ClientUtils.java
@@ -18,13 +18,13 @@
package org.apache.fluss.client.utils;
import org.apache.fluss.client.metadata.MetadataUpdater;
-import org.apache.fluss.client.table.getter.PartitionGetter;
import org.apache.fluss.config.ConfigOptions;
import org.apache.fluss.exception.IllegalConfigurationException;
import org.apache.fluss.exception.PartitionNotExistException;
import org.apache.fluss.metadata.PhysicalTablePath;
import org.apache.fluss.metadata.TablePath;
import org.apache.fluss.row.InternalRow;
+import org.apache.fluss.utils.PartitionComputer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -122,12 +122,12 @@ public static Integer getPort(String address) {
*/
public static Long getPartitionId(
InternalRow row,
- PartitionGetter partitionGetter,
+ PartitionComputer partitionComputer,
TablePath tablePath,
MetadataUpdater metadataUpdater)
throws PartitionNotExistException {
- checkNotNull(partitionGetter, "partitionGetter shouldn't be null.");
- String partitionName = partitionGetter.getPartition(row);
+ checkNotNull(partitionComputer, "partitionComputer shouldn't be null.");
+ String partitionName = partitionComputer.getPartition(row);
PhysicalTablePath physicalTablePath = PhysicalTablePath.of(tablePath, partitionName);
metadataUpdater.checkAndUpdatePartitionMetadata(physicalTablePath);
return metadataUpdater.getCluster().getPartitionIdOrElseThrow(physicalTablePath);
diff --git a/fluss-client/src/main/java/org/apache/fluss/client/utils/MetadataUtils.java b/fluss-client/src/main/java/org/apache/fluss/client/utils/MetadataUtils.java
index 2990054999..0e37973dc4 100644
--- a/fluss-client/src/main/java/org/apache/fluss/client/utils/MetadataUtils.java
+++ b/fluss-client/src/main/java/org/apache/fluss/client/utils/MetadataUtils.java
@@ -123,7 +123,8 @@ public static Cluster sendMetadataRequestAndRebuildCluster(
Map newPartitionIdByPath;
NewTableMetadata newTableMetadata =
- getTableMetadataToUpdate(originCluster, response);
+ getTableMetadataToUpdate(
+ originCluster, response, tablePaths, tablePartitions);
if (partialUpdate) {
// If partial update, we will clear the to be updated table out ot
@@ -160,8 +161,12 @@ public static Cluster sendMetadataRequestAndRebuildCluster(
}
private static NewTableMetadata getTableMetadataToUpdate(
- Cluster cluster, MetadataResponse metadataResponse) {
+ Cluster cluster,
+ MetadataResponse metadataResponse,
+ @Nullable Set requestedTablePaths,
+ @Nullable Collection requestedTablePartitions) {
Map newTablePathToTableId = new HashMap<>();
+ Map newTablePathByTableId = new HashMap<>();
Map> newBucketLocations = new HashMap<>();
Map newPartitionIdByPath = new HashMap<>();
@@ -177,6 +182,7 @@ private static NewTableMetadata getTableMetadataToUpdate(
protoTablePath.getDatabaseName(),
protoTablePath.getTableName());
newTablePathToTableId.put(tablePath, tableId);
+ newTablePathByTableId.put(tableId, tablePath);
// Get all buckets for the table.
List pbBucketMetadataList =
@@ -195,7 +201,16 @@ private static NewTableMetadata getTableMetadataToUpdate(
pbPartitionMetadata -> {
long tableId = pbPartitionMetadata.getTableId();
// the table path should be initialized at begin
- TablePath tablePath = cluster.getTablePathOrElseThrow(tableId);
+ TablePath tablePath =
+ findTablePathForPartition(
+ tableId,
+ pbPartitionMetadata,
+ cluster,
+ newTablePathByTableId,
+ requestedTablePaths,
+ requestedTablePartitions);
+ newTablePathToTableId.put(tablePath, tableId);
+ newTablePathByTableId.put(tableId, tablePath);
PhysicalTablePath physicalTablePath =
PhysicalTablePath.of(tablePath, pbPartitionMetadata.getPartitionName());
newPartitionIdByPath.put(
@@ -214,6 +229,60 @@ private static NewTableMetadata getTableMetadataToUpdate(
newTablePathToTableId, newBucketLocations, newPartitionIdByPath);
}
+    /**
+     * Determines the table path a returned partition belongs to.
+     *
+     * <p>Sources are consulted in this order: table paths already collected from the current
+     * response, a path inferred from the request, and finally the table id mapping of the
+     * existing cluster.
+     *
+     * @param tableId the table id reported by the partition metadata
+     * @param partitionMetadata the partition metadata being resolved
+     * @param cluster the cluster whose table id mapping is the last resort; its lookup throws
+     *     when the id is unknown
+     * @param newTablePathByTableId table paths collected so far from the response, by table id
+     * @param requestedTablePaths the table paths of the metadata request, if any
+     * @param requestedTablePartitions the partition paths of the metadata request, if any
+     * @return the resolved table path
+     */
+    private static TablePath findTablePathForPartition(
+            long tableId,
+            PbPartitionMetadata partitionMetadata,
+            Cluster cluster,
+            Map<Long, TablePath> newTablePathByTableId,
+            @Nullable Set<TablePath> requestedTablePaths,
+            @Nullable Collection<PhysicalTablePath> requestedTablePartitions) {
+        // Single lookup: the map never stores null paths, so a null result means "absent".
+        TablePath pathFromResponse = newTablePathByTableId.get(tableId);
+        if (pathFromResponse != null) {
+            return pathFromResponse;
+        }
+
+        TablePath pathFromRequest =
+                findRequestedTablePath(
+                        partitionMetadata, requestedTablePaths, requestedTablePartitions);
+        if (pathFromRequest != null) {
+            return pathFromRequest;
+        }
+
+        return cluster.getTablePathOrElseThrow(tableId);
+    }
+
+    /**
+     * Infers the table path of a returned partition from what the metadata request asked for.
+     *
+     * <p>Some partition metadata responses do not include table metadata for the returned table
+     * id. In that case, infer the table path from the requested partition paths only when the
+     * partition name uniquely identifies one requested table. If multiple requested tables use
+     * the same partition name, nothing is inferred so that the caller can fall back to the table
+     * id mapping from the current cluster.
+     *
+     * @param partitionMetadata the partition metadata whose table path is unknown
+     * @param requestedTablePaths the table paths of the metadata request, if any
+     * @param requestedTablePartitions the partition paths of the metadata request, if any
+     * @return the inferred table path, or {@code null} when the request does not pin it down
+     */
+    private static @Nullable TablePath findRequestedTablePath(
+            PbPartitionMetadata partitionMetadata,
+            @Nullable Set<TablePath> requestedTablePaths,
+            @Nullable Collection<PhysicalTablePath> requestedTablePartitions) {
+        if (requestedTablePartitions != null) {
+            TablePath matchedTablePath = null;
+            for (PhysicalTablePath requestedPartition : requestedTablePartitions) {
+                if (!partitionMetadata
+                        .getPartitionName()
+                        .equals(requestedPartition.getPartitionName())) {
+                    continue;
+                }
+                TablePath candidate = requestedPartition.getTablePath();
+                if (matchedTablePath != null && !matchedTablePath.equals(candidate)) {
+                    // Two different requested tables share this partition name: ambiguous.
+                    return null;
+                }
+                matchedTablePath = candidate;
+            }
+            if (matchedTablePath != null) {
+                return matchedTablePath;
+            }
+        }
+
+        // No partition path matched; a request for exactly one table is still unambiguous.
+        if (requestedTablePaths != null && requestedTablePaths.size() == 1) {
+            return requestedTablePaths.iterator().next();
+        }
+
+        return null;
+    }
+
private static final class NewTableMetadata {
private final Map tablePathToTableId;
private final Map> bucketLocations;
diff --git a/fluss-client/src/test/java/org/apache/fluss/client/metadata/MetadataUpdaterITCase.java b/fluss-client/src/test/java/org/apache/fluss/client/metadata/MetadataUpdaterITCase.java
index 6d099a24e6..63ce191973 100644
--- a/fluss-client/src/test/java/org/apache/fluss/client/metadata/MetadataUpdaterITCase.java
+++ b/fluss-client/src/test/java/org/apache/fluss/client/metadata/MetadataUpdaterITCase.java
@@ -23,9 +23,14 @@
import org.apache.fluss.cluster.Cluster;
import org.apache.fluss.cluster.ServerNode;
import org.apache.fluss.config.Configuration;
+import org.apache.fluss.metadata.PartitionSpec;
+import org.apache.fluss.metadata.PhysicalTablePath;
+import org.apache.fluss.metadata.Schema;
+import org.apache.fluss.metadata.TableDescriptor;
import org.apache.fluss.metadata.TablePath;
import org.apache.fluss.rpc.RpcClient;
import org.apache.fluss.server.testutils.FlussClusterExtension;
+import org.apache.fluss.types.DataTypes;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;
@@ -74,6 +79,44 @@ void testRebuildClusterNTimes() throws Exception {
}
}
+    @Test
+    void testUpdatePartitionMetadataForUnknownTable() throws Exception {
+        String partitionName = "2026-01-01";
+        TablePath tablePath = TablePath.of("fluss", "metadata_partition_update");
+        PhysicalTablePath physicalTablePath = PhysicalTablePath.of(tablePath, partitionName);
+        Schema schema =
+                Schema.newBuilder()
+                        .column("id", DataTypes.INT())
+                        .column("dt", DataTypes.STRING())
+                        .build();
+        TableDescriptor tableDescriptor =
+                TableDescriptor.builder()
+                        .schema(schema)
+                        .partitionedBy("dt")
+                        .distributedBy(1)
+                        .build();
+
+        // Create the table and its single partition through a short-lived admin connection.
+        Configuration clientConf = FLUSS_CLUSTER_EXTENSION.getClientConfig();
+        try (Connection conn = ConnectionFactory.createConnection(clientConf);
+                Admin admin = conn.getAdmin()) {
+            admin.createTable(tablePath, tableDescriptor, true).get();
+            PartitionSpec partitionSpec =
+                    new PartitionSpec(Collections.singletonMap("dt", partitionName));
+            admin.createPartition(tablePath, partitionSpec, true).get();
+        }
+
+        // A brand-new updater must not know the table before the partition update.
+        MetadataUpdater metadataUpdater =
+                new MetadataUpdater(clientConf, FLUSS_CLUSTER_EXTENSION.getRpcClient());
+        assertThat(metadataUpdater.getCluster().getTableId(tablePath)).isEmpty();
+
+        boolean updated = metadataUpdater.checkAndUpdatePartitionMetadata(physicalTablePath);
+        assertThat(updated).isTrue();
+
+        // The partition update alone has to register table id, partition id and buckets.
+        assertThat(metadataUpdater.getCluster().getTableId(tablePath)).isPresent();
+        assertThat(metadataUpdater.getPartitionId(physicalTablePath)).isPresent();
+        assertThat(
+                        metadataUpdater
+                                .getCluster()
+                                .getAvailableBucketsForPhysicalTablePath(physicalTablePath))
+                .hasSize(1);
+    }
+
@Test
void testUpdateWithEmptyMetadataResponse() throws Exception {
RpcClient rpcClient = FLUSS_CLUSTER_EXTENSION.getRpcClient();
diff --git a/fluss-client/src/test/java/org/apache/fluss/client/metadata/MetadataUpdaterTest.java b/fluss-client/src/test/java/org/apache/fluss/client/metadata/MetadataUpdaterTest.java
index 1cda13b930..579767893f 100644
--- a/fluss-client/src/test/java/org/apache/fluss/client/metadata/MetadataUpdaterTest.java
+++ b/fluss-client/src/test/java/org/apache/fluss/client/metadata/MetadataUpdaterTest.java
@@ -17,23 +17,40 @@
package org.apache.fluss.client.metadata;
+import org.apache.fluss.cluster.BucketLocation;
import org.apache.fluss.cluster.Cluster;
import org.apache.fluss.cluster.ServerNode;
import org.apache.fluss.cluster.ServerType;
import org.apache.fluss.config.Configuration;
import org.apache.fluss.exception.StaleMetadataException;
+import org.apache.fluss.metadata.PhysicalTablePath;
+import org.apache.fluss.metadata.Schema;
+import org.apache.fluss.metadata.TableBucket;
+import org.apache.fluss.metadata.TableDescriptor;
+import org.apache.fluss.metadata.TableInfo;
+import org.apache.fluss.metadata.TablePath;
import org.apache.fluss.rpc.RpcClient;
import org.apache.fluss.rpc.gateway.AdminReadOnlyGateway;
import org.apache.fluss.rpc.messages.MetadataRequest;
import org.apache.fluss.rpc.messages.MetadataResponse;
import org.apache.fluss.rpc.metrics.TestingClientMetricGroup;
import org.apache.fluss.server.coordinator.TestCoordinatorGateway;
+import org.apache.fluss.server.metadata.BucketMetadata;
+import org.apache.fluss.server.metadata.PartitionMetadata;
+import org.apache.fluss.server.metadata.TableMetadata;
+import org.apache.fluss.types.DataTypes;
import org.junit.jupiter.api.Test;
+import java.util.Arrays;
import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutionException;
+import static org.apache.fluss.client.utils.MetadataUtils.sendMetadataRequestAndRebuildCluster;
import static org.apache.fluss.server.utils.ServerRpcMessageUtils.buildMetadataResponse;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -70,6 +87,146 @@ void testInitializeClusterWithRetries() throws Exception {
.hasMessageContaining("The metadata is stale.");
}
+    @Test
+    void testPartialUpdatePartitionMetadataUsesRequestedPathForStaleTableMetadata()
+            throws Exception {
+        // The response carries table metadata under a stale id while the partition metadata
+        // reports a different, current table id.
+        final long staleTableId = 99L;
+        final long tableId = 100L;
+        final long partitionId = 200L;
+        final String partitionName = "dt=2026-01-01";
+        TablePath tablePath = TablePath.of("fluss", "partitioned_table");
+        PhysicalTablePath physicalTablePath = PhysicalTablePath.of(tablePath, partitionName);
+
+        Schema schema =
+                Schema.newBuilder()
+                        .column("id", DataTypes.INT())
+                        .column("dt", DataTypes.STRING())
+                        .build();
+        TableDescriptor tableDescriptor =
+                TableDescriptor.builder()
+                        .schema(schema)
+                        .partitionedBy("dt")
+                        .distributedBy(1)
+                        .build();
+        TableInfo tableInfo =
+                TableInfo.of(tablePath, staleTableId, 0, tableDescriptor, "/tmp/table", 1L, 1L);
+        TableMetadata staleTableMetadata = new TableMetadata(tableInfo, Collections.emptyList());
+
+        BucketMetadata bucketMetadata =
+                new BucketMetadata(0, TS_NODE.id(), 0, Collections.singletonList(TS_NODE.id()));
+        PartitionMetadata partitionMetadata =
+                new PartitionMetadata(
+                        tableId,
+                        partitionName,
+                        partitionId,
+                        Collections.singletonList(bucketMetadata));
+
+        MetadataResponse metadataResponse =
+                buildMetadataResponse(
+                        CS_NODE,
+                        Collections.singleton(TS_NODE),
+                        Collections.singletonList(staleTableMetadata),
+                        Collections.singletonList(partitionMetadata));
+        StaticMetadataGateway gateway = new StaticMetadataGateway(metadataResponse, 1, 1);
+
+        Cluster cluster =
+                sendMetadataRequestAndRebuildCluster(
+                        gateway,
+                        true,
+                        Cluster.empty(),
+                        Collections.singleton(tablePath),
+                        Collections.singleton(physicalTablePath),
+                        null);
+
+        // The table id taken from the partition metadata replaces the stale one.
+        assertThat(cluster.getTableId(tablePath)).hasValue(tableId);
+        assertThat(cluster.getPartitionId(physicalTablePath)).hasValue(partitionId);
+        TableBucket tableBucket = new TableBucket(tableId, partitionId, 0);
+        assertThat(cluster.getBucketLocation(tableBucket))
+                .map(BucketLocation::getLeader)
+                .hasValue(TS_NODE.id());
+        assertThat(cluster.getAvailableBucketsForPhysicalTablePath(physicalTablePath)).hasSize(1);
+    }
+
+    @Test
+    void testPartialUpdatePartitionMetadataFallsBackToClusterForAmbiguousRequestedPartitionName()
+            throws Exception {
+        // Two requested tables share one partition name, so the name alone cannot identify the
+        // owning table; only the origin cluster maps the returned table id to a table path.
+        TablePath tablePath = TablePath.of("fluss", "partitioned_table");
+        TablePath otherTablePath = TablePath.of("fluss", "other_partitioned_table");
+        String partitionName = "dt=2026-01-01";
+        PhysicalTablePath physicalTablePath = PhysicalTablePath.of(tablePath, partitionName);
+        PhysicalTablePath otherPhysicalTablePath =
+                PhysicalTablePath.of(otherTablePath, partitionName);
+        long tableId = 100L;
+        long partitionId = 200L;
+        BucketMetadata bucketMetadata =
+                new BucketMetadata(0, TS_NODE.id(), 0, Collections.singletonList(TS_NODE.id()));
+        // The response contains partition metadata only, without any table metadata.
+        MetadataResponse metadataResponse =
+                buildMetadataResponse(
+                        CS_NODE,
+                        Collections.singleton(TS_NODE),
+                        Collections.emptyList(),
+                        Collections.singletonList(
+                                new PartitionMetadata(
+                                        tableId,
+                                        partitionName,
+                                        partitionId,
+                                        Collections.singletonList(bucketMetadata))));
+        Map<TablePath, Long> tableIds = new HashMap<>();
+        tableIds.put(tablePath, tableId);
+        Cluster originCluster =
+                new Cluster(
+                        Collections.singletonMap(TS_NODE.id(), TS_NODE),
+                        CS_NODE,
+                        Collections.emptyMap(),
+                        tableIds,
+                        Collections.emptyMap());
+
+        Cluster cluster =
+                sendMetadataRequestAndRebuildCluster(
+                        new StaticMetadataGateway(metadataResponse, 2, 2),
+                        true,
+                        originCluster,
+                        new HashSet<>(Arrays.asList(tablePath, otherTablePath)),
+                        Arrays.asList(physicalTablePath, otherPhysicalTablePath),
+                        null);
+
+        // The partition is attributed to the table known by id, never to the other table.
+        assertThat(cluster.getTableId(tablePath)).hasValue(tableId);
+        assertThat(cluster.getPartitionId(physicalTablePath)).hasValue(partitionId);
+        assertThat(cluster.getPartitionId(otherPhysicalTablePath)).isEmpty();
+    }
+
+    @Test
+    void testPartialUpdatePartitionMetadataRejectsAmbiguousPartitionNameWithoutTableIdMapping() {
+        // Same ambiguous request as the fallback case, but the origin cluster is empty, so no
+        // source can map table id 100 to a table path.
+        String partitionName = "dt=2026-01-01";
+        TablePath tablePath = TablePath.of("fluss", "partitioned_table");
+        TablePath otherTablePath = TablePath.of("fluss", "other_partitioned_table");
+
+        BucketMetadata bucketMetadata =
+                new BucketMetadata(0, TS_NODE.id(), 0, Collections.singletonList(TS_NODE.id()));
+        PartitionMetadata partitionMetadata =
+                new PartitionMetadata(
+                        100L, partitionName, 200L, Collections.singletonList(bucketMetadata));
+        MetadataResponse metadataResponse =
+                buildMetadataResponse(
+                        CS_NODE,
+                        Collections.singleton(TS_NODE),
+                        Collections.emptyList(),
+                        Collections.singletonList(partitionMetadata));
+
+        assertThatThrownBy(
+                        () ->
+                                sendMetadataRequestAndRebuildCluster(
+                                        new StaticMetadataGateway(metadataResponse, 2, 2),
+                                        true,
+                                        Cluster.empty(),
+                                        new HashSet<>(Arrays.asList(tablePath, otherTablePath)),
+                                        Arrays.asList(
+                                                PhysicalTablePath.of(tablePath, partitionName),
+                                                PhysicalTablePath.of(
+                                                        otherTablePath, partitionName)),
+                                        null))
+                .isInstanceOf(ExecutionException.class)
+                .hasCauseInstanceOf(IllegalArgumentException.class)
+                .hasMessageContaining("table path not found for tableId 100 in cluster");
+    }
+
private static final class TestingAdminReadOnlyGateway extends TestCoordinatorGateway {
private final int maxRetryCount;
@@ -95,4 +252,28 @@ public CompletableFuture metadata(MetadataRequest request) {
}
}
}
+
+    /**
+     * Gateway stub that answers every metadata request with one fixed response, after asserting
+     * that the request carries the expected number of table paths and partition paths and no
+     * partition ids.
+     */
+    private static final class StaticMetadataGateway extends TestCoordinatorGateway {
+
+        private final MetadataResponse metadataResponse;
+        private final int expectedTablePathCount;
+        private final int expectedPartitionPathCount;
+
+        private StaticMetadataGateway(
+                MetadataResponse metadataResponse,
+                int expectedTablePathCount,
+                int expectedPartitionPathCount) {
+            this.metadataResponse = metadataResponse;
+            this.expectedTablePathCount = expectedTablePathCount;
+            this.expectedPartitionPathCount = expectedPartitionPathCount;
+        }
+
+        @Override
+        public CompletableFuture<MetadataResponse> metadata(MetadataRequest request) {
+            assertThat(request.getTablePathsList()).hasSize(expectedTablePathCount);
+            assertThat(request.getPartitionsPathsList()).hasSize(expectedPartitionPathCount);
+            assertThat(request.getPartitionsIds()).isEmpty();
+            return CompletableFuture.completedFuture(metadataResponse);
+        }
+    }
}
diff --git a/fluss-client/src/test/java/org/apache/fluss/client/table/PartitionedTableITCase.java b/fluss-client/src/test/java/org/apache/fluss/client/table/PartitionedTableITCase.java
index d204d087a4..3402f6096c 100644
--- a/fluss-client/src/test/java/org/apache/fluss/client/table/PartitionedTableITCase.java
+++ b/fluss-client/src/test/java/org/apache/fluss/client/table/PartitionedTableITCase.java
@@ -21,22 +21,35 @@
import org.apache.fluss.client.lookup.Lookuper;
import org.apache.fluss.client.table.writer.AppendWriter;
import org.apache.fluss.client.table.writer.UpsertWriter;
+import org.apache.fluss.config.AutoPartitionTimeUnit;
import org.apache.fluss.config.ConfigOptions;
+import org.apache.fluss.exception.FlussRuntimeException;
+import org.apache.fluss.exception.InvalidPartitionException;
import org.apache.fluss.exception.PartitionNotExistException;
import org.apache.fluss.exception.TooManyPartitionsException;
+import org.apache.fluss.metadata.DateTruncPartitionTransform;
+import org.apache.fluss.metadata.PartitionExpression;
import org.apache.fluss.metadata.PartitionInfo;
+import org.apache.fluss.metadata.PartitionKey;
import org.apache.fluss.metadata.PhysicalTablePath;
import org.apache.fluss.metadata.Schema;
+import org.apache.fluss.metadata.TableChange;
import org.apache.fluss.metadata.TableDescriptor;
+import org.apache.fluss.metadata.TableInfo;
import org.apache.fluss.metadata.TablePath;
import org.apache.fluss.row.GenericRow;
import org.apache.fluss.row.InternalRow;
+import org.apache.fluss.row.TimestampNtz;
import org.apache.fluss.types.DataTypes;
import org.junit.jupiter.api.Test;
+import java.nio.file.Path;
import java.time.Duration;
+import java.time.LocalDateTime;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -47,6 +60,8 @@
import static org.apache.fluss.testutils.InternalRowAssert.assertThatRow;
import static org.apache.fluss.testutils.common.CommonTestUtils.retry;
import static org.apache.fluss.testutils.common.CommonTestUtils.waitValue;
+import static org.apache.fluss.utils.FlussPaths.KV_TABLET_DIR_PREFIX;
+import static org.apache.fluss.utils.FlussPaths.LOG_TABLET_DIR_PREFIX;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
@@ -58,6 +73,503 @@
*/
class PartitionedTableITCase extends ClientToServerITCaseBase {
+    @Test
+    void testImplicitPartitionedPrimaryKeyTableEndToEndWithDirectoryLayout() throws Exception {
+        TablePath tablePath = TablePath.of("test_db_1", "test_implicit_partitioned_pk_e2e_table_1");
+        // Partition name expected for region "us" and the event day of the written row.
+        String partitionName = "us$20240315";
+        Schema schema = createImplicitPartitionedTable(tablePath, true);
+        Table table = conn.getTable(tablePath);
+        TimestampNtz eventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        InternalRow writtenRow = row(schema.getRowType(), "us", 1, eventTime, 10, "e2e-value-10");
+
+        UpsertWriter upsertWriter = table.newUpsert().createWriter();
+        upsertWriter.upsert(writtenRow).get();
+        upsertWriter.flush();
+
+        // The virtual partition key "event_day" must not appear as a column of the row type.
+        TableInfo tableInfo = admin.getTableInfo(tablePath).get();
+        assertThat(tableInfo.getRowType().getFieldNames())
+                .containsExactly("region", "id", "event_time", "seq", "payload");
+        assertThat(tableInfo.getRowType().getFieldNames()).doesNotContain("event_day");
+        assertThat(tableInfo.getPrimaryKeys()).containsExactly("region", "id", "event_time", "seq");
+        assertThat(tableInfo.getPartitionKeys()).containsExactly("region", "event_day");
+        assertThat(tableInfo.getPhysicalPartitionKeys()).containsExactly("region");
+        assertThat(tableInfo.getVirtualPartitionKeys()).containsExactly("event_day");
+        assertThat(tableInfo.getPartitionSourceColumns()).containsExactly("event_time");
+        assertThat(tableInfo.getPartitionExpressions()).hasSize(1);
+        assertThat(tableInfo.getPartitionExpressions().get(0).getVirtualPartitionSpecKey())
+                .hasValue("event_day");
+
+        List<PartitionInfo> partitionInfos = waitForPartitionInfos(tablePath, 1);
+        PartitionInfo partitionInfo = partitionInfos.get(0);
+        assertThat(partitionInfo.getPartitionName()).isEqualTo(partitionName);
+        assertThat(partitionInfo.getPartitionSpec().getSpecMap())
+                .containsEntry("region", "us")
+                .containsEntry("event_day", "20240315");
+        assertThat(FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(tablePath, 1))
+                .containsEntry(partitionName, partitionInfo.getPartitionId());
+
+        Lookuper lookuper = table.newLookup().createLookuper();
+        assertThatRow(lookupRow(lookuper, row("us", 1, eventTime, 10)))
+                .withSchema(schema.getRowType())
+                .isEqualTo(writtenRow);
+
+        assertPartitionReplicaDirectories(
+                tablePath, tableInfo.getTableId(), partitionInfo.getPartitionId(), partitionName);
+    }
+
+    @Test
+    void testImplicitPartitionedPrimaryKeyTableWriteLookupAndDelete() throws Exception {
+        TablePath tablePath = TablePath.of("test_db_1", "test_implicit_partitioned_pk_table_1");
+        Schema schema =
+                Schema.newBuilder()
+                        .column("region", DataTypes.STRING())
+                        .column("id", DataTypes.INT())
+                        .column("event_time", DataTypes.TIMESTAMP())
+                        .column("seq", DataTypes.INT())
+                        .column("payload", DataTypes.STRING())
+                        .primaryKey("region", "id", "event_time", "seq")
+                        .build();
+        // Partitioned by the "region" column plus a day-truncated expression over "event_time".
+        TableDescriptor descriptor =
+                TableDescriptor.builder()
+                        .schema(schema)
+                        .partitionedByKeys(
+                                PartitionKey.column("region"),
+                                PartitionKey.expression(
+                                        PartitionExpression.of(
+                                                "event_day",
+                                                DateTruncPartitionTransform.of(
+                                                        "event_time", AutoPartitionTimeUnit.DAY))))
+                        .distributedBy(2, "id", "event_time")
+                        .build();
+        createTable(tablePath, descriptor, false);
+        admin.createPartition(
+                        tablePath,
+                        newPartitionSpec(
+                                Arrays.asList("region", "event_day"),
+                                Arrays.asList("us", "20240315")),
+                        false)
+                .get();
+        Map<String, Long> partitionIdByNames =
+                FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(tablePath, 1);
+        assertThat(partitionIdByNames).containsKey("us$20240315");
+        // Listing with a partial spec that names only the virtual key finds the partition.
+        assertThat(
+                        admin.listPartitionInfos(
+                                        tablePath, newPartitionSpec("event_day", "20240315"))
+                                .get())
+                .hasSize(1);
+
+        TimestampNtz eventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        InternalRow firstRow = row(schema.getRowType(), "us", 1, eventTime, 10, "value-10");
+        InternalRow secondRow = row(schema.getRowType(), "us", 1, eventTime, 11, "value-11");
+
+        Table table = conn.getTable(tablePath);
+        UpsertWriter upsertWriter = table.newUpsert().createWriter();
+        upsertWriter.upsert(firstRow).get();
+        upsertWriter.upsert(secondRow).get();
+        upsertWriter.flush();
+
+        Lookuper lookuper = table.newLookup().createLookuper();
+        assertThatRow(lookupRow(lookuper, row("us", 1, eventTime, 10)))
+                .withSchema(schema.getRowType())
+                .isEqualTo(firstRow);
+
+        Lookuper prefixLookuper =
+                table.newLookup()
+                        .lookupBy(Arrays.asList("region", "id", "event_time"))
+                        .createLookuper();
+        List<InternalRow> prefixRows =
+                prefixLookuper.lookup(row("us", 1, eventTime)).get().getRowList();
+        assertThat(prefixRows).hasSize(2);
+
+        // The same lookup columns given in a different order must behave identically.
+        Lookuper reorderedPrefixLookuper =
+                table.newLookup()
+                        .lookupBy(Arrays.asList("id", "event_time", "region"))
+                        .createLookuper();
+        assertThat(reorderedPrefixLookuper.lookup(row(1, eventTime, "us")).get().getRowList())
+                .hasSize(2);
+        assertThat(reorderedPrefixLookuper.lookup(row(1, eventTime, "eu")).get().getRowList())
+                .isEmpty();
+
+        // Delete with a row that holds only the four primary-key columns.
+        upsertWriter.delete(row("us", 1, eventTime, 10)).get();
+        upsertWriter.flush();
+
+        assertThat(lookupRow(lookuper, row("us", 1, eventTime, 10))).isNull();
+        prefixRows = prefixLookuper.lookup(row("us", 1, eventTime)).get().getRowList();
+        assertThat(prefixRows).hasSize(1);
+        assertThatRow(prefixRows.get(0)).withSchema(schema.getRowType()).isEqualTo(secondRow);
+    }
+
+    @Test
+    void testImplicitPartitionPrefixLookupWithRoutingOnlyTransformSource() throws Exception {
+        TablePath tablePath =
+                TablePath.of("test_db_1", "test_implicit_partitioned_prefix_lookup_table_1");
+        Schema schema =
+                Schema.newBuilder()
+                        .column("id", DataTypes.INT())
+                        .column("event_time", DataTypes.TIMESTAMP())
+                        .column("seq", DataTypes.INT())
+                        .column("payload", DataTypes.STRING())
+                        .primaryKey("id", "event_time", "seq")
+                        .build();
+        // The only partition key is the day-truncated expression; the bucket key is "id" alone.
+        TableDescriptor descriptor =
+                TableDescriptor.builder()
+                        .schema(schema)
+                        .partitionedByKeys(
+                                PartitionKey.expression(
+                                        PartitionExpression.of(
+                                                "event_day",
+                                                DateTruncPartitionTransform.of(
+                                                        "event_time", AutoPartitionTimeUnit.DAY))))
+                        .distributedBy(2, "id")
+                        .build();
+        createTable(tablePath, descriptor, false);
+        admin.createPartition(tablePath, newPartitionSpec("event_day", "20240315"), false).get();
+        FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(tablePath, 1);
+
+        // Both timestamps fall on 2024-03-15 but differ in the hour.
+        TimestampNtz firstEventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        TimestampNtz secondEventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 11, 30));
+        InternalRow firstRow = row(schema.getRowType(), 1, firstEventTime, 10, "value-10");
+        InternalRow secondRow = row(schema.getRowType(), 1, secondEventTime, 11, "value-11");
+        InternalRow otherIdRow = row(schema.getRowType(), 2, firstEventTime, 12, "value-12");
+
+        Table table = conn.getTable(tablePath);
+        UpsertWriter upsertWriter = table.newUpsert().createWriter();
+        upsertWriter.upsert(firstRow).get();
+        upsertWriter.upsert(secondRow).get();
+        upsertWriter.upsert(otherIdRow).get();
+        upsertWriter.flush();
+
+        Lookuper prefixLookuper =
+                table.newLookup().lookupBy(Arrays.asList("id", "event_time")).createLookuper();
+        // Both rows with id = 1 are expected although only firstRow has exactly firstEventTime.
+        List<InternalRow> prefixRows =
+                prefixLookuper.lookup(row(1, firstEventTime)).get().getRowList();
+        assertThat(prefixRows).hasSize(2);
+
+        assertThat(prefixLookuper.lookup(row(2, firstEventTime)).get().getRowList()).hasSize(1);
+    }
+
+    @Test
+    void testImplicitPartitionedPrimaryKeyTableDynamicCreatePartition() throws Exception {
+        TablePath tablePath =
+                TablePath.of("test_db_1", "test_implicit_partitioned_pk_dynamic_table_1");
+        Schema schema = createImplicitPartitionedTable(tablePath, true);
+        Table table = conn.getTable(tablePath);
+        UpsertWriter upsertWriter = table.newUpsert().createWriter();
+
+        // This test never calls createPartition itself; it only writes a row.
+        TimestampNtz eventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        InternalRow row = row(schema.getRowType(), "us", 1, eventTime, 10, "value-10");
+        upsertWriter.upsert(row).get();
+        upsertWriter.flush();
+
+        // Poll until exactly one partition is listed for the table.
+        List<PartitionInfo> partitionInfoList =
+                waitValue(
+                        () -> {
+                            List<PartitionInfo> partitionInfos =
+                                    admin.listPartitionInfos(tablePath).get();
+                            if (partitionInfos.size() == 1) {
+                                return Optional.of(partitionInfos);
+                            } else {
+                                return Optional.empty();
+                            }
+                        },
+                        Duration.ofMinutes(1),
+                        "Fail to wait for the implicit partition created.");
+        assertThat(partitionInfoList.get(0).getPartitionName()).isEqualTo("us$20240315");
+
+        Lookuper lookuper = table.newLookup().createLookuper();
+        assertThatRow(lookupRow(lookuper, row("us", 1, eventTime, 10)))
+                .withSchema(schema.getRowType())
+                .isEqualTo(row);
+    }
+
+    @Test
+    void testImplicitPartitionedLogTableAppend() throws Exception {
+        TablePath tablePath = TablePath.of("test_db_1", "test_implicit_partitioned_log_table_1");
+        String partitionName = "us$20240315";
+        Schema schema = createImplicitPartitionedTable(tablePath, false);
+        admin.createPartition(
+                        tablePath,
+                        newPartitionSpec(
+                                Arrays.asList("region", "event_day"),
+                                Arrays.asList("us", "20240315")),
+                        false)
+                .get();
+        Map<String, Long> partitionIdByNames =
+                FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(tablePath, 1);
+        assertThat(partitionIdByNames).containsKey(partitionName);
+
+        // Two rows at different times of the same day are expected in the same partition.
+        TimestampNtz firstEventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        TimestampNtz secondEventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 23, 59));
+        List<InternalRow> expectedRows =
+                Arrays.asList(
+                        row(schema.getRowType(), "us", 1, firstEventTime, 10, "value-10"),
+                        row(schema.getRowType(), "us", 2, secondEventTime, 20, "value-20"));
+
+        Table table = conn.getTable(tablePath);
+        AppendWriter appendWriter = table.newAppend().createWriter();
+        for (InternalRow row : expectedRows) {
+            appendWriter.append(row).get();
+        }
+        appendWriter.flush();
+
+        Map<Long, List<InternalRow>> expectPartitionAppendRows = new HashMap<>();
+        expectPartitionAppendRows.put(partitionIdByNames.get(partitionName), expectedRows);
+        verifyPartitionLogs(table, schema.getRowType(), expectPartitionAppendRows);
+    }
+
+    @Test
+    void testImplicitPartitionedLogTableDynamicCreatePartitions() throws Exception {
+        TablePath tablePath =
+                TablePath.of("test_db_1", "test_implicit_partitioned_log_dynamic_table_1");
+        Schema schema = createImplicitPartitionedTable(tablePath, false);
+        // The two rows fall on consecutive days, so two partitions are expected.
+        TimestampNtz firstEventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        TimestampNtz secondEventTime =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 16, 0, 30));
+        InternalRow firstRow = row(schema.getRowType(), "us", 1, firstEventTime, 10, "value-10");
+        InternalRow secondRow = row(schema.getRowType(), "us", 2, secondEventTime, 20, "value-20");
+
+        Table table = conn.getTable(tablePath);
+        AppendWriter appendWriter = table.newAppend().createWriter();
+        appendWriter.append(firstRow).get();
+        appendWriter.append(secondRow).get();
+        appendWriter.flush();
+
+        List<PartitionInfo> partitionInfos = waitForPartitionInfos(tablePath, 2);
+        assertThat(partitionInfos)
+                .extracting(PartitionInfo::getPartitionName)
+                .containsExactlyInAnyOrder("us$20240315", "us$20240316");
+
+        // Each partition must hold exactly the row whose event day matches its name.
+        Map<Long, List<InternalRow>> expectPartitionAppendRows = new HashMap<>();
+        for (PartitionInfo partitionInfo : partitionInfos) {
+            if (partitionInfo.getPartitionName().equals("us$20240315")) {
+                expectPartitionAppendRows.put(
+                        partitionInfo.getPartitionId(), Collections.singletonList(firstRow));
+            } else {
+                expectPartitionAppendRows.put(
+                        partitionInfo.getPartitionId(), Collections.singletonList(secondRow));
+            }
+        }
+        verifyPartitionLogs(table, schema.getRowType(), expectPartitionAppendRows);
+    }
+
+    /**
+     * With {@code CLIENT_WRITER_DYNAMIC_CREATE_PARTITION_ENABLED} set to {@code false}, appending
+     * a row to an implicitly partitioned log table that has no partitions must fail with a
+     * {@link PartitionNotExistException} as the cause, naming partition {@code us$20240315}.
+     */
+    @Test
+    void testImplicitPartitionedLogTableWriteNewPartitionFailsWhenDynamicDisabled()
+            throws Exception {
+        // The option is switched off before the table and the writer are obtained.
+        clientConf.set(ConfigOptions.CLIENT_WRITER_DYNAMIC_CREATE_PARTITION_ENABLED, false);
+
+        TablePath path =
+                TablePath.of("test_db_1", "test_implicit_partitioned_log_disabled_dynamic_table_1");
+        Schema tableSchema = createImplicitPartitionedTable(path, false);
+        AppendWriter writer = conn.getTable(path).newAppend().createWriter();
+
+        TimestampNtz timestamp =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        InternalRow unroutableRow =
+                row(tableSchema.getRowType(), "us", 1, timestamp, 10, "value-10");
+        PhysicalTablePath missingPartition = PhysicalTablePath.of(path, "us$20240315");
+
+        assertThatThrownBy(() -> writer.append(unroutableRow).get())
+                .cause()
+                .isInstanceOf(PartitionNotExistException.class)
+                .hasMessageContaining("Table partition '%s' does not exist.", missingPartition);
+    }
+
+    /**
+     * Partition management on an implicitly partitioned table must address partitions by the
+     * final spec keys ({@code region}, {@code event_day}). Using the transform's source column
+     * {@code event_time} as a spec key is rejected both when listing and when creating
+     * partitions.
+     */
+    @Test
+    void testImplicitPartitionManagementUsesFinalSpecKeysOnly() throws Exception {
+        TablePath path = TablePath.of("test_db_1", "test_implicit_partition_management_table_1");
+        createImplicitPartitionedTable(path, false);
+
+        TableInfo info = admin.getTableInfo(path).get();
+        // The virtual key is a partition spec key only; it is not part of the row type.
+        assertThat(info.getRowType().getFieldNames()).doesNotContain("event_day");
+        DateTruncPartitionTransform dateTrunc =
+                (DateTruncPartitionTransform) info.getPartitionExpressions().get(0).getTransform();
+        assertThat(dateTrunc.getTimeZone()).isPresent();
+
+        // Creating and listing with the final spec keys works.
+        List<String> finalKeys = Arrays.asList("region", "event_day");
+        List<String> finalValues = Arrays.asList("us", "20240315");
+        admin.createPartition(path, newPartitionSpec(finalKeys, finalValues), false).get();
+        assertThat(admin.listPartitionInfos(path, newPartitionSpec("event_day", "20240315")).get())
+                .hasSize(1);
+
+        // Listing by the source column is rejected.
+        assertThatThrownBy(
+                        () ->
+                                admin.listPartitionInfos(
+                                                path, newPartitionSpec("event_time", "2024-03-15"))
+                                        .get())
+                .cause()
+                .isInstanceOf(FlussRuntimeException.class)
+                .hasMessageContaining("table don't contains this partitionKey: event_time");
+
+        // Creating by the source column is rejected as well.
+        List<String> sourceKeys = Arrays.asList("region", "event_time");
+        List<String> sourceValues = Arrays.asList("us", "2024-03-15");
+        assertThatThrownBy(
+                        () ->
+                                admin.createPartition(
+                                                path,
+                                                newPartitionSpec(sourceKeys, sourceValues),
+                                                false)
+                                        .get())
+                .cause()
+                .isInstanceOf(InvalidPartitionException.class)
+                .hasMessageContaining("partition key 'event_day'");
+    }
+
+    /**
+     * Adds a column to an implicitly partitioned primary-key table and checks that the partition
+     * expressions and partition keys read back afterwards are unchanged, then writes and looks up
+     * a row that uses the altered row type.
+     */
+    @Test
+    void testImplicitPartitionExpressionsSurviveSchemaReload() throws Exception {
+        TablePath path =
+                TablePath.of("test_db_1", "test_implicit_partition_schema_reload_table_1");
+        Schema originalSchema = createImplicitPartitionedTable(path, true);
+        TableInfo infoBefore = admin.getTableInfo(path).get();
+
+        List<TableChange> changes =
+                Collections.singletonList(
+                        TableChange.addColumn(
+                                "extra",
+                                DataTypes.STRING(),
+                                "extra column",
+                                TableChange.ColumnPosition.last()));
+        admin.alterTable(path, changes, false).get();
+
+        TableInfo infoAfter = admin.getTableInfo(path).get();
+        assertThat(infoAfter.getPartitionExpressions())
+                .isEqualTo(infoBefore.getPartitionExpressions());
+        assertThat(infoAfter.getPartitionKeys()).containsExactly("region", "event_day");
+        assertThat(infoAfter.getRowType().getFieldNames()).contains("extra");
+
+        List<String> specKeys = Arrays.asList("region", "event_day");
+        List<String> specValues = Arrays.asList("us", "20240315");
+        admin.createPartition(path, newPartitionSpec(specKeys, specValues), false).get();
+        FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(path, 1);
+
+        Table table = conn.getTable(path);
+        UpsertWriter writer = table.newUpsert().createWriter();
+        TimestampNtz timestamp =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        // The written row carries a value for the newly added trailing column.
+        InternalRow widenedRow =
+                row(infoAfter.getRowType(), "us", 1, timestamp, 10, "value-10", "extra-value");
+        writer.upsert(widenedRow).get();
+        writer.flush();
+
+        Lookuper lookuper = table.newLookup().createLookuper();
+        InternalRow found = lookupRow(lookuper, row("us", 1, timestamp, 10));
+        assertThatRow(found).withSchema(infoAfter.getRowType()).isEqualTo(widenedRow);
+
+        int columnsBefore = originalSchema.getColumns().size();
+        assertThat(infoAfter.getSchema().getColumns()).hasSize(columnsBefore + 1);
+    }
+
+    /**
+     * Upserts two versions of the same primary key into an existing partition and checks that a
+     * lookup returns the later version. A lookup whose {@code event_time} falls on a day for
+     * which this test created no partition must return {@code null}.
+     */
+    @Test
+    void testImplicitPartitionedPrimaryKeyTableLookupMissingPartitionAndOverwrite()
+            throws Exception {
+        TablePath path =
+                TablePath.of("test_db_1", "test_implicit_partitioned_pk_overwrite_table_1");
+        Schema pkSchema = createImplicitPartitionedTable(path, true);
+
+        List<String> specKeys = Arrays.asList("region", "event_day");
+        List<String> specValues = Arrays.asList("us", "20240315");
+        admin.createPartition(path, newPartitionSpec(specKeys, specValues), false).get();
+        FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(path, 1);
+
+        Table table = conn.getTable(path);
+        UpsertWriter writer = table.newUpsert().createWriter();
+        TimestampNtz timestamp =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        InternalRow staleRow = row(pkSchema.getRowType(), "us", 1, timestamp, 10, "old-value");
+        InternalRow latestRow = row(pkSchema.getRowType(), "us", 1, timestamp, 10, "new-value");
+
+        // Same key twice: the second upsert is expected to replace the first.
+        writer.upsert(staleRow).get();
+        writer.upsert(latestRow).get();
+        writer.flush();
+
+        Lookuper lookuper = table.newLookup().createLookuper();
+        InternalRow found = lookupRow(lookuper, row("us", 1, timestamp, 10));
+        assertThatRow(found).withSchema(pkSchema.getRowType()).isEqualTo(latestRow);
+
+        // Only the 20240315 partition was created above; the next day has none.
+        TimestampNtz dayWithoutPartition =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 16, 10, 30));
+        InternalRow absent = lookupRow(lookuper, row("us", 1, dayWithoutPartition, 10));
+        assertThat(absent).isNull();
+    }
+
+    /**
+     * Two rows that differ only in the time-of-day of {@code event_time} (same day, hence the
+     * same {@code us$20240315} partition) must be stored, looked up and deleted as two distinct
+     * primary keys.
+     */
+    @Test
+    void testImplicitPartitionedPrimaryKeyTableDistinguishesSourceValuesInSamePartition()
+            throws Exception {
+        TablePath path =
+                TablePath.of("test_db_1", "test_implicit_partitioned_pk_same_partition_table_1");
+        Schema pkSchema = createImplicitPartitionedTable(path, true);
+
+        List<String> specKeys = Arrays.asList("region", "event_day");
+        List<String> specValues = Arrays.asList("us", "20240315");
+        admin.createPartition(path, newPartitionSpec(specKeys, specValues), false).get();
+        FLUSS_CLUSTER_EXTENSION.waitUntilPartitionAllReady(path, 1);
+
+        Table table = conn.getTable(path);
+        UpsertWriter writer = table.newUpsert().createWriter();
+        TimestampNtz morning =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 10, 30));
+        TimestampNtz laterMorning =
+                TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 3, 15, 11, 30));
+        InternalRow morningRow = row(pkSchema.getRowType(), "us", 1, morning, 10, "value-10");
+        InternalRow laterRow = row(pkSchema.getRowType(), "us", 1, laterMorning, 10, "value-11");
+
+        writer.upsert(morningRow).get();
+        writer.upsert(laterRow).get();
+        writer.flush();
+
+        // Both rows are individually retrievable.
+        Lookuper lookuper = table.newLookup().createLookuper();
+        InternalRow foundMorning = lookupRow(lookuper, row("us", 1, morning, 10));
+        assertThatRow(foundMorning).withSchema(pkSchema.getRowType()).isEqualTo(morningRow);
+        InternalRow foundLater = lookupRow(lookuper, row("us", 1, laterMorning, 10));
+        assertThatRow(foundLater).withSchema(pkSchema.getRowType()).isEqualTo(laterRow);
+
+        // Deleting by the first key must leave the second row untouched.
+        writer.delete(row("us", 1, morning, 10)).get();
+        writer.flush();
+
+        InternalRow deleted = lookupRow(lookuper, row("us", 1, morning, 10));
+        assertThat(deleted).isNull();
+        InternalRow survivor = lookupRow(lookuper, row("us", 1, laterMorning, 10));
+        assertThatRow(survivor).withSchema(pkSchema.getRowType()).isEqualTo(laterRow);
+    }
+
+    /**
+     * Creating a prefix lookuper over {@code [region, id]} must be rejected with an {@link
+     * IllegalArgumentException} because the lookup columns omit {@code event_time}, the source
+     * column of the table's partition transform.
+     */
+    @Test
+    void testImplicitPartitionedPrimaryKeyPrefixLookupRequiresTransformSource() throws Exception {
+        TablePath path = TablePath.of("test_db_1", "test_implicit_partitioned_pk_prefix_table_1");
+        createImplicitPartitionedTable(path, true);
+        Table table = conn.getTable(path);
+        List<String> lookupColumns = Arrays.asList("region", "id");
+
+        assertThatThrownBy(() -> table.newLookup().lookupBy(lookupColumns).createLookuper())
+                .isInstanceOf(IllegalArgumentException.class)
+                .hasMessageContaining("must contain all partition fields [region, event_time]");
+    }
+
@Test
void testPartitionedPrimaryKeyTable() throws Exception {
TablePath tablePath = TablePath.of("test_db_1", "test_static_partitioned_pk_table_1");
@@ -236,6 +748,105 @@ void testCreatePartitionExceedMaxPartitionNumber() throws Exception {
+ "test_db_1.test_pk_table_1, only allow 10 partitions."));
}
+    /**
+     * Polls {@code admin.listPartitionInfos} until the table reports exactly {@code expectedSize}
+     * partitions, waiting at most one minute.
+     *
+     * @param tablePath the table whose partitions are listed
+     * @param expectedSize the exact number of partitions to wait for
+     * @return the partition infos from the first listing whose size equals {@code expectedSize}
+     * @throws Exception if listing fails, or as raised by {@code waitValue} when the expected
+     *     size is not observed in time
+     */
+    private List<PartitionInfo> waitForPartitionInfos(TablePath tablePath, int expectedSize)
+            throws Exception {
+        return waitValue(
+                () -> {
+                    List<PartitionInfo> partitionInfos = admin.listPartitionInfos(tablePath).get();
+                    // An empty Optional tells waitValue to keep polling.
+                    if (partitionInfos.size() == expectedSize) {
+                        return Optional.of(partitionInfos);
+                    } else {
+                        return Optional.empty();
+                    }
+                },
+                Duration.ofMinutes(1),
+                "Fail to wait for the implicit partition created.");
+    }
+
+    /**
+     * Retries for up to one minute until every online replica of the given partition, across all
+     * tablet servers, reports the expected physical table path and keeps its log and KV tablet
+     * directories under a partition directory named {@code <partitionName>-p<partitionId>}.
+     *
+     * <p>Both the log and the KV directory lists are required to be non-empty, so the assertion
+     * only passes for partitions whose replicas have a KV tablet.
+     *
+     * @param tablePath the logical table path
+     * @param tableId the id used to select replicas and to build the expected table directory
+     * @param partitionId the id of the partition whose replicas are inspected
+     * @param partitionName the expected partition name
+     */
+    private void assertPartitionReplicaDirectories(
+            TablePath tablePath, long tableId, long partitionId, String partitionName) {
+        String expectedPartitionDir = partitionName + "-p" + partitionId;
+        retry(
+                Duration.ofMinutes(1),
+                () -> {
+                    // Collected fresh on every retry so stale results never leak between attempts.
+                    List<PhysicalTablePath> physicalTablePaths = new ArrayList<>();
+                    List<Path> logTabletDirs = new ArrayList<>();
+                    List<Path> kvTabletDirs = new ArrayList<>();
+                    FLUSS_CLUSTER_EXTENSION
+                            .getTabletServers()
+                            .forEach(
+                                    tabletServer ->
+                                            tabletServer
+                                                    .getReplicaManager()
+                                                    .onlineReplicas()
+                                                    .filter(
+                                                            replica ->
+                                                                    replica.getTableBucket()
+                                                                                    .getTableId()
+                                                                            == tableId)
+                                                    .filter(
+                                                            replica ->
+                                                                    Long.valueOf(partitionId)
+                                                                            .equals(
+                                                                                    replica.getTableBucket()
+                                                                                            .getPartitionId()))
+                                                    .forEach(
+                                                            replica -> {
+                                                                physicalTablePaths.add(
+                                                                        replica
+                                                                                .getPhysicalTablePath());
+                                                                logTabletDirs.add(
+                                                                        replica.getLogTablet()
+                                                                                .getLogDir()
+                                                                                .toPath());
+                                                                // Replicas without a KV tablet
+                                                                // contribute a log dir only.
+                                                                if (replica.getKvTablet() != null) {
+                                                                    kvTabletDirs.add(
+                                                                            replica.getKvTablet()
+                                                                                    .getKvTabletDir()
+                                                                                    .toPath());
+                                                                }
+                                                            }));
+                    assertThat(physicalTablePaths)
+                            .isNotEmpty()
+                            .containsOnly(PhysicalTablePath.of(tablePath, partitionName));
+                    assertThat(logTabletDirs)
+                            .isNotEmpty()
+                            .allSatisfy(
+                                    logTabletDir ->
+                                            assertTabletDir(
+                                                    logTabletDir,
+                                                    expectedPartitionDir,
+                                                    tablePath,
+                                                    tableId,
+                                                    LOG_TABLET_DIR_PREFIX));
+                    assertThat(kvTabletDirs)
+                            .isNotEmpty()
+                            .allSatisfy(
+                                    kvTabletDir ->
+                                            assertTabletDir(
+                                                    kvTabletDir,
+                                                    expectedPartitionDir,
+                                                    tablePath,
+                                                    tableId,
+                                                    KV_TABLET_DIR_PREFIX));
+                });
+    }
+
+    /**
+     * Asserts that {@code tabletDir} is an existing directory whose name starts with {@code
+     * tabletDirPrefix} and whose ancestors are, from nearest to farthest, {@code
+     * expectedPartitionDir}, {@code <tableName>-<tableId>} and the database name.
+     *
+     * @param tabletDir the tablet directory to check
+     * @param expectedPartitionDir the expected name of the direct parent directory
+     * @param tablePath supplies the expected table and database names
+     * @param tableId the id appended to the table name in the table directory
+     * @param tabletDirPrefix the required prefix of the tablet directory name
+     */
+    private void assertTabletDir(
+            Path tabletDir,
+            String expectedPartitionDir,
+            TablePath tablePath,
+            long tableId,
+            String tabletDirPrefix) {
+        assertThat(tabletDir).exists().isDirectory();
+        assertThat(tabletDir.getFileName().toString()).startsWith(tabletDirPrefix);
+
+        // Walk upwards one level at a time, checking each directory name on the way.
+        Path partitionDir = tabletDir.getParent();
+        assertThat(partitionDir.getFileName().toString()).isEqualTo(expectedPartitionDir);
+
+        Path tableDir = partitionDir.getParent();
+        String expectedTableDir = tablePath.getTableName() + "-" + tableId;
+        assertThat(tableDir.getFileName().toString()).isEqualTo(expectedTableDir);
+
+        Path databaseDir = tableDir.getParent();
+        assertThat(databaseDir.getFileName().toString()).isEqualTo(tablePath.getDatabaseName());
+    }
+
private Schema createPartitionedTable(TablePath tablePath, boolean isPrimaryTable)
throws Exception {
Schema.Builder schemaBuilder =
@@ -258,4 +869,39 @@ private Schema createPartitionedTable(TablePath tablePath, boolean isPrimaryTabl
createTable(tablePath, partitionTableDescriptor, false);
return schema;
}
+
+    /**
+     * Creates a table partitioned by the physical column {@code region} and by the virtual key
+     * {@code event_day}, a day-granularity date-trunc transform of {@code event_time}.
+     *
+     * <p>Primary-key tables use the key {@code (region, id, event_time, seq)} and 2 buckets
+     * distributed by {@code (id, event_time)}; log tables use a single bucket.
+     *
+     * @param tablePath the path of the table to create
+     * @param isPrimaryTable whether to declare a primary key
+     * @return the schema the table was created with
+     */
+    private Schema createImplicitPartitionedTable(TablePath tablePath, boolean isPrimaryTable)
+            throws Exception {
+        Schema.Builder columns =
+                Schema.newBuilder()
+                        .column("region", DataTypes.STRING())
+                        .column("id", DataTypes.INT())
+                        .column("event_time", DataTypes.TIMESTAMP().copy(false))
+                        .column("seq", DataTypes.INT())
+                        .column("payload", DataTypes.STRING());
+        if (isPrimaryTable) {
+            columns.primaryKey("region", "id", "event_time", "seq");
+        }
+        Schema schema = columns.build();
+
+        // No time zone is given here, so the transform is created unresolved.
+        PartitionExpression eventDay =
+                PartitionExpression.of(
+                        "event_day",
+                        DateTruncPartitionTransform.of("event_time", AutoPartitionTimeUnit.DAY));
+        TableDescriptor.Builder descriptor =
+                TableDescriptor.builder()
+                        .schema(schema)
+                        .partitionedByKeys(
+                                PartitionKey.column("region"), PartitionKey.expression(eventDay));
+        if (!isPrimaryTable) {
+            descriptor.distributedBy(1);
+        } else {
+            descriptor.distributedBy(2, "id", "event_time");
+        }
+
+        createTable(tablePath, descriptor.build(), false);
+        return schema;
+    }
}
diff --git a/fluss-client/src/test/java/org/apache/fluss/client/table/writer/UpsertWriterImplTest.java b/fluss-client/src/test/java/org/apache/fluss/client/table/writer/UpsertWriterImplTest.java
new file mode 100644
index 0000000000..5e36eb4a8c
--- /dev/null
+++ b/fluss-client/src/test/java/org/apache/fluss/client/table/writer/UpsertWriterImplTest.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.client.table.writer;
+
+import org.apache.fluss.client.write.WriteCallback;
+import org.apache.fluss.client.write.WriteRecord;
+import org.apache.fluss.client.write.WriterClient;
+import org.apache.fluss.config.AutoPartitionTimeUnit;
+import org.apache.fluss.metadata.DateTruncPartitionTransform;
+import org.apache.fluss.metadata.PartitionExpression;
+import org.apache.fluss.metadata.PartitionKey;
+import org.apache.fluss.metadata.Schema;
+import org.apache.fluss.metadata.TableDescriptor;
+import org.apache.fluss.metadata.TableInfo;
+import org.apache.fluss.metadata.TablePath;
+import org.apache.fluss.row.BinaryString;
+import org.apache.fluss.row.GenericRow;
+import org.apache.fluss.row.InternalRow;
+import org.apache.fluss.row.TimestampNtz;
+import org.apache.fluss.types.DataTypes;
+
+import org.junit.jupiter.api.Test;
+
+import java.time.LocalDateTime;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.mock;
+
+/** Tests for {@link UpsertWriterImpl}. */
+class UpsertWriterImplTest {
+
+ @Test
+ void testDeleteAcceptsPrimaryKeyRowForImplicitPartitionedTable() throws Exception {
+ TableInfo tableInfo = implicitPartitionedTableInfo();
+ AtomicReference sentRecord = new AtomicReference<>();
+ WriterClient writerClient = mock(WriterClient.class);
+ doAnswer(
+ invocation -> {
+ WriteRecord record = invocation.getArgument(0);
+ WriteCallback callback = invocation.getArgument(1);
+ sentRecord.set(record);
+ callback.onCompletion(null, 11L, null);
+ return null;
+ })
+ .when(writerClient)
+ .send(any(WriteRecord.class), any(WriteCallback.class));
+ UpsertWriterImpl writer =
+ new UpsertWriterImpl(tableInfo.getTablePath(), tableInfo, null, writerClient);
+ LocalDateTime eventTime = LocalDateTime.of(2024, 3, 15, 10, 30);
+
+ DeleteResult result =
+ writer.delete(
+ GenericRow.of(
+ BinaryString.fromString("us"),
+ 7,
+ TimestampNtz.fromLocalDateTime(eventTime)))
+ .get();
+
+ assertThat(result.getLogEndOffset()).isEqualTo(11L);
+ assertThat(sentRecord.get().getPhysicalTablePath().getPartitionName())
+ .isEqualTo("us$20240315");
+ assertThat(sentRecord.get().getRow()).isNull();
+ assertThat(sentRecord.get().getKey()).isNotNull();
+ assertThat(sentRecord.get().getBucketKey()).isNotNull();
+ }
+
+ @Test
+ void testTypedDeleteDelegatesLogicalPrimaryKeyRow() throws Exception {
+ TableInfo tableInfo = implicitPartitionedTableInfo();
+ CapturingUpsertWriter delegate = new CapturingUpsertWriter();
+ TypedUpsertWriterImpl writer =
+ new TypedUpsertWriterImpl<>(delegate, EventPojo.class, tableInfo, null);
+ EventPojo event = new EventPojo();
+ event.region = "us";
+ event.id = 7;
+ event.eventTime = LocalDateTime.of(2024, 3, 15, 10, 30);
+ event.payload = "payload";
+
+ writer.delete(event).get();
+
+ InternalRow row = delegate.deletedRow.get();
+ assertThat(row.getFieldCount()).isEqualTo(tableInfo.getPrimaryKeys().size());
+ assertThat(row.getString(0).toString()).isEqualTo("us");
+ assertThat(row.getInt(1)).isEqualTo(7);
+ assertThat(row.getTimestampNtz(2, 6))
+ .isEqualTo(TimestampNtz.fromLocalDateTime(event.eventTime));
+ }
+
+ @Test
+ void testDeleteUsesFullRowWhenPrimaryKeyRowCountMatchesTableRowCount() throws Exception {
+ TableInfo tableInfo = allColumnsPrimaryKeyTableInfo();
+ AtomicReference sentRecord = new AtomicReference<>();
+ WriterClient writerClient = mock(WriterClient.class);
+ doAnswer(
+ invocation -> {
+ WriteRecord record = invocation.getArgument(0);
+ WriteCallback callback = invocation.getArgument(1);
+ sentRecord.set(record);
+ callback.onCompletion(null, 11L, null);
+ return null;
+ })
+ .when(writerClient)
+ .send(any(WriteRecord.class), any(WriteCallback.class));
+ UpsertWriterImpl writer =
+ new UpsertWriterImpl(tableInfo.getTablePath(), tableInfo, null, writerClient);
+
+ DeleteResult result = writer.delete(GenericRow.of(7, BinaryString.fromString("us"))).get();
+
+ assertThat(result.getLogEndOffset()).isEqualTo(11L);
+ assertThat(sentRecord.get().getPhysicalTablePath().getPartitionName()).isNull();
+ assertThat(sentRecord.get().getRow()).isNull();
+ assertThat(sentRecord.get().getKey()).isNotNull();
+ assertThat(sentRecord.get().getBucketKey()).isNotNull();
+ }
+
+ @Test
+ void testTypedDeleteUsesFullRowWhenPrimaryKeyRowCountMatchesTableRowCount() throws Exception {
+ TableInfo tableInfo = allColumnsPrimaryKeyTableInfo();
+ CapturingUpsertWriter delegate = new CapturingUpsertWriter();
+ TypedUpsertWriterImpl writer =
+ new TypedUpsertWriterImpl<>(delegate, AllPrimaryKeyPojo.class, tableInfo, null);
+ AllPrimaryKeyPojo record = new AllPrimaryKeyPojo();
+ record.id = 7;
+ record.region = "us";
+
+ writer.delete(record).get();
+
+ InternalRow row = delegate.deletedRow.get();
+ assertThat(row.getFieldCount()).isEqualTo(tableInfo.getRowType().getFieldCount());
+ assertThat(row.getInt(0)).isEqualTo(7);
+ assertThat(row.getString(1).toString()).isEqualTo("us");
+ }
+
+ private static TableInfo implicitPartitionedTableInfo() {
+ Schema schema =
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING())
+ .column("id", DataTypes.INT())
+ .column("eventTime", DataTypes.TIMESTAMP())
+ .column("payload", DataTypes.STRING())
+ .primaryKey("region", "id", "eventTime")
+ .build();
+ TableDescriptor descriptor =
+ TableDescriptor.builder()
+ .schema(schema)
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "eventDay",
+ DateTruncPartitionTransform.of(
+ "eventTime", AutoPartitionTimeUnit.DAY))))
+ .distributedBy(1)
+ .build();
+ return TableInfo.of(TablePath.of("db", "t"), 1, 0, descriptor, null, 0, 0);
+ }
+
+ private static TableInfo allColumnsPrimaryKeyTableInfo() {
+ Schema schema =
+ Schema.newBuilder()
+ .column("id", DataTypes.INT())
+ .column("region", DataTypes.STRING())
+ .primaryKey("region", "id")
+ .build();
+ TableDescriptor descriptor =
+ TableDescriptor.builder().schema(schema).distributedBy(1).build();
+ return TableInfo.of(TablePath.of("db", "t2"), 2, 0, descriptor, null, 0, 0);
+ }
+
+ /** POJO matching the full table schema. */
+ public static class EventPojo {
+ public String region;
+ public Integer id;
+ public LocalDateTime eventTime;
+ public String payload;
+
+ public EventPojo() {}
+ }
+
+ /** POJO for a table whose primary key contains every physical column. */
+ public static class AllPrimaryKeyPojo {
+ public Integer id;
+ public String region;
+
+ public AllPrimaryKeyPojo() {}
+ }
+
+ private static class CapturingUpsertWriter implements UpsertWriter {
+ private final AtomicReference deletedRow = new AtomicReference<>();
+
+ @Override
+ public void flush() {}
+
+ @Override
+ public CompletableFuture upsert(InternalRow record) {
+ return CompletableFuture.completedFuture(UpsertResult.empty());
+ }
+
+ @Override
+ public CompletableFuture delete(InternalRow record) {
+ deletedRow.set(record);
+ return CompletableFuture.completedFuture(DeleteResult.empty());
+ }
+ }
+}
diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/DateTruncPartitionTransform.java b/fluss-common/src/main/java/org/apache/fluss/metadata/DateTruncPartitionTransform.java
new file mode 100644
index 0000000000..df791a2534
--- /dev/null
+++ b/fluss-common/src/main/java/org/apache/fluss/metadata/DateTruncPartitionTransform.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.metadata;
+
+import org.apache.fluss.annotation.PublicEvolving;
+import org.apache.fluss.config.AutoPartitionTimeUnit;
+
+import java.time.ZoneId;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.Optional;
+
+import static org.apache.fluss.utils.Preconditions.checkNotNull;
+
+/** A partition transform that truncates a date or timestamp source column to a time unit. */
+@PublicEvolving
+public final class DateTruncPartitionTransform implements PartitionTransform {
+
+ private static final long serialVersionUID = 1L;
+
+ private final String sourceColumn;
+ private final AutoPartitionTimeUnit timeUnit;
+ private final ZoneId timeZone;
+
+ private DateTruncPartitionTransform(
+ String sourceColumn, AutoPartitionTimeUnit timeUnit, ZoneId timeZone) {
+ this.sourceColumn = checkNotNull(sourceColumn, "source column must not be null.");
+ this.timeUnit = checkNotNull(timeUnit, "time unit must not be null.");
+ this.timeZone = timeZone;
+ }
+
+ /** Creates an unresolved date-trunc transform whose time zone is resolved by the server. */
+ public static DateTruncPartitionTransform of(
+ String sourceColumn, AutoPartitionTimeUnit timeUnit) {
+ return new DateTruncPartitionTransform(sourceColumn, timeUnit, null);
+ }
+
+ /** Creates a date-trunc transform with an explicit time zone. */
+ public static DateTruncPartitionTransform of(
+ String sourceColumn, AutoPartitionTimeUnit timeUnit, ZoneId timeZone) {
+ return new DateTruncPartitionTransform(
+ sourceColumn, timeUnit, checkNotNull(timeZone, "time zone must not be null."));
+ }
+
+ @Override
+ public TransformType getType() {
+ return TransformType.DATE_TRUNC;
+ }
+
+ @Override
+ public List getSourceColumns() {
+ return Collections.singletonList(sourceColumn);
+ }
+
+ /** Returns the physical source column. */
+ public String getSourceColumn() {
+ return sourceColumn;
+ }
+
+ /** Returns the truncation unit. */
+ public AutoPartitionTimeUnit getTimeUnit() {
+ return timeUnit;
+ }
+
+ /** Returns the transform time zone if it has been resolved. */
+ public Optional getTimeZone() {
+ return Optional.ofNullable(timeZone);
+ }
+
+ /** Returns a copy of this transform with the given resolved time zone. */
+ public DateTruncPartitionTransform withTimeZone(ZoneId timeZone) {
+ return DateTruncPartitionTransform.of(sourceColumn, timeUnit, timeZone);
+ }
+
+ /** Returns the default virtual partition spec key for this transform. */
+ public String defaultPartitionSpecKey() {
+ return sourceColumn + "_" + timeUnit.name().toLowerCase(Locale.ROOT);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ DateTruncPartitionTransform that = (DateTruncPartitionTransform) o;
+ return Objects.equals(sourceColumn, that.sourceColumn)
+ && timeUnit == that.timeUnit
+ && Objects.equals(timeZone, that.timeZone);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(sourceColumn, timeUnit, timeZone);
+ }
+
+ @Override
+ public String toString() {
+ return "DateTruncPartitionTransform{"
+ + "sourceColumn='"
+ + sourceColumn
+ + '\''
+ + ", timeUnit="
+ + timeUnit
+ + ", timeZone="
+ + timeZone
+ + '}';
+ }
+}
diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionExpression.java b/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionExpression.java
new file mode 100644
index 0000000000..6cb6a1703f
--- /dev/null
+++ b/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionExpression.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.metadata;
+
+import org.apache.fluss.annotation.PublicEvolving;
+
+import java.io.Serializable;
+import java.util.Objects;
+import java.util.Optional;
+
+import static org.apache.fluss.utils.Preconditions.checkNotNull;
+
+/**
+ * Metadata binding a virtual partition spec key to a partition transform.
+ *
+ * <p>The key may be absent (see {@link #of(PartitionTransform)}), in which case it is expected to
+ * be generated later. Instances are immutable; the {@code with*} methods return copies.
+ */
+@PublicEvolving
+public final class PartitionExpression implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    // null until a key has been assigned
+    private final String virtualPartitionSpecKey;
+    private final PartitionTransform transform;
+
+    private PartitionExpression(String virtualPartitionSpecKey, PartitionTransform transform) {
+        this.virtualPartitionSpecKey = virtualPartitionSpecKey;
+        this.transform = checkNotNull(transform, "partition transform must not be null.");
+    }
+
+    /** Creates an expression whose virtual partition spec key will be generated at build time. */
+    public static PartitionExpression of(PartitionTransform transform) {
+        return new PartitionExpression(null, transform);
+    }
+
+    /**
+     * Creates an expression with an explicit virtual partition spec key.
+     *
+     * @throws NullPointerException if either argument is {@code null}
+     */
+    public static PartitionExpression of(
+            String virtualPartitionSpecKey, PartitionTransform transform) {
+        return new PartitionExpression(
+                checkNotNull(
+                        virtualPartitionSpecKey, "virtual partition spec key must not be null."),
+                transform);
+    }
+
+    /** Returns the virtual partition spec key if it has been resolved. */
+    public Optional<String> getVirtualPartitionSpecKey() {
+        return Optional.ofNullable(virtualPartitionSpecKey);
+    }
+
+    /** Returns the transform. */
+    public PartitionTransform getTransform() {
+        return transform;
+    }
+
+    /**
+     * Returns a copy with a resolved virtual partition spec key.
+     *
+     * @throws NullPointerException if {@code virtualPartitionSpecKey} is {@code null}
+     */
+    public PartitionExpression withVirtualPartitionSpecKey(String virtualPartitionSpecKey) {
+        return PartitionExpression.of(virtualPartitionSpecKey, transform);
+    }
+
+    /** Returns a copy with a resolved transform; the key, present or absent, is carried over. */
+    public PartitionExpression withTransform(PartitionTransform transform) {
+        // The two-argument factory rejects a null key, so the keyless case is routed separately.
+        if (virtualPartitionSpecKey == null) {
+            return PartitionExpression.of(transform);
+        }
+        return PartitionExpression.of(virtualPartitionSpecKey, transform);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        PartitionExpression that = (PartitionExpression) o;
+        return Objects.equals(virtualPartitionSpecKey, that.virtualPartitionSpecKey)
+                && Objects.equals(transform, that.transform);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(virtualPartitionSpecKey, transform);
+    }
+
+    @Override
+    public String toString() {
+        return "PartitionExpression{"
+                + "virtualPartitionSpecKey='"
+                + virtualPartitionSpecKey
+                + '\''
+                + ", transform="
+                + transform
+                + '}';
+    }
+}
diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionKey.java b/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionKey.java
new file mode 100644
index 0000000000..311e3f1062
--- /dev/null
+++ b/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionKey.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.metadata;
+
+import org.apache.fluss.annotation.PublicEvolving;
+
+import java.io.Serializable;
+import java.util.Objects;
+import java.util.Optional;
+
+import static org.apache.fluss.utils.Preconditions.checkNotNull;
+
+/**
+ * One ordered table partition key, either a physical column or a virtual expression.
+ *
+ * <p>Exactly one of the column name and the expression is set, depending on the {@link Kind}:
+ * {@link #column(String)} sets only the column name and {@link
+ * #expression(PartitionExpression)} sets only the expression.
+ */
+@PublicEvolving
+public final class PartitionKey implements Serializable {
+
+    private static final long serialVersionUID = 1L;
+
+    /** Partition key kind. */
+    public enum Kind {
+        COLUMN,
+        EXPRESSION
+    }
+
+    private final Kind kind;
+    // set for Kind.COLUMN, null for Kind.EXPRESSION
+    private final String columnName;
+    // set for Kind.EXPRESSION, null for Kind.COLUMN
+    private final PartitionExpression expression;
+
+    private PartitionKey(Kind kind, String columnName, PartitionExpression expression) {
+        this.kind = checkNotNull(kind, "partition key kind must not be null.");
+        this.columnName = columnName;
+        this.expression = expression;
+    }
+
+    /**
+     * Creates a physical column partition key.
+     *
+     * @throws NullPointerException if {@code columnName} is {@code null}
+     */
+    public static PartitionKey column(String columnName) {
+        return new PartitionKey(
+                Kind.COLUMN, checkNotNull(columnName, "column name must not be null."), null);
+    }
+
+    /**
+     * Creates a virtual expression partition key.
+     *
+     * @throws NullPointerException if {@code expression} is {@code null}
+     */
+    public static PartitionKey expression(PartitionExpression expression) {
+        return new PartitionKey(
+                Kind.EXPRESSION,
+                null,
+                checkNotNull(expression, "partition expression must not be null."));
+    }
+
+    /** Returns the partition key kind. */
+    public Kind getKind() {
+        return kind;
+    }
+
+    /**
+     * Returns the physical column name or resolved virtual partition spec key. The result is
+     * empty only for an expression key whose virtual partition spec key has not been set.
+     */
+    public Optional<String> getPartitionSpecKey() {
+        if (kind == Kind.COLUMN) {
+            return Optional.of(columnName);
+        }
+        return expression.getVirtualPartitionSpecKey();
+    }
+
+    /** Returns the physical column name for a column partition key, otherwise empty. */
+    public Optional<String> getColumnName() {
+        return Optional.ofNullable(columnName);
+    }
+
+    /** Returns the expression for a virtual expression partition key, otherwise empty. */
+    public Optional<PartitionExpression> getExpression() {
+        return Optional.ofNullable(expression);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        PartitionKey that = (PartitionKey) o;
+        return kind == that.kind
+                && Objects.equals(columnName, that.columnName)
+                && Objects.equals(expression, that.expression);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(kind, columnName, expression);
+    }
+
+    @Override
+    public String toString() {
+        return "PartitionKey{"
+                + "kind="
+                + kind
+                + ", columnName='"
+                + columnName
+                + '\''
+                + ", expression="
+                + expression
+                + '}';
+    }
+}
diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionTransform.java b/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionTransform.java
new file mode 100644
index 0000000000..573345de9e
--- /dev/null
+++ b/fluss-common/src/main/java/org/apache/fluss/metadata/PartitionTransform.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.metadata;
+
+import org.apache.fluss.annotation.PublicEvolving;
+
+import java.io.Serializable;
+import java.util.List;
+
+/**
+ * A transform used to derive an implicit partition value from physical row columns.
+ *
+ * <p>Implementations are {@link Serializable}.
+ */
+@PublicEvolving
+public interface PartitionTransform extends Serializable {
+
+    /**
+     * Returns the transform type.
+     *
+     * @return the {@link TransformType} identifying this transform
+     */
+    TransformType getType();
+
+    /**
+     * Returns the physical source columns required to compute this transform.
+     *
+     * @return the names of the source columns
+     */
+    List<String> getSourceColumns();
+}
diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/TableDescriptor.java b/fluss-common/src/main/java/org/apache/fluss/metadata/TableDescriptor.java
index fc19f6b7ea..8ee67eacc5 100644
--- a/fluss-common/src/main/java/org/apache/fluss/metadata/TableDescriptor.java
+++ b/fluss-common/src/main/java/org/apache/fluss/metadata/TableDescriptor.java
@@ -28,6 +28,7 @@
import javax.annotation.Nullable;
import java.io.Serializable;
+import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -72,6 +73,7 @@ public final class TableDescriptor implements Serializable {
private final Schema schema;
private final @Nullable String comment;
private final List partitionKeys;
+ private final List partitionExpressions;
private final @Nullable TableDistribution tableDistribution;
private final Map properties;
private final Map customProperties;
@@ -80,43 +82,39 @@ private TableDescriptor(
Schema schema,
@Nullable String comment,
List partitionKeys,
+ List partitionExpressions,
@Nullable TableDistribution tableDistribution,
Map properties,
Map customProperties) {
this.schema = checkNotNull(schema, "schema must not be null.");
this.comment = comment;
- this.partitionKeys = checkNotNull(partitionKeys, "partition keys must not be null.");
+ this.partitionKeys =
+ Collections.unmodifiableList(
+ new ArrayList<>(
+ checkNotNull(partitionKeys, "partition keys must not be null.")));
+ this.partitionExpressions =
+ Collections.unmodifiableList(
+ new ArrayList<>(
+ checkNotNull(
+ partitionExpressions,
+ "partition expressions must not be null.")));
this.properties = unmodifiableMap(checkNotNull(properties, "options must not be null."));
this.customProperties =
unmodifiableMap(
checkNotNull(customProperties, "customProperties must not be null."));
+ validatePartitionMetadata(schema, this.partitionKeys, this.partitionExpressions);
+ List physicalPartitionKeys =
+ getPhysicalPartitionKeys(this.partitionKeys, this.partitionExpressions);
+
// validate and normalize bucket keys.
- this.tableDistribution = normalizeDistribution(schema, partitionKeys, tableDistribution);
+ this.tableDistribution =
+ normalizeDistribution(schema, physicalPartitionKeys, tableDistribution);
- // validate partition keys and bucket keys
Set columnNames =
schema.getColumns().stream()
.map(Schema.Column::getName)
.collect(Collectors.toSet());
- if (schema.getPrimaryKey().isPresent()) {
- List pkColumns = schema.getPrimaryKey().get().getColumnNames();
- partitionKeys.forEach(
- f ->
- checkArgument(
- pkColumns.contains(f),
- "Partitioned Primary Key Table requires partition key %s is a subset of the primary key %s.",
- partitionKeys,
- pkColumns));
- } else {
- partitionKeys.forEach(
- f ->
- checkArgument(
- columnNames.contains(f),
- "Partition key '%s' does not exist in the schema.",
- f));
- }
-
if (this.tableDistribution != null) {
this.tableDistribution
.getBucketKeys()
@@ -169,7 +167,8 @@ public List getBucketKeys() {
*/
public boolean isDefaultBucketKey() {
if (schema.getPrimaryKey().isPresent()) {
- return getBucketKeys().equals(defaultBucketKeyOfPrimaryKeyTable(schema, partitionKeys));
+ return getBucketKeys()
+ .equals(defaultBucketKeyOfPrimaryKeyTable(schema, getPhysicalPartitionKeys()));
} else {
return getBucketKeys().isEmpty();
}
@@ -199,6 +198,42 @@ public List getPartitionKeys() {
return partitionKeys;
}
+ /**
+  * Returns partition expressions for virtual partition keys.
+  *
+  * @return the descriptor's partition expression list; empty when none are declared
+  */
+ public List<PartitionExpression> getPartitionExpressions() {
+     return partitionExpressions;
+ }
+
+ /**
+  * Returns true when the table contains virtual partition expressions.
+  *
+  * @return {@code true} if the partition expression list is non-empty
+  */
+ public boolean hasPartitionExpressions() {
+ return !partitionExpressions.isEmpty();
+ }
+
+ /**
+  * Returns schema-backed physical partition keys only.
+  *
+  * @return the partition keys that are not virtual partition spec keys, in declaration order
+  */
+ public List<String> getPhysicalPartitionKeys() {
+     return getPhysicalPartitionKeys(partitionKeys, partitionExpressions);
+ }
+
+ /**
+  * Returns virtual partition spec keys only.
+  *
+  * @return one spec key per partition expression, in expression order
+  */
+ public List<String> getVirtualPartitionKeys() {
+     return getVirtualPartitionKeys(partitionExpressions);
+ }
+
+ /**
+  * Returns physical columns referenced by partition transforms.
+  *
+  * @return the distinct source column names, in first-seen order
+  */
+ public List<String> getPartitionSourceColumns() {
+     return getPartitionSourceColumns(partitionExpressions);
+ }
+
+ /**
+  * Returns physical columns required to compute partition specs.
+  *
+  * @return the physical partition keys followed by any transform source columns that are not
+  *     already in the list; a fresh mutable list on every call
+  */
+ public List<String> getPartitionInputColumns() {
+     List<String> partitionInputColumns = new ArrayList<>(getPhysicalPartitionKeys());
+     for (String sourceColumn : getPartitionSourceColumns()) {
+         // Skip columns that are already listed so each input column appears once.
+         if (!partitionInputColumns.contains(sourceColumn)) {
+             partitionInputColumns.add(sourceColumn);
+         }
+     }
+     return partitionInputColumns;
+ }
+
/** Returns the distribution of the table if the {@code DISTRIBUTED} clause is defined. */
public Optional getTableDistribution() {
return Optional.ofNullable(tableDistribution);
@@ -242,7 +277,13 @@ public int getReplicationFactor() {
*/
public TableDescriptor withProperties(Map newProperties) {
return new TableDescriptor(
- schema, comment, partitionKeys, tableDistribution, newProperties, customProperties);
+ schema,
+ comment,
+ partitionKeys,
+ partitionExpressions,
+ tableDistribution,
+ newProperties,
+ customProperties);
}
/**
@@ -255,6 +296,7 @@ public TableDescriptor withProperties(
schema,
comment,
partitionKeys,
+ partitionExpressions,
tableDistribution,
newProperties,
newCustomProperties);
@@ -302,6 +344,7 @@ public TableDescriptor withBucketCount(int newBucketCount) {
schema,
comment,
partitionKeys,
+ partitionExpressions,
new TableDistribution(
newBucketCount,
Optional.ofNullable(tableDistribution)
@@ -311,6 +354,34 @@ public TableDescriptor withBucketCount(int newBucketCount) {
customProperties);
}
+ /**
+  * Returns a copy whose implicit partition transforms have resolved time-zone metadata.
+  *
+  * <p>Only {@code DateTruncPartitionTransform} instances that report no time zone are changed;
+  * every other transform is carried over as is.
+  *
+  * @param defaultTimeZone zone applied to date-trunc transforms that carry none; must not be null
+  * @return this descriptor when it has no partition expressions, otherwise a new descriptor
+  *     built from the rewritten expressions
+  */
+ public TableDescriptor withResolvedPartitionExpressionTimeZone(ZoneId defaultTimeZone) {
+     checkNotNull(defaultTimeZone, "default time zone must not be null.");
+     if (partitionExpressions.isEmpty()) {
+         return this;
+     }
+     List<PartitionExpression> resolvedPartitionExpressions =
+             new ArrayList<>(partitionExpressions.size());
+     for (PartitionExpression partitionExpression : partitionExpressions) {
+         PartitionTransform transform = partitionExpression.getTransform();
+         if (transform instanceof DateTruncPartitionTransform) {
+             DateTruncPartitionTransform dateTruncTransform =
+                     (DateTruncPartitionTransform) transform;
+             // An explicitly configured zone wins over the default.
+             if (!dateTruncTransform.getTimeZone().isPresent()) {
+                 transform = dateTruncTransform.withTimeZone(defaultTimeZone);
+             }
+         }
+         resolvedPartitionExpressions.add(partitionExpression.withTransform(transform));
+     }
+     return new TableDescriptor(
+             schema,
+             comment,
+             partitionKeys,
+             resolvedPartitionExpressions,
+             tableDistribution,
+             properties,
+             customProperties);
+ }
+
public Optional getComment() {
return Optional.ofNullable(comment);
}
@@ -345,6 +416,7 @@ public boolean equals(Object o) {
return Objects.equals(schema, table.schema)
&& Objects.equals(comment, table.comment)
&& Objects.equals(partitionKeys, table.partitionKeys)
+ && Objects.equals(partitionExpressions, table.partitionExpressions)
&& Objects.equals(tableDistribution, table.tableDistribution)
&& Objects.equals(properties, table.properties)
&& Objects.equals(customProperties, table.customProperties);
@@ -353,7 +425,13 @@ public boolean equals(Object o) {
@Override
public int hashCode() {
return Objects.hash(
- schema, comment, partitionKeys, tableDistribution, properties, customProperties);
+ schema,
+ comment,
+ partitionKeys,
+ partitionExpressions,
+ tableDistribution,
+ properties,
+ customProperties);
}
@Override
@@ -366,6 +444,9 @@ public String toString() {
+ '\''
+ ", partitionKeys="
+ partitionKeys
+ + (partitionExpressions.isEmpty()
+ ? ""
+ : ", partitionExpressions=" + partitionExpressions)
+ ", tableDistribution="
+ tableDistribution
+ ", properties="
@@ -380,17 +461,17 @@ public String toString() {
@Nullable
private static TableDistribution normalizeDistribution(
Schema schema,
- List partitionKeys,
+ List physicalPartitionKeys,
@Nullable TableDistribution originDistribution) {
if (originDistribution != null) {
// we may need to check and normalize bucket key
List bucketKeys = originDistribution.getBucketKeys();
// bucket key shouldn't include partition key
- if (bucketKeys.stream().anyMatch(partitionKeys::contains)) {
+ if (bucketKeys.stream().anyMatch(physicalPartitionKeys::contains)) {
throw new IllegalArgumentException(
String.format(
"Bucket key %s shouldn't include any column in partition keys %s.",
- bucketKeys, partitionKeys));
+ bucketKeys, physicalPartitionKeys));
}
// if primary key set
@@ -399,7 +480,7 @@ private static TableDistribution normalizeDistribution(
if (bucketKeys.isEmpty()) {
return new TableDistribution(
originDistribution.getBucketCount().orElse(null),
- defaultBucketKeyOfPrimaryKeyTable(schema, partitionKeys));
+ defaultBucketKeyOfPrimaryKeyTable(schema, physicalPartitionKeys));
} else {
// check the provided bucket key
List pkColumns = schema.getPrimaryKey().get().getColumnNames();
@@ -410,7 +491,7 @@ private static TableDistribution normalizeDistribution(
+ "keys for primary-key tables. The primary keys are %s, the "
+ "partition keys are %s, but "
+ "the user-defined bucket keys are %s.",
- pkColumns, partitionKeys, bucketKeys));
+ pkColumns, physicalPartitionKeys, bucketKeys));
}
return new TableDistribution(
originDistribution.getBucketCount().orElse(null), bucketKeys);
@@ -423,7 +504,7 @@ private static TableDistribution normalizeDistribution(
// to primary key (exclude partition key if it is partitioned table)
if (schema.getPrimaryKey().isPresent()) {
return new TableDistribution(
- null, defaultBucketKeyOfPrimaryKeyTable(schema, partitionKeys));
+ null, defaultBucketKeyOfPrimaryKeyTable(schema, physicalPartitionKeys));
} else {
return originDistribution;
}
@@ -432,20 +513,135 @@ private static TableDistribution normalizeDistribution(
/** The default bucket key of primary key table is the primary key excluding partition keys. */
private static List defaultBucketKeyOfPrimaryKeyTable(
- Schema schema, List partitionKeys) {
+ Schema schema, List physicalPartitionKeys) {
checkArgument(schema.getPrimaryKey().isPresent(), "Primary key must be set.");
List bucketKeys = new ArrayList<>(schema.getPrimaryKey().get().getColumnNames());
- bucketKeys.removeAll(partitionKeys);
+ bucketKeys.removeAll(physicalPartitionKeys);
if (bucketKeys.isEmpty()) {
throw new IllegalArgumentException(
String.format(
"Primary Key constraint %s should not be same with partition fields %s.",
- schema.getPrimaryKey().get().getColumnNames(), partitionKeys));
+ schema.getPrimaryKey().get().getColumnNames(), physicalPartitionKeys));
}
return bucketKeys;
}
+ /**
+  * Validates partition keys and partition expressions against the schema.
+  *
+  * <p>Checks, in order: no duplicate partition keys; no duplicate virtual spec keys; every
+  * virtual spec key is listed in the partition keys and does not collide with a column name;
+  * every partition key is a column or a virtual spec key; every transform source column
+  * exists and is non-nullable; and, for primary-key tables, physical partition keys and
+  * transform source columns are part of the primary key.
+  *
+  * @param schema the table schema
+  * @param partitionKeys ordered partition keys (physical and virtual)
+  * @param partitionExpressions expressions backing the virtual partition keys
+  * @throws IllegalArgumentException if the final partition-key existence check fails
+  */
+ private static void validatePartitionMetadata(
+         Schema schema,
+         List<String> partitionKeys,
+         List<PartitionExpression> partitionExpressions) {
+     Set<String> partitionKeySet = new HashSet<>(partitionKeys);
+     checkArgument(
+             partitionKeySet.size() == partitionKeys.size(),
+             "Duplicate partition keys are not allowed: %s.",
+             partitionKeys);
+
+     Set<String> columnNames =
+             schema.getColumns().stream()
+                     .map(Schema.Column::getName)
+                     .collect(Collectors.toSet());
+     List<String> virtualPartitionKeys = getVirtualPartitionKeys(partitionExpressions);
+     Set<String> virtualPartitionKeySet = new HashSet<>(virtualPartitionKeys);
+     checkArgument(
+             virtualPartitionKeySet.size() == virtualPartitionKeys.size(),
+             "Duplicate virtual partition spec keys are not allowed: %s.",
+             virtualPartitionKeys);
+
+     for (String virtualPartitionKey : virtualPartitionKeys) {
+         checkArgument(
+                 partitionKeySet.contains(virtualPartitionKey),
+                 "Virtual partition spec key '%s' is not present in partition keys %s.",
+                 virtualPartitionKey,
+                 partitionKeys);
+         checkArgument(
+                 !columnNames.contains(virtualPartitionKey),
+                 "Virtual partition spec key '%s' conflicts with a physical column.",
+                 virtualPartitionKey);
+     }
+
+     List<String> physicalPartitionKeys =
+             getPhysicalPartitionKeys(partitionKeys, partitionExpressions);
+     for (String partitionKey : partitionKeys) {
+         if (!columnNames.contains(partitionKey)
+                 && !virtualPartitionKeySet.contains(partitionKey)) {
+             // Without expressions the key can only be a column, so the message says so.
+             if (partitionExpressions.isEmpty()) {
+                 throw new IllegalArgumentException(
+                         String.format(
+                                 "Partition key '%s' does not exist in the schema.",
+                                 partitionKey));
+             }
+             throw new IllegalArgumentException(
+                     String.format(
+                             "Partition key '%s' does not exist in the schema or partition expressions.",
+                             partitionKey));
+         }
+     }
+
+     // Computed once and reused by the primary-key check below.
+     List<String> sourceColumns = getPartitionSourceColumns(partitionExpressions);
+     for (String sourceColumn : sourceColumns) {
+         int sourceColumnIndex = schema.getRowType().getFieldIndex(sourceColumn);
+         checkArgument(
+                 sourceColumnIndex >= 0,
+                 "Partition transform source column '%s' does not exist in the schema.",
+                 sourceColumn);
+         checkArgument(
+                 !schema.getRowType().getTypeAt(sourceColumnIndex).isNullable(),
+                 "Partition transform source column '%s' must be non-nullable.",
+                 sourceColumn);
+     }
+
+     if (schema.getPrimaryKey().isPresent()) {
+         List<String> pkColumns = schema.getPrimaryKey().get().getColumnNames();
+         for (String partitionKey : physicalPartitionKeys) {
+             checkArgument(
+                     pkColumns.contains(partitionKey),
+                     "Partitioned Primary Key Table requires physical partition keys %s is a subset of the primary key %s.",
+                     physicalPartitionKeys,
+                     pkColumns);
+         }
+         for (String sourceColumn : sourceColumns) {
+             checkArgument(
+                     pkColumns.contains(sourceColumn),
+                     "Partitioned Primary Key Table requires transform source column '%s' is in the primary key %s.",
+                     sourceColumn,
+                     pkColumns);
+         }
+     }
+ }
+
+ /**
+  * Filters the partition keys down to those that are not virtual partition spec keys.
+  *
+  * @param partitionKeys ordered partition keys (physical and virtual)
+  * @param partitionExpressions expressions whose spec keys identify the virtual keys
+  * @return the remaining keys, in their original order
+  */
+ private static List<String> getPhysicalPartitionKeys(
+         List<String> partitionKeys, List<PartitionExpression> partitionExpressions) {
+     Set<String> virtualPartitionKeys =
+             new HashSet<>(getVirtualPartitionKeys(partitionExpressions));
+     return partitionKeys.stream()
+             .filter(partitionKey -> !virtualPartitionKeys.contains(partitionKey))
+             .collect(Collectors.toList());
+ }
+
+ /**
+  * Collects the virtual partition spec key of every partition expression.
+  *
+  * @param partitionExpressions the expressions to read
+  * @return the spec keys, in expression order; duplicates are not removed here
+  */
+ private static List<String> getVirtualPartitionKeys(
+         List<PartitionExpression> partitionExpressions) {
+     List<String> virtualPartitionKeys = new ArrayList<>(partitionExpressions.size());
+     for (PartitionExpression partitionExpression : partitionExpressions) {
+         // Read the optional once; checkArgument rejects an absent spec key.
+         Optional<String> specKey = partitionExpression.getVirtualPartitionSpecKey();
+         checkArgument(
+                 specKey.isPresent(),
+                 "Virtual partition expression must have a resolved partition spec key.");
+         virtualPartitionKeys.add(specKey.get());
+     }
+     return virtualPartitionKeys;
+ }
+
+ /**
+  * Collects the source columns of all partition transforms without duplicates.
+  *
+  * @param partitionExpressions the expressions whose transforms are inspected
+  * @return distinct source column names, in first-seen order
+  */
+ private static List<String> getPartitionSourceColumns(
+         List<PartitionExpression> partitionExpressions) {
+     List<String> sourceColumns = new ArrayList<>();
+     for (PartitionExpression partitionExpression : partitionExpressions) {
+         for (String sourceColumn : partitionExpression.getTransform().getSourceColumns()) {
+             // Several transforms may read the same column; keep it once.
+             if (!sourceColumns.contains(sourceColumn)) {
+                 sourceColumns.add(sourceColumn);
+             }
+         }
+     }
+     return sourceColumns;
+ }
+
// ----------------------------------------------------------------------------------------
/**
@@ -508,12 +704,15 @@ public static class Builder {
private final Map properties;
private final Map customProperties;
private final List partitionKeys;
+ private final List partitionExpressions;
private @Nullable String comment;
private @Nullable TableDistribution tableDistribution;
+ private @Nullable PartitionDeclarationMode partitionDeclarationMode;
protected Builder() {
this.properties = new HashMap<>();
this.partitionKeys = new ArrayList<>();
+ this.partitionExpressions = new ArrayList<>();
this.customProperties = new HashMap<>();
}
@@ -522,8 +721,15 @@ protected Builder(TableDescriptor descriptor) {
this.properties = new HashMap<>(descriptor.getProperties());
this.customProperties = new HashMap<>(descriptor.getCustomProperties());
this.partitionKeys = new ArrayList<>(descriptor.getPartitionKeys());
+ this.partitionExpressions = new ArrayList<>(descriptor.getPartitionExpressions());
this.comment = descriptor.getComment().orElse(null);
this.tableDistribution = descriptor.getTableDistribution().orElse(null);
+ if (!partitionKeys.isEmpty()) {
+ this.partitionDeclarationMode =
+ partitionExpressions.isEmpty()
+ ? PartitionDeclarationMode.LEGACY_PHYSICAL
+ : PartitionDeclarationMode.PARTITION_KEYS;
+ }
}
/** Define the schema of the {@link TableDescriptor}. */
@@ -614,8 +820,41 @@ public Builder partitionedBy(String... partitionKeys) {
/** Define which columns this table is partitioned by. */
public Builder partitionedBy(List partitionKeys) {
+ // Rejects the call once the builder has been switched to PARTITION_KEYS mode.
+ checkArgument(
+ partitionDeclarationMode == null
+ || partitionDeclarationMode == PartitionDeclarationMode.LEGACY_PHYSICAL,
+ "partitionedBy(...) and partitionedByKeys(...) cannot be mixed in the same builder.");
+ partitionDeclarationMode = PartitionDeclarationMode.LEGACY_PHYSICAL;
this.partitionKeys.clear();
this.partitionKeys.addAll(partitionKeys);
+ // Legacy declarations list keys only, so the expression list is kept empty.
+ this.partitionExpressions.clear();
+ return this;
+ }
+
+ /**
+  * Define ordered partition keys, including physical columns and virtual expressions.
+  *
+  * <p>Varargs convenience that wraps the array with {@code Arrays.asList} and delegates to the
+  * list-based overload.
+  *
+  * @param partitionKeys the partition keys in declaration order
+  * @return this builder
+  */
+ public Builder partitionedByKeys(PartitionKey... partitionKeys) {
+ return partitionedByKeys(Arrays.asList(partitionKeys));
+ }
+
+ /**
+  * Define ordered partition keys, including physical columns and virtual expressions.
+  *
+  * <p>Column keys contribute their column name. Expression keys are resolved first and
+  * contribute their virtual partition spec key plus the resolved expression. The builder is
+  * only modified after every key has been resolved, so a failed call leaves the previous
+  * partition declaration intact.
+  *
+  * @param partitionKeys the partition keys in declaration order
+  * @return this builder
+  * @throws IllegalArgumentException if the mode check or expression resolution rejects the call
+  */
+ public Builder partitionedByKeys(List<PartitionKey> partitionKeys) {
+     checkArgument(
+             partitionDeclarationMode == null
+                     || partitionDeclarationMode == PartitionDeclarationMode.PARTITION_KEYS,
+             "partitionedBy(...) and partitionedByKeys(...) cannot be mixed in the same builder.");
+     // Resolve into scratch lists so a failure part-way through cannot leave the builder
+     // half-cleared or locked into PARTITION_KEYS mode.
+     List<String> resolvedKeys = new ArrayList<>(partitionKeys.size());
+     List<PartitionExpression> resolvedExpressions = new ArrayList<>();
+     for (PartitionKey partitionKey : partitionKeys) {
+         if (partitionKey.getKind() == PartitionKey.Kind.COLUMN) {
+             resolvedKeys.add(partitionKey.getColumnName().get());
+         } else {
+             PartitionExpression resolvedExpression =
+                     resolvePartitionExpression(partitionKey.getExpression().get());
+             resolvedKeys.add(resolvedExpression.getVirtualPartitionSpecKey().get());
+             resolvedExpressions.add(resolvedExpression);
+         }
+     }
+     partitionDeclarationMode = PartitionDeclarationMode.PARTITION_KEYS;
+     this.partitionKeys.clear();
+     this.partitionKeys.addAll(resolvedKeys);
+     this.partitionExpressions.clear();
+     this.partitionExpressions.addAll(resolvedExpressions);
return this;
}
@@ -653,9 +892,32 @@ public TableDescriptor build() {
schema,
comment,
partitionKeys,
+ partitionExpressions,
tableDistribution,
properties,
customProperties);
}
+
+ /**
+  * Ensures the expression carries a virtual partition spec key.
+  *
+  * @param partitionExpression the expression to resolve
+  * @return the same expression when it already has a spec key; otherwise a copy whose spec key
+  *     is the default reported by its {@code DateTruncPartitionTransform}
+  */
+ private static PartitionExpression resolvePartitionExpression(
+         PartitionExpression partitionExpression) {
+     boolean alreadyResolved = partitionExpression.getVirtualPartitionSpecKey().isPresent();
+     if (!alreadyResolved) {
+         PartitionTransform transform = partitionExpression.getTransform();
+         // Only date-trunc transforms can supply a default spec key here.
+         checkArgument(
+                 transform instanceof DateTruncPartitionTransform,
+                 "Unsupported partition transform type: %s.",
+                 transform.getType());
+         return partitionExpression.withVirtualPartitionSpecKey(
+                 ((DateTruncPartitionTransform) transform).defaultPartitionSpecKey());
+     }
+     return partitionExpression;
+ }
+
+ /** Records which builder API declared the partitioning so the two cannot be mixed. */
+ private enum PartitionDeclarationMode {
+ // Set by partitionedBy(...): partition keys only, no partition expressions.
+ LEGACY_PHYSICAL,
+ // Set by partitionedByKeys(...): ordered PartitionKey declarations.
+ PARTITION_KEYS
+ }
}
}
diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/TableInfo.java b/fluss-common/src/main/java/org/apache/fluss/metadata/TableInfo.java
index 00f58b81f0..a6a801bbee 100644
--- a/fluss-common/src/main/java/org/apache/fluss/metadata/TableInfo.java
+++ b/fluss-common/src/main/java/org/apache/fluss/metadata/TableInfo.java
@@ -28,11 +28,16 @@
import javax.annotation.Nullable;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
+import java.util.Set;
import java.util.stream.Collectors;
+import static org.apache.fluss.utils.Preconditions.checkArgument;
+
/**
* Information of a created table metadata, includes table id (unique identifier of the table in the
* cluster), schema, distribution, partitioning, etc.
@@ -59,6 +64,7 @@ public final class TableInfo {
private final List physicalPrimaryKeys;
private final List bucketKeys;
private final List partitionKeys;
+ private final List partitionExpressions;
private final int numBuckets;
private final Configuration properties;
private final TableConfig tableConfig;
@@ -85,15 +91,51 @@ public TableInfo(
@Nullable String comment,
long createdTime,
long modifiedTime) {
+ this(
+ tablePath,
+ tableId,
+ schemaId,
+ schema,
+ bucketKeys,
+ partitionKeys,
+ Collections.emptyList(),
+ numBuckets,
+ properties,
+ customProperties,
+ remoteDataDir,
+ comment,
+ createdTime,
+ modifiedTime);
+ }
+
+ public TableInfo(
+ TablePath tablePath,
+ long tableId,
+ int schemaId,
+ Schema schema,
+ List bucketKeys,
+ List partitionKeys,
+ List partitionExpressions,
+ int numBuckets,
+ Configuration properties,
+ Configuration customProperties,
+ @Nullable String remoteDataDir,
+ @Nullable String comment,
+ long createdTime,
+ long modifiedTime) {
this.tablePath = tablePath;
this.tableId = tableId;
this.schemaId = schemaId;
this.schema = schema;
this.rowType = schema.getRowType();
this.primaryKeys = schema.getPrimaryKeyColumnNames();
- this.physicalPrimaryKeys = generatePhysicalPrimaryKey(primaryKeys, partitionKeys);
- this.bucketKeys = bucketKeys;
- this.partitionKeys = partitionKeys;
+ this.bucketKeys = Collections.unmodifiableList(new ArrayList<>(bucketKeys));
+ this.partitionKeys = Collections.unmodifiableList(new ArrayList<>(partitionKeys));
+ this.partitionExpressions =
+ Collections.unmodifiableList(new ArrayList<>(partitionExpressions));
+ validatePartitionMetadata(schema, this.partitionKeys, this.partitionExpressions);
+ this.physicalPrimaryKeys =
+ generatePhysicalPrimaryKey(primaryKeys, getPhysicalPartitionKeys());
this.numBuckets = numBuckets;
this.properties = properties;
this.tableConfig = new TableConfig(properties);
@@ -327,6 +369,59 @@ public List getPartitionKeys() {
return partitionKeys;
}
+ /**
+  * Returns partition expressions for virtual partition keys.
+  *
+  * @return the table's partition expression list; empty when none are declared
+  */
+ public List<PartitionExpression> getPartitionExpressions() {
+     return partitionExpressions;
+ }
+
+ /**
+  * Returns true when the table contains virtual partition expressions.
+  *
+  * @return {@code true} if the partition expression list is non-empty
+  */
+ public boolean hasPartitionExpressions() {
+ return !partitionExpressions.isEmpty();
+ }
+
+ /**
+  * Returns schema-backed physical partition keys only.
+  *
+  * @return the partition keys that are not virtual partition spec keys, in declaration
+  *     order; a new list on every call
+  */
+ public List<String> getPhysicalPartitionKeys() {
+     List<String> virtualPartitionKeys = getVirtualPartitionKeys();
+     return partitionKeys.stream()
+             .filter(partitionKey -> !virtualPartitionKeys.contains(partitionKey))
+             .collect(Collectors.toList());
+ }
+
+ /**
+  * Returns virtual partition spec keys only.
+  *
+  * @return the spec keys of all partition expressions that have one, in expression order;
+  *     a new list on every call
+  */
+ public List<String> getVirtualPartitionKeys() {
+     List<String> virtualPartitionKeys = new ArrayList<>(partitionExpressions.size());
+     for (PartitionExpression partitionExpression : partitionExpressions) {
+         // Expressions without a spec key are skipped rather than rejected here.
+         Optional<String> specKey = partitionExpression.getVirtualPartitionSpecKey();
+         if (specKey.isPresent()) {
+             virtualPartitionKeys.add(specKey.get());
+         }
+     }
+     return virtualPartitionKeys;
+ }
+
+ /**
+  * Returns physical columns referenced by partition transforms.
+  *
+  * @return distinct source column names, in first-seen order; a new list on every call
+  */
+ public List<String> getPartitionSourceColumns() {
+     List<String> sourceColumns = new ArrayList<>();
+     for (PartitionExpression partitionExpression : partitionExpressions) {
+         for (String sourceColumn : partitionExpression.getTransform().getSourceColumns()) {
+             // Several transforms may read the same column; keep it once.
+             if (!sourceColumns.contains(sourceColumn)) {
+                 sourceColumns.add(sourceColumn);
+             }
+         }
+     }
+     return sourceColumns;
+ }
+
+ /**
+  * Returns physical columns required to compute partition specs.
+  *
+  * @return the physical partition keys followed by any transform source columns that are not
+  *     already in the list; a new list on every call
+  */
+ public List<String> getPartitionInputColumns() {
+     List<String> partitionInputColumns = new ArrayList<>(getPhysicalPartitionKeys());
+     for (String sourceColumn : getPartitionSourceColumns()) {
+         // Skip columns that are already listed so each input column appears once.
+         if (!partitionInputColumns.contains(sourceColumn)) {
+             partitionInputColumns.add(sourceColumn);
+         }
+     }
+     return partitionInputColumns;
+ }
+
/** Get the number of buckets of the table. */
public int getNumBuckets() {
return numBuckets;
@@ -401,10 +496,13 @@ public long getModifiedTime() {
* table.
*/
public TableDescriptor toTableDescriptor() {
- return TableDescriptor.builder()
- .schema(schema)
- .partitionedBy(partitionKeys)
- .distributedBy(numBuckets, bucketKeys)
+ TableDescriptor.Builder builder = TableDescriptor.builder().schema(schema);
+ if (partitionExpressions.isEmpty()) {
+ builder.partitionedBy(partitionKeys);
+ } else {
+ builder.partitionedByKeys(toPartitionKeys(partitionKeys, partitionExpressions));
+ }
+ return builder.distributedBy(numBuckets, bucketKeys)
.properties(properties.toMap())
.customProperties(customProperties.toMap())
.comment(comment)
@@ -436,6 +534,7 @@ public static TableInfo of(
schema,
tableDescriptor.getBucketKeys(),
tableDescriptor.getPartitionKeys(),
+ tableDescriptor.getPartitionExpressions(),
numBuckets,
Configuration.fromMap(tableDescriptor.getProperties()),
Configuration.fromMap(tableDescriptor.getCustomProperties()),
@@ -461,6 +560,7 @@ public boolean equals(Object o) {
&& Objects.equals(physicalPrimaryKeys, that.physicalPrimaryKeys)
&& Objects.equals(bucketKeys, that.bucketKeys)
&& Objects.equals(partitionKeys, that.partitionKeys)
+ && Objects.equals(partitionExpressions, that.partitionExpressions)
&& Objects.equals(properties, that.properties)
&& Objects.equals(customProperties, that.customProperties)
&& Objects.equals(remoteDataDir, that.remoteDataDir)
@@ -479,6 +579,7 @@ public int hashCode() {
physicalPrimaryKeys,
bucketKeys,
partitionKeys,
+ partitionExpressions,
numBuckets,
properties,
customProperties,
@@ -503,6 +604,8 @@ public String toString() {
+ bucketKeys
+ ", partitionKeys="
+ partitionKeys
+ + ", partitionExpressions="
+ + partitionExpressions
+ ", numBuckets="
+ numBuckets
+ ", properties="
@@ -529,4 +632,121 @@ private static List generatePhysicalPrimaryKey(
.filter(pk -> !partitionKeys.contains(pk))
.collect(Collectors.toList());
}
+
+ /**
+  * Validates partition keys and partition expressions against the schema.
+  *
+  * <p>Checks, in order: no duplicate partition keys; every expression has a unique spec key
+  * that is listed in the partition keys; every partition key is a column or a virtual spec
+  * key; no virtual spec key collides with a column name; every transform source column exists
+  * and is non-nullable; and, for primary-key tables, physical partition keys and transform
+  * source columns are part of the primary key.
+  *
+  * @param schema the table schema
+  * @param partitionKeys ordered partition keys (physical and virtual)
+  * @param partitionExpressions expressions backing the virtual partition keys
+  */
+ private static void validatePartitionMetadata(
+         Schema schema,
+         List<String> partitionKeys,
+         List<PartitionExpression> partitionExpressions) {
+     Set<String> columnNames = new HashSet<>(schema.getColumnNames());
+     Set<String> partitionKeySet = new HashSet<>(partitionKeys);
+     checkArgument(
+             partitionKeySet.size() == partitionKeys.size(),
+             "Duplicate partition keys are not allowed: %s.",
+             partitionKeys);
+
+     Set<String> virtualPartitionKeys =
+             validatePartitionExpressions(partitionKeySet, partitionKeys, partitionExpressions);
+     for (String partitionKey : partitionKeys) {
+         checkArgument(
+                 columnNames.contains(partitionKey)
+                         || virtualPartitionKeys.contains(partitionKey),
+                 "Partition key '%s' does not exist in the schema or partition expressions.",
+                 partitionKey);
+     }
+     for (String virtualPartitionKey : virtualPartitionKeys) {
+         checkArgument(
+                 !columnNames.contains(virtualPartitionKey),
+                 "Virtual partition spec key '%s' conflicts with a physical column.",
+                 virtualPartitionKey);
+     }
+     for (PartitionExpression partitionExpression : partitionExpressions) {
+         for (String sourceColumn : partitionExpression.getTransform().getSourceColumns()) {
+             int sourceColumnIndex = schema.getRowType().getFieldIndex(sourceColumn);
+             checkArgument(
+                     sourceColumnIndex >= 0,
+                     "Partition transform source column '%s' does not exist in the schema.",
+                     sourceColumn);
+             checkArgument(
+                     !schema.getRowType().getTypeAt(sourceColumnIndex).isNullable(),
+                     "Partition transform source column '%s' must be non-nullable.",
+                     sourceColumn);
+         }
+     }
+
+     if (schema.getPrimaryKey().isPresent()) {
+         List<String> pkColumns = schema.getPrimaryKey().get().getColumnNames();
+         List<String> physicalPartitionKeys =
+                 getPhysicalPartitionKeys(partitionKeys, virtualPartitionKeys);
+         for (String partitionKey : physicalPartitionKeys) {
+             checkArgument(
+                     pkColumns.contains(partitionKey),
+                     "Partitioned Primary Key Table requires physical partition keys %s is a subset of the primary key %s.",
+                     physicalPartitionKeys,
+                     pkColumns);
+         }
+         for (PartitionExpression partitionExpression : partitionExpressions) {
+             for (String sourceColumn : partitionExpression.getTransform().getSourceColumns()) {
+                 checkArgument(
+                         pkColumns.contains(sourceColumn),
+                         "Partitioned Primary Key Table requires transform source column '%s' is in the primary key %s.",
+                         sourceColumn,
+                         pkColumns);
+             }
+         }
+     }
+ }
+
+ /**
+  * Filters the partition keys down to those that are not virtual partition spec keys.
+  *
+  * @param partitionKeys ordered partition keys (physical and virtual)
+  * @param virtualPartitionKeys the spec keys to exclude
+  * @return the remaining keys, in their original order
+  */
+ private static List<String> getPhysicalPartitionKeys(
+         List<String> partitionKeys, Set<String> virtualPartitionKeys) {
+     return partitionKeys.stream()
+             .filter(partitionKey -> !virtualPartitionKeys.contains(partitionKey))
+             .collect(Collectors.toList());
+ }
+
+ /**
+  * Validates the spec key of every partition expression and collects the keys.
+  *
+  * <p>Each expression must have a spec key that is listed in the partition keys and is not
+  * shared with another expression.
+  *
+  * @param partitionKeySet the partition keys as a set, used for membership checks
+  * @param partitionKeys the ordered partition keys, used only in the error message
+  * @param partitionExpressions the expressions to validate
+  * @return the set of virtual partition spec keys
+  */
+ private static Set<String> validatePartitionExpressions(
+         Set<String> partitionKeySet,
+         List<String> partitionKeys,
+         List<PartitionExpression> partitionExpressions) {
+     Set<String> virtualPartitionKeys = new HashSet<>();
+     for (PartitionExpression partitionExpression : partitionExpressions) {
+         checkArgument(
+                 partitionExpression.getVirtualPartitionSpecKey().isPresent(),
+                 "Partition expression must contain a resolved virtual partition spec key.");
+         String virtualPartitionKey = partitionExpression.getVirtualPartitionSpecKey().get();
+         checkArgument(
+                 partitionKeySet.contains(virtualPartitionKey),
+                 "Virtual partition spec key '%s' is not present in partition keys %s.",
+                 virtualPartitionKey,
+                 partitionKeys);
+         // Set.add returns false for a key that was already collected.
+         checkArgument(
+                 virtualPartitionKeys.add(virtualPartitionKey),
+                 "Duplicate virtual partition spec key '%s'.",
+                 virtualPartitionKey);
+     }
+     return virtualPartitionKeys;
+ }
+
+ /**
+  * Rebuilds ordered {@code PartitionKey}s from partition key names and partition expressions.
+  *
+  * <p>A key whose name equals an expression's virtual partition spec key becomes an expression
+  * key (the first matching expression wins); every other key becomes a column key.
+  *
+  * @param partitionKeys ordered partition keys (physical and virtual)
+  * @param partitionExpressions expressions backing the virtual partition keys
+  * @return one {@code PartitionKey} per input key, in the same order
+  */
+ private static List<PartitionKey> toPartitionKeys(
+         List<String> partitionKeys, List<PartitionExpression> partitionExpressions) {
+     List<PartitionKey> orderedPartitionKeys = new ArrayList<>(partitionKeys.size());
+     for (String partitionKey : partitionKeys) {
+         PartitionExpression matchedExpression = null;
+         for (PartitionExpression partitionExpression : partitionExpressions) {
+             Optional<String> specKey = partitionExpression.getVirtualPartitionSpecKey();
+             if (specKey.isPresent() && specKey.get().equals(partitionKey)) {
+                 matchedExpression = partitionExpression;
+                 break;
+             }
+         }
+         if (matchedExpression == null) {
+             orderedPartitionKeys.add(PartitionKey.column(partitionKey));
+         } else {
+             orderedPartitionKeys.add(PartitionKey.expression(matchedExpression));
+         }
+     }
+     return orderedPartitionKeys;
+ }
}
diff --git a/fluss-common/src/main/java/org/apache/fluss/metadata/TransformType.java b/fluss-common/src/main/java/org/apache/fluss/metadata/TransformType.java
new file mode 100644
index 0000000000..d2e7d46e57
--- /dev/null
+++ b/fluss-common/src/main/java/org/apache/fluss/metadata/TransformType.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.metadata;
+
+import org.apache.fluss.annotation.PublicEvolving;
+
+/** Supported transform types for implicit partitioning. */
+@PublicEvolving
+public enum TransformType {
+ // NOTE(review): presumably the type reported by DateTruncPartitionTransform — confirm.
+ DATE_TRUNC
+}
diff --git a/fluss-common/src/main/java/org/apache/fluss/utils/PartitionComputer.java b/fluss-common/src/main/java/org/apache/fluss/utils/PartitionComputer.java
new file mode 100644
index 0000000000..9bd531ef5f
--- /dev/null
+++ b/fluss-common/src/main/java/org/apache/fluss/utils/PartitionComputer.java
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.utils;
+
+import org.apache.fluss.config.AutoPartitionTimeUnit;
+import org.apache.fluss.metadata.DateTruncPartitionTransform;
+import org.apache.fluss.metadata.PartitionExpression;
+import org.apache.fluss.metadata.PartitionTransform;
+import org.apache.fluss.metadata.ResolvedPartitionSpec;
+import org.apache.fluss.metadata.TableInfo;
+import org.apache.fluss.metadata.TransformType;
+import org.apache.fluss.row.InternalRow;
+import org.apache.fluss.row.TimestampLtz;
+import org.apache.fluss.row.TimestampNtz;
+import org.apache.fluss.types.DataType;
+import org.apache.fluss.types.DataTypeRoot;
+import org.apache.fluss.types.RowType;
+
+import java.time.LocalDate;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
+import java.time.ZonedDateTime;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.apache.fluss.utils.Preconditions.checkArgument;
+import static org.apache.fluss.utils.Preconditions.checkNotNull;
+
+/** Computes a final partition spec from physical row fields and implicit partition expressions. */
+public class PartitionComputer {
+
+ private final List partitionKeys;
+ private final List partitionFieldComputers;
+
+ public PartitionComputer(TableInfo tableInfo, RowType inputRowType) {
+ this(tableInfo.getPartitionKeys(), tableInfo.getPartitionExpressions(), inputRowType);
+ }
+
+ public PartitionComputer(
+ List partitionKeys,
+ List partitionExpressions,
+ RowType inputRowType) {
+ this.partitionKeys = partitionKeys;
+ this.partitionFieldComputers = new ArrayList<>(partitionKeys.size());
+
+ Map expressionByKey = new HashMap<>();
+ for (PartitionExpression partitionExpression : partitionExpressions) {
+ checkArgument(
+ partitionExpression.getVirtualPartitionSpecKey().isPresent(),
+ "Virtual partition expression must have a resolved partition spec key.");
+ String virtualPartitionKey = partitionExpression.getVirtualPartitionSpecKey().get();
+ checkArgument(
+ partitionKeys.contains(virtualPartitionKey),
+ "Virtual partition spec key '%s' is not present in partition keys %s.",
+ virtualPartitionKey,
+ partitionKeys);
+ checkArgument(
+ !expressionByKey.containsKey(virtualPartitionKey),
+ "Duplicate virtual partition spec key '%s'.",
+ virtualPartitionKey);
+ expressionByKey.put(virtualPartitionKey, partitionExpression);
+ }
+
+ for (String partitionKey : partitionKeys) {
+ PartitionExpression partitionExpression = expressionByKey.get(partitionKey);
+ if (partitionExpression == null) {
+ partitionFieldComputers.add(
+ PhysicalPartitionFieldComputer.create(partitionKey, inputRowType));
+ } else {
+ partitionFieldComputers.add(
+ TransformPartitionFieldComputer.create(partitionExpression, inputRowType));
+ }
+ }
+ }
+
+ /** Computes the internal partition name from the given row. */
+ public String getPartition(InternalRow row) {
+ return getResolvedPartitionSpec(row).getPartitionName();
+ }
+
+ /** Computes the resolved partition spec from the given row. */
+ public ResolvedPartitionSpec getResolvedPartitionSpec(InternalRow row) {
+ List partitionValues = new ArrayList<>(partitionFieldComputers.size());
+ for (PartitionFieldComputer partitionFieldComputer : partitionFieldComputers) {
+ partitionValues.add(partitionFieldComputer.compute(row));
+ }
+ return new ResolvedPartitionSpec(partitionKeys, partitionValues);
+ }
+
+ private interface PartitionFieldComputer {
+ String compute(InternalRow row);
+ }
+
+ private static class PhysicalPartitionFieldComputer implements PartitionFieldComputer {
+ private final String partitionKey;
+ private final DataType dataType;
+ private final InternalRow.FieldGetter fieldGetter;
+
+ private PhysicalPartitionFieldComputer(
+ String partitionKey, DataType dataType, InternalRow.FieldGetter fieldGetter) {
+ this.partitionKey = partitionKey;
+ this.dataType = dataType;
+ this.fieldGetter = fieldGetter;
+ }
+
+ private static PhysicalPartitionFieldComputer create(
+ String partitionKey, RowType inputRowType) {
+ int fieldIndex = inputRowType.getFieldIndex(partitionKey);
+ checkArgument(
+ fieldIndex >= 0,
+ "The partition column %s is not in the row %s.",
+ partitionKey,
+ inputRowType);
+ DataType dataType = inputRowType.getTypeAt(fieldIndex);
+ return new PhysicalPartitionFieldComputer(
+ partitionKey, dataType, InternalRow.createFieldGetter(dataType, fieldIndex));
+ }
+
+ @Override
+ public String compute(InternalRow row) {
+ Object partitionValue = fieldGetter.getFieldOrNull(row);
+ checkNotNull(
+ partitionValue, "Partition value for '%s' shouldn't be null.", partitionKey);
+ return PartitionUtils.convertValueOfType(partitionValue, dataType.getTypeRoot());
+ }
+ }
+
+ private static class TransformPartitionFieldComputer implements PartitionFieldComputer {
+ private final PartitionTransform transform;
+ private final DataType sourceDataType;
+ private final InternalRow.FieldGetter sourceFieldGetter;
+
+ private TransformPartitionFieldComputer(
+ PartitionTransform transform,
+ DataType sourceDataType,
+ InternalRow.FieldGetter sourceFieldGetter) {
+ this.transform = transform;
+ this.sourceDataType = sourceDataType;
+ this.sourceFieldGetter = sourceFieldGetter;
+ }
+
+ private static TransformPartitionFieldComputer create(
+ PartitionExpression partitionExpression, RowType inputRowType) {
+ PartitionTransform transform = partitionExpression.getTransform();
+ checkArgument(
+ transform.getType() == TransformType.DATE_TRUNC,
+ "Unsupported partition transform type: %s.",
+ transform.getType());
+ DateTruncPartitionTransform dateTruncTransform =
+ (DateTruncPartitionTransform) transform;
+ int sourceFieldIndex = inputRowType.getFieldIndex(dateTruncTransform.getSourceColumn());
+ checkArgument(
+ sourceFieldIndex >= 0,
+ "The partition transform source column %s is not in the row %s.",
+ dateTruncTransform.getSourceColumn(),
+ inputRowType);
+ DataType sourceDataType = inputRowType.getTypeAt(sourceFieldIndex);
+ return new TransformPartitionFieldComputer(
+ transform,
+ sourceDataType,
+ InternalRow.createFieldGetter(sourceDataType, sourceFieldIndex));
+ }
+
+ @Override
+ public String compute(InternalRow row) {
+ if (transform.getType() == TransformType.DATE_TRUNC) {
+ return computeDateTrunc((DateTruncPartitionTransform) transform, row);
+ }
+ throw new IllegalArgumentException(
+ "Unsupported partition transform type: " + transform.getType());
+ }
+
+ private String computeDateTrunc(DateTruncPartitionTransform transform, InternalRow row) {
+ Object sourceValue = sourceFieldGetter.getFieldOrNull(row);
+ checkNotNull(
+ sourceValue,
+ "Partition transform source value for '%s' shouldn't be null.",
+ transform.getSourceColumn());
+ ZonedDateTime zonedDateTime = toZonedDateTime(sourceValue, sourceDataType, transform);
+ return PartitionUtils.generateAutoPartitionTime(
+ zonedDateTime, 0, transform.getTimeUnit());
+ }
+
+ private ZonedDateTime toZonedDateTime(
+ Object sourceValue,
+ DataType sourceDataType,
+ DateTruncPartitionTransform transform) {
+ DataTypeRoot typeRoot = sourceDataType.getTypeRoot();
+ AutoPartitionTimeUnit timeUnit = transform.getTimeUnit();
+ switch (typeRoot) {
+ case DATE:
+ checkArgument(
+ timeUnit != AutoPartitionTimeUnit.HOUR,
+ "DATE_TRUNC partition transform does not support DATE + HOUR.");
+ // DATE is represented as epoch-day int in InternalRow.
+ return LocalDate.ofEpochDay((Integer) sourceValue).atStartOfDay(ZoneOffset.UTC);
+ case TIMESTAMP_WITHOUT_TIME_ZONE:
+ return ((TimestampNtz) sourceValue).toLocalDateTime().atZone(ZoneOffset.UTC);
+ case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
+ ZoneId timeZone =
+ transform
+ .getTimeZone()
+ .orElseThrow(
+ () ->
+ new IllegalArgumentException(
+ "DateTruncPartitionTransform must contain a resolved time zone."));
+ return ((TimestampLtz) sourceValue).toInstant().atZone(timeZone);
+ default:
+ throw new IllegalArgumentException(
+ String.format(
+ "DATE_TRUNC partition transform does not support source type %s.",
+ sourceDataType));
+ }
+ }
+ }
+}
diff --git a/fluss-common/src/main/java/org/apache/fluss/utils/PartitionUtils.java b/fluss-common/src/main/java/org/apache/fluss/utils/PartitionUtils.java
index c8f67955ae..907ec3cd24 100644
--- a/fluss-common/src/main/java/org/apache/fluss/utils/PartitionUtils.java
+++ b/fluss-common/src/main/java/org/apache/fluss/utils/PartitionUtils.java
@@ -28,7 +28,9 @@
import org.apache.fluss.row.GenericRow;
import org.apache.fluss.row.TimestampLtz;
import org.apache.fluss.row.TimestampNtz;
+import org.apache.fluss.types.DataField;
import org.apache.fluss.types.DataTypeRoot;
+import org.apache.fluss.types.DataTypes;
import org.apache.fluss.types.RowType;
import java.time.Instant;
@@ -352,10 +354,16 @@ public static String convertValueOfType(Object value, DataTypeRoot type) {
/** Projects {@code tableInfo}'s row type down to its partition key columns, in key order. */
public static RowType partitionRowType(TableInfo tableInfo) {
RowType schema = tableInfo.getRowType();
- List fieldNames = schema.getFieldNames();
- int[] indexes =
- tableInfo.getPartitionKeys().stream().mapToInt(fieldNames::indexOf).toArray();
- return schema.project(indexes);
+        List<String> partitionKeys = tableInfo.getPartitionKeys();
+        List<String> virtualPartitionKeys = tableInfo.getVirtualPartitionKeys();
+        List<DataField> partitionFields = new ArrayList<>(partitionKeys.size());
+        for (String partitionKey : partitionKeys) {
+            if (virtualPartitionKeys.contains(partitionKey)) {
+                // A virtual key is not looked up in the schema; it gets a synthesized STRING field.
+                partitionFields.add(new DataField(partitionKey, DataTypes.STRING().copy(false)));
+            } else {
+                partitionFields.add(schema.getField(partitionKey));
+            }
+        }
+        return new RowType(partitionFields);
}
/**
diff --git a/fluss-common/src/main/java/org/apache/fluss/utils/json/PartitionExpressionJsonSerde.java b/fluss-common/src/main/java/org/apache/fluss/utils/json/PartitionExpressionJsonSerde.java
new file mode 100644
index 0000000000..40dcb8ff62
--- /dev/null
+++ b/fluss-common/src/main/java/org/apache/fluss/utils/json/PartitionExpressionJsonSerde.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.utils.json;
+
+import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.config.AutoPartitionTimeUnit;
+import org.apache.fluss.metadata.DateTruncPartitionTransform;
+import org.apache.fluss.metadata.PartitionExpression;
+import org.apache.fluss.metadata.PartitionTransform;
+import org.apache.fluss.metadata.TransformType;
+import org.apache.fluss.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerator;
+import org.apache.fluss.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode;
+
+import java.io.IOException;
+import java.time.ZoneId;
+
+/** Json serializer and deserializer for {@link PartitionExpression}. */
+@Internal
+public class PartitionExpressionJsonSerde
+ implements JsonSerializer, JsonDeserializer {
+
+ public static final PartitionExpressionJsonSerde INSTANCE = new PartitionExpressionJsonSerde();
+
+ public static final String PARTITION_EXPRESSIONS_NAME = "partition_expressions";
+
+ private static final String VIRTUAL_PARTITION_SPEC_KEY_NAME = "virtual_partition_spec_key";
+ private static final String TRANSFORM_NAME = "transform";
+ private static final String TYPE_NAME = "type";
+ private static final String SOURCE_COLUMN_NAME = "source_column";
+ private static final String UNIT_NAME = "unit";
+ private static final String TIME_ZONE_NAME = "time_zone";
+ private static final String DATE_TRUNC_TYPE = "date_trunc";
+
+ @Override
+ public void serialize(PartitionExpression partitionExpression, JsonGenerator generator)
+ throws IOException {
+ generator.writeStartObject();
+ if (partitionExpression.getVirtualPartitionSpecKey().isPresent()) {
+ generator.writeStringField(
+ VIRTUAL_PARTITION_SPEC_KEY_NAME,
+ partitionExpression.getVirtualPartitionSpecKey().get());
+ }
+
+ generator.writeFieldName(TRANSFORM_NAME);
+ serializeTransform(partitionExpression.getTransform(), generator);
+ generator.writeEndObject();
+ }
+
+ @Override
+ public PartitionExpression deserialize(JsonNode node) {
+ return deserialize(node, false);
+ }
+
+ /** Deserializes a partition expression whose transform metadata must be fully resolved. */
+ public PartitionExpression deserializeResolved(JsonNode node) {
+ return deserialize(node, true);
+ }
+
+ private PartitionExpression deserialize(JsonNode node, boolean requireResolvedTransform) {
+ JsonNode virtualPartitionSpecKeyNode = node.get(VIRTUAL_PARTITION_SPEC_KEY_NAME);
+ if (virtualPartitionSpecKeyNode == null) {
+ throw new IllegalArgumentException(
+ "Partition expression must contain virtual_partition_spec_key.");
+ }
+ PartitionTransform transform =
+ deserializeTransform(node.get(TRANSFORM_NAME), requireResolvedTransform);
+ return PartitionExpression.of(virtualPartitionSpecKeyNode.asText(), transform);
+ }
+
+ private void serializeTransform(PartitionTransform transform, JsonGenerator generator)
+ throws IOException {
+ if (transform.getType() == TransformType.DATE_TRUNC) {
+ DateTruncPartitionTransform dateTruncTransform =
+ (DateTruncPartitionTransform) transform;
+ generator.writeStartObject();
+ generator.writeStringField(TYPE_NAME, DATE_TRUNC_TYPE);
+ generator.writeStringField(SOURCE_COLUMN_NAME, dateTruncTransform.getSourceColumn());
+ generator.writeStringField(UNIT_NAME, dateTruncTransform.getTimeUnit().name());
+ if (dateTruncTransform.getTimeZone().isPresent()) {
+ generator.writeStringField(
+ TIME_ZONE_NAME, dateTruncTransform.getTimeZone().get().getId());
+ }
+ generator.writeEndObject();
+ } else {
+ throw new IllegalArgumentException(
+ "Unsupported partition transform type: " + transform.getType());
+ }
+ }
+
+ private PartitionTransform deserializeTransform(JsonNode node, boolean requireResolved) {
+ if (node == null) {
+ throw new IllegalArgumentException("Partition expression must contain transform.");
+ }
+ String type = node.get(TYPE_NAME).asText();
+ if (!DATE_TRUNC_TYPE.equals(type)) {
+ throw new IllegalArgumentException("Unsupported partition transform type: " + type);
+ }
+
+ String sourceColumn = node.get(SOURCE_COLUMN_NAME).asText();
+ AutoPartitionTimeUnit timeUnit =
+ AutoPartitionTimeUnit.valueOf(node.get(UNIT_NAME).asText());
+ JsonNode timeZoneNode = node.get(TIME_ZONE_NAME);
+ if (timeZoneNode == null) {
+ if (requireResolved) {
+ throw new IllegalArgumentException(
+ "Persisted DATE_TRUNC partition transform must contain resolved time zone.");
+ }
+ return DateTruncPartitionTransform.of(sourceColumn, timeUnit);
+ }
+ return DateTruncPartitionTransform.of(
+ sourceColumn, timeUnit, ZoneId.of(timeZoneNode.asText()));
+ }
+}
diff --git a/fluss-common/src/main/java/org/apache/fluss/utils/json/TableDescriptorJsonSerde.java b/fluss-common/src/main/java/org/apache/fluss/utils/json/TableDescriptorJsonSerde.java
index 6945196073..753ade397d 100644
--- a/fluss-common/src/main/java/org/apache/fluss/utils/json/TableDescriptorJsonSerde.java
+++ b/fluss-common/src/main/java/org/apache/fluss/utils/json/TableDescriptorJsonSerde.java
@@ -18,6 +18,8 @@
package org.apache.fluss.utils.json;
import org.apache.fluss.annotation.Internal;
+import org.apache.fluss.metadata.PartitionExpression;
+import org.apache.fluss.metadata.PartitionKey;
import org.apache.fluss.metadata.Schema;
import org.apache.fluss.metadata.TableDescriptor;
import org.apache.fluss.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerator;
@@ -26,9 +28,12 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
/** Json serializer and deserializer for {@link TableDescriptor}. */
@Internal
@@ -71,6 +76,14 @@ public void serialize(TableDescriptor tableDescriptor, JsonGenerator generator)
generator.writeString(partitionKey);
}
generator.writeEndArray();
+ if (!tableDescriptor.getPartitionExpressions().isEmpty()) {
+ generator.writeArrayFieldStart(PartitionExpressionJsonSerde.PARTITION_EXPRESSIONS_NAME);
+ for (PartitionExpression partitionExpression :
+ tableDescriptor.getPartitionExpressions()) {
+ PartitionExpressionJsonSerde.INSTANCE.serialize(partitionExpression, generator);
+ }
+ generator.writeEndArray();
+ }
// serialize tableDistribution.
if (tableDescriptor.getTableDistribution().isPresent()) {
@@ -120,7 +133,21 @@ public TableDescriptor deserialize(JsonNode node) {
while (partitionJsons.hasNext()) {
partitionKeys.add(partitionJsons.next().asText());
}
- builder.partitionedBy(partitionKeys);
+
+ List partitionExpressions = new ArrayList<>();
+ if (node.has(PartitionExpressionJsonSerde.PARTITION_EXPRESSIONS_NAME)) {
+ Iterator expressionJsons =
+ node.get(PartitionExpressionJsonSerde.PARTITION_EXPRESSIONS_NAME).elements();
+ while (expressionJsons.hasNext()) {
+ partitionExpressions.add(
+ PartitionExpressionJsonSerde.INSTANCE.deserialize(expressionJsons.next()));
+ }
+ }
+ if (partitionExpressions.isEmpty()) {
+ builder.partitionedBy(partitionKeys);
+ } else {
+ builder.partitionedByKeys(toPartitionKeys(partitionKeys, partitionExpressions));
+ }
if (node.has(BUCKET_KEY_NAME) || node.has(BUCKET_COUNT_NAME)) {
Iterator bucketJsons = node.get(BUCKET_KEY_NAME).elements();
@@ -144,6 +171,44 @@ public TableDescriptor deserialize(JsonNode node) {
return builder.build();
}
+    /**
+     * Rebuilds the ordered partition key declarations from the deserialized partition key names
+     * and partition expressions.
+     *
+     * <p>A key that is the virtual partition spec key of an expression becomes an expression key;
+     * every other key becomes a column key. The order of {@code partitionKeys} is preserved.
+     *
+     * @throws IllegalArgumentException if an expression has no virtual partition spec key, if
+     *     its key is not listed in {@code partitionKeys}, or if two expressions share a key
+     */
+    private List<PartitionKey> toPartitionKeys(
+            List<String> partitionKeys, List<PartitionExpression> partitionExpressions) {
+        Set<String> partitionKeySet = new HashSet<>(partitionKeys);
+        Map<String, PartitionExpression> expressionByKey = new HashMap<>();
+        for (PartitionExpression partitionExpression : partitionExpressions) {
+            String virtualPartitionSpecKey =
+                    partitionExpression
+                            .getVirtualPartitionSpecKey()
+                            .orElseThrow(
+                                    () ->
+                                            new IllegalArgumentException(
+                                                    "Partition expression must contain virtual_partition_spec_key."));
+            if (!partitionKeySet.contains(virtualPartitionSpecKey)) {
+                throw new IllegalArgumentException(
+                        String.format(
+                                "Virtual partition spec key '%s' is not present in partition_key %s.",
+                                virtualPartitionSpecKey, partitionKeys));
+            }
+            // put() returns the previous mapping, so a non-null result means a duplicate key.
+            if (expressionByKey.put(virtualPartitionSpecKey, partitionExpression) != null) {
+                throw new IllegalArgumentException(
+                        String.format(
+                                "Duplicate virtual partition spec key '%s'.",
+                                virtualPartitionSpecKey));
+            }
+        }
+        return partitionKeys.stream()
+                .map(
+                        partitionKey -> {
+                            PartitionExpression partitionExpression =
+                                    expressionByKey.get(partitionKey);
+                            if (partitionExpression == null) {
+                                return PartitionKey.column(partitionKey);
+                            }
+                            return PartitionKey.expression(partitionExpression);
+                        })
+                .collect(Collectors.toList());
+    }
+
private Map deserializeProperties(JsonNode node) {
HashMap properties = new HashMap<>();
Iterator optionsKeys = node.fieldNames();
diff --git a/fluss-common/src/test/java/org/apache/fluss/classloading/ComponentClassLoaderTest.java b/fluss-common/src/test/java/org/apache/fluss/classloading/ComponentClassLoaderTest.java
index f04e8a3ae6..9fc9c5e0ba 100644
--- a/fluss-common/src/test/java/org/apache/fluss/classloading/ComponentClassLoaderTest.java
+++ b/fluss-common/src/test/java/org/apache/fluss/classloading/ComponentClassLoaderTest.java
@@ -25,6 +25,7 @@
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;
@@ -42,14 +43,18 @@ class ComponentClassLoaderTest {
private static final Class> CLASS_RETURNED_BY_OWNER = ComponentClassLoaderTest.class;
private static final String NON_EXISTENT_RESOURCE_NAME = "foo/Bar";
- private static String resourceToLoad;
+ private static final String RESOURCE_PACKAGE_PREFIX = "org.apache.fluss.test";
+ private static final String RESOURCE_TO_LOAD =
+ RESOURCE_PACKAGE_PREFIX.replace('.', '/') + "/resource";
private static final URL RESOURCE_RETURNED_BY_OWNER = createURL();
@TempDir private static Path tmp;
@BeforeAll
- public static void setup() {
- resourceToLoad = tmp.toString();
+ public static void setup() throws IOException {
+ Path resource = tmp.resolve(RESOURCE_TO_LOAD);
+ Files.createDirectories(resource.getParent());
+ Files.createFile(resource);
}
// ----------------------------------------------------------------------------------------------
@@ -164,17 +169,17 @@ void testComponentOnlyIsDefaultForResources() throws IOException {
@Test
void testOwnerFirstResourceFoundIgnoresComponent() {
TestUrlClassLoader owner =
- new TestUrlClassLoader(resourceToLoad, RESOURCE_RETURNED_BY_OWNER);
+ new TestUrlClassLoader(RESOURCE_TO_LOAD, RESOURCE_RETURNED_BY_OWNER);
final ComponentClassLoader componentClassLoader =
new ComponentClassLoader(
new URL[] {},
owner,
- new String[] {resourceToLoad},
+ new String[] {RESOURCE_PACKAGE_PREFIX},
new String[0],
Collections.emptyMap());
- final URL loadedResource = componentClassLoader.getResource(resourceToLoad);
+ final URL loadedResource = componentClassLoader.getResource(RESOURCE_TO_LOAD);
assertThat(loadedResource).isSameAs(RESOURCE_RETURNED_BY_OWNER);
}
@@ -184,31 +189,31 @@ void testOwnerFirstResourceNotFoundFallsBackToComponent() throws Exception {
final ComponentClassLoader componentClassLoader =
new ComponentClassLoader(
- new URL[] {tmp.getRoot().toUri().toURL()},
+ new URL[] {tmp.toUri().toURL()},
owner,
- new String[] {resourceToLoad},
+ new String[] {RESOURCE_PACKAGE_PREFIX},
new String[0],
Collections.emptyMap());
- final URL loadedResource = componentClassLoader.getResource(resourceToLoad);
- assertThat(loadedResource.toString()).contains(resourceToLoad);
+ final URL loadedResource = componentClassLoader.getResource(RESOURCE_TO_LOAD);
+ assertThat(loadedResource.toString()).contains(RESOURCE_TO_LOAD);
}
@Test
void testComponentFirstResourceFoundIgnoresOwner() throws Exception {
TestUrlClassLoader owner =
- new TestUrlClassLoader(resourceToLoad, RESOURCE_RETURNED_BY_OWNER);
+ new TestUrlClassLoader(RESOURCE_TO_LOAD, RESOURCE_RETURNED_BY_OWNER);
final ComponentClassLoader componentClassLoader =
new ComponentClassLoader(
- new URL[] {tmp.getRoot().toUri().toURL()},
+ new URL[] {tmp.toUri().toURL()},
owner,
new String[0],
- new String[] {resourceToLoad},
+ new String[] {RESOURCE_PACKAGE_PREFIX},
Collections.emptyMap());
- final URL loadedResource = componentClassLoader.getResource(resourceToLoad);
- assertThat(loadedResource.toString()).contains(resourceToLoad);
+ final URL loadedResource = componentClassLoader.getResource(RESOURCE_TO_LOAD);
+ assertThat(loadedResource.toString()).contains(RESOURCE_TO_LOAD);
}
@Test
diff --git a/fluss-common/src/test/java/org/apache/fluss/metadata/TableDescriptorTest.java b/fluss-common/src/test/java/org/apache/fluss/metadata/TableDescriptorTest.java
index bd99610752..09c33a6f59 100644
--- a/fluss-common/src/test/java/org/apache/fluss/metadata/TableDescriptorTest.java
+++ b/fluss-common/src/test/java/org/apache/fluss/metadata/TableDescriptorTest.java
@@ -17,15 +17,20 @@
package org.apache.fluss.metadata;
+import org.apache.fluss.config.AutoPartitionTimeUnit;
import org.apache.fluss.config.ConfigBuilder;
import org.apache.fluss.config.ConfigOption;
+import org.apache.fluss.config.ConfigOptions;
+import org.apache.fluss.config.Configuration;
import org.apache.fluss.types.DataTypes;
import org.junit.jupiter.api.Test;
+import java.time.ZoneId;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Optional;
@@ -244,6 +249,192 @@ void testWithProperties() {
assertThat(copy.getProperties()).hasSize(0);
}
+    @Test
+    void testImplicitPartitionDescriptorCopyHelpersPreserveExpressions() {
+        // Every copy helper exercised below has to carry the partition expressions over unchanged.
+        TableDescriptor original = implicitPartitionDescriptor().build();
+
+        TableDescriptor rebuilt = TableDescriptor.builder(original).build();
+        assertThat(rebuilt).isEqualTo(original);
+        assertThat(rebuilt.getPartitionExpressions()).isEqualTo(original.getPartitionExpressions());
+
+        TableDescriptor withProperties =
+                original.withProperties(Collections.singletonMap("table.test.option", "1"));
+        assertThat(withProperties.getPartitionExpressions())
+                .isEqualTo(original.getPartitionExpressions());
+
+        TableDescriptor withBucketCount = original.withBucketCount(8);
+        assertThat(withBucketCount.getPartitionExpressions())
+                .isEqualTo(original.getPartitionExpressions());
+
+        TableDescriptor withLakeFormat = original.withDataLakeFormat(DataLakeFormat.PAIMON);
+        assertThat(withLakeFormat.getPartitionExpressions())
+                .isEqualTo(original.getPartitionExpressions());
+    }
+
+ @Test
+ void testExistingDescriptorBuilderKeepsPartitionDeclarationMode() {
+ TableDescriptor implicitDescriptor = implicitPartitionDescriptor().build();
+
+ assertThatThrownBy(
+ () ->
+ TableDescriptor.builder(implicitDescriptor)
+ .partitionedBy("region")
+ .build())
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "partitionedBy(...) and partitionedByKeys(...) cannot be mixed in the same builder.");
+
+ TableDescriptor explicitDescriptor =
+ TableDescriptor.builder().schema(SCHEMA_1).partitionedBy("f0").build();
+ assertThatThrownBy(
+ () ->
+ TableDescriptor.builder(explicitDescriptor)
+ .partitionedByKeys(PartitionKey.column("f0"))
+ .build())
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "partitionedBy(...) and partitionedByKeys(...) cannot be mixed in the same builder.");
+ }
+
+    @Test
+    void testRepeatedPartitionDeclarationClearsStaleStateWithinSameMode() {
+        // partitionedBy(...) called twice: only the second declaration is kept.
+        TableDescriptor declaredTwiceByName =
+                TableDescriptor.builder()
+                        .schema(SCHEMA_1)
+                        .partitionedBy("f0")
+                        .partitionedBy("f3")
+                        .build();
+        assertThat(declaredTwiceByName.getPartitionExpressions()).isEmpty();
+        assertThat(declaredTwiceByName.getPartitionKeys()).containsExactly("f3");
+
+        // partitionedByKeys(...) on top of the helper's declaration: the new keys replace it.
+        PartitionKey regionColumn = PartitionKey.column("region");
+        TableDescriptor declaredTwiceByKey =
+                implicitPartitionDescriptor().partitionedByKeys(regionColumn).build();
+        assertThat(declaredTwiceByKey.getPartitionExpressions()).isEmpty();
+        assertThat(declaredTwiceByKey.getPartitionKeys()).containsExactly("region");
+    }
+
+    @Test
+    void testTableInfoToDescriptorPreservesImplicitPartitionExpressions() {
+        TableDescriptor source = implicitPartitionDescriptor().build();
+        TablePath tablePath = TablePath.of("db", "t");
+        TableInfo info = TableInfo.of(tablePath, 1L, 1, source, null, 1L, 1L);
+
+        TableDescriptor restored = info.toTableDescriptor();
+
+        // The descriptor survives the round trip as a whole ...
+        assertThat(restored).isEqualTo(source);
+        // ... and each derived partition view agrees with what the table info reports.
+        assertThat(restored.getPhysicalPartitionKeys()).isEqualTo(info.getPhysicalPartitionKeys());
+        assertThat(restored.getVirtualPartitionKeys()).isEqualTo(info.getVirtualPartitionKeys());
+        assertThat(restored.getPartitionSourceColumns())
+                .isEqualTo(info.getPartitionSourceColumns());
+        assertThat(restored.getPartitionInputColumns()).isEqualTo(info.getPartitionInputColumns());
+    }
+
+    @Test
+    void testImplicitPartitionMetadataObjectMethodsIncludeExpressions() {
+        TableDescriptor withExpressions = implicitPartitionDescriptor().build();
+        TableDescriptor copyOfWithExpressions = TableDescriptor.builder(withExpressions).build();
+        TableDescriptor columnOnly =
+                implicitPartitionDescriptor()
+                        .partitionedByKeys(PartitionKey.column("region"))
+                        .build();
+
+        // equals / hashCode / toString of the descriptor.
+        assertThat(withExpressions).isEqualTo(copyOfWithExpressions);
+        assertThat(withExpressions.hashCode()).isEqualTo(copyOfWithExpressions.hashCode());
+        assertThat(withExpressions).isNotEqualTo(columnOnly);
+        assertThat(withExpressions.toString()).contains("partitionExpressions");
+
+        // equals / toString of the table info built from those descriptors.
+        TablePath tablePath = TablePath.of("db", "t");
+        TableInfo infoWithExpressions =
+                TableInfo.of(tablePath, 1L, 1, withExpressions.withBucketCount(1), null, 1L, 1L);
+        TableInfo infoColumnOnly =
+                TableInfo.of(TablePath.of("db", "t"), 1L, 1, columnOnly, null, 1L, 1L);
+
+        assertThat(infoWithExpressions).isNotEqualTo(infoColumnOnly);
+        assertThat(infoWithExpressions.toString()).contains("partitionExpressions");
+    }
+
+    @Test
+    void testResolvedImplicitPartitionTimeZoneDoesNotUseAutoPartitionTimeZone() {
+        // The table property says UTC, while the zone passed for resolution is a different one.
+        TableDescriptor unresolved =
+                implicitPartitionDescriptor()
+                        .property(ConfigOptions.TABLE_AUTO_PARTITION_TIMEZONE, "UTC")
+                        .build();
+
+        TableDescriptor resolved =
+                unresolved.withResolvedPartitionExpressionTimeZone(ZoneId.of("Asia/Shanghai"));
+
+        PartitionExpression firstExpression = resolved.getPartitionExpressions().get(0);
+        DateTruncPartitionTransform dateTrunc =
+                (DateTruncPartitionTransform) firstExpression.getTransform();
+        assertThat(dateTrunc.getTimeZone()).hasValue(ZoneId.of("Asia/Shanghai"));
+    }
+
+    @Test
+    void testInvalidImplicitPartitionMetadata() {
+        // Case 1: the virtual key is named "region". Everything is built inside the callable so
+        // that whichever call throws is still observed by assertThatThrownBy.
+        assertThatThrownBy(
+                        () -> {
+                            PartitionExpression namedRegion =
+                                    PartitionExpression.of(
+                                            "region",
+                                            DateTruncPartitionTransform.of(
+                                                    "event_time", AutoPartitionTimeUnit.DAY));
+                            implicitPartitionDescriptor()
+                                    .partitionedByKeys(PartitionKey.expression(namedRegion))
+                                    .build();
+                        })
+                .isInstanceOf(IllegalArgumentException.class)
+                .hasMessage(
+                        "Virtual partition spec key 'region' conflicts with a physical column.");
+
+        // Case 2: two expressions share the virtual key "event_day".
+        assertThatThrownBy(
+                        () -> {
+                            PartitionExpression byDay =
+                                    PartitionExpression.of(
+                                            "event_day",
+                                            DateTruncPartitionTransform.of(
+                                                    "event_time", AutoPartitionTimeUnit.DAY));
+                            PartitionExpression byMonth =
+                                    PartitionExpression.of(
+                                            "event_day",
+                                            DateTruncPartitionTransform.of(
+                                                    "event_time", AutoPartitionTimeUnit.MONTH));
+                            implicitPartitionDescriptor()
+                                    .partitionedByKeys(
+                                            PartitionKey.expression(byDay),
+                                            PartitionKey.expression(byMonth))
+                                    .build();
+                        })
+                .isInstanceOf(IllegalArgumentException.class)
+                .hasMessageContaining("Duplicate partition keys are not allowed");
+    }
+
+    @Test
+    void testOldServerIgnoringPartitionExpressionsFailsFast() {
+        // Only "event_time" is a column; "event_day" is declared as a plain partition key with no
+        // expression behind it, so building the descriptor has to fail.
+        assertThatThrownBy(
+                        () -> {
+                            Schema eventTimeOnly =
+                                    Schema.newBuilder()
+                                            .column("event_time", DataTypes.TIMESTAMP().copy(false))
+                                            .build();
+                            TableDescriptor.builder()
+                                    .schema(eventTimeOnly)
+                                    .partitionedBy("event_day")
+                                    .distributedBy(1)
+                                    .build();
+                        })
+                .isInstanceOf(IllegalArgumentException.class)
+                .hasMessage("Partition key 'event_day' does not exist in the schema.");
+    }
+
@Test
void testInvalidTableDescriptor() {
// schema without primary key.
@@ -285,7 +476,7 @@ void testInvalidTableDescriptor() {
.build())
.isInstanceOf(IllegalArgumentException.class)
.hasMessage(
- "Partitioned Primary Key Table requires partition key [dt] is a subset of the primary key [id].");
+ "Partitioned Primary Key Table requires physical partition keys [dt] is a subset of the primary key [id].");
}
@Test
@@ -319,6 +510,276 @@ void testPartitionedTable() {
"Bucket key [f0, f3] shouldn't include any column in partition keys [f0].");
}
+ @Test // Mixes a column key with an unnamed date-trunc expression on "ts".
+ void testPartitionedByKeysWithImplicitPartitionExpression() {
+ Schema schema =
+ Schema.newBuilder()
+ .column("id", DataTypes.INT().copy(false))
+ .column("ts", DataTypes.TIMESTAMP().copy(false))
+ .column("region", DataTypes.STRING().copy(false))
+ .primaryKey("id", "ts", "region")
+ .build();
+
+ TableDescriptor descriptor =
+ TableDescriptor.builder()
+ .schema(schema)
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of( // no explicit key given here
+ DateTruncPartitionTransform.of(
+ "ts", AutoPartitionTimeUnit.DAY))))
+ .build();
+
+ assertThat(descriptor.getPartitionKeys()).containsExactly("region", "ts_day");
+ assertThat(descriptor.getPhysicalPartitionKeys()).containsExactly("region");
+ assertThat(descriptor.getVirtualPartitionKeys()).containsExactly("ts_day");
+ assertThat(descriptor.getPartitionSourceColumns()).containsExactly("ts");
+ assertThat(descriptor.getPartitionInputColumns()).containsExactly("region", "ts");
+ assertThat(descriptor.getBucketKeys()).containsExactly("id", "ts"); // no "region"
+ assertThat(descriptor.isDefaultBucketKey()).isTrue();
+ assertThat(descriptor.getPartitionExpressions()).hasSize(1);
+
+ TableInfo tableInfo = // same expectations checked through TableInfo
+ TableInfo.of(
+ TablePath.of("db", "t"),
+ 1L,
+ 1,
+ descriptor.withBucketCount(1),
+ null,
+ 1L,
+ 1L);
+ assertThat(tableInfo.getPhysicalPrimaryKeys()).containsExactly("id", "ts");
+ assertThat(tableInfo.getPartitionExpressions().get(0).getVirtualPartitionSpecKey())
+ .hasValue("ts_day");
+ }
+
+ @Test // Both call orders must throw; note the lambdas never reach build().
+ void testPartitionedByAndPartitionedByKeysCannotBeMixed() {
+ assertThatThrownBy( // partitionedBy(...) first
+ () ->
+ TableDescriptor.builder()
+ .schema(SCHEMA_1)
+ .partitionedBy("f0")
+ .partitionedByKeys(PartitionKey.column("f3")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "partitionedBy(...) and partitionedByKeys(...) cannot be mixed in the same builder.");
+
+ assertThatThrownBy( // partitionedByKeys(...) first
+ () ->
+ TableDescriptor.builder()
+ .schema(SCHEMA_1)
+ .partitionedByKeys(PartitionKey.column("f0"))
+ .partitionedBy("f3"))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "partitionedBy(...) and partitionedByKeys(...) cannot be mixed in the same builder.");
+ }
+
+ @Test // Source column must be non-nullable and, for PK tables, in the primary key.
+ void testInvalidPartitionExpressionMetadata() {
+ Schema nullableSourceSchema =
+ Schema.newBuilder()
+ .column("id", DataTypes.INT().copy(false))
+ .column("ts", DataTypes.TIMESTAMP()) // no copy(false), unlike "id"
+ .build();
+
+ assertThatThrownBy(
+ () ->
+ TableDescriptor.builder()
+ .schema(nullableSourceSchema)
+ .partitionedByKeys(
+ PartitionKey.expression(
+ PartitionExpression.of(
+ DateTruncPartitionTransform.of(
+ "ts",
+ AutoPartitionTimeUnit
+ .DAY))))
+ .build())
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Partition transform source column 'ts' must be non-nullable.");
+
+ Schema sourceNotInPkSchema =
+ Schema.newBuilder()
+ .column("id", DataTypes.INT().copy(false))
+ .column("ts", DataTypes.TIMESTAMP().copy(false))
+ .primaryKey("id") // "ts" is left out of the primary key
+ .build();
+
+ assertThatThrownBy(
+ () ->
+ TableDescriptor.builder()
+ .schema(sourceNotInPkSchema)
+ .partitionedByKeys(
+ PartitionKey.expression(
+ PartitionExpression.of(
+ DateTruncPartitionTransform.of(
+ "ts",
+ AutoPartitionTimeUnit
+ .DAY))))
+ .build())
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Partitioned Primary Key Table requires transform source column 'ts' is in the primary key [id].");
+ }
+
+ @Test // Each case builds a TableInfo directly and expects one validation error.
+ void testTableInfoRejectsInconsistentPartitionExpressions() {
+ Schema schema = // shared by the cases that do not need a custom schema
+ Schema.newBuilder()
+ .column("id", DataTypes.INT().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .primaryKey("id", "event_time")
+ .build();
+
+ assertThatThrownBy( // key with neither a column nor an expression
+ () ->
+ tableInfo(
+ schema,
+ Collections.singletonList("event_day"),
+ Collections.emptyList()))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Partition key 'event_day' does not exist in the schema or partition expressions.");
+
+ assertThatThrownBy( // virtual key collides with column "event_day"
+ () ->
+ tableInfo(
+ Schema.newBuilder()
+ .column("id", DataTypes.INT().copy(false))
+ .column(
+ "event_day",
+ DataTypes.TIMESTAMP().copy(false))
+ .primaryKey("id", "event_day")
+ .build(),
+ Collections.singletonList("event_day"),
+ Collections.singletonList(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_day",
+ AutoPartitionTimeUnit.DAY)))))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Virtual partition spec key 'event_day' conflicts with a physical column.");
+
+ assertThatThrownBy( // transform source column missing from the schema
+ () ->
+ tableInfo(
+ schema,
+ Collections.singletonList("event_day"),
+ Collections.singletonList(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "missing_time",
+ AutoPartitionTimeUnit.DAY)))))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Partition transform source column 'missing_time' does not exist in the schema.");
+
+ assertThatThrownBy( // source column declared without copy(false)
+ () ->
+ tableInfo(
+ Schema.newBuilder()
+ .column("id", DataTypes.INT().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP())
+ .build(),
+ Collections.singletonList("event_day"),
+ Collections.singletonList(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY)))))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Partition transform source column 'event_time' must be non-nullable.");
+
+ assertThatThrownBy( // physical partition key "dt" outside the primary key
+ () ->
+ tableInfo(
+ Schema.newBuilder()
+ .column("id", DataTypes.INT().copy(false))
+ .column("dt", DataTypes.STRING().copy(false))
+ .primaryKey("id")
+ .build(),
+ Collections.singletonList("dt"),
+ Collections.emptyList()))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Partitioned Primary Key Table requires physical partition keys [dt] is a subset of the primary key [id].");
+
+ assertThatThrownBy( // transform source column outside the primary key
+ () ->
+ tableInfo(
+ Schema.newBuilder()
+ .column("id", DataTypes.INT().copy(false))
+ .column(
+ "event_time",
+ DataTypes.TIMESTAMP().copy(false))
+ .primaryKey("id")
+ .build(),
+ Collections.singletonList("event_day"),
+ Collections.singletonList(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY)))))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Partitioned Primary Key Table requires transform source column 'event_time' is in the primary key [id].");
+
+ assertThatThrownBy( // expression created without a virtual key
+ () ->
+ tableInfo(
+ schema,
+ Collections.singletonList("event_day"),
+ Collections.singletonList(
+ PartitionExpression.of(
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY)))))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Partition expression must contain a resolved virtual partition spec key.");
+
+ assertThatThrownBy( // expression key absent from the partition keys
+ () ->
+ tableInfo(
+ schema,
+ Collections.singletonList("event_day"),
+ Collections.singletonList(
+ PartitionExpression.of(
+ "event_month",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.MONTH)))))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Virtual partition spec key 'event_month' is not present in partition keys [event_day].");
+
+ assertThatThrownBy( // same virtual key declared by two expressions
+ () ->
+ tableInfo(
+ schema,
+ Collections.singletonList("event_day"),
+ Arrays.asList(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY)),
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.MONTH)))))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Duplicate virtual partition spec key 'event_day'.");
+ }
+
@Test
void testInvalidListaggParameterEmptyDelimiter() {
// LISTAGG with empty delimiter - should fail
@@ -388,4 +849,45 @@ void testValidateAggFunctionWithDataType() {
AggFunctions.of(AggFunctionType.LAST_VALUE, params).validateDataType(DataTypes.STRING());
AggFunctions.of(AggFunctionType.LISTAGG, params).validateDataType(DataTypes.STRING());
}
+
+ private static TableDescriptor.Builder implicitPartitionDescriptor() { // shared fixture
+ Schema schema =
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING().copy(false))
+ .column("id", DataTypes.INT().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .primaryKey("region", "id", "event_time")
+ .build();
+ return TableDescriptor.builder() // returned unbuilt so callers can keep configuring it
+ .schema(schema)
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time", AutoPartitionTimeUnit.DAY))))
+ .distributedBy(4);
+ }
+
+ private static TableInfo tableInfo( // calls the TableInfo constructor directly
+ Schema schema,
+ List<String> partitionKeys,
+ List<PartitionExpression> partitionExpressions) {
+ return new TableInfo(
+ TablePath.of("db", "t"),
+ 1L,
+ 0,
+ schema,
+ Collections.singletonList("id"), // NOTE(review): presumably the bucket keys
+ partitionKeys, // passed through unchanged
+ partitionExpressions, // passed through unchanged
+ 1,
+ new Configuration(),
+ new Configuration(),
+ null,
+ null,
+ 1L,
+ 1L);
+ }
}
diff --git a/fluss-common/src/test/java/org/apache/fluss/utils/PartitionComputerTest.java b/fluss-common/src/test/java/org/apache/fluss/utils/PartitionComputerTest.java
new file mode 100644
index 0000000000..282cf46458
--- /dev/null
+++ b/fluss-common/src/test/java/org/apache/fluss/utils/PartitionComputerTest.java
@@ -0,0 +1,556 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.utils;
+
+import org.apache.fluss.config.AutoPartitionTimeUnit;
+import org.apache.fluss.metadata.DateTruncPartitionTransform;
+import org.apache.fluss.metadata.PartitionExpression;
+import org.apache.fluss.metadata.PartitionKey;
+import org.apache.fluss.metadata.Schema;
+import org.apache.fluss.metadata.TableDescriptor;
+import org.apache.fluss.metadata.TableInfo;
+import org.apache.fluss.metadata.TablePath;
+import org.apache.fluss.row.BinaryString;
+import org.apache.fluss.row.GenericRow;
+import org.apache.fluss.row.TimestampLtz;
+import org.apache.fluss.row.TimestampNtz;
+import org.apache.fluss.types.DataType;
+import org.apache.fluss.types.DataTypeRoot;
+import org.apache.fluss.types.DataTypes;
+import org.apache.fluss.types.RowType;
+
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+import java.time.LocalDate;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.util.Arrays;
+import java.util.TimeZone;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
+/** Test for {@link PartitionComputer}. */
+class PartitionComputerTest {
+
+ @Test // Column-only partition keys: a STRING column and a DATE column.
+ void testComputePhysicalOnlyPartition() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING().copy(false))
+ .column("dt", DataTypes.DATE().copy(false))
+ .build(),
+ TableDescriptor.builder().partitionedBy("region", "dt").distributedBy(1));
+ PartitionComputer partitionComputer =
+ new PartitionComputer(tableInfo, tableInfo.getRowType());
+
+ assertThat( // the DATE field is supplied as an int epoch day
+ partitionComputer.getResolvedPartitionSpec(
+ GenericRow.of(
+ BinaryString.fromString("us"),
+ (int) LocalDate.of(2024, 3, 15).toEpochDay())))
+ .hasToString("region=us/dt=2024-03-15");
+ assertThat( // same row, rendered as a partition name
+ partitionComputer.getPartition(
+ GenericRow.of(
+ BinaryString.fromString("us"),
+ (int) LocalDate.of(2024, 3, 15).toEpochDay())))
+ .isEqualTo("us$2024-03-15");
+ }
+
+ @Test // One column key plus a MONTH date-trunc key over a TIMESTAMP column.
+ void testComputeMixedPhysicalAndImplicitPartition() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_month",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.MONTH))))
+ .distributedBy(1));
+
+ PartitionComputer partitionComputer =
+ new PartitionComputer(tableInfo, tableInfo.getRowType());
+
+ assertThat( // spec form
+ partitionComputer.getResolvedPartitionSpec(
+ GenericRow.of(
+ BinaryString.fromString("us"),
+ TimestampNtz.fromLocalDateTime(
+ LocalDateTime.of(2024, 3, 15, 10, 30)))))
+ .hasToString("region=us/event_month=202403");
+ assertThat( // partition-name form for the same row
+ partitionComputer.getPartition(
+ GenericRow.of(
+ BinaryString.fromString("us"),
+ TimestampNtz.fromLocalDateTime(
+ LocalDateTime.of(2024, 3, 15, 10, 30)))))
+ .isEqualTo("us$202403");
+ }
+
+ @Test // The partition row type is expected to expose both keys as STRING fields.
+ void testPartitionRowTypeForImplicitPartition() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY))))
+ .distributedBy(1));
+
+ RowType partitionRowType = PartitionUtils.partitionRowType(tableInfo);
+
+ assertThat(partitionRowType.getFieldNames()).containsExactly("region", "event_day");
+ assertThat(partitionRowType.getTypeAt(0).getTypeRoot()).isEqualTo(DataTypeRoot.STRING);
+ assertThat(partitionRowType.getTypeAt(1).getTypeRoot()).isEqualTo(DataTypeRoot.STRING);
+ assertThat( // field 1 of the converted row must read back as the given string
+ PartitionUtils.toPartitionRow(
+ Arrays.asList("us", "20240315"), partitionRowType)
+ .getString(1)
+ .toString())
+ .isEqualTo("20240315");
+ }
+
+ @Test // Date-trunc by MONTH over a DATE source column.
+ void testComputeDateSourcePartition() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder().column("dt", DataTypes.DATE().copy(false)).build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "dt_month",
+ DateTruncPartitionTransform.of(
+ "dt",
+ AutoPartitionTimeUnit.MONTH))))
+ .distributedBy(1));
+
+ PartitionComputer partitionComputer =
+ new PartitionComputer(tableInfo, tableInfo.getRowType());
+
+ assertThat( // mid-month date
+ partitionComputer.getResolvedPartitionSpec(
+ GenericRow.of((int) LocalDate.of(2024, 3, 15).toEpochDay())))
+ .hasToString("dt_month=202403");
+ assertThat( // last day of the year
+ partitionComputer.getPartition(
+ GenericRow.of((int) LocalDate.of(2024, 12, 31).toEpochDay())))
+ .isEqualTo("202412");
+ }
+
+ @Test // Five virtual keys on one source column; checks each unit's output format.
+ void testComputeMultipleVirtualKeysAndCanonicalFormats() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_hour",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.HOUR))),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY))),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_month",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.MONTH))),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_quarter",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.QUARTER))),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_year",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.YEAR))))
+ .distributedBy(1));
+
+ PartitionComputer partitionComputer =
+ new PartitionComputer(tableInfo, tableInfo.getRowType());
+
+ assertThat(
+ partitionComputer
+ .getResolvedPartitionSpec(
+ GenericRow.of(
+ TimestampNtz.fromLocalDateTime(
+ LocalDateTime.of(2024, 11, 11, 11, 30))))
+ .getPartitionValues()) // order: hour, day, month, quarter, year
+ .containsExactly("2024111111", "20241111", "202411", "20244", "2024");
+ }
+
+ @Test // Output must follow the partition key list, not the expression list order.
+ void testExpressionOrderDoesNotOverridePartitionKeyOrder() {
+ RowType inputRowType =
+ RowType.of(
+ new DataType[] {DataTypes.STRING().copy(false), DataTypes.TIMESTAMP()},
+ new String[] {"region", "event_time"});
+ PartitionComputer partitionComputer =
+ new PartitionComputer(
+ Arrays.asList("event_day", "region", "event_month"), // key order
+ Arrays.asList( // expressions listed month-first on purpose
+ PartitionExpression.of(
+ "event_month",
+ DateTruncPartitionTransform.of(
+ "event_time", AutoPartitionTimeUnit.MONTH)),
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time", AutoPartitionTimeUnit.DAY))),
+ inputRowType);
+
+ assertThat(
+ partitionComputer.getResolvedPartitionSpec(
+ GenericRow.of(
+ BinaryString.fromString("eu"),
+ TimestampNtz.fromLocalDateTime(
+ LocalDateTime.of(2024, 3, 15, 10, 30)))))
+ .hasToString("event_day=20240315/region=eu/event_month=202403");
+ }
+
+ @Test // Input row type lists columns in a different order than the table schema.
+ void testComputeFromProjectedInputRowTypeByName() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY))))
+ .distributedBy(1));
+ RowType projectedInputRowType = // "event_time" first, "region" second
+ RowType.of(
+ new DataType[] {DataTypes.TIMESTAMP(), DataTypes.STRING()},
+ new String[] {"event_time", "region"});
+ PartitionComputer partitionComputer =
+ new PartitionComputer(tableInfo, projectedInputRowType);
+
+ assertThat( // row fields follow the projected order
+ partitionComputer.getResolvedPartitionSpec(
+ GenericRow.of(
+ TimestampNtz.fromLocalDateTime(
+ LocalDateTime.of(2024, 3, 15, 10, 30)),
+ BinaryString.fromString("eu"))))
+ .hasToString("region=eu/event_day=20240315");
+ }
+
+ @Test // Constructing with a row type that lacks a needed column must throw.
+ void testMissingProjectedInputColumnsFailFast() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY))))
+ .distributedBy(1));
+
+ assertThatThrownBy( // row type without "region"
+ () ->
+ new PartitionComputer(
+ tableInfo,
+ RowType.of(
+ new DataType[] {DataTypes.TIMESTAMP()},
+ new String[] {"event_time"})))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("The partition column region is not in the row");
+
+ assertThatThrownBy( // row type without "event_time"
+ () ->
+ new PartitionComputer(
+ tableInfo,
+ RowType.of(
+ new DataType[] {DataTypes.STRING()},
+ new String[] {"region"})))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining(
+ "The partition transform source column event_time is not in the row");
+ }
+
+ @Test // With no expressions, "event_day" is reported as a missing partition column.
+ void testOldClientIgnoringPartitionExpressionsFailsFast() {
+ RowType inputRowType =
+ RowType.of(
+ new DataType[] {DataTypes.TIMESTAMP().copy(false)},
+ new String[] {"event_time"});
+
+ assertThatThrownBy(
+ () ->
+ new PartitionComputer( // empty expression list
+ Arrays.asList("event_day"), Arrays.asList(), inputRowType))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessageContaining("The partition column event_day is not in the row")
+ .hasMessageContaining("event_time");
+ }
+
+ @Test // Key list and expression list that disagree must be rejected at construction.
+ void testRejectInconsistentPartitionExpressions() {
+ RowType inputRowType =
+ RowType.of(
+ new DataType[] {DataTypes.TIMESTAMP().copy(false)},
+ new String[] {"event_time"});
+
+ assertThatThrownBy( // expression created without a key
+ () ->
+ new PartitionComputer(
+ Arrays.asList("event_day"),
+ Arrays.asList(
+ PartitionExpression.of(
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY))),
+ inputRowType))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Virtual partition expression must have a resolved partition spec key.");
+
+ assertThatThrownBy( // expression key not in the key list
+ () ->
+ new PartitionComputer(
+ Arrays.asList("event_day"),
+ Arrays.asList(
+ PartitionExpression.of(
+ "event_month",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.MONTH))),
+ inputRowType))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Virtual partition spec key 'event_month' is not present in partition keys [event_day].");
+
+ assertThatThrownBy( // two expressions share one key
+ () ->
+ new PartitionComputer(
+ Arrays.asList("event_day"),
+ Arrays.asList(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY)),
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.MONTH))),
+ inputRowType))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Duplicate virtual partition spec key 'event_day'.");
+ }
+
+ @Test // Null values for a partition column or a transform source must throw NPE.
+ void testNullRuntimePartitionValuesFailFast() {
+ TableInfo mixedTableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY))))
+ .distributedBy(1));
+ PartitionComputer mixedPartitionComputer =
+ new PartitionComputer(mixedTableInfo, mixedTableInfo.getRowType());
+
+ assertThatThrownBy( // null in the "region" field
+ () ->
+ mixedPartitionComputer.getPartition(
+ GenericRow.of(
+ null,
+ TimestampNtz.fromLocalDateTime(
+ LocalDateTime.of(2024, 3, 15, 10, 30)))))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Partition value for 'region' shouldn't be null.");
+
+ TableInfo virtualOnlyTableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY))))
+ .distributedBy(1));
+ PartitionComputer virtualOnlyPartitionComputer =
+ new PartitionComputer(virtualOnlyTableInfo, virtualOnlyTableInfo.getRowType());
+
+ assertThatThrownBy( // null in the "event_time" field
+ () ->
+ virtualOnlyPartitionComputer.getPartition(
+ GenericRow.of((Object) null)))
+ .isInstanceOf(NullPointerException.class)
+ .hasMessage("Partition transform source value for 'event_time' shouldn't be null.");
+ }
+
+ @Test // Result must be the same under two different JVM default time zones.
+ void testTimestampNtzDoesNotUseJvmDefaultTimeZone() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.DAY))))
+ .distributedBy(1));
+ PartitionComputer partitionComputer =
+ new PartitionComputer(tableInfo, tableInfo.getRowType());
+ GenericRow row =
+ GenericRow.of(TimestampNtz.fromLocalDateTime(LocalDateTime.of(2024, 4, 1, 0, 30)));
+ TimeZone originalTimeZone = TimeZone.getDefault(); // restored in finally
+ try {
+ TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
+ String utcPartition = partitionComputer.getPartition(row);
+ TimeZone.setDefault(TimeZone.getTimeZone("Asia/Shanghai"));
+ String shanghaiPartition = partitionComputer.getPartition(row);
+
+ assertThat(shanghaiPartition).isEqualTo(utcPartition).isEqualTo("20240401");
+ } finally {
+ TimeZone.setDefault(originalTimeZone); // do not leak the override to other tests
+ }
+ }
+
+ @Test // TIMESTAMP_LTZ source with an explicit zone on the transform.
+ void testComputeTimestampLtzWithResolvedTimeZone() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("event_time", DataTypes.TIMESTAMP_LTZ().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_hour",
+ DateTruncPartitionTransform.of(
+ "event_time",
+ AutoPartitionTimeUnit.HOUR,
+ ZoneId.of("Asia/Shanghai")))))
+ .distributedBy(1));
+
+ PartitionComputer partitionComputer =
+ new PartitionComputer(tableInfo, tableInfo.getRowType());
+
+ assertThat(
+ partitionComputer.getPartition(
+ GenericRow.of(
+ TimestampLtz.fromInstant(
+ Instant.parse("2024-03-31T16:30:00Z")))))
+ .isEqualTo("2024040100"); // 16:30Z is 00:30 on 2024-04-01 in Asia/Shanghai
+ }
+
+ @Test // TIMESTAMP_LTZ source without a zone: getPartition is expected to throw.
+ void testTimestampLtzRequiresResolvedTimeZone() {
+ TableInfo tableInfo =
+ tableInfo(
+ Schema.newBuilder()
+ .column("event_time", DataTypes.TIMESTAMP_LTZ().copy(false))
+ .build(),
+ TableDescriptor.builder()
+ .partitionedByKeys(
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_hour",
+ DateTruncPartitionTransform.of( // no ZoneId argument
+ "event_time",
+ AutoPartitionTimeUnit.HOUR))))
+ .distributedBy(1));
+
+ PartitionComputer partitionComputer = // construction itself is not asserted to fail
+ new PartitionComputer(tableInfo, tableInfo.getRowType());
+
+ assertThatThrownBy(
+ () ->
+ partitionComputer.getPartition(
+ GenericRow.of(TimestampLtz.fromEpochMillis(0))))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("DateTruncPartitionTransform must contain a resolved time zone.");
+ }
+
+ private static TableInfo tableInfo(Schema schema, TableDescriptor.Builder descriptorBuilder) {
+ TableDescriptor descriptor = descriptorBuilder.schema(schema).build(); // adds the schema
+ return TableInfo.of(TablePath.of("db", "t"), 1, 0, descriptor, null, 0, 0);
+ }
+}
diff --git a/fluss-common/src/test/java/org/apache/fluss/utils/PartitionUtilsTest.java b/fluss-common/src/test/java/org/apache/fluss/utils/PartitionUtilsTest.java
index aeb2c1d7c3..8fcefdbafb 100644
--- a/fluss-common/src/test/java/org/apache/fluss/utils/PartitionUtilsTest.java
+++ b/fluss-common/src/test/java/org/apache/fluss/utils/PartitionUtilsTest.java
@@ -20,14 +20,21 @@
import org.apache.fluss.config.AutoPartitionTimeUnit;
import org.apache.fluss.config.ConfigOptions;
import org.apache.fluss.exception.InvalidPartitionException;
+import org.apache.fluss.metadata.DateTruncPartitionTransform;
+import org.apache.fluss.metadata.PartitionExpression;
+import org.apache.fluss.metadata.PartitionKey;
import org.apache.fluss.metadata.PartitionSpec;
import org.apache.fluss.metadata.ResolvedPartitionSpec;
+import org.apache.fluss.metadata.Schema;
import org.apache.fluss.metadata.TableDescriptor;
import org.apache.fluss.metadata.TableInfo;
import org.apache.fluss.row.BinaryString;
+import org.apache.fluss.row.GenericRow;
import org.apache.fluss.row.TimestampLtz;
import org.apache.fluss.row.TimestampNtz;
import org.apache.fluss.types.DataTypeRoot;
+import org.apache.fluss.types.DataTypes;
+import org.apache.fluss.types.RowType;
import org.junit.jupiter.api.Test;
@@ -169,6 +176,41 @@ void testGenerateAutoPartitionName(
}
}
+ @Test // Virtual key "event_day" should appear as a non-nullable STRING field.
+ void testImplicitPartitionRowTypeUsesVirtualPartitionKeyAsString() {
+ Schema schema =
+ Schema.newBuilder()
+ .column("region", DataTypes.STRING().copy(false))
+ .column("event_time", DataTypes.TIMESTAMP().copy(false))
+ .column("payload", DataTypes.STRING()) // not a partition input
+ .build();
+ TableDescriptor descriptor =
+ TableDescriptor.builder()
+ .schema(schema)
+ .partitionedByKeys(
+ PartitionKey.column("region"),
+ PartitionKey.expression(
+ PartitionExpression.of(
+ "event_day",
+ DateTruncPartitionTransform.of(
+ "event_time", AutoPartitionTimeUnit.DAY))))
+ .distributedBy(1)
+ .build();
+ TableInfo tableInfo =
+ TableInfo.of(DATA1_TABLE_PATH, 1L, 1, descriptor, DEFAULT_REMOTE_DATA_DIR, 1L, 1L);
+
+ RowType partitionRowType = PartitionUtils.partitionRowType(tableInfo);
+ GenericRow partitionRow =
+ PartitionUtils.toPartitionRow(Arrays.asList("us", "20240315"), partitionRowType);
+
+ assertThat(partitionRowType.getFieldNames()).containsExactly("region", "event_day");
+ assertThat(partitionRowType.getTypeAt(0).getTypeRoot()).isEqualTo(DataTypeRoot.STRING);
+ assertThat(partitionRowType.getTypeAt(1).getTypeRoot()).isEqualTo(DataTypeRoot.STRING);
+ assertThat(partitionRowType.getTypeAt(1).isNullable()).isFalse();
+ assertThat(partitionRow.getString(0)).isEqualTo(BinaryString.fromString("us"));
+ assertThat(partitionRow.getString(1)).isEqualTo(BinaryString.fromString("20240315"));
+ }
+
@Test
void testString() {
Object value = BinaryString.fromString("Fluss");
diff --git a/fluss-common/src/test/java/org/apache/fluss/utils/json/TableDescriptorJsonSerdeTest.java b/fluss-common/src/test/java/org/apache/fluss/utils/json/TableDescriptorJsonSerdeTest.java
index 9136c005f7..56f9ecaabe 100644
--- a/fluss-common/src/test/java/org/apache/fluss/utils/json/TableDescriptorJsonSerdeTest.java
+++ b/fluss-common/src/test/java/org/apache/fluss/utils/json/TableDescriptorJsonSerdeTest.java
@@ -17,12 +17,24 @@
package org.apache.fluss.utils.json;
+import org.apache.fluss.config.AutoPartitionTimeUnit;
+import org.apache.fluss.metadata.DateTruncPartitionTransform;
import org.apache.fluss.metadata.KvFormat;
import org.apache.fluss.metadata.LogFormat;
+import org.apache.fluss.metadata.PartitionExpression;
+import org.apache.fluss.metadata.PartitionKey;
import org.apache.fluss.metadata.TableDescriptor;
+import org.junit.jupiter.api.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.time.DateTimeException;
+import java.time.ZoneId;
import java.util.Collections;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatThrownBy;
+
/** Test for {@link TableDescriptorJsonSerde}. */
public class TableDescriptorJsonSerdeTest extends JsonSerdeTestBase {
TableDescriptorJsonSerdeTest() {
@@ -31,7 +43,7 @@ public class TableDescriptorJsonSerdeTest extends JsonSerdeTestBase
+ readTableDescriptor(
+ implicitPartitionJson(
+ "[\"a_day\"]",
+ "[{\"transform\":{\"type\":\"date_trunc\",\"source_column\":\"a\",\"unit\":\"DAY\"}}]")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Partition expression must contain virtual_partition_spec_key.");
+
+ assertThatThrownBy(
+ () ->
+ readTableDescriptor(
+ implicitPartitionJson(
+ "[\"other_day\"]",
+ "[{\"virtual_partition_spec_key\":\"a_day\",\"transform\":{\"type\":\"date_trunc\",\"source_column\":\"a\",\"unit\":\"DAY\"}}]")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Virtual partition spec key 'a_day' is not present in partition_key [other_day].");
+
+ assertThatThrownBy(
+ () ->
+ readTableDescriptor(
+ implicitPartitionJson(
+ "[\"a_day\"]",
+ "[{\"virtual_partition_spec_key\":\"a_day\",\"transform\":{\"type\":\"date_trunc\",\"source_column\":\"a\",\"unit\":\"DAY\"}},"
+ + "{\"virtual_partition_spec_key\":\"a_day\",\"transform\":{\"type\":\"date_trunc\",\"source_column\":\"a\",\"unit\":\"MONTH\"}}]")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage("Duplicate virtual partition spec key 'a_day'.");
+
+ assertThatThrownBy(
+ () ->
+ readTableDescriptor(
+ implicitPartitionJson(
+ "[\"missing_day\",\"a_day\"]",
+ "[{\"virtual_partition_spec_key\":\"a_day\",\"transform\":{\"type\":\"date_trunc\",\"source_column\":\"a\",\"unit\":\"DAY\"}}]")))
+ .isInstanceOf(IllegalArgumentException.class)
+ .hasMessage(
+ "Partition key 'missing_day' does not exist in the schema or partition expressions.");
+
+ assertThatThrownBy(
+ () ->
+ readTableDescriptor(
+ implicitPartitionJson(
+ "[\"a_day\"]",
+ "[{\"virtual_partition_spec_key\":\"a_day\",\"transform\":{\"type\":\"date_trunc\",\"source_column\":\"a\",\"unit\":\"DAY\",\"time_zone\":\"Invalid/Zone\"}}]")))
+ .isInstanceOf(DateTimeException.class);
+ }
+
+ private static TableDescriptor readTableDescriptor(String json) { // decode via the serde
+ byte[] utf8Json = json.getBytes(StandardCharsets.UTF_8);
+ return JsonSerdeUtils.readValue(utf8Json, TableDescriptorJsonSerde.INSTANCE);
+ }
+
+ private static String implicitPartitionJson( // builds a table descriptor JSON document
+ String partitionKeyJson, String partitionExpressionsJson) {
+ return "{\"version\":1,\"schema\":" // everything except the two arguments is fixed
+ + SchemaJsonSerdeTest.SCHEMA_JSON_1
+ + ",\"partition_key\":"
+ + partitionKeyJson // inserted verbatim, so it must already be valid JSON
+ + ",\"partition_expressions\":"
+ + partitionExpressionsJson // inserted verbatim as well
+ + ",\"bucket_key\":[\"a\"],\"bucket_count\":16,\"properties\":{},\"custom_properties\":{}}";
+ }
}
diff --git a/fluss-dist/pom.xml b/fluss-dist/pom.xml
index feb7872ba5..cc3feedf01 100644
--- a/fluss-dist/pom.xml
+++ b/fluss-dist/pom.xml
@@ -30,6 +30,12 @@
Apache Fluss (Incubating) : Dist
jar
+
+ src/main/assemblies/plugins.xml
+
+ fluss-${project.version}${dist.package.classifier}-bin
+
+
@@ -39,21 +45,6 @@
provided
-
-
- org.apache.fluss
- fluss-fs-hdfs
- ${project.version}
- provided
-
-
-
- org.apache.fluss
- fluss-fs-oss
- ${project.version}
- provided
-
-
org.apache.fluss
fluss-fs-s3
@@ -76,62 +67,6 @@
provided
-
- org.apache.fluss
- fluss-metrics-influxdb
- ${project.version}
- provided
-
-
-
- org.apache.fluss
- fluss-lake-paimon
- ${project.version}
- provided
-
-
-
- org.apache.fluss
- fluss-lake-iceberg
- ${project.version}
- provided
-
-
-
- org.apache.fluss
- fluss-lake-lance
- ${project.version}
- provided
-
-
-
- org.apache.fluss
- fluss-lake-hudi
- ${project.version}
- provided
-
-
-
- org.apache.flink
- flink-shaded-hadoop-2-uber
- 2.8.3-10.0
- compile
-
-
- org.slf4j
- slf4j-log4j12
-
-
-
-
-
- org.apache.paimon
- paimon-bundle
- ${paimon.version}
- compile
-
-
-
@@ -164,6 +99,86 @@
+
+ include-optional-dist-plugins
+
+
+ !skipOptionalDistPlugins
+
+
+
+
+
+ org.apache.fluss
+ fluss-fs-hdfs
+ ${project.version}
+ provided
+
+
+
+ org.apache.fluss
+ fluss-fs-oss
+ ${project.version}
+ provided
+
+
+
+ org.apache.fluss
+ fluss-metrics-influxdb
+ ${project.version}
+ provided
+
+
+
+ org.apache.fluss
+ fluss-lake-paimon
+ ${project.version}
+ provided
+
+
+
+ org.apache.fluss
+ fluss-lake-iceberg
+ ${project.version}
+ provided
+
+
+
+ org.apache.fluss
+ fluss-lake-lance
+ ${project.version}
+ provided
+
+
+
+ org.apache.fluss
+ fluss-lake-hudi
+ ${project.version}
+ provided
+
+
+
+ org.apache.flink
+ flink-shaded-hadoop-2-uber
+ 2.8.3-10.0
+ compile
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+
+
+
+ org.apache.paimon
+ paimon-bundle
+ ${paimon.version}
+ compile
+
+
+
+
symlink-build-target
@@ -203,7 +218,7 @@
-sfn
- ${project.basedir}/target/fluss-${project.version}-bin/fluss-${project.version}
+ ${project.basedir}/target/${dist.assembly.finalName}/fluss-${project.version}
${project.basedir}/../build-target
@@ -222,7 +237,7 @@
maven-assembly-plugin
- fluss-${project.version}-bin
+ ${dist.assembly.finalName}
false
@@ -296,7 +311,7 @@
- src/main/assemblies/plugins.xml
+ ${dist.plugins.descriptor}
@@ -318,4 +333,4 @@
-
\ No newline at end of file
+
diff --git a/fluss-dist/src/main/assemblies/plugins-slim.xml b/fluss-dist/src/main/assemblies/plugins-slim.xml
new file mode 100644
index 0000000000..e7eec7ef0c
--- /dev/null
+++ b/fluss-dist/src/main/assemblies/plugins-slim.xml
@@ -0,0 +1,55 @@
+
+
+ plugins
+
+ dir
+
+
+ true
+ fluss-${project.version}
+
+
+
+
+ ../fluss-filesystems/fluss-fs-s3/target/fluss-fs-s3-${project.version}.jar
+ plugins/s3/
+ fluss-fs-s3-${project.version}.jar
+ 0644
+
+
+
+
+ ../fluss-metrics/fluss-metrics-prometheus/target/fluss-metrics-prometheus-${project.version}.jar
+ plugins/prometheus/
+ fluss-metrics-prometheus-${project.version}.jar
+ 0644
+
+
+
+ ../fluss-metrics/fluss-metrics-jmx/target/fluss-metrics-jmx-${project.version}.jar
+ plugins/jmx/
+ fluss-metrics-jmx-${project.version}.jar
+ 0644
+
+
+
+
diff --git a/fluss-dist/src/main/assemblies/tgz.xml b/fluss-dist/src/main/assemblies/tgz.xml
index d8bc17cbfc..9ecdb6de50 100644
--- a/fluss-dist/src/main/assemblies/tgz.xml
+++ b/fluss-dist/src/main/assemblies/tgz.xml
@@ -26,7 +26,7 @@
fluss-${project.version}
- ${project.basedir}/target/fluss-${project.version}-bin/fluss-${project.version}
+ ${project.basedir}/target/${dist.assembly.finalName}/fluss-${project.version}
/
**/*
diff --git a/fluss-dist/src/main/resources-coordinator/conf/log4j-console.properties b/fluss-dist/src/main/resources-coordinator/conf/log4j-console.properties
new file mode 100644
index 0000000000..d43beb98ba
--- /dev/null
+++ b/fluss-dist/src/main/resources-coordinator/conf/log4j-console.properties
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds.
+monitorInterval=30
+# This affects logging for both user code and Fluss
+rootLogger.level=${env:ROOT_LOG_LEVEL:-INFO}
+rootLogger.appenderRef.console.ref=ConsoleAppender
+rootLogger.appenderRef.rolling.ref=RollingFileAppender
+# Uncomment this if you want to _only_ change Fluss's logging
+#logger.fluss.name = org.apache.fluss
+#logger.fluss.level = INFO
+# The following lines keep the log level of common libraries/connectors on
+# log level INFO. The root logger does not override this. You have to manually
+# change the log levels here.
+logger.hadoop.name=org.apache.hadoop
+logger.hadoop.level=INFO
+logger.zookeeper.name=org.apache.zookeeper
+logger.zookeeper.level=INFO
+logger.shaded_zookeeper.name=org.apache.flink.shaded.zookeeper3
+logger.shaded_zookeeper.level=INFO
+# Log all infos to the console
+appender.console.name=ConsoleAppender
+appender.console.type=CONSOLE
+appender.console.layout.type=PatternLayout
+appender.console.layout.pattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+appender.console.filter.threshold.type=ThresholdFilter
+appender.console.filter.threshold.level=${sys:console.log.level:-ALL}
+# Log all infos in the given rolling file
+appender.rolling.name=RollingFileAppender
+appender.rolling.type=RollingFile
+appender.rolling.append=true
+appender.rolling.fileName=${sys:log.file}
+appender.rolling.filePattern=${sys:log.file}.%i
+appender.rolling.layout.type=PatternLayout
+appender.rolling.layout.pattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+appender.rolling.policies.type=Policies
+appender.rolling.policies.size.type=SizeBasedTriggeringPolicy
+appender.rolling.policies.size.size=100MB
+appender.rolling.policies.startup.type=OnStartupTriggeringPolicy
+appender.rolling.strategy.type=DefaultRolloverStrategy
+appender.rolling.strategy.max=${env:MAX_LOG_FILE_NUMBER:-10}
+# Suppress the irrelevant (wrong) warnings from the Netty channel handler
+logger.netty.name=org.jboss.netty.channel.DefaultChannelPipeline
+logger.netty.level=OFF
+# don't print jindo log
+logger.jindo_fsStats.name=com.aliyun.jindodata.common.FsStats
+logger.jindo_fsStats.level=OFF
+logger.jindo_jnative.name=com.aliyun.jindodata.jnative.NativeLogger
+logger.jindo_jnative.level=OFF
+
diff --git a/fluss-dist/src/main/resources-coordinator/server.yaml b/fluss-dist/src/main/resources-coordinator/server.yaml
new file mode 100644
index 0000000000..3b731b5fd2
--- /dev/null
+++ b/fluss-dist/src/main/resources-coordinator/server.yaml
@@ -0,0 +1,80 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+#==============================================================================
+# Zookeeper
+#==============================================================================
+
+# Zookeeper connection string (see zookeeper docs for details).
+# This is a comma-separated list of host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all Fluss znodes.
+zookeeper.address: localhost:2181
+
+#==============================================================================
+# Common
+#==============================================================================
+
+# The default bucket number to be used when creating tables if no bucket number
+# is specified for the table.
+default.bucket.number: 1
+
+# The default replication factor to be used when creating tables if no replication
+# factor is specified for the table.
+default.replication.factor: 1
+
+# The local data directory used by Fluss to store kv and log data.
+data.dir: /tmp/fluss-data
+
+# The remote data directory to be used for Fluss. Now, it is only used for storing
+# kv snapshot data. Expected to be set to a remote location like: oss://bucket/path for oss
+# filesystem or hdfs://namenode:port/path for hdfs filesystem
+remote.data.dir: /tmp/fluss-remote-data
+
+#==============================================================================
+# Listeners
+#==============================================================================
+
+# The network address and port to which the server binds for accepting connections.
+# The format is '{listener_name}://{host}:{port}', and multiple addresses can be specified, separated by commas.
+bind.listeners: FLUSS://0.0.0.0:9123
+advertised.listeners: FLUSS://host.docker.internal:9123
+
+
+#==============================================================================
+# Tablet Server
+#==============================================================================
+
+
+# The id of the tablet server to be run, must be set and should be different
+# when running multiple tablet servers.
+tablet-server.id: 0
+
+# Controlled shutdown configuration for tablet servers
+# tablet-server.controlled-shutdown.max-retries: 3
+# tablet-server.controlled-shutdown.retry-interval: 1000ms
+
+#==============================================================================
+# OSS FileSystem
+#==============================================================================
+
+# The configuration for oss filesystem when using oss as the remote data directory
+# fs.oss.endpoint: xxx
+# fs.oss.accessKeyId: xxx
+# fs.oss.accessKeySecret: xxx
diff --git a/fluss-dist/src/main/resources-tablet/conf/log4j-console.properties b/fluss-dist/src/main/resources-tablet/conf/log4j-console.properties
new file mode 100644
index 0000000000..d43beb98ba
--- /dev/null
+++ b/fluss-dist/src/main/resources-tablet/conf/log4j-console.properties
@@ -0,0 +1,65 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Allows this configuration to be modified at runtime. The file will be checked every 30 seconds.
+monitorInterval=30
+# This affects logging for both user code and Fluss
+rootLogger.level=${env:ROOT_LOG_LEVEL:-INFO}
+rootLogger.appenderRef.console.ref=ConsoleAppender
+rootLogger.appenderRef.rolling.ref=RollingFileAppender
+# Uncomment this if you want to _only_ change Fluss's logging
+#logger.fluss.name = org.apache.fluss
+#logger.fluss.level = INFO
+# The following lines keep the log level of common libraries/connectors on
+# log level INFO. The root logger does not override this. You have to manually
+# change the log levels here.
+logger.hadoop.name=org.apache.hadoop
+logger.hadoop.level=INFO
+logger.zookeeper.name=org.apache.zookeeper
+logger.zookeeper.level=INFO
+logger.shaded_zookeeper.name=org.apache.flink.shaded.zookeeper3
+logger.shaded_zookeeper.level=INFO
+# Log all infos to the console
+appender.console.name=ConsoleAppender
+appender.console.type=CONSOLE
+appender.console.layout.type=PatternLayout
+appender.console.layout.pattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+appender.console.filter.threshold.type=ThresholdFilter
+appender.console.filter.threshold.level=${sys:console.log.level:-ALL}
+# Log all infos in the given rolling file
+appender.rolling.name=RollingFileAppender
+appender.rolling.type=RollingFile
+appender.rolling.append=true
+appender.rolling.fileName=${sys:log.file}
+appender.rolling.filePattern=${sys:log.file}.%i
+appender.rolling.layout.type=PatternLayout
+appender.rolling.layout.pattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n
+appender.rolling.policies.type=Policies
+appender.rolling.policies.size.type=SizeBasedTriggeringPolicy
+appender.rolling.policies.size.size=100MB
+appender.rolling.policies.startup.type=OnStartupTriggeringPolicy
+appender.rolling.strategy.type=DefaultRolloverStrategy
+appender.rolling.strategy.max=${env:MAX_LOG_FILE_NUMBER:-10}
+# Suppress the irrelevant (wrong) warnings from the Netty channel handler
+logger.netty.name=org.jboss.netty.channel.DefaultChannelPipeline
+logger.netty.level=OFF
+# don't print jindo log
+logger.jindo_fsStats.name=com.aliyun.jindodata.common.FsStats
+logger.jindo_fsStats.level=OFF
+logger.jindo_jnative.name=com.aliyun.jindodata.jnative.NativeLogger
+logger.jindo_jnative.level=OFF
+
diff --git a/fluss-dist/src/main/resources-tablet/server.yaml b/fluss-dist/src/main/resources-tablet/server.yaml
new file mode 100644
index 0000000000..941528cc6f
--- /dev/null
+++ b/fluss-dist/src/main/resources-tablet/server.yaml
@@ -0,0 +1,80 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+#==============================================================================
+# Zookeeper
+#==============================================================================
+
+# Zookeeper connection string (see zookeeper docs for details).
+# This is a comma-separated list of host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all Fluss znodes.
+zookeeper.address: localhost:2181
+
+#==============================================================================
+# Common
+#==============================================================================
+
+# The default bucket number to be used when creating tables if no bucket number
+# is specified for the table.
+default.bucket.number: 1
+
+# The default replication factor to be used when creating tables if no replication
+# factor is specified for the table.
+default.replication.factor: 1
+
+# The local data directory used by Fluss to store kv and log data.
+data.dir: /tmp/fluss-data
+
+# The remote data directory to be used for Fluss. Now, it is only used for storing
+# kv snapshot data. Expected to be set to a remote location like: oss://bucket/path for oss
+# filesystem or hdfs://namenode:port/path for hdfs filesystem
+remote.data.dir: /tmp/fluss-remote-data
+
+#==============================================================================
+# Listeners
+#==============================================================================
+
+# The network address and port to which the server binds for accepting connections.
+# The format is '{listener_name}://{host}:{port}', and multiple addresses can be specified, separated by commas.
+bind.listeners: FLUSS://0.0.0.0:9124
+advertised.listeners: FLUSS://host.docker.internal:9124
+
+
+#==============================================================================
+# Tablet Server
+#==============================================================================
+
+
+# The id of the tablet server to be run, must be set and should be different
+# when running multiple tablet servers.
+tablet-server.id: 0
+
+# Controlled shutdown configuration for tablet servers
+# tablet-server.controlled-shutdown.max-retries: 3
+# tablet-server.controlled-shutdown.retry-interval: 1000ms
+
+#==============================================================================
+# OSS FileSystem
+#==============================================================================
+
+# The configuration for oss filesystem when using oss as the remote data directory
+# fs.oss.endpoint: xxx
+# fs.oss.accessKeyId: xxx
+# fs.oss.accessKeySecret: xxx
diff --git a/fluss-filesystems/fluss-fs-gs/src/test/java/org/apache/fluss/fs/gs/GSFileSystemBehaviorITCase.java b/fluss-filesystems/fluss-fs-gs/src/test/java/org/apache/fluss/fs/gs/GSFileSystemBehaviorITCase.java
index b1902c2bbb..c4333891ee 100644
--- a/fluss-filesystems/fluss-fs-gs/src/test/java/org/apache/fluss/fs/gs/GSFileSystemBehaviorITCase.java
+++ b/fluss-filesystems/fluss-fs-gs/src/test/java/org/apache/fluss/fs/gs/GSFileSystemBehaviorITCase.java
@@ -76,6 +76,13 @@ private static FileSystem createFileSystem() throws IOException {
configuration.setString("fs.gs.auth.type", "SERVICE_ACCOUNT_JSON_KEYFILE");
configuration.setString("fs.gs.auth.service.account.json.keyfile", path);
configuration.setString("fs.gs.inputstream.support.gzip.encoding.enable", "false");
+ configuration.setString("fs.gs.block.size", "67108864");
+ configuration.setString("fs.gs.outputstream.buffer.size", "8388608");
+ configuration.setString("fs.gs.outputstream.pipe.buffer.size", "1048576");
+ configuration.setString("fs.gs.outputstream.upload.chunk.size", "67108864");
+ configuration.setString("fs.gs.inputstream.inplace.seek.limit", "8388608");
+ configuration.setString("fs.gs.inputstream.min.range.request.size", "2097152");
+ FileSystem.initialize(configuration, null);
FileSystem fileSystem =
gsFileSystemPlugin.create(URI.create("gs://test-bucket/flusspath"), configuration);
@@ -108,6 +115,7 @@ private static void applyInMemoryStorage(FileSystem fileSystem) throws IOExcepti
@AfterAll
static void tearDown() throws IOException {
+ FileSystem.initialize(new Configuration(), null);
mockGSServer.close();
}
}
diff --git a/fluss-filesystems/fluss-fs-s3/pom.xml b/fluss-filesystems/fluss-fs-s3/pom.xml
index c25ddbbd8a..a63fe6e4d6 100644
--- a/fluss-filesystems/fluss-fs-s3/pom.xml
+++ b/fluss-filesystems/fluss-fs-s3/pom.xml
@@ -387,4 +387,4 @@
-
\ No newline at end of file
+
diff --git a/fluss-filesystems/pom.xml b/fluss-filesystems/pom.xml
index 34a81de98f..0e9ab35d38 100644
--- a/fluss-filesystems/pom.xml
+++ b/fluss-filesystems/pom.xml
@@ -82,4 +82,4 @@
-
\ No newline at end of file
+
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkRowDataChannelComputer.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkRowDataChannelComputer.java
index 23a3e0cd04..81656bbf14 100644
--- a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkRowDataChannelComputer.java
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkRowDataChannelComputer.java
@@ -19,20 +19,22 @@
import org.apache.fluss.annotation.VisibleForTesting;
import org.apache.fluss.bucketing.BucketingFunction;
-import org.apache.fluss.client.table.getter.PartitionGetter;
import org.apache.fluss.exception.FlussRuntimeException;
import org.apache.fluss.flink.row.RowWithOp;
import org.apache.fluss.flink.sink.serializer.FlussSerializationSchema;
import org.apache.fluss.flink.sink.serializer.SerializerInitContextImpl;
import org.apache.fluss.metadata.DataLakeFormat;
+import org.apache.fluss.metadata.PartitionExpression;
import org.apache.fluss.row.InternalRow;
import org.apache.fluss.row.encode.KeyEncoder;
import org.apache.fluss.types.RowType;
+import org.apache.fluss.utils.PartitionComputer;
import org.apache.flink.table.data.RowData;
import javax.annotation.Nullable;
+import java.util.Collections;
import java.util.List;
import static org.apache.fluss.utils.Preconditions.checkNotNull;
@@ -47,13 +49,14 @@ public class FlinkRowDataChannelComputer implements ChannelComputer bucketKeys;
private final List partitionKeys;
+ private final List partitionExpressions;
private final FlussSerializationSchema serializationSchema;
private transient int numChannels;
private transient BucketingFunction bucketingFunction;
private transient KeyEncoder bucketKeyEncoder;
private transient boolean combineShuffleWithPartitionName;
- private transient @Nullable PartitionGetter partitionGetter;
+ private transient @Nullable PartitionComputer partitionComputer;
public FlinkRowDataChannelComputer(
RowType flussRowType,
@@ -62,9 +65,28 @@ public FlinkRowDataChannelComputer(
@Nullable DataLakeFormat lakeFormat,
int numBucket,
FlussSerializationSchema serializationSchema) {
+ this(
+ flussRowType,
+ bucketKeys,
+ partitionKeys,
+ Collections.emptyList(),
+ lakeFormat,
+ numBucket,
+ serializationSchema);
+ }
+
+ public FlinkRowDataChannelComputer(
+ RowType flussRowType,
+ List bucketKeys,
+ List partitionKeys,
+ List partitionExpressions,
+ @Nullable DataLakeFormat lakeFormat,
+ int numBucket,
+ FlussSerializationSchema serializationSchema) {
this.flussRowType = flussRowType;
this.bucketKeys = bucketKeys;
this.partitionKeys = partitionKeys;
+ this.partitionExpressions = partitionExpressions;
this.lakeFormat = lakeFormat;
this.numBucket = numBucket;
this.serializationSchema = serializationSchema;
@@ -76,15 +98,16 @@ public void setup(int numChannels) {
this.bucketingFunction = BucketingFunction.of(lakeFormat);
this.bucketKeyEncoder = KeyEncoder.ofBucketKeyEncoder(flussRowType, bucketKeys, lakeFormat);
if (partitionKeys.isEmpty()) {
- this.partitionGetter = null;
+ this.partitionComputer = null;
} else {
- this.partitionGetter = new PartitionGetter(flussRowType, partitionKeys);
+ this.partitionComputer =
+ new PartitionComputer(partitionKeys, partitionExpressions, flussRowType);
}
// Use shared logic from ChannelComputer to determine sharding strategy
this.combineShuffleWithPartitionName =
ChannelComputer.shouldCombinePartitionInSharding(
- partitionGetter != null, numBucket, numChannels);
+ partitionComputer != null, numBucket, numChannels);
try {
// no need to read real database, thus assume to deserialize the fluss row as same as
@@ -105,8 +128,8 @@ public int channel(InputT record) {
if (!combineShuffleWithPartitionName) {
return ChannelComputer.select(bucketId, numChannels);
} else {
- checkNotNull(partitionGetter, "partitionGetter is null");
- String partitionName = partitionGetter.getPartition(row);
+ checkNotNull(partitionComputer, "partitionComputer is null");
+ String partitionName = partitionComputer.getPartition(row);
return ChannelComputer.select(partitionName, bucketId, numChannels);
}
} catch (Exception e) {
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkSink.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkSink.java
index 159b42764c..7bf3d1e80d 100644
--- a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkSink.java
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlinkSink.java
@@ -32,6 +32,7 @@
import org.apache.fluss.flink.sink.writer.FlinkSinkWriter;
import org.apache.fluss.flink.sink.writer.UpsertSinkWriter;
import org.apache.fluss.metadata.DataLakeFormat;
+import org.apache.fluss.metadata.PartitionExpression;
import org.apache.fluss.metadata.TablePath;
import org.apache.flink.api.common.functions.FlatMapFunction;
@@ -50,6 +51,7 @@
import javax.annotation.Nullable;
import java.io.Serializable;
+import java.util.Collections;
import java.util.List;
import static org.apache.fluss.flink.sink.FlinkStreamPartitioner.partition;
@@ -121,6 +123,7 @@ static class AppendSinkWriterBuilder
private final int numBucket;
private final List bucketKeys;
private final List partitionKeys;
+ private final List partitionExpressions;
private final @Nullable DataLakeFormat lakeFormat;
private final DistributionMode distributionMode;
private final FlussSerializationSchema flussSerializationSchema;
@@ -135,12 +138,37 @@ public AppendSinkWriterBuilder(
@Nullable DataLakeFormat lakeFormat,
DistributionMode distributionMode,
FlussSerializationSchema flussSerializationSchema) {
+ this(
+ tablePath,
+ flussConfig,
+ tableRowType,
+ numBucket,
+ bucketKeys,
+ partitionKeys,
+ Collections.emptyList(),
+ lakeFormat,
+ distributionMode,
+ flussSerializationSchema);
+ }
+
+ public AppendSinkWriterBuilder(
+ TablePath tablePath,
+ Configuration flussConfig,
+ RowType tableRowType,
+ int numBucket,
+ List bucketKeys,
+ List partitionKeys,
+ List partitionExpressions,
+ @Nullable DataLakeFormat lakeFormat,
+ DistributionMode distributionMode,
+ FlussSerializationSchema flussSerializationSchema) {
this.tablePath = tablePath;
this.flussConfig = flussConfig;
this.tableRowType = tableRowType;
this.numBucket = numBucket;
this.bucketKeys = bucketKeys;
this.partitionKeys = partitionKeys;
+ this.partitionExpressions = partitionExpressions;
this.lakeFormat = lakeFormat;
this.distributionMode = distributionMode;
this.flussSerializationSchema = flussSerializationSchema;
@@ -185,6 +213,7 @@ public DataStream addPreWriteTopology(DataStream input) {
new DataStatisticsOperatorFactory<>(
toFlussRowType(tableRowType),
partitionKeys,
+ partitionExpressions,
flussSerializationSchema))
// Set the parallelism same as input operator to encourage
// chaining
@@ -196,6 +225,7 @@ public DataStream addPreWriteTopology(DataStream input) {
toFlussRowType(tableRowType),
bucketKeys,
partitionKeys,
+ partitionExpressions,
numBucket,
lakeFormat,
flussSerializationSchema),
@@ -226,6 +256,7 @@ private DataStream bucketShuffle(DataStream input) {
toFlussRowType(tableRowType),
bucketKeys,
partitionKeys,
+ partitionExpressions,
lakeFormat,
numBucket,
flussSerializationSchema),
@@ -246,6 +277,7 @@ static class UpsertSinkWriterBuilder
private final int numBucket;
private final List bucketKeys;
private final List partitionKeys;
+ private final List partitionExpressions;
private final @Nullable DataLakeFormat lakeFormat;
private final DistributionMode distributionMode;
private final FlussSerializationSchema flussSerializationSchema;
@@ -277,6 +309,36 @@ static class UpsertSinkWriterBuilder
FlussSerializationSchema flussSerializationSchema,
boolean enableUndoRecovery,
@Nullable String producerId) {
+ this(
+ tablePath,
+ flussConfig,
+ tableRowType,
+ targetColumnIndexes,
+ numBucket,
+ bucketKeys,
+ partitionKeys,
+ Collections.emptyList(),
+ lakeFormat,
+ distributionMode,
+ flussSerializationSchema,
+ enableUndoRecovery,
+ producerId);
+ }
+
+ UpsertSinkWriterBuilder(
+ TablePath tablePath,
+ Configuration flussConfig,
+ RowType tableRowType,
+ @Nullable int[] targetColumnIndexes,
+ int numBucket,
+ List bucketKeys,
+ List partitionKeys,
+ List partitionExpressions,
+ @Nullable DataLakeFormat lakeFormat,
+ DistributionMode distributionMode,
+ FlussSerializationSchema flussSerializationSchema,
+ boolean enableUndoRecovery,
+ @Nullable String producerId) {
this.tablePath = tablePath;
this.flussConfig = flussConfig;
this.tableRowType = tableRowType;
@@ -284,6 +346,7 @@ static class UpsertSinkWriterBuilder
this.numBucket = numBucket;
this.bucketKeys = bucketKeys;
this.partitionKeys = partitionKeys;
+ this.partitionExpressions = partitionExpressions;
this.lakeFormat = lakeFormat;
this.distributionMode = distributionMode;
this.flussSerializationSchema = flussSerializationSchema;
@@ -319,6 +382,7 @@ public DataStream addPreWriteTopology(DataStream input) {
toFlussRowType(tableRowType),
bucketKeys,
partitionKeys,
+ partitionExpressions,
lakeFormat,
numBucket,
flussSerializationSchema),
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlussSinkBuilder.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlussSinkBuilder.java
index c8808f12f8..ffd7be2ba0 100644
--- a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlussSinkBuilder.java
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/FlussSinkBuilder.java
@@ -233,6 +233,7 @@ public FlussSink build() {
numBucket,
bucketKeys,
partitionKeys,
+ tableInfo.getPartitionExpressions(),
lakeFormat,
distributionMode,
serializationSchema,
@@ -248,6 +249,7 @@ public FlussSink build() {
numBucket,
bucketKeys,
partitionKeys,
+ tableInfo.getPartitionExpressions(),
lakeFormat,
distributionMode,
serializationSchema);
diff --git a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/shuffle/DataStatisticsOperator.java b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/shuffle/DataStatisticsOperator.java
index ccf9388097..f679ebc2db 100644
--- a/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/shuffle/DataStatisticsOperator.java
+++ b/fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/sink/shuffle/DataStatisticsOperator.java
@@ -19,13 +19,14 @@
import org.apache.fluss.annotation.Internal;
import org.apache.fluss.annotation.VisibleForTesting;
-import org.apache.fluss.client.table.getter.PartitionGetter;
import org.apache.fluss.exception.FlussRuntimeException;
import org.apache.fluss.flink.row.RowWithOp;
import org.apache.fluss.flink.sink.serializer.FlussSerializationSchema;
import org.apache.fluss.flink.sink.serializer.SerializerInitContextImpl;
+import org.apache.fluss.metadata.PartitionExpression;
import org.apache.fluss.row.InternalRow;
import org.apache.fluss.types.RowType;
+import org.apache.fluss.utils.PartitionComputer;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.runtime.operators.coordination.OperatorEvent;
@@ -38,6 +39,7 @@
import org.apache.flink.streaming.api.operators.StreamOperatorParameters;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import java.util.Collections;
import java.util.List;
import static org.apache.fluss.flink.adapter.RuntimeContextAdapter.getIndexOfThisSubtask;
@@ -60,12 +62,13 @@ public class DataStatisticsOperator
private final String operatorName;
private final RowType rowType;
private final List partitionKeys;
+ private final List partitionExpressions;
private final FlussSerializationSchema flussSerializationSchema;
private final OperatorEventGateway operatorEventGateway;
private transient int subtaskIndex;
private transient volatile DataStatistics localStatistics;
- private transient PartitionGetter partitionGetter;
+ private transient PartitionComputer partitionComputer;
private transient TypeSerializer statisticsSerializer;
DataStatisticsOperator(
@@ -75,6 +78,24 @@ public class DataStatisticsOperator
List partitionKeys,
OperatorEventGateway operatorEventGateway,
FlussSerializationSchema flussSerializationSchema) {
+ this(
+ parameters,
+ operatorName,
+ rowType,
+ partitionKeys,
+ Collections.emptyList(),
+ operatorEventGateway,
+ flussSerializationSchema);
+ }
+
+ DataStatisticsOperator(
+ StreamOperatorParameters