diff --git a/docs/source/contributor-guide/spark_expressions_support.md b/docs/source/contributor-guide/spark_expressions_support.md index 8f82847bb6..b6fcd2f6d1 100644 --- a/docs/source/contributor-guide/spark_expressions_support.md +++ b/docs/source/contributor-guide/spark_expressions_support.md @@ -598,7 +598,16 @@ ### struct_funcs - [x] named_struct + - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8. + - Spark 3.5.8 (audited 2026-05-27): baseline. `CreateNamedStruct(children)` with `NoThrow`; `children` is `Seq(name1, val1, name2, val2, ...)`. Names must be foldable `StringType` and distinct; non-null. `dataType` is the `StructType` of the resulting fields. Comet routes via `CometCreateNamedStruct`, builds a `CreateNamedStruct` proto, and the native `CreateNamedStruct` expression (in `native/spark-expr/src/struct_funcs/create_named_struct.rs`) emits a `StructArray`. + - Spark 4.0.1 (audited 2026-05-27): semantics unchanged; an `override def stateful: Boolean = true` flag is added for the optimizer. Parser-level Alias unwrapping (`case (a: Alias, _) => Seq(Literal(a.name), a)`) is transparent to Comet. + - Spark 4.1.1 (audited 2026-05-27): semantics unchanged; adds `override def contextIndependentFoldable: Boolean = children.forall(_.contextIndependentFoldable)` for the new constant-folding pass; no impact on Comet conversion. + - Comet limitation: a `CreateNamedStruct` whose names contain duplicates falls back to Spark. (Spark's analyzer also tolerates duplicates at the column-name level, but the proto layer would lose them, so the fallback is the safe choice.) - [x] struct + - Spark 3.4.3 (audited 2026-05-27): identical to 3.5.8. + - Spark 3.5.8 (audited 2026-05-27): the SQL function `struct(a, b, c)` is built by `CreateStruct.create`, which lowers to a `CreateNamedStruct` with synthetic field names `col1`, `col2`, etc. Comet handles both `struct` and `named_struct` via the same `CometCreateNamedStruct` path; the synthetic-name and explicit-name cases behave identically. + - Spark 4.0.1 (audited 2026-05-27): same lowering; identical to 3.5.8. + - Spark 4.1.1 (audited 2026-05-27): same lowering; identical to 3.5.8. ### url_funcs diff --git a/spark/src/main/scala/org/apache/comet/serde/structs.scala b/spark/src/main/scala/org/apache/comet/serde/structs.scala index 9ef00272ec..3f0f184263 100644 --- a/spark/src/main/scala/org/apache/comet/serde/structs.scala +++ b/spark/src/main/scala/org/apache/comet/serde/structs.scala @@ -31,15 +31,24 @@ import org.apache.comet.DataTypeSupport import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, serializeDataType} object CometCreateNamedStruct extends CometExpressionSerde[CreateNamedStruct] { + + private val duplicateNamesReason = + "`CreateNamedStruct` with duplicate field names is not supported" + + override def getUnsupportedReasons(): Seq[String] = Seq(duplicateNamesReason) + + override def getSupportLevel(expr: CreateNamedStruct): SupportLevel = { + if (expr.names.length != expr.names.distinct.length) { + Unsupported(Some(duplicateNamesReason)) + } else { + Compatible() + } + } + override def convert( expr: CreateNamedStruct, inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { - if (expr.names.length != expr.names.distinct.length) { - withInfo(expr, "CreateNamedStruct with duplicate field names are not supported") - return None - } - val valExprs = expr.valExprs.map(exprToProtoInternal(_, inputs, binding)) if (valExprs.forall(_.isDefined)) {