From b3d43393abe7f80d6cc7179bb384e9a70a3c542f Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 4 Jun 2026 14:12:50 -0600 Subject: [PATCH] fix: clean up CometCast support-level reporting (#4501) Address the cast expression audit follow-ups from #4493. Several of the issue's line references were stale; the resolutions reflect current main: - canCastFromFloat/Double/Decimal need no evalMode parameter. The native code branches on eval mode for these narrowing casts but matches Spark in every mode (CometCastSuite passes in LEGACY, ANSI, and TRY), so there is no per-mode support-level divergence to report. No change needed. - Remove the vestigial getIncompatibleReasons/getUnsupportedReasons overrides. For Cast they are never consumed: cast.md is generated directly from the per-pair isSupported matrix and EXPLAIN surfaces the per-pair reason via getSupportLevel. - Fill in the previously empty Incompatible() reason for negative-scale decimal to string, sharing the string with the test via a constant to avoid drift. - Clarify the literal short-circuit comment in getSupportLevel: literal casts are folded by Spark at planning time, so they never execute natively. --- .../apache/comet/expressions/CometCast.scala | 23 +++++++++++-------- .../org/apache/comet/CometCastSuite.scala | 7 ++---- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala index 68c4d1fb7e..bcc9ee0692 100644 --- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala +++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala @@ -32,6 +32,10 @@ import org.apache.comet.shims.CometExprShim object CometCast extends CometExpressionSerde[Cast] with CometExprShim { + // Shared with CometCastSuite so the asserted reason cannot drift from production. + private[comet] val negativeScaleDecimalToStringReason: String = + "Negative-scale decimal requires spark.sql.legacy.allowNegativeScaleOfDecimal=true" + def supportedTypes: Seq[DataType] = Seq( DataTypes.BooleanType, @@ -48,17 +52,17 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { DataTypes.TimestampType, DataTypes.TimestampNTZType) - override def getIncompatibleReasons(): Seq[String] = Seq( - "Some cast operations between specific type pairs may produce different results than Spark." + - " Refer to the compatibility guide for the full matrix of supported cast operations.") - - override def getUnsupportedReasons(): Seq[String] = Seq( - "Not all cast type combinations are supported. Unsupported casts fall back to Spark.") + // Note: `getIncompatibleReasons` / `getUnsupportedReasons` are intentionally not + // overridden here. The per-pair `isSupported` matrix is the canonical source of cast + // reasons: `cast.md` is generated directly from it (see `GenerateDocs.writeCastMatrixForMode`) + // and EXPLAIN surfaces the per-pair reason via `getSupportLevel`. A single static sentence + // would only duplicate the matrix and risk drifting from it. override def getSupportLevel(cast: Cast): SupportLevel = { if (cast.child.isInstanceOf[Literal]) { - // casting from literal is compatible because we delegate to Spark - // further data type checks will be performed by CometLiteral + // A cast whose child is a literal is folded by Spark at planning time via `cast.eval()` + // (see `convert`), so the cast never executes natively and the result matches Spark by + // definition. `CometLiteral` then validates the resulting literal's data type. Compatible() } else { isSupported(cast.child.dataType, cast.dataType, cast.timeZoneId, evalMode(cast)) @@ -254,7 +258,8 @@ object CometCast extends CometExpressionSerde[Cast] with CometExprShim { val allowNegativeScale = SQLConf.get .getConfString("spark.sql.legacy.allowNegativeScaleOfDecimal", "false") .toBoolean - if (allowNegativeScale) Compatible() else Incompatible() + if (allowNegativeScale) Compatible() + else Incompatible(Some(negativeScaleDecimalToStringReason)) case _: DecimalType => // Compatible across all eval modes: LEGACY uses cast_decimal128_to_utf8 which // replicates Java BigDecimal.toString() (scientific notation when adj_exp < -6); diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala index 76b5433100..9937530ae5 100644 --- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala @@ -787,11 +787,8 @@ class CometCastSuite extends CometTestBase with AdaptiveSparkPlanHelper { } withSQLConf("spark.sql.legacy.allowNegativeScaleOfDecimal" -> "false") { assert( - CometCast.isSupported( - negScaleType, - DataTypes.StringType, - None, - CometEvalMode.LEGACY) == Incompatible()) + CometCast.isSupported(negScaleType, DataTypes.StringType, None, CometEvalMode.LEGACY) == + Incompatible(Some(CometCast.negativeScaleDecimalToStringReason))) } }