From 66c3e2bd1dd2a5ac7ce57dd6d18e58c335e3781b Mon Sep 17 00:00:00 2001 From: Jianghua Yang Date: Wed, 20 May 2026 00:30:35 +0800 Subject: [PATCH] ORCA: handle INDF predicate in PdxlnMergeJoin PdxlnMergeJoin extracted merge-key operands via blind indexing: CExpression *pexprPredOuter = (*pexprPred)[0]; CExpression *pexprPredInner = (*pexprPred)[1]; That works for plain equality `a = b` (binary ScalarCmp), but a NULL-safe equality clause is `NOT (a IS DISTINCT FROM b)` -- a *unary* NOT wrapping the binary IsDistinctFrom. Indexing [1] on the NOT walks off the end of its 1-element child array, then DeriveUsedColumns on the garbage pointer segfaults. Use CPhysicalJoin::AlignJoinKeyOuterInner -- the same helper PdxlnHashJoin already uses (line 5139) -- which handles both equality and INDF shapes and strips binary-coercible casts. Reconstruct the DXL predicate from the aligned operands via PexprScalarCmp / PexprINDF so the downstream PG translator sees an operand order matching the children we emit. 
--- .../src/translate/CTranslatorExprToDXL.cpp | 51 ++++++++++--------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp b/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp index 51acac518be..252999875b2 100644 --- a/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp +++ b/src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp @@ -4964,41 +4964,46 @@ CTranslatorExprToDXL::PdxlnMergeJoin(CExpression *pexprMJ, // At this point, they all better be merge joinable GPOS_ASSERT(CPhysicalJoin::FMergeJoinCompatible( pexprPred, pexprOuterChild, pexprInnerChild)); - CExpression *pexprPredOuter = (*pexprPred)[0]; - CExpression *pexprPredInner = (*pexprPred)[1]; - // align extracted columns with outer and inner children of the join - CColRefSet *pcrsOuterChild = pexprOuterChild->DeriveOutputColumns(); - CColRefSet *pcrsPredInner = pexprPredInner->DeriveUsedColumns(); + // Extract the two key columns out of pexprPred. Plain equality + // is `a = b` (binary ScalarCmp) but NULL-safe equality (INDF) is + // `NOT (a IS DISTINCT FROM b)`, a *unary* NOT around the binary + // IsDistinctFrom. Indexing [0]/[1] directly works for equality + // but accesses out of bounds for INDF. 
+ CExpression *pexprPredOuter = nullptr; + CExpression *pexprPredInner = nullptr; + IMDId *mdid_scop = nullptr; + CPhysicalJoin::AlignJoinKeyOuterInner(pexprPred, pexprOuterChild, + pexprInnerChild, &pexprPredOuter, + &pexprPredInner, &mdid_scop); + #ifdef GPOS_DEBUG + CColRefSet *pcrsOuterChild = pexprOuterChild->DeriveOutputColumns(); CColRefSet *pcrsInnerChild = pexprInnerChild->DeriveOutputColumns(); CColRefSet *pcrsPredOuter = pexprPredOuter->DeriveUsedColumns(); + CColRefSet *pcrsPredInner = pexprPredInner->DeriveUsedColumns(); + GPOS_ASSERT(pcrsOuterChild->ContainsAll(pcrsPredOuter) && + pcrsInnerChild->ContainsAll(pcrsPredInner) && + "merge join keys are not aligned with children"); #endif - if (pcrsOuterChild->ContainsAll(pcrsPredInner)) + pexprPredOuter->AddRef(); + pexprPredInner->AddRef(); + CExpression *pexprPredNew; + if (CPredicateUtils::IsEqualityOp(pexprPred)) { - GPOS_ASSERT(pcrsInnerChild->ContainsAll(pcrsPredOuter)); - std::swap(pexprPredOuter, pexprPredInner); -#ifdef GPOS_DEBUG - std::swap(pcrsPredOuter, pcrsPredInner); -#endif - - pexprPredOuter->AddRef(); - pexprPredInner->AddRef(); - pexprPred = - CUtils::PexprScalarEqCmp(m_mp, pexprPredOuter, pexprPredInner); + pexprPredNew = CUtils::PexprScalarCmp(m_mp, pexprPredOuter, + pexprPredInner, mdid_scop); } else { - pexprPred->AddRef(); + GPOS_ASSERT(CPredicateUtils::FINDF(pexprPred)); + pexprPredNew = CUtils::PexprINDF(m_mp, pexprPredOuter, + pexprPredInner, mdid_scop); } - GPOS_ASSERT(pcrsOuterChild->ContainsAll(pcrsPredOuter) && - pcrsInnerChild->ContainsAll(pcrsPredInner) && - "merge join keys are not aligned with children"); - - dxlnode_merge_conds->AddChild(PdxlnScalar(pexprPred)); - pexprPred->Release(); + dxlnode_merge_conds->AddChild(PdxlnScalar(pexprPredNew)); + pexprPredNew->Release(); } pdrgpexprPredicates->Release();