From 225bab1b297887feb6b2691e8e6abd47ab4bb02b Mon Sep 17 00:00:00 2001 From: tanishq-chugh Date: Wed, 27 May 2026 20:39:51 +0530 Subject: [PATCH 1/2] HIVE-29262: Incorrect column ordering output in case of different ordering of mutual columns in query & window function --- .../ql/optimizer/physical/Vectorizer.java | 23 +- ql/src/test/queries/clientpositive/lead_vec.q | 149 + .../clientpositive/llap/lead_vec.q.out | 14322 ++++++++++++++++ 3 files changed, 14490 insertions(+), 4 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/lead_vec.q create mode 100644 ql/src/test/results/clientpositive/llap/lead_vec.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index f4b4c2ff3bad..08d396ae3f4c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -5161,13 +5161,11 @@ private static VectorPTFInfo createVectorPTFInfo(Operator partitionExpressionByOutputColumn = new HashMap<>(); for (int i = 0; i < partitionKeyCount; i++) { VectorExpression partitionExpression = vContext.getVectorExpression(partitionExprNodeDescs[i]); - TypeInfo typeInfo = partitionExpression.getOutputTypeInfo(); - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); - partitionColumnVectorTypes[i] = columnVectorType; partitionColumnMap[i] = partitionExpression.getOutputColumnNum(); - partitionExpressions[i] = partitionExpression; + partitionExpressionByOutputColumn.put(partitionColumnMap[i], partitionExpression); } final int orderKeyCount = orderExprNodeDescs.length; @@ -5190,6 +5188,23 @@ private static VectorPTFInfo createVectorPTFInfo(Operator Date: Fri, 29 May 2026 17:22:06 +0530 Subject: [PATCH 2/2] Fix failing test cases --- .../ql/optimizer/physical/Vectorizer.java | 80 +++++++++++++++---- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 08d396ae3f4c..1fba6bf9d5f9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -5076,6 +5076,26 @@ private static void createVectorPTFDesc(Operator ptfOp, vectorizedPTFMaxMemoryBufferingBatchCount); } + /** + * Whether a partition expression refers to the same input column as an output column slot. + */ + private static boolean partitionExprMatchesInputColumn(ExprNodeDesc partitionExpr, + List outputSignature, int[] outputColumnProjectionMap, int evaluatorCount, + int inputColumnNum) { + ExprNodeDescEqualityWrapper partitionWrapper = + new ExprNodeDescEqualityWrapper(partitionExpr); + for (int i = evaluatorCount; i < outputColumnProjectionMap.length; i++) { + if (outputColumnProjectionMap[i] != inputColumnNum) { + continue; + } + ExprNodeColumnDesc outputColExpr = new ExprNodeColumnDesc(outputSignature.get(i)); + if (partitionWrapper.equals(new ExprNodeDescEqualityWrapper(outputColExpr))) { + return true; + } + } + return false; + } + private static void determineKeyAndNonKeyInputColumnMap(int[] outputColumnProjectionMap, boolean isPartitionOrderBy, int[] orderColumnMap, int[] partitionColumnMap, int evaluatorCount, ArrayList keyInputColumns, @@ -5161,11 +5181,17 @@ private static VectorPTFInfo createVectorPTFInfo(Operator partitionExpressionByOutputColumn = new HashMap<>(); + int[] planPartitionColumnMap = new int[partitionKeyCount]; + Type[] planPartitionColumnVectorTypes = new Type[partitionKeyCount]; + VectorExpression[] planPartitionExpressions = new VectorExpression[partitionKeyCount]; for (int i = 0; i < partitionKeyCount; i++) { - VectorExpression partitionExpression = vContext.getVectorExpression(partitionExprNodeDescs[i]); - partitionColumnMap[i] = partitionExpression.getOutputColumnNum(); - partitionExpressionByOutputColumn.put(partitionColumnMap[i], partitionExpression); + VectorExpression partitionExpression = + vContext.getVectorExpression(partitionExprNodeDescs[i]); + TypeInfo typeInfo = partitionExpression.getOutputTypeInfo(); + planPartitionColumnVectorTypes[i] = + VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); + planPartitionColumnMap[i] = partitionExpression.getOutputColumnNum(); + planPartitionExpressions[i] = partitionExpression; } final int orderKeyCount = orderExprNodeDescs.length; @@ -5184,25 +5210,47 @@ private static VectorPTFInfo createVectorPTFInfo(Operator keyInputColumns = new ArrayList(); ArrayList nonKeyInputColumns = new ArrayList(); determineKeyAndNonKeyInputColumnMap(outputColumnProjectionMap, isPartitionOrderBy, orderColumnMap, - partitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns); + planPartitionColumnMap, evaluatorCount, keyInputColumns, nonKeyInputColumns); int[] keyInputColumnMap = ArrayUtils.toPrimitive(keyInputColumns.toArray(new Integer[0])); int[] nonKeyInputColumnMap = ArrayUtils.toPrimitive(nonKeyInputColumns.toArray(new Integer[0])); + boolean[] partitionUsed = new boolean[partitionKeyCount]; int partitionIndex = 0; for (int keyCol : keyInputColumnMap) { - VectorExpression partitionExpression = partitionExpressionByOutputColumn.get(keyCol); - if (partitionExpression == null) { - continue; + for (int i = 0; i < partitionKeyCount; i++) { + if (!partitionUsed[i] && planPartitionColumnMap[i] == keyCol) { + partitionColumnVectorTypes[partitionIndex] = planPartitionColumnVectorTypes[i]; + partitionColumnMap[partitionIndex] = keyCol; + partitionExpressions[partitionIndex] = planPartitionExpressions[i]; + partitionUsed[i] = true; + partitionIndex++; + break; + } } - TypeInfo typeInfo = partitionExpression.getOutputTypeInfo(); - Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); - partitionColumnVectorTypes[partitionIndex] = columnVectorType; - partitionColumnMap[partitionIndex] = keyCol; - partitionExpressions[partitionIndex] = partitionExpression; - partitionIndex++; } - if (partitionIndex != partitionKeyCount) { - throw new HiveException("Failed to map partition columns to key input column order"); + for (int keyCol : keyInputColumnMap) { + for (int i = 0; i < partitionKeyCount; i++) { + if (partitionUsed[i]) { + continue; + } + if (partitionExprMatchesInputColumn(partitionExprNodeDescs[i], outputSignature, + outputColumnProjectionMap, evaluatorCount, keyCol)) { + partitionColumnVectorTypes[partitionIndex] = planPartitionColumnVectorTypes[i]; + partitionColumnMap[partitionIndex] = keyCol; + partitionExpressions[partitionIndex] = planPartitionExpressions[i]; + partitionUsed[i] = true; + partitionIndex++; + break; + } + } + } + for (int i = 0; i < partitionKeyCount; i++) { + if (!partitionUsed[i]) { + partitionColumnVectorTypes[partitionIndex] = planPartitionColumnVectorTypes[i]; + partitionColumnMap[partitionIndex] = planPartitionColumnMap[i]; + partitionExpressions[partitionIndex] = planPartitionExpressions[i]; + partitionIndex++; + } } VectorExpression[][] evaluatorInputExpressions = new VectorExpression[evaluatorCount][];