From 748e0308d41cf30ba991ad0709911e37c7fc3601 Mon Sep 17 00:00:00 2001 From: Philip Degarmo Date: Fri, 19 Jun 2026 12:41:33 -0700 Subject: [PATCH 1/2] Fix a miscompile that occurs when reading a matrix contained in a struct with no sibling fields. The expected behavior is that each load instruction is at a 4 byte offset, but observed behavior is that Loads are always at 0 offset. This was introduced in commit 909c55245 (merged 2025-03-17), and it's observable regressed between the releases v1.8.2502 and v1.8.2505. The regression was tested as still live in v1.10.2605.24 and current HEAD. This change adds a test that reproduces the problem. ClangHLSLTests shows 2 failures with this test pre-patch and 0 failures after the patch. --- lib/HLSL/HLOperationLower.cpp | 37 +++++++++++++------ ...ab_load_struct_matrix_element_offsets.hlsl | 31 ++++++++++++++++ 2 files changed, 56 insertions(+), 12 deletions(-) create mode 100644 tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/bab_load_struct_matrix_element_offsets.hlsl diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index ff30dcbf20..4264270a09 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -9222,6 +9222,13 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, assert(resultSize <= 16); std::vector idxList(resultSize); + // For raw buffers the byte offset rides in the buffer index and the element + // offset operand stays undef, so the per-element offset must accumulate into + // bufIdx. For structured buffers the per-element offset is the element offset + // and bufIdx is the structure index. + bool isRawBuf = DXIL::IsRawBuffer(ResKind); + Value *matBaseIdx = isRawBuf ? bufIdx : baseOffset; + switch (subOp) { case HLSubscriptOpcode::ColMatSubscript: case HLSubscriptOpcode::RowMatSubscript: { @@ -9229,7 +9236,7 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, Value *offset = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i); offset = subBuilder.CreateMul(offset, EltByteSize); - idxList[i] = subBuilder.CreateAdd(baseOffset, offset); + idxList[i] = subBuilder.CreateAdd(matBaseIdx, offset); } } break; case HLSubscriptOpcode::RowMatElement: @@ -9238,7 +9245,7 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (unsigned i = 0; i < resultSize; i++) { Value *offset = subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize); - idxList[i] = subBuilder.CreateAdd(baseOffset, offset); + idxList[i] = subBuilder.CreateAdd(matBaseIdx, offset); } } break; default: @@ -9251,9 +9258,10 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (auto U = CI->user_begin(); U != CI->user_end();) { Value *subsUser = *(U++); if (resultSize == 1) { - TranslateStructBufSubscriptUser(cast(subsUser), handle, - ResKind, bufIdx, idxList[0], status, - hlslOP, DL); + TranslateStructBufSubscriptUser( + cast(subsUser), handle, ResKind, + isRawBuf ? idxList[0] : bufIdx, isRawBuf ? baseOffset : idxList[0], + status, hlslOP, DL); continue; } if (GetElementPtrInst *GEP = dyn_cast(subsUser)) { @@ -9261,8 +9269,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) { Instruction *gepUserInst = cast(*(gepU++)); - TranslateStructBufSubscriptUser(gepUserInst, handle, ResKind, bufIdx, - GEPOffset, status, hlslOP, DL); + TranslateStructBufSubscriptUser( + gepUserInst, handle, ResKind, isRawBuf ? GEPOffset : bufIdx, + isRawBuf ? baseOffset : GEPOffset, status, hlslOP, DL); } GEP->eraseFromParent(); @@ -9276,13 +9285,15 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (unsigned i = 0; i < resultSize; i++) { Value *EltVal = stBuilder.CreateExtractElement(Val, i); uint8_t mask = DXIL::kCompMask_X; - GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP, + GenerateStructBufSt(handle, isRawBuf ? idxList[i] : bufIdx, + isRawBuf ? baseOffset : idxList[i], EltTy, hlslOP, stBuilder, {EltVal, undefElt, undefElt, undefElt}, mask, alignment); } } else { uint8_t mask = DXIL::kCompMask_X; - GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP, + GenerateStructBufSt(handle, isRawBuf ? idxList[0] : bufIdx, + isRawBuf ? baseOffset : idxList[0], EltTy, hlslOP, stBuilder, {Val, undefElt, undefElt, undefElt}, mask, alignment); } @@ -9300,14 +9311,16 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (unsigned i = 0; i < resultSize; i++) { Value *ResultElt; // TODO: This can be inefficient for row major matrix load - GenerateRawBufLd(handle, bufIdx, idxList[i], + GenerateRawBufLd(handle, isRawBuf ? idxList[i] : bufIdx, + isRawBuf ? baseOffset : idxList[i], /*status*/ nullptr, EltTy, ResultElt, hlslOP, ldBuilder, 1, alignment); ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i); } } else { - GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr, EltTy, - ldData, hlslOP, ldBuilder, 4, alignment); + GenerateRawBufLd(handle, isRawBuf ? idxList[0] : bufIdx, + isRawBuf ? baseOffset : idxList[0], /*status*/ nullptr, + EltTy, ldData, hlslOP, ldBuilder, 4, alignment); } ldUser->replaceAllUsesWith(ldData); ldUser->eraseFromParent(); diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/bab_load_struct_matrix_element_offsets.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/bab_load_struct_matrix_element_offsets.hlsl new file mode 100644 index 0000000000..758a910182 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/bab_load_struct_matrix_element_offsets.hlsl @@ -0,0 +1,31 @@ +// Reading individual matrix elements through the struct member of a +// ByteAddressBuffer.Load must address each element at its own +// byte offset. For raw buffers the byte offset rides in the load index operand +// and the element-offset operand stays undef, so the four first-column reads +// below must land at indices 0, 4, 8 and 12 -- they must not collapse to the +// matrix base (index 0), which is what happened before the matrix-element +// subscript path was taught the raw-buffer addressing convention. + +// RUN: %dxc -T cs_6_6 -E cs %s | FileCheck %s + +struct Box { float4x4 m; }; + +ByteAddressBuffer src : register(t0); +RWByteAddressBuffer dst : register(u0); + +[numthreads(1, 1, 1)] +void cs() +{ + Box b = src.Load(0); + + // Column-major: _m00/_m10/_m20/_m30 live at bytes 0/4/8/12. + dst.Store(0, asuint(b.m._m00)); + dst.Store(4, asuint(b.m._m10)); + dst.Store(8, asuint(b.m._m20)); + dst.Store(12, asuint(b.m._m30)); +} + +// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 0, i32 undef, i8 1, i32 4) +// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 4, i32 undef, i8 1, i32 4) +// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 8, i32 undef, i8 1, i32 4) +// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 12, i32 undef, i8 1, i32 4) From d6a4e6b9d19952ef135d3639299a131cf5aa0e46 Mon Sep 17 00:00:00 2001 From: Philip Degarmo Date: Fri, 19 Jun 2026 13:16:05 -0700 Subject: [PATCH 2/2] clang format fix --- lib/HLSL/HLOperationLower.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index 4264270a09..73be0acf32 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -9258,10 +9258,10 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (auto U = CI->user_begin(); U != CI->user_end();) { Value *subsUser = *(U++); if (resultSize == 1) { - TranslateStructBufSubscriptUser( - cast(subsUser), handle, ResKind, - isRawBuf ? idxList[0] : bufIdx, isRawBuf ? baseOffset : idxList[0], - status, hlslOP, DL); + TranslateStructBufSubscriptUser(cast(subsUser), handle, + ResKind, isRawBuf ? idxList[0] : bufIdx, + isRawBuf ? baseOffset : idxList[0], + status, hlslOP, DL); continue; } if (GetElementPtrInst *GEP = dyn_cast(subsUser)) {