diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index ff30dcbf20..73be0acf32 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -9222,6 +9222,13 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, assert(resultSize <= 16); std::vector idxList(resultSize); + // For raw buffers the byte offset rides in the buffer index and the element + // offset operand stays undef, so the per-element offset must accumulate into + // bufIdx. For structured buffers the per-element offset is the element offset + // and bufIdx is the structure index. + bool isRawBuf = DXIL::IsRawBuffer(ResKind); + Value *matBaseIdx = isRawBuf ? bufIdx : baseOffset; + switch (subOp) { case HLSubscriptOpcode::ColMatSubscript: case HLSubscriptOpcode::RowMatSubscript: { @@ -9229,7 +9236,7 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, Value *offset = CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i); offset = subBuilder.CreateMul(offset, EltByteSize); - idxList[i] = subBuilder.CreateAdd(baseOffset, offset); + idxList[i] = subBuilder.CreateAdd(matBaseIdx, offset); } } break; case HLSubscriptOpcode::RowMatElement: @@ -9238,7 +9245,7 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (unsigned i = 0; i < resultSize; i++) { Value *offset = subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize); - idxList[i] = subBuilder.CreateAdd(baseOffset, offset); + idxList[i] = subBuilder.CreateAdd(matBaseIdx, offset); } } break; default: @@ -9252,8 +9259,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, Value *subsUser = *(U++); if (resultSize == 1) { TranslateStructBufSubscriptUser(cast(subsUser), handle, - ResKind, bufIdx, idxList[0], status, - hlslOP, DL); + ResKind, isRawBuf ? idxList[0] : bufIdx, + isRawBuf ? baseOffset : idxList[0], + status, hlslOP, DL); continue; } if (GetElementPtrInst *GEP = dyn_cast(subsUser)) { @@ -9261,8 +9269,9 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) { Instruction *gepUserInst = cast(*(gepU++)); - TranslateStructBufSubscriptUser(gepUserInst, handle, ResKind, bufIdx, - GEPOffset, status, hlslOP, DL); + TranslateStructBufSubscriptUser( + gepUserInst, handle, ResKind, isRawBuf ? GEPOffset : bufIdx, + isRawBuf ? baseOffset : GEPOffset, status, hlslOP, DL); } GEP->eraseFromParent(); @@ -9276,13 +9285,15 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (unsigned i = 0; i < resultSize; i++) { Value *EltVal = stBuilder.CreateExtractElement(Val, i); uint8_t mask = DXIL::kCompMask_X; - GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP, + GenerateStructBufSt(handle, isRawBuf ? idxList[i] : bufIdx, + isRawBuf ? baseOffset : idxList[i], EltTy, hlslOP, stBuilder, {EltVal, undefElt, undefElt, undefElt}, mask, alignment); } } else { uint8_t mask = DXIL::kCompMask_X; - GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP, + GenerateStructBufSt(handle, isRawBuf ? idxList[0] : bufIdx, + isRawBuf ? baseOffset : idxList[0], EltTy, hlslOP, stBuilder, {Val, undefElt, undefElt, undefElt}, mask, alignment); } @@ -9300,14 +9311,16 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle, for (unsigned i = 0; i < resultSize; i++) { Value *ResultElt; // TODO: This can be inefficient for row major matrix load - GenerateRawBufLd(handle, bufIdx, idxList[i], + GenerateRawBufLd(handle, isRawBuf ? idxList[i] : bufIdx, + isRawBuf ? baseOffset : idxList[i], /*status*/ nullptr, EltTy, ResultElt, hlslOP, ldBuilder, 1, alignment); ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i); } } else { - GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr, EltTy, - ldData, hlslOP, ldBuilder, 4, alignment); + GenerateRawBufLd(handle, isRawBuf ? idxList[0] : bufIdx, + isRawBuf ? baseOffset : idxList[0], /*status*/ nullptr, + EltTy, ldData, hlslOP, ldBuilder, 4, alignment); } ldUser->replaceAllUsesWith(ldData); ldUser->eraseFromParent(); diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/bab_load_struct_matrix_element_offsets.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/bab_load_struct_matrix_element_offsets.hlsl new file mode 100644 index 0000000000..758a910182 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/objects/ByteAddressBuffer/bab_load_struct_matrix_element_offsets.hlsl @@ -0,0 +1,31 @@ +// Reading individual matrix elements through the struct member of a +// ByteAddressBuffer.Load must address each element at its own +// byte offset. For raw buffers the byte offset rides in the load index operand +// and the element-offset operand stays undef, so the four first-column reads +// below must land at indices 0, 4, 8 and 12 -- they must not collapse to the +// matrix base (index 0), which is what happened before the matrix-element +// subscript path was taught the raw-buffer addressing convention. + +// RUN: %dxc -T cs_6_6 -E cs %s | FileCheck %s + +struct Box { float4x4 m; }; + +ByteAddressBuffer src : register(t0); +RWByteAddressBuffer dst : register(u0); + +[numthreads(1, 1, 1)] +void cs() +{ + Box b = src.Load(0); + + // Column-major: _m00/_m10/_m20/_m30 live at bytes 0/4/8/12. + dst.Store(0, asuint(b.m._m00)); + dst.Store(4, asuint(b.m._m10)); + dst.Store(8, asuint(b.m._m20)); + dst.Store(12, asuint(b.m._m30)); +} + +// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 0, i32 undef, i8 1, i32 4) +// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 4, i32 undef, i8 1, i32 4) +// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 8, i32 undef, i8 1, i32 4) +// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 12, i32 undef, i8 1, i32 4)