Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 24 additions & 11 deletions lib/HLSL/HLOperationLower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9222,14 +9222,21 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
assert(resultSize <= 16);
std::vector<Value *> idxList(resultSize);

// For raw buffers the byte offset is carried in the buffer index operand and
// the element offset operand stays undef, so each per-element offset must be
// added to bufIdx. For structured buffers bufIdx is the structure index, so
// each per-element offset is added to baseOffset instead.
bool isRawBuf = DXIL::IsRawBuffer(ResKind);
Value *matBaseIdx = isRawBuf ? bufIdx : baseOffset;

switch (subOp) {
case HLSubscriptOpcode::ColMatSubscript:
case HLSubscriptOpcode::RowMatSubscript: {
for (unsigned i = 0; i < resultSize; i++) {
Value *offset =
CI->getArgOperand(HLOperandIndex::kMatSubscriptSubOpIdx + i);
offset = subBuilder.CreateMul(offset, EltByteSize);
idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
idxList[i] = subBuilder.CreateAdd(matBaseIdx, offset);
}
} break;
case HLSubscriptOpcode::RowMatElement:
Expand All @@ -9238,7 +9245,7 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
for (unsigned i = 0; i < resultSize; i++) {
Value *offset =
subBuilder.CreateMul(EltIdxs->getAggregateElement(i), EltByteSize);
idxList[i] = subBuilder.CreateAdd(baseOffset, offset);
idxList[i] = subBuilder.CreateAdd(matBaseIdx, offset);
}
} break;
default:
Expand All @@ -9252,17 +9259,19 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
Value *subsUser = *(U++);
if (resultSize == 1) {
TranslateStructBufSubscriptUser(cast<Instruction>(subsUser), handle,
ResKind, bufIdx, idxList[0], status,
hlslOP, DL);
ResKind, isRawBuf ? idxList[0] : bufIdx,
isRawBuf ? baseOffset : idxList[0],
status, hlslOP, DL);
continue;
}
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(subsUser)) {
Value *GEPOffset = LowerGEPOnMatIndexListToIndex(GEP, idxList);

for (auto gepU = GEP->user_begin(); gepU != GEP->user_end();) {
Instruction *gepUserInst = cast<Instruction>(*(gepU++));
TranslateStructBufSubscriptUser(gepUserInst, handle, ResKind, bufIdx,
GEPOffset, status, hlslOP, DL);
TranslateStructBufSubscriptUser(
gepUserInst, handle, ResKind, isRawBuf ? GEPOffset : bufIdx,
isRawBuf ? baseOffset : GEPOffset, status, hlslOP, DL);
}

GEP->eraseFromParent();
Expand All @@ -9276,13 +9285,15 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
for (unsigned i = 0; i < resultSize; i++) {
Value *EltVal = stBuilder.CreateExtractElement(Val, i);
uint8_t mask = DXIL::kCompMask_X;
GenerateStructBufSt(handle, bufIdx, idxList[i], EltTy, hlslOP,
GenerateStructBufSt(handle, isRawBuf ? idxList[i] : bufIdx,
isRawBuf ? baseOffset : idxList[i], EltTy, hlslOP,
stBuilder, {EltVal, undefElt, undefElt, undefElt},
mask, alignment);
}
} else {
uint8_t mask = DXIL::kCompMask_X;
GenerateStructBufSt(handle, bufIdx, idxList[0], EltTy, hlslOP,
GenerateStructBufSt(handle, isRawBuf ? idxList[0] : bufIdx,
isRawBuf ? baseOffset : idxList[0], EltTy, hlslOP,
stBuilder, {Val, undefElt, undefElt, undefElt},
mask, alignment);
}
Expand All @@ -9300,14 +9311,16 @@ void TranslateStructBufMatSubscript(CallInst *CI, Value *handle,
for (unsigned i = 0; i < resultSize; i++) {
Value *ResultElt;
// TODO: This can be inefficient for row major matrix load
GenerateRawBufLd(handle, bufIdx, idxList[i],
GenerateRawBufLd(handle, isRawBuf ? idxList[i] : bufIdx,
isRawBuf ? baseOffset : idxList[i],
/*status*/ nullptr, EltTy, ResultElt, hlslOP,
ldBuilder, 1, alignment);
ldData = ldBuilder.CreateInsertElement(ldData, ResultElt, i);
}
} else {
GenerateRawBufLd(handle, bufIdx, idxList[0], /*status*/ nullptr, EltTy,
ldData, hlslOP, ldBuilder, 4, alignment);
GenerateRawBufLd(handle, isRawBuf ? idxList[0] : bufIdx,
isRawBuf ? baseOffset : idxList[0], /*status*/ nullptr,
EltTy, ldData, hlslOP, ldBuilder, 4, alignment);
}
ldUser->replaceAllUsesWith(ldData);
ldUser->eraseFromParent();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Reading individual matrix elements through the struct member of a
// ByteAddressBuffer.Load<struct{float4x4}> must address each element at its own
// byte offset. For raw buffers the byte offset rides in the load index operand
// and the element-offset operand stays undef, so the four first-column reads
// below must land at indices 0, 4, 8 and 12 -- they must not collapse to the
// matrix base (index 0), which is what happened before the matrix-element
// subscript path was taught the raw-buffer addressing convention.

// RUN: %dxc -T cs_6_6 -E cs %s | FileCheck %s

// Wrapper struct whose only member is a 4x4 float matrix, so that the matrix
// elements read in cs() are reached through a struct member of the loaded value.
struct Box { float4x4 m; };

// Raw byte-address buffers used by cs(): src (bound to t0) is read with
// Load<Box>, dst (bound to u0) receives the Store calls.
ByteAddressBuffer src : register(t0);
RWByteAddressBuffer dst : register(u0);

// Compute-shader entry point (selected by `-E cs` in the RUN line), declared
// with a 1x1x1 thread group.
// Loads a whole Box from byte offset 0 of src, then stores the bit patterns of
// four individual matrix elements to dst at byte offsets 0, 4, 8 and 12. The
// CHECK lines at the end of this file expect one rawBufferLoad per element, in
// this same order, so keep the statements in this order.
[numthreads(1, 1, 1)]
void cs()
{
// Templated load of the whole struct starting at byte offset 0.
Box b = src.Load<Box>(0);

// Column-major: _m00/_m10/_m20/_m30 live at bytes 0/4/8/12.
// asuint() reinterprets each float element as a uint so it can be passed to
// Store.
dst.Store(0, asuint(b.m._m00));
dst.Store(4, asuint(b.m._m10));
dst.Store(8, asuint(b.m._m20));
dst.Store(12, asuint(b.m._m30));
}

// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 0, i32 undef, i8 1, i32 4)
// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 4, i32 undef, i8 1, i32 4)
// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 8, i32 undef, i8 1, i32 4)
// CHECK: rawBufferLoad.f32(i32 139, %dx.types.Handle %{{.*}}, i32 12, i32 undef, i8 1, i32 4)
Loading