Skip to content

Commit 87a016a

Browse files
Revert "Use postsync for copy and fill"
This reverts commit cffe7f1. Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
1 parent: cffe7f1 · commit: 87a016a

File tree

9 files changed

+30 / -410 lines changed

9 files changed

+30 / -410 lines changed

level_zero/core/source/cmdlist/cmdlist_hw.h

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -235,11 +235,6 @@ struct CommandListCoreFamily : CommandListImp {
235235
void appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker);
236236
void appendEventForProfilingCopyCommand(ze_event_handle_t hEvent, bool beforeWalker);
237237
void appendSignalEventPostWalker(ze_event_handle_t hEvent, bool workloadPartition);
238-
void programEventL3Flush(ze_event_handle_t hEvent,
239-
Device *device,
240-
uint32_t partitionCount,
241-
NEO::CommandContainer &commandContainer);
242-
void adjustEventKernelCount(ze_event_handle_t hEvent);
243238
void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired);
244239
void appendComputeBarrierCommand();
245240
NEO::PipeControlArgs createBarrierFlags();

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 20 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -224,17 +224,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
224224
return ret;
225225
}
226226

227-
ret = appendLaunchKernelWithParams(hKernel, pLaunchFuncArgs,
228-
hSignalEvent, false, false, true);
229-
if (ret) {
230-
return ret;
231-
}
232-
233-
if (hSignalEvent) {
234-
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
235-
}
236-
237-
return ret;
227+
return appendLaunchKernelWithParams(hKernel, pLaunchFuncArgs,
228+
hSignalEvent, false, false, true);
238229
}
239230

240231
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -251,12 +242,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
251242
appendEventForProfiling(hEvent, true, false);
252243
ret = appendLaunchKernelWithParams(hKernel, pDispatchArgumentsBuffer,
253244
nullptr, true, false, false);
254-
if (ret) {
255-
return ret;
256-
}
257-
if (hEvent) {
258-
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
259-
}
260245
appendSignalEventPostWalker(hEvent, false);
261246

262247
return ret;
@@ -291,9 +276,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
291276
return ret;
292277
}
293278
}
294-
if (hEvent) {
295-
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
296-
}
279+
297280
appendSignalEventPostWalker(hEvent, false);
298281

299282
return ret;
@@ -817,6 +800,22 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemAdvise(ze_device_hand
817800
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
818801
}
819802

803+
template <GFXCORE_FAMILY gfxCoreFamily>
804+
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
805+
const ze_group_count_t *pThreadGroupDimensions,
806+
ze_event_handle_t hEvent) {
807+
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
808+
}
809+
810+
template <GFXCORE_FAMILY gfxCoreFamily>
811+
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
812+
if (beforeWalker) {
813+
appendEventForProfiling(hEvent, true, false);
814+
} else {
815+
appendSignalEventPostWalker(hEvent, false);
816+
}
817+
}
818+
820819
template <GFXCORE_FAMILY gfxCoreFamily>
821820
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(void *dstPtr,
822821
NEO::GraphicsAllocation *dstPtrAlloc,
@@ -1070,7 +1069,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
10701069
}
10711070

10721071
appendEventForProfilingAllWalkers(hSignalEvent, true);
1073-
adjustEventKernelCount(hSignalEvent);
10741072

10751073
if (ret == ZE_RESULT_SUCCESS && leftSize) {
10761074
Builtin func = Builtin::CopyBufferToBufferSide;
@@ -1130,22 +1128,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
11301128
isStateless);
11311129
}
11321130

1133-
if (hSignalEvent) {
1134-
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
1135-
}
11361131
appendEventForProfilingAllWalkers(hSignalEvent, false);
11371132

11381133
const auto &hwInfo = this->device->getHwInfo();
11391134
if (NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo)) {
11401135
auto event = Event::fromHandle(hSignalEvent);
11411136
if (event) {
11421137
dstAllocationStruct.needsFlush &= !event->signalScope;
1143-
dstAllocationStruct.needsFlush &= !event->l3FlushWaApplied;
11441138
}
11451139

1146-
dstAllocationStruct.needsFlush &= !isCopyOnly();
1147-
1148-
if (dstAllocationStruct.needsFlush) {
1140+
if (dstAllocationStruct.needsFlush && !isCopyOnly()) {
11491141
NEO::PipeControlArgs args;
11501142
args.dcFlushEnable = true;
11511143
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
@@ -1460,7 +1452,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
14601452
builtinFunction->setArgumentValue(2, sizeof(value), &value);
14611453

14621454
appendEventForProfilingAllWalkers(hSignalEvent, true);
1463-
adjustEventKernelCount(hSignalEvent);
14641455

14651456
uint32_t groups = static_cast<uint32_t>(size) / groupSizeX;
14661457
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
@@ -1535,7 +1526,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
15351526
builtinFunction->setArgumentValue(3, sizeof(patternSizeInEls), &patternSizeInEls);
15361527

15371528
appendEventForProfilingAllWalkers(hSignalEvent, true);
1538-
adjustEventKernelCount(hSignalEvent);
15391529

15401530
ze_group_count_t dispatchFuncArgs{groups, 1u, 1u};
15411531
res = appendLaunchKernelSplit(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent);
@@ -1574,21 +1564,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
15741564
}
15751565
}
15761566

1577-
if (hSignalEvent) {
1578-
programEventL3Flush(hSignalEvent, this->device, this->partitionCount, commandContainer);
1579-
}
15801567
appendEventForProfilingAllWalkers(hSignalEvent, false);
15811568

15821569
const auto &hwInfo = this->device->getHwInfo();
15831570
if (NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo)) {
15841571
auto event = Event::fromHandle(hSignalEvent);
15851572
if (event) {
15861573
hostPointerNeedsFlush &= !event->signalScope;
1587-
hostPointerNeedsFlush &= !event->l3FlushWaApplied;
15881574
}
15891575

1590-
hostPointerNeedsFlush &= !isCopyOnly();
1591-
15921576
if (hostPointerNeedsFlush) {
15931577
NEO::PipeControlArgs args;
15941578
args.dcFlushEnable = true;

level_zero/core/source/cmdlist/cmdlist_hw_base.inl

Lines changed: 0 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -32,26 +32,6 @@ size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
3232
return helper.getRenderSurfaceStateSize();
3333
}
3434

35-
template <GFXCORE_FAMILY gfxCoreFamily>
36-
void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(ze_event_handle_t hEvent,
37-
Device *device,
38-
uint32_t partitionCount,
39-
NEO::CommandContainer &commandContainer) {
40-
}
41-
42-
template <GFXCORE_FAMILY gfxCoreFamily>
43-
void CommandListCoreFamily<gfxCoreFamily>::adjustEventKernelCount(ze_event_handle_t hEvent) {
44-
}
45-
46-
template <GFXCORE_FAMILY gfxCoreFamily>
47-
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
48-
if (beforeWalker) {
49-
appendEventForProfiling(hEvent, true, false);
50-
} else {
51-
appendSignalEventPostWalker(hEvent, false);
52-
}
53-
}
54-
5535
template <GFXCORE_FAMILY gfxCoreFamily>
5636
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
5737
const ze_group_count_t *pThreadGroupDimensions,
@@ -194,17 +174,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
194174
return ZE_RESULT_SUCCESS;
195175
}
196176

197-
template <GFXCORE_FAMILY gfxCoreFamily>
198-
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
199-
const ze_group_count_t *pThreadGroupDimensions,
200-
ze_event_handle_t hEvent) {
201-
if (hEvent) {
202-
auto event = Event::fromHandle(hEvent);
203-
event->kernelCount = 1;
204-
}
205-
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, nullptr, false, false, false);
206-
}
207-
208177
template <GFXCORE_FAMILY gfxCoreFamily>
209178
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {}
210179

level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl

Lines changed: 7 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -84,20 +84,14 @@ void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t num
8484
}
8585

8686
template <GFXCORE_FAMILY gfxCoreFamily>
87-
void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(ze_event_handle_t hEvent,
88-
Device *device,
89-
uint32_t partitionCount,
90-
NEO::CommandContainer &commandContainer) {
87+
void programEventL3Flush(ze_event_handle_t hEvent,
88+
Device *device,
89+
uint32_t partitionCount,
90+
NEO::CommandContainer &commandContainer) {
9191
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
9292
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
9393
auto event = Event::fromHandle(hEvent);
9494

95-
const auto &hwInfo = this->device->getHwInfo();
96-
bool L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
97-
if (!L3FlushEnable || isCopyOnly()) {
98-
return;
99-
}
100-
10195
auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize())
10296
: event->getSinglePacketSize();
10397
uint64_t eventAddress = event->getPacketAddress(device) + eventPartitionOffset;
@@ -127,13 +121,6 @@ void CommandListCoreFamily<gfxCoreFamily>::programEventL3Flush(ze_event_handle_t
127121
args);
128122
}
129123

130-
template <GFXCORE_FAMILY gfxCoreFamily>
131-
void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingAllWalkers(ze_event_handle_t hEvent, bool beforeWalker) {
132-
if (hEvent && isCopyOnly()) {
133-
appendSignalEventPostWalker(hEvent, false);
134-
}
135-
}
136-
137124
template <GFXCORE_FAMILY gfxCoreFamily>
138125
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(ze_kernel_handle_t hKernel,
139126
const ze_group_count_t *pThreadGroupDimensions,
@@ -178,7 +165,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
178165
commandContainer.addToResidencyContainer(eventAlloc);
179166
L3FlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(event->signalScope, hwInfo);
180167
isTimestampEvent = event->isUsingContextEndOffset();
181-
182168
eventAddress = event->getPacketAddress(this->device);
183169
}
184170

@@ -252,7 +238,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
252238
if (partitionCount > 1) {
253239
event->setPacketsInUse(partitionCount);
254240
}
255-
programEventL3Flush(hEvent, this->device, this->partitionCount, commandContainer);
241+
if (L3FlushEnable) {
242+
programEventL3Flush<gfxCoreFamily>(hEvent, this->device, partitionCount, commandContainer);
243+
}
256244
}
257245

258246
if (neoDevice->getDebugger()) {
@@ -304,27 +292,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
304292
return ZE_RESULT_SUCCESS;
305293
}
306294

307-
template <GFXCORE_FAMILY gfxCoreFamily>
308-
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelSplit(ze_kernel_handle_t hKernel,
309-
const ze_group_count_t *pThreadGroupDimensions,
310-
ze_event_handle_t hEvent) {
311-
if (hEvent) {
312-
auto event = Event::fromHandle(hEvent);
313-
event->kernelCount += 1;
314-
}
315-
return appendLaunchKernelWithParams(hKernel, pThreadGroupDimensions, hEvent, false, false, false);
316-
}
317-
318-
template <GFXCORE_FAMILY gfxCoreFamily>
319-
void CommandListCoreFamily<gfxCoreFamily>::adjustEventKernelCount(ze_event_handle_t hEvent) {
320-
if (hEvent) {
321-
auto event = Event::fromHandle(hEvent);
322-
if (!isCopyOnly()) {
323-
event->kernelCount = 0u;
324-
}
325-
}
326-
}
327-
328295
template <GFXCORE_FAMILY gfxCoreFamily>
329296
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
330297
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),

level_zero/core/source/event/event.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -80,8 +80,8 @@ ze_result_t EventPoolImp::initialize(DriverHandle *driver, Context *context, uin
8080
eventSize = static_cast<uint32_t>(alignUp(EventPacketsCount::eventPackets * hwHelper.getSingleTimestampPacketSize(), eventAlignment));
8181

8282
size_t alignedSize = alignUp<size_t>(numEvents * eventSize, MemoryConstants::pageSize64k);
83-
NEO::AllocationType allocationType = NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER;
84-
83+
NEO::AllocationType allocationType = isEventPoolTimestampFlagSet() ? NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER
84+
: NEO::AllocationType::BUFFER_HOST_MEMORY;
8585
if (this->devices.size() > 1) {
8686
useDeviceAlloc = false;
8787
}

level_zero/core/source/event/event_impl.inl

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -393,8 +393,7 @@ uint32_t EventImp<TagSizeT>::getPacketsUsedInLastKernel() {
393393

394394
template <typename TagSizeT>
395395
void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
396-
auto kernelIndex = getCurrKernelDataIndex();
397-
kernelEventCompletionData[kernelIndex].setPacketsUsed(value);
396+
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
398397
}
399398

400399
template <typename TagSizeT>

0 commit comments

Comments
 (0)