diff --git a/bin/slm_hyper.py b/bin/slm_hyper.py index 91ab3bfc..59a71ef4 100755 --- a/bin/slm_hyper.py +++ b/bin/slm_hyper.py @@ -1,7 +1,6 @@ #!/usr/bin/python3 #Written by ABA to update the format of the slm file to be compliant with hyperflash model used in testbench -import numpy as np import os import os.path import argparse @@ -24,7 +23,10 @@ with open(args.input_file, "rU") as fi: data = list(map(lambda x:x.split(delimiter), fi.read().strip().split("\n"))) fo=open(args.output_file, "w") -A=np.array(data) + +# Write the header fo.write('@000000\n') -for i in range(0, A.shape[0],2): - fo.write('%s%s\n' %(A[i+1][1],A[i][1])) + +# Iterate over rows in pairs +for i in range(0, len(data)-1, 2): # subtract 1 to avoid IndexError if odd number of rows + fo.write(f'{data[i+1][1]}{data[i][1]}\n') diff --git a/include/archi/chips/pulp_cluster/memory_map.h b/include/archi/chips/pulp_cluster/memory_map.h index 3f6d717e..65dd0846 100644 --- a/include/archi/chips/pulp_cluster/memory_map.h +++ b/include/archi/chips/pulp_cluster/memory_map.h @@ -113,6 +113,7 @@ #define ARCHI_HMR_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_HMR_OFFSET ) #define ARCHI_TCDM_SCRUBBER_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_TCDM_SCRUBBER_OFFSET ) #define ARCHI_HWPE_HCI_ECC_ADDR ( ARCHI_CLUSTER_PERIPHERALS_ADDR + ARCHI_HWPE_HCI_ECC_OFFSET ) +#define ARCHI_IDMA_EXT_ADDR ARCHI_MCHAN_EXT_ADDR #define ARCHI_CLUSTER_CTRL_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_CLUSTER_CTRL_OFFSET ) #define ARCHI_ICACHE_CTRL_GLOBAL_ADDR(cid) ( ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid) + ARCHI_ICACHE_CTRL_OFFSET ) @@ -132,6 +133,7 @@ #define ARCHI_EU_DEMUX_OFFSET ( 0x00000 ) #define ARCHI_MCHAN_DEMUX_OFFSET ( 0x00400 ) +#define ARCHI_IDMA_DEMUX_OFFSET ARCHI_MCHAN_DEMUX_OFFSET #define ARCHI_DEMUX_PERIPHERALS_ADDR ( ARCHI_CLUSTER_ADDR + ARCHI_DEMUX_PERIPHERALS_OFFSET ) @@ -139,4 +141,6 @@ #define ARCHI_EU_DEMUX_ADDR ( ARCHI_DEMUX_PERIPHERALS_ADDR + ARCHI_EU_DEMUX_OFFSET ) #define 
ARCHI_MCHAN_DEMUX_ADDR ( ARCHI_DEMUX_PERIPHERALS_ADDR + ARCHI_MCHAN_DEMUX_OFFSET ) +#define ARCHI_IDMA_DEMUX_ADDR ( ARCHI_DEMUX_PERIPHERALS_ADDR + ARCHI_IDMA_DEMUX_OFFSET ) + #endif diff --git a/include/archi/chips/pulp_cluster/properties.h b/include/archi/chips/pulp_cluster/properties.h index 305b413c..42f73beb 100644 --- a/include/archi/chips/pulp_cluster/properties.h +++ b/include/archi/chips/pulp_cluster/properties.h @@ -18,6 +18,8 @@ #ifndef __ARCHI_CHIPS_PULP_PROPERTIES_H__ #define __ARCHI_CHIPS_PULP_PROPERTIES_H__ +#define ARCHI_HAS_DMA_DEMUX 1 + /* * FPGA */ diff --git a/include/archi/chips/pulp_cluster/pulp.h b/include/archi/chips/pulp_cluster/pulp.h index 9cd94ffc..ae516a64 100644 --- a/include/archi/chips/pulp_cluster/pulp.h +++ b/include/archi/chips/pulp_cluster/pulp.h @@ -37,8 +37,12 @@ #include "archi/chips/pulp_cluster/memory_map.h" #include "archi/chips/pulp_cluster/apb_soc.h" #include "archi/stdout/stdout_v3.h" + #include "archi/dma/mchan_v7.h" +#include "archi/dma/idma_v2.h" + + #include "archi/udma/cpi/udma_cpi_v1.h" #include "archi/udma/i2c/udma_i2c_v2.h" #include "archi/udma/i2s/udma_i2s_v2.h" diff --git a/include/archi/dma/idma_v2.h b/include/archi/dma/idma_v2.h new file mode 100644 index 00000000..fd1c15e5 --- /dev/null +++ b/include/archi/dma/idma_v2.h @@ -0,0 +1,106 @@ +// Generated register defines for idma_reg32_3d + +// Copyright information found in source file: +// Copyright 2023 ETH Zurich and University of Bologna. 
+ +// Licensing information found in source file: +// +// SPDX-License-Identifier: SHL-0.51 + +#ifndef _IDMA_REG32_3D_REG_DEFS_ +#define _IDMA_REG32_3D_REG_DEFS_ + +#ifdef __cplusplus +extern "C" { +#endif +// Number of dimensions available +#define IDMA_REG32_3D_PARAM_NUM_DIMS 3 + +// Register width +#define IDMA_REG32_3D_PARAM_REG_WIDTH 32 + +// Configuration Register for DMA settings +#define IDMA_REG32_3D_CONF_REG_OFFSET 0x0 +#define IDMA_REG32_3D_CONF_DECOUPLE_AW_BIT 0 +#define IDMA_REG32_3D_CONF_DECOUPLE_RW_BIT 1 +#define IDMA_REG32_3D_CONF_SRC_REDUCE_LEN_BIT 2 +#define IDMA_REG32_3D_CONF_DST_REDUCE_LEN_BIT 3 +#define IDMA_REG32_3D_CONF_SRC_MAX_LLEN_MASK 0x7 +#define IDMA_REG32_3D_CONF_SRC_MAX_LLEN_OFFSET 4 +#define IDMA_REG32_3D_CONF_SRC_MAX_LLEN_FIELD \ + ((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_SRC_MAX_LLEN_MASK, .index = IDMA_REG32_3D_CONF_SRC_MAX_LLEN_OFFSET }) +#define IDMA_REG32_3D_CONF_DST_MAX_LLEN_MASK 0x7 +#define IDMA_REG32_3D_CONF_DST_MAX_LLEN_OFFSET 7 +#define IDMA_REG32_3D_CONF_DST_MAX_LLEN_FIELD \ + ((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_DST_MAX_LLEN_MASK, .index = IDMA_REG32_3D_CONF_DST_MAX_LLEN_OFFSET }) +#define IDMA_REG32_3D_CONF_ENABLE_ND_MASK 0x3 +#define IDMA_REG32_3D_CONF_ENABLE_ND_OFFSET 10 +#define IDMA_REG32_3D_CONF_ENABLE_ND_FIELD \ + ((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_ENABLE_ND_MASK, .index = IDMA_REG32_3D_CONF_ENABLE_ND_OFFSET }) +#define IDMA_REG32_3D_CONF_SRC_PROTOCOL_MASK 0x7 +#define IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET 12 +#define IDMA_REG32_3D_CONF_SRC_PROTOCOL_FIELD \ + ((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_SRC_PROTOCOL_MASK, .index = IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET }) +#define IDMA_REG32_3D_CONF_DST_PROTOCOL_MASK 0x7 +#define IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET 15 +#define IDMA_REG32_3D_CONF_DST_PROTOCOL_FIELD \ + ((bitfield_field32_t) { .mask = IDMA_REG32_3D_CONF_DST_PROTOCOL_MASK, .index = IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET }) + +// DMA Status +#define 
IDMA_REG32_3D_STATUS_0_REG_OFFSET 0x4 +#define IDMA_REG32_3D_STATUS_0_BUSY_0_MASK 0x3ff +#define IDMA_REG32_3D_STATUS_0_BUSY_0_OFFSET 0 +#define IDMA_REG32_3D_STATUS_0_BUSY_0_FIELD \ + ((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_0_BUSY_0_MASK, .index = IDMA_REG32_3D_STATUS_0_BUSY_0_OFFSET }) + +// DMA Status +#define IDMA_REG32_3D_STATUS_1_REG_OFFSET 0x8 +#define IDMA_REG32_3D_STATUS_1_BUSY_1_MASK 0x3ff +#define IDMA_REG32_3D_STATUS_1_BUSY_1_OFFSET 0 +#define IDMA_REG32_3D_STATUS_1_BUSY_1_FIELD \ + ((bitfield_field32_t) { .mask = IDMA_REG32_3D_STATUS_1_BUSY_1_MASK, .index = IDMA_REG32_3D_STATUS_1_BUSY_1_OFFSET }) + +// Next ID, launches transfer, returns 0 if transfer not set up properly. +#define IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET 0xc + +// Next ID, launches transfer, returns 0 if transfer not set up properly. +#define IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET 0x10 + +// Get ID of finished transactions. +#define IDMA_REG32_3D_DONE_ID_0_REG_OFFSET 0x14 + +// Get ID of finished transactions. 
+#define IDMA_REG32_3D_DONE_ID_1_REG_OFFSET 0x18 + +// Low destination address +#define IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET 0xd0 + +// Low source address +#define IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET 0xd8 + +// Low transfer length in byte +#define IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET 0xe0 + +// Low destination stride dimension 2 +#define IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET 0xe8 + +// Low source stride dimension 2 +#define IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET 0xf0 + +// Low number of repetitions dimension 2 +#define IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET 0xf8 + +// Low destination stride dimension 3 +#define IDMA_REG32_3D_DST_STRIDE_3_LOW_REG_OFFSET 0x100 + +// Low source stride dimension 3 +#define IDMA_REG32_3D_SRC_STRIDE_3_LOW_REG_OFFSET 0x108 + +// Low number of repetitions dimension 3 +#define IDMA_REG32_3D_REPS_3_LOW_REG_OFFSET 0x110 + +#ifdef __cplusplus +} // extern "C" +#endif +#endif // _IDMA_REG32_3D_REG_DEFS_ +// End generated register defines for idma_reg32_3d \ No newline at end of file diff --git a/include/hal/chips/pulp_cluster/pulp.h b/include/hal/chips/pulp_cluster/pulp.h index 671dd443..550b2696 100644 --- a/include/hal/chips/pulp_cluster/pulp.h +++ b/include/hal/chips/pulp_cluster/pulp.h @@ -26,7 +26,6 @@ #endif // __ibex__ #include "hal/eu/eu_v3.h" #include "hal/itc/itc_v1.h" -#include "hal/dma/mchan_v7.h" #include "hal/timer/timer_v2.h" #include "hal/soc_eu/soc_eu_v2.h" #include "hal/cluster_ctrl/cluster_ctrl_v2.h" @@ -45,4 +44,8 @@ #include "hal/tcdm_scrubber/tcdm_scrubber.h" #include "hal/hwpe_hci_ecc/hwpe_hci_ecc.h" +#include "hal/dma/mchan_v7.h" + +#include "hal/dma/idma_v2.h" + #endif diff --git a/include/hal/cluster_ctrl/cluster_ctrl_v2.h b/include/hal/cluster_ctrl/cluster_ctrl_v2.h index 322aa864..0ab57b5c 100644 --- a/include/hal/cluster_ctrl/cluster_ctrl_v2.h +++ b/include/hal/cluster_ctrl/cluster_ctrl_v2.h @@ -71,4 +71,12 @@ static inline void hal_cluster_ctrl_return_set_remote(int cid, int value){ 
pulp_write32(ARCHI_CLUSTER_PERIPHERALS_GLOBAL_ADDR(cid)+ARCHI_CLUSTER_CTRL_OFFSET+ARCHI_CLUSTER_CTRL_RETURN, value | 1 << ARCHI_CLUSTER_CTRL_RETURN_SHIFT_BITS); } +static inline void plp_ctrl_cluster_cfg_set(unsigned int mask) { + pulp_write32(ARCHI_CLUSTER_CTRL_ADDR + ARCHI_CLUSTER_CTRL_CLUSTER_CFG, mask); +} + +static inline int plp_ctrl_cluster_cfg_get() { + return pulp_read32(ARCHI_CLUSTER_CTRL_ADDR + ARCHI_CLUSTER_CTRL_CLUSTER_CFG); +} + #endif diff --git a/include/hal/dma/idma_v2.h b/include/hal/dma/idma_v2.h new file mode 100644 index 00000000..7afff76c --- /dev/null +++ b/include/hal/dma/idma_v2.h @@ -0,0 +1,1166 @@ +/* + * Copyright (C) 2021 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __HAL_IDMA_V2_H__ +#define __HAL_IDMA_V2_H__ + +#include +#include "hal/pulp.h" + +#define PLP_DMA_LOC2EXT 0 +#define PLP_DMA_EXT2LOC 1 + +#define PLP_DMA_1D 0 +#define PLP_DMA_2D 1 + +#define IDMA_EVENT 8 // all iDMA tx_cplt events are broadcast +#define IDMA_ID_COUNTER_WIDTH 32 +#define IDMA_ID_MASK 0xffffffff + +typedef enum { + IDMA_PROT_AXI = 0, // AXI protocol: L2 memory + IDMA_PROT_OBI = 1, // OBI protocol: L1 memory + IDMA_PROT_INIT = 4 // INIT protocol: /dev/null (write to here and the stream disappears, read from here and get all-zeros) +} idma_prot_t; + +typedef unsigned int dma_ext_t; + +#define IDMA_DEFAULT_CONFIG 0x0 +#define IDMA_DEFAULT_CONFIG_L1TOL2 (IDMA_DEFAULT_CONFIG | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_AXI << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) +#define IDMA_DEFAULT_CONFIG_L2TOL1 (IDMA_DEFAULT_CONFIG | (IDMA_PROT_AXI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) +#define IDMA_DEFAULT_CONFIG_L1TOL1 (IDMA_DEFAULT_CONFIG | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) + +#define IDMA_DEFAULT_CONFIG_2D 0x400 +#define IDMA_DEFAULT_CONFIG_L1TOL2_2D (IDMA_DEFAULT_CONFIG_2D | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_AXI << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) +#define IDMA_DEFAULT_CONFIG_L2TOL1_2D (IDMA_DEFAULT_CONFIG_2D | (IDMA_PROT_AXI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) +#define IDMA_DEFAULT_CONFIG_L1TOL1_2D (IDMA_DEFAULT_CONFIG_2D | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) + +#define IDMA_DEFAULT_CONFIG_3D 0x800 +#define IDMA_DEFAULT_CONFIG_L1TOL2_3D (IDMA_DEFAULT_CONFIG_3D | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_AXI << 
IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) +#define IDMA_DEFAULT_CONFIG_L2TOL1_3D (IDMA_DEFAULT_CONFIG_3D | (IDMA_PROT_AXI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) +#define IDMA_DEFAULT_CONFIG_L1TOL1_3D (IDMA_DEFAULT_CONFIG_3D | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (IDMA_PROT_OBI << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)) + + +/** @name High-level DMA memory copy functions; compatible with MCHAN driver interface + * The following functions can be used to trigger DMA transfers to copy data between the cluster memory (L1) and another memory outside the cluster (another cluster L1 or L2). + * The DMA supports the following features: + * - Transfers are event-based. With event-based transfers the core can call a wait function to block execution until the transfer is done. + * - The DMA supports 2D transfers which allows transfering a 2D tile in one command. Additional information must then be given to specify the width of the tile and the number of bytes between 2 lines of the tile. + * - The event sent at the end of the transfer is broadcasted to all cluster cores. + * - To identify specific transfers, the DMA provides a transfer identifier. + * - Multiple transfers can be launched simultaneously, with them being executed 2-4 in parallel, with more waiting in a queue. + */ +/**@{*/ + + +/** Memory transfer with event-based completion. + * + \param ext Address in the external memory where to access the data. There is no restriction on memory alignment. + \param loc Address in the cluster memory where to access the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary + \return The identifier of the transfer. 
This can be used with plp_dma_wait to wait for the completion of this transfer. + */ +static inline int plp_dma_memcpy(dma_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); +static inline int plp_cl_dma_memcpy(dma_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); + + +/** Cluster memory to external memory transfer with event-based completion. + * + \param ext Address in the external memory where to store the data. There is no restriction on memory alignment. + \param loc Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ +static inline int plp_dma_l1ToExt(dma_ext_t ext, unsigned int loc, unsigned short size); +static inline int plp_cl_dma_l1ToExt(dma_ext_t ext, unsigned int loc, unsigned short size); + + +/** External memory to cluster memory transfer with event-based completion. + * + \param loc Address in the cluster memory where to store the data. There is no restriction on memory alignment. + \param ext Address in the external memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ +static inline int plp_dma_extToL1(unsigned int loc, dma_ext_t ext, unsigned short size); + + +/** 2-dimensional memory transfer with event-based completion. + * + \param ext Address in the external memory where to access the data. There is no restriction on memory alignment. + \param loc Address in the cluster memory where to access the data. 
There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. + \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ +static inline int plp_dma_memcpy_2d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, int ext2loc); +static inline int plp_cl_dma_memcpy_2d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, int ext2loc); + + +/** Cluster memory to external memory 2-dimensional transfer with event-based completion. + * + \param ext Address in the external memory where to store the data. There is no restriction on memory alignment. + \param loc Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. + \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. 
+ \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ +static inline int plp_dma_l1ToExt_2d(dma_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_cl_dma_l1ToExt_2d(dma_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); + +/** External memory to cluster memory 2-dimensional transfer with event-based completion. + * + \param loc Address in the cluster memory where to store the data. There is no restriction on memory alignment. + \param ext Address in the external memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. + \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer +*/ +static inline int plp_dma_extToL1_2d(unsigned int loc, dma_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_cl_dma_extToL1_2d(unsigned int loc, dma_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); + +/** 3-dimensional memory transfer with event-based completion. + * + \param ext Address in the external memory where to access the data. There is no restriction on memory alignment. + \param loc Address in the cluster memory where to access the data. 
There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. + \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary + \param reps_2d Number of 1D transfers inside each 2D transfer. + \param reps_3d Number of 2D pages to be transfered. + \param stride_3d 3D stride, which is the number of bytes which are added to the beginning of the current page to switch to the next one in the destination memory region. + \param length_3d 3D length, which is the number of transfered bytes after which the DMA will switch to the next page in the source memory region. + \param + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ + static inline int plp_dma_memcpy_3d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, unsigned int stride_3d, unsigned int length_3d, unsigned int reps_2d, unsigned int reps_3d, int ext2loc); + static inline int plp_cl_dma_memcpy_3d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, unsigned int stride_3d, unsigned int length_3d, unsigned int reps_3d, int ext2loc); + +//!@} + + +/** @name iDMA specific copy functions; these are preferred when writing new code. + /**@{*/ + +/** +/** Arbitrary memory transfer with event-based completion. + * + \param src Address from where to copy data. There is no restriction on memory alignment. 
+ \param dst Address to which to copy data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be smaller than 65536. + \param src_prot choose IDMA_PROT_AXI for transfer from L2, IDMA_PROT_OBI for transfer from L1 and IDMA_PROT_INIT for a transfer + of all-0 data + \param dst_prot choose IDMA_PROT_AXI for transfer to L2, IDMA_PROT_OBI for transfer to L1 and IDMA_PROT_INIT for a transfer + to /dev/null (i.e. the stream will be "eaten"). Note that AXI-to-AXI transfers are not supported. + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ +static inline int pulp_idma_memcpy(unsigned int src, unsigned int dst, unsigned int size, idma_prot_t src_prot, idma_prot_t dst_prot); +static inline int pulp_cl_idma_memcpy(unsigned int src, unsigned int dst, unsigned int size, idma_prot_t src_prot, idma_prot_t dst_prot); + +/** Cluster memory to external memory transfer with event-based completion. + * + \param src Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param dst Address in the external memory where to store the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. +*/ +static inline int pulp_idma_L1ToL2(unsigned int src, unsigned int dst, unsigned short size); +static inline int pulp_cl_idma_L1ToL2(unsigned int src, unsigned int dst, unsigned short size); + +/** External memory to cluster memory transfer with event-based completion. + * + \param src Address in the cluster memory where to store the data. There is no restriction on memory alignment.
+ \param dst Address in the external memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. +*/ +static inline int pulp_idma_L2ToL1(unsigned int src, unsigned int dst, unsigned short size); +static inline int pulp_cl_idma_L2ToL1(unsigned int src, unsigned int dst, unsigned short size); + + +/** Intra-cluster memory transfer with event-based completion. + * + \param src Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param dst Address in the cluster memory where to store the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. +*/ +static inline int pulp_idma_L1ToL1(unsigned int src, unsigned int dst, unsigned short size); +static inline int pulp_cl_idma_L1ToL1(unsigned int src, unsigned int dst, unsigned short size); + +/** 2-dimensional memory transfer with event-based completion. + * + \param src Address from where to access the data. There is no restriction on memory alignment. + \param dst Address where to write the data. There is no restriction on memory alignment. + \param size Number of bytes per 1D transfer to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param src_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. Set equal to size to obtain an 1D transfer from the source region.
+ \param dst_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. Set equal to size to obtain an 1D transfer to the destination memory. + \param num_reps Number of 1D transfers that comprise the 2D transfer. + \param src_prot choose IDMA_PROT_AXI for transfer from L2, IDMA_PROT_OBI for transfer from L1 and IDMA_PROT_INIT for a transfer + of all-0 data + \param dst_prot choose IDMA_PROT_AXI for transfer to L2, IDMA_PROT_OBI for transfer to L1 and IDMA_PROT_INIT for a transfer + to /dev/null (i.e. the stream will be "eaten"). Note that AXI-to-AXI transfers are not supported. + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ +static inline int pulp_idma_memcpy_2d(unsigned int src, unsigned int dst, unsigned int size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, idma_prot_t src_prot, idma_prot_t dst_prot); +static inline int pulp_cl_idma_memcpy_2d(unsigned int src, unsigned int dst, unsigned int size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, idma_prot_t src_prot, idma_prot_t dst_prot); + +/** Cluster memory to external memory 2-dimensional transfer with event-based completion. + * + \param src Address in the external memory where to store the data. There is no restriction on memory alignment. + \param dst Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param src_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. 
+ \param dst_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param num_reps Number of 1D transfers that comprise the 2D transfer. + + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ +static inline int pulp_idma_L1ToL2_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); +static inline int pulp_cl_idma_L1ToL2_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); + + +/** Cluster memory to external memory 2-dimensional transfer with event-based completion. + * + \param src Address in the external memory where to store the data. There is no restriction on memory alignment. + \param dst Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param src_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param dst_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param num_reps Number of 1D transfers that comprise the 2D transfer. + + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. 
+ */ +static inline int pulp_idma_L2ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); +static inline int pulp_cl_idma_L2ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); + +/** Intra-cluster memory 2-dimensional transfer with event-based completion. + * + \param src Address in the cluster memory where to store the data. There is no restriction on memory alignment. + \param dst Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param src_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param dst_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param num_reps Number of 1D transfers that comprise the 2D transfer. + + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ + +static inline int pulp_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); +static inline int pulp_cl_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); + + + +/** Cluster memory to external memory 3-dimensional transfer with event-based completion. + * + \param src Address in the external memory where to store the data. There is no restriction on memory alignment. 
+ \param dst Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param src_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param dst_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param num_reps Number of 1D transfers that comprise the 2D transfer. + \param src_stride_3d Stride between 2D pages in the source memory. + \param dst_stride_3d Stride between 2D pages in the destination memory. + \param num_reps_3d Number of 2D pages to be transfered. + + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ + static inline int pulp_idma_L1ToL2_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d); + static inline int pulp_cl_idma_L1ToL2_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d); + +// static inline int pulp_cl_idma_L1ToL2_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); + + + /** Cluster memory to external memory 3-dimensional transfer with event-based completion. + * + \param src Address in the external memory where to store the data. There is no restriction on memory alignment. + \param dst Address in the cluster memory where to load the data. 
There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param src_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param dst_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param num_reps Number of 1D transfers that comprise the 2D transfer. + \param src_stride_3d Stride between 2D pages in the source memory. + \param dst_stride_3d Stride between 2D pages in the destination memory. + \param num_reps_3d Number of 2D pages to be transfered. + + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ + static inline int pulp_idma_L2ToL1_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d); + static inline int pulp_cl_idma_L2ToL1_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d); + + /** Intra-cluster memory 3-dimensional transfer with event-based completion. + * + \param src Address in the external memory where to store the data. There is no restriction on memory alignment. + \param dst Address in the cluster memory where to load the data. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. 
+ \param src_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param dst_stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. + \param num_reps Number of 1D transfers that comprise the 2D transfer. + \param src_stride_3d Stride between 2D pages in the source memory. + \param dst_stride_3d Stride between 2D pages in the destination memory. + \param num_reps_3d Number of 2D pages to be transfered. + + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + */ + + static inline int pulp_idma_L1ToL1_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps_2d, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d); + static inline int pulp_cl_idma_L1ToL1_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps_2d, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d); + + +/** DMA-based zeromem using the "init" protocol. + * + \param dst Address in memory to fill with zeros. There is no restriction on memory alignment. + \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. + \param dst_prot protocol with which the destination memory is attached (should be AXI or OBI) + + \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. 
+ */ +static inline int pulp_idma_zeromem(unsigned int dst, unsigned short size, idma_prot_t dst_prot); +static inline int pulp_cl_idma_zeromem(unsigned int dst, unsigned short size, idma_prot_t dst_prot); +/** @name DMA wait functions + */ + +/** DMA barrier. + * This blocks the core until no transfer is on-going in the DMA. + * Careful: these only wait for transfers towards L2 + */ +static inline void plp_dma_barrier(); +static inline void plp_cl_dma_barrier(); + +/** DMA barrier. + * This blocks the core until no transfer is on-going in the DMA. + * Careful: these only wait for transfers towards L1 + */ + +static inline void plp_dma_barrier_toL1(); +static inline void plp_cl_dma_barrier_toL1(); + +/** DMA barrier. + * This blocks the core until no transfer is on-going in the DMA. + * Careful: these only wait for transfers towards L2 + */ + +static inline void plp_dma_barrier_toL2(); +static inline void plp_cl_dma_barrier_toL2(); + +/** DMA wait. + * This blocks the core until the specified transfer is finished. + * + \param counter The counter ID identifying the transfer. This has been returned from an enqueued transfer (e.g. plp_dma_l2ToL1_2d) + */ +static inline void plp_dma_wait(unsigned int dma_tx_id); +static inline void plp_cl_dma_wait(unsigned int dma_tx_id); +static inline void plp_cl_dma_wait_toL1(unsigned int dma_tx_id); +static inline void plp_cl_dma_wait_toL2(unsigned int dma_tx_id); +//!@} + + +/** @name iDMA low-level functions. + * This can be used instead of the high-level ones in order to have more control over the DMA features. + */ + +/** + * iDMA configuration generation + * A standard memcpy will set all of these values - except for src and dest protocol - to 0. + * + \param decouple if set to true, there is no longer exactly one AXI write_request issued for + every read request. This mode can improve performance of unaligned transfers when crossing + the AXI page boundaries. 
+ \param deburst if set, the DMA will split all bursts in single transfers + \param serialize if set, the DMA will only send AX belonging to a given Arbitrary 1D burst request + at a time. This is default behavior to prevent deadlocks. Setting `serialize` to + zero violates the AXI4+ATOP specification. + \param num_dim number of dimensions: 1, 2 or 3. Invalid values will be treated as 1. + \param src_prot Source protocol: AXI for transfer from L2, OBI for transfer from L1, INIT for zeromem + \param dst_prot Destination protocol: AXI for transfer to L2, OBI for transfer to L1, INIT for transfer to /dev/null + \return The generated configuration + */ +static inline unsigned int pulp_idma_get_conf(unsigned int decouple_aw, unsigned int decouple_rw, unsigned int n_d, idma_prot_t src_prot, idma_prot_t dst_prot); + + +/** + * Setting only source and destination protocols for a given transfer configuration + * + \param conf the configuration on which to set the source and destination protocols + \param src_prot Source protocol: AXI for transfer from L2, OBI for transfer from L1, INIT for zeromem + \param dst_prot Destination protocol: AXI for transfer to L2, OBI for transfer to L1, INIT for transfer to /dev/null + \return The generated configuration + */ +static inline unsigned int pulp_idma_set_conf_prot(unsigned int conf, idma_prot_t src_prot, idma_prot_t dst_prot); + +/** + * iDMA transfer status + * + \param dma_tx_id The dma transfer identifier + \return transfer status. 1 if complete, 0 if still ongoing or waiting. 
+ */ +static inline unsigned int pulp_idma_tx_cplt(unsigned int dma_tx_id); +static inline unsigned int pulp_cl_idma_tx_cplt(unsigned int dma_tx_id); + + +/** + * iDMA 2D memory transfer + * Launches a standard 2D memory transfer + * + \param dst_addr The destination address + \param src_addr The source address + \param num_bytes The number bytes (per stride) + \param dst_stride The stride at the destination + \param src_stride The stride at the source + \param num_reps The number of repetitions + \return The dma transfer identifier + */ +//static inline unsigned int pulp_idma_memcpy_2d(unsigned int const dst_addr, unsigned int const src_addr, unsigned int num_bytes, unsigned int dst_stride, unsigned int src_stride, unsigned int num_reps); + + +/** + * iDMA advanced memory transfer + * Launches a 1D memory transfer with special configuration options + * + \param dst_addr The destination address + \param src_addr The source address + \param num_bytes The number bytes + \param decouple if set to true, there is no longer exactly one AXI write_request issued for + every read request. This mode can improve performance of unaligned transfers when crossing + the AXI page boundaries. + \param deburst if set, the DMA will split all bursts in single transfers + \param serialize if set, the DMA will only send AX belonging to a given Arbitrary 1D burst request + at a time. This is default behavior to prevent deadlocks. Setting `serialize` to + zero violates the AXI4+ATOP specification. 
+ \param twod if set, the DMA will execute a 2D transfer + \param dst_stride if 2D, the stride at the destination + \param src_stride if 2D, the stride at the source + \param num_reps if 2D, the number of repetitions + \return The dma trasfer identifier + */ +//static inline unsigned int pulp_idma_memcpy_advanced(unsigned int const dst_addr, unsigned int const src_addr, unsigned int num_bytes, unsigned int decouple, unsigned int deburst, unsigned int serialize, unsigned int twod, unsigned int dst_stride, unsigned int src_stride, unsigned int num_reps); + +/** Return the DMA status. + * + \return DMA status. 1 means there are still on-going transfers, 0 means nothing is on-going. + */ +static inline unsigned int plp_dma_status(); +static inline unsigned int plp_cl_dma_status(); + +/** Return the DMA status for a transfer towards L1 memory. + * + \return DMA status. 1 means there are still on-going transfers, 0 means nothing is on-going. + */ + +static inline unsigned int plp_dma_status_toL1(); +static inline unsigned int plp_cl_dma_status_toL1(); + +/** Return the DMA status for a transfer towards L2 memory. + * + \return DMA status. 1 means there are still on-going transfers, 0 means nothing is on-going. + */ + +static inline unsigned int plp_dma_status_toL2(); +static inline unsigned int plp_cl_dma_status_toL2(); + +/* CLOCK GATING PROCEDURE FOR iDMA */ +/* Three modes are supported: + - No clock: the whole iDMA is unresponsive + - Frontend-only clock: only the iDMA frontend is clocked. This way power consumption + is kept to a minimum while still being responsive to incoming transfer requests. + - Fully clocked: both the frontend and datapath of iDMA are clocked. Notice that + clock gating for the dapath is fully managed in rtl. 
+*/ + +// Enables the frontend clock +static inline void plp_idma_enable_clk(); +// Disables the frontend clock +static inline void plp_idma_disable_clk(); + +//!@} + + +/// @cond IMPLEM + +#if ARCHI_HAS_DMA_DEMUX +#define DMA_DEMUX_ADDR ARCHI_IDMA_DEMUX_ADDR +#endif +#define DMA_ADDR ARCHI_IDMA_EXT_ADDR + +#if defined(__riscv__) && !defined(RV_ISA_RV32) && !defined(__LLVM__) +#define DMA_WRITE(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)DMA_ADDR, (offset)) +#define DMA_READ(offset) __builtin_pulp_OffsetedRead((int *)DMA_ADDR, (offset)) +#ifdef ARCHI_HAS_DMA_DEMUX +#define DMA_CL_WRITE(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)DMA_DEMUX_ADDR, (offset)) +#define DMA_CL_READ(offset) __builtin_pulp_OffsetedRead((int *)DMA_DEMUX_ADDR, (offset)) +#endif +#else +#define DMA_WRITE(value, offset) pulp_write32(DMA_ADDR + (offset), (value)) +#define DMA_READ(offset) pulp_read32(DMA_ADDR + (offset)) +#define DMA_CL_WRITE(value, offset) DMA_WRITE(value, offset) +#define DMA_CL_READ(offset) DMA_READ(offset) +#ifdef ARCHI_HAS_DMA_DEMUX +#define DMA_CL_WRITE(value, offset) pulp_write32(DMA_DEMUX_ADDR + (offset), (value)) +#define DMA_CL_READ(offset) pulp_read32(DMA_DEMUX_ADDR + (offset)) +#endif +#endif +// if we don't have the peripheral demux, the cluster write/read functions are equal to the regular versions +#ifndef ARCHI_HAS_DMA_DEMUX +#define DMA_CL_WRITE(value, offset) DMA_WRITE(value, offset) +#define DMA_CL_READ(offset) DMA_READ(offset) +#endif + +// +// CLOCK GATING CONTROL +// + +static inline void plp_idma_enable_clk() { + uint32_t cluster_ctrl_cfg_reg; + cluster_ctrl_cfg_reg = plp_ctrl_cluster_cfg_get(); + plp_ctrl_cluster_cfg_set(cluster_ctrl_cfg_reg | (1 << 17)); +} + +static inline void plp_idma_disable_clk() { + uint32_t cluster_ctrl_cfg_reg; + cluster_ctrl_cfg_reg = plp_ctrl_cluster_cfg_get(); + plp_ctrl_cluster_cfg_set(cluster_ctrl_cfg_reg & (0 << 17)); +} + +static inline int plp_dma_memcpy(dma_ext_t ext, unsigned int loc, 
unsigned short size, int ext2loc) { + if (ext2loc) + return pulp_idma_L2ToL1(ext, loc, size); + else + return pulp_idma_L1ToL2(loc, ext, size); +} +static inline int plp_cl_dma_memcpy(dma_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { + if (ext2loc) + return pulp_cl_idma_L2ToL1(ext, loc, size); + else + return pulp_cl_idma_L1ToL2(loc, ext, size); +} + +static inline int plp_dma_l1ToExt(dma_ext_t ext, unsigned int loc, unsigned short size) { + pulp_idma_L1ToL2(loc, ext, size); +} +static inline int plp_cl_dma_l1ToExt(dma_ext_t ext, unsigned int loc, unsigned short size) { + pulp_cl_idma_L1ToL2(loc, ext, size); +} + +static inline int plp_dma_extToL1(unsigned int loc, dma_ext_t ext, unsigned short size) { + pulp_idma_L2ToL1(ext, loc, size); +} +static inline int plp_cl_dma_extToL1(unsigned int loc, dma_ext_t ext, unsigned short size) { + pulp_cl_idma_L2ToL1(ext, loc, size); +} + +static inline int plp_dma_memcpy_2d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, int ext2loc) { + if (ext2loc) + return pulp_idma_L2ToL1_2d(ext, loc, length, stride, length, size/length); + else + return pulp_idma_L1ToL2_2d(loc, ext, length, length, stride, size/length); +} +static inline int plp_cl_dma_memcpy_2d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, int ext2loc) { + if (ext2loc) + return pulp_cl_idma_L2ToL1_2d(ext, loc, length, stride, length, size/length); + else + return pulp_cl_idma_L1ToL2_2d(loc, ext, length, length, stride, size/length); +} + +static inline int plp_dma_l1ToExt_2d(dma_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { + return pulp_idma_L1ToL2_2d(loc, ext, length, length, stride, size/length); +} +static inline int plp_cl_dma_l1ToExt_2d(dma_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { + return pulp_cl_idma_L1ToL2_2d(loc, ext, length, length, stride, 
size/length); +} + +static inline int plp_dma_extToL1_2d(unsigned int loc, dma_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { + return pulp_idma_L2ToL1_2d(loc, ext, length, stride, length, size/length); +} +static inline int plp_cl_dma_extToL1_2d(unsigned int loc, dma_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { + return pulp_cl_idma_L2ToL1_2d(loc, ext, length, stride, length, size/length); +} + +static inline int plp_dma_memcpy_3d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, unsigned int stride_3d, unsigned int length_3d, unsigned int reps_2d, unsigned int reps_3d, int ext2loc) { + if (ext2loc) + return pulp_idma_L2ToL1_3d(ext, loc, length, stride, length, reps_2d, stride_3d, length_3d, reps_3d); + else + return pulp_idma_L1ToL2_3d(loc, ext, length, length, stride, reps_2d, length_3d, stride_3d, reps_3d); +} + +static inline int plp_cl_dma_memcpy_3d(dma_ext_t ext, unsigned int loc, unsigned int size, unsigned int stride, unsigned int length, unsigned int stride_3d, unsigned int length_3d, unsigned int reps_3d, int ext2loc) { + if (ext2loc) + return pulp_cl_idma_L2ToL1_3d(ext, loc, length, stride, length, size/length, stride_3d, length_3d, reps_3d); + else + return pulp_cl_idma_L1ToL2_3d(loc, ext, length, length, stride, size/length, length_3d, stride_3d, reps_3d); +} + + + +static inline unsigned int pulp_idma_set_conf_prot(unsigned int conf, idma_prot_t src_prot, idma_prot_t dst_prot){ +#if defined(__riscv__) + conf = __builtin_bitinsert(conf, src_prot, 3, IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET); + conf = __builtin_bitinsert(conf, dst_prot, 3, IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET); +#else + conf &= (~((IDMA_REG32_3D_CONF_SRC_PROTOCOL_MASK | (IDMA_REG32_3D_CONF_DST_PROTOCOL_MASK << (IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET - IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET))) << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET)); // set the relevant bits to 0 + conf |= 
((src_prot << IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET) | (dst_prot << IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET)); +#endif + return conf; +} + +static inline unsigned int pulp_idma_get_conf(unsigned int decouple_aw, unsigned int decouple_rw, unsigned int n_d, idma_prot_t src_prot, idma_prot_t dst_prot){ + unsigned int conf; +#if defined(__riscv__) + conf = __builtin_bitinsert(0, decouple_aw, 1, IDMA_REG32_3D_CONF_DECOUPLE_AW_BIT); + conf = __builtin_bitinsert(conf, decouple_rw, 1, IDMA_REG32_3D_CONF_DECOUPLE_RW_BIT); + conf = __builtin_bitinsert(conf, n_d, 2, IDMA_REG32_3D_CONF_ENABLE_ND_OFFSET); + conf = __builtin_bitinsert(conf, src_prot, 3, IDMA_REG32_3D_CONF_SRC_PROTOCOL_OFFSET); + conf = __builtin_bitinsert(conf, dst_prot, 3, IDMA_REG32_3D_CONF_DST_PROTOCOL_OFFSET); + // TODO: add burst length reduction +#else + conf = (((decouple_rw & 0x1)<> (IDMA_ID_COUNTER_WIDTH-1) == my_id >> (IDMA_ID_COUNTER_WIDTH-1)) { + return my_id <= done_id; + } else { + return ((done_id & (IDMA_ID_MASK - (1<<(IDMA_ID_COUNTER_WIDTH-1))) < (1<<(IDMA_ID_COUNTER_WIDTH-2)))); + } +} +static inline unsigned int pulp_cl_idma_tx_cplt(unsigned int dma_tx_id) { + unsigned int done_id = DMA_CL_READ(IDMA_REG32_3D_DONE_ID_0_REG_OFFSET); + unsigned int my_id = dma_tx_id & IDMA_ID_MASK; + if (done_id >> (IDMA_ID_COUNTER_WIDTH-1) == my_id >> (IDMA_ID_COUNTER_WIDTH-1)) { + return my_id <= done_id; + } else { + return ((done_id & (IDMA_ID_MASK - (1<<(IDMA_ID_COUNTER_WIDTH-1))) < (1<<(IDMA_ID_COUNTER_WIDTH-2)))); + } +} +static inline unsigned int pulp_cl_idma_tx_cplt_toL2(unsigned int dma_tx_id) { + unsigned int done_id = DMA_CL_READ(IDMA_REG32_3D_DONE_ID_0_REG_OFFSET); + unsigned int my_id = dma_tx_id & IDMA_ID_MASK; + if (done_id >> (IDMA_ID_COUNTER_WIDTH-1) == my_id >> (IDMA_ID_COUNTER_WIDTH-1)) { + return my_id <= done_id; + } else { + return ((done_id & (IDMA_ID_MASK - (1<<(IDMA_ID_COUNTER_WIDTH-1))) < (1<<(IDMA_ID_COUNTER_WIDTH-2)))); + } +} +static inline unsigned int 
pulp_cl_idma_tx_cplt_toL1(unsigned int dma_tx_id) { + unsigned int done_id = DMA_CL_READ(IDMA_REG32_3D_DONE_ID_1_REG_OFFSET); + unsigned int my_id = dma_tx_id & IDMA_ID_MASK; + if (done_id >> (IDMA_ID_COUNTER_WIDTH-1) == my_id >> (IDMA_ID_COUNTER_WIDTH-1)) { + return my_id <= done_id; + } else { + return ((done_id & (IDMA_ID_MASK - (1<<(IDMA_ID_COUNTER_WIDTH-1))) < (1<<(IDMA_ID_COUNTER_WIDTH-2)))); + } +} + + +static inline unsigned int plp_dma_status() { + return DMA_READ(IDMA_REG32_3D_STATUS_0_REG_OFFSET); +} + +static inline unsigned int plp_dma_status_toL1() { + return DMA_READ(IDMA_REG32_3D_STATUS_1_REG_OFFSET); +} + +static inline unsigned int plp_dma_status_toL2() { + return DMA_READ(IDMA_REG32_3D_STATUS_0_REG_OFFSET); +} + +static inline unsigned int plp_cl_dma_status() { + return DMA_CL_READ(IDMA_REG32_3D_STATUS_0_REG_OFFSET); +} + +static inline unsigned int plp_cl_dma_status_toL1() { + return DMA_CL_READ(IDMA_REG32_3D_STATUS_1_REG_OFFSET); +} + +static inline unsigned int plp_cl_dma_status_toL2() { + return DMA_CL_READ(IDMA_REG32_3D_STATUS_0_REG_OFFSET); +} + +static inline void plp_dma_wait(unsigned int dma_tx_id) { + while(!pulp_idma_tx_cplt(dma_tx_id)) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } + return; +} + +static inline void plp_cl_dma_wait(unsigned int dma_tx_id) { + while(!pulp_cl_idma_tx_cplt(dma_tx_id)) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } + return; +} + +static inline void plp_cl_dma_wait_toL1(unsigned int dma_tx_id) { + while(!pulp_cl_idma_tx_cplt_toL1(dma_tx_id)) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } + return; +} + +static inline void plp_cl_dma_wait_toL2(unsigned int dma_tx_id) { + while(!pulp_cl_idma_tx_cplt_toL2(dma_tx_id)) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } + return; +} + +static inline int pulp_idma_memcpy(unsigned int src, unsigned int dst, unsigned int size, idma_prot_t src_prot, idma_prot_t dst_prot) { + unsigned int dma_tx_id; + unsigned int cfg = pulp_idma_set_conf_prot(IDMA_DEFAULT_CONFIG, 
src_prot, dst_prot); + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + if (dst_prot == IDMA_PROT_AXI) + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + else + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} +static inline int pulp_cl_idma_memcpy(unsigned int src, unsigned int dst, unsigned int size, idma_prot_t src_prot, idma_prot_t dst_prot) { + unsigned int dma_tx_id; + unsigned int cfg = pulp_idma_set_conf_prot(IDMA_DEFAULT_CONFIG, src_prot, dst_prot); + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + if (dst_prot == IDMA_PROT_AXI) + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + else + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_idma_L1ToL2(unsigned int src, unsigned int dst, unsigned short size) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL2; + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + + return dma_tx_id; +} +static inline int pulp_cl_idma_L1ToL2(unsigned int src, unsigned int dst, unsigned short size) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL2; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, 
IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + + return dma_tx_id; +} + +static inline int pulp_idma_L2ToL1(unsigned int src, unsigned int dst, unsigned short size) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L2TOL1; + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + + return dma_tx_id; +} +static inline int pulp_cl_idma_L2ToL1(unsigned int src, unsigned int dst, unsigned short size) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L2TOL1; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_idma_L1ToL1(unsigned int src, unsigned int dst, unsigned short size) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1; + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + + return dma_tx_id; +} +static inline int pulp_cl_idma_L1ToL1(unsigned int src, unsigned int dst, unsigned short size) { + unsigned 
int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + + return dma_tx_id; +} + +static inline int pulp_idma_memcpy_2d(unsigned int src, unsigned int dst, unsigned int size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, idma_prot_t src_prot, idma_prot_t dst_prot) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_2D; + cfg = pulp_idma_set_conf_prot(cfg, src_prot, dst_prot); + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + if (dst_prot == IDMA_PROT_AXI) + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + else + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_cl_idma_memcpy_2d(unsigned int src, unsigned int dst, unsigned int size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, idma_prot_t src_prot, idma_prot_t dst_prot) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_2D; + cfg = pulp_idma_set_conf_prot(cfg, src_prot, dst_prot); + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + 
DMA_CL_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + // Launch TX + if (dst_prot == IDMA_PROT_AXI) + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + else + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_idma_L1ToL2_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL2_2D; + printf ("NUM REPS IS %d \n", num_reps); + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + return dma_tx_id; +} +static inline int pulp_cl_idma_L1ToL2_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL2_2D; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_CL_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + return 
dma_tx_id; +} + +static inline int pulp_idma_L2ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L2TOL1_2D; + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_cl_idma_L2ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L2TOL1_2D; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_CL_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1_2D; + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, 
IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_cl_idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1_2D; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_CL_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_idma_L1ToL2_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL2_3D; + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(src_stride_3d, IDMA_REG32_3D_SRC_STRIDE_3_LOW_REG_OFFSET); + 
DMA_WRITE(dst_stride_3d, IDMA_REG32_3D_DST_STRIDE_3_LOW_REG_OFFSET); + DMA_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + DMA_WRITE(num_reps_3d, IDMA_REG32_3D_REPS_3_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_idma_L2ToL1_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L2TOL1_3D; + DMA_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(num_reps, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + DMA_WRITE(src_stride_3d, IDMA_REG32_3D_SRC_STRIDE_3_LOW_REG_OFFSET); + DMA_WRITE(dst_stride_3d, IDMA_REG32_3D_DST_STRIDE_3_LOW_REG_OFFSET); + DMA_WRITE(num_reps_3d, IDMA_REG32_3D_REPS_3_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_cl_idma_L1ToL2_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps_2d, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL2_3D; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_CL_WRITE(src_stride, 
IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(src_stride_3d, IDMA_REG32_3D_SRC_STRIDE_3_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride_3d, IDMA_REG32_3D_DST_STRIDE_3_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps_2d, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps_3d, IDMA_REG32_3D_REPS_3_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_cl_idma_L2ToL1_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps_2d, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L2TOL1_3D; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_CL_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps_2d, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + DMA_CL_WRITE(src_stride_3d, IDMA_REG32_3D_SRC_STRIDE_3_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride_3d, IDMA_REG32_3D_DST_STRIDE_3_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps_3d, IDMA_REG32_3D_REPS_3_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_idma_L1ToL1_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps_2d, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1_3D; + DMA_WRITE(src, 
IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_WRITE(src_stride_3d, IDMA_REG32_3D_SRC_STRIDE_3_LOW_REG_OFFSET); + DMA_WRITE(dst_stride_3d, IDMA_REG32_3D_DST_STRIDE_3_LOW_REG_OFFSET); + DMA_WRITE(num_reps_2d, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + DMA_WRITE(num_reps_3d, IDMA_REG32_3D_REPS_3_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_cl_idma_L1ToL1_3d(unsigned int src, unsigned int dst, unsigned short size, unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps_2d, unsigned int src_stride_3d, unsigned int dst_stride_3d, unsigned int num_reps_3d) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG_L1TOL1_3D; + DMA_CL_WRITE(src, IDMA_REG32_3D_SRC_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + DMA_CL_WRITE(src_stride, IDMA_REG32_3D_SRC_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride, IDMA_REG32_3D_DST_STRIDE_2_LOW_REG_OFFSET); + DMA_CL_WRITE(src_stride_3d, IDMA_REG32_3D_SRC_STRIDE_3_LOW_REG_OFFSET); + DMA_CL_WRITE(dst_stride_3d, IDMA_REG32_3D_DST_STRIDE_3_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps_2d, IDMA_REG32_3D_REPS_2_LOW_REG_OFFSET); + DMA_CL_WRITE(num_reps_3d, IDMA_REG32_3D_REPS_3_LOW_REG_OFFSET); + + asm volatile("" : : : "memory"); + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + return dma_tx_id; +} + +static inline int pulp_idma_zeromem(unsigned int dst, unsigned short size, idma_prot_t dst_prot) { + unsigned int dma_tx_id; + unsigned int cfg = 
IDMA_DEFAULT_CONFIG; + cfg = pulp_idma_set_conf_prot(cfg, IDMA_PROT_INIT, dst_prot); + DMA_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + if (dst_prot == IDMA_PROT_AXI) + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + else + dma_tx_id = DMA_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + asm volatile("" : : : "memory"); + return dma_tx_id; +} + +static inline int pulp_cl_idma_zeromem(unsigned int dst, unsigned short size, idma_prot_t dst_prot) { + unsigned int dma_tx_id; + unsigned int cfg = IDMA_DEFAULT_CONFIG; + cfg = pulp_idma_set_conf_prot(cfg, IDMA_PROT_INIT, dst_prot); + DMA_CL_WRITE(dst, IDMA_REG32_3D_DST_ADDR_LOW_REG_OFFSET); + DMA_CL_WRITE(size, IDMA_REG32_3D_LENGTH_LOW_REG_OFFSET); + DMA_CL_WRITE(cfg, IDMA_REG32_3D_CONF_REG_OFFSET); + if (dst_prot == IDMA_PROT_AXI) + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_0_REG_OFFSET); + else + dma_tx_id = DMA_CL_READ(IDMA_REG32_3D_NEXT_ID_1_REG_OFFSET); + asm volatile("" : : : "memory"); + return dma_tx_id; +} + +// +// BARRIERS +// + + +static inline void plp_dma_barrier() { + while(plp_dma_status()) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } +} +static inline void plp_cl_dma_barrier() { + while(plp_cl_dma_status()) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } +} + +static inline void plp_dma_barrier_toL1() { + while(plp_dma_status_toL1()) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } +} +static inline void plp_cl_dma_barrier_toL1() { + while(plp_cl_dma_status_toL1()) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } +} + +static inline void plp_dma_barrier_toL2() { + while(plp_dma_status_toL2()) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } +} +static inline void plp_cl_dma_barrier_toL2() { + while(plp_cl_dma_status_toL2()) { + eu_evt_maskWaitAndClr(1 << IDMA_EVENT); + } +} + +#endif // __HAL_IDMA_V1_H__ diff --git a/include/hal/dma/mchan_v7.h b/include/hal/dma/mchan_v7.h index 
d95dee50..0f8ebc04 100644 --- a/include/hal/dma/mchan_v7.h +++ b/include/hal/dma/mchan_v7.h @@ -20,23 +20,23 @@ #include #include "hal/pulp.h" -#define PLP_DMA_LOC2EXT 0 -#define PLP_DMA_EXT2LOC 1 +#define PLP_MCHAN_LOC2EXT 0 +#define PLP_MCHAN_EXT2LOC 1 -#define PLP_DMA_1D 0 -#define PLP_DMA_2D 1 +#define PLP_MCHAN_1D 0 +#define PLP_MCHAN_2D 1 -#define PLP_DMA_NO_TRIG_EVT 0 -#define PLP_DMA_TRIG_EVT 1 +#define PLP_MCHAN_NO_TRIG_EVT 0 +#define PLP_MCHAN_TRIG_EVT 1 -#define PLP_DMA_NO_TRIG_IRQ 0 -#define PLP_DMA_TRIG_IRQ 1 +#define PLP_MCHAN_NO_TRIG_IRQ 0 +#define PLP_MCHAN_TRIG_IRQ 1 -#define PLP_DMA_PRIV 0 -#define PLP_DMA_SHARED 1 +#define PLP_MCHAN_PRIV 0 +#define PLP_MCHAN_SHARED 1 -#define PLP_DMA_FIX 0 -#define PLP_DMA_INC 1 +#define PLP_MCHAN_FIX 0 +#define PLP_MCHAN_INC 1 #if defined(ARCHI_HAS_MCHAN_64) && ARCHI_HAS_MCHAN_64 == 1 typedef unsigned long long mchan_ext_t; @@ -61,27 +61,27 @@ typedef unsigned int mchan_ext_t; \param loc Address in the cluster memory where to access the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); +static inline int plp_mchan_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); /** Cluster memory to external memory transfer with event-based completion. * \param ext Address in the external memory where to store the data. There is no restriction on memory alignment. 
\param loc Address in the cluster memory where to load the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size); +static inline int plp_mchan_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size); /** External memory to cluster memory transfer with event-based completion. * \param loc Address in the cluster memory where to store the data. There is no restriction on memory alignment. \param ext Address in the external memory where to load the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size); +static inline int plp_mchan_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size); /** Memory transfer with irq-based completion. * @@ -89,27 +89,27 @@ static inline int plp_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned sh \param loc Address in the cluster memory where to access the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. 
\param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); +static inline int plp_mchan_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc); /** Cluster memory to external memory transfer with irq-based completion. * \param ext Address in the external memory where to store the data. There is no restriction on memory alignment. \param loc Address in the cluster memory where to load the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size); +static inline int plp_mchan_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size); /** External memory to cluster memory transfer with irq-based completion. * \param loc Address in the cluster memory where to store the data. There is no restriction on memory alignment. \param ext Address in the external memory where to load the data. There is no restriction on memory alignment. \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. - \return The identifier of the transfer. 
This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size); +static inline int plp_mchan_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size); /** 2-dimensional memory transfer with event-based completion. * @@ -119,9 +119,9 @@ static inline int plp_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigne \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc); +static inline int plp_mchan_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc); /** Cluster memory to external memory 2-dimensional transfer with event-based completion. * @@ -130,9 +130,9 @@ static inline int plp_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. 
\param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_mchan_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); /** External memory to cluster memory 2-dimensional transfer with event-based completion. * @@ -141,9 +141,9 @@ static inline int plp_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer + \return The identifier of the transfer. 
This can be used with plp_mchan_wait to wait for the completion of this transfer */ -static inline int plp_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_mchan_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); /** 2-dimensional memory transfer with irq-based completion. * @@ -153,9 +153,9 @@ static inline int plp_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param ext2loc If 1, the transfer is loading data from external memory and storing to cluster memory. If 0, it is the contrary - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc); +static inline int plp_mchan_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc); /** Cluster memory to external memory 2-dimensional transfer with irq-based completion. * @@ -164,9 +164,9 @@ static inline int plp_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsig \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. 
\param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_mchan_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length); /** External memory to cluster memory 2-dimensional transfer with irq-based completion. * @@ -175,9 +175,9 @@ static inline int plp_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsi \param size Number of bytes to be transfered. The only restriction is that this size must fit 16 bits, i.e. must be inferior to 65536. \param stride 2D stride, which is the number of bytes which are added to the beginning of the current line to switch to the next one. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. \param length 2D length, which is the number of transfered bytes after which the DMA will switch to the next line. Must fit 16 bits, i.e. must be inferior to 65536. This applies only to the external memory. - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer + \return The identifier of the transfer. 
This can be used with plp_mchan_wait to wait for the completion of this transfer */ -static inline int plp_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); +static inline int plp_mchan_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length); //!@} @@ -187,14 +187,14 @@ static inline int plp_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsi /** DMA barrier. * This blocks the core until no transfer is on-going in the DMA. */ -static inline void plp_dma_barrier(); +static inline void plp_mchan_barrier(); /** DMA wait. * This blocks the core until the specified transfer is finished. * - \param counter The counter ID identifying the transfer. This has either been allocated explicitly or returned from an enqueued transfer (e.g. plp_dma_extToL1_2d_irq) + \param counter The counter ID identifying the transfer. This has either been allocated explicitly or returned from an enqueued transfer (e.g. plp_mchan_extToL1_2d_irq) */ -static inline void plp_dma_wait(unsigned int counter); +static inline void plp_mchan_wait(unsigned int counter); //!@} @@ -207,16 +207,16 @@ static inline void plp_dma_wait(unsigned int counter); * This allocates a counter and activate it for all the next transfers until another one is allocated. This means during this period, all transfers will be accounted on this counter and thus waiting * on this counter will wait for all these transfers. * - \return The identifier of the transfer. This can be used with plp_dma_wait to wait for the completion of this transfer. + \return The identifier of the transfer. This can be used with plp_mchan_wait to wait for the completion of this transfer. */ -static inline int plp_dma_counter_alloc(); +static inline int plp_mchan_counter_alloc(); /** DMA counter release. 
* This makes the counter available for another transfer through the DMA counter allocator * \param counter The counter number to be released */ -static inline void plp_dma_counter_free(int counter); +static inline void plp_mchan_counter_free(int counter); /** DMA command generation. * Can be used to generate the 32 bits command to be pushed to the DMA, depending on the required mode. @@ -229,7 +229,7 @@ static inline void plp_dma_counter_free(int counter); \param broadcast If 1 the event or irq generated when the transfer is finished is sent to all cores, otherwise it is only sent to the core enqueueing the transfer. \return The generated command. */ -static inline unsigned int plp_dma_getCmd(int ext2loc, unsigned int size, int is2D, int trigEvt, int trigIrq, int broadcast); +static inline unsigned int plp_mchan_getCmd(int ext2loc, unsigned int size, int is2D, int trigEvt, int trigIrq, int broadcast); /** Generate the stride command for 2D transfers. * @@ -237,410 +237,410 @@ static inline unsigned int plp_dma_getCmd(int ext2loc, unsigned int size, int is \param len The length of the 2D transfer, i.e. the number of bytes transfered after which the DMA should switch to the new line. Must fit 16 bits, i.e. must be inferior to 65536. \return The generated command. */ -static inline unsigned int plp_dma_getStrides(unsigned short stride, unsigned short len); +static inline unsigned int plp_mchan_getStrides(unsigned short stride, unsigned short len); /** Push a transfer to the DMA * \param locAddr The address of the transfer for the cluster memory \param extAddr The address of the transfer for the external memory - \param cmd The command that specifies the type of the transfer. This can be generated using plp_dma_getCmd. + \param cmd The command that specifies the type of the transfer. This can be generated using plp_mchan_getCmd. 
*/ -static inline void plp_dma_cmd_push(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr); +static inline void plp_mchan_cmd_push(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr); /** Push a 2D transfer to the DMA * \param locAddr The address of the transfer for the cluster memory \param extAddr The address of the transfer for the external memory - \param cmd The command that specifies the type of the transfer. This can be generated using plp_dma_getStrides. - \param strides The command that specifies the 2D transfer (stride and len). This can be generated using plp_dma_getStrides. + \param cmd The command that specifies the type of the transfer. This can be generated using plp_mchan_getStrides. + \param strides The command that specifies the 2D transfer (stride and len). This can be generated using plp_mchan_getStrides. */ -static inline void plp_dma_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length); +static inline void plp_mchan_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length); /** Return the counter status. * \return Counter status. There is one bit per counter. 1 means there are still on-going transfers for this counter, 0 means nothing is on-going. 
*/ -static inline unsigned int plp_dma_status(); +static inline unsigned int plp_mchan_status(); //!@} /// @cond IMPLEM +#if ARCHI_HAS_DMA_DEMUX +#define MCHAN_ADDR ARCHI_MCHAN_DEMUX_ADDR +#else +#define MCHAN_ADDR ARCHI_MCHAN_EXT_ADDR +#endif + #if defined(__riscv__) && !defined(RV_ISA_RV32) && !defined(__LLVM__) -#ifdef ARCHI_HAS_DMA_DEMUX -#define DMA_WRITE_DEMUX(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)ARCHI_MCHAN_DEMUX_ADDR, (offset)) -#define DMA_READ_DEMUX(offset) __builtin_pulp_OffsetedRead((int *)ARCHI_MCHAN_DEMUX_ADDR, (offset)) -#endif // ARCHI_HAS_DMA_DEMUX -#define DMA_WRITE(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)ARCHI_MCHAN_EXT_ADDR, (offset)) -#define DMA_READ(offset) __builtin_pulp_OffsetedRead((int *)ARCHI_MCHAN_EXT_ADDR, (offset)) +#define MCHAN_WRITE(value, offset) __builtin_pulp_OffsetedWrite((value), (int *)MCHAN_ADDR, (offset)) +#define MCHAN_READ(offset) __builtin_pulp_OffsetedRead((int *)MCHAN_ADDR, (offset)) #else -#ifdef ARCHI_HAS_DMA_DEMUX -#define DMA_WRITE_DEMUX(value, offset) pulp_write32(ARCHI_MCHAN_DEMUX_ADDR + (offset), (value)) -#define DMA_READ_DEMUX(value, offset) pulp_read32(ARCHI_MCHAN_DEMUX_ADDR + (offset)) -#endif // ARCHI_HAS_DMA_DEMUX -#define DMA_WRITE(value, offset) pulp_write32(ARCHI_MCHAN_EXT_ADDR + (offset), (value)) -#define DMA_READ(offset) pulp_read32(ARCHI_MCHAN_EXT_ADDR + (offset)) +#define MCHAN_WRITE(value, offset) pulp_write32(MCHAN_ADDR + (offset), (value)) +#define MCHAN_READ(offset) pulp_read32(MCHAN_ADDR + (offset)) #endif -static inline int plp_dma_counter_alloc() { - return DMA_READ(MCHAN_CMD_OFFSET); + +static inline int plp_mchan_counter_alloc() { + return MCHAN_READ(MCHAN_CMD_OFFSET); } -static inline int plp_cl_dma_counter_alloc() { +static inline int plp_cl_mchan_counter_alloc() { #ifdef ARCHI_HAS_DMA_DEMUX - return DMA_READ_DEMUX(MCHAN_CMD_OFFSET); + return MCHAN_READ(MCHAN_CMD_OFFSET); #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_counter_alloc(); + return 
plp_mchan_counter_alloc(); #endif // ARCHI_HAS_DMA_DEMUX } -static inline void plp_dma_counter_free(int counter) { - DMA_WRITE(1<>32), MCHAN_CMD_OFFSET); + MCHAN_WRITE((int)extAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE((int)(extAddr>>32), MCHAN_CMD_OFFSET); #else - DMA_WRITE(extAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE(extAddr, MCHAN_CMD_OFFSET); #endif } -static inline void plp_cl_dma_cmd_push(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr) { +static inline void plp_cl_mchan_cmd_push(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr) { #ifdef ARCHI_HAS_DMA_DEMUX - DMA_WRITE_DEMUX(cmd, MCHAN_CMD_OFFSET); - DMA_WRITE_DEMUX(locAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE(cmd, MCHAN_CMD_OFFSET); + MCHAN_WRITE(locAddr, MCHAN_CMD_OFFSET); #if defined(ARCHI_HAS_MCHAN_64) && ARCHI_HAS_MCHAN_64 == 1 - DMA_WRITE_DEMUX((int)extAddr, MCHAN_CMD_OFFSET); - DMA_WRITE_DEMUX((int)(extAddr>>32), MCHAN_CMD_OFFSET); + MCHAN_WRITE((int)extAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE((int)(extAddr>>32), MCHAN_CMD_OFFSET); #else - DMA_WRITE_DEMUX(extAddr, MCHAN_CMD_OFFSET); + MCHAN_WRITE(extAddr, MCHAN_CMD_OFFSET); #endif #else // ARCHI_HAS_DMA_DEMUX - plp_dma_cmd_push(cmd, locAddr, extAddr); + plp_mchan_cmd_push(cmd, locAddr, extAddr); #endif // ARCHI_HAS_DMA_DEMUX } -static inline void plp_dma_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { - plp_dma_cmd_push(cmd, locAddr, extAddr); - DMA_WRITE(length, MCHAN_CMD_OFFSET); - DMA_WRITE(stride, MCHAN_CMD_OFFSET); +static inline void plp_mchan_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { + plp_mchan_cmd_push(cmd, locAddr, extAddr); + MCHAN_WRITE(length, MCHAN_CMD_OFFSET); + MCHAN_WRITE(stride, MCHAN_CMD_OFFSET); } -static inline void plp_cl_dma_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { +static inline void 
plp_cl_mchan_cmd_push_2d(unsigned int cmd, unsigned int locAddr, mchan_ext_t extAddr, unsigned int stride, unsigned int length) { #ifdef ARCHI_HAS_DMA_DEMUX - plp_cl_dma_cmd_push(cmd, locAddr, extAddr); - DMA_WRITE_DEMUX(length, MCHAN_CMD_OFFSET); - DMA_WRITE_DEMUX(stride, MCHAN_CMD_OFFSET); + plp_cl_mchan_cmd_push(cmd, locAddr, extAddr); + MCHAN_WRITE(length, MCHAN_CMD_OFFSET); + MCHAN_WRITE(stride, MCHAN_CMD_OFFSET); #else // ARCHI_HAS_DMA_DEMUX - plp_dma_cmd_push_2d(cmd, locAddr, extAddr, stride, length); + plp_mchan_cmd_push_2d(cmd, locAddr, extAddr, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(ext2loc, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { +static inline int plp_cl_mchan_memcpy(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(ext2loc, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - 
return plp_dma_memcpy(ext, loc, size, ext2loc); + return plp_mchan_memcpy(ext, loc, size, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { +static inline int plp_cl_mchan_l1ToExt(mchan_ext_t ext, unsigned int loc, unsigned short size) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_l1ToExt(ext, loc, size); + return plp_mchan_l1ToExt(ext, loc, size); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_extToL1(unsigned int loc, mchan_ext_t ext, 
unsigned short size) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { +static inline int plp_cl_mchan_extToL1(unsigned int loc, mchan_ext_t ext, unsigned short size) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_1D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_extToL1(loc, ext, size); + return plp_mchan_extToL1(loc, ext, size); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(ext2loc, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { +static inline int plp_cl_mchan_memcpy_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { 
#ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(ext2loc, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_memcpy_irq(ext, loc, size, ext2loc); + return plp_mchan_memcpy_irq(ext, loc, size, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { +static inline int plp_cl_mchan_l1ToExt_irq(mchan_ext_t ext, unsigned int loc, unsigned short size) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); 
return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_l1ToExt_irq(ext, loc, size); + return plp_mchan_l1ToExt_irq(ext, loc, size); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push(cmd, loc, ext); +static inline int plp_mchan_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push(cmd, loc, ext); return counter; } -static inline int plp_cl_dma_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { +static inline int plp_cl_mchan_extToL1_irq(unsigned int loc, mchan_ext_t ext, unsigned short size) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_1D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push(cmd, loc, ext); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_1D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push(cmd, loc, ext); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_extToL1_irq(loc, ext, size); + return plp_mchan_extToL1_irq(loc, ext, size); #endif // ARCHI_HAS_DMA_DEMUX } -static inline void plp_dma_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { - unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, 
loc, ext, stride, length); +static inline void plp_mchan_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { + unsigned int cmd = plp_mchan_getCmd(ext2loc, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); } -static inline void plp_cl_dma_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { +static inline void plp_cl_mchan_memcpy_2d_keepCounter(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int cmd = plp_cl_mchan_getCmd(ext2loc, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); #else // ARCHI_HAS_DMA_DEMUX - plp_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); + plp_mchan_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { - unsigned int counter = plp_dma_counter_alloc(); - plp_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); +static inline int plp_mchan_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { + unsigned int counter = plp_mchan_counter_alloc(); + plp_mchan_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); return counter; } -static inline int plp_cl_dma_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned 
short size, unsigned short stride, unsigned short length, int ext2loc) { +static inline int plp_cl_mchan_memcpy_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - plp_cl_dma_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); + unsigned int counter = plp_cl_mchan_counter_alloc(); + plp_cl_mchan_memcpy_2d_keepCounter(ext, loc, size, stride, length, ext2loc); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_memcpy_2d(ext, loc, size, stride, length, ext2loc); + return plp_mchan_memcpy_2d(ext, loc, size, stride, length, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { +static inline int plp_cl_mchan_l1ToExt_2d(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, 
PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_l1ToExt_2d(ext, loc, size, stride, length); + return plp_mchan_l1ToExt_2d(ext, loc, size, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { +static inline int plp_cl_mchan_extToL1_2d(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_TRIG_EVT, PLP_DMA_NO_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = 
plp_cl_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_2D, PLP_MCHAN_TRIG_EVT, PLP_MCHAN_NO_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_extToL1_2d(loc, ext, size, stride, length); + return plp_mchan_extToL1_2d(loc, ext, size, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(ext2loc, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { +static inline int plp_cl_mchan_memcpy_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length, int ext2loc) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(ext2loc, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(ext2loc, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, 
length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_memcpy_2d_irq(ext, loc, size, stride, length, ext2loc); + return plp_mchan_memcpy_2d_irq(ext, loc, size, stride, length, ext2loc); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { +static inline int plp_cl_mchan_l1ToExt_2d_irq(mchan_ext_t ext, unsigned int loc, unsigned short size, unsigned short stride, unsigned short length) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_LOC2EXT, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_LOC2EXT, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_l1ToExt_2d_irq(ext, loc, size, stride, length); + return plp_mchan_l1ToExt_2d_irq(ext, 
loc, size, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline int plp_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { - unsigned int counter = plp_dma_counter_alloc(); - unsigned int cmd = plp_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_dma_cmd_push_2d(cmd, loc, ext, stride, length); +static inline int plp_mchan_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { + unsigned int counter = plp_mchan_counter_alloc(); + unsigned int cmd = plp_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; } -static inline int plp_cl_dma_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { +static inline int plp_cl_mchan_extToL1_2d_irq(unsigned int loc, mchan_ext_t ext, unsigned short size, unsigned short stride, unsigned short length) { #ifdef ARCHI_HAS_DMA_DEMUX - unsigned int counter = plp_cl_dma_counter_alloc(); - unsigned int cmd = plp_cl_dma_getCmd(PLP_DMA_EXT2LOC, size, PLP_DMA_2D, PLP_DMA_NO_TRIG_EVT, PLP_DMA_TRIG_IRQ, PLP_DMA_SHARED); - plp_cl_dma_cmd_push_2d(cmd, loc, ext, stride, length); + unsigned int counter = plp_cl_mchan_counter_alloc(); + unsigned int cmd = plp_cl_mchan_getCmd(PLP_MCHAN_EXT2LOC, size, PLP_MCHAN_2D, PLP_MCHAN_NO_TRIG_EVT, PLP_MCHAN_TRIG_IRQ, PLP_MCHAN_SHARED); + plp_cl_mchan_cmd_push_2d(cmd, loc, ext, stride, length); return counter; #else // ARCHI_HAS_DMA_DEMUX - return plp_dma_extToL1_2d_irq(loc, ext, size, stride, length); + return plp_mchan_extToL1_2d_irq(loc, ext, size, stride, length); #endif // ARCHI_HAS_DMA_DEMUX } -static inline void plp_dma_barrier() { - while(DMA_READ(MCHAN_STATUS_OFFSET) & 0xFFFF) { +static inline void 
plp_mchan_barrier() { + while(MCHAN_READ(MCHAN_STATUS_OFFSET) & 0xFFFF) { eu_evt_maskWaitAndClr(1< #include @@ -28,6 +29,11 @@ pos_alloc_t pos_alloc_fc_tcdm; #if defined(ARCHI_HAS_L2) pos_alloc_t pos_alloc_l2[POS_NB_ALLOC_L2]; + +#define POS_L2_PRIV0 0 +#define POS_L2_PRIV1 1 +#define POS_L2_SHARED 2 + #endif #ifdef CONFIG_ALLOC_L2_PWD_NB_BANKS @@ -38,7 +44,7 @@ static uint32_t pos_alloc_account_1[CONFIG_ALLOC_L2_PWD_NB_BANKS]; #if defined(ARCHI_HAS_FC_TCDM) static inline pos_alloc_t *get_fc_alloc() { return &pos_alloc_fc_tcdm; } #else -static inline pos_alloc_t *get_fc_alloc() { return &pos_alloc_l2[0]; } +static inline pos_alloc_t *get_fc_alloc() { return &pos_alloc_l2[POS_L2_PRIV0]; } #endif @@ -49,31 +55,31 @@ void pos_allocs_init() #if defined(ARCHI_HAS_L2) #if defined(ARCHI_HAS_L2_MULTI) - //pos_trace(//pos_trace_INIT, "Initializing L2 private bank0 allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_priv0_base(), pos_l2_priv0_size()); - pos_alloc_init(&pos_alloc_l2[0], pos_l2_priv0_base(), pos_l2_priv0_size()); + ALLOC_TRACE(POS_LOG_TRACE, "Initializing L2 private bank0 allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_priv0_base(), pos_l2_priv0_size()); + pos_alloc_init(&pos_alloc_l2[POS_L2_PRIV0], pos_l2_priv0_base(), pos_l2_priv0_size()); - //pos_trace(//pos_trace_INIT, "Initializing L2 private bank1 allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_priv1_base(), pos_l2_priv1_size()); - pos_alloc_init(&pos_alloc_l2[1], pos_l2_priv1_base(), pos_l2_priv1_size()); + ALLOC_TRACE(POS_LOG_TRACE, "Initializing L2 private bank1 allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_priv1_base(), pos_l2_priv1_size()); + pos_alloc_init(&pos_alloc_l2[POS_L2_PRIV1], pos_l2_priv1_base(), pos_l2_priv1_size()); - //pos_trace(//pos_trace_INIT, "Initializing L2 shared banks allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_shared_base(), pos_l2_shared_size()); - pos_alloc_init(&pos_alloc_l2[2], pos_l2_shared_base(), pos_l2_shared_size()); + 
ALLOC_TRACE(POS_LOG_TRACE, "Initializing L2 shared banks allocator (base: 0x%8x, size: 0x%8x)\n", (int)pos_l2_shared_base(), pos_l2_shared_size()); + pos_alloc_init(&pos_alloc_l2[POS_L2_SHARED], pos_l2_shared_base(), pos_l2_shared_size()); #ifdef CONFIG_ALLOC_L2_PWD_NB_BANKS - pos_alloc_l2[2].track_pwd = 1; - pos_alloc_l2[2].pwd_count = pos_alloc_account_0; - pos_alloc_l2[2].ret_count = pos_alloc_account_0; + pos_alloc_l2[POS_L2_SHARED].track_pwd = 1; + pos_alloc_l2[POS_L2_SHARED].pwd_count = pos_alloc_account_0; + pos_alloc_l2[POS_L2_SHARED].ret_count = pos_alloc_account_0; for (int i=0; i