AIR-11693 driver: ethosu: Enable NPU memory cache to increase the performance

1. Flush the input buffer cache when an inference starts
2. Invalidate the output buffer cache when the inference finishes

Signed-off-by: Feng Guo <feng.guo@nxp.com>
Acked-by: Peng Fan <peng.fan@nxp.com>
This commit is contained in:
Feng Guo 2024-07-25 16:26:09 +08:00
parent c955ed6a9d
commit 7ca0f0f180
9 changed files with 84 additions and 7 deletions

View File

@ -28,6 +28,10 @@
#include <linux/kref.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/dma-mapping.h>
#include <linux/dma-map-ops.h>
/****************************************************************************
* Types
****************************************************************************/

View File

@ -138,6 +138,8 @@ struct ethosu_core_inference_req {
struct ethosu_core_network_buffer network;
uint8_t pmu_event_config[ETHOSU_CORE_PMU_MAX];
uint32_t pmu_cycle_counter_enable;
uint32_t arena_offset;
uint32_t flash_offset;
uint32_t inference_type;
};

View File

@ -34,6 +34,7 @@
#include "uapi/ethosu.h"
#include <linux/dma-mapping.h>
#include <linux/dma-map-ops.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
@ -355,6 +356,7 @@ int ethosu_dev_init(struct ethosu_device *edev,
return ret;
dma_set_mask_and_coherent(edev->dev, DMA_BIT_MASK(DMA_ADDR_BITS));
arch_setup_dma_ops(edev->dev, 0, 0, NULL, true);
ret = ethosu_rpmsg_init(&edev->erp, ethosu_rpmsg_rx, edev);
if (ret)

View File

@ -95,6 +95,14 @@ static int ethosu_inference_send(struct ethosu_inference *inf)
int ret;
inf->status = ETHOSU_UAPI_STATUS_ERROR;
inf->done = false;
/* Get pointer to arena buffer, sync the input data */
phys_addr_t paddr = dma_to_phys(inf->edev->dev, inf->ifm[0]->dma_addr_orig);
for (int i = 0; i < inf->memory_layout.input_count; i ++) {
arch_sync_dma_for_device(paddr + inf->memory_layout.input_offset[i],
inf->memory_layout.input_size[i], DMA_TO_DEVICE);
}
ret = ethosu_rpmsg_inference(&inf->edev->erp, &inf->msg,
inf->ifm_count, inf->ifm,
@ -104,6 +112,8 @@ static int ethosu_inference_send(struct ethosu_inference *inf)
inf->pmu_event_config,
ETHOSU_PMU_EVENT_MAX,
inf->pmu_cycle_counter_enable,
inf->memory_layout.flash_offset,
inf->memory_layout.arena_offset,
inf->inference_type);
if (ret) {
dev_warn(inf->edev->dev,
@ -221,9 +231,23 @@ static unsigned int ethosu_inference_poll(struct file *file,
poll_wait(file, &inf->waitq, wait);
if (inf->done)
if (inf->done) {
ret |= POLLIN;
/* Get pointer to arena buffer, sync the output data */
phys_addr_t paddr = dma_to_phys(inf->edev->dev, inf->ifm[0]->dma_addr_orig);
for (int i = 0; i < inf->memory_layout.output_count; i ++) {
arch_sync_dma_for_cpu(paddr + inf->memory_layout.output_offset[i],
inf->memory_layout.output_size[i], DMA_FROM_DEVICE);
}
/* Get pointer to OFM buffer, sync the PMU data */
for (int i = 0; i < inf->ofm_count; i++) {
paddr = dma_to_phys(inf->edev->dev, inf->ofm[i]->dma_addr_orig);
arch_sync_dma_for_cpu(paddr, inf->ofm[i]->capacity, DMA_FROM_DEVICE);
}
}
return ret;
}
@ -268,6 +292,15 @@ static long ethosu_inference_ioctl(struct file *file,
break;
}
case ETHOSU_IOCTL_INFERENCE_INVOKE: {
	struct ethosu_uapi_result_status uapi;

	/*
	 * copy_from_user() returns the number of bytes that could NOT be
	 * copied, not an errno. Passing that positive count back through
	 * 'ret' would hand user space a bogus positive ioctl result, so a
	 * failed copy is reported as -EFAULT instead.
	 */
	if (copy_from_user(&uapi, udata, sizeof(uapi))) {
		ret = -EFAULT;
		break;
	}

	/* Send inference request to Arm Ethos-U subsystem */
	ret = ethosu_inference_send(inf);
	break;
}
case ETHOSU_IOCTL_INFERENCE_CANCEL: {
struct ethosu_uapi_cancel_inference_status uapi;
@ -337,12 +370,16 @@ int ethosu_inference_create(struct ethosu_device *edev,
init_waitqueue_head(&inf->waitq);
inf->msg.fail = ethosu_inference_fail;
inf->msg.resend = ethosu_inference_resend;
inf->memory_layout = uapi->memory_layout;
/* Add inference to pending list */
ret = ethosu_rpmsg_register(&edev->erp, &inf->msg);
if (ret < 0)
goto kfree;
phys_addr_t paddr;
paddr = dma_to_phys(edev->dev, inf->net->buf->dma_addr_orig);
arch_sync_dma_for_device(paddr, inf->net->buf->capacity, DMA_TO_DEVICE);
/* Get pointer to IFM buffers */
for (i = 0; i < uapi->ifm_count; i++) {
inf->ifm[i] = ethosu_buffer_get_from_fd(uapi->ifm_fd[i]);
@ -352,6 +389,8 @@ int ethosu_inference_create(struct ethosu_device *edev,
}
inf->ifm_count++;
paddr = dma_to_phys(edev->dev, inf->ifm[i]->dma_addr_orig);
arch_sync_dma_for_device(paddr, inf->ifm[i]->capacity,DMA_TO_DEVICE);
}
/* Get pointer to OFM buffer */
@ -387,11 +426,6 @@ int ethosu_inference_create(struct ethosu_device *edev,
/* Increment network reference count */
ethosu_network_get(net);
/* Send inference request to Arm Ethos-U subsystem */
ret = ethosu_inference_send(inf);
if (ret)
goto put_net;
/* Create file descriptor */
ret = fd = anon_inode_getfd("ethosu-inference", &ethosu_inference_fops,
inf, O_RDWR | O_CLOEXEC);

View File

@ -77,6 +77,7 @@ struct ethosu_inference {
uint32_t pmu_cycle_counter_enable;
uint64_t pmu_cycle_counter_count;
uint32_t inference_type;
struct ethosu_uapi_memory_layout memory_layout;
struct ethosu_rpmsg_msg msg;
};

View File

@ -168,6 +168,9 @@ int ethosu_network_create(struct ethosu_device *edev,
ret = PTR_ERR(net->buf);
goto free_net;
}
phys_addr_t paddr;
paddr = dma_to_phys(edev->dev, net->buf->dma_addr_orig);
arch_sync_dma_for_device(paddr, net->buf->capacity, DMA_TO_DEVICE);
} else {
net->index = uapi->index;
}

View File

@ -201,6 +201,8 @@ int ethosu_rpmsg_inference(struct ethosu_rpmsg *erp,
uint8_t *pmu_event_config,
uint8_t pmu_event_config_count,
uint8_t pmu_cycle_counter_enable,
uint32_t flash_offset,
uint32_t arena_offset,
uint32_t inference_type)
{
struct ethosu_core_msg msg = {
@ -227,6 +229,8 @@ int ethosu_rpmsg_inference(struct ethosu_rpmsg *erp,
req.ofm_count = ofm_count;
req.pmu_cycle_counter_enable = pmu_cycle_counter_enable;
req.inference_type = inference_type;
req.flash_offset = flash_offset;
req.arena_offset = arena_offset;
for (i = 0; i < ifm_count; i++)
ethosu_core_set_size(ifm[i], &req.ifm[i]);

View File

@ -89,6 +89,8 @@ int ethosu_rpmsg_inference(struct ethosu_rpmsg *erp,
uint8_t *pmu_event_config,
uint8_t pmu_event_config_count,
uint8_t pmu_cycle_counter_enable,
uint32_t flash_offset,
uint32_t arena_offset,
uint32_t inference_type
);

View File

@ -62,6 +62,8 @@ namespace EthosU {
struct ethosu_uapi_result_status)
#define ETHOSU_IOCTL_INFERENCE_CANCEL ETHOSU_IOR(0x32, \
struct ethosu_uapi_cancel_inference_status)
/*
 * Invoke (start) an inference on an already created inference object.
 * The driver's ioctl handler copies a struct ethosu_uapi_result_status
 * from user space and then sends the inference request.
 *
 * NOTE(review): this command is declared with ETHOSU_IOR although the
 * handler reads the argument from user space (copy_from_user); the
 * direction encoding presumably should describe a user-to-kernel
 * transfer. The command value is ABI, so confirm before changing it.
 */
#define ETHOSU_IOCTL_INFERENCE_INVOKE ETHOSU_IOR(0x33, \
struct ethosu_uapi_result_status)
/* Maximum number of IFM/OFM file descriptors per network */
#define ETHOSU_FD_MAX 16
@ -126,7 +128,7 @@ enum ethosu_uapi_network_type {
* @index: Buffer index compiled into firmware binary.
*/
struct ethosu_uapi_network_create {
u32 type;
__u32 type;
union {
__u32 fd;
__u32 index;
@ -242,6 +244,28 @@ enum ethosu_uapi_inference_type {
ETHOSU_UAPI_INFERENCE_OP
};
/**
 * struct ethosu_uapi_memory_layout - The memory layout of arena buffer
 * @flash_offset:  The flash offset in the buffer. Forwarded unchanged by the
 *                 driver in the inference request sent to the Ethos-U
 *                 subsystem.
 * @arena_offset:  The arena offset in the buffer. Forwarded unchanged by the
 *                 driver in the inference request sent to the Ethos-U
 *                 subsystem.
 * @input_count:   Number of inputs, i.e. the number of valid entries in
 *                 @input_offset and @input_size. Both arrays hold at most
 *                 ETHOSU_FD_MAX entries.
 * @input_offset:  The inputs offset in the buffer. The driver adds each
 *                 offset to the physical address of the first IFM buffer
 *                 when syncing input data for the device before an
 *                 inference is sent.
 * @input_size:    The inputs size; used as the length of each input region
 *                 that is synced for the device.
 * @output_count:  Number of outputs, i.e. the number of valid entries in
 *                 @output_offset and @output_size. Both arrays hold at most
 *                 ETHOSU_FD_MAX entries.
 * @output_offset: The outputs offset in the buffer. The driver adds each
 *                 offset to the physical address of the first IFM buffer
 *                 when syncing output data for the CPU once the inference
 *                 is done.
 * @output_size:   The outputs size; used as the length of each output region
 *                 that is synced for the CPU.
 *
 * The structure is supplied by user space as part of
 * struct ethosu_uapi_inference_create and copied as-is into the driver's
 * inference object.
 *
 * NOTE(review): in the driver code visible with this change, @input_count and
 * @output_count are used as loop bounds over the fixed-size arrays without a
 * visible check against ETHOSU_FD_MAX, and the offsets/sizes are not checked
 * against the buffer capacity - confirm that validation exists elsewhere.
 */
struct ethosu_uapi_memory_layout {
__u32 flash_offset;
__u32 arena_offset;
__u32 input_count;
__u32 input_offset[ETHOSU_FD_MAX];
__u32 input_size[ETHOSU_FD_MAX];
__u32 output_count;
__u32 output_offset[ETHOSU_FD_MAX];
__u32 output_size[ETHOSU_FD_MAX];
};
/**
* struct ethosu_uapi_inference_create - Create network request
* @ifm_count: Number of IFM file descriptors
@ -254,6 +278,7 @@ struct ethosu_uapi_inference_create {
__u32 ifm_fd[ETHOSU_FD_MAX];
__u32 ofm_count;
__u32 ofm_fd[ETHOSU_FD_MAX];
struct ethosu_uapi_memory_layout memory_layout;
enum ethosu_uapi_inference_type inference_type;
struct ethosu_uapi_pmu_config pmu_config;
};