mirror of
https://github.com/nxp-imx/linux-imx.git
synced 2025-09-02 18:06:13 +02:00
AIR-11693 driver: ethosu: Enable NPU memory cache to increase the performance
1. Flush the input buffer cache when an inference starts. 2. Invalidate the output buffer cache when the inference finishes. Signed-off-by: Feng Guo <feng.guo@nxp.com> Acked-by: Peng Fan <peng.fan@nxp.com>
This commit is contained in:
parent
c955ed6a9d
commit
7ca0f0f180
|
@ -28,6 +28,10 @@
|
|||
#include <linux/kref.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/dma-direct.h>
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/dma-map-ops.h>
|
||||
/****************************************************************************
|
||||
* Types
|
||||
****************************************************************************/
|
||||
|
|
|
@ -138,6 +138,8 @@ struct ethosu_core_inference_req {
|
|||
struct ethosu_core_network_buffer network;
|
||||
uint8_t pmu_event_config[ETHOSU_CORE_PMU_MAX];
|
||||
uint32_t pmu_cycle_counter_enable;
|
||||
uint32_t arena_offset;
|
||||
uint32_t flash_offset;
|
||||
uint32_t inference_type;
|
||||
};
|
||||
|
||||
|
|
|
@ -34,6 +34,7 @@
|
|||
#include "uapi/ethosu.h"
|
||||
|
||||
#include <linux/dma-mapping.h>
|
||||
#include <linux/dma-map-ops.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
@ -355,6 +356,7 @@ int ethosu_dev_init(struct ethosu_device *edev,
|
|||
return ret;
|
||||
|
||||
dma_set_mask_and_coherent(edev->dev, DMA_BIT_MASK(DMA_ADDR_BITS));
|
||||
arch_setup_dma_ops(edev->dev, 0, 0, NULL, true);
|
||||
|
||||
ret = ethosu_rpmsg_init(&edev->erp, ethosu_rpmsg_rx, edev);
|
||||
if (ret)
|
||||
|
|
|
@ -95,6 +95,14 @@ static int ethosu_inference_send(struct ethosu_inference *inf)
|
|||
int ret;
|
||||
|
||||
inf->status = ETHOSU_UAPI_STATUS_ERROR;
|
||||
inf->done = false;
|
||||
|
||||
/* Get pointer to arena buffer, sync the input data */
|
||||
phys_addr_t paddr = dma_to_phys(inf->edev->dev, inf->ifm[0]->dma_addr_orig);
|
||||
for (int i = 0; i < inf->memory_layout.input_count; i ++) {
|
||||
arch_sync_dma_for_device(paddr + inf->memory_layout.input_offset[i],
|
||||
inf->memory_layout.input_size[i], DMA_TO_DEVICE);
|
||||
}
|
||||
|
||||
ret = ethosu_rpmsg_inference(&inf->edev->erp, &inf->msg,
|
||||
inf->ifm_count, inf->ifm,
|
||||
|
@ -104,6 +112,8 @@ static int ethosu_inference_send(struct ethosu_inference *inf)
|
|||
inf->pmu_event_config,
|
||||
ETHOSU_PMU_EVENT_MAX,
|
||||
inf->pmu_cycle_counter_enable,
|
||||
inf->memory_layout.flash_offset,
|
||||
inf->memory_layout.arena_offset,
|
||||
inf->inference_type);
|
||||
if (ret) {
|
||||
dev_warn(inf->edev->dev,
|
||||
|
@ -221,9 +231,23 @@ static unsigned int ethosu_inference_poll(struct file *file,
|
|||
|
||||
poll_wait(file, &inf->waitq, wait);
|
||||
|
||||
if (inf->done)
|
||||
if (inf->done) {
|
||||
ret |= POLLIN;
|
||||
|
||||
/* Get pointer to arena buffer, sync the output data */
|
||||
phys_addr_t paddr = dma_to_phys(inf->edev->dev, inf->ifm[0]->dma_addr_orig);
|
||||
for (int i = 0; i < inf->memory_layout.output_count; i ++) {
|
||||
arch_sync_dma_for_cpu(paddr + inf->memory_layout.output_offset[i],
|
||||
inf->memory_layout.output_size[i], DMA_FROM_DEVICE);
|
||||
}
|
||||
|
||||
/* Get pointer to OFM buffer, sync the PMU data */
|
||||
for (int i = 0; i < inf->ofm_count; i++) {
|
||||
paddr = dma_to_phys(inf->edev->dev, inf->ofm[i]->dma_addr_orig);
|
||||
arch_sync_dma_for_cpu(paddr, inf->ofm[i]->capacity, DMA_FROM_DEVICE);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -268,6 +292,15 @@ static long ethosu_inference_ioctl(struct file *file,
|
|||
|
||||
break;
|
||||
}
|
||||
case ETHOSU_IOCTL_INFERENCE_INVOKE: {
|
||||
struct ethosu_uapi_result_status uapi;
|
||||
ret = copy_from_user(&uapi, udata, sizeof(uapi));
|
||||
if (ret)
|
||||
break;
|
||||
/* Send inference request to Arm Ethos-U subsystem */
|
||||
ret = ethosu_inference_send(inf);
|
||||
break;
|
||||
}
|
||||
case ETHOSU_IOCTL_INFERENCE_CANCEL: {
|
||||
struct ethosu_uapi_cancel_inference_status uapi;
|
||||
|
||||
|
@ -337,12 +370,16 @@ int ethosu_inference_create(struct ethosu_device *edev,
|
|||
init_waitqueue_head(&inf->waitq);
|
||||
inf->msg.fail = ethosu_inference_fail;
|
||||
inf->msg.resend = ethosu_inference_resend;
|
||||
inf->memory_layout = uapi->memory_layout;
|
||||
|
||||
/* Add inference to pending list */
|
||||
ret = ethosu_rpmsg_register(&edev->erp, &inf->msg);
|
||||
if (ret < 0)
|
||||
goto kfree;
|
||||
|
||||
phys_addr_t paddr;
|
||||
paddr = dma_to_phys(edev->dev, inf->net->buf->dma_addr_orig);
|
||||
arch_sync_dma_for_device(paddr, inf->net->buf->capacity, DMA_TO_DEVICE);
|
||||
/* Get pointer to IFM buffers */
|
||||
for (i = 0; i < uapi->ifm_count; i++) {
|
||||
inf->ifm[i] = ethosu_buffer_get_from_fd(uapi->ifm_fd[i]);
|
||||
|
@ -352,6 +389,8 @@ int ethosu_inference_create(struct ethosu_device *edev,
|
|||
}
|
||||
|
||||
inf->ifm_count++;
|
||||
paddr = dma_to_phys(edev->dev, inf->ifm[i]->dma_addr_orig);
|
||||
arch_sync_dma_for_device(paddr, inf->ifm[i]->capacity,DMA_TO_DEVICE);
|
||||
}
|
||||
|
||||
/* Get pointer to OFM buffer */
|
||||
|
@ -387,11 +426,6 @@ int ethosu_inference_create(struct ethosu_device *edev,
|
|||
/* Increment network reference count */
|
||||
ethosu_network_get(net);
|
||||
|
||||
/* Send inference request to Arm Ethos-U subsystem */
|
||||
ret = ethosu_inference_send(inf);
|
||||
if (ret)
|
||||
goto put_net;
|
||||
|
||||
/* Create file descriptor */
|
||||
ret = fd = anon_inode_getfd("ethosu-inference", &ethosu_inference_fops,
|
||||
inf, O_RDWR | O_CLOEXEC);
|
||||
|
|
|
@ -77,6 +77,7 @@ struct ethosu_inference {
|
|||
uint32_t pmu_cycle_counter_enable;
|
||||
uint64_t pmu_cycle_counter_count;
|
||||
uint32_t inference_type;
|
||||
struct ethosu_uapi_memory_layout memory_layout;
|
||||
struct ethosu_rpmsg_msg msg;
|
||||
};
|
||||
|
||||
|
|
|
@ -168,6 +168,9 @@ int ethosu_network_create(struct ethosu_device *edev,
|
|||
ret = PTR_ERR(net->buf);
|
||||
goto free_net;
|
||||
}
|
||||
phys_addr_t paddr;
|
||||
paddr = dma_to_phys(edev->dev, net->buf->dma_addr_orig);
|
||||
arch_sync_dma_for_device(paddr, net->buf->capacity, DMA_TO_DEVICE);
|
||||
} else {
|
||||
net->index = uapi->index;
|
||||
}
|
||||
|
|
|
@ -201,6 +201,8 @@ int ethosu_rpmsg_inference(struct ethosu_rpmsg *erp,
|
|||
uint8_t *pmu_event_config,
|
||||
uint8_t pmu_event_config_count,
|
||||
uint8_t pmu_cycle_counter_enable,
|
||||
uint32_t flash_offset,
|
||||
uint32_t arena_offset,
|
||||
uint32_t inference_type)
|
||||
{
|
||||
struct ethosu_core_msg msg = {
|
||||
|
@ -227,6 +229,8 @@ int ethosu_rpmsg_inference(struct ethosu_rpmsg *erp,
|
|||
req.ofm_count = ofm_count;
|
||||
req.pmu_cycle_counter_enable = pmu_cycle_counter_enable;
|
||||
req.inference_type = inference_type;
|
||||
req.flash_offset = flash_offset;
|
||||
req.arena_offset = arena_offset;
|
||||
|
||||
for (i = 0; i < ifm_count; i++)
|
||||
ethosu_core_set_size(ifm[i], &req.ifm[i]);
|
||||
|
|
|
@ -89,6 +89,8 @@ int ethosu_rpmsg_inference(struct ethosu_rpmsg *erp,
|
|||
uint8_t *pmu_event_config,
|
||||
uint8_t pmu_event_config_count,
|
||||
uint8_t pmu_cycle_counter_enable,
|
||||
uint32_t flash_offset,
|
||||
uint32_t arena_offset,
|
||||
uint32_t inference_type
|
||||
);
|
||||
|
||||
|
|
|
@ -62,6 +62,8 @@ namespace EthosU {
|
|||
struct ethosu_uapi_result_status)
|
||||
#define ETHOSU_IOCTL_INFERENCE_CANCEL ETHOSU_IOR(0x32, \
|
||||
struct ethosu_uapi_cancel_inference_status)
|
||||
#define ETHOSU_IOCTL_INFERENCE_INVOKE ETHOSU_IOR(0x33, \
|
||||
struct ethosu_uapi_result_status)
|
||||
|
||||
/* Maximum number of IFM/OFM file descriptors per network */
|
||||
#define ETHOSU_FD_MAX 16
|
||||
|
@ -126,7 +128,7 @@ enum ethosu_uapi_network_type {
|
|||
* @index: Buffer index compiled into firmware binary.
|
||||
*/
|
||||
struct ethosu_uapi_network_create {
|
||||
u32 type;
|
||||
__u32 type;
|
||||
union {
|
||||
__u32 fd;
|
||||
__u32 index;
|
||||
|
@ -242,6 +244,28 @@ enum ethosu_uapi_inference_type {
|
|||
ETHOSU_UAPI_INFERENCE_OP
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ethosu_uapi_memory_layout - The memory layout of arena buffer
|
||||
* @flash_offset: The flash offset in the buffer
|
||||
* @arena_offset: The arena offset in the buffer
|
||||
* @input_count: Number of inputs
|
||||
* @input_offset: The inputs offset in the buffer
|
||||
* @input_size: The inputs size
|
||||
* @output_count: Number of outputs
|
||||
* @output_offset: The outputs offset in the buffer
|
||||
* @output_size: The outputs size
|
||||
*/
|
||||
struct ethosu_uapi_memory_layout {
|
||||
__u32 flash_offset; /* forwarded unchanged to the firmware in the inference request */
|
||||
__u32 arena_offset; /* forwarded unchanged to the firmware in the inference request */
|
||||
__u32 input_count; /* loop bound for input_offset[]/input_size[]; NOTE(review): presumably must be <= ETHOSU_FD_MAX - confirm the driver validates it */
|
||||
__u32 input_offset[ETHOSU_FD_MAX]; /* added to the physical address of IFM buffer 0 when input caches are synced for the device */
|
||||
__u32 input_size[ETHOSU_FD_MAX]; /* length passed to the cache sync for the matching input_offset entry */
|
||||
__u32 output_count; /* loop bound for output_offset[]/output_size[]; NOTE(review): presumably must be <= ETHOSU_FD_MAX - confirm the driver validates it */
|
||||
__u32 output_offset[ETHOSU_FD_MAX]; /* added to the physical address of IFM buffer 0 when output caches are synced for the CPU */
|
||||
__u32 output_size[ETHOSU_FD_MAX]; /* length passed to the cache sync for the matching output_offset entry */
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ethosu_uapi_inference_create - Create network request
|
||||
* @ifm_count: Number of IFM file descriptors
|
||||
|
@ -254,6 +278,7 @@ struct ethosu_uapi_inference_create {
|
|||
__u32 ifm_fd[ETHOSU_FD_MAX];
|
||||
__u32 ofm_count;
|
||||
__u32 ofm_fd[ETHOSU_FD_MAX];
|
||||
struct ethosu_uapi_memory_layout memory_layout;
|
||||
enum ethosu_uapi_inference_type inference_type;
|
||||
struct ethosu_uapi_pmu_config pmu_config;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue
Block a user