VFIO updates for v6.16-rc1
 - Remove an outdated DMA unmap optimization that relies on a feature
   only implemented in AMDv1 page tables. (Jason Gunthorpe)

 - Fix various migration issues in the hisi_acc_vfio_pci variant driver,
   including use of a wrong DMA address requiring an update to the
   migration data structure, resending the task completion interrupt
   after migration to re-sync queues, fixing a write-back cache
   sequencing issue, fixing a driver unload issue, behaving correctly
   when the guest driver is not loaded, and no longer squashing errors
   from sub-functions. (Longfang Liu)

 - mlx5-vfio-pci variant driver update to make use of the new two-step
   DMA API for migration, using a page array directly rather than a
   page list mapped across a scatter list. (Leon Romanovsky)

 - Fix an incorrect loop index used when unwinding allocation of dirty
   page bitmaps on error, resulting in temporary failure to free unused
   bitmaps. (Li RongQing)

-----BEGIN PGP SIGNATURE-----

iQJPBAABCAA5FiEEQvbATlQL0amee4qQI5ubbjuwiyIFAmg4wx8bHGFsZXgud2ls
bGlhbXNvbkByZWRoYXQuY29tAAoJECObm247sIsimw8P/RBRP+rtcV/64h9QikXd
vBl+jbPoGRuZqfuoFp2HsfPws4lhTpNO5djTiiMpJ5P7BIcA3POUZxGF8pwE7Bu3
WlGFUpuuPA2bAoM/odoqPqZbjKgNKcgyrJWDSw3c90FZay7smpkqNv4qPC/4D1H0
ZovytMAHSbEccXQa7459NvWWW/YBmsFkS/zWMdoNOAXYRBILUBUdaJgubhBqxEME
cj2yBEdQfq01/nVMQmx2Y0j7GLQUv0zyBhwradOz6nah08d0MtnpjfZTZMHPccqL
eLAxo/8xUP5mUdrnBeckLNNrHrOtB457mysFr0dtG1GX5x4PWQCcW2NandGkkMOo
rL4Keyt4mKsMqX8N/4hWjr0pXt/bfpiBbPHwLFRXK0QCStobmDlZHiZB++Q9VFZd
LVosdkgg4+nuaqUZ5D6H+tftfupLHhbAaIcYSanga0DCKSXTdLmZVZwDazKKml2C
okoHHlw2NS3JwHOPTIss29Egc/0UeSS3+8AvYuJSHwhcaPiATGfG6w6Iuq5t/R7I
M+mwdVhbH3wpjtDCz3LEjY+tOJM79ILEhbRPqlSa3mVbOlmoysyXUa0zh+ZZFCol
5QPEA/VR8/KTLsgEF+S4EKFp6O+zuIZ8GGpx2fz2BQSe8hjJewvq4ovW9xqQiUB9
7ld0BH1VAycMBLkkfP1ywpM6
=0fk8
-----END PGP SIGNATURE-----

Merge tag 'vfio-v6.16-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - Remove an outdated DMA unmap optimization that relies on a feature
   only implemented in AMDv1 page tables. (Jason Gunthorpe)

 - Fix various migration issues in the hisi_acc_vfio_pci variant driver,
   including use of a wrong DMA address requiring an update to the
   migration data structure, resending the task completion interrupt
   after migration to re-sync queues, fixing a write-back cache
   sequencing issue, fixing a driver unload issue, behaving correctly
   when the guest driver is not loaded, and no longer squashing errors
   from sub-functions. (Longfang Liu)

 - mlx5-vfio-pci variant driver update to make use of the new two-step
   DMA API for migration, using a page array directly rather than a
   page list mapped across a scatter list. (Leon Romanovsky)

 - Fix an incorrect loop index used when unwinding allocation of dirty
   page bitmaps on error, resulting in temporary failure to free unused
   bitmaps. (Li RongQing)

* tag 'vfio-v6.16-rc1' of https://github.com/awilliam/linux-vfio:
  vfio/type1: Fix error unwind in migration dirty bitmap allocation
  vfio/mlx5: Enable the DMA link API
  vfio/mlx5: Rewrite create mkey flow to allow better code reuse
  vfio/mlx5: Explicitly use number of pages instead of allocated length
  hisi_acc_vfio_pci: update function return values.
  hisi_acc_vfio_pci: bugfix live migration function without VF device driver
  hisi_acc_vfio_pci: bugfix the problem of uninstalling driver
  hisi_acc_vfio_pci: bugfix cache write-back issue
  hisi_acc_vfio_pci: add eq and aeq interruption restore
  hisi_acc_vfio_pci: fix XQE dma address error
  vfio/type1: Remove Fine Grained Superpages detection
commit 3536049822
@@ -190,9 +190,10 @@ static int qm_set_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
     int ret;
 
     /* Check VF state */
-    if (unlikely(hisi_qm_wait_mb_ready(qm))) {
+    ret = hisi_qm_wait_mb_ready(qm);
+    if (unlikely(ret)) {
         dev_err(&qm->pdev->dev, "QM device is not ready to write\n");
-        return -EBUSY;
+        return ret;
     }
 
     ret = qm_write_regs(qm, QM_VF_AEQ_INT_MASK, &vf_data->aeq_int_mask, 1);
@@ -325,13 +326,15 @@ static void qm_dev_cmd_init(struct hisi_qm *qm)
 static int vf_qm_cache_wb(struct hisi_qm *qm)
 {
     unsigned int val;
+    int ret;
 
     writel(0x1, qm->io_base + QM_CACHE_WB_START);
-    if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
+    ret = readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
                                    val, val & BIT(0), MB_POLL_PERIOD_US,
-                                   MB_POLL_TIMEOUT_US)) {
+                                   MB_POLL_TIMEOUT_US);
+    if (ret) {
         dev_err(&qm->pdev->dev, "vf QM writeback sqc cache fail\n");
-        return -EINVAL;
+        return ret;
     }
 
     return 0;
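For reference, readl_relaxed_poll_timeout() from <linux/iopoll.h> already returns 0 on success and -ETIMEDOUT when the polled condition never comes true, which is what lets the reworked vf_qm_cache_wb() above forward the helper's result instead of flattening it to -EINVAL. A minimal sketch of the same pattern, with a made-up register offset and device pointer purely for illustration:

    #include <linux/bits.h>
    #include <linux/device.h>
    #include <linux/io.h>
    #include <linux/iopoll.h>

    /* Hypothetical helper: wait for bit 0 of a DONE register to be set. */
    static int example_wait_done(void __iomem *base, struct device *dev)
    {
        unsigned int val;
        int ret;

        /* Poll every 100 us, give up after 10 ms; ret is 0 or -ETIMEDOUT. */
        ret = readl_relaxed_poll_timeout(base + 0x100, val, val & BIT(0),
                                         100, 10000);
        if (ret)
            dev_err(dev, "timed out waiting for DONE\n");

        return ret;
    }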
@@ -350,6 +353,32 @@ static int vf_qm_func_stop(struct hisi_qm *qm)
     return hisi_qm_mb(qm, QM_MB_CMD_PAUSE_QM, 0, 0, 0);
 }
 
+static int vf_qm_version_check(struct acc_vf_data *vf_data, struct device *dev)
+{
+    switch (vf_data->acc_magic) {
+    case ACC_DEV_MAGIC_V2:
+        if (vf_data->major_ver != ACC_DRV_MAJOR_VER) {
+            dev_info(dev, "migration driver version<%u.%u> not match!\n",
+                     vf_data->major_ver, vf_data->minor_ver);
+            return -EINVAL;
+        }
+        break;
+    case ACC_DEV_MAGIC_V1:
+        /* Correct dma address */
+        vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
+        vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
+        vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+        vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
+        vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
+        vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
+        break;
+    default:
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
                              struct hisi_acc_vf_migration_file *migf)
 {
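The ACC_DEV_MAGIC_V1 branch above exists because older migration streams stored each queue-context DMA address as two 32-bit words; the 64-bit value has to be reassembled before it can be programmed back. A standalone sketch of that reassembly, assuming the same QM_XQC_* constants defined later in the header:

    /* Assumes QM_XQC_ADDR_HIGH/LOW index 32-bit words, QM_XQC_ADDR_OFFSET is 32. */
    static u64 example_xqc_to_dma(const u32 *xqc_dw)
    {
        u64 dma;

        dma = xqc_dw[QM_XQC_ADDR_HIGH];   /* upper 32 bits */
        dma <<= QM_XQC_ADDR_OFFSET;       /* shift into the high half */
        dma |= xqc_dw[QM_XQC_ADDR_LOW];   /* lower 32 bits */

        return dma;
    }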
@@ -363,9 +392,10 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
     if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
         return 0;
 
-    if (vf_data->acc_magic != ACC_DEV_MAGIC) {
+    ret = vf_qm_version_check(vf_data, dev);
+    if (ret) {
         dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
-        return -EINVAL;
+        return ret;
     }
 
     if (vf_data->dev_id != hisi_acc_vdev->vf_dev->device) {
@@ -377,7 +407,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
     ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
     if (ret <= 0) {
         dev_err(dev, "failed to get vft qp nums\n");
-        return -EINVAL;
+        return ret;
     }
 
     if (ret != vf_data->qp_num) {
@@ -399,13 +429,6 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
         return -EINVAL;
     }
 
-    ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
-    if (ret) {
-        dev_err(dev, "failed to write QM_VF_STATE\n");
-        return ret;
-    }
-
-    hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
     hisi_acc_vdev->match_done = true;
     return 0;
 }
@@ -418,7 +441,9 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
     int vf_id = hisi_acc_vdev->vf_id;
     int ret;
 
-    vf_data->acc_magic = ACC_DEV_MAGIC;
+    vf_data->acc_magic = ACC_DEV_MAGIC_V2;
+    vf_data->major_ver = ACC_DRV_MAJOR_VER;
+    vf_data->minor_ver = ACC_DRV_MINOR_VER;
     /* Save device id */
     vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
 
@@ -441,6 +466,19 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
     return 0;
 }
 
+static void vf_qm_xeqc_save(struct hisi_qm *qm,
+                            struct hisi_acc_vf_migration_file *migf)
+{
+    struct acc_vf_data *vf_data = &migf->vf_data;
+    u16 eq_head, aeq_head;
+
+    eq_head = vf_data->qm_eqc_dw[0] & 0xFFFF;
+    qm_db(qm, 0, QM_DOORBELL_CMD_EQ, eq_head, 0);
+
+    aeq_head = vf_data->qm_aeqc_dw[0] & 0xFFFF;
+    qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, aeq_head, 0);
+}
+
 static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
                            struct hisi_acc_vf_migration_file *migf)
 {
@@ -456,6 +494,20 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
     if (migf->total_length < sizeof(struct acc_vf_data))
         return -EINVAL;
 
+    if (!vf_data->eqe_dma || !vf_data->aeqe_dma ||
+        !vf_data->sqc_dma || !vf_data->cqc_dma) {
+        dev_info(dev, "resume dma addr is NULL!\n");
+        hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
+        return 0;
+    }
+
+    ret = qm_write_regs(qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
+    if (ret) {
+        dev_err(dev, "failed to write QM_VF_STATE\n");
+        return ret;
+    }
+    hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+
     qm->eqe_dma = vf_data->eqe_dma;
     qm->aeqe_dma = vf_data->aeqe_dma;
     qm->sqc_dma = vf_data->sqc_dma;
@@ -493,27 +545,27 @@ static int vf_qm_read_data(struct hisi_qm *vf_qm, struct acc_vf_data *vf_data)
 
     ret = qm_get_regs(vf_qm, vf_data);
     if (ret)
-        return -EINVAL;
+        return ret;
 
     /* Every reg is 32 bit, the dma address is 64 bit. */
-    vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
+    vf_data->eqe_dma = vf_data->qm_eqc_dw[QM_XQC_ADDR_HIGH];
     vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
-    vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
-    vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
+    vf_data->eqe_dma |= vf_data->qm_eqc_dw[QM_XQC_ADDR_LOW];
+    vf_data->aeqe_dma = vf_data->qm_aeqc_dw[QM_XQC_ADDR_HIGH];
     vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
-    vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
+    vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[QM_XQC_ADDR_LOW];
 
     /* Through SQC_BT/CQC_BT to get sqc and cqc address */
     ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
     if (ret) {
         dev_err(dev, "failed to read SQC addr!\n");
-        return -EINVAL;
+        return ret;
     }
 
     ret = qm_get_cqc(vf_qm, &vf_data->cqc_dma);
     if (ret) {
         dev_err(dev, "failed to read CQC addr!\n");
-        return -EINVAL;
+        return ret;
     }
 
     return 0;
@@ -524,7 +576,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 {
     struct acc_vf_data *vf_data = &migf->vf_data;
     struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
-    struct device *dev = &vf_qm->pdev->dev;
     int ret;
 
     if (unlikely(qm_wait_dev_not_ready(vf_qm))) {
@@ -538,17 +589,14 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
     vf_data->vf_qm_state = QM_READY;
     hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
 
-    ret = vf_qm_cache_wb(vf_qm);
-    if (ret) {
-        dev_err(dev, "failed to writeback QM Cache!\n");
-        return ret;
-    }
-
     ret = vf_qm_read_data(vf_qm, vf_data);
     if (ret)
-        return -EINVAL;
+        return ret;
 
     migf->total_length = sizeof(struct acc_vf_data);
+    /* Save eqc and aeqc interrupt information */
+    vf_qm_xeqc_save(vf_qm, migf);
+
     return 0;
 }
 
@@ -967,6 +1015,13 @@ static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev
         dev_err(dev, "failed to check QM INT state!\n");
         return ret;
     }
+
+    ret = vf_qm_cache_wb(vf_qm);
+    if (ret) {
+        dev_err(dev, "failed to writeback QM cache!\n");
+        return ret;
+    }
+
     return 0;
 }
 
@@ -1327,7 +1382,7 @@ static int hisi_acc_vf_debug_check(struct seq_file *seq, struct vfio_device *vde
     ret = qm_wait_dev_not_ready(vf_qm);
     if (ret) {
         seq_puts(seq, "VF device not ready!\n");
-        return -EBUSY;
+        return ret;
     }
 
     return 0;
@@ -1463,6 +1518,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
     struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_get_vf_dev(core_vdev);
     struct hisi_qm *vf_qm = &hisi_acc_vdev->vf_qm;
 
+    hisi_acc_vf_disable_fds(hisi_acc_vdev);
     mutex_lock(&hisi_acc_vdev->open_mutex);
     hisi_acc_vdev->dev_opened = false;
     iounmap(vf_qm->io_base);
@@ -1485,6 +1541,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
     hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1;
     hisi_acc_vdev->pf_qm = pf_qm;
     hisi_acc_vdev->vf_dev = pdev;
+    hisi_acc_vdev->vf_qm_state = QM_NOT_READY;
     mutex_init(&hisi_acc_vdev->state_mutex);
     mutex_init(&hisi_acc_vdev->open_mutex);
 
@@ -39,6 +39,9 @@
 #define QM_REG_ADDR_OFFSET	0x0004
 
+#define QM_XQC_ADDR_OFFSET	32U
+#define QM_XQC_ADDR_LOW	0x1
+#define QM_XQC_ADDR_HIGH	0x2
 
 #define QM_VF_AEQ_INT_MASK	0x0004
 #define QM_VF_EQ_INT_MASK	0x000c
 #define QM_IFC_INT_SOURCE_V	0x0020
@@ -50,10 +53,15 @@
 #define QM_EQC_DW0		0X8000
 #define QM_AEQC_DW0		0X8020
 
+#define ACC_DRV_MAJOR_VER	1
+#define ACC_DRV_MINOR_VER	0
+
+#define ACC_DEV_MAGIC_V1	0XCDCDCDCDFEEDAACC
+#define ACC_DEV_MAGIC_V2	0xAACCFEEDDECADEDE
+
 struct acc_vf_data {
 #define QM_MATCH_SIZE	offsetofend(struct acc_vf_data, qm_rsv_state)
     /* QM match information */
-#define ACC_DEV_MAGIC	0XCDCDCDCDFEEDAACC
     u64 acc_magic;
     u32 qp_num;
     u32 dev_id;
@@ -61,7 +69,9 @@ struct acc_vf_data {
     u32 qp_base;
     u32 vf_qm_state;
     /* QM reserved match information */
-    u32 qm_rsv_state[3];
+    u16 major_ver;
+    u16 minor_ver;
+    u32 qm_rsv_state[2];
 
     /* QM RW regs */
     u32 aeq_int_mask;
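Note how the header keeps the guest-visible match region the same size: major_ver and minor_ver are carved out of the old reserved words (qm_rsv_state shrinks from three u32s to two), and QM_MATCH_SIZE is still offsetofend() up to qm_rsv_state. A hedged sketch of how such a layout invariant can be pinned at build time; the struct and assertion below are illustrative, not part of the driver:

    #include <linux/build_bug.h>
    #include <linux/stddef.h>
    #include <linux/types.h>

    /* Miniature stand-in for the versioned match region. */
    struct example_match_region {
        u64 magic;
        u32 qp_num;
        u32 vf_state;
        u16 major_ver;      /* new fields ...                       */
        u16 minor_ver;
        u32 rsv[2];         /* ... carved out of the old u32 rsv[3] */
    };

    #define EXAMPLE_MATCH_SIZE \
        offsetofend(struct example_match_region, rsv)

    /* The versioned layout must not move or grow the match region. */
    static_assert(EXAMPLE_MATCH_SIZE == 28);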
@@ -313,40 +313,21 @@ err_exec:
     return ret;
 }
 
-static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-                        struct mlx5_vhca_data_buffer *buf,
-                        struct mlx5_vhca_recv_buf *recv_buf,
-                        u32 *mkey)
+static u32 *alloc_mkey_in(u32 npages, u32 pdn)
 {
-    size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) :
-                          recv_buf->npages;
-    int err = 0, inlen;
-    __be64 *mtt;
+    int inlen;
     void *mkc;
     u32 *in;
 
     inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
-            sizeof(*mtt) * round_up(npages, 2);
+            sizeof(__be64) * round_up(npages, 2);
 
-    in = kvzalloc(inlen, GFP_KERNEL);
+    in = kvzalloc(inlen, GFP_KERNEL_ACCOUNT);
     if (!in)
-        return -ENOMEM;
+        return NULL;
 
     MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
              DIV_ROUND_UP(npages, 2));
-    mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
-
-    if (buf) {
-        struct sg_dma_page_iter dma_iter;
-
-        for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
-            *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
-    } else {
-        int i;
-
-        for (i = 0; i < npages; i++)
-            *mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
-    }
 
     mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
     MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
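Two details of alloc_mkey_in() above are easy to gloss over: the input blob is the fixed-size create_mkey_in layout plus one __be64 MTT slot per page, rounded up to an even count (two 8-byte entries per 16-byte translation octword), and GFP_KERNEL_ACCOUNT charges the allocation to the caller's memory cgroup. A reduced sketch of just the size computation (illustrative wrapper, not driver code):

    /* One MTT entry per page, padded to an even number of entries. */
    static u32 *example_alloc_mkey_in(u32 npages)
    {
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
                    sizeof(__be64) * round_up(npages, 2);

        return kvzalloc(inlen, GFP_KERNEL_ACCOUNT);
    }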
@@ -360,8 +341,81 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
     MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
     MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
     MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE);
-    err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
-    kvfree(in);
-    return err;
+
+    return in;
 }
 
+static int create_mkey(struct mlx5_core_dev *mdev, u32 npages, u32 *mkey_in,
+                       u32 *mkey)
+{
+    int inlen = MLX5_ST_SZ_BYTES(create_mkey_in) +
+                sizeof(__be64) * round_up(npages, 2);
+
+    return mlx5_core_create_mkey(mdev, mkey, mkey_in, inlen);
+}
+
+static void unregister_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
+                                 u32 *mkey_in, struct dma_iova_state *state,
+                                 enum dma_data_direction dir)
+{
+    dma_addr_t addr;
+    __be64 *mtt;
+    int i;
+
+    if (dma_use_iova(state)) {
+        dma_iova_destroy(mdev->device, state, npages * PAGE_SIZE, dir,
+                         0);
+    } else {
+        mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in,
+                                     klm_pas_mtt);
+        for (i = npages - 1; i >= 0; i--) {
+            addr = be64_to_cpu(mtt[i]);
+            dma_unmap_page(mdev->device, addr, PAGE_SIZE, dir);
+        }
+    }
+}
+
+static int register_dma_pages(struct mlx5_core_dev *mdev, u32 npages,
+                              struct page **page_list, u32 *mkey_in,
+                              struct dma_iova_state *state,
+                              enum dma_data_direction dir)
+{
+    dma_addr_t addr;
+    size_t mapped = 0;
+    __be64 *mtt;
+    int i, err;
+
+    mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, mkey_in, klm_pas_mtt);
+
+    if (dma_iova_try_alloc(mdev->device, state, 0, npages * PAGE_SIZE)) {
+        addr = state->addr;
+        for (i = 0; i < npages; i++) {
+            err = dma_iova_link(mdev->device, state,
+                                page_to_phys(page_list[i]), mapped,
+                                PAGE_SIZE, dir, 0);
+            if (err)
+                goto error;
+            *mtt++ = cpu_to_be64(addr);
+            addr += PAGE_SIZE;
+            mapped += PAGE_SIZE;
+        }
+        err = dma_iova_sync(mdev->device, state, 0, mapped);
+        if (err)
+            goto error;
+    } else {
+        for (i = 0; i < npages; i++) {
+            addr = dma_map_page(mdev->device, page_list[i], 0,
+                                PAGE_SIZE, dir);
+            err = dma_mapping_error(mdev->device, addr);
+            if (err)
+                goto error;
+            *mtt++ = cpu_to_be64(addr);
+        }
+    }
+    return 0;
+
+error:
+    unregister_dma_pages(mdev, i, mkey_in, state, dir);
+    return err;
+}
+
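Taken together, the helpers above replace the old scatterlist path: the caller allocates the mkey input, lets register_dma_pages() fill the MTT array either through the new IOVA link API or through per-page dma_map_page(), and only then creates the mkey. A hedged sketch of that sequence (the wrapper name and error labels are illustrative; the real driver keeps mkey_in alive so the addresses can be unmapped later):

    static int example_setup_dma_mkey(struct mlx5_core_dev *mdev, u32 pdn,
                                      u32 npages, struct page **pages,
                                      struct dma_iova_state *state,
                                      enum dma_data_direction dir,
                                      u32 **mkey_in_out, u32 *mkey)
    {
        u32 *mkey_in;
        int err;

        mkey_in = alloc_mkey_in(npages, pdn);
        if (!mkey_in)
            return -ENOMEM;

        err = register_dma_pages(mdev, npages, pages, mkey_in, state, dir);
        if (err)
            goto err_free;

        err = create_mkey(mdev, npages, mkey_in, mkey);
        if (err)
            goto err_unregister;

        /* Keep mkey_in around: its MTT entries are reused at unmap time. */
        *mkey_in_out = mkey_in;
        return 0;

    err_unregister:
        unregister_dma_pages(mdev, npages, mkey_in, state, dir);
    err_free:
        kvfree(mkey_in);
        return err;
    }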
@ -375,97 +429,97 @@ static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
|
|||
if (mvdev->mdev_detach)
|
||||
return -ENOTCONN;
|
||||
|
||||
if (buf->dmaed || !buf->allocated_length)
|
||||
if (buf->mkey_in || !buf->npages)
|
||||
return -EINVAL;
|
||||
|
||||
ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
buf->mkey_in = alloc_mkey_in(buf->npages, buf->migf->pdn);
|
||||
if (!buf->mkey_in)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey);
|
||||
ret = register_dma_pages(mdev, buf->npages, buf->page_list,
|
||||
buf->mkey_in, &buf->state, buf->dma_dir);
|
||||
if (ret)
|
||||
goto err;
|
||||
goto err_register_dma;
|
||||
|
||||
buf->dmaed = true;
|
||||
ret = create_mkey(mdev, buf->npages, buf->mkey_in, &buf->mkey);
|
||||
if (ret)
|
||||
goto err_create_mkey;
|
||||
|
||||
return 0;
|
||||
err:
|
||||
dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
|
||||
|
||||
err_create_mkey:
|
||||
unregister_dma_pages(mdev, buf->npages, buf->mkey_in, &buf->state,
|
||||
buf->dma_dir);
|
||||
err_register_dma:
|
||||
kvfree(buf->mkey_in);
|
||||
buf->mkey_in = NULL;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void free_page_list(u32 npages, struct page **page_list)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Undo alloc_pages_bulk() */
|
||||
for (i = npages - 1; i >= 0; i--)
|
||||
__free_page(page_list[i]);
|
||||
|
||||
kvfree(page_list);
|
||||
}
|
||||
|
||||
void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
|
||||
{
|
||||
struct mlx5_vf_migration_file *migf = buf->migf;
|
||||
struct sg_page_iter sg_iter;
|
||||
struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
|
||||
struct mlx5_core_dev *mdev = mvdev->mdev;
|
||||
|
||||
lockdep_assert_held(&migf->mvdev->state_mutex);
|
||||
WARN_ON(migf->mvdev->mdev_detach);
|
||||
lockdep_assert_held(&mvdev->state_mutex);
|
||||
WARN_ON(mvdev->mdev_detach);
|
||||
|
||||
if (buf->dmaed) {
|
||||
mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey);
|
||||
dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt,
|
||||
buf->dma_dir, 0);
|
||||
if (buf->mkey_in) {
|
||||
mlx5_core_destroy_mkey(mdev, buf->mkey);
|
||||
unregister_dma_pages(mdev, buf->npages, buf->mkey_in,
|
||||
&buf->state, buf->dma_dir);
|
||||
kvfree(buf->mkey_in);
|
||||
}
|
||||
|
||||
/* Undo alloc_pages_bulk() */
|
||||
for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
|
||||
__free_page(sg_page_iter_page(&sg_iter));
|
||||
sg_free_append_table(&buf->table);
|
||||
free_page_list(buf->npages, buf->page_list);
|
||||
kfree(buf);
|
||||
}
|
||||
|
||||
static int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
|
||||
unsigned int npages)
|
||||
static int mlx5vf_add_pages(struct page ***page_list, unsigned int npages)
|
||||
{
|
||||
unsigned int to_alloc = npages;
|
||||
struct page **page_list;
|
||||
unsigned long filled;
|
||||
unsigned int to_fill;
|
||||
int ret;
|
||||
unsigned int filled, done = 0;
|
||||
int i;
|
||||
|
||||
to_fill = min_t(unsigned int, npages, PAGE_SIZE / sizeof(*page_list));
|
||||
page_list = kvzalloc(to_fill * sizeof(*page_list), GFP_KERNEL_ACCOUNT);
|
||||
if (!page_list)
|
||||
*page_list =
|
||||
kvcalloc(npages, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
|
||||
if (!*page_list)
|
||||
return -ENOMEM;
|
||||
|
||||
do {
|
||||
filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, to_fill,
|
||||
page_list);
|
||||
if (!filled) {
|
||||
ret = -ENOMEM;
|
||||
for (;;) {
|
||||
filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT, npages - done,
|
||||
*page_list + done);
|
||||
if (!filled)
|
||||
goto err;
|
||||
|
||||
done += filled;
|
||||
if (done == npages)
|
||||
break;
|
||||
}
|
||||
to_alloc -= filled;
|
||||
ret = sg_alloc_append_table_from_pages(
|
||||
&buf->table, page_list, filled, 0,
|
||||
filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
|
||||
if (ret)
|
||||
goto err_append;
|
||||
buf->allocated_length += filled * PAGE_SIZE;
|
||||
/* clean input for another bulk allocation */
|
||||
memset(page_list, 0, filled * sizeof(*page_list));
|
||||
to_fill = min_t(unsigned int, to_alloc,
|
||||
PAGE_SIZE / sizeof(*page_list));
|
||||
} while (to_alloc > 0);
|
||||
|
||||
kvfree(page_list);
|
||||
return 0;
|
||||
|
||||
err_append:
|
||||
for (i = filled - 1; i >= 0; i--)
|
||||
__free_page(page_list[i]);
|
||||
err:
|
||||
kvfree(page_list);
|
||||
return ret;
|
||||
for (i = 0; i < done; i++)
|
||||
__free_page(*page_list[i]);
|
||||
|
||||
kvfree(*page_list);
|
||||
*page_list = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
struct mlx5_vhca_data_buffer *
|
||||
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
|
||||
size_t length,
|
||||
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
|
||||
enum dma_data_direction dma_dir)
|
||||
{
|
||||
struct mlx5_vhca_data_buffer *buf;
|
||||
|
@ -477,12 +531,13 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
|
|||
|
||||
buf->dma_dir = dma_dir;
|
||||
buf->migf = migf;
|
||||
if (length) {
|
||||
ret = mlx5vf_add_migration_pages(buf,
|
||||
DIV_ROUND_UP_ULL(length, PAGE_SIZE));
|
||||
if (npages) {
|
||||
ret = mlx5vf_add_pages(&buf->page_list, npages);
|
||||
if (ret)
|
||||
goto end;
|
||||
|
||||
buf->npages = npages;
|
||||
|
||||
if (dma_dir != DMA_NONE) {
|
||||
ret = mlx5vf_dma_data_buffer(buf);
|
||||
if (ret)
|
||||
|
@ -505,8 +560,8 @@ void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
|
|||
}
|
||||
|
||||
struct mlx5_vhca_data_buffer *
|
||||
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
|
||||
size_t length, enum dma_data_direction dma_dir)
|
||||
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
|
||||
enum dma_data_direction dma_dir)
|
||||
{
|
||||
struct mlx5_vhca_data_buffer *buf, *temp_buf;
|
||||
struct list_head free_list;
|
||||
|
@ -521,7 +576,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
|
|||
list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
|
||||
if (buf->dma_dir == dma_dir) {
|
||||
list_del_init(&buf->buf_elm);
|
||||
if (buf->allocated_length >= length) {
|
||||
if (buf->npages >= npages) {
|
||||
spin_unlock_irq(&migf->list_lock);
|
||||
goto found;
|
||||
}
|
||||
|
@ -535,7 +590,7 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
|
|||
}
|
||||
}
|
||||
spin_unlock_irq(&migf->list_lock);
|
||||
buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir);
|
||||
buf = mlx5vf_alloc_data_buffer(migf, npages, dma_dir);
|
||||
|
||||
found:
|
||||
while ((temp_buf = list_first_entry_or_null(&free_list,
|
||||
|
@ -716,7 +771,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
|
|||
MLX5_SET(save_vhca_state_in, in, op_mod, 0);
|
||||
MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
|
||||
MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
|
||||
MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length);
|
||||
MLX5_SET(save_vhca_state_in, in, size, buf->npages * PAGE_SIZE);
|
||||
MLX5_SET(save_vhca_state_in, in, incremental, inc);
|
||||
MLX5_SET(save_vhca_state_in, in, set_track, track);
|
||||
|
||||
|
@ -738,8 +793,11 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
|
|||
}
|
||||
|
||||
if (!header_buf) {
|
||||
header_buf = mlx5vf_get_data_buffer(migf,
|
||||
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
|
||||
header_buf = mlx5vf_get_data_buffer(
|
||||
migf,
|
||||
DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
|
||||
PAGE_SIZE),
|
||||
DMA_NONE);
|
||||
if (IS_ERR(header_buf)) {
|
||||
err = PTR_ERR(header_buf);
|
||||
goto err_free;
|
||||
|
@ -783,7 +841,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
|
|||
if (mvdev->mdev_detach)
|
||||
return -ENOTCONN;
|
||||
|
||||
if (!buf->dmaed) {
|
||||
if (!buf->mkey_in) {
|
||||
err = mlx5vf_dma_data_buffer(buf);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -1338,103 +1396,16 @@ static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
|
|||
kfree(qp);
|
||||
}
|
||||
|
||||
static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Undo alloc_pages_bulk() */
|
||||
for (i = 0; i < recv_buf->npages; i++)
|
||||
__free_page(recv_buf->page_list[i]);
|
||||
|
||||
kvfree(recv_buf->page_list);
|
||||
}
|
||||
|
||||
static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
|
||||
unsigned int npages)
|
||||
{
|
||||
unsigned int filled = 0, done = 0;
|
||||
int i;
|
||||
|
||||
recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!recv_buf->page_list)
|
||||
return -ENOMEM;
|
||||
|
||||
for (;;) {
|
||||
filled = alloc_pages_bulk(GFP_KERNEL_ACCOUNT,
|
||||
npages - done,
|
||||
recv_buf->page_list + done);
|
||||
if (!filled)
|
||||
goto err;
|
||||
|
||||
done += filled;
|
||||
if (done == npages)
|
||||
break;
|
||||
}
|
||||
|
||||
recv_buf->npages = npages;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
for (i = 0; i < npages; i++) {
|
||||
if (recv_buf->page_list[i])
|
||||
__free_page(recv_buf->page_list[i]);
|
||||
}
|
||||
|
||||
kvfree(recv_buf->page_list);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
|
||||
struct mlx5_vhca_recv_buf *recv_buf)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
|
||||
sizeof(*recv_buf->dma_addrs),
|
||||
GFP_KERNEL_ACCOUNT);
|
||||
if (!recv_buf->dma_addrs)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < recv_buf->npages; i++) {
|
||||
recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
|
||||
recv_buf->page_list[i],
|
||||
0, PAGE_SIZE,
|
||||
DMA_FROM_DEVICE);
|
||||
if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
|
||||
goto error;
|
||||
}
|
||||
return 0;
|
||||
|
||||
error:
|
||||
for (j = 0; j < i; j++)
|
||||
dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
|
||||
PAGE_SIZE, DMA_FROM_DEVICE);
|
||||
|
||||
kvfree(recv_buf->dma_addrs);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
|
||||
struct mlx5_vhca_recv_buf *recv_buf)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < recv_buf->npages; i++)
|
||||
dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
|
||||
PAGE_SIZE, DMA_FROM_DEVICE);
|
||||
|
||||
kvfree(recv_buf->dma_addrs);
|
||||
}
|
||||
|
||||
static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
|
||||
struct mlx5_vhca_qp *qp)
|
||||
{
|
||||
struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
|
||||
|
||||
mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
|
||||
unregister_dma_recv_pages(mdev, recv_buf);
|
||||
free_recv_pages(&qp->recv_buf);
|
||||
unregister_dma_pages(mdev, recv_buf->npages, recv_buf->mkey_in,
|
||||
&recv_buf->state, DMA_FROM_DEVICE);
|
||||
kvfree(recv_buf->mkey_in);
|
||||
free_page_list(recv_buf->npages, recv_buf->page_list);
|
||||
}
|
||||
|
||||
static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
|
||||
|
@ -1445,24 +1416,38 @@ static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
|
|||
struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
|
||||
int err;
|
||||
|
||||
err = alloc_recv_pages(recv_buf, npages);
|
||||
if (err < 0)
|
||||
err = mlx5vf_add_pages(&recv_buf->page_list, npages);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = register_dma_recv_pages(mdev, recv_buf);
|
||||
if (err)
|
||||
goto end;
|
||||
recv_buf->npages = npages;
|
||||
|
||||
err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
|
||||
recv_buf->mkey_in = alloc_mkey_in(npages, pdn);
|
||||
if (!recv_buf->mkey_in) {
|
||||
err = -ENOMEM;
|
||||
goto end;
|
||||
}
|
||||
|
||||
err = register_dma_pages(mdev, npages, recv_buf->page_list,
|
||||
recv_buf->mkey_in, &recv_buf->state,
|
||||
DMA_FROM_DEVICE);
|
||||
if (err)
|
||||
goto err_register_dma;
|
||||
|
||||
err = create_mkey(mdev, npages, recv_buf->mkey_in, &recv_buf->mkey);
|
||||
if (err)
|
||||
goto err_create_mkey;
|
||||
|
||||
return 0;
|
||||
|
||||
err_create_mkey:
|
||||
unregister_dma_recv_pages(mdev, recv_buf);
|
||||
unregister_dma_pages(mdev, npages, recv_buf->mkey_in, &recv_buf->state,
|
||||
DMA_FROM_DEVICE);
|
||||
err_register_dma:
|
||||
kvfree(recv_buf->mkey_in);
|
||||
recv_buf->mkey_in = NULL;
|
||||
end:
|
||||
free_recv_pages(recv_buf);
|
||||
free_page_list(npages, recv_buf->page_list);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
@ -53,20 +53,17 @@ struct mlx5_vf_migration_header {
|
|||
};
|
||||
|
||||
struct mlx5_vhca_data_buffer {
|
||||
struct sg_append_table table;
|
||||
struct page **page_list;
|
||||
struct dma_iova_state state;
|
||||
loff_t start_pos;
|
||||
u64 length;
|
||||
u64 allocated_length;
|
||||
u32 npages;
|
||||
u32 mkey;
|
||||
u32 *mkey_in;
|
||||
enum dma_data_direction dma_dir;
|
||||
u8 dmaed:1;
|
||||
u8 stop_copy_chunk_num;
|
||||
struct list_head buf_elm;
|
||||
struct mlx5_vf_migration_file *migf;
|
||||
/* Optimize mlx5vf_get_migration_page() for sequential access */
|
||||
struct scatterlist *last_offset_sg;
|
||||
unsigned int sg_last_entry;
|
||||
unsigned long last_offset;
|
||||
};
|
||||
|
||||
struct mlx5vf_async_data {
|
||||
|
@ -133,8 +130,9 @@ struct mlx5_vhca_cq {
|
|||
struct mlx5_vhca_recv_buf {
|
||||
u32 npages;
|
||||
struct page **page_list;
|
||||
dma_addr_t *dma_addrs;
|
||||
struct dma_iova_state state;
|
||||
u32 next_rq_offset;
|
||||
u32 *mkey_in;
|
||||
u32 mkey;
|
||||
};
|
||||
|
||||
|
@ -217,15 +215,24 @@ int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf);
|
|||
void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf);
|
||||
void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf);
|
||||
struct mlx5_vhca_data_buffer *
|
||||
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
|
||||
size_t length, enum dma_data_direction dma_dir);
|
||||
mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
|
||||
enum dma_data_direction dma_dir);
|
||||
void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf);
|
||||
struct mlx5_vhca_data_buffer *
|
||||
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
|
||||
size_t length, enum dma_data_direction dma_dir);
|
||||
mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, u32 npages,
|
||||
enum dma_data_direction dma_dir);
|
||||
void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
|
||||
struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
|
||||
unsigned long offset);
|
||||
static inline struct page *
|
||||
mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
|
||||
unsigned long offset)
|
||||
{
|
||||
int page_entry = offset / PAGE_SIZE;
|
||||
|
||||
if (page_entry >= buf->npages)
|
||||
return NULL;
|
||||
|
||||
return buf->page_list[page_entry];
|
||||
}
|
||||
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
|
||||
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev,
|
||||
enum mlx5_vf_migf_state *last_save_state);
|
||||
|
|
|
@ -34,37 +34,6 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
|
|||
core_device);
|
||||
}
|
||||
|
||||
struct page *
|
||||
mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
|
||||
unsigned long offset)
|
||||
{
|
||||
unsigned long cur_offset = 0;
|
||||
struct scatterlist *sg;
|
||||
unsigned int i;
|
||||
|
||||
/* All accesses are sequential */
|
||||
if (offset < buf->last_offset || !buf->last_offset_sg) {
|
||||
buf->last_offset = 0;
|
||||
buf->last_offset_sg = buf->table.sgt.sgl;
|
||||
buf->sg_last_entry = 0;
|
||||
}
|
||||
|
||||
cur_offset = buf->last_offset;
|
||||
|
||||
for_each_sg(buf->last_offset_sg, sg,
|
||||
buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
|
||||
if (offset < sg->length + cur_offset) {
|
||||
buf->last_offset_sg = sg;
|
||||
buf->sg_last_entry += i;
|
||||
buf->last_offset = cur_offset;
|
||||
return nth_page(sg_page(sg),
|
||||
(offset - cur_offset) / PAGE_SIZE);
|
||||
}
|
||||
cur_offset += sg->length;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
|
||||
{
|
||||
mutex_lock(&migf->lock);
|
||||
|
@ -308,6 +277,7 @@ static struct mlx5_vhca_data_buffer *
|
|||
mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf,
|
||||
u8 index, size_t required_length)
|
||||
{
|
||||
u32 npages = DIV_ROUND_UP(required_length, PAGE_SIZE);
|
||||
struct mlx5_vhca_data_buffer *buf = migf->buf[index];
|
||||
u8 chunk_num;
|
||||
|
||||
|
@ -315,12 +285,11 @@ mlx5vf_mig_file_get_stop_copy_buf(struct mlx5_vf_migration_file *migf,
|
|||
chunk_num = buf->stop_copy_chunk_num;
|
||||
buf->migf->buf[index] = NULL;
|
||||
/* Checking whether the pre-allocated buffer can fit */
|
||||
if (buf->allocated_length >= required_length)
|
||||
if (buf->npages >= npages)
|
||||
return buf;
|
||||
|
||||
mlx5vf_put_data_buffer(buf);
|
||||
buf = mlx5vf_get_data_buffer(buf->migf, required_length,
|
||||
DMA_FROM_DEVICE);
|
||||
buf = mlx5vf_get_data_buffer(buf->migf, npages, DMA_FROM_DEVICE);
|
||||
if (IS_ERR(buf))
|
||||
return buf;
|
||||
|
||||
|
@ -373,7 +342,8 @@ static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf,
|
|||
u8 *to_buff;
|
||||
int ret;
|
||||
|
||||
header_buf = mlx5vf_get_data_buffer(migf, size, DMA_NONE);
|
||||
header_buf = mlx5vf_get_data_buffer(migf, DIV_ROUND_UP(size, PAGE_SIZE),
|
||||
DMA_NONE);
|
||||
if (IS_ERR(header_buf))
|
||||
return PTR_ERR(header_buf);
|
||||
|
||||
|
@ -388,7 +358,7 @@ static int mlx5vf_add_stop_copy_header(struct mlx5_vf_migration_file *migf,
|
|||
to_buff = kmap_local_page(page);
|
||||
memcpy(to_buff, &header, sizeof(header));
|
||||
header_buf->length = sizeof(header);
|
||||
data.stop_copy_size = cpu_to_le64(migf->buf[0]->allocated_length);
|
||||
data.stop_copy_size = cpu_to_le64(migf->buf[0]->npages * PAGE_SIZE);
|
||||
memcpy(to_buff + sizeof(header), &data, sizeof(data));
|
||||
header_buf->length += sizeof(data);
|
||||
kunmap_local(to_buff);
|
||||
|
@ -437,15 +407,20 @@ static int mlx5vf_prep_stop_copy(struct mlx5vf_pci_core_device *mvdev,
|
|||
|
||||
num_chunks = mvdev->chunk_mode ? MAX_NUM_CHUNKS : 1;
|
||||
for (i = 0; i < num_chunks; i++) {
|
||||
buf = mlx5vf_get_data_buffer(migf, inc_state_size, DMA_FROM_DEVICE);
|
||||
buf = mlx5vf_get_data_buffer(
|
||||
migf, DIV_ROUND_UP(inc_state_size, PAGE_SIZE),
|
||||
DMA_FROM_DEVICE);
|
||||
if (IS_ERR(buf)) {
|
||||
ret = PTR_ERR(buf);
|
||||
goto err;
|
||||
}
|
||||
|
||||
migf->buf[i] = buf;
|
||||
buf = mlx5vf_get_data_buffer(migf,
|
||||
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
|
||||
buf = mlx5vf_get_data_buffer(
|
||||
migf,
|
||||
DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
|
||||
PAGE_SIZE),
|
||||
DMA_NONE);
|
||||
if (IS_ERR(buf)) {
|
||||
ret = PTR_ERR(buf);
|
||||
goto err;
|
||||
|
@ -553,7 +528,8 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
|
|||
* We finished transferring the current state and the device has a
|
||||
* dirty state, save a new state to be ready for.
|
||||
*/
|
||||
buf = mlx5vf_get_data_buffer(migf, inc_length, DMA_FROM_DEVICE);
|
||||
buf = mlx5vf_get_data_buffer(migf, DIV_ROUND_UP(inc_length, PAGE_SIZE),
|
||||
DMA_FROM_DEVICE);
|
||||
if (IS_ERR(buf)) {
|
||||
ret = PTR_ERR(buf);
|
||||
mlx5vf_mark_err(migf);
|
||||
|
@ -675,8 +651,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
|
|||
|
||||
if (track) {
|
||||
/* leave the allocated buffer ready for the stop-copy phase */
|
||||
buf = mlx5vf_alloc_data_buffer(migf,
|
||||
migf->buf[0]->allocated_length, DMA_FROM_DEVICE);
|
||||
buf = mlx5vf_alloc_data_buffer(migf, migf->buf[0]->npages,
|
||||
DMA_FROM_DEVICE);
|
||||
if (IS_ERR(buf)) {
|
||||
ret = PTR_ERR(buf);
|
||||
goto out_pd;
|
||||
|
@ -917,11 +893,14 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
|
|||
goto out_unlock;
|
||||
break;
|
||||
case MLX5_VF_LOAD_STATE_PREP_HEADER_DATA:
|
||||
if (vhca_buf_header->allocated_length < migf->record_size) {
|
||||
{
|
||||
u32 npages = DIV_ROUND_UP(migf->record_size, PAGE_SIZE);
|
||||
|
||||
if (vhca_buf_header->npages < npages) {
|
||||
mlx5vf_free_data_buffer(vhca_buf_header);
|
||||
|
||||
migf->buf_header[0] = mlx5vf_alloc_data_buffer(migf,
|
||||
migf->record_size, DMA_NONE);
|
||||
migf->buf_header[0] = mlx5vf_alloc_data_buffer(
|
||||
migf, npages, DMA_NONE);
|
||||
if (IS_ERR(migf->buf_header[0])) {
|
||||
ret = PTR_ERR(migf->buf_header[0]);
|
||||
migf->buf_header[0] = NULL;
|
||||
|
@ -934,6 +913,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
|
|||
vhca_buf_header->start_pos = migf->max_pos;
|
||||
migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER_DATA;
|
||||
break;
|
||||
}
|
||||
case MLX5_VF_LOAD_STATE_READ_HEADER_DATA:
|
||||
ret = mlx5vf_resume_read_header_data(migf, vhca_buf_header,
|
||||
&buf, &len, pos, &done);
|
||||
|
@ -944,12 +924,13 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
|
|||
{
|
||||
u64 size = max(migf->record_size,
|
||||
migf->stop_copy_prep_size);
|
||||
u32 npages = DIV_ROUND_UP(size, PAGE_SIZE);
|
||||
|
||||
if (vhca_buf->allocated_length < size) {
|
||||
if (vhca_buf->npages < npages) {
|
||||
mlx5vf_free_data_buffer(vhca_buf);
|
||||
|
||||
migf->buf[0] = mlx5vf_alloc_data_buffer(migf,
|
||||
size, DMA_TO_DEVICE);
|
||||
migf->buf[0] = mlx5vf_alloc_data_buffer(
|
||||
migf, npages, DMA_TO_DEVICE);
|
||||
if (IS_ERR(migf->buf[0])) {
|
||||
ret = PTR_ERR(migf->buf[0]);
|
||||
migf->buf[0] = NULL;
|
||||
|
@ -1037,8 +1018,11 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
|
|||
}
|
||||
|
||||
migf->buf[0] = buf;
|
||||
buf = mlx5vf_alloc_data_buffer(migf,
|
||||
sizeof(struct mlx5_vf_migration_header), DMA_NONE);
|
||||
buf = mlx5vf_alloc_data_buffer(
|
||||
migf,
|
||||
DIV_ROUND_UP(sizeof(struct mlx5_vf_migration_header),
|
||||
PAGE_SIZE),
|
||||
DMA_NONE);
|
||||
if (IS_ERR(buf)) {
|
||||
ret = PTR_ERR(buf);
|
||||
goto out_buf;
|
||||
|
@ -1148,7 +1132,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
|
|||
MLX5VF_QUERY_INC | MLX5VF_QUERY_CLEANUP);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
buf = mlx5vf_get_data_buffer(migf, size, DMA_FROM_DEVICE);
|
||||
buf = mlx5vf_get_data_buffer(migf,
|
||||
DIV_ROUND_UP(size, PAGE_SIZE), DMA_FROM_DEVICE);
|
||||
if (IS_ERR(buf))
|
||||
return ERR_CAST(buf);
|
||||
/* pre_copy cleanup */
|
||||
|
|
|
@@ -80,7 +80,6 @@ struct vfio_domain {
     struct iommu_domain *domain;
     struct list_head next;
     struct list_head group_list;
-    bool fgsp : 1; /* Fine-grained super pages */
     bool enforce_cache_coherency : 1;
 };
 
@@ -293,7 +292,7 @@ static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
     struct rb_node *p;
 
     for (p = rb_prev(n); p; p = rb_prev(p)) {
-        struct vfio_dma *dma = rb_entry(n,
+        struct vfio_dma *dma = rb_entry(p,
                                         struct vfio_dma, node);
 
         vfio_dma_bitmap_free(dma);
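The fix above is a single character but worth spelling out: the unwind loop already walks backwards with rb_prev(p), yet the rb_entry() lookup kept using n, the node whose bitmap allocation failed, so every pass operated on the same entry and the bitmaps allocated earlier were only reclaimed later when their vfio_dma structures were torn down. The general shape of the pitfall, reduced to a plain array (illustrative code, not from the kernel):

    #include <stdlib.h>

    /* Allocate one buffer per slot; on failure free what was already done. */
    static int example_alloc_all(void **bufs, int n, size_t size)
    {
        int i, j;

        for (i = 0; i < n; i++) {
            bufs[i] = malloc(size);
            if (!bufs[i])
                goto unwind;
        }
        return 0;

    unwind:
        /* Index with the unwind variable j, not with the failing slot i. */
        for (j = i - 1; j >= 0; j--) {
            free(bufs[j]);
            bufs[j] = NULL;
        }
        return -1;
    }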
@@ -1095,8 +1094,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
      * may require hardware cache flushing, try to find the
      * largest contiguous physical memory chunk to unmap.
      */
-    for (len = PAGE_SIZE;
-         !domain->fgsp && iova + len < end; len += PAGE_SIZE) {
+    for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) {
         next = iommu_iova_to_phys(domain->domain, iova + len);
         if (next != phys + len)
             break;
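With the fgsp shortcut gone, the loop above always grows len for as long as iommu_iova_to_phys() keeps returning physically contiguous pages, so every IOMMU now gets the coalesced unmap behaviour that the removed probe used to skip on AMD-Vi. A reduced sketch of that coalescing idea (hypothetical helper built on the generic IOMMU API):

    #include <linux/iommu.h>

    /* Unmap [iova, end) in maximal physically contiguous chunks. */
    static void example_unmap_contiguous(struct iommu_domain *domain,
                                         dma_addr_t iova, dma_addr_t end)
    {
        while (iova < end) {
            phys_addr_t phys = iommu_iova_to_phys(domain, iova);
            size_t len = PAGE_SIZE;

            /* Grow the chunk while the backing pages stay contiguous. */
            while (iova + len < end &&
                   iommu_iova_to_phys(domain, iova + len) == phys + len)
                len += PAGE_SIZE;

            iommu_unmap(domain, iova, len);
            iova += len;
        }
    }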
@@ -1833,49 +1831,6 @@ unwind:
     return ret;
 }
 
-/*
- * We change our unmap behavior slightly depending on whether the IOMMU
- * supports fine-grained superpages. IOMMUs like AMD-Vi will use a superpage
- * for practically any contiguous power-of-two mapping we give it. This means
- * we don't need to look for contiguous chunks ourselves to make unmapping
- * more efficient. On IOMMUs with coarse-grained super pages, like Intel VT-d
- * with discrete 2M/1G/512G/1T superpages, identifying contiguous chunks
- * significantly boosts non-hugetlbfs mappings and doesn't seem to hurt when
- * hugetlbfs is in use.
- */
-static void vfio_test_domain_fgsp(struct vfio_domain *domain, struct list_head *regions)
-{
-    int ret, order = get_order(PAGE_SIZE * 2);
-    struct vfio_iova *region;
-    struct page *pages;
-    dma_addr_t start;
-
-    pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
-    if (!pages)
-        return;
-
-    list_for_each_entry(region, regions, list) {
-        start = ALIGN(region->start, PAGE_SIZE * 2);
-        if (start >= region->end || (region->end - start < PAGE_SIZE * 2))
-            continue;
-
-        ret = iommu_map(domain->domain, start, page_to_phys(pages), PAGE_SIZE * 2,
-                        IOMMU_READ | IOMMU_WRITE | IOMMU_CACHE,
-                        GFP_KERNEL_ACCOUNT);
-        if (!ret) {
-            size_t unmapped = iommu_unmap(domain->domain, start, PAGE_SIZE);
-
-            if (unmapped == PAGE_SIZE)
-                iommu_unmap(domain->domain, start + PAGE_SIZE, PAGE_SIZE);
-            else
-                domain->fgsp = true;
-        }
-        break;
-    }
-
-    __free_pages(pages, order);
-}
-
 static struct vfio_iommu_group *find_iommu_group(struct vfio_domain *domain,
                                                  struct iommu_group *iommu_group)
 {
@@ -2314,8 +2269,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
         }
     }
 
-    vfio_test_domain_fgsp(domain, &iova_copy);
-
     /* replay mappings on new domains */
     ret = vfio_iommu_replay(iommu, domain);
     if (ret)