linux-yocto/arch/um/drivers/virt-pci.c
Linus Torvalds ac7473a179 Updates for the interrupt subsystem:
- Core:
 
     - Provide a new mechanism to create interrupt domains. The existing
       interfaces have already too many parameters and it's a pain to expand
       any of this for new required functionality.
 
       The new function takes a pointer to a data structure as argument. The
       data structure combines all existing parameters and allows for easy
       extension.
 
       The first extension for this is to handle the instantiation of
       generic interrupt chips at the core level and to allow drivers to
       provide extra init/exit callbacks.
 
       This is necessary to do the full interrupt chip initialization before
       the new domain is published, so that concurrent usage sites won't see
       a half initialized interrupt domain. Similar problems exist on
       teardown.
 
       This has turned out to be a real problem due to the deferred and
       parallel probing which was added in recent years.
 
       Handling this at the core level allows to remove quite some accrued
       boilerplate code in existing drivers and avoids horrible workarounds
       at the driver level.
 
     - The usual small improvements all over the place
 
   - Drivers
 
     - Add support for LAN966x OIC and RZ/Five SoC
 
     - Split the STM ExtI driver into a microcontroller and a SMP version to
       allow building the latter as a module for multi-platform kernels.
 
     - Enable MSI support for Armada 370XP on platforms which do not support
       IPIs.
 
     - The usual small fixes and enhancements all over the place.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmaVJbUTHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoXTuD/9Tc9BhY5CW7HQkdPQu2Db1O+esprkQ
 Uo9lMpTTpPiy9btg4LONzLf4mjbufZpyKBxkRWoZFO0Zj5q4UE9NZYh7EcxrF5Tl
 CIFJmyteLsYuOyCmPrtSDSovonXjQKYBE3u2LVJNNkwEkhYbYW9sqIKeT8nneLv6
 53gd28ESFUEUjHNTblw/eXviweyUKSXc0qyg+3hgZQPMoh9RkdkEPvyaw9Y/s5Ce
 FelLLxzMqX86dR2TJMLqiaGiMpUu/kl+Yz2m5c77TwA2D68qjhHywbtKtlH7b3C6
 LMHu2dMrrKSJrLL8roVIYJdHAd1TKWVdnYhqv9WBHFTu1sDuztpR44mewbo8exUU
 L2RgVSGYNmeFC3p4wztWYSQfIVa9uOg7+TnJJdh7G0jLIeKM/TbufWqDAJAuoVPL
 QhGbZ5xNbZJZ8bvhhItjxpRN/kPs44p3mUGyRJBQzm+mDN118bqfmQzhLcwRbfE2
 smp73SQzg9alG2rGdNVEqkKmp8zhg2Crx2VCeVdgbeOxWQRet9zLWcp4FfCEUE9e
 eK3iEi8z+rmwafaf3rsxYdrdIRLaUmcni0v7R/16cJH/Cs7bU3Re8XyGhevo3lsO
 pJiP5wZDxbckwXNpLm3S/qPDW7vSCnuFPF7QmOvC3a70PsD+E4NKUgiwJuHtn/ZV
 pFBKzbQgCsowQA==
 =QCRH
 -----END PGP SIGNATURE-----

Merge tag 'irq-core-2024-07-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull interrupt subsystem updates from Thomas Gleixner:
 "Core:

   - Provide a new mechanism to create interrupt domains. The existing
     interfaces already have too many parameters, and it is painful to
     extend them for newly required functionality.

     The new function takes a pointer to a data structure as argument.
     The data structure combines all existing parameters and allows for
     easy extension.

     The first extension for this is to handle the instantiation of
     generic interrupt chips at the core level and to allow drivers to
     provide extra init/exit callbacks.

     This is necessary to do the full interrupt chip initialization
     before the new domain is published, so that concurrent usage sites
     won't see a half initialized interrupt domain. Similar problems
     exist on teardown.

     This has turned out to be a real problem due to the deferred and
     parallel probing which was added in recent years.

     Handling this at the core level allows removing a fair amount of
     accrued boilerplate code in existing drivers and avoids horrible
     workarounds at the driver level.

   - The usual small improvements all over the place

  Drivers:

   - Add support for LAN966x OIC and RZ/Five SoC

   - Split the STM ExtI driver into a microcontroller version and an SMP
     version to allow building the latter as a module for multi-platform
     kernels

   - Enable MSI support for Armada 370XP on platforms which do not
     support IPIs

   - The usual small fixes and enhancements all over the place"

* tag 'irq-core-2024-07-15' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (59 commits)
  irqdomain: Fix the kernel-doc and plug it into Documentation
  genirq: Set IRQF_COND_ONESHOT in request_irq()
  irqchip/imx-irqsteer: Handle runtime power management correctly
  irqchip/gic-v3: Pass #redistributor-regions to gic_of_setup_kvm_info()
  irqchip/bcm2835: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND
  irqchip/gic-v4: Make sure a VPE is locked when VMAPP is issued
  irqchip/gic-v4: Substitute vmovp_lock for a per-VM lock
  irqchip/gic-v4: Always configure affinity on VPE activation
  Revert "irqchip/dw-apb-ictl: Support building as module"
  Revert "Loongarch: Support loongarch avec"
  arm64: Kconfig: Allow build irq-stm32mp-exti driver as module
  ARM: stm32: Allow build irq-stm32mp-exti driver as module
  irqchip/stm32mp-exti: Allow building as module
  irqchip/stm32mp-exti: Rename internal symbols
  irqchip/stm32-exti: Split MCU and MPU code
  arm64: Kconfig: Select STM32MP_EXTI on STM32 platforms
  ARM: stm32: Use different EXTI driver on ARMv7m and ARMv7a
  irqchip/stm32-exti: Add CONFIG_STM32MP_EXTI
  irqchip/dw-apb-ictl: Support building as module
  irqchip/riscv-aplic: Simplify the initialization code
  ...
2024-07-22 13:52:05 -07:00

1088 lines
23 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2020 Intel Corporation
* Author: Johannes Berg <johannes@sipsolutions.net>
*/
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/logic_iomem.h>
#include <linux/of_platform.h>
#include <linux/irqdomain.h>
#include <linux/virtio_pcidev.h>
#include <linux/virtio-uml.h>
#include <linux/delay.h>
#include <linux/msi.h>
#include <asm/unaligned.h>
#include <irq_kern.h>
/* Number of device slots; sizes um_pci_devices[] and the config space window. */
#define MAX_DEVICES 8
/* Number of bits in the um_pci_msi_used allocation bitmap. */
#define MAX_MSI_VECTORS 32
/* Bytes of config space reserved (and ioremap()ed) per device slot. */
#define CFG_SPACE_SIZE 4096
/* for MSI-X we have a 32-bit payload */
#define MAX_IRQ_MSG_SIZE (sizeof(struct virtio_pcidev_msg) + sizeof(u32))
/* Number of receive buffers posted to a device's irq virtqueue at init. */
#define NUM_IRQ_MSGS 10
/*
 * Virtqueue tokens for buffers that the completion paths must not kfree()
 * carry bit 0 set; HANDLE_IS_NO_FREE() tests for that tag.
 */
#define HANDLE_NO_FREE(ptr) ((void *)((unsigned long)(ptr) | 1))
#define HANDLE_IS_NO_FREE(ptr) ((unsigned long)(ptr) & 1)
/*
 * State kept for one virtio device handled by this driver.
 *
 * The entries of @resptr are handed out as the opaque 'priv' cookie of the
 * BAR accessors; each entry stores its own index, which lets the accessors
 * get back to the containing structure with container_of().
 */
struct um_pci_device {
	/* underlying virtio device */
	struct virtio_device *vdev;

	/* for now just standard BARs */
	u8 resptr[PCI_STD_NUM_BARS];

	/* command queue (requests/replies) and interrupt message queue */
	struct virtqueue *cmd_vq;
	struct virtqueue *irq_vq;

	/* bit number in @status: a command is being polled for completion */
#define UM_PCI_STAT_WAITING	0
	unsigned long status;

	/* Linux IRQ number used for legacy (INTx) interrupts */
	int irq;

	/* true when probed as a "simple-bus" platform device, not a PCI one */
	bool platform;
};
/*
 * One device slot: the device currently registered in it and the mapping
 * of that slot's config space window.
 */
struct um_pci_device_reg {
	/* NULL while no device occupies the slot */
	struct um_pci_device *dev;

	/* config space mapping for the slot, created in um_pci_init() */
	void __iomem *iomem;
};
/* Taken around updates of the device slots, platform device and MSI bitmap. */
static DEFINE_MUTEX(um_pci_mtx);

/* The single host bridge all devices are attached to. */
static struct pci_host_bridge *bridge;

/* Registered PCI device slots and the (at most one) platform device. */
static struct um_pci_device_reg um_pci_devices[MAX_DEVICES];
static struct um_pci_device *um_pci_platform_device;

/* MSI plumbing: fwnode, inner/MSI irq domains and the vector bitmap. */
static struct fwnode_handle *um_pci_fwnode;
static struct irq_domain *um_pci_inner_domain;
static struct irq_domain *um_pci_msi_domain;
static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];

/*
 * Upper bound on the number of 1us polling steps um_pci_send_cmd() takes
 * while waiting for a reply.
 */
static unsigned int um_pci_max_delay_us = 40000;
module_param_named(max_delay_us, um_pci_max_delay_us, uint, 0644);
/*
 * Scratch message: a command header followed by up to eight bytes of
 * payload, which is the largest single access the accessors perform.
 */
struct um_pci_message_buffer {
	struct virtio_pcidev_msg hdr;
	u8 data[8];
};

/* Per-CPU scratch messages, allocated in um_pci_init(). */
static struct um_pci_message_buffer __percpu *um_pci_msg_bufs;
/*
 * Queue one command on the device's command virtqueue and, unless it can
 * be sent as a posted write, busy-wait for its completion.
 *
 * @dev: device whose cmd_vq is used
 * @cmd: command header, optionally followed by inline payload
 * @cmd_size: number of bytes at @cmd; must be at least sizeof(*cmd) and at
 *	most sizeof(struct um_pci_message_buffer)
 * @extra, @extra_size: optional additional outgoing payload (may be NULL)
 * @out, @out_size: optional buffer the device writes its reply into
 *
 * Returns 0 on success, -EINVAL for a bad @cmd_size, the error returned by
 * virtqueue_add_sgs(), or -EIO if the queue is broken or more than
 * um_pci_max_delay_us polling steps elapsed.
 */
static int um_pci_send_cmd(struct um_pci_device *dev,
struct virtio_pcidev_msg *cmd,
unsigned int cmd_size,
const void *extra, unsigned int extra_size,
void *out, unsigned int out_size)
{
struct scatterlist out_sg, extra_sg, in_sg;
/*
 * Layout: command, then the optional extra payload, then the optional
 * reply buffer. How many entries are actually used is decided by the
 * counts passed to virtqueue_add_sgs() below.
 */
struct scatterlist *sgs_list[] = {
[0] = &out_sg,
[1] = extra ? &extra_sg : &in_sg,
[2] = extra ? &in_sg : NULL,
};
struct um_pci_message_buffer *buf;
int delay_count = 0;
int ret, len;
bool posted;
if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
return -EINVAL;
switch (cmd->op) {
case VIRTIO_PCIDEV_OP_CFG_WRITE:
case VIRTIO_PCIDEV_OP_MMIO_WRITE:
case VIRTIO_PCIDEV_OP_MMIO_MEMSET:
/* in PCI, writes are posted, so don't wait */
posted = !out;
WARN_ON(!posted);
break;
default:
posted = false;
break;
}
/* from here on every exit must go through put_cpu_var() at 'out' */
buf = get_cpu_var(um_pci_msg_bufs);
if (buf)
memcpy(buf, cmd, cmd_size);
if (posted) {
/*
 * A posted command outlives this call, so it needs its own heap copy
 * (command plus extra payload in one buffer); the completion paths
 * kfree() it later.
 */
u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);
if (ncmd) {
memcpy(ncmd, cmd, cmd_size);
if (extra)
memcpy(ncmd + cmd_size, extra, extra_size);
cmd = (void *)ncmd;
cmd_size += extra_size;
extra = NULL;
extra_size = 0;
} else {
/* try without allocating memory */
posted = false;
cmd = (void *)buf;
}
} else {
/* synchronous commands are sent from the per-CPU scratch buffer */
cmd = (void *)buf;
}
sg_init_one(&out_sg, cmd, cmd_size);
if (extra)
sg_init_one(&extra_sg, extra, extra_size);
if (out)
sg_init_one(&in_sg, out, out_size);
/* add to internal virtio queue */
/* non-posted tokens are tagged so that nobody tries to kfree() them */
ret = virtqueue_add_sgs(dev->cmd_vq, sgs_list,
extra ? 2 : 1,
out ? 1 : 0,
posted ? cmd : HANDLE_NO_FREE(cmd),
GFP_ATOMIC);
if (ret) {
if (posted)
kfree(cmd);
goto out;
}
if (posted) {
virtqueue_kick(dev->cmd_vq);
ret = 0;
goto out;
}
/* kick and poll for getting a response on the queue */
/* while this bit is set um_pci_cmd_vq_cb() leaves the queue alone */
set_bit(UM_PCI_STAT_WAITING, &dev->status);
virtqueue_kick(dev->cmd_vq);
while (1) {
void *completed = virtqueue_get_buf(dev->cmd_vq, &len);
if (completed == HANDLE_NO_FREE(cmd))
break;
/* an earlier posted command finished in the meantime: release it */
if (completed && !HANDLE_IS_NO_FREE(completed))
kfree(completed);
if (WARN_ONCE(virtqueue_is_broken(dev->cmd_vq) ||
++delay_count > um_pci_max_delay_us,
"um virt-pci delay: %d", delay_count)) {
ret = -EIO;
break;
}
udelay(1);
}
clear_bit(UM_PCI_STAT_WAITING, &dev->status);
out:
put_cpu_var(um_pci_msg_bufs);
return ret;
}
/*
 * Read @size bytes at @offset from the config space of the device slot
 * passed as @priv (a struct um_pci_device_reg).
 *
 * Returns the value converted from little endian to CPU byte order, or
 * ULONG_MAX (all-ones) when the slot is empty, no scratch buffer is
 * available, @size is not supported or the command could not be sent.
 */
static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
					  int size)
{
	struct um_pci_device_reg *reg = priv;
	struct um_pci_device *dev = reg->dev;
	struct virtio_pcidev_msg hdr = {
		.op = VIRTIO_PCIDEV_OP_CFG_READ,
		.size = size,
		.addr = offset,
	};
	/* buf->data is maximum size - we may only use parts of it */
	struct um_pci_message_buffer *buf;
	u8 *data;
	unsigned long ret = ULONG_MAX;
	size_t bytes = sizeof(buf->data);

	if (!dev)
		return ULONG_MAX;

	buf = get_cpu_var(um_pci_msg_bufs);
	/*
	 * Validate the buffer before deriving ->data from it, and do not
	 * hand a bogus reply pointer to the device if it is missing.
	 */
	if (!buf)
		goto out;

	data = buf->data;
	/* pre-fill so that a short or missing reply reads back as all-ones */
	memset(data, 0xff, bytes);

	switch (size) {
	case 1:
	case 2:
	case 4:
#ifdef CONFIG_64BIT
	case 8:
#endif
		break;
	default:
		WARN(1, "invalid config space read size %d\n", size);
		goto out;
	}

	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes))
		goto out;

	switch (size) {
	case 1:
		ret = data[0];
		break;
	case 2:
		ret = le16_to_cpup((void *)data);
		break;
	case 4:
		ret = le32_to_cpup((void *)data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		ret = le64_to_cpup((void *)data);
		break;
#endif
	default:
		break;
	}

out:
	put_cpu_var(um_pci_msg_bufs);
	return ret;
}
/*
 * Write the low @size bytes of @val, little endian, at @offset into the
 * config space of the device slot passed as @priv. Nothing is sent when
 * the slot is empty or @size is not supported.
 */
static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
				  unsigned long val)
{
	struct um_pci_device_reg *slot = priv;
	struct um_pci_device *dev = slot->dev;
	struct {
		struct virtio_pcidev_msg hdr;
		/* maximum size - we may only use parts of it */
		u8 data[8];
	} req = {
		.hdr = {
			.addr = offset,
			.size = size,
			.op = VIRTIO_PCIDEV_OP_CFG_WRITE,
		},
	};
	int err;

	if (!dev)
		return;

	/* serialize the value into the payload */
	switch (size) {
	case 1:
		req.data[0] = (u8)val;
		break;
	case 2:
		put_unaligned_le16(val, (void *)req.data);
		break;
	case 4:
		put_unaligned_le32(val, (void *)req.data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		put_unaligned_le64(val, (void *)req.data);
		break;
#endif
	default:
		WARN(1, "invalid config space write size %d\n", size);
		return;
	}

	err = um_pci_send_cmd(dev, &req.hdr, sizeof(req), NULL, 0, NULL, 0);
	WARN_ON(err);
}
/* Accessors installed for a mapped config space window. */
static const struct logic_iomem_ops um_pci_device_cfgspace_ops = {
	.write	= um_pci_cfgspace_write,
	.read	= um_pci_cfgspace_read,
};
/*
 * Read @size bytes at @offset from a BAR into @buffer.
 *
 * @priv points at one entry of um_pci_device::resptr[]; the entry holds its
 * own index, so stepping back by that value yields &resptr[0] and from
 * there the containing device. @buffer is pre-filled with 0xff, so it
 * stays all-ones if the command fails.
 */
static void um_pci_bar_copy_from(void *priv, void *buffer,
				 unsigned int offset, int size)
{
	u8 *bar_entry = priv;
	u8 *first_entry = bar_entry - *bar_entry;
	struct um_pci_device *dev;
	struct virtio_pcidev_msg hdr = {
		.addr = offset,
		.size = size,
		.bar = *bar_entry,
		.op = VIRTIO_PCIDEV_OP_MMIO_READ,
	};

	dev = container_of(first_entry, struct um_pci_device, resptr[0]);

	memset(buffer, 0xff, size);
	um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, buffer, size);
}
/*
 * Read a @size-byte little-endian value at @offset from the BAR identified
 * by @priv (a pointer into um_pci_device::resptr[]).
 *
 * Returns the value in CPU byte order, or ULONG_MAX when @size is not
 * supported. um_pci_bar_copy_from() pre-fills the buffer with 0xff, so a
 * failed transfer reads back as all-ones of the requested width.
 */
static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
				     int size)
{
	/* buf->data is maximum size - we may only use parts of it */
	struct um_pci_message_buffer *buf;
	u8 *data;
	unsigned long ret = ULONG_MAX;

	buf = get_cpu_var(um_pci_msg_bufs);
	data = buf->data;

	switch (size) {
	case 1:
	case 2:
	case 4:
#ifdef CONFIG_64BIT
	case 8:
#endif
		break;
	default:
		/* this is a BAR (MMIO) access, not a config space one */
		WARN(1, "invalid MMIO read size %d\n", size);
		goto out;
	}

	um_pci_bar_copy_from(priv, data, offset, size);

	switch (size) {
	case 1:
		ret = data[0];
		break;
	case 2:
		ret = le16_to_cpup((void *)data);
		break;
	case 4:
		ret = le32_to_cpup((void *)data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		ret = le64_to_cpup((void *)data);
		break;
#endif
	default:
		break;
	}

out:
	put_cpu_var(um_pci_msg_bufs);
	return ret;
}
/*
 * Write @size bytes from @buffer to a BAR at @offset.
 *
 * @priv points at one entry of um_pci_device::resptr[]; the entry holds its
 * own index, which is used both as the BAR number and to find the
 * containing device. The data travels as the 'extra' payload of the
 * command.
 */
static void um_pci_bar_copy_to(void *priv, unsigned int offset,
			       const void *buffer, int size)
{
	u8 *bar_entry = priv;
	u8 *first_entry = bar_entry - *bar_entry;
	struct um_pci_device *dev;
	struct virtio_pcidev_msg hdr = {
		.addr = offset,
		.size = size,
		.bar = *bar_entry,
		.op = VIRTIO_PCIDEV_OP_MMIO_WRITE,
	};

	dev = container_of(first_entry, struct um_pci_device, resptr[0]);

	um_pci_send_cmd(dev, &hdr, sizeof(hdr), buffer, size, NULL, 0);
}
/*
 * Write the low @size bytes of @val, little endian, at @offset into the
 * BAR identified by @priv (a pointer into um_pci_device::resptr[]).
 * Nothing is sent when @size is not supported.
 */
static void um_pci_bar_write(void *priv, unsigned int offset, int size,
			     unsigned long val)
{
	/* maximum size - we may only use parts of it */
	u8 data[8];

	switch (size) {
	case 1:
		data[0] = (u8)val;
		break;
	case 2:
		put_unaligned_le16(val, (void *)data);
		break;
	case 4:
		put_unaligned_le32(val, (void *)data);
		break;
#ifdef CONFIG_64BIT
	case 8:
		put_unaligned_le64(val, (void *)data);
		break;
#endif
	default:
		/* this is a BAR (MMIO) access, not a config space one */
		WARN(1, "invalid MMIO write size %d\n", size);
		return;
	}

	um_pci_bar_copy_to(priv, offset, data, size);
}
/*
 * Fill @size bytes of a BAR, starting at @offset, with the byte @value.
 *
 * @priv points at one entry of um_pci_device::resptr[]; the entry holds its
 * own index, which is used both as the BAR number and to find the
 * containing device.
 *
 * The request is a memset on MMIO space, so it uses
 * VIRTIO_PCIDEV_OP_MMIO_MEMSET (one payload byte, @size repetitions)
 * rather than a config space write; um_pci_send_cmd() treats that opcode
 * as a posted write.
 */
static void um_pci_bar_set(void *priv, unsigned int offset, u8 value, int size)
{
	u8 *resptr = priv;
	struct um_pci_device *dev = container_of(resptr - *resptr,
						 struct um_pci_device,
						 resptr[0]);
	struct {
		struct virtio_pcidev_msg hdr;
		u8 data;
	} msg = {
		.hdr = {
			.op = VIRTIO_PCIDEV_OP_MMIO_MEMSET,
			.bar = *resptr,
			.size = size,
			.addr = offset,
		},
		.data = value,
	};

	um_pci_send_cmd(dev, &msg.hdr, sizeof(msg), NULL, 0, NULL, 0);
}
/* Accessors installed for a mapped BAR (and for the platform region). */
static const struct logic_iomem_ops um_pci_device_bar_ops = {
	/* single-value accesses */
	.read		= um_pci_bar_read,
	.write		= um_pci_bar_write,
	/* bulk accesses */
	.copy_from	= um_pci_bar_copy_from,
	.copy_to	= um_pci_bar_copy_to,
	.set		= um_pci_bar_set,
};
/*
 * Translate a (bus, devfn, register) triple into an address inside the
 * config space mapping of the matching device slot.
 *
 * Only bus 0, function 0 and slots below ARRAY_SIZE(um_pci_devices) are
 * accepted; anything else returns NULL. Whether a device is present in the
 * slot is not checked here - the config space accessors do that.
 */
static void __iomem *um_pci_map_bus(struct pci_bus *bus, unsigned int devfn,
				    int where)
{
	unsigned int slot = devfn / 8;
	unsigned long base;

	if (bus->number > 0)
		return NULL;

	/* not allowing functions for now ... */
	if (devfn % 8)
		return NULL;

	if (slot >= ARRAY_SIZE(um_pci_devices))
		return NULL;

	/*
	 * The slot entry is an element of a static array, so its address can
	 * never be NULL and needs no check.
	 */
	base = (unsigned long)um_pci_devices[slot].iomem;

	return (void __iomem *)(base + where);
}
/*
 * Config space access goes through the generic helpers, which resolve the
 * target address via um_pci_map_bus().
 */
static struct pci_ops um_pci_ops = {
	.read		= pci_generic_config_read,
	.write		= pci_generic_config_write,
	.map_bus	= um_pci_map_bus,
};
/* Rescan the root bus while holding the PCI rescan/remove lock. */
static void um_pci_rescan(void)
{
	struct pci_bus *root;

	pci_lock_rescan_remove();

	root = bridge->bus;
	pci_rescan_bus(root);

	pci_unlock_rescan_remove();
}
/*
 * Hand @buf (MAX_IRQ_MSG_SIZE bytes) to the device as a receive buffer on
 * @vq, optionally kicking the queue. If the buffer cannot be queued it is
 * freed here.
 */
static void um_pci_irq_vq_addbuf(struct virtqueue *vq, void *buf, bool kick)
{
	struct scatterlist sg;
	int err;

	sg_init_one(&sg, buf, MAX_IRQ_MSG_SIZE);

	err = virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_ATOMIC);
	if (err) {
		kfree(buf);
		return;
	}

	if (kick)
		virtqueue_kick(vq);
}
/*
 * Dispatch one message received on the irq virtqueue.
 *
 * INT messages raise the device's own IRQ; MSI messages carry the Linux
 * IRQ number as a 32-bit (or otherwise 16-bit) little-endian payload; PME
 * needs no action. Messages are ignored while dev->irq is zero.
 */
static void um_pci_handle_irq_message(struct virtqueue *vq,
				      struct virtio_pcidev_msg *msg)
{
	struct virtio_device *vdev = vq->vdev;
	struct um_pci_device *dev = vdev->priv;
	unsigned int msi_irq;

	if (!dev->irq)
		return;

	/* we should properly chain interrupts, but on ARCH=um we don't care */
	switch (msg->op) {
	case VIRTIO_PCIDEV_OP_INT:
		generic_handle_irq(dev->irq);
		break;
	case VIRTIO_PCIDEV_OP_MSI:
		/* our MSI message is just the interrupt number */
		if (msg->size == sizeof(u32))
			msi_irq = le32_to_cpup((void *)msg->data);
		else
			msi_irq = le16_to_cpup((void *)msg->data);
		generic_handle_irq(msi_irq);
		break;
	case VIRTIO_PCIDEV_OP_PME:
		/* nothing to do - we already woke up due to the message */
		break;
	default:
		dev_err(&vdev->dev, "unexpected virt-pci message %d\n", msg->op);
		break;
	}
}
/*
 * Command queue callback: release the heap copies of completed posted
 * commands. While um_pci_send_cmd() is polling (UM_PCI_STAT_WAITING set)
 * the queue is left to it.
 */
static void um_pci_cmd_vq_cb(struct virtqueue *vq)
{
	struct um_pci_device *dev = vq->vdev->priv;
	void *done;
	int len;

	if (test_bit(UM_PCI_STAT_WAITING, &dev->status))
		return;

	for (;;) {
		done = virtqueue_get_buf(vq, &len);
		if (!done)
			break;

		/* tagged tokens are not heap copies and must not show up here */
		if (WARN_ON(HANDLE_IS_NO_FREE(done)))
			continue;

		kfree(done);
	}
}
static void um_pci_irq_vq_cb(struct virtqueue *vq)
{
struct virtio_pcidev_msg *msg;
int len;
while ((msg = virtqueue_get_buf(vq, &len))) {
if (len >= sizeof(*msg))
um_pci_handle_irq_message(vq, msg);
/* recycle the message buffer */
um_pci_irq_vq_addbuf(vq, msg, true);
}
}
#ifdef CONFIG_OF
/* Copied from arch/x86/kernel/devicetree.c */
/*
 * Find the device tree node of type "pci" whose "bus-range" property
 * starts at @bus's number. Returns NULL when there is no such node.
 */
struct device_node *pcibios_get_phb_of_node(struct pci_bus *bus)
{
	struct device_node *node;

	for_each_node_by_type(node, "pci") {
		const void *range = of_get_property(node, "bus-range", NULL);

		/* the first cell of "bus-range" is the lowest bus number */
		if (range && bus->number == be32_to_cpup(range))
			return node;
	}

	return NULL;
}
#endif
/*
 * Look up the "cmd" and "irq" virtqueues of @dev, mark the device ready
 * and post up to NUM_IRQ_MSGS receive buffers on the irq queue.
 *
 * Returns 0 on success or the error from virtio_find_vqs(). Failing to
 * allocate an individual receive buffer is not treated as an error.
 */
static int um_pci_init_vqs(struct um_pci_device *dev)
{
	struct virtqueue_info vqs_info[] = {
		{ "cmd", um_pci_cmd_vq_cb },
		{ "irq", um_pci_irq_vq_cb },
	};
	struct virtqueue *vqs[2];
	int remaining = NUM_IRQ_MSGS;
	int err;

	err = virtio_find_vqs(dev->vdev, 2, vqs, vqs_info, NULL);
	if (err)
		return err;

	dev->cmd_vq = vqs[0];
	dev->irq_vq = vqs[1];

	virtio_device_ready(dev->vdev);

	while (remaining-- > 0) {
		void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);

		if (!msg)
			continue;

		/* kick once after the loop instead of per buffer */
		um_pci_irq_vq_addbuf(dev->irq_vq, msg, false);
	}

	virtqueue_kick(dev->irq_vq);

	return 0;
}
/*
 * Tear down the platform device: reset it, delete its virtqueues,
 * unpublish it from um_pci_platform_device and free @dev.
 */
static void __um_pci_virtio_platform_remove(struct virtio_device *vdev,
					    struct um_pci_device *dev)
{
	/* quiesce the device before its queues go away */
	virtio_reset_device(vdev);
	vdev->config->del_vqs(vdev);

	/* unpublish under the lock ... */
	mutex_lock(&um_pci_mtx);
	um_pci_platform_device = NULL;
	mutex_unlock(&um_pci_mtx);

	/* ... and only then release the memory */
	kfree(dev);
}
/*
 * Probe path for a "simple-bus" virtio device: register it as the single
 * platform device and populate its device tree children.
 *
 * Returns 0 on success, -EBUSY if a platform device already exists, or the
 * error from um_pci_init_vqs() / of_platform_default_populate(). @dev is
 * freed on every failure.
 */
static int um_pci_virtio_platform_probe(struct virtio_device *vdev,
					struct um_pci_device *dev)
{
	int ret;

	dev->platform = true;

	mutex_lock(&um_pci_mtx);
	if (um_pci_platform_device)
		ret = -EBUSY;
	else
		ret = um_pci_init_vqs(dev);
	if (!ret)
		um_pci_platform_device = dev;
	mutex_unlock(&um_pci_mtx);

	if (ret) {
		kfree(dev);
		return ret;
	}

	ret = of_platform_default_populate(vdev->dev.of_node, NULL, &vdev->dev);
	if (ret)
		__um_pci_virtio_platform_remove(vdev, dev);

	return ret;
}
/*
 * virtio probe callback.
 *
 * "simple-bus" devices are passed on to um_pci_virtio_platform_probe().
 * Everything else gets the first free slot in um_pci_devices[], its
 * virtqueues and an IRQ descriptor, after which the root bus is rescanned.
 *
 * Returns 0 on success, -ENOMEM, -ENOSPC when all slots are taken, or the
 * error from um_pci_init_vqs() / irq_alloc_desc().
 */
static int um_pci_virtio_probe(struct virtio_device *vdev)
{
	struct um_pci_device *dev;
	int slot;
	int err;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	dev->vdev = vdev;
	vdev->priv = dev;

	if (of_device_is_compatible(vdev->dev.of_node, "simple-bus"))
		return um_pci_virtio_platform_probe(vdev, dev);

	mutex_lock(&um_pci_mtx);

	/* find the first unoccupied slot */
	for (slot = 0; slot < MAX_DEVICES; slot++) {
		if (!um_pci_devices[slot].dev)
			break;
	}

	if (slot >= MAX_DEVICES) {
		err = -ENOSPC;
		goto error;
	}

	err = um_pci_init_vqs(dev);
	if (err)
		goto error;

	dev->irq = irq_alloc_desc(numa_node_id());
	if (dev->irq < 0) {
		err = dev->irq;
		goto err_reset;
	}

	um_pci_devices[slot].dev = dev;
	vdev->priv = dev;

	mutex_unlock(&um_pci_mtx);

	device_set_wakeup_enable(&vdev->dev, true);

	/*
	 * In order to do suspend-resume properly, don't allow VQs
	 * to be suspended.
	 */
	virtio_uml_set_no_vq_suspend(vdev, true);

	um_pci_rescan();
	return 0;

err_reset:
	/* the queues exist at this point, so take them down again */
	virtio_reset_device(vdev);
	vdev->config->del_vqs(vdev);
error:
	mutex_unlock(&um_pci_mtx);
	kfree(dev);
	return err;
}
/*
 * virtio remove callback.
 *
 * Platform devices are depopulated and handed to
 * __um_pci_virtio_platform_remove(). For PCI devices the slot is released,
 * the matching PCI device (if it was enumerated) is removed from the bus,
 * and finally the virtio device is reset and its queues deleted.
 */
static void um_pci_virtio_remove(struct virtio_device *vdev)
{
	struct um_pci_device *dev = vdev->priv;
	int i;

	if (dev->platform) {
		of_platform_depopulate(&vdev->dev);
		__um_pci_virtio_platform_remove(vdev, dev);
		return;
	}

	device_set_wakeup_enable(&vdev->dev, false);

	mutex_lock(&um_pci_mtx);
	for (i = 0; i < MAX_DEVICES; i++) {
		if (um_pci_devices[i].dev != dev)
			continue;

		um_pci_devices[i].dev = NULL;
		irq_free_desc(dev->irq);

		break;
	}
	mutex_unlock(&um_pci_mtx);

	if (i < MAX_DEVICES) {
		struct pci_dev *pci_dev;

		/*
		 * Slot i lives at function 0 of PCI slot i (see
		 * um_pci_map_bus()), so the lookup needs the encoded devfn and
		 * not the bare slot index.
		 */
		pci_dev = pci_get_slot(bridge->bus, PCI_DEVFN(i, 0));
		if (pci_dev) {
			pci_stop_and_remove_bus_device_locked(pci_dev);
			/* drop the reference taken by pci_get_slot() */
			pci_dev_put(pci_dev);
		}
	}

	/* Stop all virtqueues */
	virtio_reset_device(vdev);
	dev->cmd_vq = NULL;
	dev->irq_vq = NULL;
	vdev->config->del_vqs(vdev);

	kfree(dev);
}
/* Bind to the configured virtio device ID, whatever the vendor. */
static struct virtio_device_id id_table[] = {
	{
		CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID,
		VIRTIO_DEV_ANY_ID,
	},
	{ 0 },	/* terminator */
};
MODULE_DEVICE_TABLE(virtio, id_table);
/* virtio driver glue, registered from um_pci_init(). */
static struct virtio_driver um_pci_virtio_driver = {
	.probe		= um_pci_virtio_probe,
	.remove		= um_pci_virtio_remove,
	.id_table	= id_table,
	.driver.name	= "virtio-pci",
};
/*
 * Config space window: MAX_DEVICES slots of CFG_SPACE_SIZE bytes each,
 * ending right below the PCI iomem window.
 */
static struct resource virt_cfgspace_resource = {
	.flags = IORESOURCE_MEM,
	.name = "PCI config space",
	.start = 0xf0000000 - MAX_DEVICES * CFG_SPACE_SIZE,
	.end = 0xf0000000 - 1,
};
/*
 * logic_iomem map callback for the config space window.
 *
 * @offset must be a multiple of CFG_SPACE_SIZE and @size must not exceed
 * it. On success *@priv is set to the matching device slot, *@ops to the
 * config space accessors, and 0 is returned; otherwise -EINVAL or -ENOENT.
 */
static long um_pci_map_cfgspace(unsigned long offset, size_t size,
				const struct logic_iomem_ops **ops,
				void **priv)
{
	unsigned long slot = offset / CFG_SPACE_SIZE;

	if (WARN_ON(size > CFG_SPACE_SIZE || offset % CFG_SPACE_SIZE))
		return -EINVAL;

	if (slot >= MAX_DEVICES) {
		WARN(1, "cannot map offset 0x%lx/0x%zx\n", offset, size);
		return -ENOENT;
	}

	*ops = &um_pci_device_cfgspace_ops;
	*priv = &um_pci_devices[slot];

	return 0;
}
/* Region hooks registered for virt_cfgspace_resource. */
static const struct logic_iomem_region_ops um_pci_cfgspace_ops = {
	.map	= um_pci_map_cfgspace,
};
/* Address window handed to the host bridge for device BARs. */
static struct resource virt_iomem_resource = {
	.flags = IORESOURCE_MEM,
	.name = "PCI iomem",
	.start = 0xf0000000,
	.end = 0xffffffff,
};
/* Request/result bundle passed through pci_walk_bus() by um_pci_map_iomem(). */
struct um_pci_map_iomem_data {
	/* in: absolute start address and length of the range to map */
	unsigned long offset;
	size_t size;

	/* out: where to store the accessors and their private cookie */
	const struct logic_iomem_ops **ops;
	void **priv;

	/* out: offset into the matching BAR, or a negative errno */
	long ret;
};
static int um_pci_map_iomem_walk(struct pci_dev *pdev, void *_data)
{
struct um_pci_map_iomem_data *data = _data;
struct um_pci_device_reg *reg = &um_pci_devices[pdev->devfn / 8];
struct um_pci_device *dev;
int i;
if (!reg->dev)
return 0;
for (i = 0; i < ARRAY_SIZE(dev->resptr); i++) {
struct resource *r = &pdev->resource[i];
if ((r->flags & IORESOURCE_TYPE_BITS) != IORESOURCE_MEM)
continue;
/*
* must be the whole or part of the resource,
* not allowed to only overlap
*/
if (data->offset < r->start || data->offset > r->end)
continue;
if (data->offset + data->size - 1 > r->end)
continue;
dev = reg->dev;
*data->ops = &um_pci_device_bar_ops;
dev->resptr[i] = i;
*data->priv = &dev->resptr[i];
data->ret = data->offset - r->start;
/* no need to continue */
return 1;
}
return 0;
}
/*
 * logic_iomem map callback for the PCI iomem window: walk the root bus for
 * a device BAR containing the requested range.
 *
 * Returns the offset into the matching BAR, or -ENOENT if no BAR covers
 * the range.
 */
static long um_pci_map_iomem(unsigned long offset, size_t size,
			     const struct logic_iomem_ops **ops,
			     void **priv)
{
	struct um_pci_map_iomem_data req = {
		.ret = -ENOENT,
		.ops = ops,
		.priv = priv,
		.size = size,
		/* we want the full address here */
		.offset = offset + virt_iomem_resource.start,
	};

	pci_walk_bus(bridge->bus, um_pci_map_iomem_walk, &req);

	return req.ret;
}
/* Region hooks registered for virt_iomem_resource. */
static const struct logic_iomem_region_ops um_pci_iomem_ops = {
	.map	= um_pci_map_iomem,
};
/* Fill in the MSI message the device has to send for the interrupt @data. */
static void um_pci_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
	/*
	 * We use the (virtual) IRQ number here as the message to simplify the
	 * code that receives the message, where for now we simply trust the
	 * device to send the correct message.
	 */
	msg->data = data->irq;

	/*
	 * This is a very low address and not actually valid 'physical' memory
	 * in UML, so we can simply map MSI(-X) vectors to there, it cannot be
	 * legitimately written to by the device in any other way.
	 */
	msg->address_lo = 0xa0000;
	msg->address_hi = 0;
}
/* Chip attached to interrupts of the inner domain; only composes messages. */
static struct irq_chip um_pci_msi_bottom_irq_chip = {
	.irq_compose_msi_msg	= um_pci_compose_msi_msg,
	.name			= "UM virtio MSI",
};
/*
 * Inner domain .alloc callback: reserve the lowest free bit in
 * um_pci_msi_used as the hardware IRQ number for @virq.
 *
 * Returns 0 on success or -ENOSPC when all MAX_MSI_VECTORS are in use.
 */
static int um_pci_inner_domain_alloc(struct irq_domain *domain,
				     unsigned int virq, unsigned int nr_irqs,
				     void *args)
{
	unsigned long hwirq;
	int ret = 0;

	WARN_ON(nr_irqs != 1);

	mutex_lock(&um_pci_mtx);
	hwirq = find_first_zero_bit(um_pci_msi_used, MAX_MSI_VECTORS);
	if (hwirq < MAX_MSI_VECTORS)
		set_bit(hwirq, um_pci_msi_used);
	else
		ret = -ENOSPC;
	mutex_unlock(&um_pci_mtx);

	if (ret)
		return ret;

	irq_domain_set_info(domain, virq, hwirq, &um_pci_msi_bottom_irq_chip,
			    domain->host_data, handle_simple_irq,
			    NULL, NULL);

	return 0;
}
/*
 * Inner domain .free callback: give the hardware IRQ number of @virq back
 * to um_pci_msi_used, complaining if it was not marked as used.
 */
static void um_pci_inner_domain_free(struct irq_domain *domain,
				     unsigned int virq, unsigned int nr_irqs)
{
	struct irq_data *irqd = irq_domain_get_irq_data(domain, virq);

	mutex_lock(&um_pci_mtx);

	if (test_bit(irqd->hwirq, um_pci_msi_used))
		__clear_bit(irqd->hwirq, um_pci_msi_used);
	else
		pr_err("trying to free unused MSI#%lu\n", irqd->hwirq);

	mutex_unlock(&um_pci_mtx);
}
/* Vector allocation hooks of the inner (parent) MSI domain. */
static const struct irq_domain_ops um_pci_inner_domain_ops = {
	.free	= um_pci_inner_domain_free,
	.alloc	= um_pci_inner_domain_alloc,
};
/* Chip of the PCI MSI domain; masking uses the generic PCI MSI helpers. */
static struct irq_chip um_pci_msi_irq_chip = {
	.irq_unmask	= pci_msi_unmask_irq,
	.irq_mask	= pci_msi_mask_irq,
	.name		= "UM virtio PCIe MSI",
};
/* Description of the PCI MSI domain created in um_pci_init(). */
static struct msi_domain_info um_pci_msi_domain_info = {
	.chip	= &um_pci_msi_irq_chip,
	.flags	= MSI_FLAG_PCI_MSIX |
		  MSI_FLAG_USE_DEF_CHIP_OPS |
		  MSI_FLAG_USE_DEF_DOM_OPS,
};
/* Bus number window of the host bridge: bus 0 only. */
static struct resource busn_resource = {
	.flags = IORESOURCE_BUS,
	.name = "PCI busn",
	.start = 0,
	.end = 0,
};
/*
 * Host bridge map_irq callback: return the IRQ of the device registered in
 * @pdev's slot, or -EINVAL (with a warning) when the slot is empty.
 */
static int um_pci_map_irq(const struct pci_dev *pdev, u8 slot, u8 pin)
{
	struct um_pci_device_reg *entry = &um_pci_devices[pdev->devfn / 8];

	if (WARN_ON(!entry->dev))
		return -EINVAL;

	/* Yes, we map all pins to the same IRQ ... doesn't matter for now. */
	return entry->dev->irq;
}
/* Every root bus uses the fwnode allocated in um_pci_init(). */
void *pci_root_bus_fwnode(struct pci_bus *bus)
{
	struct fwnode_handle *fwnode = um_pci_fwnode;

	return fwnode;
}
/*
 * logic_iomem map callback for the platform window: route accesses to the
 * platform device through the BAR accessors, using resptr[0] as cookie.
 *
 * Returns @offset unchanged, or -ENOENT when no platform device is
 * registered.
 */
static long um_pci_map_platform(unsigned long offset, size_t size,
				const struct logic_iomem_ops **ops,
				void **priv)
{
	struct um_pci_device *platform_dev = um_pci_platform_device;

	if (!platform_dev)
		return -ENOENT;

	*priv = &platform_dev->resptr[0];
	*ops = &um_pci_device_bar_ops;

	return offset;
}
/* Region hooks registered for virt_platform_resource. */
static const struct logic_iomem_region_ops um_pci_platform_ops = {
	.map	= um_pci_map_platform,
};
/* Address window served by the platform device. */
static struct resource virt_platform_resource = {
	.flags = IORESOURCE_MEM,
	.name = "platform",
	.start = 0x10000000,
	.end = 0x1fffffff,
};
/*
 * Module init: register the logic_iomem regions, allocate the per-CPU
 * message buffers, create the host bridge and the MSI irq domains, map the
 * config space of every device slot, probe the bridge and finally register
 * the virtio driver.
 *
 * Returns 0 on success (also when no virtio device ID is configured, in
 * which case PCI support is simply absent) or a negative errno. On failure
 * the irq domains, fwnode, bridge and per-CPU buffers set up so far are
 * released again.
 */
static int __init um_pci_init(void)
{
	struct irq_domain_info inner_domain_info = {
		.size = MAX_MSI_VECTORS,
		.hwirq_max = MAX_MSI_VECTORS,
		.ops = &um_pci_inner_domain_ops,
	};
	int err, i;

	WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
				       &um_pci_cfgspace_ops));
	WARN_ON(logic_iomem_add_region(&virt_iomem_resource,
				       &um_pci_iomem_ops));
	WARN_ON(logic_iomem_add_region(&virt_platform_resource,
				       &um_pci_platform_ops));

	if (WARN(CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID < 0,
		 "No virtio device ID configured for PCI - no PCI support\n"))
		return 0;

	um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer);
	if (!um_pci_msg_bufs)
		return -ENOMEM;

	bridge = pci_alloc_host_bridge(0);
	if (!bridge) {
		err = -ENOMEM;
		goto free;
	}

	um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
	if (!um_pci_fwnode) {
		err = -ENOMEM;
		goto free;
	}

	inner_domain_info.fwnode = um_pci_fwnode;
	um_pci_inner_domain = irq_domain_instantiate(&inner_domain_info);
	if (IS_ERR(um_pci_inner_domain)) {
		err = PTR_ERR(um_pci_inner_domain);
		goto free;
	}

	um_pci_msi_domain = pci_msi_create_irq_domain(um_pci_fwnode,
						      &um_pci_msi_domain_info,
						      um_pci_inner_domain);
	if (!um_pci_msi_domain) {
		err = -ENOMEM;
		goto free;
	}

	pci_add_resource(&bridge->windows, &virt_iomem_resource);
	pci_add_resource(&bridge->windows, &busn_resource);
	bridge->ops = &um_pci_ops;
	bridge->map_irq = um_pci_map_irq;

	/* one config space mapping per device slot */
	for (i = 0; i < MAX_DEVICES; i++) {
		resource_size_t start;

		start = virt_cfgspace_resource.start + i * CFG_SPACE_SIZE;
		um_pci_devices[i].iomem = ioremap(start, CFG_SPACE_SIZE);
		if (WARN(!um_pci_devices[i].iomem, "failed to map %d\n", i)) {
			err = -ENOMEM;
			goto free;
		}
	}

	err = pci_host_probe(bridge);
	if (err)
		goto free;

	err = register_virtio_driver(&um_pci_virtio_driver);
	if (err)
		goto free;
	return 0;

free:
	/*
	 * The MSI domain is stacked on the inner domain, so it has to go
	 * first; it is still NULL if the failure happened before it was
	 * created.
	 */
	if (um_pci_msi_domain)
		irq_domain_remove(um_pci_msi_domain);
	if (!IS_ERR_OR_NULL(um_pci_inner_domain))
		irq_domain_remove(um_pci_inner_domain);
	if (um_pci_fwnode)
		irq_domain_free_fwnode(um_pci_fwnode);
	if (bridge) {
		pci_free_resource_list(&bridge->windows);
		pci_free_host_bridge(bridge);
	}
	free_percpu(um_pci_msg_bufs);
	return err;
}
module_init(um_pci_init);
/*
 * Module exit: unregister the virtio driver, then release the irq domains
 * (MSI domain before the inner domain it is stacked on), the host bridge
 * and the per-CPU message buffers.
 */
static void __exit um_pci_exit(void)
{
	/* no new devices from here on */
	unregister_virtio_driver(&um_pci_virtio_driver);

	/* interrupt domains, child first */
	irq_domain_remove(um_pci_msi_domain);
	irq_domain_remove(um_pci_inner_domain);

	/* host bridge and its resource windows */
	pci_free_resource_list(&bridge->windows);
	pci_free_host_bridge(bridge);

	/* per-CPU scratch messages */
	free_percpu(um_pci_msg_bufs);
}
module_exit(um_pci_exit);