Skip to content

Commit

Permalink
pcie: Add support for Single Root I/O Virtualization (SR/IOV)
Browse files Browse the repository at this point in the history
This patch provides the building blocks for creating an SR/IOV
PCIe Extended Capability header and for registering/unregistering
SR/IOV Virtual Functions.

Signed-off-by: Knut Omang <knuto@ifi.uio.no>
Message-Id: <20220217174504.1051716-2-lukasz.maniak@linux.intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
  • Loading branch information
Knut Omang authored and mstsirkin committed Mar 6, 2022
1 parent 0ea5778 commit 7c0fa8d
Show file tree
Hide file tree
Showing 9 changed files with 470 additions and 26 deletions.
1 change: 1 addition & 0 deletions hw/pci/meson.build
Expand Up @@ -5,6 +5,7 @@ pci_ss.add(files(
'pci.c',
'pci_bridge.c',
'pci_host.c',
'pcie_sriov.c',
'shpc.c',
'slotid_cap.c'
))
Expand Down
100 changes: 77 additions & 23 deletions hw/pci/pci.c
Expand Up @@ -239,6 +239,9 @@ int pci_bar(PCIDevice *d, int reg)
{
uint8_t type;

/* PCIe virtual functions do not have their own BARs */
assert(!pci_is_vf(d));

if (reg != PCI_ROM_SLOT)
return PCI_BASE_ADDRESS_0 + reg * 4;

Expand Down Expand Up @@ -304,10 +307,30 @@ void pci_device_deassert_intx(PCIDevice *dev)
}
}

static void pci_do_device_reset(PCIDevice *dev)
/*
 * Rewrite the BAR registers of @dev with the type bits of each region.
 *
 * Regions whose size is zero are left untouched.  A 64-bit memory BAR is
 * written as a quad word; every other BAR is written as a long word.
 * Nothing is done for PCIe virtual functions, since they do not have their
 * own BARs.
 */
static void pci_reset_regions(PCIDevice *dev)
{
    int idx;

    if (pci_is_vf(dev)) {
        return;
    }

    for (idx = 0; idx < PCI_NUM_REGIONS; ++idx) {
        PCIIORegion *bar = &dev->io_regions[idx];
        int is_io;
        int is_mem64;

        if (bar->size == 0) {
            continue;
        }

        is_io = (bar->type & PCI_BASE_ADDRESS_SPACE_IO) != 0;
        is_mem64 = !is_io && (bar->type & PCI_BASE_ADDRESS_MEM_TYPE_64) != 0;

        if (is_mem64) {
            pci_set_quad(dev->config + pci_bar(dev, idx), bar->type);
        } else {
            pci_set_long(dev->config + pci_bar(dev, idx), bar->type);
        }
    }
}

static void pci_do_device_reset(PCIDevice *dev)
{
pci_device_deassert_intx(dev);
assert(dev->irq_state == 0);

Expand All @@ -323,19 +346,7 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
for (r = 0; r < PCI_NUM_REGIONS; ++r) {
PCIIORegion *region = &dev->io_regions[r];
if (!region->size) {
continue;
}

if (!(region->type & PCI_BASE_ADDRESS_SPACE_IO) &&
region->type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
pci_set_quad(dev->config + pci_bar(dev, r), region->type);
} else {
pci_set_long(dev->config + pci_bar(dev, r), region->type);
}
}
pci_reset_regions(dev);
pci_update_mappings(dev);

msi_reset(dev);
Expand Down Expand Up @@ -884,6 +895,16 @@ static void pci_init_multifunction(PCIBus *bus, PCIDevice *dev, Error **errp)
dev->config[PCI_HEADER_TYPE] |= PCI_HEADER_TYPE_MULTI_FUNCTION;
}

/*
* With SR/IOV and ARI, a device at function 0 need not be a multifunction
* device, as it may just be a VF that ended up with function 0 in
* the legacy PCI interpretation. Avoid failing in such cases:
*/
if (pci_is_vf(dev) &&
dev->exp.sriov_vf.pf->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
return;
}

/*
* multifunction bit is interpreted in two ways as follows.
* - all functions must set the bit to 1.
Expand Down Expand Up @@ -1083,6 +1104,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev,
bus->devices[devfn]->name);
return NULL;
} else if (dev->hotplugged &&
!pci_is_vf(pci_dev) &&
pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already occupied by %s,"
" new func %s cannot be exposed to guest.",
Expand Down Expand Up @@ -1191,6 +1213,7 @@ void pci_register_bar(PCIDevice *pci_dev, int region_num,
pcibus_t size = memory_region_size(memory);
uint8_t hdr_type;

assert(!pci_is_vf(pci_dev)); /* VFs must use pcie_sriov_vf_register_bar */
assert(region_num >= 0);
assert(region_num < PCI_NUM_REGIONS);
assert(is_power_of_2(size));
Expand Down Expand Up @@ -1294,11 +1317,45 @@ pcibus_t pci_get_bar_addr(PCIDevice *pci_dev, int region_num)
return pci_dev->io_regions[region_num].addr;
}

static pcibus_t pci_bar_address(PCIDevice *d,
int reg, uint8_t type, pcibus_t size)
/*
 * Read the base address currently programmed for BAR @reg of @d.
 *
 * For a regular function the BAR is read from the device's own config
 * space.  For a virtual function the BAR is read from the SR/IOV capability
 * of its physical function and advanced by @size for each preceding VF; the
 * VF index is the devfn distance from the first VF divided by the VF stride.
 *
 * @type selects a 64-bit (quad) or 32-bit (long) read.  Except for the ROM
 * slot, the result is masked with ~(@size - 1); the ROM slot keeps its low
 * bits so that its enable bit stays intact.
 * NOTE(review): the masking presumes @size is a power of two - confirm
 * against the callers.
 */
static pcibus_t pci_config_get_bar_addr(PCIDevice *d, int reg,
                                        uint8_t type, pcibus_t size)
{
    const int is_mem64 = (type & PCI_BASE_ADDRESS_MEM_TYPE_64) != 0;
    pcibus_t addr;

    if (pci_is_vf(d)) {
        PCIDevice *pf = d->exp.sriov_vf.pf;
        uint16_t cap = pf->exp.sriov_cap;
        uint16_t first_vf_offset =
            pci_get_word(pf->config + cap + PCI_SRIOV_VF_OFFSET);
        uint16_t stride =
            pci_get_word(pf->config + cap + PCI_SRIOV_VF_STRIDE);
        uint32_t vf_index =
            (d->devfn - (pf->devfn + first_vf_offset)) / stride;
        int vf_bar = cap + PCI_SRIOV_BAR + reg * 4;

        addr = is_mem64 ? pci_get_quad(pf->config + vf_bar)
                        : pci_get_long(pf->config + vf_bar);
        /* Each VF owns a @size-sized slice following the previous VF's. */
        addr += vf_index * size;
    } else {
        int own_bar = pci_bar(d, reg);

        addr = is_mem64 ? pci_get_quad(d->config + own_bar)
                        : pci_get_long(d->config + own_bar);
    }

    /* The ROM slot has a specific enable bit, keep it intact */
    if (reg == PCI_ROM_SLOT) {
        return addr;
    }
    return addr & ~(size - 1);
}

pcibus_t pci_bar_address(PCIDevice *d,
int reg, uint8_t type, pcibus_t size)
{
pcibus_t new_addr, last_addr;
int bar = pci_bar(d, reg);
uint16_t cmd = pci_get_word(d->config + PCI_COMMAND);
Object *machine = qdev_get_machine();
ObjectClass *oc = object_get_class(machine);
Expand All @@ -1309,7 +1366,7 @@ static pcibus_t pci_bar_address(PCIDevice *d,
if (!(cmd & PCI_COMMAND_IO)) {
return PCI_BAR_UNMAPPED;
}
new_addr = pci_get_long(d->config + bar) & ~(size - 1);
new_addr = pci_config_get_bar_addr(d, reg, type, size);
last_addr = new_addr + size - 1;
/* Check if 32 bit BAR wraps around explicitly.
* TODO: make priorities correct and remove this work around.
Expand All @@ -1324,11 +1381,7 @@ static pcibus_t pci_bar_address(PCIDevice *d,
if (!(cmd & PCI_COMMAND_MEMORY)) {
return PCI_BAR_UNMAPPED;
}
if (type & PCI_BASE_ADDRESS_MEM_TYPE_64) {
new_addr = pci_get_quad(d->config + bar);
} else {
new_addr = pci_get_long(d->config + bar);
}
new_addr = pci_config_get_bar_addr(d, reg, type, size);
/* the ROM slot has a specific enable bit */
if (reg == PCI_ROM_SLOT && !(new_addr & PCI_ROM_ADDRESS_ENABLE)) {
return PCI_BAR_UNMAPPED;
Expand Down Expand Up @@ -1473,6 +1526,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int

msi_write_config(d, addr, val_in, l);
msix_write_config(d, addr, val_in, l);
pcie_sriov_config_write(d, addr, val_in, l);
}

/***********************************************************/
Expand Down
5 changes: 5 additions & 0 deletions hw/pci/pcie.c
Expand Up @@ -446,6 +446,11 @@ void pcie_cap_slot_plug_cb(HotplugHandler *hotplug_dev, DeviceState *dev,
PCIDevice *pci_dev = PCI_DEVICE(dev);
uint32_t lnkcap = pci_get_long(exp_cap + PCI_EXP_LNKCAP);

if (pci_is_vf(pci_dev)) {
/* Virtual function cannot be physically disconnected */
return;
}

/* Don't send event when device is enabled during qemu machine creation:
* it is present on boot, no hotplug event is necessary. We do send an
* event when the device is disabled later. */
Expand Down

0 comments on commit 7c0fa8d

Please sign in to comment.