Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d037970

Browse files
Interrupts: implement CPU affinity
In the current code, all device interrupts are routed to either the boot CPU (on x86 and ARM), or to any CPU (on RISC-V). This new feature allows interrupts to be configured to target a specific CPU, which allows optimizing interrupt allocation to spread a workload over multiple CPUs. The arch-specific msi_format() and dev_irq_enable() functions have been changed to take an additional `target_cpu` parameter, while msi_get_vector() has been replaced by msi_get_config(), which allows retrieving both the interrupt number and the target CPU associated with a given PCI MSI. Other changes in arch-specific interrupt code have been made in order to support targeting an arbitrary interrupt to an arbitrary CPU. Various functions in the PCI and virtIO code have been amended to take an additional `cpu_affinity` range parameter, which is used as a hint to select a target CPU when a given interrupt vector is enabled in the interrupt controller. The irq_get_target_cpu() utility function takes a CPU affinity range as a parameter and returns an optimal target CPU, selected from the supplied range based on which CPUs have the smallest number of device interrupts targeted at them, so as to spread the interrupt handling work over different CPUs. The irq_put_target_cpu() function is called to signal that an interrupt has been removed from a given target CPU.
1 parent 0e52038 commit d037970

File tree

27 files changed

+260
-105
lines changed

27 files changed

+260
-105
lines changed

platform/pc/pci.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -147,14 +147,14 @@ void pci_bar_write_8(struct pci_bar *b, u64 offset, u64 val)
147147
out64(b->addr + offset, val);
148148
}
149149

150-
void pci_setup_non_msi_irq(pci_dev dev, thunk h, sstring name)
150+
void pci_setup_irq_aff(pci_dev dev, thunk h, sstring name, range cpu_affinity)
151151
{
152152
pci_plat_debug("%s: h %F, name %s\n", func_ss, h, name);
153153

154154
/* For maximum portability, the GSI should be retrieved via the ACPI _PRT method. */
155155
unsigned int gsi = pci_cfgread(dev, PCIR_INTERRUPT_LINE, 1);
156156

157-
ioapic_register_int(gsi, h, name);
157+
ioapic_register_int(gsi, h, name, cpu_affinity);
158158
}
159159

160160
void pci_platform_init_bar(pci_dev dev, int bar)
@@ -193,13 +193,14 @@ void pci_platform_init_bar(pci_dev dev, int bar)
193193
}
194194
}
195195

196-
u64 pci_platform_allocate_msi(pci_dev dev, thunk h, sstring name, u32 *address, u32 *data)
196+
u64 pci_platform_allocate_msi(pci_dev dev, thunk h, sstring name, u32 target_cpu,
197+
u32 *address, u32 *data)
197198
{
198199
u64 v = allocate_interrupt();
199200
if (v == INVALID_PHYSICAL)
200201
return v;
201202
register_interrupt(v, h, name);
202-
msi_format(address, data, v);
203+
msi_format(address, data, v, target_cpu);
203204
return v;
204205
}
205206

platform/riscv-virt/pci.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,15 @@ MK_PCI_BAR_WRITE(1, 8)
8989
MK_PCI_BAR_WRITE(2, 16)
9090
MK_PCI_BAR_WRITE(4, 32)
9191

92-
void pci_setup_non_msi_irq(pci_dev dev, thunk h, sstring name)
92+
void pci_setup_irq_aff(pci_dev dev, thunk h, sstring name, range cpu_affinity)
9393
{
9494
/* To mimic the interrupt assignment swizzle we need to know the
9595
device/slot and interrupt pin assignment:
9696
0x20 + (slot + pin) % 4 */
9797
u64 irq = 0x20 + (dev->slot + (pci_cfgread(dev, PCIR_INT_PIN, 1)-1)) % 4;
9898
pci_plat_debug("%s: bus %d slot %d func %d pin %d irq 0x%x\n", func_ss,
9999
dev->bus, dev->slot, dev->function, pci_cfgread(dev, PCIR_INT_PIN, 1), irq);
100-
register_interrupt(irq, h, name);
100+
irq_register_handler(irq, h, name, cpu_affinity);
101101
}
102102

103103
/* Rudimentary resource allocation based on fixed offsets for virt
@@ -123,13 +123,14 @@ void pci_platform_init_bar(pci_dev dev, int bar_idx)
123123
}
124124
}
125125

126-
u64 pci_platform_allocate_msi(pci_dev dev, thunk h, sstring name, u32 *address, u32 *data)
126+
u64 pci_platform_allocate_msi(pci_dev dev, thunk h, sstring name, u32 target_cpu,
127+
u32 *address, u32 *data)
127128
{
128129
u64 v = allocate_msi_interrupt();
129130
if (v == INVALID_PHYSICAL)
130131
return v;
131132
register_interrupt(v, h, name);
132-
msi_format(address, data, v);
133+
msi_format(address, data, v, target_cpu);
133134
return v;
134135
}
135136

platform/virt/pci.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,12 @@ MK_PCI_BAR_WRITE(2, 16)
9898
MK_PCI_BAR_WRITE(4, 32)
9999
MK_PCI_BAR_WRITE(8, 64)
100100

101-
void pci_setup_non_msi_irq(pci_dev dev, thunk h, sstring name)
101+
void pci_setup_irq_aff(pci_dev dev, thunk h, sstring name, range cpu_affinity)
102102
{
103103
/* queue index ignored; virtio ints are shared */
104104
u64 v = GIC_SPI_INTS_START + VIRT_PCIE_IRQ_BASE + (dev->slot % VIRT_PCIE_IRQ_NUM);
105105
pci_plat_debug("%s: dev %p, irq %d, handler %F, name %s\n", func_ss, dev, v, h, name);
106-
register_interrupt(v, h, name);
106+
irq_register_handler(v, h, name, cpu_affinity);
107107
}
108108

109109
closure_func_basic(mcfg_handler, boolean, pci_mcfg_handler,
@@ -146,18 +146,19 @@ void pci_platform_init_bar(pci_dev dev, int bar_idx)
146146
}
147147
}
148148

149-
u64 pci_platform_allocate_msi(pci_dev dev, thunk h, sstring name, u32 *address, u32 *data)
149+
u64 pci_platform_allocate_msi(pci_dev dev, thunk h, sstring name, u32 target_cpu,
150+
u32 *address, u32 *data)
150151
{
151152
u64 v = allocate_msi_interrupt();
152153
if (v == INVALID_PHYSICAL)
153154
return v;
154155
register_interrupt(v, h, name);
155-
if (!dev_irq_enable(pci_dev_id(dev), v)) {
156+
if (!dev_irq_enable(pci_dev_id(dev), v, target_cpu)) {
156157
unregister_interrupt(v);
157158
deallocate_msi_interrupt(v);
158159
return INVALID_PHYSICAL;
159160
}
160-
msi_format(address, data, v);
161+
msi_format(address, data, v, target_cpu);
161162
return v;
162163
}
163164

src/aarch64/acpi.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
void acpi_register_irq_handler(int irq, thunk t, sstring name)
77
{
8-
register_interrupt(irq, t, name);
8+
irq_register_handler(irq, t, name, irange(0, 0));
99
}
1010

1111
/* OS services layer */

src/aarch64/gic.c

Lines changed: 82 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
#define gic_debug(x, ...)
1212
#endif
1313

14-
#define GIC_ICID 0 /* a single interrupt collection is being used */
14+
#define GIC_ICID(cpuid) (cpuid) /* mapping between target CPU and interrupt collection */
1515

1616
/* The physical address of the command queue must be aligned to 64 KB. */
1717
#define GIC_CMD_QUEUE_SIZE (64 * KB)
@@ -22,14 +22,16 @@ static struct {
2222
u64 dist_base;
2323
struct {
2424
u64 base;
25-
u64 rdbase;
26-
} redist; /* redistributor associated to CPU 0 */
25+
u64 propbase;
26+
u64 pendbase;
27+
} redist;
2728
u64 its_base;
2829
u8 *lpi_cfg_table;
2930
u64 its_typer;
3031
u32 dev_id_limit;
3132
void *its_cmd_queue;
3233
struct list devices;
34+
table its_irq_map; /* maps interrupt vectors to target CPUs */
3335
} gic;
3436

3537
typedef struct its_dev {
@@ -40,6 +42,7 @@ typedef struct its_dev {
4042

4143
#define gicd_read_32(reg) mmio_read_32(gic.dist_base + GICD_ ## reg)
4244
#define gicd_write_32(reg, value) mmio_write_32(gic.dist_base + GICD_ ## reg, value)
45+
#define gicd_write_64(reg, value) mmio_write_64(gic.dist_base + GICD_ ## reg, value)
4346

4447
#define gicr_base (current_cpu()->m.gic_rdist_base)
4548
#define gicr_read_32(reg) mmio_read_32(gicr_base + GICR_ ## reg)
@@ -149,6 +152,25 @@ void gic_clear_pending_int(int irq)
149152
GIC_SET_INTFIELD(priority, IPRIORITY)
150153
GIC_SET_INTFIELD(config, ICFG)
151154

155+
void gic_set_int_target(int irq, u32 target_cpu)
156+
{
157+
gic_debug("irq %d, target %d\n", irq, target_cpu);
158+
if ((irq < GIC_SPI_INTS_START) || (irq >= GIC_SPI_INTS_END))
159+
return;
160+
if (gic.v3_iface) { /* use affinity routing */
161+
u64 mpid = mpid_from_cpuid(target_cpu);
162+
gicd_write_64(IROUTER(irq), mpid);
163+
} else {
164+
int reg_num = irq / GICD_INTS_PER_ITARGETS_REG;
165+
u32 itargets = gicd_read_32(ITARGETSR(reg_num));
166+
int width = 32 / GICD_INTS_PER_ITARGETS_REG;
167+
int reg_offset = (irq - reg_num * GICD_INTS_PER_ITARGETS_REG) * width;
168+
itargets &= ~(MASK32(width) << reg_offset);
169+
itargets |= U32_FROM_BIT(target_cpu) << reg_offset;
170+
gicd_write_32(ITARGETSR(reg_num), itargets);
171+
}
172+
}
173+
152174
boolean gic_int_is_pending(int irq)
153175
{
154176
int w = irq / GICD_INTS_PER_IPEND_REG;
@@ -226,19 +248,14 @@ static void init_gicd(void)
226248
i < GIC_SPI_INTS_END / GICD_INTS_PER_IGROUP_REG; i++)
227249
gicd_write_32(IGROUPR(i), MASK(32));
228250

229-
/* shared periph target cpu0 */
230-
for (int i = GIC_SPI_INTS_START / GICD_INTS_PER_ITARGETS_REG;
231-
i < GIC_SPI_INTS_END / GICD_INTS_PER_ITARGETS_REG; i++)
232-
gicd_write_32(ITARGETSR(i), 0x01010101);
233-
234-
/* enable
235-
XXX - turn on affinity routing (ARE)? */
236-
237251
/* Kludge: We seem to have one gicv2 variant (qemu w/ noaccel) that honors
238252
bit 1 as GRP1 enable, and another (qemu w/ kvm on bcm2711) which
239253
doesn't, so set both for now until the variants can be sorted
240254
out. (This may be due to the presence of GIC Security Extensions.) */
241-
gicd_write_32(CTLR, GICD_CTLR_ENABLEGRP1 | GICD_CTLR_ENABLEGRP0);
255+
u32 ctrl = GICD_CTLR_ENABLEGRP1 | GICD_CTLR_ENABLEGRP0;
256+
if (gic.v3_iface)
257+
ctrl |= GICD_CTLR_ARE_NS; /* enable affinity routing */
258+
gicd_write_32(CTLR, ctrl);
242259
}
243260

244261
/* aliases for macro use */
@@ -265,7 +282,7 @@ void gic_eoi(int irq)
265282
gicc_write(EOIR1, irq);
266283
}
267284

268-
boolean dev_irq_enable(u32 dev_id, int vector)
285+
boolean dev_irq_enable(u32 dev_id, int vector, u32 target_cpu)
269286
{
270287
gic_debug("dev 0x%x, irq %d\n", dev_id, vector);
271288
if ((vector >= gic_msi_vector_base) && gic.its_base) {
@@ -304,9 +321,13 @@ boolean dev_irq_enable(u32 dev_id, int vector)
304321
}
305322
u32 event_id = vector - gic_msi_vector_base;
306323
gic_its_cmd(((u64)dev_id << 32) | ITS_CMD_MAPTI, ((u64)vector << 32) | event_id,
307-
GIC_ICID, 0);
324+
GIC_ICID(target_cpu), 0);
308325
gic_its_cmd(((u64)dev_id << 32) | ITS_CMD_INV, event_id, 0, 0);
309-
gic_its_cmd(ITS_CMD_SYNC, 0, gic.redist.rdbase << 16, 0);
326+
cpuinfo ci = cpuinfo_from_id(target_cpu);
327+
gic_its_cmd(ITS_CMD_SYNC, 0, ci->m.gic_rdist_rdbase << 16, 0);
328+
table_set(gic.its_irq_map, pointer_from_u64((u64)vector), ci);
329+
} else {
330+
gic_set_int_target(vector, target_cpu);
310331
}
311332
return true;
312333
}
@@ -318,11 +339,13 @@ void dev_irq_disable(u32 dev_id, int vector)
318339
u32 event_id = vector - gic_msi_vector_base;
319340
gic_its_cmd(((u64)dev_id << 32) | ITS_CMD_DISCARD, event_id, 0, 0);
320341
gic_its_cmd(((u64)dev_id << 32) | ITS_CMD_INV, event_id, 0, 0);
321-
gic_its_cmd(ITS_CMD_SYNC, 0, gic.redist.rdbase << 16, 0);
342+
cpuinfo ci = table_remove(gic.its_irq_map, pointer_from_u64((u64)vector));
343+
if (ci)
344+
gic_its_cmd(ITS_CMD_SYNC, 0, ci->m.gic_rdist_rdbase << 16, 0);
322345
}
323346
}
324347

325-
void msi_format(u32 *address, u32 *data, int vector)
348+
void msi_format(u32 *address, u32 *data, int vector, u32 target_cpu)
326349
{
327350
if (gic.its_base) {
328351
*address = gic.its_base + GITS_TRANSLATER - DEVICE_BASE;
@@ -333,12 +356,18 @@ void msi_format(u32 *address, u32 *data, int vector)
333356
}
334357
}
335358

336-
int msi_get_vector(u32 data)
337-
{
338-
if (gic.its_base)
339-
return (data + gic_msi_vector_base);
340-
else
341-
return data;
359+
void msi_get_config(u32 address, u32 data, int *vector, u32 *target_cpu) {
360+
if (gic.its_base) {
361+
*vector = data + gic_msi_vector_base;
362+
cpuinfo ci = table_find(gic.its_irq_map, pointer_from_u64((u64)*vector));
363+
if (ci)
364+
*target_cpu = ci->id;
365+
else
366+
*target_cpu = 0;
367+
} else {
368+
*vector = data;
369+
*target_cpu = 0; /* retrieval of target CPU not supported */
370+
}
342371
}
343372

344373
static void init_gicc(void)
@@ -376,14 +405,26 @@ static void init_gicc(void)
376405
}
377406
}
378407

408+
static void gits_percpu_init(void)
409+
{
410+
cpuinfo ci = current_cpu();
411+
u64 rdbase;
412+
if (gic.its_typer & GITS_TYPER_PTA)
413+
rdbase = ci->m.gic_rdist_base >> 16;
414+
else
415+
rdbase = GICR_TYPER_PROC_NUM(gicr_read_64(TYPER));
416+
gic_debug("cpu %d, rdbase 0x%lx\n", ci->id, rdbase);
417+
ci->m.gic_rdist_rdbase = rdbase;
418+
419+
/* map an interrupt collection to the redistributor associated to this CPU */
420+
gic_its_cmd(ITS_CMD_MAPC, 0, ITS_MAPC_V | (rdbase << 16) | GIC_ICID(ci->id), 0);
421+
}
422+
379423
static void init_gits(kernel_heaps kh)
380424
{
381425
gic.its_typer = gits_read_64(TYPER);
382426
gic_debug("typer 0x%lx\n", gic.its_typer);
383-
if (gic.its_typer & GITS_TYPER_PTA)
384-
gic.redist.rdbase = gic.redist.base >> 16;
385-
else
386-
gic.redist.rdbase = GICR_TYPER_PROC_NUM(gicr_read_64(TYPER));
427+
heap h = heap_locked(kh);
387428
backed_heap backed = heap_linear_backed(kh);
388429
u64 pa;
389430
for (int n = 0; n < 8; n++) {
@@ -419,6 +460,8 @@ static void init_gits(kernel_heaps kh)
419460
break;
420461
}
421462
}
463+
gic.its_irq_map = allocate_table(h, identity_key, pointer_equal);
464+
assert(gic.its_irq_map != INVALID_ADDRESS);
422465
list_init(&gic.devices);
423466

424467
/* Set up the command queue. */
@@ -429,8 +472,7 @@ static void init_gits(kernel_heaps kh)
429472

430473
gits_write_32(CTLR, GITS_CTRL_ENABLED); /* Enable the ITS. */
431474

432-
/* Map an interrupt collection to the redistributor associated to CPU 0. */
433-
gic_its_cmd(ITS_CMD_MAPC, 0, ITS_MAPC_V | (gic.redist.rdbase << 16) | GIC_ICID, 0);
475+
gits_percpu_init();
434476
}
435477

436478
BSS_RO_AFTER_INIT u16 gic_msi_vector_base;
@@ -496,13 +538,15 @@ int init_gic(void)
496538
assert(gic.lpi_cfg_table != INVALID_ADDRESS);
497539
zero(gic.lpi_cfg_table, PAGESIZE);
498540
u64 id_bits = find_order(gic_msi_vector_base + gic_msi_vector_num) - 1;
499-
gicr_write_64(PROPBASER, pa | id_bits);
541+
gic.redist.propbase = pa | id_bits;
542+
gicr_write_64(PROPBASER, gic.redist.propbase);
500543

501544
/* Set up LPI pending table, which must be aligned to 64 KB. */
502545
void *lpi_pending_table = alloc_map(backed, 64 * KB, &pa);
503546
assert(lpi_pending_table != INVALID_ADDRESS);
504547
zero(lpi_pending_table, 64 * KB);
505-
gicr_write_64(PENDBASER, GICR_PENDBASER_PTZ | pa);
548+
gic.redist.pendbase = GICR_PENDBASER_PTZ | pa;
549+
gicr_write_64(PENDBASER, gic.redist.pendbase);
506550

507551
gicr_write_32(CTLR, GICR_CTLR_EnableLPIs);
508552
if (gic.its_base)
@@ -525,8 +569,14 @@ int init_gic(void)
525569

526570
void gic_percpu_init(void)
527571
{
528-
if (gic.v3_iface)
572+
if (gic.v3_iface) {
529573
gicr_get_base();
574+
gicr_write_64(PROPBASER, gic.redist.propbase);
575+
gicr_write_64(PENDBASER, gic.redist.pendbase);
576+
gicr_write_32(CTLR, GICR_CTLR_EnableLPIs);
577+
if (gic.its_base)
578+
gits_percpu_init();
579+
}
530580
init_gicd_percpu();
531581
init_gicc();
532582
}

src/aarch64/gic.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
#define GICD_CTLR_DISABLE 0
6464
#define GICD_CTLR_ENABLEGRP0 1
6565
#define GICD_CTLR_ENABLEGRP1 2
66+
#define GICD_CTLR_ARE_NS U32_FROM_BIT(5)
6667
#define GICD_TYPER 0x0004
6768
#define GICD_IDbits_BITS 5
6869
#define GICD_IDbits_SHIFT 19
@@ -103,6 +104,7 @@
103104
#define GICD_SGIR_NSATT U64_FROM_BIT(15)
104105
#define GICD_CPENDSGIR(n) 0x0f10
105106
#define GICD_SPENDSGIR(n) 0x0f20
107+
#define GICD_IROUTER(n) (0x6000 + 8 * (n))
106108

107109
#define GICR_CTLR 0x0000
108110
#define GICR_CTLR_EnableLPIs U64_FROM_BIT(0)
@@ -241,6 +243,7 @@ void gic_enable_int(int irq);
241243
void gic_clear_pending_int(int irq);
242244
void gic_set_int_priority(int irq, u32 pri);
243245
void gic_set_int_config(int irq, u32 cfg);
246+
void gic_set_int_target(int irq, u32 target_cpu);
244247
boolean gic_int_is_pending(int irq);
245248
u64 gic_dispatch_int(void);
246249
void gic_eoi(int irq);

0 commit comments

Comments
 (0)