Linux version: v6.0

Architecture: ARMv8

This post is the continuation of Linux Interrupt Subsystem Intro(1): Interrupt Handling Initialization.

Introduction

I’m surprised that almost half a year has passed since my last post… I got sidetracked by researching other things in between, so I’ve been leaving this gap unfilled. Now, coming back to it, I feel a bit rusty. Indeed, when there’s something I want to write about, it is better to do it promptly.

The basic architecture of Linux’s interrupt handling and its preparation were introduced last time, including how the interrupt handlers are set up: handle_arch_irq, irq_desc→handle_irq, and irqaction→handler. Now let’s see how they are invoked when an interrupt is serviced.

Interrupt Handling Process

The PC jumps to the IRQ vector in the CPU’s exception vector table when an IPI arrives:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
// arch/arm64/kernel/entry.S

/*
* Exception vectors.
*/
.pushsection ".entry.text", "ax"

// the table itself is aligned to 2^11 = 2048 bytes
.align 11
// 16 slots: {sync, irq, fiq, error} for each of EL1t, EL1h,
// 64-bit EL0 and 32-bit EL0, as labelled on each line below.
// Every slot is emitted by the kernel_ventry macro.
SYM_CODE_START(vectors)
kernel_ventry 1, t, 64, sync // Synchronous EL1t
kernel_ventry 1, t, 64, irq // IRQ EL1t
kernel_ventry 1, t, 64, fiq // FIQ EL1t
kernel_ventry 1, t, 64, error // Error EL1t

kernel_ventry 1, h, 64, sync // Synchronous EL1h
kernel_ventry 1, h, 64, irq // IRQ EL1h
kernel_ventry 1, h, 64, fiq // FIQ EL1h
kernel_ventry 1, h, 64, error // Error EL1h

kernel_ventry 0, t, 64, sync // Synchronous 64-bit EL0
kernel_ventry 0, t, 64, irq // IRQ 64-bit EL0
kernel_ventry 0, t, 64, fiq // FIQ 64-bit EL0
kernel_ventry 0, t, 64, error // Error 64-bit EL0

kernel_ventry 0, t, 32, sync // Synchronous 32-bit EL0
kernel_ventry 0, t, 32, irq // IRQ 32-bit EL0
kernel_ventry 0, t, 32, fiq // FIQ 32-bit EL0
kernel_ventry 0, t, 32, error // Error 32-bit EL0
SYM_CODE_END(vectors)

kernel_ventry is an assembly macro:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
        .macro kernel_ventry, el:req, ht:req, regsize:req, label:req
// Emits one exception vector slot. Each slot is aligned to 2^7 = 128 bytes,
// which matches the 128-byte limit checked by the .org at the end.
.align 7

[...] // some complex initial preparations
// reserve space on the stack for saving registers
sub sp, sp, #PT_REGS_SIZE

#ifdef CONFIG_VMAP_STACK

[...] // SP overflow check and handling

#endif
// jump to the handler stub named el<el><ht>_<regsize>_<label>,
// e.g. el0t_64_irq for arguments (0, t, 64, irq)
b el\el\ht\()_\regsize\()_\label
.org .Lventry_start\@ + 128 // Did we overflow the ventry slot?
.endm

The last instruction is a branch. If we assume the CPU is running in AArch64 state at EL0 when the interrupt arrives,

b el\el\ht\()_\regsize\()_\label expands into

b el0t_64_irq

el0t_64_irq is defined in the same file, and is also generated by macros:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
// arch/arm64/kernel/entry.S

// entry_handler generates the stub el<el><ht>_<regsize>_<label>, i.e. the
// label that the matching kernel_ventry slot branches to.
.macro entry_handler el:req, ht:req, regsize:req, label:req
SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
kernel_entry \el, \regsize
// x0 = sp: pass the current stack pointer as the first argument of the
// C handler called next (those handlers take a struct pt_regs *)
mov x0, sp
// call the C handler with the same name plus a _handler suffix,
// e.g. el0t_64_irq_handler
bl el\el\ht\()_\regsize\()_\label\()_handler
// choose the return path from the exception level the exception came from;
// this is resolved at assembly time, so only one branch is emitted
.if \el == 0
b ret_to_user
.else
b ret_to_kernel
.endif
SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
.endm

/*
* Early exception handlers
*/
// one stub per vector slot, in the same order as the vectors table
entry_handler 1, t, 64, sync
entry_handler 1, t, 64, irq
entry_handler 1, t, 64, fiq
entry_handler 1, t, 64, error

entry_handler 1, h, 64, sync
entry_handler 1, h, 64, irq
entry_handler 1, h, 64, fiq
entry_handler 1, h, 64, error

entry_handler 0, t, 64, sync
entry_handler 0, t, 64, irq
entry_handler 0, t, 64, fiq
entry_handler 0, t, 64, error

entry_handler 0, t, 32, sync
entry_handler 0, t, 32, irq
entry_handler 0, t, 32, fiq
entry_handler 0, t, 32, error

Let’s focus on entry_handler 0, t, 64, irq. It expands into:

1
2
3
4
5
6
7
// Expansion of: entry_handler 0, t, 64, irq
// Since \el == 0, the macro's .if/.else/.endif is resolved at assembly
// time and only the ret_to_user branch remains (no .endif is left over).
SYM_CODE_START_LOCAL(el0t_64_irq)
kernel_entry 0, 64
// x0 = sp: argument for the C handler called next
mov x0, sp
bl el0t_64_irq_handler
b ret_to_user
SYM_CODE_END(el0t_64_irq)

Continuing on (see the comments I added):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
// arch/arm64/kernel/entry-common.c
// Read this listing from the bottom up: el0t_64_irq_handler (the last
// function below) is the entry point, and each function calls the one
// above it, ending with do_interrupt_handler at the top.

/*
 * Run @handler for the interrupt described by @regs.
 *
 * @regs is installed with set_irq_regs() for the duration of the call and
 * the previous value is restored afterwards.
 */
static void do_interrupt_handler(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
struct pt_regs *old_regs = set_irq_regs(regs);

// change stack if necessary, but call the handler either way
// (on this path the handler is handle_arch_irq, i.e. gic_handle_irq)
if (on_thread_stack())
call_on_irq_stack(regs, handler);
else
handler(regs);

set_irq_regs(old_regs);
}

/*
 * Common path for an interrupt taken from EL0.
 *
 * Wraps the call to @handler between enter_from_user_mode() /
 * exit_to_user_mode() and irq_enter_rcu() / irq_exit_rcu().
 */
static void noinstr el0_interrupt(struct pt_regs *regs,
void (*handler)(struct pt_regs *))
{
enter_from_user_mode(regs);

// set the DAIF exception mask bits to DAIF_PROCCTX_NOIRQ
write_sysreg(DAIF_PROCCTX_NOIRQ, daif);

// branch-predictor hardening is applied only when bit 55 of the
// interrupted PC is set
if (regs->pc & BIT(55))
arm64_apply_bp_hardening();

irq_enter_rcu();
// this is the important part
do_interrupt_handler(regs, handler);
irq_exit_rcu();

exit_to_user_mode(regs);
}

/* Shared IRQ handler for EL0: forwards to el0_interrupt() with handle_arch_irq. */
static void noinstr __el0_irq_handler_common(struct pt_regs *regs)
{
// use the function we set in part 1 (handle_arch_irq)
el0_interrupt(regs, handle_arch_irq);
}

/*
 * C entry point for an IRQ taken from 64-bit EL0; reached from the
 * el0t_64_irq assembly stub via "bl el0t_64_irq_handler".
 */
asmlinkage void noinstr el0t_64_irq_handler(struct pt_regs *regs)
{
__el0_irq_handler_common(regs);
}

now let’s look at gic_handle_irq :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/*
 * GIC top-level interrupt handler: acknowledge interrupts one at a time and
 * dispatch each through the GIC's irq domain until none is left.
 */
static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
{
u32 irqstat, irqnr;
struct gic_chip_data *gic = &gic_data[0];
void __iomem *cpu_base = gic_data_cpu_base(gic);

do {
// read GICC_IAR
irqstat = readl_relaxed(cpu_base + GIC_CPU_INTACK);
// extract the hw irq number
irqnr = irqstat & GICC_IAR_INT_ID_MASK;
// GIC sets the irqnr to 1023 if there is no interrupt to be serviced
// (any value >= 1020 ends the loop), therefore this loop keeps
// servicing interrupts until none is left
if (unlikely(irqnr >= 1020))
break;
// not covered in this post: this early write of the acknowledged value
// to the EOI register only happens when supports_deactivate_key is on
if (static_branch_likely(&supports_deactivate_key))
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
isb();

/*
* Ensure any shared data written by the CPU sending the IPI
* is read after we've read the ACK register on the GIC.
*
* Pairs with the write barrier in gic_ipi_send_mask
*/
// irqnr <= 15 means it is an IPI (called SGI in GIC spec)
if (irqnr <= 15) {
smp_rmb();

/*
* The GIC encodes the source CPU in GICC_IAR,
* leading to the deactivation to fail if not
* written back as is to GICC_EOI. Stash the INTID
* away for gic_eoi_irq() to write back. This only
* works because we don't nest SGIs...
*/
// save the value of GICC_IAR, needed for EOI write
this_cpu_write(sgi_intid, irqstat);
}

// hand the hw irq number to the generic IRQ layer
generic_handle_domain_irq(gic->domain, irqnr);
} while (1);
}

/**
* generic_handle_domain_irq - Invoke the handler for a HW irq belonging
* to a domain.
* @domain: The domain where to perform the lookup
* @hwirq: The HW irq number to convert to a logical one
*
* Returns: 0 on success, or -EINVAL if conversion has failed
*
* This function must be called from an IRQ context with irq regs
* initialized.
*/
int generic_handle_domain_irq(struct irq_domain *domain, unsigned int hwirq)
{
// irq_resolve_mapping uses domain->revmap to translate the hardware IRQ
// number into the irq_desc of the corresponding Linux IRQ, which is
// then passed on to handle_irq_desc
return handle_irq_desc(irq_resolve_mapping(domain, hwirq));
}

/*
 * Invoke the flow handler of @desc through generic_handle_irq_desc().
 * Returns 0 on the path shown; the elided code ([...]) is not reproduced
 * in this listing.
 */
int handle_irq_desc(struct irq_desc *desc)
{
[...]
generic_handle_irq_desc(desc);
return 0;
}

/* Call the flow handler stored in desc->handle_irq, passing @desc itself. */
static inline void generic_handle_irq_desc(struct irq_desc *desc)
{
desc->handle_irq(desc);
}

As set up in the last post, desc→handle_irq points to handle_percpu_devid_irq:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/**
* handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids
* @desc: the interrupt description structure for this irq
*
* Per CPU interrupts on SMP machines without locking requirements. Same as
* handle_percpu_irq() above but with the following extras:
*
* action->percpu_dev_id is a pointer to percpu variables which
* contain the real device id for the cpu on which this handler is
* called
*/
void handle_percpu_devid_irq(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct irqaction *action = desc->action;
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;

/*
* PER CPU interrupts are not serialized. Do not touch
* desc->tot_count.
*/
__kstat_incr_irqs_this_cpu(desc);

// GIC does not have an irq_ack callback
// (reading the hwirq from GICC_IAR already acks the interrupt)
if (chip->irq_ack)
chip->irq_ack(&desc->irq_data);

if (likely(action)) {
trace_irq_handler_entry(irq, action);
// call the handler that was registered via request_percpu_irq
// (ipi_handler), passing this CPU's instance of percpu_dev_id
res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
trace_irq_handler_exit(irq, action, res);
} else {
// no handler registered: disable the IRQ on this CPU if it was
// enabled, and report it once
unsigned int cpu = smp_processor_id();
bool enabled = cpumask_test_cpu(cpu, desc->percpu_enabled);

if (enabled)
irq_percpu_disable(desc, cpu);

pr_err_once("Spurious%s percpu IRQ%u on CPU%u\n",
enabled ? " and unmasked" : "", irq, cpu);
}
// tell GIC it is the end of interrupt (EOI)
// writes the hwirq number into the EOI register
if (chip->irq_eoi)
chip->irq_eoi(&desc->irq_data);
}

And that’s it! EZPZ, right?