Building an OOM Killer Event Tracer with eBPF + Go

bpftrace is great for quick probing and ad-hoc debugging. For production-grade monitoring tools, you need full eBPF programs. The architecture splits into two layers:

  • Kernel side: eBPF program written in C, attached to hook points, collecting event data
  • User side: loader written in Go (or Rust / libbpf C), loading the eBPF program and reading events

Architecture

mermaid
flowchart LR
    classDef kern fill:#E3F2FD,stroke:#1565C0,color:#1565C0
    classDef user fill:#FFF3E0,stroke:#E65100,color:#BF360C
    classDef data fill:#E8F5E9,stroke:#2E7D32,color:#1B5E20

    subgraph kernel["Kernel Space"]
        hook@{ shape: rounded, label: "oom_kill_process (kprobe)" }
        ebpf@{ shape: proc, label: "eBPF Program\nEvent Collection" }
        ring@{ shape: cyl, label: "Ring Buffer" }
    end

    subgraph userspace["User Space (Go)"]
        loader@{ shape: notch-rect, label: "bpf2go Loader" }
        reader@{ shape: proc, label: "RingBuf Reader\nEvent Parsing" }
    end

    hook --> ebpf --> ring
    ring --> reader
    loader -.-> ebpf

    class hook,ebpf,ring kern
    class loader,reader user

eBPF Kernel Program (C)

Name the C file oom_kprobe.bpf.c — the bpf suffix is a cilium/ebpf convention for bpf2go code generation:

c
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
//go:build ignore

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

char LICENSE[] SEC("license") = "Dual BSD/GPL";

struct oom_event {
    u32 pid;
    u32 tgid;
    u64 fpid;
    long pages;
    char comm[TASK_COMM_LEN];
    char fcomm[TASK_COMM_LEN];
    u64 timestamp;
};

/*
 * Ring buffer map shared with user space: the kprobe program reserves and
 * submits struct oom_event records here, and the Go loader reads them back.
 * For BPF_MAP_TYPE_RINGBUF, max_entries is the buffer size in bytes.
 */
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, 1 << 24);  // 16 MiB (1 << 24 bytes)
} events SEC(".maps");

/*
 * Fires on entry to the kernel's oom_kill_process() and emits one
 * struct oom_event describing the victim and the task that triggered the kill.
 *
 * On every kernel that supports BPF ring buffers, the probed function has the
 * signature oom_kill_process(struct oom_control *oc, const char *message):
 * the victim is not passed as an argument but stored in oc->chosen, and the
 * task that ran into the OOM condition is the current task.
 *
 * Always returns 0; if the ring buffer is full the event is dropped.
 */
SEC("kprobe/oom_kill_process")
int BPF_KPROBE(oom_kill_process, struct oom_control *oc, const char *message)
{
    struct oom_event *event;
    struct task_struct *victim;

    event = bpf_ringbuf_reserve(&events, sizeof(*event), 0);
    if (!event)
        return 0;

    /*
     * Reserved ring-buffer memory is not zeroed. Clear it so that fields we
     * cannot fill (e.g. when oc->chosen is NULL) never expose stale bytes.
     */
    __builtin_memset(event, 0, sizeof(*event));

    /* Victim: the task selected by the OOM killer. */
    victim = BPF_CORE_READ(oc, chosen);
    if (victim) {
        event->pid = BPF_CORE_READ(victim, pid);
        event->tgid = BPF_CORE_READ(victim, tgid);
        BPF_CORE_READ_STR_INTO(&event->comm, victim, comm);
    }

    /* Trigger: the task whose allocation invoked the OOM killer. */
    event->fpid = bpf_get_current_pid_tgid() >> 32;
    bpf_get_current_comm(&event->fcomm, sizeof(event->fcomm));

    event->pages = BPF_CORE_READ(oc, totalpages);
    event->timestamp = bpf_ktime_get_ns();

    bpf_ringbuf_submit(event, 0);
    return 0;
}

Key Concepts

  • BPF_KPROBE macro handles kprobe argument extraction automatically — no manual struct pt_regs unpacking needed
  • BPF_CORE_READ accesses kernel data structures via BTF info, no hardcoded offsets — this is CO-RE in action
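  • bpf_ringbuf_reserve + bpf_ringbuf_submit is a zero-copy producer-consumer pattern: the program writes directly into reserved ring-buffer memory (reservations are briefly serialized by a spinlock inside the kernel, while the user-space consumer is lock-free); events are readable by user space immediately after submission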
  • vmlinux.h is generated by bpftool btf dump and contains definitions for all kernel data structures

Go User-Space Program

Use the bpf2go code generator from the cilium/ebpf library:

go
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go -cc clang-14 -target bpfel -type oom_event oom_kprobe oom_kprobe.bpf.c -- -I../headers -O2 -g -D__TARGET_ARCH_x86

package main

import (
    "bytes"
    "encoding/binary"
    "errors"
    "fmt"
    "log"
    "os"
    "os/signal"
    "syscall"
    "time"

    "github.com/cilium/ebpf/link"
    "github.com/cilium/ebpf/ringbuf"
    "github.com/cilium/ebpf/rlimit"
)

// main loads the bpf2go-generated eBPF objects, attaches the kprobe to
// oom_kill_process, and prints every event read from the ring buffer until
// SIGINT or SIGTERM closes the reader.
func main() {
    // 1. Raise rlimit (eBPF needs memory locking)
    if err := rlimit.RemoveMemlock(); err != nil {
        log.Fatal(err)
    }

    // 2. Load eBPF objects
    objs := oom_kprobeObjects{}
    if err := loadOom_kprobeObjects(&objs, nil); err != nil {
        log.Fatalf("loading objects: %v", err)
    }
    defer objs.Close()

    // 3. Attach kprobe
    kp, err := link.Kprobe("oom_kill_process", objs.OomKillProcess, nil)
    if err != nil {
        log.Fatalf("opening kprobe: %v", err)
    }
    defer kp.Close()

    // 4. Create ring buffer reader. The generated objects struct embeds the
    // maps struct, so the map is reachable through the promoted field.
    rd, err := ringbuf.NewReader(objs.Events)
    if err != nil {
        log.Fatalf("opening ringbuf reader: %v", err)
    }
    defer rd.Close()

    // 5. Signal handling: closing the reader unblocks rd.Read below.
    stop := make(chan os.Signal, 1)
    signal.Notify(stop, os.Interrupt, syscall.SIGTERM)
    go func() {
        <-stop
        if err := rd.Close(); err != nil {
            log.Printf("closing ringbuf reader: %v", err)
        }
    }()

    log.Println("OOM monitor started. Press Ctrl+C to exit.")

    // Use the real page size instead of assuming 4 KiB pages.
    pageSize := float64(os.Getpagesize())

    // 6. Event loop
    var event oom_kprobeOomEvent
    for {
        record, err := rd.Read()
        if err != nil {
            // The reader may wrap ErrClosed, so a plain == comparison would
            // miss it and spin on the error path forever after shutdown.
            if errors.Is(err, ringbuf.ErrClosed) {
                return
            }
            log.Printf("reading from ringbuf: %v", err)
            continue
        }

        // A record carries its payload as a raw byte slice.
        if err := binary.Read(bytes.NewReader(record.RawSample), binary.LittleEndian, &event); err != nil {
            log.Printf("parsing event: %v", err)
            continue
        }

        fmt.Printf("\n[%s] OOM KILL DETECTED\n", time.Now().Format("15:04:05"))
        fmt.Printf("  Killed:  PID=%d COMM=%s\n", event.Pid, trimNull(event.Comm[:]))
        fmt.Printf("  Trigger: PID=%d COMM=%s\n", event.Fpid, trimNull(event.Fcomm[:]))
        fmt.Printf("  Pages:   %d (%.2f MB)\n", event.Pages, float64(event.Pages)*pageSize/(1<<20))
    }
}

// trimNull converts a fixed-size, NUL-padded byte buffer into a string,
// keeping only the bytes before the first zero byte. A buffer without any
// zero byte is converted in full.
func trimNull(b []byte) string {
    end := len(b)
    for pos := 0; pos < len(b); pos++ {
        if b[pos] == 0 {
            end = pos
            break
        }
    }
    return string(b[:end])
}

Build and Run

bash
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
# Generate bpf2go Go code
go generate ./...

# Build
go build -o oom-monitor .

# Run (requires root)
sudo ./oom-monitor

# Trigger OOM in another terminal (will consume significant memory — avoid on production systems)
sudo stress-ng --vm 1 --vm-bytes 80% -t 30s

# Safer alternative: use Docker with a memory limit (requires Docker)
# docker run --rm -m 64m ubuntu:22.04 bash -c "apt-get update -qq && apt-get install -y -qq stress-ng && stress-ng --vm 1 --vm-bytes 50m -t 10s"

The compiled binary is self-contained — BPF bytecode is embedded into the Go binary via go:embed.

Expected Output

When an OOM is triggered, the monitor should output something like:

1
2
3
4
5
6
7
8
9
[14:30:25] OOM KILL DETECTED
  Killed:  PID=12345 COMM=stress-ng
  Trigger: PID=9876 COMM=oom-monitor
  Pages:   262144 (1024.00 MB)

[14:30:26] OOM KILL DETECTED
  Killed:  PID=12346 COMM=stress-ng
  Trigger: PID=9876 COMM=oom-monitor
  Pages:   131072 (512.00 MB)

If nothing appears, the kernel didn’t trigger OOM — increase stress-ng’s memory ratio or free up some memory first.

Extension Ideas

Add More Hook Points

Correlating data from multiple kernel hook points gives a more complete picture:

c
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
// Capture container-level OOM.
// Skeleton only: the body is a placeholder for the collection logic.
SEC("kprobe/mem_cgroup_out_of_memory")
int BPF_KPROBE(memcg_oom, struct mem_cgroup *memcg, gfp_t gfp_mask, int order)
{
    // Collect cgroup ID, container memory limit, current usage

    // An int-returning function must return a value.
    return 0;
}

// Memory pressure events.
// Skeleton only: the body is a placeholder for the collection logic.
// NOTE(review): confirm that this tracepoint and its context struct exist on
// the target kernel (see /sys/kernel/tracing/events) before relying on it.
SEC("tracepoint/psi/memory_stall")
int trace_memory_stall(struct trace_event_raw_psi_group *ctx)
{
    // Early warning before OOM actually hits

    // An int-returning function must return a value.
    return 0;
}

User-Side Enhancements

  • Prometheus Exporter: Convert OOM events to Counter and Gauge metrics
  • Container mapping: Read /proc/<pid>/cgroup to map OOM events to Kubernetes Pods
  • Event persistence: Write to ClickHouse / Loki for historical analysis
  • Alerting: Real-time notification when OOM events occur

Summary

This article implemented a complete OOM event tracing tool: a C eBPF kernel program handles data collection, and a Go user-space program handles loading and reading, with events passed through a Ring Buffer. This architecture is the standard pattern for eBPF application development and can be reused across many observability scenarios.