Skip to content

Make L3 cache info per-core rather than per-socket #7

@nazar-pc

Description

@nazar-pc

The current assumption stated in the comments, "The L3 cache (Last-Level Cache or LLC in many contexts) is typically shared among all cores within the same physical CPU socket", is false in practice. It only really holds on small consumer CPUs.

Here is what things look like on AMD Threadripper 7970X:

Machine (125GB total) cpuset=0xffffffff,0xffffffff
  Package L#0 cpuset=0xffffffff,0xffffffff
    Die L#0 cpuset=0x000000ff,0x000000ff
      NUMANode(HBM) L#0 (P#0 31GB) cpuset=0x000000ff,0x000000ff
      L3 L#0 (32MB) cpuset=0x000000ff,0x000000ff
        L2 L#0 (1024KB) cpuset=0x00000001,0x00000001
          L1d L#0 (32KB) cpuset=0x00000001,0x00000001
            L1i L#0 (32KB) cpuset=0x00000001,0x00000001
              Core L#0 cpuset=0x00000001,0x00000001
                PU L#0 (P#0) cpuset=0x00000001
                PU L#1 (P#32) cpuset=0x00000001,0x0
        L2 L#1 (1024KB) cpuset=0x00000002,0x00000002
          L1d L#1 (32KB) cpuset=0x00000002,0x00000002
            L1i L#1 (32KB) cpuset=0x00000002,0x00000002
              Core L#1 cpuset=0x00000002,0x00000002
                PU L#2 (P#1) cpuset=0x00000002
                PU L#3 (P#33) cpuset=0x00000002,0x0
        L2 L#2 (1024KB) cpuset=0x00000004,0x00000004
          L1d L#2 (32KB) cpuset=0x00000004,0x00000004
            L1i L#2 (32KB) cpuset=0x00000004,0x00000004
              Core L#2 cpuset=0x00000004,0x00000004
                PU L#4 (P#2) cpuset=0x00000004
                PU L#5 (P#34) cpuset=0x00000004,0x0
        L2 L#3 (1024KB) cpuset=0x00000008,0x00000008
          L1d L#3 (32KB) cpuset=0x00000008,0x00000008
            L1i L#3 (32KB) cpuset=0x00000008,0x00000008
              Core L#3 cpuset=0x00000008,0x00000008
                PU L#6 (P#3) cpuset=0x00000008
                PU L#7 (P#35) cpuset=0x00000008,0x0
        L2 L#4 (1024KB) cpuset=0x00000010,0x00000010
          L1d L#4 (32KB) cpuset=0x00000010,0x00000010
            L1i L#4 (32KB) cpuset=0x00000010,0x00000010
              Core L#4 cpuset=0x00000010,0x00000010
                PU L#8 (P#4) cpuset=0x00000010
                PU L#9 (P#36) cpuset=0x00000010,0x0
        L2 L#5 (1024KB) cpuset=0x00000020,0x00000020
          L1d L#5 (32KB) cpuset=0x00000020,0x00000020
            L1i L#5 (32KB) cpuset=0x00000020,0x00000020
              Core L#5 cpuset=0x00000020,0x00000020
                PU L#10 (P#5) cpuset=0x00000020
                PU L#11 (P#37) cpuset=0x00000020,0x0
        L2 L#6 (1024KB) cpuset=0x00000040,0x00000040
          L1d L#6 (32KB) cpuset=0x00000040,0x00000040
            L1i L#6 (32KB) cpuset=0x00000040,0x00000040
              Core L#6 cpuset=0x00000040,0x00000040
                PU L#12 (P#6) cpuset=0x00000040
                PU L#13 (P#38) cpuset=0x00000040,0x0
        L2 L#7 (1024KB) cpuset=0x00000080,0x00000080
          L1d L#7 (32KB) cpuset=0x00000080,0x00000080
            L1i L#7 (32KB) cpuset=0x00000080,0x00000080
              Core L#7 cpuset=0x00000080,0x00000080
                PU L#14 (P#7) cpuset=0x00000080
                PU L#15 (P#39) cpuset=0x00000080,0x0
    Die L#1 cpuset=0x0000ff00,0x0000ff00
      NUMANode(DRAM) L#1 (P#3 31GB) cpuset=0x0000ff00,0x0000ff00
      L3 L#1 (32MB) cpuset=0x0000ff00,0x0000ff00
        L2 L#8 (1024KB) cpuset=0x00000100,0x00000100
          L1d L#8 (32KB) cpuset=0x00000100,0x00000100
            L1i L#8 (32KB) cpuset=0x00000100,0x00000100
              Core L#8 cpuset=0x00000100,0x00000100
                PU L#16 (P#8) cpuset=0x00000100
                PU L#17 (P#40) cpuset=0x00000100,0x0
        L2 L#9 (1024KB) cpuset=0x00000200,0x00000200
          L1d L#9 (32KB) cpuset=0x00000200,0x00000200
            L1i L#9 (32KB) cpuset=0x00000200,0x00000200
              Core L#9 cpuset=0x00000200,0x00000200
                PU L#18 (P#9) cpuset=0x00000200
                PU L#19 (P#41) cpuset=0x00000200,0x0
        L2 L#10 (1024KB) cpuset=0x00000400,0x00000400
          L1d L#10 (32KB) cpuset=0x00000400,0x00000400
            L1i L#10 (32KB) cpuset=0x00000400,0x00000400
              Core L#10 cpuset=0x00000400,0x00000400
                PU L#20 (P#10) cpuset=0x00000400
                PU L#21 (P#42) cpuset=0x00000400,0x0
        L2 L#11 (1024KB) cpuset=0x00000800,0x00000800
          L1d L#11 (32KB) cpuset=0x00000800,0x00000800
            L1i L#11 (32KB) cpuset=0x00000800,0x00000800
              Core L#11 cpuset=0x00000800,0x00000800
                PU L#22 (P#11) cpuset=0x00000800
                PU L#23 (P#43) cpuset=0x00000800,0x0
        L2 L#12 (1024KB) cpuset=0x00001000,0x00001000
          L1d L#12 (32KB) cpuset=0x00001000,0x00001000
            L1i L#12 (32KB) cpuset=0x00001000,0x00001000
              Core L#12 cpuset=0x00001000,0x00001000
                PU L#24 (P#12) cpuset=0x00001000
                PU L#25 (P#44) cpuset=0x00001000,0x0
        L2 L#13 (1024KB) cpuset=0x00002000,0x00002000
          L1d L#13 (32KB) cpuset=0x00002000,0x00002000
            L1i L#13 (32KB) cpuset=0x00002000,0x00002000
              Core L#13 cpuset=0x00002000,0x00002000
                PU L#26 (P#13) cpuset=0x00002000
                PU L#27 (P#45) cpuset=0x00002000,0x0
        L2 L#14 (1024KB) cpuset=0x00004000,0x00004000
          L1d L#14 (32KB) cpuset=0x00004000,0x00004000
            L1i L#14 (32KB) cpuset=0x00004000,0x00004000
              Core L#14 cpuset=0x00004000,0x00004000
                PU L#28 (P#14) cpuset=0x00004000
                PU L#29 (P#46) cpuset=0x00004000,0x0
        L2 L#15 (1024KB) cpuset=0x00008000,0x00008000
          L1d L#15 (32KB) cpuset=0x00008000,0x00008000
            L1i L#15 (32KB) cpuset=0x00008000,0x00008000
              Core L#15 cpuset=0x00008000,0x00008000
                PU L#30 (P#15) cpuset=0x00008000
                PU L#31 (P#47) cpuset=0x00008000,0x0
    Die L#2 cpuset=0x00ff0000,0x00ff0000
      NUMANode(DRAM) L#2 (P#1 31GB) cpuset=0x00ff0000,0x00ff0000
      L3 L#2 (32MB) cpuset=0x00ff0000,0x00ff0000
        L2 L#16 (1024KB) cpuset=0x00010000,0x00010000
          L1d L#16 (32KB) cpuset=0x00010000,0x00010000
            L1i L#16 (32KB) cpuset=0x00010000,0x00010000
              Core L#16 cpuset=0x00010000,0x00010000
                PU L#32 (P#16) cpuset=0x00010000
                PU L#33 (P#48) cpuset=0x00010000,0x0
        L2 L#17 (1024KB) cpuset=0x00020000,0x00020000
          L1d L#17 (32KB) cpuset=0x00020000,0x00020000
            L1i L#17 (32KB) cpuset=0x00020000,0x00020000
              Core L#17 cpuset=0x00020000,0x00020000
                PU L#34 (P#17) cpuset=0x00020000
                PU L#35 (P#49) cpuset=0x00020000,0x0
        L2 L#18 (1024KB) cpuset=0x00040000,0x00040000
          L1d L#18 (32KB) cpuset=0x00040000,0x00040000
            L1i L#18 (32KB) cpuset=0x00040000,0x00040000
              Core L#18 cpuset=0x00040000,0x00040000
                PU L#36 (P#18) cpuset=0x00040000
                PU L#37 (P#50) cpuset=0x00040000,0x0
        L2 L#19 (1024KB) cpuset=0x00080000,0x00080000
          L1d L#19 (32KB) cpuset=0x00080000,0x00080000
            L1i L#19 (32KB) cpuset=0x00080000,0x00080000
              Core L#19 cpuset=0x00080000,0x00080000
                PU L#38 (P#19) cpuset=0x00080000
                PU L#39 (P#51) cpuset=0x00080000,0x0
        L2 L#20 (1024KB) cpuset=0x00100000,0x00100000
          L1d L#20 (32KB) cpuset=0x00100000,0x00100000
            L1i L#20 (32KB) cpuset=0x00100000,0x00100000
              Core L#20 cpuset=0x00100000,0x00100000
                PU L#40 (P#20) cpuset=0x00100000
                PU L#41 (P#52) cpuset=0x00100000,0x0
        L2 L#21 (1024KB) cpuset=0x00200000,0x00200000
          L1d L#21 (32KB) cpuset=0x00200000,0x00200000
            L1i L#21 (32KB) cpuset=0x00200000,0x00200000
              Core L#21 cpuset=0x00200000,0x00200000
                PU L#42 (P#21) cpuset=0x00200000
                PU L#43 (P#53) cpuset=0x00200000,0x0
        L2 L#22 (1024KB) cpuset=0x00400000,0x00400000
          L1d L#22 (32KB) cpuset=0x00400000,0x00400000
            L1i L#22 (32KB) cpuset=0x00400000,0x00400000
              Core L#22 cpuset=0x00400000,0x00400000
                PU L#44 (P#22) cpuset=0x00400000
                PU L#45 (P#54) cpuset=0x00400000,0x0
        L2 L#23 (1024KB) cpuset=0x00800000,0x00800000
          L1d L#23 (32KB) cpuset=0x00800000,0x00800000
            L1i L#23 (32KB) cpuset=0x00800000,0x00800000
              Core L#23 cpuset=0x00800000,0x00800000
                PU L#46 (P#23) cpuset=0x00800000
                PU L#47 (P#55) cpuset=0x00800000,0x0
    Die L#3 cpuset=0xff000000,0xff000000
      NUMANode(DRAM) L#3 (P#2 31GB) cpuset=0xff000000,0xff000000
      L3 L#3 (32MB) cpuset=0xff000000,0xff000000
        L2 L#24 (1024KB) cpuset=0x01000000,0x01000000
          L1d L#24 (32KB) cpuset=0x01000000,0x01000000
            L1i L#24 (32KB) cpuset=0x01000000,0x01000000
              Core L#24 cpuset=0x01000000,0x01000000
                PU L#48 (P#24) cpuset=0x01000000
                PU L#49 (P#56) cpuset=0x01000000,0x0
        L2 L#25 (1024KB) cpuset=0x02000000,0x02000000
          L1d L#25 (32KB) cpuset=0x02000000,0x02000000
            L1i L#25 (32KB) cpuset=0x02000000,0x02000000
              Core L#25 cpuset=0x02000000,0x02000000
                PU L#50 (P#25) cpuset=0x02000000
                PU L#51 (P#57) cpuset=0x02000000,0x0
        L2 L#26 (1024KB) cpuset=0x04000000,0x04000000
          L1d L#26 (32KB) cpuset=0x04000000,0x04000000
            L1i L#26 (32KB) cpuset=0x04000000,0x04000000
              Core L#26 cpuset=0x04000000,0x04000000
                PU L#52 (P#26) cpuset=0x04000000
                PU L#53 (P#58) cpuset=0x04000000,0x0
        L2 L#27 (1024KB) cpuset=0x08000000,0x08000000
          L1d L#27 (32KB) cpuset=0x08000000,0x08000000
            L1i L#27 (32KB) cpuset=0x08000000,0x08000000
              Core L#27 cpuset=0x08000000,0x08000000
                PU L#54 (P#27) cpuset=0x08000000
                PU L#55 (P#59) cpuset=0x08000000,0x0
        L2 L#28 (1024KB) cpuset=0x10000000,0x10000000
          L1d L#28 (32KB) cpuset=0x10000000,0x10000000
            L1i L#28 (32KB) cpuset=0x10000000,0x10000000
              Core L#28 cpuset=0x10000000,0x10000000
                PU L#56 (P#28) cpuset=0x10000000
                PU L#57 (P#60) cpuset=0x10000000,0x0
        L2 L#29 (1024KB) cpuset=0x20000000,0x20000000
          L1d L#29 (32KB) cpuset=0x20000000,0x20000000
            L1i L#29 (32KB) cpuset=0x20000000,0x20000000
              Core L#29 cpuset=0x20000000,0x20000000
                PU L#58 (P#29) cpuset=0x20000000
                PU L#59 (P#61) cpuset=0x20000000,0x0
        L2 L#30 (1024KB) cpuset=0x40000000,0x40000000
          L1d L#30 (32KB) cpuset=0x40000000,0x40000000
            L1i L#30 (32KB) cpuset=0x40000000,0x40000000
              Core L#30 cpuset=0x40000000,0x40000000
                PU L#60 (P#30) cpuset=0x40000000
                PU L#61 (P#62) cpuset=0x40000000,0x0
        L2 L#31 (1024KB) cpuset=0x80000000,0x80000000
          L1d L#31 (32KB) cpuset=0x80000000,0x80000000
            L1i L#31 (32KB) cpuset=0x80000000,0x80000000
              Core L#31 cpuset=0x80000000,0x80000000
                PU L#62 (P#31) cpuset=0x80000000
                PU L#63 (P#63) cpuset=0x80000000,0x0

And here is AMD Epyc 7302P:

Machine (126GB total) cpuset=0xffffffff
  Package L#0 cpuset=0xffffffff
    Group0 L#0 cpuset=0x000f000f
      NUMANode L#0 (P#0 31GB) cpuset=0x000f000f
      L3 L#0 (16MB) cpuset=0x00030003
        L2 L#0 (512KB) cpuset=0x00010001
          L1d L#0 (32KB) cpuset=0x00010001
            L1i L#0 (32KB) cpuset=0x00010001
              Core L#0 cpuset=0x00010001
                PU L#0 (P#0) cpuset=0x00000001
                PU L#1 (P#16) cpuset=0x00010000
        L2 L#1 (512KB) cpuset=0x00020002
          L1d L#1 (32KB) cpuset=0x00020002
            L1i L#1 (32KB) cpuset=0x00020002
              Core L#1 cpuset=0x00020002
                PU L#2 (P#1) cpuset=0x00000002
                PU L#3 (P#17) cpuset=0x00020000
      L3 L#1 (16MB) cpuset=0x000c000c
        L2 L#2 (512KB) cpuset=0x00040004
          L1d L#2 (32KB) cpuset=0x00040004
            L1i L#2 (32KB) cpuset=0x00040004
              Core L#2 cpuset=0x00040004
                PU L#4 (P#2) cpuset=0x00000004
                PU L#5 (P#18) cpuset=0x00040000
        L2 L#3 (512KB) cpuset=0x00080008
          L1d L#3 (32KB) cpuset=0x00080008
            L1i L#3 (32KB) cpuset=0x00080008
              Core L#3 cpuset=0x00080008
                PU L#6 (P#3) cpuset=0x00000008
                PU L#7 (P#19) cpuset=0x00080000
    Group0 L#1 cpuset=0x00f000f0
      NUMANode L#1 (P#1 31GB) cpuset=0x00f000f0
      L3 L#2 (16MB) cpuset=0x00300030
        L2 L#4 (512KB) cpuset=0x00100010
          L1d L#4 (32KB) cpuset=0x00100010
            L1i L#4 (32KB) cpuset=0x00100010
              Core L#4 cpuset=0x00100010
                PU L#8 (P#4) cpuset=0x00000010
                PU L#9 (P#20) cpuset=0x00100000
        L2 L#5 (512KB) cpuset=0x00200020
          L1d L#5 (32KB) cpuset=0x00200020
            L1i L#5 (32KB) cpuset=0x00200020
              Core L#5 cpuset=0x00200020
                PU L#10 (P#5) cpuset=0x00000020
                PU L#11 (P#21) cpuset=0x00200000
      L3 L#3 (16MB) cpuset=0x00c000c0
        L2 L#6 (512KB) cpuset=0x00400040
          L1d L#6 (32KB) cpuset=0x00400040
            L1i L#6 (32KB) cpuset=0x00400040
              Core L#6 cpuset=0x00400040
                PU L#12 (P#6) cpuset=0x00000040
                PU L#13 (P#22) cpuset=0x00400000
        L2 L#7 (512KB) cpuset=0x00800080
          L1d L#7 (32KB) cpuset=0x00800080
            L1i L#7 (32KB) cpuset=0x00800080
              Core L#7 cpuset=0x00800080
                PU L#14 (P#7) cpuset=0x00000080
                PU L#15 (P#23) cpuset=0x00800000
    Group0 L#2 cpuset=0x0f000f00
      NUMANode L#2 (P#2 31GB) cpuset=0x0f000f00
      L3 L#4 (16MB) cpuset=0x03000300
        L2 L#8 (512KB) cpuset=0x01000100
          L1d L#8 (32KB) cpuset=0x01000100
            L1i L#8 (32KB) cpuset=0x01000100
              Core L#8 cpuset=0x01000100
                PU L#16 (P#8) cpuset=0x00000100
                PU L#17 (P#24) cpuset=0x01000000
        L2 L#9 (512KB) cpuset=0x02000200
          L1d L#9 (32KB) cpuset=0x02000200
            L1i L#9 (32KB) cpuset=0x02000200
              Core L#9 cpuset=0x02000200
                PU L#18 (P#9) cpuset=0x00000200
                PU L#19 (P#25) cpuset=0x02000000
      L3 L#5 (16MB) cpuset=0x0c000c00
        L2 L#10 (512KB) cpuset=0x04000400
          L1d L#10 (32KB) cpuset=0x04000400
            L1i L#10 (32KB) cpuset=0x04000400
              Core L#10 cpuset=0x04000400
                PU L#20 (P#10) cpuset=0x00000400
                PU L#21 (P#26) cpuset=0x04000000
        L2 L#11 (512KB) cpuset=0x08000800
          L1d L#11 (32KB) cpuset=0x08000800
            L1i L#11 (32KB) cpuset=0x08000800
              Core L#11 cpuset=0x08000800
                PU L#22 (P#11) cpuset=0x00000800
                PU L#23 (P#27) cpuset=0x08000000
    Group0 L#3 cpuset=0xf000f000
      NUMANode L#3 (P#3 31GB) cpuset=0xf000f000
      L3 L#6 (16MB) cpuset=0x30003000
        L2 L#12 (512KB) cpuset=0x10001000
          L1d L#12 (32KB) cpuset=0x10001000
            L1i L#12 (32KB) cpuset=0x10001000
              Core L#12 cpuset=0x10001000
                PU L#24 (P#12) cpuset=0x00001000
                PU L#25 (P#28) cpuset=0x10000000
        L2 L#13 (512KB) cpuset=0x20002000
          L1d L#13 (32KB) cpuset=0x20002000
            L1i L#13 (32KB) cpuset=0x20002000
              Core L#13 cpuset=0x20002000
                PU L#26 (P#13) cpuset=0x00002000
                PU L#27 (P#29) cpuset=0x20000000
      L3 L#7 (16MB) cpuset=0xc000c000
        L2 L#14 (512KB) cpuset=0x40004000
          L1d L#14 (32KB) cpuset=0x40004000
            L1i L#14 (32KB) cpuset=0x40004000
              Core L#14 cpuset=0x40004000
                PU L#28 (P#14) cpuset=0x00004000
                PU L#29 (P#30) cpuset=0x40000000
        L2 L#15 (512KB) cpuset=0x80008000
          L1d L#15 (32KB) cpuset=0x80008000
            L1i L#15 (32KB) cpuset=0x80008000
              Core L#15 cpuset=0x80008000
                PU L#30 (P#15) cpuset=0x00008000
                PU L#31 (P#31) cpuset=0x80000000

Both systems have a single socket and are configured to NPS4 (the default BIOS configuration presents each of them as a single NUMA node instead, but from the L3 caches you can clearly discern that the cores belong to different physical dies). This is not specific to workstation/server CPUs either: many consumer CPUs these days have a chiplet design and likewise expose multiple L3 caches.

In my experience, the best way to manage cores properly regardless of BIOS configuration is to look at how cores are grouped by L3 cache, since latency between cores that do not share an L3 cache is significantly higher.

There is no such API in the library right now; with https://github.com/HadrienG2/hwlocality it looks something like this:

let topology = hwlocality::Topology::new().unwrap();
let cpu_cores = topology
    // Iterate over groups of L3 caches
    .objects_with_type(hwlocality::object::types::ObjectType::L3Cache)
    // For each L3 cache get CPU set
    .filter_map(|node| node.cpuset())
    // For each CPU set extract individual cores
    .map(|cpuset| cpuset.iter_set().map(usize::from).collect::<Vec<_>>())
    .filter(|cores| !cores.is_empty())
    .collect::<Vec<_>>();

BTW I'd appreciate a similar API where no implicit static variable is used.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions