/* SPDX-License-Identifier: GPL-2.0 */
/*
 * PID-based Prefetch Policy
 *
 * Allocates prefetch bandwidth based on process priority:
 * the high-priority PID gets a lower threshold (easier to prefetch,
 * more bandwidth), while the low-priority PID gets a higher threshold
 * (harder to prefetch, less bandwidth).
 */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "uvm_types.h"
#include "bpf_testmod.h"
#include "eviction_common.h"
#include "trace_helper.h"

char _license[] SEC("license") = "GPL";

/*
 * Configuration map - same format as the eviction policies.
 * Keys used below: CONFIG_PRIORITY_PID, CONFIG_PRIORITY_PARAM,
 * CONFIG_LOW_PRIORITY_PID, CONFIG_LOW_PRIORITY_PARAM and
 * CONFIG_DEFAULT_PARAM.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 8);
	__type(key, u32);
	__type(value, u64);
} config SEC(".maps");

/* Per-PID statistics */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1024);
	__type(key, u32);	/* PID */
	__type(value, struct pid_chunk_stats);
} pid_stats SEC(".maps");

/*
 * Per-CPU cache for the current VA block's owner PID. This relies on the
 * kprobe below and the subsequent struct_ops callbacks running on the
 * same CPU within a single fault-servicing path.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, u32);	/* owner_tgid */
} va_block_pid_cache SEC(".maps");

/* Helper: get the threshold (in percent) for a specific PID */
static __always_inline u32 get_threshold_for_pid(u32 pid)
{
	u64 *high_pid_ptr, *high_param_ptr;
	u64 *low_pid_ptr, *low_param_ptr;
	u64 *default_param_ptr;
	u32 key;

	/* Get the high priority PID and its threshold */
	key = CONFIG_PRIORITY_PID;
	high_pid_ptr = bpf_map_lookup_elem(&config, &key);
	key = CONFIG_PRIORITY_PARAM;
	high_param_ptr = bpf_map_lookup_elem(&config, &key);

	if (high_pid_ptr && high_param_ptr && *high_pid_ptr == pid)
		return (u32)*high_param_ptr;

	/* Get the low priority PID and its threshold */
	key = CONFIG_LOW_PRIORITY_PID;
	low_pid_ptr = bpf_map_lookup_elem(&config, &key);
	key = CONFIG_LOW_PRIORITY_PARAM;
	low_param_ptr = bpf_map_lookup_elem(&config, &key);

	if (low_pid_ptr && low_param_ptr && *low_pid_ptr == pid)
		return (u32)*low_param_ptr;

	/* Default threshold for other PIDs */
	key = CONFIG_DEFAULT_PARAM;
	default_param_ptr = bpf_map_lookup_elem(&config, &key);
	if (default_param_ptr)
		return (u32)*default_param_ptr;

	return 50; /* default 50% if not configured */
}
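
/*
 * Userspace configuration sketch (assumes a libbpf skeleton named "skel";
 * the variable names are illustrative, not part of this file):
 *
 *	u32 key = CONFIG_PRIORITY_PID;
 *	u64 val = high_prio_pid;
 *	bpf_map_update_elem(bpf_map__fd(skel->maps.config), &key, &val, 0);
 *	key = CONFIG_PRIORITY_PARAM;
 *	val = 25;	// 25% threshold -> more prefetch for this PID
 *	bpf_map_update_elem(bpf_map__fd(skel->maps.config), &key, &val, 0);
 */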

/* Helper: Update PID stats */
static __always_inline void update_pid_stats(u32 pid, bool allowed)
{
	struct pid_chunk_stats *stats, new_stats = {};

	stats = bpf_map_lookup_elem(&pid_stats, &pid);
	if (!stats) {
		new_stats.total_activate = 1;
		if (allowed)
			new_stats.policy_allow = 1;
		else
			new_stats.policy_deny = 1;
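		/*
		 * BPF_ANY: two CPUs racing on the first event for a PID can
		 * overwrite each other and drop a count; acceptable for
		 * best-effort statistics.
		 */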
		bpf_map_update_elem(&pid_stats, &pid, &new_stats, BPF_ANY);
		return;
	}

	__sync_fetch_and_add(&stats->total_activate, 1);
	if (allowed)
		__sync_fetch_and_add(&stats->policy_allow, 1);
	else
		__sync_fetch_and_add(&stats->policy_deny, 1);
}

/*
 * Hook: uvm_perf_prefetch_get_hint_va_block (via kprobe)
 *
 * Fires before the uvm_prefetch_before_compute struct_ops callback below.
 * Captures the VA block's owner PID and stores it in the per-CPU cache.
 */
SEC("kprobe/uvm_perf_prefetch_get_hint_va_block")
int BPF_KPROBE(prefetch_get_hint_va_block,
	       uvm_va_block_t *va_block,
	       void *va_block_context,
	       u32 new_residency,
	       void *faulted_pages,
	       u32 faulted_region_packed,
	       uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
{
	u32 key = 0;
	u32 *cached_pid = bpf_map_lookup_elem(&va_block_pid_cache, &key);

	if (!cached_pid)
		return 0;

	/* Use trace_helper.h to get the owner PID */
	*cached_pid = get_owner_pid_from_va_block(va_block);
	return 0;
}
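
/*
 * Note: the kprobe above attaches by symbol name, so the module providing
 * uvm_perf_prefetch_get_hint_va_block must be loaded and the function must
 * not be inlined; otherwise the attach fails at runtime.
 */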

/* Helper: get the cached owner PID (0 if not yet captured) */
static __always_inline u32 get_cached_owner_pid(void)
{
	u32 key = 0;
	u32 *cached_pid = bpf_map_lookup_elem(&va_block_pid_cache, &key);

	return cached_pid ? *cached_pid : 0;
}

SEC("struct_ops/uvm_prefetch_before_compute")
int BPF_PROG(uvm_prefetch_before_compute,
	     uvm_page_index_t page_index,
	     uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
	     uvm_va_block_region_t *max_prefetch_region,
	     uvm_va_block_region_t *result_region)
{
	u32 owner_tgid = get_cached_owner_pid();
	u32 threshold = get_threshold_for_pid(owner_tgid);

	bpf_printk("PID-Prefetch: pid=%u, page=%u, threshold=%u%%\n",
		   owner_tgid, page_index, threshold);

	/* Initialize result_region to empty */
	bpf_uvm_set_va_block_region(result_region, 0, 0);

	/* Return ENTER_LOOP so the driver iterates the tree and calls on_tree_iter */
	return 2; /* UVM_BPF_ACTION_ENTER_LOOP */
}

SEC("struct_ops/uvm_prefetch_on_tree_iter")
int BPF_PROG(uvm_prefetch_on_tree_iter,
	     uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
	     uvm_va_block_region_t *max_prefetch_region,
	     uvm_va_block_region_t *current_region,
	     unsigned int counter,
	     uvm_va_block_region_t *prefetch_region)
{
	u32 owner_tgid = get_cached_owner_pid();
	u32 threshold = get_threshold_for_pid(owner_tgid);

	/* Calculate subregion_pages from current_region */
	uvm_page_index_t first = BPF_CORE_READ(current_region, first);
	uvm_page_index_t outer = BPF_CORE_READ(current_region, outer);
	unsigned int subregion_pages = outer - first;

	/* Apply the PID-based threshold: counter * 100 > subregion_pages * threshold
	 *
	 * Lower threshold -> easier to pass -> more prefetch -> more bandwidth
	 * Higher threshold -> harder to pass -> less prefetch -> less bandwidth
	 */
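	/*
	 * Example: threshold=25 on a 16-page subregion passes once
	 * counter > 4 (16 * 25 / 100); threshold=75 requires counter > 12.
	 */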
	bool allowed = (counter * 100 > subregion_pages * threshold);

	/* Update stats for this PID (0 means the owner is unknown) */
	if (owner_tgid > 0)
		update_pid_stats(owner_tgid, allowed);

	if (allowed) {
		bpf_uvm_set_va_block_region(prefetch_region, first, outer);
		return 1; /* selected this region */
	}

	return 0; /* region does not meet the threshold */
}

/* Dummy implementation for the test trigger */
SEC("struct_ops/uvm_bpf_test_trigger_kfunc")
int BPF_PROG(uvm_bpf_test_trigger_kfunc, const char *buf, int len)
{
	return 0;
}

/* Define the struct_ops map */
SEC(".struct_ops")
struct uvm_gpu_ext uvm_ops_prefetch_pid_tree = {
	.uvm_bpf_test_trigger_kfunc = (void *)uvm_bpf_test_trigger_kfunc,
	.uvm_prefetch_before_compute = (void *)uvm_prefetch_before_compute,
	.uvm_prefetch_on_tree_iter = (void *)uvm_prefetch_on_tree_iter,
};
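
/*
 * Loading sketch (userspace; assumes a libbpf skeleton named "skel"):
 *
 *	struct bpf_link *link =
 *		bpf_map__attach_struct_ops(skel->maps.uvm_ops_prefetch_pid_tree);
 *
 * The kprobe program is attached by the generated <name>__attach() call.
 */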