From ab8aaab874c4aa378e76d0a55ce6e0fad6e042a2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Oct 2024 15:20:17 -0300 Subject: [PATCH 01/14] tools headers UAPI: Sync linux/const.h with the kernel headers To pick up the changes in: 947697c6f0f75f98 ("uapi: Define GENMASK_U128") That causes no changes in tooling, just addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/const.h include/uapi/linux/const.h Cc: Adrian Hunter Cc: Anshuman Khandual Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Yury Norov Link: https://lore.kernel.org/lkml/ZwltGNJwujKu1Fgn@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/const.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index a429381e7ca5..e16be0d37746 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,6 +28,23 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __BIT128() would not work in the asm code, as it shifts an + * 'unsigned __init128' data type as direct representation of + * 128 bit constants is not supported in the gcc compiler, as + * they get silently truncated. + * + * TODO: Please revisit this implementation when gcc compiler + * starts representing 128 bit constants directly like long + * and unsigned long etc. Subsequently drop the comment for + * GENMASK_U128() which would then start supporting asm code. 
+ */ +#define _BIT128(x) ((unsigned __int128)(1) << (x)) +#endif + #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) From 39c6a356201ebbd7e1db5be53fbb46ef4bfc70a4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 11 Oct 2024 16:10:01 -0300 Subject: [PATCH 02/14] perf trace: The return from 'write' isn't a pid When adding an explicit beautifier for the 'write' syscall when the BPF based buffer collector was introduced there was a cut'n'paste error that carried the syscall_fmt->errpid setting from a nearby syscall (waitid) that returns a pid. So the write return was being suppressed by the return pretty printer, remove that field, reverting it back to the default return handler, that prints positive numbers as-is and interprets negative values as errnos. I actually introduced the problem while making Howard's original patch work just with the 'write' syscall, as we couldn't just look for any buffers, the ones that are filled in by the kernel couldn't use the same sys_enter BPF collector. 
Fixes: b257fac12f38d7f5 ("perf trace: Pretty print buffer data") Reported-by: James Clark Link: https://lore.kernel.org/lkml/bcf50648-3c7e-4513-8717-0d14492c53b9@linaro.org Link: https://lore.kernel.org/all/Zt8jTfzDYgBPvFCd@x1/#t Cc: Adrian Hunter Cc: Alan Maguire Cc: Howard Chu Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f6e847529073..d3f11b90d025 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1399,7 +1399,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, { .name = "waitid", .errpid = true, .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, - { .name = "write", .errpid = true, + { .name = "write", .arg = { [1] = { .scnprintf = SCA_BUF /* buf */, .from_user = true, }, }, }, }; From ecabac70ff919580324b407818ee3e6c0004dcf8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 15 Oct 2024 17:03:37 -0300 Subject: [PATCH 03/14] perf trace augmented_raw_syscalls: Add extra array index bounds checking to satisfy some BPF verifiers In a RHEL8 kernel (4.18.0-513.11.1.el8_9.x86_64), that, as enterprise kernels go, have backports from modern kernels, the verifier complains about lack of bounds check for the index into the array of syscall arguments, on a BPF bytecode generated by clang 17, with: ; } else if (size < 0 && size >= -6) { /* buffer */ 116: (b7) r1 = -6 117: (2d) if r1 > r6 goto pc-30 R0=map_value(id=0,off=0,ks=4,vs=24688,imm=0) R1_w=inv-6 R2=map_value(id=0,off=16,ks=4,vs=8272,imm=0) R3=inv(id=0) R5=inv40 R6=inv(id=0,umin_value=18446744073709551610,var_off=(0xffffffff00000000; 0xffffffff)) R7=map_value(id=0,off=56,ks=4,vs=8272,imm=0) R8=invP6 R9=map_value(id=0,off=20,ks=4,vs=24,imm=0) R10=fp0 fp-8=mmmmmmmm 
fp-16=map_value fp-24=map_value fp-32=inv40 fp-40=ctx fp-48=map_value fp-56=inv1 fp-64=map_value fp-72=map_value fp-80=map_value ; index = -(size + 1); 118: (a7) r6 ^= -1 119: (67) r6 <<= 32 120: (77) r6 >>= 32 ; aug_size = args->args[index]; 121: (67) r6 <<= 3 122: (79) r1 = *(u64 *)(r10 -24) 123: (0f) r1 += r6 last_idx 123 first_idx 116 regs=40 stack=0 before 122: (79) r1 = *(u64 *)(r10 -24) regs=40 stack=0 before 121: (67) r6 <<= 3 regs=40 stack=0 before 120: (77) r6 >>= 32 regs=40 stack=0 before 119: (67) r6 <<= 32 regs=40 stack=0 before 118: (a7) r6 ^= -1 regs=40 stack=0 before 117: (2d) if r1 > r6 goto pc-30 regs=42 stack=0 before 116: (b7) r1 = -6 R0_w=map_value(id=0,off=0,ks=4,vs=24688,imm=0) R1_w=inv1 R2_w=map_value(id=0,off=16,ks=4,vs=8272,imm=0) R3_w=inv(id=0) R5_w=inv40 R6_rw=invP(id=0,smin_value=-2147483648,smax_value=0) R7_w=map_value(id=0,off=56,ks=4,vs=8272,imm=0) R8_w=invP6 R9_w=map_value(id=0,off=20,ks=4,vs=24,imm=0) R10=fp0 fp-8=mmmmmmmm fp-16_w=map_value fp-24_r=map_value fp-32_w=inv40 fp-40=ctx fp-48=map_value fp-56_w=inv1 fp-64_w=map_value fp-72=map_value fp-80=map_value parent didn't have regs=40 stack=0 marks last_idx 110 first_idx 98 regs=40 stack=0 before 110: (6d) if r1 s> r6 goto pc+5 regs=42 stack=0 before 109: (b7) r1 = 1 regs=40 stack=0 before 108: (65) if r6 s> 0x1000 goto pc+7 regs=40 stack=0 before 98: (55) if r6 != 0x1 goto pc+9 R0_w=map_value(id=0,off=0,ks=4,vs=24688,imm=0) R1_w=invP12 R2_w=map_value(id=0,off=16,ks=4,vs=8272,imm=0) R3_rw=inv(id=0) R5_w=inv24 R6_rw=invP(id=0,smin_value=-2147483648,smax_value=2147483647) R7_w=map_value(id=0,off=40,ks=4,vs=8272,imm=0) R8_rw=invP4 R9_w=map_value(id=0,off=12,ks=4,vs=24,imm=0) R10=fp0 fp-8=mmmmmmmm fp-16_rw=map_value fp-24_r=map_value fp-32_rw=invP24 fp-40_r=ctx fp-48_r=map_value fp-56_w=invP1 fp-64_rw=map_value fp-72_r=map_value fp-80_r=map_value parent already had regs=40 stack=0 marks 124: (79) r6 = *(u64 *)(r1 +16) R0=map_value(id=0,off=0,ks=4,vs=24688,imm=0) 
R1_w=map_value(id=0,off=0,ks=4,vs=8272,umax_value=34359738360,var_off=(0x0; 0x7fffffff8),s32_max_value=2147483640,u32_max_value=-8) R2=map_value(id=0,off=16,ks=4,vs=8272,imm=0) R3=inv(id=0) R5=inv40 R6_w=invP(id=0,umax_value=34359738360,var_off=(0x0; 0x7fffffff8),s32_max_value=2147483640,u32_max_value=-8) R7=map_value(id=0,off=56,ks=4,vs=8272,imm=0) R8=invP6 R9=map_value(id=0,off=20,ks=4,vs=24,imm=0) R10=fp0 fp-8=mmmmmmmm fp-16=map_value fp-24=map_value fp-32=inv40 fp-40=ctx fp-48=map_value fp-56=inv1 fp-64=map_value fp-72=map_value fp-80=map_value R1 unbounded memory access, make sure to bounds check any such access processed 466 insns (limit 1000000) max_states_per_insn 2 total_states 20 peak_states 20 mark_read 3 If we add this line, as used in other BPF programs, to cap that index: index &= 7; The generated BPF program is considered safe by that version of the BPF verifier, allowing perf to collect the syscall args in one more kernel using the BPF based pointer contents collector. With the above one-liner it works with that kernel: [root@dell-per740-01 ~]# uname -a Linux dell-per740-01.khw.eng.rdu2.dc.redhat.com 4.18.0-513.11.1.el8_9.x86_64 #1 SMP Thu Dec 7 03:06:13 EST 2023 x86_64 x86_64 x86_64 GNU/Linux [root@dell-per740-01 ~]# ~acme/bin/perf trace -e *sleep* sleep 1.234567890 0.000 (1234.704 ms): sleep/3863610 nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 234567890 }) = 0 [root@dell-per740-01 ~]# As well as with the one in Fedora 40: root@number:~# uname -a Linux number 6.11.3-200.fc40.x86_64 #1 SMP PREEMPT_DYNAMIC Thu Oct 10 22:31:19 UTC 2024 x86_64 GNU/Linux root@number:~# perf trace -e *sleep* sleep 1.234567890 0.000 (1234.722 ms): sleep/14873 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 234567890 }, rmtp: 0x7ffe87311a40) = 0 root@number:~# Song Liu reported that this one-liner was being optimized out by clang 18, so I suggested and he tested that adding a compiler barrier before it made clang v18 to keep it and the verifier in the kernel in Song's case 
(Meta's 5.12 based kernel) also was happy with the resulting bytecode. I'll investigate using virtme-ng[1] to have all the perf BPF based functionality thoroughly tested over multiple kernels and clang versions. [1] https://kernel-recipes.org/en/2024/virtme-ng/ Cc: Adrian Hunter Cc: Alan Maguire Cc: Alexander Shishkin Cc: Andrea Righi Cc: Howard Chu Cc: Ian Rogers Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Link: https://lore.kernel.org/lkml/Zw7JgJc0LOwSpuvx@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index b2f17cca014b..31df5f0cb14b 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -477,6 +477,8 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) augmented = true; } else if (size < 0 && size >= -6) { /* buffer */ index = -(size + 1); + barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. + index &= 7; // Satisfy the bounds checking with the verifier in some kernels. aug_size = args->args[index]; if (aug_size > TRACE_AUG_MAX_BUF) From 395d38419f1853decab84acc16176b3fa5c96690 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 10 Oct 2024 19:14:02 -0700 Subject: [PATCH 04/14] perf trace augmented_raw_syscalls: Add more checks to pass the verifier Add some more checks to pass the verifier in more kernels. 
Signed-off-by: Howard Chu Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alan Maguire Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20241011021403.4089793-3-howardchu95@gmail.com [ Reduced the patch removing things that can be done later ] Signed-off-by: Arnaldo Carvalho de Melo --- .../bpf_skel/augmented_raw_syscalls.bpf.c | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 31df5f0cb14b..4a62ed593e84 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -288,6 +288,10 @@ int sys_enter_rename(struct syscall_enter_args *args) augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); len += augmented_args->arg.size; + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); @@ -315,6 +319,10 @@ int sys_enter_renameat2(struct syscall_enter_args *args) augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); len += augmented_args->arg.size; + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); @@ -423,8 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) static int 
augment_sys_enter(void *ctx, struct syscall_enter_args *args) { bool augmented, do_output = false; - int zero = 0, size, aug_size, index, output = 0, + int zero = 0, size, aug_size, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ unsigned int nr, *beauty_map; struct beauty_payload_enter *payload; void *arg, *payload_offset; @@ -490,10 +499,17 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } } + /* Augmented data size is limited to sizeof(augmented_arg->unnamed union with value field) */ + if (aug_size > value_size) + aug_size = value_size; + /* write data to payload */ if (augmented) { int written = offsetof(struct augmented_arg, value) + aug_size; + if (written < 0 || written > sizeof(struct augmented_arg)) + return 1; + ((struct augmented_arg *)payload_offset)->size = aug_size; output += written; payload_offset += written; @@ -501,7 +517,7 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } } - if (!do_output) + if (!do_output || (sizeof(struct syscall_enter_args) + output) > sizeof(struct beauty_payload_enter)) return 1; return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output); From 7fbff3c0e085745b99f220ad56fcee3ea9643d87 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Thu, 10 Oct 2024 19:14:01 -0700 Subject: [PATCH 05/14] perf build: Change the clang check back to 12.0.1 This serves as a revert for this patch: https://lore.kernel.org/linux-perf-users/ZuGL9ROeTV2uXoSp@x1/ Signed-off-by: Howard Chu Tested-by: James Clark Cc: Adrian Hunter Cc: Alan Maguire Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20241011021403.4089793-2-howardchu95@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 4ddb27a48eed..d4332675babb 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -704,8 +704,8 @@ ifeq ($(BUILD_BPF_SKEL),1) BUILD_BPF_SKEL := 0 else CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g') - ifeq ($(call version-lt3,$(CLANG_VERSION),16.0.6),1) - $(warning Warning: Disabled BPF skeletons as at least $(CLANG) version 16.0.6 is reported to be a working setup with the current of BPF based perf features) + ifeq ($(call version-lt3,$(CLANG_VERSION),12.0.1),1) + $(warning Warning: Disabled BPF skeletons as reliable BTF generation needs at least $(CLANG) version 12.0.1) BUILD_BPF_SKEL := 0 endif endif From 5d35634ecc2d2c3938bd7dc23df0ad046da1b303 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 22 Oct 2024 17:22:36 -0300 Subject: [PATCH 06/14] perf trace: Fix non-listed archs in the syscalltbl routines This fixes a build breakage on 32-bit arm, where the syscalltbl__id_at_idx() function was missing. Committer notes: Generating a proper syscall table from a copy of arch/arm/tools/syscall.tbl ends up being too big a patch for this rc stage, I started doing it but while testing noticed some other problems with using BPF to collect pointer args on arm7 (32-bit) will maybe continue trying to make it work on the next cycle... 
Fixes: 7a2fb5619cc1fb53 ("perf trace: Fix iteration of syscall ids in syscalltbl->entries") Suggested-by: Howard Chu Signed-off-by: Jiri Slaby Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: Howard Chu Cc: Ian Rogers Cc: Jiri Olsa Link: https://lore.kernel.org/lkml/3a592835-a14f-40be-8961-c0cee7720a94@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/syscalltbl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 7c15dec6900d..6c45ded922b6 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -46,6 +46,11 @@ static const char *const *syscalltbl_native = syscalltbl_mips_n64; #include const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; static const char *const *syscalltbl_native = syscalltbl_loongarch; +#else +const int syscalltbl_native_max_id = 0; +static const char *const syscalltbl_native[] = { + [0] = "unknown", +}; #endif struct syscall { @@ -182,6 +187,11 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) return audit_name_to_syscall(name, tbl->audit_machine); } +int syscalltbl__id_at_idx(struct syscalltbl *tbl __maybe_unused, int idx) +{ + return idx; +} + int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused, const char *syscall_glob __maybe_unused, int *idx __maybe_unused) { From d822ca29a4fc5278fb511790dace44836e8cc40d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 22 Oct 2024 17:36:16 -0300 Subject: [PATCH 07/14] tools headers UAPI: Sync kvm headers with the kernel sources To pick the changes in: aa8d1f48d353b046 ("KVM: x86/mmu: Introduce a quirk to control memslot zap behavior") That doesn't change functionality in tools/perf, as no new ioctl is added for the 'perf trace' scripts to harvest. 
This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Paolo Bonzini Cc: Yan Zhao Link: https://lore.kernel.org/lkml/ZxgN0O02YrAJ2qIC@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index bf57a824f722..a8debbf2f702 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -439,6 +439,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) +#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 From 06a130e42a5bfc84795464bff023bff4c16f58c5 Mon Sep 17 00:00:00 2001 From: Veronika Molnarova Date: Thu, 17 Oct 2024 18:15:55 +0200 Subject: [PATCH 08/14] perf test: Handle perftool-testsuite_probe failure due to broken DWARF Test case test_adding_blacklisted ends in failure if the blacklisted probe is of an assembler function with no DWARF available. At the same time, probing the blacklisted function with ASM DWARF doesn't test the blacklist itself as the failure is a result of the broken DWARF. When the broken DWARF output is encountered, check if the probed function was compiled by the assembler. If so, the broken DWARF message is expected and does not report a perf issue, else report a failure. If the ASM DWARF affected the probe, try the next probe on the blacklist. If the first 5 probes are defective due to broken DWARF, skip the test case. 
Fixes: def5480d63c1e847 ("perf testsuite probe: Add test for blacklisted kprobes handling") Signed-off-by: Veronika Molnarova Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Veronika Molnarova Link: https://lore.kernel.org/r/20241017161555.236769-1-vmolnaro@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- .../base_probe/test_adding_blacklisted.sh | 65 +++++++++++++++---- 1 file changed, 52 insertions(+), 13 deletions(-) diff --git a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh index b5dc10b2a738..bead723e34af 100755 --- a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh +++ b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh @@ -19,35 +19,74 @@ TEST_RESULT=0 # skip if not supported -BLACKFUNC=`head -n 1 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2` -if [ -z "$BLACKFUNC" ]; then +BLACKFUNC_LIST=`head -n 5 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2` +if [ -z "$BLACKFUNC_LIST" ]; then print_overall_skipped exit 0 fi +# try to find vmlinux with DWARF debug info +VMLINUX_FILE=$(perf probe -v random_probe |& grep "Using.*for symbols" | sed -r 's/^Using (.*) for symbols$/\1/') + # remove all previously added probes clear_all_probes ### adding blacklisted function - -# functions from blacklist should be skipped by perf probe -! $CMD_PERF probe $BLACKFUNC > $LOGS_DIR/adding_blacklisted.log 2> $LOGS_DIR/adding_blacklisted.err -PERF_EXIT_CODE=$? - REGEX_SCOPE_FAIL="Failed to find scope of probe point" REGEX_SKIP_MESSAGE=" is blacklisted function, skip it\." -REGEX_NOT_FOUND_MESSAGE="Probe point \'$BLACKFUNC\' not found." +REGEX_NOT_FOUND_MESSAGE="Probe point \'$RE_EVENT\' not found." REGEX_ERROR_MESSAGE="Error: Failed to add events." 
REGEX_INVALID_ARGUMENT="Failed to write event: Invalid argument" REGEX_SYMBOL_FAIL="Failed to find symbol at $RE_ADDRESS" -REGEX_OUT_SECTION="$BLACKFUNC is out of \.\w+, skip it" -../common/check_all_lines_matched.pl "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err -CHECK_EXIT_CODE=$? +REGEX_OUT_SECTION="$RE_EVENT is out of \.\w+, skip it" +REGEX_MISSING_DECL_LINE="A function DIE doesn't have decl_line. Maybe broken DWARF?" -print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding blacklisted function $BLACKFUNC" -(( TEST_RESULT += $? )) +BLACKFUNC="" +SKIP_DWARF=0 +for BLACKFUNC in $BLACKFUNC_LIST; do + echo "Probing $BLACKFUNC" + + # functions from blacklist should be skipped by perf probe + ! $CMD_PERF probe $BLACKFUNC > $LOGS_DIR/adding_blacklisted.log 2> $LOGS_DIR/adding_blacklisted.err + PERF_EXIT_CODE=$? + + # check for bad DWARF polluting the result + ../common/check_all_patterns_found.pl "$REGEX_MISSING_DECL_LINE" >/dev/null < $LOGS_DIR/adding_blacklisted.err + + if [ $? -eq 0 ]; then + SKIP_DWARF=1 + echo "Result polluted by broken DWARF, trying another probe" + + # confirm that the broken DWARF comes from assembler + if [ -n "$VMLINUX_FILE" ]; then + readelf -wi "$VMLINUX_FILE" | + awk -v probe="$BLACKFUNC" '/DW_AT_language/ { comp_lang = $0 } + $0 ~ probe { if (comp_lang) { print comp_lang }; exit }' | + grep -q "MIPS assembler" + + CHECK_EXIT_CODE=$? + if [ $CHECK_EXIT_CODE -ne 0 ]; then + SKIP_DWARF=0 # broken DWARF while available + break + fi + fi + else + ../common/check_all_lines_matched.pl "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err + CHECK_EXIT_CODE=$? 
+ + SKIP_DWARF=0 + break + fi +done + +if [ $SKIP_DWARF -eq 1 ]; then + print_testcase_skipped "adding blacklisted function $BLACKFUNC" +else + print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding blacklisted function $BLACKFUNC" + (( TEST_RESULT += $? )) +fi ### listing not-added probe From 758f18158952a6287ac23679ec04c32d44ca5368 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 23 Oct 2024 16:12:57 -0300 Subject: [PATCH 09/14] perf python: Fix up the build on architectures without HAVE_KVM_STAT_SUPPORT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Noticed while building on a raspbian arm 32-bit system. There was also this other case, fixed by adding a missing util/stat.h with the prototypes: /tmp/tmp.MbiSHoF3dj/perf-6.12.0-rc3/tools/perf/util/python.c:1396:6: error: no previous prototype for ‘perf_stat__set_no_csv_summary’ [-Werror=missing-prototypes] 1396 | void perf_stat__set_no_csv_summary(int set __maybe_unused) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /tmp/tmp.MbiSHoF3dj/perf-6.12.0-rc3/tools/perf/util/python.c:1400:6: error: no previous prototype for ‘perf_stat__set_big_num’ [-Werror=missing-prototypes] 1400 | void perf_stat__set_big_num(int set __maybe_unused) | ^~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors In other architectures this must be building due to some lucky indirect inclusion of that header. 
Fixes: 9dabf4003423c8d3 ("perf python: Switch module to linking libraries from building source") Reviewed-by: Ian Rogers Cc: Adrian Hunter Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/ZxllAtpmEw5fg9oy@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 31a223eaf8e6..ee3d43a7ba45 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -19,6 +19,7 @@ #include "util/bpf-filter.h" #include "util/env.h" #include "util/kvm-stat.h" +#include "util/stat.h" #include "util/kwork.h" #include "util/sample.h" #include "util/lock-contention.h" @@ -1355,6 +1356,7 @@ PyMODINIT_FUNC PyInit_perf(void) unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; +#ifdef HAVE_KVM_STAT_SUPPORT bool kvm_entry_event(struct evsel *evsel __maybe_unused) { return false; @@ -1384,6 +1386,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, char *decode __maybe_unused) { } +#endif // HAVE_KVM_STAT_SUPPORT int find_scripts(char **scripts_array __maybe_unused, char **scripts_path_array __maybe_unused, int num __maybe_unused, int pathlen __maybe_unused) From 08a7d2525511ba07b8ab3dfb472a9d3df4c40f79 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 24 Oct 2024 10:19:06 -0300 Subject: [PATCH 10/14] tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes from these csets: dc1e67f70f6d4e33 ("KVM VMX: Move MSR_IA32_VMX_MISC bit defines to asm/vmx.h") d7bfc9ffd58037ff ("KVM: VMX: Move MSR_IA32_VMX_BASIC bit defines to asm/vmx.h") beb2e446046f8dd9 ("x86/cpu: KVM: Move macro to encode PAT value to common header") e7e80b66fb242a63 ("x86/cpu: KVM: Add common defines for architectural memory types (PAT, MTRRs, etc.)") That cause no changes to tooling: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h 
tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after $ To see how this works take a look at this previous update: https://git.kernel.org/torvalds/c/174372668933ede5 174372668933ede5 ("tools arch x86: Sync the msr-index.h copy with the kernel sources to pick IA32_MKTME_KEYID_PARTITIONING") Just silences this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Please see tools/include/uapi/README for further details. Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Sean Christopherson Cc: Xin Li Link: https://lore.kernel.org/lkml/ZxpLSBzGin3vjs3b@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 34 +++++++++++++++----------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index a7c06a46fb76..3ae84c3b8e6d 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -36,6 +36,20 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) +/* + * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc. + * Most MSRs support/allow only a subset of memory types, but the values + * themselves are common across all relevant MSRs. + */ +#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. 
Strong Uncacheable */ +#define X86_MEMTYPE_WC 1ull /* Write Combining */ +/* RESERVED 2 */ +/* RESERVED 3 */ +#define X86_MEMTYPE_WT 4ull /* Write Through */ +#define X86_MEMTYPE_WP 5ull /* Write Protected */ +#define X86_MEMTYPE_WB 6ull /* Write Back */ +#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */ + /* FRED MSRs */ #define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */ #define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */ @@ -365,6 +379,12 @@ #define MSR_IA32_CR_PAT 0x00000277 +#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) + #define MSR_IA32_DEBUGCTLMSR 0x000001d9 #define MSR_IA32_LASTBRANCHFROMIP 0x000001db #define MSR_IA32_LASTBRANCHTOIP 0x000001dc @@ -1159,15 +1179,6 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 @@ -1185,11 +1196,6 @@ #define MSR_IA32_SMBA_BW_BASE 0xc0000280 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F - /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 From 21a3a3d015aeee2402d14b425197d70aa3bd0d91 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Oct 2024 10:55:09 -0300 Subject: [PATCH 
11/14] tools headers: Synchronize {uapi/}linux/bits.h with the kernel sources To pick up the changes in this cset: 947697c6f0f75f98 ("uapi: Define GENMASK_U128") This addresses these perf build warnings: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/bits.h include/uapi/linux/bits.h diff -u tools/include/linux/bits.h include/linux/bits.h Please see tools/include/uapi/README for further details. Acked-by: Yury Norov Cc: Adrian Hunter Cc: Anshuman Khandual Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/Zx-ZVH7bHqtFn8Dv@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/bits.h | 15 +++++++++++++++ tools/include/uapi/linux/bits.h | 3 +++ 2 files changed, 18 insertions(+) diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 0eb24d21aac2..60044b608817 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -36,4 +36,19 @@ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __GENMASK_U128() depends on _BIT128() which would not work + * in the asm code, as it shifts an 'unsigned __init128' data + * type instead of direct representation of 128 bit constants + * such as long and unsigned long. The fundamental problem is + * that a 128 bit constant will get silently truncated by the + * gcc compiler. 
+ */ +#define GENMASK_U128(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) +#endif + #endif /* __LINUX_BITS_H */ diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 3c2a101986a3..5ee30f882736 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -12,4 +12,7 @@ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_U128(h, l) \ + ((_BIT128((h)) << 1) - (_BIT128(l))) + #endif /* _UAPI_LINUX_BITS_H */ From 93e4b86b3e74e19c95b762cfeb42baa0a94f212f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Oct 2024 11:13:57 -0300 Subject: [PATCH 12/14] tools headers arm64: Sync arm64's cputype.h with the kernel sources To get the changes in: 924725707d80bc25 ("arm64: cputype: Add Neoverse-N3 definitions") That makes this perf source code to be rebuilt: CC /tmp/build/perf-tools/util/arm-spe.o The changes in the above patch add MIDR_NEOVERSE_N3, that probably need changes in arm-spe.c, so probably we need to add it to that array? Or maybe we need to leave this for later when this is all tested on those machines? static const struct midr_range neoverse_spe[] = { MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), {}, }; Mark Rutland recommended about arm-spe.c in a previous update to this file: "I would not touch this for now -- someone would have to go audit the TRMs to check that those other cores have the same encoding, and I think it'd be better to do that as a follow-up." 
That addresses this perf build warning: Warning: Kernel ABI header differences: diff -u tools/arch/arm64/include/asm/cputype.h arch/arm64/include/asm/cputype.h Cc: Adrian Hunter Cc: Catalin Marinas Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/Zx-dffKdGsgkhG96@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/asm/cputype.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 5a7dfeb8e8eb..488f8e751349 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -94,6 +94,7 @@ #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 #define ARM_CPU_PART_CORTEX_A725 0xD87 +#define ARM_CPU_PART_NEOVERSE_N3 0xD8E #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -176,6 +177,7 @@ #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) +#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) From 55f1b540d893da740a81200450014c45a8103f54 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 28 Oct 2024 12:24:37 -0300 Subject: [PATCH 13/14] tools headers: Update the linux/unaligned.h copy with the kernel sources To pick up the changes in: 7f053812dab3946c ("random: vDSO: minimize and simplify header includes") That required adding a copy of include/vdso/unaligned.h and adding it to the headers checked by tools/perf/check-headers.sh.
Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/linux/unaligned.h include/linux/unaligned.h Please see tools/include/uapi/README for further details. Cc: Adrian Hunter Cc: Christophe Leroy Cc: Ian Rogers Cc: Jason A. Donenfeld Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/lkml/Zx-uHvAbPAESofEN@x1 Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/unaligned.h | 11 +---------- tools/include/vdso/unaligned.h | 15 +++++++++++++++ tools/perf/check-headers.sh | 1 + 3 files changed, 17 insertions(+), 10 deletions(-) create mode 100644 tools/include/vdso/unaligned.h diff --git a/tools/include/linux/unaligned.h b/tools/include/linux/unaligned.h index bc0633bc4650..395a4464fe73 100644 --- a/tools/include/linux/unaligned.h +++ b/tools/include/linux/unaligned.h @@ -9,16 +9,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpacked" #pragma GCC diagnostic ignored "-Wattributes" - -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ -}) - -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ -} while (0) +#include <vdso/unaligned.h> #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) #define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h new file mode 100644 index 000000000000..eee3d2a4dbe4 --- /dev/null +++ b/tools/include/vdso/unaligned.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_UNALIGNED_H +#define __VDSO_UNALIGNED_H + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = 
(val); \ +} while (0) + +#endif /* __VDSO_UNALIGNED_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 29adbb423327..a05c1c105c51 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -22,6 +22,7 @@ FILES=( "include/vdso/bits.h" "include/linux/const.h" "include/vdso/const.h" + "include/vdso/unaligned.h" "include/linux/hash.h" "include/linux/list-sort.h" "include/uapi/linux/hw_breakpoint.h" From a5384c426744ebe41dafc6e5fa3acecc05e43462 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 25 Oct 2024 22:54:48 -0700 Subject: [PATCH 14/14] perf cap: Add __NR_capget to arch/x86 unistd As there are duplicated kernel headers in tools/include libc can pick up the wrong definitions. This was causing the wrong system call for capget in perf. Reported-by: Adrian Hunter Fixes: e25ebda78e230283 ("perf cap: Tidy up and improve capability testing") Closes: https://lore.kernel.org/lkml/cc7d6bdf-1aeb-4179-9029-4baf50b59342@intel.com/ Signed-off-by: Ian Rogers Tested-by: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20241026055448.312247-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/unistd_32.h | 3 +++ tools/arch/x86/include/uapi/asm/unistd_64.h | 3 +++ tools/perf/util/cap.c | 10 +++------- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 9de35df1afc3..63182a023e9d 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -11,6 +11,9 @@ #ifndef __NR_getpgid #define __NR_getpgid 132 #endif +#ifndef __NR_capget +#define __NR_capget 184 +#endif #ifndef __NR_gettid #define __NR_gettid 224 #endif diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index 
d0f2043d7132..77311e8d1b5d 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -11,6 +11,9 @@ #ifndef __NR_getpgid #define __NR_getpgid 121 #endif +#ifndef __NR_capget +#define __NR_capget 125 +#endif #ifndef __NR_gettid #define __NR_gettid 186 #endif diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c index 7574a67651bc..69d9a2bcd40b 100644 --- a/tools/perf/util/cap.c +++ b/tools/perf/util/cap.c @@ -7,13 +7,9 @@ #include "debug.h" #include #include -#include #include #include - -#ifndef SYS_capget -#define SYS_capget 90 -#endif +#include #define MAX_LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 @@ -21,9 +17,9 @@ bool perf_cap__capable(int cap, bool *used_root) { struct __user_cap_header_struct header = { .version = _LINUX_CAPABILITY_VERSION_3, - .pid = getpid(), + .pid = 0, }; - struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S]; + struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S] = {}; __u32 cap_val; *used_root = false;