Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Daniel Borkmann:

 - Fix BPF verifier to force a checkpoint when the program's jump
   history becomes too long (Eduard Zingerman)

 - Add several fixes to the BPF bits iterator addressing issues like
   memory leaks and overflow problems (Hou Tao)

 - Fix an out-of-bounds write in trie_get_next_key (Byeonguk Jeong)

 - Fix BPF test infra's LIVE_FRAME frame update after a page has been
   recycled (Toke Høiland-Jørgensen)

 - Fix BPF verifier and undo the 40 bytes of extra stack space for
   bpf_fastcall patterns due to various bugs (Eduard Zingerman)

 - Fix a BPF sockmap race condition which could trigger a NULL pointer
   dereference in sock_map_link_update_prog (Cong Wang)

 - Fix tcp_bpf_recvmsg_parser to retrieve seq_copied from tcp_sk under
   the socket lock (Jiayuan Chen)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, test_run: Fix LIVE_FRAME frame update after a page has been recycled
  selftests/bpf: Add three test cases for bits_iter
  bpf: Use __u64 to save the bits in bits iterator
  bpf: Check the validity of nr_words in bpf_iter_bits_new()
  bpf: Add bpf_mem_alloc_check_size() helper
  bpf: Free dynamically allocated bits in bpf_iter_bits_destroy()
  bpf: disallow 40-bytes extra stack for bpf_fastcall patterns
  selftests/bpf: Add test for trie_get_next_key()
  bpf: Fix out-of-bounds write in trie_get_next_key()
  selftests/bpf: Test with a very short loop
  bpf: Force checkpoint when jmp history is too long
  bpf: fix filed access without lock
  sock_map: fix a NULL pointer dereference in sock_map_link_update_prog()
Merged by Linus Torvalds on 2024-10-31 14:56:19 -10:00 (commit 5635f18942); 13 changed files with 269 additions and 88 deletions.

include/linux/bpf_mem_alloc.h

@@ -33,6 +33,9 @@ int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg
 int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size);
 void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma);
 
+/* Check the allocation size for kmalloc equivalent allocator */
+int bpf_mem_alloc_check_size(bool percpu, size_t size);
+
 /* kmalloc/kfree equivalent: */
 void *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size);
 void bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr);
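The intended call pattern is validate-then-allocate, which is exactly what kernel/bpf/helpers.c does below. A minimal sketch of a caller (the wrapper name is illustrative, not kernel code):

static void *checked_bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size)
{
	/* reject sizes the kmalloc-equivalent allocator cannot serve */
	if (bpf_mem_alloc_check_size(false, size))
		return NULL;
	return bpf_mem_alloc(ma, size);
}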

kernel/bpf/helpers.c

@@ -2851,21 +2851,47 @@ struct bpf_iter_bits {
 	__u64 __opaque[2];
 } __aligned(8);
 
+#define BITS_ITER_NR_WORDS_MAX 511
+
 struct bpf_iter_bits_kern {
 	union {
-		unsigned long *bits;
-		unsigned long bits_copy;
+		__u64 *bits;
+		__u64 bits_copy;
 	};
-	u32 nr_bits;
+	int nr_bits;
 	int bit;
 } __aligned(8);
 
+/* On 64-bit hosts, unsigned long and u64 have the same size, so passing
+ * a u64 pointer and an unsigned long pointer to find_next_bit() will
+ * return the same result, as both point to the same 8-byte area.
+ *
+ * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
+ * pointer also makes no difference. This is because the first iterated
+ * unsigned long is composed of bits 0-31 of the u64 and the second unsigned
+ * long is composed of bits 32-63 of the u64.
+ *
+ * However, for 32-bit big-endian hosts, this is not the case. The first
+ * iterated unsigned long will be bits 32-63 of the u64, so swap these two
+ * ulong values within the u64.
+ */
+static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
+{
+#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
+	unsigned int i;
+
+	for (i = 0; i < nr; i++)
+		bits[i] = (bits[i] >> 32) | ((u64)(u32)bits[i] << 32);
+#endif
+}
+
 /**
  * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
  * @it: The new bpf_iter_bits to be created
  * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
  * @nr_words: The size of the specified memory area, measured in 8-byte units.
- *            Due to the limitation of memalloc, it can't be greater than 512.
+ *            The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be
+ *            further reduced by the BPF memory allocator implementation.
  *
  * This function initializes a new bpf_iter_bits structure for iterating over
  * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
@@ -2892,6 +2918,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
 	if (!unsafe_ptr__ign || !nr_words)
 		return -EINVAL;
+	if (nr_words > BITS_ITER_NR_WORDS_MAX)
+		return -E2BIG;
 
 	/* Optimization for u64 mask */
 	if (nr_bits == 64) {
@@ -2899,10 +2927,15 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
 		if (err)
 			return -EFAULT;
 
+		swap_ulong_in_u64(&kit->bits_copy, nr_words);
+
 		kit->nr_bits = nr_bits;
 		return 0;
 	}
 
+	if (bpf_mem_alloc_check_size(false, nr_bytes))
+		return -E2BIG;
+
 	/* Fallback to memalloc */
 	kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
 	if (!kit->bits)
@@ -2914,6 +2947,8 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
 		return err;
 	}
 
+	swap_ulong_in_u64(kit->bits, nr_words);
+
 	kit->nr_bits = nr_bits;
 	return 0;
 }
@@ -2930,17 +2965,16 @@ bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
 __bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
 {
 	struct bpf_iter_bits_kern *kit = (void *)it;
-	u32 nr_bits = kit->nr_bits;
-	const unsigned long *bits;
-	int bit;
+	int bit = kit->bit, nr_bits = kit->nr_bits;
+	const void *bits;
 
-	if (nr_bits == 0)
+	if (!nr_bits || bit >= nr_bits)
 		return NULL;
 
 	bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
-	bit = find_next_bit(bits, nr_bits, kit->bit + 1);
+	bit = find_next_bit(bits, nr_bits, bit + 1);
 	if (bit >= nr_bits) {
-		kit->nr_bits = 0;
+		kit->bit = bit;
 		return NULL;
 	}
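For orientation, a minimal sketch of how a BPF program drives this iterator through the bpf_for_each() macro, as the selftests near the end of this diff do (assumes the usual vmlinux.h/bpf_helpers.h includes; the function name is made up):

SEC("syscall")
int count_low_bits(void *ctx)
{
	u64 mask = 0x00000000ffffffffULL;
	int nr = 0;
	int *bit;

	/* nr_words == 1 takes the on-stack bits_copy fast path */
	bpf_for_each(bits, bit, &mask, 1)
		nr++;

	return nr; /* 32 */
}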

kernel/bpf/lpm_trie.c

@@ -655,7 +655,7 @@ static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key)
 	if (!key || key->prefixlen > trie->max_prefixlen)
 		goto find_leftmost;
 
-	node_stack = kmalloc_array(trie->max_prefixlen,
+	node_stack = kmalloc_array(trie->max_prefixlen + 1,
 				   sizeof(struct lpm_trie_node *),
 				   GFP_ATOMIC | __GFP_NOWARN);
 	if (!node_stack)
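The off-by-one being fixed: prefix lengths 0 through max_prefixlen can all sit on a single root-to-leaf chain, i.e. max_prefixlen + 1 nodes, one more than the old stack sizing reserved. A sketch of the worst-case depth (illustrative helper, not kernel code; the selftest added below builds exactly this chain):

static unsigned int lpm_worst_case_depth(unsigned int max_prefixlen)
{
	unsigned int depth = 0, plen;

	/* one node per prefix length, e.g. /0 .. /32 for 32-bit data */
	for (plen = 0; plen <= max_prefixlen; plen++)
		depth++;

	return depth; /* max_prefixlen + 1 */
}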

kernel/bpf/memalloc.c

@@ -35,6 +35,8 @@
  */
 #define LLIST_NODE_SZ sizeof(struct llist_node)
 
+#define BPF_MEM_ALLOC_SIZE_MAX 4096
+
 /* similar to kmalloc, but sizeof == 8 bucket is gone */
 static u8 size_index[24] __ro_after_init = {
 	3,	/* 8 */
@@ -65,7 +67,7 @@ static u8 size_index[24] __ro_after_init = {
 
 static int bpf_mem_cache_idx(size_t size)
 {
-	if (!size || size > 4096)
+	if (!size || size > BPF_MEM_ALLOC_SIZE_MAX)
 		return -1;
 
 	if (size <= 192)
@@ -1005,3 +1007,13 @@ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags)
 
 	return !ret ? NULL : ret + LLIST_NODE_SZ;
 }
+
+int bpf_mem_alloc_check_size(bool percpu, size_t size)
+{
+	/* The size of percpu allocation doesn't have LLIST_NODE_SZ overhead */
+	if ((percpu && size > BPF_MEM_ALLOC_SIZE_MAX) ||
+	    (!percpu && size > BPF_MEM_ALLOC_SIZE_MAX - LLIST_NODE_SZ))
+		return -E2BIG;
+
+	return 0;
+}
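These two constants are what pin BITS_ITER_NR_WORDS_MAX at 511 in kernel/bpf/helpers.c: a non-percpu unit loses LLIST_NODE_SZ bytes of the 4096-byte cap. A standalone sanity check, assuming a 64-bit host where sizeof(struct llist_node) == 8:

#include <stdio.h>

int main(void)
{
	const size_t size_max = 4096;	/* BPF_MEM_ALLOC_SIZE_MAX */
	const size_t llist_node_sz = 8;	/* sizeof(struct llist_node), 64-bit */

	/* largest kmalloc-equivalent unit, in 8-byte words */
	printf("%zu\n", (size_max - llist_node_sz) / 8);	/* prints 511 */
	return 0;
}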

kernel/bpf/verifier.c

@@ -6804,20 +6804,10 @@ static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
 					   struct bpf_func_state *state,
 					   enum bpf_access_type t)
 {
-	struct bpf_insn_aux_data *aux = &env->insn_aux_data[env->insn_idx];
-	int min_valid_off, max_bpf_stack;
-
-	/* If accessing instruction is a spill/fill from bpf_fastcall pattern,
-	 * add room for all caller saved registers below MAX_BPF_STACK.
-	 * In case if bpf_fastcall rewrite won't happen maximal stack depth
-	 * would be checked by check_max_stack_depth_subprog().
-	 */
-	max_bpf_stack = MAX_BPF_STACK;
-	if (aux->fastcall_pattern)
-		max_bpf_stack += CALLER_SAVED_REGS * BPF_REG_SIZE;
+	int min_valid_off;
 
 	if (t == BPF_WRITE || env->allow_uninit_stack)
-		min_valid_off = -max_bpf_stack;
+		min_valid_off = -MAX_BPF_STACK;
 	else
 		min_valid_off = -state->allocated_stack;
@@ -17886,9 +17876,11 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	struct bpf_verifier_state_list *sl, **pprev;
 	struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
 	int i, j, n, err, states_cnt = 0;
-	bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
-	bool add_new_state = force_new_state;
-	bool force_exact;
+	bool force_new_state, add_new_state, force_exact;
+
+	force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
+			  /* Avoid accumulating infinitely long jmp history */
+			  cur->jmp_history_cnt > 40;
 
 	/* bpf progs typically have pruning point every 4 instructions
 	 * http://vger.kernel.org/bpfconf2019.html#session-1
@@ -17898,6 +17890,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	 * In tests that amounts to up to 50% reduction into total verifier
 	 * memory consumption and 20% verifier time speedup.
 	 */
+	add_new_state = force_new_state;
 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
 	    env->insn_processed - env->prev_insn_processed >= 8)
 		add_new_state = true;
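The new cur->jmp_history_cnt > 40 condition is what the short_loop1 selftest at the bottom of this diff exercises. The env->test_state_freq flag next to it corresponds to loading a program with BPF_F_TEST_STATE_FREQ; a hedged libbpf sketch (the function name is illustrative):

#include <linux/bpf.h>
#include <bpf/libbpf.h>

static int load_with_forced_checkpoints(struct bpf_object *obj)
{
	struct bpf_program *prog = bpf_object__next_program(obj, NULL);

	/* testing-only flag: checkpoint at every instruction */
	bpf_program__set_flags(prog, bpf_program__flags(prog) |
				     BPF_F_TEST_STATE_FREQ);
	return bpf_object__load(obj);
}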

net/bpf/test_run.c

@@ -246,6 +246,7 @@ static void reset_ctx(struct xdp_page_head *head)
 	head->ctx.data_meta = head->orig_ctx.data_meta;
 	head->ctx.data_end = head->orig_ctx.data_end;
 	xdp_update_frame_from_buff(&head->ctx, head->frame);
+	head->frame->mem = head->orig_ctx.rxq->mem;
 }
 
 static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
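The LIVE_FRAME path only runs when an XDP test run is invoked with BPF_F_TEST_XDP_LIVE_FRAMES, where repeated runs recycle pages through the page pool. A hedged userspace sketch of triggering it (packet setup elided):

#include <linux/bpf.h>
#include <bpf/bpf.h>

static int run_live_frames(int prog_fd, void *pkt, size_t pkt_len)
{
	LIBBPF_OPTS(bpf_test_run_opts, opts,
		.data_in = pkt,
		.data_size_in = pkt_len,
		.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
		.repeat = 64,	/* repeated runs recycle pages */
	);

	return bpf_prog_test_run_opts(prog_fd, &opts);
}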

net/core/sock_map.c

@@ -1760,6 +1760,10 @@ static int sock_map_link_update_prog(struct bpf_link *link,
 		ret = -EINVAL;
 		goto out;
 	}
+	if (!sockmap_link->map) {
+		ret = -ENOLINK;
+		goto out;
+	}
 
 	ret = sock_map_prog_link_lookup(sockmap_link->map, &pprog, &plink,
 					sockmap_link->attach_type);
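The window being closed is a link update racing with the release of the map the link was attached to. A hedged userspace reproduction sketch (setup and error handling omitted; the three fds are assumed to exist):

#include <bpf/bpf.h>
#include <pthread.h>
#include <unistd.h>

static int link_fd, new_prog_fd, map_fd;	/* assumed set up elsewhere */

static void *updater(void *arg)
{
	(void)arg;
	/* before the fix this could dereference a NULL sockmap_link->map */
	bpf_link_update(link_fd, new_prog_fd, NULL);
	return NULL;
}

static void race_once(void)
{
	pthread_t t;

	pthread_create(&t, NULL, updater, NULL);
	close(map_fd);	/* tear the map down while the update is in flight */
	pthread_join(t, NULL);
}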

net/ipv4/tcp_bpf.c

@@ -221,11 +221,11 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
 				  int flags,
 				  int *addr_len)
 {
-	struct tcp_sock *tcp = tcp_sk(sk);
 	int peek = flags & MSG_PEEK;
-	u32 seq = tcp->copied_seq;
 	struct sk_psock *psock;
+	struct tcp_sock *tcp;
 	int copied = 0;
+	u32 seq;
 
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
@@ -238,7 +238,8 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
 		return tcp_recvmsg(sk, msg, len, flags, addr_len);
 
 	lock_sock(sk);
-
+	tcp = tcp_sk(sk);
+	seq = tcp->copied_seq;
 	/* We may have received data on the sk_receive_queue pre-accept and
 	 * then we can not use read_skb in this context because we haven't
 	 * assigned a sk_socket yet so have no link to the ops. The work-around
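The shape of the fix in isolation: copied_seq is only stable while the socket lock is held, so the snapshot moves inside lock_sock(). A minimal sketch of the pattern (not the exact kernel code):

static u32 snapshot_copied_seq(struct sock *sk)
{
	u32 seq;

	lock_sock(sk);
	seq = tcp_sk(sk)->copied_seq;
	release_sock(sk);

	return seq;
}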

tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c (new file)

@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/bpf.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+	__u32 prefix;
+	__u32 data;
+};
+
+struct get_next_key_ctx {
+	struct test_lpm_key key;
+	bool start;
+	bool stop;
+	int map_fd;
+	int loop;
+};
+
+static void *get_next_key_fn(void *arg)
+{
+	struct get_next_key_ctx *ctx = arg;
+	struct test_lpm_key next_key;
+	int i = 0;
+
+	while (!ctx->start)
+		usleep(1);
+
+	while (!ctx->stop && i++ < ctx->loop)
+		bpf_map_get_next_key(ctx->map_fd, &ctx->key, &next_key);
+
+	return NULL;
+}
+
+static void abort_get_next_key(struct get_next_key_ctx *ctx, pthread_t *tids,
+			       unsigned int nr)
+{
+	unsigned int i;
+
+	ctx->stop = true;
+	ctx->start = true;
+	for (i = 0; i < nr; i++)
+		pthread_join(tids[i], NULL);
+}
+
+/* This test aims to prevent regression of future. As long as the kernel does
+ * not panic, it is considered as success.
+ */
+void test_lpm_trie_map_get_next_key(void)
+{
+#define MAX_NR_THREADS 8
+	LIBBPF_OPTS(bpf_map_create_opts, create_opts,
+		    .map_flags = BPF_F_NO_PREALLOC);
+	struct test_lpm_key key = {};
+	__u32 val = 0;
+	int map_fd;
+	const __u32 max_prefixlen = 8 * (sizeof(key) - sizeof(key.prefix));
+	const __u32 max_entries = max_prefixlen + 1;
+	unsigned int i, nr = MAX_NR_THREADS, loop = 65536;
+	pthread_t tids[MAX_NR_THREADS];
+	struct get_next_key_ctx ctx;
+	int err;
+
+	map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map",
+				sizeof(struct test_lpm_key), sizeof(__u32),
+				max_entries, &create_opts);
+	CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n",
+	      strerror(errno));
+
+	for (i = 0; i <= max_prefixlen; i++) {
+		key.prefix = i;
+		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+		CHECK(err, "bpf_map_update_elem()", "error:%s\n",
+		      strerror(errno));
+	}
+
+	ctx.start = false;
+	ctx.stop = false;
+	ctx.map_fd = map_fd;
+	ctx.loop = loop;
+	memcpy(&ctx.key, &key, sizeof(key));
+
+	for (i = 0; i < nr; i++) {
+		err = pthread_create(&tids[i], NULL, get_next_key_fn, &ctx);
+		if (err) {
+			abort_get_next_key(&ctx, tids, i);
+			CHECK(err, "pthread_create", "error %d\n", err);
+		}
+	}
+
+	ctx.start = true;
+	for (i = 0; i < nr; i++)
+		pthread_join(tids[i], NULL);
+
+	printf("%s:PASS\n", __func__);
+
+	close(map_fd);
+}

tools/testing/selftests/bpf/progs/verifier_bits_iter.c

@@ -15,6 +15,8 @@ int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign,
 int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak;
 void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak;
 
+u64 bits_array[511] = {};
+
 SEC("iter.s/cgroup")
 __description("bits iter without destroy")
 __failure __msg("Unreleased reference")
@@ -110,16 +112,16 @@ int bit_index(void)
 }
 
 SEC("syscall")
-__description("bits nomem")
+__description("bits too big")
 __success __retval(0)
-int bits_nomem(void)
+int bits_too_big(void)
 {
 	u64 data[4];
 	int nr = 0;
 	int *bit;
 
 	__builtin_memset(&data, 0xff, sizeof(data));
-	bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */
+	bpf_for_each(bits, bit, &data[0], 512) /* Be greater than 511 */
 		nr++;
 	return nr;
 }
@@ -151,3 +153,56 @@ int zero_words(void)
 		nr++;
 	return nr;
 }
+
+SEC("syscall")
+__description("huge words")
+__success __retval(0)
+int huge_words(void)
+{
+	u64 data[8] = {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1};
+	int nr = 0;
+	int *bit;
+
+	bpf_for_each(bits, bit, &data[0], 67108865)
+		nr++;
+	return nr;
+}
+
+SEC("syscall")
+__description("max words")
+__success __retval(4)
+int max_words(void)
+{
+	volatile int nr = 0;
+	int *bit;
+
+	bits_array[0] = (1ULL << 63) | 1U;
+	bits_array[510] = (1ULL << 33) | (1ULL << 32);
+
+	bpf_for_each(bits, bit, bits_array, 511) {
+		if (nr == 0 && *bit != 0)
+			break;
+		if (nr == 2 && *bit != 32672)
+			break;
+		nr++;
+	}
+	return nr;
+}
+
+SEC("syscall")
+__description("bad words")
+__success __retval(0)
+int bad_words(void)
+{
+	void *bad_addr = (void *)(3UL << 30);
+	int nr = 0;
+	int *bit;
+
+	bpf_for_each(bits, bit, bad_addr, 1)
+		nr++;
+
+	bpf_for_each(bits, bit, bad_addr, 4)
+		nr++;
+
+	return nr;
+}
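The constants in these tests are not arbitrary: 511 is BITS_ITER_NR_WORDS_MAX, 3UL << 30 is an address that is very unlikely to be mapped, and 67108865 (2^26 + 1) is chosen so that the old u32 bit count wrapped to exactly 64 and silently took the single-u64 fast path. A standalone check of that wrap, assuming the old u32 arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int nr_words = 67108865;	/* 2^26 + 1 */
	unsigned int nr_bytes = nr_words * 8;	/* 0x20000008, still fits u32 */
	unsigned int nr_bits = nr_bytes * 8;	/* 0x100000040 truncates */

	printf("%u\n", nr_bits);	/* prints 64 */
	return 0;
}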

tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c

@@ -790,61 +790,6 @@ __naked static void cumulative_stack_depth_subprog(void)
 	:: __imm(bpf_get_smp_processor_id) : __clobber_all);
 }
 
-SEC("raw_tp")
-__arch_x86_64
-__log_level(4)
-__msg("stack depth 512")
-__xlated("0: r1 = 42")
-__xlated("1: *(u64 *)(r10 -512) = r1")
-__xlated("2: w0 = ")
-__xlated("3: r0 = &(void __percpu *)(r0)")
-__xlated("4: r0 = *(u32 *)(r0 +0)")
-__xlated("5: exit")
-__success
-__naked int bpf_fastcall_max_stack_ok(void)
-{
-	asm volatile(
-	"r1 = 42;"
-	"*(u64 *)(r10 - %[max_bpf_stack]) = r1;"
-	"*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;"
-	"call %[bpf_get_smp_processor_id];"
-	"r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);"
-	"exit;"
-	:
-	: __imm_const(max_bpf_stack, MAX_BPF_STACK),
-	  __imm_const(max_bpf_stack_8, MAX_BPF_STACK + 8),
-	  __imm(bpf_get_smp_processor_id)
-	: __clobber_all
-	);
-}
-
-SEC("raw_tp")
-__arch_x86_64
-__log_level(4)
-__msg("stack depth 520")
-__failure
-__naked int bpf_fastcall_max_stack_fail(void)
-{
-	asm volatile(
-	"r1 = 42;"
-	"*(u64 *)(r10 - %[max_bpf_stack]) = r1;"
-	"*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;"
-	"call %[bpf_get_smp_processor_id];"
-	"r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);"
-	/* call to prandom blocks bpf_fastcall rewrite */
-	"*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;"
-	"call %[bpf_get_prandom_u32];"
-	"r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);"
-	"exit;"
-	:
-	: __imm_const(max_bpf_stack, MAX_BPF_STACK),
-	  __imm_const(max_bpf_stack_8, MAX_BPF_STACK + 8),
-	  __imm(bpf_get_smp_processor_id),
-	  __imm(bpf_get_prandom_u32)
-	: __clobber_all
-	);
-}
-
 SEC("cgroup/getsockname_unix")
 __xlated("0: r2 = 1")
 /* bpf_cast_to_kern_ctx is replaced by a single assignment */

tools/testing/selftests/bpf/progs/verifier_search_pruning.c

@@ -2,6 +2,7 @@
 /* Converted from tools/testing/selftests/bpf/verifier/search_pruning.c */
 
 #include <linux/bpf.h>
+#include <../../../include/linux/filter.h>
 #include <bpf/bpf_helpers.h>
 #include "bpf_misc.h"
 
@@ -336,4 +337,26 @@ l0_%=: r1 = 42;					\
 	: __clobber_all);
 }
 
+/* Without a checkpoint forcibly inserted at the back-edge of a loop, this
+ * test would take a very long time to verify.
+ */
+SEC("kprobe")
+__failure __log_level(4)
+__msg("BPF program is too large.")
+__naked void short_loop1(void)
+{
+	asm volatile (
+	"   r7 = *(u16 *)(r1 +0);"
+	"1: r7 += 0x1ab064b9;"
+	"   .8byte %[jset];" /* same as 'if r7 & 0x702000 goto 1b;' */
+	"   r7 &= 0x1ee60e;"
+	"   r7 += r1;"
+	"   if r7 s> 0x37d2 goto +0;"
+	"   r0 = 0;"
+	"   exit;"
+	:
+	: __imm_insn(jset, BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x702000, -2))
+	: __clobber_all);
+}
+
 char _license[] SEC("license") = "GPL";

tools/testing/selftests/bpf/.gitignore

@@ -15,3 +15,4 @@ test_usdt*
 test_verif_scale*
 test_xdp_noinline*
 xdp_synproxy*
+verifier_search_pruning*