Fix Kselftest's vfork() side effects

See https://lore.kernel.org/r/20240511171445.904356-1-mic@digikod.net
 -----BEGIN PGP SIGNATURE-----
 
 iIYEABYKAC4WIQSVyBthFV4iTW/VU1/l49DojIL20gUCZkCZRxAcbWljQGRpZ2lr
 b2QubmV0AAoJEOXj0OiMgvbSQCwBAJlgxbBIQbzfPOOaAQu/QMXYuDdwXJhMogOq
 XVp4F/i7AQCZUC+rRWVFD3KdQo7L8OqdLnPKiw4fDk3Zph3t9zqbCQ==
 =czaR
 -----END PGP SIGNATURE-----

Merge tag 'kselftest-fix-vfork-2024-05-12' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux

Pull Kselftest fixes from Mickaël Salaün:
 "Fix Kselftest's vfork() side effects.

  As reported by Kernel Test Robot and Sean Christopherson, some
  tests fail since v6.9-rc1 . This is due to the use of vfork() which
  introduced some side effects. Similarly, while making it more generic,
  a previous commit made some Landlock file system tests flaky, and
  subject to the host's file system mount configuration.

  This fixes all these side effects by replacing vfork() with clone3()
  and CLONE_VFORK, which is cleaner (no arbitrary shared memory) and
  makes the Kselftest framework more robust"

Link: https://lore.kernel.org/oe-lkp/202403291015.1fcfa957-oliver.sang@intel.com
Link: https://lore.kernel.org/r/ZjPelW6-AbtYvslu@google.com
Link: https://lore.kernel.org/r/20240511171445.904356-1-mic@digikod.net

* tag 'kselftest-fix-vfork-2024-05-12' of git://git.kernel.org/pub/scm/linux/kernel/git/mic/linux:
  selftests/harness: Handle TEST_F()'s explicit exit codes
  selftests/harness: Fix vfork() side effects
  selftests/harness: Share _metadata between forked processes
  selftests/pidfd: Fix wrong expectation
  selftests/harness: Constify fixture variants
  selftests/landlock: Do not allocate memory in fixture data
  selftests/harness: Fix interleaved scheduling leading to race conditions
  selftests/harness: Fix fixture teardown
  selftests/landlock: Fix FS tests when run on a private mount point
  selftests/pidfd: Fix config for pidfd_setns_test
This commit is contained in:
Linus Torvalds 2024-05-12 13:01:59 -07:00
commit af300a3959
4 changed files with 147 additions and 67 deletions

View file

@ -66,6 +66,8 @@
#include <sys/wait.h>
#include <unistd.h>
#include <setjmp.h>
#include <syscall.h>
#include <linux/sched.h>
#include "kselftest.h"
@ -80,6 +82,17 @@
# define TH_LOG_ENABLED 1
#endif
/* Wait for the child process to end but without sharing memory mapping. */
static inline pid_t clone3_vfork(void)
{
struct clone_args args = {
.flags = CLONE_VFORK,
.exit_signal = SIGCHLD,
};
return syscall(__NR_clone3, &args, sizeof(args));
}
/**
* TH_LOG()
*
@ -281,6 +294,32 @@
* A bare "return;" statement may be used to return early.
*/
#define FIXTURE_TEARDOWN(fixture_name) \
static const bool fixture_name##_teardown_parent; \
__FIXTURE_TEARDOWN(fixture_name)
/**
* FIXTURE_TEARDOWN_PARENT()
* *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
*
* @fixture_name: fixture name
*
* .. code-block:: c
*
* FIXTURE_TEARDOWN_PARENT(fixture_name) { implementation }
*
* Same as FIXTURE_TEARDOWN() but run this code in a parent process. This
* enables the test process to drop its privileges without impacting the
* related FIXTURE_TEARDOWN_PARENT() (e.g. to remove files from a directory
* where write access was dropped).
*
* To make it possible for the parent process to use *self*, share (MAP_SHARED)
* the fixture data between all forked processes.
*/
#define FIXTURE_TEARDOWN_PARENT(fixture_name) \
static const bool fixture_name##_teardown_parent = true; \
__FIXTURE_TEARDOWN(fixture_name)
#define __FIXTURE_TEARDOWN(fixture_name) \
void fixture_name##_teardown( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
@ -325,7 +364,7 @@
* variant.
*/
#define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \
extern FIXTURE_VARIANT(fixture_name) \
extern const FIXTURE_VARIANT(fixture_name) \
_##fixture_name##_##variant_name##_variant; \
static struct __fixture_variant_metadata \
_##fixture_name##_##variant_name##_object = \
@ -337,7 +376,7 @@
__register_fixture_variant(&_##fixture_name##_fixture_object, \
&_##fixture_name##_##variant_name##_object); \
} \
FIXTURE_VARIANT(fixture_name) \
const FIXTURE_VARIANT(fixture_name) \
_##fixture_name##_##variant_name##_variant =
/**
@ -355,10 +394,11 @@
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
*
* The @test_name code is run in a separate process sharing the same memory
* (i.e. vfork), which means that the test process can update its privileges
* without impacting the related FIXTURE_TEARDOWN() (e.g. to remove files from
* a directory where write access was dropped).
* The _metadata object is shared (MAP_SHARED) with all the potential forked
* processes, which enables them to use EXCEPT_*() and ASSERT_*().
*
* The *self* object is only shared with the potential forked processes if
* FIXTURE_TEARDOWN_PARENT() is used instead of FIXTURE_TEARDOWN().
*/
#define TEST_F(fixture_name, test_name) \
__TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
@ -379,53 +419,71 @@
struct __fixture_variant_metadata *variant) \
{ \
/* fixture data is alloced, setup, and torn down per call. */ \
FIXTURE_DATA(fixture_name) self; \
FIXTURE_DATA(fixture_name) self_private, *self = NULL; \
pid_t child = 1; \
int status = 0; \
bool jmp = false; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
/* Makes sure there is only one teardown, even when child forks again. */ \
bool *teardown = mmap(NULL, sizeof(*teardown), \
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
*teardown = false; \
if (sizeof(*self) > 0) { \
if (fixture_name##_teardown_parent) { \
self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \
MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
} else { \
memset(&self_private, 0, sizeof(self_private)); \
self = &self_private; \
} \
} \
if (setjmp(_metadata->env) == 0) { \
/* Use the same _metadata. */ \
child = vfork(); \
/* _metadata and potentially self are shared with all forks. */ \
child = clone3_vfork(); \
if (child == 0) { \
fixture_name##_setup(_metadata, &self, variant->data); \
fixture_name##_setup(_metadata, self, variant->data); \
/* Let setup failure terminate early. */ \
if (_metadata->exit_code) \
_exit(0); \
_metadata->setup_completed = true; \
fixture_name##_##test_name(_metadata, &self, variant->data); \
fixture_name##_##test_name(_metadata, self, variant->data); \
} else if (child < 0 || child != waitpid(child, &status, 0)) { \
ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
_metadata->exit_code = KSFT_FAIL; \
} \
} \
else \
jmp = true; \
if (child == 0) { \
if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \
fixture_name##_teardown(_metadata, &self, variant->data); \
if (_metadata->setup_completed && !fixture_name##_teardown_parent && \
__sync_bool_compare_and_swap(teardown, false, true)) \
fixture_name##_teardown(_metadata, self, variant->data); \
_exit(0); \
} \
if (_metadata->setup_completed && _metadata->teardown_parent) \
fixture_name##_teardown(_metadata, &self, variant->data); \
if (!WIFEXITED(status) && WIFSIGNALED(status)) \
if (_metadata->setup_completed && fixture_name##_teardown_parent && \
__sync_bool_compare_and_swap(teardown, false, true)) \
fixture_name##_teardown(_metadata, self, variant->data); \
munmap(teardown, sizeof(*teardown)); \
if (self && fixture_name##_teardown_parent) \
munmap(self, sizeof(*self)); \
if (WIFEXITED(status)) { \
if (WEXITSTATUS(status)) \
_metadata->exit_code = WEXITSTATUS(status); \
} else if (WIFSIGNALED(status)) { \
/* Forward signal to __wait_for_test(). */ \
kill(getpid(), WTERMSIG(status)); \
} \
__test_check_assert(_metadata); \
} \
static struct __test_metadata \
_##fixture_name##_##test_name##_object = { \
.name = #test_name, \
.fn = &wrapper_##fixture_name##_##test_name, \
.fixture = &_##fixture_name##_fixture_object, \
.termsig = signal, \
.timeout = tmout, \
.teardown_parent = false, \
}; \
static struct __test_metadata *_##fixture_name##_##test_name##_object; \
static void __attribute__((constructor)) \
_register_##fixture_name##_##test_name(void) \
{ \
__register_test(&_##fixture_name##_##test_name##_object); \
struct __test_metadata *object = mmap(NULL, sizeof(*object), \
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
object->name = #test_name; \
object->fn = &wrapper_##fixture_name##_##test_name; \
object->fixture = &_##fixture_name##_fixture_object; \
object->termsig = signal; \
object->timeout = tmout; \
_##fixture_name##_##test_name##_object = object; \
__register_test(object); \
} \
static void fixture_name##_##test_name( \
struct __test_metadata __attribute__((unused)) *_metadata, \
@ -833,11 +891,12 @@ struct __test_xfail {
{ \
.fixture = &_##fixture_name##_fixture_object, \
.variant = &_##fixture_name##_##variant_name##_object, \
.test = &_##fixture_name##_##test_name##_object, \
}; \
static void __attribute__((constructor)) \
_register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
{ \
_##fixture_name##_##variant_name##_##test_name##_xfail.test = \
_##fixture_name##_##test_name##_object; \
__register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \
}
@ -880,7 +939,6 @@ struct __test_metadata {
bool timed_out; /* did this test timeout instead of exiting? */
bool aborted; /* stopped test due to failed ASSERT */
bool setup_completed; /* did setup finish? */
bool teardown_parent; /* run teardown in a parent process */
jmp_buf env; /* for exiting out of test early */
struct __test_results *results;
struct __test_metadata *prev, *next;
@ -1164,6 +1222,9 @@ void __run_test(struct __fixture_metadata *f,
/* reset test struct */
t->exit_code = KSFT_PASS;
t->trigger = 0;
t->aborted = false;
t->setup_completed = false;
memset(t->env, 0, sizeof(t->env));
memset(t->results->reason, 0, sizeof(t->results->reason));
if (asprintf(&test_name, "%s%s%s.%s", f->name,
@ -1179,7 +1240,7 @@ void __run_test(struct __fixture_metadata *f,
fflush(stdout);
fflush(stderr);
t->pid = fork();
t->pid = clone3_vfork();
if (t->pid < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
t->exit_code = KSFT_FAIL;

View file

@ -9,6 +9,7 @@
#define _GNU_SOURCE
#include <fcntl.h>
#include <libgen.h>
#include <linux/landlock.h>
#include <linux/magic.h>
#include <sched.h>
@ -285,15 +286,21 @@ static void prepare_layout_opt(struct __test_metadata *const _metadata,
static void prepare_layout(struct __test_metadata *const _metadata)
{
_metadata->teardown_parent = true;
prepare_layout_opt(_metadata, &mnt_tmp);
}
static void cleanup_layout(struct __test_metadata *const _metadata)
{
set_cap(_metadata, CAP_SYS_ADMIN);
EXPECT_EQ(0, umount(TMP_DIR));
if (umount(TMP_DIR)) {
/*
* According to the test environment, the mount point of the
* current directory may be shared or not, which changes the
* visibility of the nested TMP_DIR mount point for the test's
* parent process doing this cleanup.
*/
ASSERT_EQ(EINVAL, errno);
}
clear_cap(_metadata, CAP_SYS_ADMIN);
EXPECT_EQ(0, remove_path(TMP_DIR));
}
@ -307,7 +314,7 @@ FIXTURE_SETUP(layout0)
prepare_layout(_metadata);
}
FIXTURE_TEARDOWN(layout0)
FIXTURE_TEARDOWN_PARENT(layout0)
{
cleanup_layout(_metadata);
}
@ -370,7 +377,7 @@ FIXTURE_SETUP(layout1)
create_layout1(_metadata);
}
FIXTURE_TEARDOWN(layout1)
FIXTURE_TEARDOWN_PARENT(layout1)
{
remove_layout1(_metadata);
@ -3683,7 +3690,7 @@ FIXTURE_SETUP(ftruncate)
create_file(_metadata, file1_s1d1);
}
FIXTURE_TEARDOWN(ftruncate)
FIXTURE_TEARDOWN_PARENT(ftruncate)
{
EXPECT_EQ(0, remove_path(file1_s1d1));
cleanup_layout(_metadata);
@ -3861,7 +3868,7 @@ FIXTURE_SETUP(layout1_bind)
clear_cap(_metadata, CAP_SYS_ADMIN);
}
FIXTURE_TEARDOWN(layout1_bind)
FIXTURE_TEARDOWN_PARENT(layout1_bind)
{
/* umount(dir_s2d2)) is handled by namespace lifetime. */
@ -4266,7 +4273,7 @@ FIXTURE_SETUP(layout2_overlay)
clear_cap(_metadata, CAP_SYS_ADMIN);
}
FIXTURE_TEARDOWN(layout2_overlay)
FIXTURE_TEARDOWN_PARENT(layout2_overlay)
{
if (self->skip_test)
SKIP(return, "overlayfs is not supported (teardown)");
@ -4616,7 +4623,6 @@ FIXTURE(layout3_fs)
{
bool has_created_dir;
bool has_created_file;
char *dir_path;
bool skip_test;
};
@ -4675,11 +4681,24 @@ FIXTURE_VARIANT_ADD(layout3_fs, hostfs) {
.cwd_fs_magic = HOSTFS_SUPER_MAGIC,
};
static char *dirname_alloc(const char *path)
{
char *dup;
if (!path)
return NULL;
dup = strdup(path);
if (!dup)
return NULL;
return dirname(dup);
}
FIXTURE_SETUP(layout3_fs)
{
struct stat statbuf;
const char *slash;
size_t dir_len;
char *dir_path = dirname_alloc(variant->file_path);
if (!supports_filesystem(variant->mnt.type) ||
!cwd_matches_fs(variant->cwd_fs_magic)) {
@ -4687,27 +4706,15 @@ FIXTURE_SETUP(layout3_fs)
SKIP(return, "this filesystem is not supported (setup)");
}
_metadata->teardown_parent = true;
slash = strrchr(variant->file_path, '/');
ASSERT_NE(slash, NULL);
dir_len = (size_t)slash - (size_t)variant->file_path;
ASSERT_LT(0, dir_len);
self->dir_path = malloc(dir_len + 1);
self->dir_path[dir_len] = '\0';
strncpy(self->dir_path, variant->file_path, dir_len);
prepare_layout_opt(_metadata, &variant->mnt);
/* Creates directory when required. */
if (stat(self->dir_path, &statbuf)) {
if (stat(dir_path, &statbuf)) {
set_cap(_metadata, CAP_DAC_OVERRIDE);
EXPECT_EQ(0, mkdir(self->dir_path, 0700))
EXPECT_EQ(0, mkdir(dir_path, 0700))
{
TH_LOG("Failed to create directory \"%s\": %s",
self->dir_path, strerror(errno));
free(self->dir_path);
self->dir_path = NULL;
dir_path, strerror(errno));
}
self->has_created_dir = true;
clear_cap(_metadata, CAP_DAC_OVERRIDE);
@ -4728,9 +4735,11 @@ FIXTURE_SETUP(layout3_fs)
self->has_created_file = true;
clear_cap(_metadata, CAP_DAC_OVERRIDE);
}
free(dir_path);
}
FIXTURE_TEARDOWN(layout3_fs)
FIXTURE_TEARDOWN_PARENT(layout3_fs)
{
if (self->skip_test)
SKIP(return, "this filesystem is not supported (teardown)");
@ -4746,16 +4755,17 @@ FIXTURE_TEARDOWN(layout3_fs)
}
if (self->has_created_dir) {
char *dir_path = dirname_alloc(variant->file_path);
set_cap(_metadata, CAP_DAC_OVERRIDE);
/*
* Don't check for error because the directory might already
* have been removed (cf. release_inode test).
*/
rmdir(self->dir_path);
rmdir(dir_path);
clear_cap(_metadata, CAP_DAC_OVERRIDE);
free(dir_path);
}
free(self->dir_path);
self->dir_path = NULL;
cleanup_layout(_metadata);
}
@ -4822,7 +4832,10 @@ TEST_F_FORK(layout3_fs, tag_inode_dir_mnt)
TEST_F_FORK(layout3_fs, tag_inode_dir_child)
{
layer3_fs_tag_inode(_metadata, self, variant, self->dir_path);
char *dir_path = dirname_alloc(variant->file_path);
layer3_fs_tag_inode(_metadata, self, variant, dir_path);
free(dir_path);
}
TEST_F_FORK(layout3_fs, tag_inode_file)
@ -4849,9 +4862,13 @@ TEST_F_FORK(layout3_fs, release_inodes)
if (self->has_created_file)
EXPECT_EQ(0, remove_path(variant->file_path));
if (self->has_created_dir)
if (self->has_created_dir) {
char *dir_path = dirname_alloc(variant->file_path);
/* Don't check for error because of cgroup specificities. */
remove_path(self->dir_path);
remove_path(dir_path);
free(dir_path);
}
ruleset_fd =
create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);

View file

@ -3,5 +3,7 @@ CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_TIME_NS=y
CONFIG_GENERIC_VDSO_TIME_NS=y
CONFIG_CGROUPS=y
CONFIG_CHECKPOINT_RESTORE=y

View file

@ -158,7 +158,7 @@ FIXTURE_SETUP(current_nsset)
/* Create task that exits right away. */
self->child_pid_exited = create_child(&self->child_pidfd_exited,
CLONE_NEWUSER | CLONE_NEWNET);
EXPECT_GT(self->child_pid_exited, 0);
EXPECT_GE(self->child_pid_exited, 0);
if (self->child_pid_exited == 0)
_exit(EXIT_SUCCESS);