voidnsrun/voidnsrun.c

515 lines
17 KiB
C

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sched.h>
#include <stdbool.h>
#include <dirent.h>
#include <signal.h>
#include <libgen.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/prctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/limits.h>
#include "config.h"
#include "utils.h"
#include "macros.h"
/* Set to 1 by the onterm() signal handler; polled by the accept() loop of the
 * forked server process in main() to know when to stop and clean up. */
volatile sig_atomic_t term_caught = 0;
/* Enabled by the -V command line option. Presumably consulted by the DEBUG()
 * macro to decide whether to print anything -- confirm in macros.h/utils.h. */
bool g_verbose = false;
/*
 * Print the command line help to stdout.
 *
 * progname: name to show in the "Usage:" line (main() passes argv[0]).
 *
 * The whole help text is emitted with a single printf() call; the two %d
 * conversions both report USER_LISTS_MAX, the per-option limit for -m and -u.
 */
void usage(const char *progname)
{
    printf("Usage: %s [OPTIONS] PROGRAM [ARGS]\n"
           "\n"
           "Options:\n"
           " -r <path>: Container path. When this option is not present,\n"
           " " CONTAINER_DIR_VAR " environment variable is used.\n"
           " -m <path>: Add bind mount. You can add up to %d paths.\n"
           " -u <path>: Add undo bind mount. You can add up to %d paths.\n"
           " -d <path>: Add /usr subdirectory bind mount.\n"
           " -U <path>: Path to " VOIDNSUNDO_NAME ". When this option is not present,\n"
           " " UNDO_BIN_VAR " environment variable is used.\n"
           " -i: Don't treat missing source or target for added mounts as error.\n"
           " -V: Enable verbose output.\n"
           " -h: Print this help.\n"
           " -v: Print version.\n",
           progname, USER_LISTS_MAX, USER_LISTS_MAX);
}
size_t mount_dirs(const char *source_prefix,
size_t source_prefix_len,
struct strarray *targets,
struct intarray *created)
{
char buf[PATH_MAX];
int successful = 0;
mode_t mode;
for (size_t i = 0; i < targets->end; i++) {
/* Check if it's safe to proceed. */
if (source_prefix_len + strlen(targets->list[i]) >= PATH_MAX) {
ERROR("error: path %s%s is too large.\n", source_prefix, targets->list[i]);
continue;
}
/* Should be safe as we just checked that total length of source_prefix
* and targets->list[i] is no more than PATH_MAX-1. */
strcpy(buf, source_prefix);
strcat(buf, targets->list[i]);
if (!isdir(buf)) {
ERROR("error: source mount dir %s does not exists.\n", buf);
continue;
}
if (!exists(targets->list[i])) {
if (created != NULL) {
mode = getmode(buf);
if (mode == 0) {
ERROR("error: can't get mode for %s.\n", buf);
continue;
}
if (mkdir(targets->list[i], mode) == -1) {
ERROR("error: failed to create mountpotint at %s: %s.\n",
targets->list[i], strerror(errno));
continue;
} else
intarray_append(created, i);
} else {
ERROR("error: mount dir %s does not exists.\n", buf);
continue;
}
}
if (!isdir(targets->list[i])) {
ERROR("error: mount point %s is not a directory.\n", targets->list[i]);
continue;
}
if (mount(buf, targets->list[i], NULL, MS_BIND|MS_REC, NULL) == -1)
ERROR("mount: failed to mount %s: %s\n", targets->list[i], strerror(errno));
else
successful++;
}
return successful;
}
size_t mount_undo(const char *source,
const struct strarray *targets,
struct intarray *created)
{
int successful = 0;
for (size_t i = 0; i < targets->end; i++) {
/* If the mount point does not exist, create an empty file, otherwise
* mount() call will fail. In this case, remember which files we have
* created to unlink() them before exit. */
if (!exists(targets->list[i])) {
if (mkfile(targets->list[i]))
intarray_append(created, i);
else
continue;
}
DEBUG("%s: source=%s, target=%s\n", __func__, source, targets->list[i]);
if (mount(source, targets->list[i], NULL, MS_BIND, NULL) == -1)
ERROR("mount: failed to mount %s to %s: %s",
source, targets->list[i], strerror(errno));
else
successful++;
}
return successful;
}
/*
 * SIGTERM handler installed by the forked server process in main().
 * It only raises the term_caught flag; the signal number is not needed.
 */
void onterm(int sig)
{
    (void)sig;
    term_caught = 1;
}
/*
 * Entry point.
 *
 * Overall flow:
 *   1. Parse options and validate the container directory (and the
 *      voidnsundo path when -u was given).
 *   2. Open a descriptor for the current mount namespace, remember the
 *      working directory and unshare() into a new mount namespace.
 *   3. Bind-mount the requested and the default directories from the
 *      container, plus the voidnsundo binary over the -u targets.
 *   4. Mount a private tmpfs over the socket directory and fork():
 *        - the child serves the original namespace descriptor over a unix
 *          socket until it receives SIGTERM, then undoes the temporary
 *          mounts and removes the mount points it created;
 *        - the parent drops privileges, restores the working directory and
 *          exec()s the requested program.
 *
 * Returns 0 after printing help/version, or when the child finishes its
 * server loop normally; returns 1 on any error. On success the parent does
 * not return because it is replaced by the executed program.
 *
 * NOTE(review): ERROR_EXIT() is presumably "print the message and goto end";
 * it is defined outside this file section -- confirm in macros.h.
 */
int main(int argc, char **argv)
{
    if (argc < 2) {
        usage(argv[0]);
        return 0;
    }

    int nsfd = -1;
    char *dir = NULL;
    char buf[PATH_MAX*2];
    char *undo_bin = NULL;
    int sock_fd = -1, sock_conn = -1;
    size_t dirlen;
    int c;
    int exit_code = 1;
    DIR *dirptr = NULL;
    bool ignore_missing = false;
    bool forked = false;
    /* True once we are inside our own mount namespace. The cleanup code must
     * never run before that point, otherwise its umount() calls would hit
     * the mounts of the original (host) namespace. */
    bool unshared = false;
    pid_t pid = 0;
    char cwd[PATH_MAX];

    struct strarray user_mounts;
    strarray_alloc(&user_mounts, USER_LISTS_MAX);

    struct strarray undo_mounts;
    strarray_alloc(&undo_mounts, USER_LISTS_MAX);

    /* List of user-specified /usr subdirectories to mount. */
    struct strarray dir_mounts;
    strarray_alloc(&dir_mounts, USER_LISTS_MAX);

    /* List of indexes of items in the undo_mounts array. See comments in
     * mount_undo() function for more info. */
    struct intarray created_undos;
    intarray_alloc(&created_undos, USER_LISTS_MAX);

    /* List of indexes of items in the dir_mounts array. */
    struct intarray created_dirs;
    intarray_alloc(&created_dirs, USER_LISTS_MAX);

    while ((c = getopt(argc, argv, "vhm:r:u:U:iVd:")) != -1) {
        switch (c) {
        case 'v':
            printf("%s\n", PROG_VERSION);
            return 0;
        case 'h':
            usage(argv[0]);
            return 0;
        case 'i':
            ignore_missing = true;
            break;
        case 'r':
            dir = optarg;
            break;
        case 'U':
            undo_bin = optarg;
            break;
        case 'V':
            g_verbose = true;
            break;
        case 'm':
            if (!strarray_append(&user_mounts, optarg)) {
                ERROR_EXIT("error: only up to %lu user mounts allowed.\n",
                           user_mounts.size);
            }
            break;
        case 'u':
            if (!strarray_append(&undo_mounts, optarg)) {
                ERROR_EXIT("error: only up to %lu undo mounts allowed.\n",
                           undo_mounts.size);
            }
            break;
        case 'd':
            if (!startswith(optarg, "/usr/")) {
                ERROR_EXIT("only subdirectories of /usr are allowed for bind mounting this way.\n");
            }
            if (!strarray_append(&dir_mounts, optarg)) {
                ERROR_EXIT("error: only up to %lu dir mounts allowed.\n",
                           dir_mounts.size);
            }
            break;
        case '?':
            return 1;
        }
    }

    /* A program to run is mandatory. */
    if (!argv[optind]) {
        usage(argv[0]);
        return 1;
    }

    /* Get container path. */
    if (!dir)
        dir = getenv(CONTAINER_DIR_VAR);
    if (!dir) {
        ERROR_EXIT("error: environment variable %s not found.\n",
                   CONTAINER_DIR_VAR);
    }

    /* Validate it. */
    if (!isdir(dir)) {
        ERROR_EXIT("error: %s is not a directory.\n", dir);
    }

    dirlen = strlen(dir);
    if (dirlen >= PATH_MAX) {
        ERROR_EXIT("error: container's path is too long.\n");
    }

    DEBUG("dir=%s\n", dir);

    /* Get voidnsundo path, if needed. */
    if (undo_mounts.end > 0) {
        if (!undo_bin)
            undo_bin = getenv(UNDO_BIN_VAR);
        if (!undo_bin) {
            ERROR_EXIT("error: environment variable %s not found.\n",
                       UNDO_BIN_VAR);
        }

        size_t undo_bin_len = strlen(undo_bin);
        if (undo_bin_len >= PATH_MAX) {
            ERROR_EXIT("error: undo binary path is too long.\n");
        }

        /*
         * Check that it exists and it is an executable.
         * These strcpy and strcat calls should be safe, as we already know that
         * both dir and undo_bin are no longer than PATH_MAX-1 and the buf's size
         * is PATH_MAX*2.
         */
        strcpy(buf, dir);
        strcat(buf, undo_bin);
        if (!isexe(buf)) {
            ERROR_EXIT("error: %s is not an executable.\n", undo_bin);
        }

        DEBUG("undo_bin=%s\n", undo_bin);
    }

    /* Get current namespace's file descriptor. It may be needed later
     * for voidnsundo. */
    nsfd = open("/proc/self/ns/mnt", O_RDONLY);
    if (nsfd == -1) {
        ERROR_EXIT("error: failed to acquire mount namespace's fd: %s.\n",
                   strerror(errno));
    }

    /* Get current working directory. Will need to restore it later in the
     * new mount namespace. A failure is not fatal: the directory is simply
     * not restored, but cwd must not be left uninitialized. */
    if (getcwd(cwd, sizeof(cwd)) == NULL) {
        DEBUG("getcwd: %s\n", strerror(errno));
        cwd[0] = '\0';
    }
    DEBUG("cwd=%s\n", cwd);

    /* Create new mount namespace. */
    if (unshare(CLONE_NEWNS) == -1) {
        ERROR_EXIT("unshare: %s\n", strerror(errno));
    }
    unshared = true;

    /* Mount stuff from the container to the namespace. */

    /* First, mount what user asked us to mount. */
    if (mount_dirs(dir, dirlen, &user_mounts, NULL) < user_mounts.end && !ignore_missing) {
        ERROR_EXIT("error: some mounts failed.\n");
    }

    /* Then preserve original /usr at /oldroot if needed. */
    if (dir_mounts.end > 0) {
        mode_t mode = getmode("/usr");
        if (mode == 0) {
            ERROR_EXIT("error: failed to get mode of /usr.\n");
        }

        if (mount("tmpfs", OLDROOT, "tmpfs", 0, "size=4k,mode=0700,uid=0,gid=0") == -1) {
            ERROR_EXIT("mount: error mounting tmpfs in %s.\n", OLDROOT);
        }

        strcpy(buf, OLDROOT);
        strcat(buf, "/usr");
        if (mkdir(buf, mode) == -1) {
            ERROR_EXIT("error: failed to mkdir %s: %s.\n", buf, strerror(errno));
        }

        if (mount("/usr", buf, NULL, MS_BIND|MS_REC, NULL) == -1) {
            ERROR_EXIT("error: failed to mount /usr at %s: %s.\n",
                       buf, strerror(errno));
        }
    }

    /* Then the necessary stuff: /usr always, /var and /etc only for xbps
     * commands. */
    struct strarray default_mounts;
    strarray_alloc(&default_mounts, 3);
    strarray_append(&default_mounts, "/usr");
    if (isxbpscommand(argv[optind])) {
        strarray_append(&default_mounts, "/var");
        strarray_append(&default_mounts, "/etc");
    }
    if (mount_dirs(dir, dirlen, &default_mounts, NULL) < default_mounts.end) {
        ERROR_EXIT("error: some necessary mounts failed.\n");
    }

    /* Mount /usr subdirectories if needed. */
    if (dir_mounts.end > 0
        && mount_dirs(OLDROOT, strlen(OLDROOT), &dir_mounts, &created_dirs) < dir_mounts.end) {
        ERROR_EXIT("error: some dir mounts failed.\n");
    }

    /* Now lets do bind mounts of voidnsundo (if needed). */
    if (mount_undo(undo_bin, &undo_mounts, &created_undos) < undo_mounts.end
        && !ignore_missing) {
        ERROR_EXIT("error: some undo mounts failed.\n");
    }

    /* Check socket directory. */
    /* TODO: fix invalid permissions, or just die in that case. */

    /* This should be safe, SOCK_PATH is hardcoded in config.h and it's definitely
     * smaller than buffer. */
    strcpy(buf, SOCK_PATH);
    char *sock_dir = dirname(buf);
    if (access(sock_dir, F_OK) == -1) {
        if (mkdir(sock_dir, 0700) == -1) {
            ERROR_EXIT("error: failed to create %s directory.\n", sock_dir);
        }
    } else {
        /* opendir() is used only to verify that the path is a directory;
         * the handle is released in the cleanup section. */
        if ((dirptr = opendir(sock_dir)) == NULL) {
            ERROR_EXIT("error: %s is not a directory.\n", sock_dir);
        }
        if (exists(SOCK_PATH) && unlink(SOCK_PATH) == -1) {
            ERROR_EXIT("failed to unlink %s: %s\n", SOCK_PATH, strerror(errno));
        }
    }
    DEBUG("sock_dir=%s\n", sock_dir);

    /* Mount socket directory as tmpfs. It will only be visible in this namespace,
     * and the socket file will also be available from this namespace only.*/
    if (mount("tmpfs", sock_dir, "tmpfs", 0, "size=4k,mode=0700,uid=0,gid=0") == -1) {
        ERROR_EXIT("mount: error mounting tmpfs in %s: %s.\n", sock_dir, strerror(errno));
    }

    /*
     * Fork. We need it because we need to preserve file descriptor of the
     * original namespace.
     *
     * Linux doesn't allow to bind mount /proc/self/ns/mnt from the original
     * namespace in the child namespace because that would lead to dependency
     * loop. So I came up with another solution.
     *
     * Unix sockets are capable of passing file descriptors. We need to start a
     * server that will listen on a unix socket and pass the namespace's file
     * descriptor to connected clients over this socket. voidnsundo will connect
     * to the socket, receive the file descriptor and perform the setns() system
     * call.
     *
     * We also need to make sure the socket will only be accessible by root.
     * The path to the socket should be hardcoded.
     *
     * So we fork(), start the server in the child process, while the parent
     * drops root privileges and runs the programs it was asked to run.
     */
    pid_t ppid_before_fork = getpid();
    pid = fork();
    if (pid == -1) {
        ERROR_EXIT("fork: %s\n", strerror(errno));
    }

    forked = true;

    if (pid == 0) {
        /* This is the child process.
         * Catch SIGTERM: it will be sent here when parent dies. The signal will
         * interrupt the accept() call, so we can clean up and exit immediately.
         * sa_flags is left at 0 on purpose: without SA_RESTART accept() is
         * not restarted after the handler returns.
         */
        struct sigaction sa = {0};
        sa.sa_handler = onterm;
        sigaction(SIGTERM, &sa, NULL);

        /* Ignore SIGINT. Otherwise it will be affected by Ctrl+C in the parent
         * process. */
        signal(SIGINT, SIG_IGN);

        /* Set the child to get SIGTERM when parent thread dies. */
        int r = prctl(PR_SET_PDEATHSIG, SIGTERM);
        if (r == -1) {
            ERROR_EXIT("prctl: %s\n", strerror(errno));
        }

        /* Maybe it already has died? */
        if (getppid() != ppid_before_fork) {
            ERROR_EXIT("error: parent has died already.\n");
        }

        /* Create unix socket. */
        sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
        if (sock_fd == -1) {
            ERROR_EXIT("socket: %s.\n", strerror(errno));
        }

        struct sockaddr_un sock_addr = {0};
        sock_addr.sun_family = AF_UNIX;

        /* The size of sun_path is 108 bytes, our SOCK_PATH is definitely
         * smaller. */
        strcpy(sock_addr.sun_path, SOCK_PATH);

        if (bind(sock_fd, (struct sockaddr *)&sock_addr, sizeof(sock_addr)) == -1) {
            ERROR_EXIT("bind: %s\n", strerror(errno));
        }

        if (listen(sock_fd, 1) == -1) {
            ERROR_EXIT("listen: %s\n", strerror(errno));
        }

        /* Accept incoming connections until SIGTERM. */
        while (!term_caught) {
            sock_conn = accept(sock_fd, NULL, NULL);
            if (sock_conn == -1)
                continue;
            send_fd(sock_conn, nsfd);
            /* Each client is served exactly once; release the connection so
             * that descriptors do not pile up in this long-living process. */
            close(sock_conn);
            sock_conn = -1;
        }
    } else {
        /* Parent process. Drop root rights. The group must be changed while
         * we are still privileged, so it goes before the user. */
        uid_t uid = getuid();
        gid_t gid = getgid();

        if (setregid(gid, gid) == -1) {
            ERROR_EXIT("setregid: %s\n", strerror(errno));
        }

        if (setreuid(uid, uid) == -1) {
            ERROR_EXIT("setreuid: %s\n", strerror(errno));
        }

        /* Restore working directory (if we managed to get it). */
        if (cwd[0] != '\0' && chdir(cwd) == -1) {
            DEBUG("chdir: %s\n", strerror(errno));
        }

        /* The namespace descriptor is only needed by the server process;
         * don't leak it into the program we are about to run. */
        close(nsfd);
        nsfd = -1;

        /* Launch program. */
        if (execvp(argv[optind], (char *const *)(argv + optind)) == -1) {
            ERROR_EXIT("execvp(%s): %s\n", argv[optind], strerror(errno));
        }
    }

    exit_code = 0;

end:
    if (nsfd != -1)
        close(nsfd);

    if (sock_fd != -1)
        close(sock_fd);

    if (sock_conn != -1)
        close(sock_conn);

    if (dirptr != NULL)
        closedir(dirptr);

    /* Undo our mounts only when we are in our own namespace, and only in the
     * process responsible for it: the one that failed before fork(), or the
     * forked server. The parent that failed after fork() leaves the cleanup
     * to the child. */
    if (unshared && (!forked || pid == 0)) {
        /* If we created some empty files to bind the voidnsundo utility,
         * delete them here. */
        for (size_t i = 0; i < created_undos.end; i++) {
            char *path = undo_mounts.list[created_undos.list[i]];
            if (umount(path) == -1)
                DEBUG("umount(%s): %s\n", path, strerror(errno));
            if (unlink(path) == -1)
                ERROR("unlink(%s): %s\n", path, strerror(errno));
            else
                DEBUG("unlink(%s)\n", path);
        }

        /* If we had to create mount tmpfs to /oldroot and do other
         * dirty hacks related to /usr subdirs bind mounting, clean up here. */
        if (dir_mounts.end > 0) {
            for (size_t i = 0; i < dir_mounts.end; i++) {
                char *path = dir_mounts.list[i];
                if (umount(path) == -1)
                    ERROR("umount(%s): %s\n", path, strerror(errno));
            }

            /* If we created some empty dirs to use them as mountpoints for
             * bind mounts, delete them here. */
            for (size_t i = 0; i < created_dirs.end; i++) {
                char *path = dir_mounts.list[created_dirs.list[i]];
                if (rmdir(path) == -1)
                    ERROR("rmdir(%s): %s\n", path, strerror(errno));
                else
                    DEBUG("rmdir(%s)\n", path);
            }

            strcpy(buf, OLDROOT);
            strcat(buf, "/usr");
            if (umount(buf) == -1)
                ERROR("umount(%s): %s\n", buf, strerror(errno));

            /* This call always fails with EBUSY and I don't know why.
             * We can safely ignore any errors here (I hope) because
             * the mount namespace will be destroyed as soon as there
             * will be no processes attached to it. */
            umount(OLDROOT);
        }
    }

    return exit_code;
}