/* broot.c - brutalist build roots - Richard Lupton 2025
 *
 * Expects to have cap_sys_admin privileges in order to create
 * namespaces, setup mounts, and pivot root. These are not
 * required to persist over exec calls (and indeed are explicitly
 * dropped).
 *
 *     setcap cap_sys_admin=ep PATH_TO_BROOT
 *
 * should set the correct permissions, to a first approximation,
 * but it's up to the system admin to decide how to allow this.
 *
 * broot expects the directories for binding source and output
 * to be present in the buildroot.
 *
 * Version history
 * ===============
 *
 * v0.1 - 2025-07-18 - initial version - test scenario building
 *                     a small game in a debian based buildroot.
 *
 */
#define _GNU_SOURCE /* unshare (sched.h) */

#include <assert.h>
#include <errno.h>
#include <inttypes.h> /* PRIx64 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h> /* strerror */
#include <unistd.h> /* syscall, chdir */
#include <sched.h> /* unshare */
#include <sys/mount.h> /* mount, umount2 */
#include <sys/syscall.h> /* SYS_pivot_root, SYS_capset */
#include <linux/capability.h>

/* Longest accepted value (in bytes, excluding the NUL) for any path flag. */
#define PATHLEN_MAX 2048
/* Maximum number of --env entries; opts.env holds one more for the NULL. */
#define ENV_MAX 255

#define LOG(...) fprintf(stderr, __VA_ARGS__)
#define ERROR(...) LOG("[ERROR] " __VA_ARGS__)

/* Length of a string literal, excluding the terminator. The "" s "" form
 * only compiles when s is a string literal. */
#define static_strlen(s) ((isize)(sizeof("" s "") - 1))
/* Element count of a true array (not a pointer). */
#define static_arraylen(a) ((isize)(sizeof(a) / sizeof(a[0])))
#define static_assert _Static_assert
#define memcmp __builtin_memcmp
#define memcpy __builtin_memcpy

typedef long long int isize;
typedef struct str str;

/* sizeof(sizeof(void *)) is the size of size_t. */
static_assert(sizeof(isize) == sizeof(sizeof(void *)),
              "isize is the signed variant of size_t");

/* Length-prefixed, non-owning view of character data. */
struct str {
    isize length;
    const char *data;
};

/* str from a string literal, as an expression (compound literal). */
#define str_from_static(s) ((str){ .length = static_strlen(s), .data = (s) })
/* str from a string literal, as a brace initializer (usable in static tables). */
#define STR(s) { .length = static_strlen(s), .data = (s) }

/* Number of bytes in s before the first NUL. s must not be NULL. */
static isize
cstrlen(const char *s)
{
    isize len = 0;
    while (*s++ != '\0') {
        len++;
    }
    return len;
}

/* Wrap a NUL-terminated string in a str; the data is not copied. */
static str
str_from_cstr(const char *s)
{
    str ret = {
        .length = cstrlen(s),
        .data = s,
    };
    return ret;
}

/* Non-zero when s and t have the same length and the same bytes. */
static int
str_eq(str s, str t)
{
    return s.length == t.length
        && (s.length == 0 || !memcmp(s.data, t.data, s.length));
}

/* Builder for a C string inside a caller-provided buffer.
 *
 * remaining - bytes still free after cursor
 * start     - beginning of the buffer; set to NULL once a push did not fit
 * cursor    - where the next byte will be written
 */
struct cstrb {
    isize remaining;
    const char *start;
    char *cursor;
};

/* Start building into the length bytes at start. */
static struct cstrb
cstrb_from_buf(isize length, char *start)
{
    struct cstrb b = {
        .remaining = length,
        .start = start,
        .cursor = start,
    };
    return b;
}

/* Append s (without its terminator). If s does not fit, nothing is copied
 * and the builder is marked failed by clearing b->start. */
static void
cstrb_push(struct cstrb *b, const char *s)
{
    isize length = cstrlen(s);
    if (length == 0) return;
    if (length > b->remaining) {
        b->start = NULL;
        return;
    }
    memcpy(b->cursor, s, length);
    b->remaining -= length;
    b->cursor += length;
}

/* Terminate the string and return its start, or NULL if an earlier push
 * failed or there is no byte left for the NUL terminator. */
static const char *
cstrb_complete(struct cstrb *b)
{
    if (b->remaining == 0) {
        b->start = NULL;
    } else {
        *b->cursor++ = '\0';
        b->remaining = 0;
    }
    return b->start;
}

/* Concatenate initial and term into buf (length bytes) and return buf.
 *
 * No separator is inserted between the two parts. Overflow is not reported
 * to the caller: it is only caught by the assert, so callers must guarantee
 * that both parts plus the terminator fit.
 */
static const char *
cstrjoin_unchecked(const char *initial, const char *term, isize length, char buf[length])
{
    struct cstrb csb = cstrb_from_buf(length, buf);
    cstrb_push(&csb, initial);
    cstrb_push(&csb, term);
    const char *path = cstrb_complete(&csb);
    assert(path != NULL);
    return path;
}

/* Thin wrapper over the raw pivot_root system call. Returns the syscall
 * result (0 on success, -1 with errno set on failure). */
static int
pivot_root(const char *new_root, const char *put_old)
{
    return syscall(SYS_pivot_root, new_root, put_old);
}

/* The three capability sets as 64-bit masks (bit N = capability N). */
struct capabilities {
    uint64_t effective;
    uint64_t permitted;
    uint64_t inheritable;
};

/* Replace the calling process's capability sets with *caps.
 * Returns the capset syscall result. */
static int
capabilities_set(const struct capabilities *caps)
{
    struct __user_cap_header_struct hdr = {
        .version = _LINUX_CAPABILITY_VERSION_3,
        .pid = 0,
    };
    /* Each 64-bit mask is split into a low (data[0]) and high (data[1])
     * 32-bit half. */
    struct __user_cap_data_struct data[2];
    data[0].effective = caps->effective & 0xFFFFFFFF;
    data[0].permitted = caps->permitted & 0xFFFFFFFF;
    data[0].inheritable = caps->inheritable & 0xFFFFFFFF;
    data[1].effective = caps->effective >> 32;
    data[1].permitted = caps->permitted >> 32;
    data[1].inheritable = caps->inheritable >> 32;
    return syscall(SYS_capset, &hdr, &data);
}

/* Read the calling process's capability sets into *out.
 * Returns the capget syscall result; *out is only written on success. */
static int
capabilities_get(struct capabilities *out)
{
    struct __user_cap_header_struct hdr = {
        .version = _LINUX_CAPABILITY_VERSION_3,
        .pid = 0,
    };
    struct __user_cap_data_struct data[2] = { 0 };
    int rc = syscall(SYS_capget, &hdr, &data);
    if (rc == 0) {
        out->effective = (uint64_t)data[0].effective | ((uint64_t)data[1].effective << 32);
        out->permitted = (uint64_t)data[0].permitted | ((uint64_t)data[1].permitted << 32);
        out->inheritable = (uint64_t)data[0].inheritable | ((uint64_t)data[1].inheritable << 32);
    }
    return rc;
}

/* Parsed command line. All pointers refer into argv; nothing is copied. */
struct options {
    const char *buildroot;
    const char *src;
    const char *srcbind;
    const char *out;
    const char *outbind;
    const char *tmpbind;
    char *const *cmd;          /* command and its arguments, NULL-terminated */
    int cmd_len;
    char *env[ENV_MAX + 1];    /* VAR=VALUE entries, NULL-terminated */
    int env_len;
};

static struct options opts = { 0 };

/* NOTE flags capturing paths - we split the required from the optional. */
struct pathflag {
    str longname;
    const char **set;          /* field of opts that receives the value */
    const char *description;
};

static const struct pathflag required_flags[] = {
    {
        .longname = STR("--buildroot"),
        .set = &opts.buildroot,
        .description = "path to folder containing the root filesystem"
    },
    {
        .longname = STR("--src"),
        .set = &opts.src,
        .description = "path to folder containing source code to mount"
    },
    {
        .longname = STR("--out"),
        .set = &opts.out,
        .description = "path to folder for writing output"
    },
    {
        .longname = STR("--src-bind"),
        .set = &opts.srcbind,
        .description = "path inside the build environment to bind source directory"
    },
    {
        .longname = STR("--out-bind"),
        .set = &opts.outbind,
        .description = "path inside the build environment to bind output directory"
    },
};

static const struct pathflag optional_flags[] = {
    {
        .longname = STR("--tmpfs-bind"),
        .set = &opts.tmpbind,
        .description = "path inside the build environment to bind a tmpfs"
    },
};

/* Look arg up among the required and then the optional path flags.
 * On a match stores the flag in *out and returns true; otherwise returns
 * false and leaves *out untouched. */
static bool
try_match(str arg, const struct pathflag **out)
{
    for (int i = 0; i < static_arraylen(required_flags); ++i) {
        const struct pathflag *pflag = &required_flags[i];
        if (str_eq(arg, pflag->longname)) {
            *out = pflag;
            return true;
        }
    }
    for (int i = 0; i < static_arraylen(optional_flags); ++i) {
        const struct pathflag *pflag = &optional_flags[i];
        if (str_eq(arg, pflag->longname)) {
            *out = pflag;
            return true;
        }
    }
    return false;
}

/* Print the help text to stderr; bin is shown as the program name. */
static void
usage(const char *bin)
{
    LOG("%s: brutalist build sandbox\n", bin);
    LOG("\nRequired flags\n");
    for (int i = 0; i < static_arraylen(required_flags); ++i) {
        const struct pathflag *pflag = &required_flags[i];
        /* The column below is 16 wide; longer names would break alignment. */
        assert(pflag->longname.length < 16);
        LOG(" %-16.*s - %s\n", (int)pflag->longname.length, pflag->longname.data, pflag->description);
    }
    LOG(" %-16s - the command to run inside the sandbox\n", "-- COMMAND");
    LOG("\nOther flags\n");
    for (int i = 0; i < static_arraylen(optional_flags); ++i) {
        const struct pathflag *pflag = &optional_flags[i];
        assert(pflag->longname.length < 16);
        LOG(" %-16.*s - %s\n", (int)pflag->longname.length, pflag->longname.data, pflag->description);
    }
    LOG(" %-16s - specify an environment variable with form VAR=VALUE\n", "--env");
    LOG(" %-16s - display this message\n", "--help");
    LOG("\nSubcommands\n");
    LOG(" %-16s - check the binary has the needed capabilities to execute\n", "check");
    LOG(" %-16s - display this message\n", "help");
}

/* What main should do after argument parsing. */
enum action {
    ACTION_RUN,
    ACTION_HELP,
    ACTION_CHECK,
    ACTION_ERROR_WITH_USAGE,
    ACTION_ERROR,
};

/* Parse argv into the global opts.
 *
 * argv must be NULL-terminated at argv[argc]. Diagnostics are written to
 * stderr here; the return value tells the caller whether to run, show
 * help, perform the capability check, or fail (with or without usage).
 * On ACTION_RUN every required flag and the command are set and opts.env
 * is NULL-terminated.
 */
static enum action
parse_opts(int argc, char *const argv[])
{
    assert(argc >= 1);
    assert(argv[argc] == NULL);

    opts.cmd_len = 0;

    if (argc == 1) {
        return ACTION_ERROR_WITH_USAGE;
    }

    /* Check for subcommands */
    {
        str first = str_from_cstr(argv[1]);
        if (str_eq(first, str_from_static("help"))) {
            if (argc > 2) {
                ERROR("help subcommand does not take any arguments\n");
                return ACTION_ERROR_WITH_USAGE;
            }
            return ACTION_HELP;
        }
        if (str_eq(first, str_from_static("check"))) {
            if (argc > 2) {
                /* Terminated with a newline so the usage text that follows
                 * starts on its own line. */
                ERROR("check subcommand does not take any arguments\n");
                return ACTION_ERROR_WITH_USAGE;
            }
            return ACTION_CHECK;
        }
    }

    for (int i = 1; i < argc;) {
        str arg = str_from_cstr(argv[i]);
        const struct pathflag *pflag = NULL;

        if (str_eq(arg, str_from_static("--"))) {
            /* Everything after "--" is the command to execute. */
            i++;
            assert(i <= argc); /* NOTE argv[argc] == NULL */
            opts.cmd_len = argc - i;
            opts.cmd = &argv[i];
            break;
        } else if (str_eq(arg, str_from_static("--help"))) {
            /* Even though this might appear amongst other flags and cause them
             * to be ignored, this is actually useful when you just want to dump
             * out help information while building up a command. */
            return ACTION_HELP;
        } else if (str_eq(arg, str_from_static("--env"))) {
            i++;
            if (i == argc) {
                ERROR("flag --env requires an argument\n");
                return ACTION_ERROR;
            }
            if (opts.env_len == ENV_MAX) {
                ERROR("exceeded maximum number of environment variables (%d)\n", ENV_MAX);
                return ACTION_ERROR;
            }
            opts.env[opts.env_len++] = argv[i++];
        } else if (try_match(arg, &pflag)) {
            i++;
            if (*pflag->set != NULL) {
                ERROR("flag %.*s occurs twice\n", (int)pflag->longname.length, pflag->longname.data);
                return ACTION_ERROR;
            }
            if (i == argc) {
                ERROR("flag %.*s requires a path\n", (int)pflag->longname.length, pflag->longname.data);
                return ACTION_ERROR;
            }
            const char *value = argv[i++];
            isize value_len = cstrlen(value);
            if (value_len > PATHLEN_MAX) {
                ERROR("argument for flag %.*s exceeds maximum path length (currently %lld, but must not exceed %d)\n", (int)pflag->longname.length, pflag->longname.data, value_len, PATHLEN_MAX);
                return ACTION_ERROR;
            }
            *pflag->set = value;
        } else {
            ERROR("unknown option: %s\n", argv[i]);
            return ACTION_ERROR_WITH_USAGE;
        }
    }

    for (int i = 0; i < (int)static_arraylen(required_flags); ++i) {
        const struct pathflag *pflag = &required_flags[i];
        if (*pflag->set == NULL) {
            ERROR("flag %.*s is required\n", (int)pflag->longname.length, pflag->longname.data);
            return ACTION_ERROR_WITH_USAGE;
        }
    }

    if (opts.cmd_len == 0) {
        ERROR("command required\n");
        return ACTION_ERROR_WITH_USAGE;
    }

    assert(opts.env_len <= ENV_MAX);
    opts.env[opts.env_len] = NULL; /* opts.env has length ENV_MAX + 1 */

    return ACTION_RUN;
}

/* Scratch buffer for joined paths. parse_opts accepts path values of up to
 * PATHLEN_MAX bytes, and the longest join is two such values, so the buffer
 * needs one further byte for the NUL terminator; without it two
 * maximum-length paths would trip the assert in cstrjoin_unchecked. */
static char buf[2 * PATHLEN_MAX + 1] = { 0 };

/* Entry point: parse arguments, build the sandbox (new mount and network
 * namespaces, bind mounts, pivot_root), drop all capabilities and exec the
 * requested command. Returns 0 for help/check success and -1 on any error;
 * on success of the run path execve does not return.
 *
 * NOTE(review): bind destinations are appended to the build root verbatim
 * (no separator is inserted), so --src-bind/--out-bind/--tmpfs-bind values
 * are expected to start with '/'.
 */
int
main(int argc, char **argv)
{
    int rc = -1;

    switch (parse_opts(argc, argv)) {
    case ACTION_RUN:
        break;
    case ACTION_HELP: {
        usage(argv[0]);
        return 0;
    } break;
    case ACTION_ERROR_WITH_USAGE: {
        LOG("usage: ");
        usage(argv[0]);
        goto error;
    } break;
    case ACTION_ERROR: {
        goto error;
    } break;
    case ACTION_CHECK: {
        struct capabilities caps;
        if (capabilities_get(&caps)) {
            ERROR("could not read capabilities: %s\n", strerror(errno));
            goto error;
        }
        if (!(caps.effective & ((uint64_t)1 << CAP_SYS_ADMIN))) {
            ERROR("%s does not have CAP_SYS_ADMIN capability - this usually means the binary has not been installed properly\n", argv[0]);
            goto error;
        }
        LOG("ok\n");
        return 0;
    } break;
    }

    /* TODO determine other things that should be isolated. */
    if (unshare(CLONE_NEWNS|CLONE_NEWNET)) {
        ERROR("failed to create new namespace for build root: %s\n", strerror(errno));
        goto error;
    }

    if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)) {
        ERROR("failed to make root mount private: %s\n", strerror(errno));
        goto error;
    }

    /* Bind the build root onto itself, then remount that bind read-only. */
    if (mount(opts.buildroot, opts.buildroot, NULL, MS_BIND, NULL)) {
        ERROR("failed to bind target root filesystem in new namespace: %s\n", strerror(errno));
        goto error;
    }

    if (mount(opts.buildroot, opts.buildroot, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL)) {
        ERROR("failed to remount new root filesystem as readonly: %s\n", strerror(errno));
        goto error;
    }

    /* Setup other mounts needed for functioning pivot_root: /proc, /sys, /dev and /dev/pts. */
    {
        const char *mntdst;

        mntdst = cstrjoin_unchecked(opts.buildroot, "/proc", sizeof(buf), buf);
        if (mount(NULL, mntdst, "proc", 0, NULL)) {
            ERROR("failed to mount /proc filesystem in new namespace: %s\n", strerror(errno));
            goto error;
        }

        mntdst = cstrjoin_unchecked(opts.buildroot, "/sys", sizeof(buf), buf);
        if (mount("/sys", mntdst, NULL, MS_BIND|MS_REC, NULL)) {
            ERROR("failed to bind /sys filesystem in new namespace: %s\n", strerror(errno));
            goto error;
        }

        mntdst = cstrjoin_unchecked(opts.buildroot, "/dev", sizeof(buf), buf);
        if (mount("/dev", mntdst, NULL, MS_BIND|MS_REC, NULL)) {
            ERROR("failed to bind /dev filesystem in new namespace: %s\n", strerror(errno));
            goto error;
        }

        mntdst = cstrjoin_unchecked(opts.buildroot, "/dev/pts", sizeof(buf), buf);
        if (mount("pts", mntdst, "devpts", 0, NULL)) {
            ERROR("failed to bind pts filesystem in new namespace: %s\n", strerror(errno));
            goto error;
        }
    }

    if (opts.tmpbind != NULL) {
        const char *tmpbind = cstrjoin_unchecked(opts.buildroot, opts.tmpbind, sizeof(buf), buf);
        if (mount(NULL, tmpbind, "tmpfs", 0, NULL)) {
            ERROR("failed to mount tmpfs in new namespace: %s\n", strerror(errno));
            goto error;
        }
    }

    /* Source tree: bind it in, then remount the bind read-only. */
    {
        const char *srcbind = cstrjoin_unchecked(opts.buildroot, opts.srcbind, sizeof(buf), buf);
        if (mount(opts.src, srcbind, NULL, MS_BIND, NULL)) {
            ERROR("failed to bind source filesystem in new namespace: %s\n", strerror(errno));
            goto error;
        }
        if (mount(srcbind, srcbind, NULL, MS_BIND|MS_REMOUNT|MS_RDONLY, NULL)) {
            ERROR("failed to make source filesystem read-only in new namespace: %s\n", strerror(errno));
            goto error;
        }
    }

    /* Output directory: bound without MS_RDONLY. */
    {
        const char *outbind = cstrjoin_unchecked(opts.buildroot, opts.outbind, sizeof(buf), buf);
        if (mount(opts.out, outbind, NULL, MS_BIND, NULL)) {
            ERROR("failed to bind output filesystem in new namespace: %s\n", strerror(errno));
            goto error;
        }
    }

    /* NOTE we use the directory used for source mounts (since it must exist)
     * as the putold for pivot root. This is later unmounted to reveal the
     * actual source mount. This avoids needing to create a temporary directory
     * for pivot_root. */
    const char *putold = cstrjoin_unchecked(opts.buildroot, opts.srcbind, sizeof(buf), buf);
    if (pivot_root(opts.buildroot, putold)) {
        ERROR("failed to pivot root to new target filesystem: %s\n", strerror(errno));
        goto error;
    }

    if (chdir("/")) {
        ERROR("failed to change directory to new root: %s\n", strerror(errno));
        goto error;
    }

    /* After the pivot the old root sits at opts.srcbind inside the new root. */
    if (umount2(opts.srcbind, MNT_DETACH)) {
        ERROR("failed to detach old root: %s\n", strerror(errno));
        goto error;
    }

    if (chdir(opts.srcbind)) {
        ERROR("failed to change directory to source directory in new root: %s\n", strerror(errno));
        goto error;
    }

    /* Drop all privileges. */
    struct capabilities caps = { 0 };
    if (capabilities_set(&caps)) {
        ERROR("failed to drop privileges before exec: %s\n", strerror(errno));
        goto error;
    }

    /* Read the sets back and refuse to exec if anything survived the drop. */
    if (capabilities_get(&caps)) {
        ERROR("failed to read back capabilities after capability drop: %s\n", strerror(errno));
        goto error;
    }

    if (caps.effective != 0 || caps.permitted != 0 || caps.inheritable != 0) {
        ERROR("capabilites remain after drop (effective:0x%.16" PRIx64 " permitted:0x%.16" PRIx64 " inheritable:0x%.16" PRIx64 ")\n", caps.effective, caps.permitted, caps.inheritable);
        goto error;
    }

    /* execve only returns on failure. */
    if (execve(opts.cmd[0], opts.cmd, opts.env)) {
        ERROR("failed to exec: %s\n", strerror(errno));
        goto error;
    }

    rc = 0;
error:
    return rc;
}