Changeset View
Standalone View
sys/compat/linux/linux_socket.c
Show First 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | |||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/mbuf.h> | #include <sys/mbuf.h> | ||||
#include <sys/socket.h> | #include <sys/socket.h> | ||||
#include <sys/socketvar.h> | #include <sys/socketvar.h> | ||||
#include <sys/syscallsubr.h> | #include <sys/syscallsubr.h> | ||||
#include <sys/uio.h> | #include <sys/uio.h> | ||||
#include <sys/stat.h> | |||||
#include <sys/syslog.h> | #include <sys/syslog.h> | ||||
#include <sys/un.h> | #include <sys/un.h> | ||||
#include <sys/unistd.h> | |||||
#include <security/audit/audit.h> | |||||
#include <net/if.h> | #include <net/if.h> | ||||
#include <net/vnet.h> | #include <net/vnet.h> | ||||
#include <netinet/in.h> | #include <netinet/in.h> | ||||
#include <netinet/in_systm.h> | #include <netinet/in_systm.h> | ||||
#include <netinet/ip.h> | #include <netinet/ip.h> | ||||
#include <netinet/tcp.h> | #include <netinet/tcp.h> | ||||
#ifdef INET6 | #ifdef INET6 | ||||
#include <netinet/ip6.h> | #include <netinet/ip6.h> | ||||
▲ Show 20 Lines • Show All 1,502 Lines • ▼ Show 20 Lines | out: | ||||
if (error == 0) | if (error == 0) | ||||
error = copyout(&len, PTRIN(args->optlen), | error = copyout(&len, PTRIN(args->optlen), | ||||
sizeof(len)); | sizeof(len)); | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) | static int | ||||
linux_sendfile_common(struct thread *td, l_int out, l_int in, | |||||
l_loff_t *offset, l_size_t count) | |||||
emaste: It seems we don't actually write to the offset? | |||||
Done Inline ActionsI will clarify the part according to mark's comment below 2333_outlook.jp: I will clarify the part according to mark's comment below | |||||
{ | |||||
/* handles sandfile in kernel space */ | |||||
Not Done Inline ActionsI think this comment isn't useful. markj: I think this comment isn't useful. | |||||
Done Inline Actionsint error prefered dchagin: int error prefered | |||||
off_t bytes_read; | |||||
int error; | |||||
l_loff_t current_offset; | |||||
struct file *fp; | |||||
if (offset != NULL) | |||||
current_offset = *offset; | |||||
Not Done Inline ActionsThe indentation looks wrong here, like there's a mix of tabs and spaces being used. markj: The indentation looks wrong here, like there's a mix of tabs and spaces being used. | |||||
else | |||||
current_offset = kern_lseek(td, in, 0, SEEK_CUR); | |||||
if (current_offset < 0) | |||||
markjUnsubmitted Not Done Inline Actionskern_lseek() returns an error number or 0, not the offset. It is an unusual interface in that it returns the offset by setting a field in the current thread's structure. Take a look at vn_seek() or shm_seek(), which implement the fo_seek() interface: the set td->td_uretoff.tdu_off to the offset, where td is the current thread (curthread, a magic global variable). You need to fetch the seek offset from there. (In my opinion fo_seek() should return the offset in an output argument and kern_lseek() should set curthread->td_uretoff.tdu_off, but this is a separate discussion.) markj: kern_lseek() returns an error number or 0, not the offset.
It is an unusual interface in that… | |||||
2333_outlook.jpAuthorUnsubmitted Done Inline ActionsShould I not use kern_lseek here as well and go with fo_seek? 2333_outlook.jp: Should I not use kern_lseek here as well and go with fo_seek? | |||||
markjUnsubmitted Not Done Inline ActionsYes, I think you can fetch the current offset after the fget_read() call, using fo_seek(). markj: Yes, I think you can fetch the current offset after the fget_read() call, using fo_seek(). | |||||
return (EINVAL); | |||||
bytes_read = 0; | |||||
AUDIT_ARG_FD(in); | |||||
error = fget_read(td, in, &cap_pread_rights, &fp); | |||||
if (error != 0) | |||||
return (error); | |||||
/* call real sendfile iff count != 0 */ | |||||
if (count != 0) { | |||||
error = fo_sendfile(fp, out, NULL, NULL, current_offset, count, | |||||
&bytes_read, 0, td); | |||||
fdrop(fp, td); | |||||
if (error < 0) | |||||
return (error); | |||||
current_offset += bytes_read; | |||||
} else { | |||||
fdrop(fp, td); | |||||
} | |||||
Not Done Inline ActionsWe don't seem to use the result of this call anywhere. I think it is unnecessary. markj: We don't seem to use the result of this call anywhere. I think it is unnecessary. | |||||
Done Inline ActionsI will remove this call 2333_outlook.jp: I will remove this call | |||||
Not Done Inline Actionsfo_sendfile() returns a positive error number (see sys/sys/errno.h). markj: fo_sendfile() returns a positive error number (see sys/sys/errno.h). | |||||
if (offset != NULL) { | |||||
*offset = current_offset; | |||||
} else { | |||||
error = kern_lseek(td, in, current_offset, SEEK_SET); | |||||
markjUnsubmitted Not Done Inline ActionsInstead of using kern_lseek(), it is better to call fo_seek() directly, before fdrop()ing the file. Otherwise this function is doing three fget()s per call, which is unnecessary, and moreover it introduces some ugly error cases. For instance, suppose the process closes the fd after the fdrop() call but before the second kern_lseek() call. Then the second kern_lseek() call will fail, so sendfile() will return an error (EBADF) even though it technically succeeded. That is not a fatal problem, but it is better to avoid it. In fact, using fo_seek() itself is a bit racy. Right now we have the following sequence of operations when offset == NULL:
Suppose another thread changes the current offset between 1 and 3: the result of the call isn't really consistent. If you look at the fo_seek() implementations, like vn_seek(), you will see that foffset_lock() and foffset_unlock() are used to synchronize operations on the file's current offset. I think it is ok to ignore this problem, I am just mentioning it for completeness. markj: Instead of using kern_lseek(), it is better to call fo_seek() directly, before fdrop()ing the… | |||||
if (error < 0) | |||||
return (error); | |||||
} | |||||
td->td_retval[0] = (ssize_t) bytes_read; | |||||
return (0); | |||||
} | |||||
Not Done Inline ActionsStyle: there should be no space after the cast. markj: Style: there should be no space after the cast. | |||||
int | |||||
linux_sendfile(struct thread *td, struct linux_sendfile_args *arg) | |||||
{ | |||||
/* XXX: The differences between freebsd and linux sendfile: | |||||
* - linux_sendfile doesn't send anything when count is 0 | |||||
* whereas freebsd_sendfile sends the whole file. However, | |||||
* in linux_sendfile given fds are still checked for if they | |||||
* are valid or not when count is 0. | |||||
* - linux_sendfile can send to any fd whereas freebsd_sendfile | |||||
* only sends to a socket stream. The same restriction follows | |||||
* for linux_sendfile. | |||||
* - linux_sendfile doesn't have equivalents of flags and sf_hdtr of | |||||
* freebsd_sendfile. | |||||
* - linux_sendfile takes in an offset pointer and updates it to where it | |||||
Done Inline ActionsIn Linux, sendfile() may also adjust the file's seek offset: "If offset is not NULL, then sendfile() does not modify the file offset of in_fd; otherwise the file offset is adjusted to reflect the number of bytes read from in_fd." In particular, if offset == NULL there is still some work that you need to do. As a hint for how this should be fixed, read the manual page for the lseek() system call, which does nothing but adjust the seek offset for a file descriptor. Also take a look at its implementation in FreeBSD, in kern_lseek(). markj: In Linux, sendfile() may also adjust the file's seek offset: "If offset is not NULL, then… | |||||
* was read until. freebsd_sendfile takes in an offset and a | |||||
* 'bytes read' parameter which is only filled if it isn't NULL. | |||||
* We use this parameter to update the offset pointer if it exists. | |||||
* - linux_sendfile returns bytes read on success while freebsd_sendfile | |||||
* returns 0. We use the 'bytes read' parameter to get this value. | |||||
*/ | |||||
/* YYY: The only difference between sendfile and sendfile64 (for 32bit | |||||
* platform where LFS is enabled) is offset type. The latter must be | |||||
* 64 bit | |||||
* If no offset pointer is given, in_fd's offset detail is read and | |||||
* updated, otherwise given pointer is used | |||||
*/ | |||||
Not Done Inline Actionsmissied fp, you need fdrop() dchagin: missied fp, you need fdrop() | |||||
Not Done Inline Actionsthis cast isn't right - we need something like: l_loff_t offset; int rv; offset = arg->offset; rv = linux_sendfile_common(..., &offset, ...); // do something with offset return (rv); emaste: this cast isn't right - we need something like:
```
l_loff_t offset;
int rv;
offset = arg… | |||||
Done Inline Actionsended up with some thing: int linux_sendfile(struct thread *td, struct linux_sendfile_args *arg) { if (arg->offset == NULL) return linux_sendfile_common(td, arg->out, arg->in, NULL, arg->count); l_loff_t offset64; int ret; int error; l_long offset; error = copyin(arg->offset, &offset, sizeof(offset)); if (error < 0) return (error); offset64 = (l_loff_t) offset; ret = linux_sendfile_common(td, arg->out, arg->in, &offset64, arg->count); offset = (l_long) offset64; error = copyout(&offset, arg->offset, sizeof(offset)); if (error < 0) return (error); return (ret); } 2333_outlook.jp: ended up with some thing:
```
int
linux_sendfile(struct thread *td, struct linux_sendfile_args… | |||||
if (arg->offset == NULL) | |||||
return linux_sendfile_common(td, arg->out, arg->in, | |||||
NULL, arg->count); | |||||
markjUnsubmitted Not Done Inline ActionsAs a matter of style I think this function and the one below should contain only a single call to linux_sendfile_common(). That is, write: if (offset != NULL) <copyin> ret = linux_sendfile_common() if (offset != NULL) <copyout> At least, per FreeBSD style, local variable declarations always come before code. markj: As a matter of style I think this function and the one below should contain only a single call… | |||||
Not Done Inline ActionsI think these comments can be deleted or at least written more concisely. markj: I think these comments can be deleted or at least written more concisely. | |||||
l_loff_t offset64; | |||||
int ret; | |||||
int error; | |||||
l_long offset; | |||||
error = copyin(arg->offset, &offset, sizeof(offset)); | |||||
Done Inline ActionsThe way we are passing arg->offset (l_long) here is not right. Suppose you are running a 32-bit Linux application on a 64-bit amd64 kernel. From the application's point of view, sizeof(offset) is 4 but from the kernel's point of view sizeof(offset) is 8. When linux_sendfile_common() uses copyout() to copy the returned offset back into userspace, it will always copy 8 bytes. 32-bit applications, which expect only 4 bytes, are going to experience memory corruption as a result. markj: The way we are passing arg->offset (l_long) here is not right.
Suppose you are running a 32… | |||||
Done Inline Actionsarg->offset is l_loff_t* 2333_outlook.jp: arg->offset is l_loff_t* | |||||
Done Inline ActionsSorry, I put the comment in the wrong place. The issue is with linux_sendfile(), where arg->offset is l_long_t *. markj: Sorry, I put the comment in the wrong place. The issue is with linux_sendfile(), where arg… | |||||
if (error != 0) | |||||
return (error); | |||||
offset64 = (l_loff_t) offset; | |||||
ret = linux_sendfile_common(td, arg->out, arg->in, | |||||
&offset64, arg->count); | |||||
if (offset64 > LONG_MAX) | |||||
return (EOVERFLOW); | |||||
offset = (l_long) offset64; | |||||
error = copyout(&offset, arg->offset, sizeof(offset)); | |||||
if (error != 0) | |||||
return (error); | |||||
return (ret); | |||||
Not Done Inline ActionsStyle: there should be no space after the cast. markj: Style: there should be no space after the cast. | |||||
} | |||||
#if defined(__i386__) || defined(__arm__) || \ | |||||
(defined(__amd64__) && defined(COMPAT_LINUX32)) | |||||
int | |||||
linux_sendfile64(struct thread *td, struct linux_sendfile64_args *arg) | |||||
{ | |||||
if (arg->offset == NULL) | |||||
return linux_sendfile_common(td, arg->out, arg->in, | |||||
NULL, arg->count); | |||||
int ret; | |||||
int error; | |||||
l_loff_t offset; | |||||
error = copyin(arg->offset, &offset, sizeof(offset)); | |||||
if (error != 0) | |||||
return (error); | |||||
ret = linux_sendfile_common(td, arg->out, arg->in, | |||||
&offset, arg->count); | |||||
error = copyout(&offset, arg->offset, sizeof(offset)); | |||||
if (error != 0) | |||||
return (error); | |||||
return (ret); | |||||
} | |||||
/* Argument list sizes for linux_socketcall */ | /* Argument list sizes for linux_socketcall */ | ||||
static const unsigned char lxs_args_cnt[] = { | static const unsigned char lxs_args_cnt[] = { | ||||
0 /* unused*/, 3 /* socket */, | 0 /* unused*/, 3 /* socket */, | ||||
3 /* bind */, 3 /* connect */, | 3 /* bind */, 3 /* connect */, | ||||
2 /* listen */, 3 /* accept */, | 2 /* listen */, 3 /* accept */, | ||||
3 /* getsockname */, 3 /* getpeername */, | 3 /* getsockname */, 3 /* getpeername */, | ||||
4 /* socketpair */, 4 /* send */, | 4 /* socketpair */, 4 /* send */, | ||||
4 /* recv */, 6 /* sendto */, | 4 /* recv */, 6 /* sendto */, | ||||
6 /* recvfrom */, 2 /* shutdown */, | 6 /* recvfrom */, 2 /* shutdown */, | ||||
5 /* setsockopt */, 5 /* getsockopt */, | 5 /* setsockopt */, 5 /* getsockopt */, | ||||
3 /* sendmsg */, 3 /* recvmsg */, | 3 /* sendmsg */, 3 /* recvmsg */, | ||||
4 /* accept4 */, 5 /* recvmmsg */, | 4 /* accept4 */, 5 /* recvmmsg */, | ||||
4 /* sendmmsg */ | 4 /* sendmmsg */, 4 /* sendfile */ | ||||
Not Done Inline ActionsIt looks like there might be a whitespace consistency issue yet. P.S. Not to be fixed in this commit, but this should really be using designated initializers (e.g. [LINUX_SENDFILE] = 4). brooks: It looks like there might be a whitespace consistency issue yet.
P.S. Not to be fixed in this… | |||||
Done Inline ActionsThere are tabs before the second column in all instances. It looks fine using diff tools and on vim. This might just be phabricator not showing it correctly, unless my editor is doing something weird. borako.ozarslan_gmail.com: There are tabs before the second column in all instances. It looks fine using diff tools and on… | |||||
}; | }; | ||||
#define LINUX_ARGS_CNT (nitems(lxs_args_cnt) - 1) | #define LINUX_ARGS_CNT (nitems(lxs_args_cnt) - 1) | ||||
#define LINUX_ARG_SIZE(x) (lxs_args_cnt[x] * sizeof(l_ulong)) | #define LINUX_ARG_SIZE(x) (lxs_args_cnt[x] * sizeof(l_ulong)) | ||||
int | int | ||||
linux_socketcall(struct thread *td, struct linux_socketcall_args *args) | linux_socketcall(struct thread *td, struct linux_socketcall_args *args) | ||||
{ | { | ||||
l_ulong a[6]; | l_ulong a[6]; | ||||
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | #endif | ||||
case LINUX_RECVMSG: | case LINUX_RECVMSG: | ||||
return (linux_recvmsg(td, arg)); | return (linux_recvmsg(td, arg)); | ||||
case LINUX_ACCEPT4: | case LINUX_ACCEPT4: | ||||
return (linux_accept4(td, arg)); | return (linux_accept4(td, arg)); | ||||
case LINUX_RECVMMSG: | case LINUX_RECVMMSG: | ||||
return (linux_recvmmsg(td, arg)); | return (linux_recvmmsg(td, arg)); | ||||
case LINUX_SENDMMSG: | case LINUX_SENDMMSG: | ||||
return (linux_sendmmsg(td, arg)); | return (linux_sendmmsg(td, arg)); | ||||
case LINUX_SENDFILE: | |||||
return (linux_sendfile(td, arg)); | |||||
} | } | ||||
uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what); | uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what); | ||||
return (ENOSYS); | return (ENOSYS); | ||||
} | } | ||||
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ | #endif /* __i386__ || __arm__ || (__amd64__ && COMPAT_LINUX32) */ |
It seems we don't actually write to the offset?