From 7a8a71674ef20db36d322b5fe0b8277e776536c9 Mon Sep 17 00:00:00 2001 From: Jonas 'Sortie' Termansen Date: Mon, 9 Jan 2017 23:40:56 +0100 Subject: [PATCH] Move readv/writev family and sendmsg/recvmsg into drivers. --- kernel/descriptor.cpp | 158 +++++++++++- kernel/fcache.cpp | 146 ++++++----- kernel/fs/kram.cpp | 18 +- kernel/fs/kram.h | 6 +- kernel/fs/user.cpp | 137 ++++++++++- kernel/include/sortix/kernel/descriptor.h | 12 +- kernel/include/sortix/kernel/fcache.h | 10 +- kernel/include/sortix/kernel/inode.h | 24 +- kernel/include/sortix/kernel/pipe.h | 13 +- kernel/include/sortix/kernel/vnode.h | 12 +- kernel/include/sortix/limits.h | 3 +- kernel/inode.cpp | 204 +++++++++++++-- kernel/io.cpp | 186 ++------------ kernel/net/fs.cpp | 45 +++- kernel/pipe.cpp | 287 ++++++++++++++++++---- kernel/vnode.cpp | 34 ++- regress/Makefile | 1 + regress/test-pipe-one-byte.c | 51 ++++ 18 files changed, 1027 insertions(+), 320 deletions(-) create mode 100644 regress/test-pipe-one-byte.c diff --git a/kernel/descriptor.cpp b/kernel/descriptor.cpp index f1d91d07..fe7c9268 100644 --- a/kernel/descriptor.cpp +++ b/kernel/descriptor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. + * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,16 +17,22 @@ * A file descriptor. 
*/ +#include #include +#include #include #include #include +#include #include #include #include #include +#ifndef IOV_MAX +#include +#endif #include #include #include @@ -293,6 +299,36 @@ ssize_t Descriptor::read(ioctx_t* ctx, uint8_t* buf, size_t count) return ret; } +ssize_t Descriptor::readv(ioctx_t* ctx, const struct iovec* iov_ptr, int iovcnt) +{ + if ( !(dflags & O_READ) ) + return errno = EPERM, -1; + if ( iovcnt < 0 || IOV_MAX < iovcnt ) + return errno = EINVAL, -1; + struct iovec* iov = new struct iovec[iovcnt]; + if ( !iov ) + return -1; + size_t iov_size = sizeof(struct iovec) * iovcnt; + if ( !ctx->copy_from_src(iov, iov_ptr, iov_size) ) + return delete[] iov, -1; + int old_ctx_dflags = ctx->dflags; + ctx->dflags = ContextFlags(old_ctx_dflags, dflags); + if ( !IsSeekable() ) + { + ssize_t result = vnode->readv(ctx, iov, iovcnt); + ctx->dflags = old_ctx_dflags; + delete[] iov; + return result; + } + ScopedLock lock(&current_offset_lock); + ssize_t ret = vnode->preadv(ctx, iov, iovcnt, current_offset); + if ( 0 <= ret ) + current_offset += ret; + ctx->dflags = old_ctx_dflags; + delete[] iov; + return ret; +} + ssize_t Descriptor::pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off) { if ( !(dflags & O_READ) ) @@ -310,6 +346,27 @@ ssize_t Descriptor::pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off) return result; } +ssize_t Descriptor::preadv(ioctx_t* ctx, const struct iovec* iov_ptr, + int iovcnt, off_t off) +{ + if ( !(dflags & O_READ) ) + return errno = EPERM, -1; + if ( off < 0 || iovcnt < 0 || IOV_MAX < iovcnt ) + return errno = EINVAL, -1; + struct iovec* iov = new struct iovec[iovcnt]; + if ( !iov ) + return -1; + size_t iov_size = sizeof(struct iovec) * iovcnt; + if ( !ctx->copy_from_src(iov, iov_ptr, iov_size) ) + return delete[] iov, -1; + int old_ctx_dflags = ctx->dflags; + ctx->dflags = ContextFlags(old_ctx_dflags, dflags); + ssize_t result = vnode->preadv(ctx, iov, iovcnt, off); + ctx->dflags = old_ctx_dflags; + delete[] iov; + 
return result; +} + ssize_t Descriptor::write(ioctx_t* ctx, const uint8_t* buf, size_t count) { if ( !(dflags & O_WRITE) ) @@ -344,7 +401,49 @@ ssize_t Descriptor::write(ioctx_t* ctx, const uint8_t* buf, size_t count) return ret; } -ssize_t Descriptor::pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off) +ssize_t Descriptor::writev(ioctx_t* ctx, const struct iovec* iov_ptr, + int iovcnt) +{ + if ( !(dflags & O_WRITE) ) + return errno = EPERM, -1; + if ( iovcnt < 0 || IOV_MAX < iovcnt ) + return errno = EINVAL, -1; + struct iovec* iov = new struct iovec[iovcnt]; + if ( !iov ) + return -1; + size_t iov_size = sizeof(struct iovec) * iovcnt; + if ( !ctx->copy_from_src(iov, iov_ptr, iov_size) ) + return delete[] iov, -1; + int old_ctx_dflags = ctx->dflags; + ctx->dflags = ContextFlags(old_ctx_dflags, dflags); + if ( !IsSeekable() ) + { + ssize_t result = vnode->writev(ctx, iov, iovcnt); + ctx->dflags = old_ctx_dflags; + delete[] iov; + return result; + } + ScopedLock lock(&current_offset_lock); + if ( ctx->dflags & O_APPEND ) + { + off_t end = vnode->lseek(ctx, 0, SEEK_END); + if ( end < 0 ) + { + ctx->dflags = old_ctx_dflags; + return delete[] iov, -1; /* Free the copied iovec on this error path too. */ + } + current_offset = end; + } + ssize_t ret = vnode->pwritev(ctx, iov, iovcnt, current_offset); + if ( 0 <= ret ) + current_offset += ret; + ctx->dflags = old_ctx_dflags; + delete[] iov; + return ret; +} + +ssize_t Descriptor::pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, + off_t off) { if ( !(dflags & O_WRITE) ) return errno = EPERM, -1; @@ -361,6 +460,27 @@ ssize_t Descriptor::pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t return result; } +ssize_t Descriptor::pwritev(ioctx_t* ctx, const struct iovec* iov_ptr, + int iovcnt, off_t off) +{ + if ( !(dflags & O_WRITE) ) + return errno = EPERM, -1; + if ( off < 0 || iovcnt < 0 || IOV_MAX < iovcnt ) + return errno = EINVAL, -1; + struct iovec* iov = new struct iovec[iovcnt]; + if ( !iov ) + return -1; + size_t iov_size = sizeof(struct iovec) * iovcnt; + if 
( !ctx->copy_from_src(iov, iov_ptr, iov_size) ) + return delete[] iov, -1; + int old_ctx_dflags = ctx->dflags; + ctx->dflags = ContextFlags(old_ctx_dflags, dflags); + ssize_t result = vnode->pwritev(ctx, iov, iovcnt, off); + ctx->dflags = old_ctx_dflags; + delete[] iov; + return result; +} + static inline bool valid_utimens_timespec(struct timespec ts) { return ts.tv_nsec < 1000000000 || @@ -755,22 +875,56 @@ int Descriptor::listen(ioctx_t* ctx, int backlog) ssize_t Descriptor::recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags) { + if ( (size_t) SSIZE_MAX < count ) + count = SSIZE_MAX; /* Keep the result representable as ssize_t. */ int old_ctx_dflags = ctx->dflags; ctx->dflags = ContextFlags(old_ctx_dflags, dflags); + if ( flags & MSG_DONTWAIT ) + ctx->dflags |= O_NONBLOCK; + flags &= ~MSG_DONTWAIT; ssize_t result = vnode->recv(ctx, buf, count, flags); ctx->dflags = old_ctx_dflags; return result; } +ssize_t Descriptor::recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags) +{ + int old_ctx_dflags = ctx->dflags; + ctx->dflags = ContextFlags(old_ctx_dflags, dflags); + if ( flags & MSG_DONTWAIT ) + ctx->dflags |= O_NONBLOCK; + flags &= ~MSG_DONTWAIT; + ssize_t result = vnode->recvmsg(ctx, msg, flags); + ctx->dflags = old_ctx_dflags; + return result; +} + ssize_t Descriptor::send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags) { + if ( (size_t) SSIZE_MAX < count ) + count = SSIZE_MAX; /* Keep the result representable as ssize_t. */ int old_ctx_dflags = ctx->dflags; ctx->dflags = ContextFlags(old_ctx_dflags, dflags); + if ( flags & MSG_DONTWAIT ) + ctx->dflags |= O_NONBLOCK; + flags &= ~MSG_DONTWAIT; ssize_t result = vnode->send(ctx, buf, count, flags); ctx->dflags = old_ctx_dflags; return result; } +ssize_t Descriptor::sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags) +{ + int old_ctx_dflags = ctx->dflags; + ctx->dflags = ContextFlags(old_ctx_dflags, dflags); + if ( flags & MSG_DONTWAIT ) + ctx->dflags |= O_NONBLOCK; + flags &= ~MSG_DONTWAIT; + ssize_t result = vnode->sendmsg(ctx, msg, flags); + ctx->dflags = old_ctx_dflags; + return result; +} + int 
Descriptor::getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr) { diff --git a/kernel/fcache.cpp b/kernel/fcache.cpp index d07e533c..4513374d 100644 --- a/kernel/fcache.cpp +++ b/kernel/fcache.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Jonas 'Sortie' Termansen. + * Copyright (c) 2013, 2014, 2017 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -18,6 +18,7 @@ */ #include +#include #include #include @@ -309,91 +310,118 @@ void FileCache::InitializeFileData(off_t to_where) } } -ssize_t FileCache::pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off) +ssize_t FileCache::preadv(ioctx_t* ctx, const struct iovec* iovs, int iovcnt, + off_t off) { ScopedLock lock(&fcache_mutex); - if ( off < 0 ) - return errno = EINVAL, -1; - if ( file_size <= off ) - return 0; - off_t available_bytes = file_size - off; - if ( (uintmax_t) available_bytes < (uintmax_t) count ) - count = available_bytes; - if ( (size_t) SSIZE_MAX < count ) - count = (size_t) SSIZE_MAX; - size_t sofar = 0; - while ( sofar < count ) + ssize_t so_far = 0; + int iov_i = 0; + size_t iov_offset = 0; + while ( iov_i < iovcnt && so_far < SSIZE_MAX ) { - off_t current_off = off + (off_t) sofar; - size_t left = count - sofar; + off_t current_off = off + (off_t) so_far; + if ( file_size <= current_off ) + break; + size_t maxcount = SSIZE_MAX - so_far; + if ( (uintmax_t) (file_size - current_off) < maxcount ) + maxcount = file_size - current_off; + if ( maxcount == 0 ) + break; + const struct iovec* iov = &iovs[iov_i]; + uint8_t* buf = (uint8_t*) iov->iov_base + iov_offset; + size_t count = iov->iov_len - iov_offset; + if ( maxcount < count ) + count = maxcount; + if ( count == 0 ) + { + iov_i++; + iov_offset = 0; + continue; + } size_t block_off = (size_t) (current_off % Page::Size()); size_t block_num = (size_t) (current_off / Page::Size()); 
size_t block_left = Page::Size() - block_off; - size_t amount_to_copy = left < block_left ? left : block_left; + size_t amount = count < block_left ? count : block_left; assert(block_num < blocks_used); BlockCacheBlock* block = blocks[block_num]; const uint8_t* block_data = kernel_block_cache->BlockData(block); const uint8_t* src_data = block_data + block_off; - uint8_t* dest_buf = buf + sofar; - off_t end_at = current_off + (off_t) amount_to_copy; - if ( file_written < end_at ) - InitializeFileData(end_at); - if ( !ctx->copy_to_dest(dest_buf, src_data, amount_to_copy) ) - return sofar ? (ssize_t) sofar : -1; - sofar += amount_to_copy; + if ( file_written < current_off + (off_t) amount ) + InitializeFileData(current_off + (off_t) amount); + if ( !ctx->copy_to_dest(buf, src_data, amount) ) + return so_far ? (ssize_t) so_far : -1; + so_far += amount; kernel_block_cache->MarkUsed(block); + iov_offset += amount; + if ( iov_offset == iov->iov_len ) + { + iov_i++; + iov_offset = 0; + } } - return (ssize_t) sofar; + return so_far; } -ssize_t FileCache::pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off) +ssize_t FileCache::pwritev(ioctx_t* ctx, const struct iovec* iovs, int iovcnt, + off_t off) { ScopedLock lock(&fcache_mutex); - if ( off < 0 ) - return errno = EINVAL, -1; - off_t available_growth = OFF_MAX - off; - if ( (uintmax_t) available_growth < (uintmax_t) count ) - count = (size_t) available_growth; - // TODO: Rather than doing an EOF - shouldn't errno be set to something like - // "Hey, the filesize limit has been reached"? 
- if ( (size_t) SSIZE_MAX < count ) - count = (size_t) SSIZE_MAX; - off_t write_end = off + (off_t) count; - if ( file_size < write_end && !ChangeSize(write_end, false) ) + ssize_t so_far = 0; + int iov_i = 0; + size_t iov_offset = 0; + while ( iov_i < iovcnt && so_far < SSIZE_MAX ) { - if ( file_size < off ) - return -1; - count = (size_t) (file_size - off); - write_end = off + (off_t) count; - } - assert(write_end <= file_size); - size_t sofar = 0; - while ( sofar < count ) - { - off_t current_off = off + (off_t) sofar; - size_t left = count - sofar; + off_t current_off = off + (off_t) so_far; + size_t maxcount = SSIZE_MAX - so_far; + if ( (uintmax_t) (OFF_MAX - current_off) < maxcount ) + maxcount = OFF_MAX - current_off; + const struct iovec* iov = &iovs[iov_i]; + uint8_t* buf = (uint8_t*) iov->iov_base + iov_offset; + size_t count = iov->iov_len - iov_offset; + if ( maxcount < count ) + count = maxcount; + if ( count == 0 ) + { + if ( so_far == 0 && maxcount == 0 && iov->iov_len != 0 ) + return errno = ENOSPC, -1; + iov_i++; + iov_offset = 0; + continue; + } + off_t write_end = current_off + count; + if ( file_size < write_end && !ChangeSize(write_end, false) ) + { + if ( file_size <= current_off ) + return so_far ? so_far : -1; /* Report bytes already written, if any. */ + if ( (uintmax_t) (file_size - current_off) < count ) + count = file_size - current_off; + } size_t block_off = (size_t) (current_off % Page::Size()); size_t block_num = (size_t) (current_off / Page::Size()); size_t block_left = Page::Size() - block_off; - size_t amount_to_copy = left < block_left ? left : block_left; + size_t amount = count < block_left ? 
count : block_left; assert(block_num < blocks_used); BlockCacheBlock* block = blocks[block_num]; uint8_t* block_data = kernel_block_cache->BlockData(block); uint8_t* data = block_data + block_off; - const uint8_t* src_buf = buf + sofar; - off_t begin_at = off + (off_t) sofar; - off_t end_at = current_off + (off_t) amount_to_copy; - if ( file_written < begin_at ) - InitializeFileData(begin_at); + if ( file_written < current_off ) + InitializeFileData(current_off); + assert(amount); modified = true; /* Unconditionally - copy_from_src can fail midway. */ - if ( !ctx->copy_from_src(data, src_buf, amount_to_copy) ) - return sofar ? (ssize_t) sofar : -1; - if ( file_written < end_at ) - file_written = end_at; - sofar += amount_to_copy; + if ( !ctx->copy_from_src(data, buf, amount) ) + return so_far ? (ssize_t) so_far : -1; + if ( file_written < current_off + (off_t) amount ) + file_written = current_off + (off_t) amount; + so_far += amount; kernel_block_cache->MarkModified(block); + iov_offset += amount; + if ( iov_offset == iov->iov_len ) + { + iov_i++; + iov_offset = 0; + } } - return (ssize_t) sofar; + return so_far; } int FileCache::truncate(ioctx_t* /*ctx*/, off_t length) diff --git a/kernel/fs/kram.cpp b/kernel/fs/kram.cpp index fa6c2c0d..80ea554c 100644 --- a/kernel/fs/kram.cpp +++ b/kernel/fs/kram.cpp @@ -18,6 +18,7 @@ */ #include +#include #include #include @@ -124,6 +125,7 @@ File::File(InodeType inode_type, mode_t type, dev_t dev, ino_t ino, uid_t owner, this->stat_blksize = 1; this->dev = dev; this->ino = ino; + this->supports_iovec = true; } File::~File() @@ -147,14 +149,16 @@ off_t File::lseek(ioctx_t* ctx, off_t offset, int whence) return fcache.lseek(ctx, offset, whence); } -ssize_t File::pread(ioctx_t* ctx, uint8_t* dest, size_t count, off_t off) +ssize_t File::preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) { - return fcache.pread(ctx, dest, count, off); + return fcache.preadv(ctx, iov, iovcnt, off); } -ssize_t 
File::pwrite(ioctx_t* ctx, const uint8_t* src, size_t count, off_t off) +ssize_t File::pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) { - ssize_t ret = fcache.pwrite(ctx, src, count, off); + ssize_t ret = fcache.pwritev(ctx, iov, iovcnt, off); if ( 0 < ret ) { ScopedLock lock(&metalock); @@ -170,7 +174,11 @@ ssize_t File::readlink(ioctx_t* ctx, char* buf, size_t bufsize) return errno = EINVAL, -1; if ( (size_t) SSIZE_MAX < bufsize ) bufsize = SSIZE_MAX; - return fcache.pread(ctx, (uint8_t*) buf, bufsize, 0); + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = buf; + iov.iov_len = bufsize; + return fcache.preadv(ctx, &iov, 1, 0); } ssize_t File::tcgetblob(ioctx_t* ctx, const char* name, void* buffer, size_t count) diff --git a/kernel/fs/kram.h b/kernel/fs/kram.h index 38b1005a..71f32db7 100644 --- a/kernel/fs/kram.h +++ b/kernel/fs/kram.h @@ -41,10 +41,10 @@ public: virtual ~File(); virtual int truncate(ioctx_t* ctx, off_t length); virtual off_t lseek(ioctx_t* ctx, off_t offset, int whence); - virtual ssize_t pread(ioctx_t* ctx, uint8_t* buf, size_t count, - off_t off); - virtual ssize_t pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, + virtual ssize_t preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, off_t off); + virtual ssize_t pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); virtual ssize_t readlink(ioctx_t* ctx, char* buf, size_t bufsiz); virtual ssize_t tcgetblob(ioctx_t* ctx, const char* name, void* buffer, size_t count); diff --git a/kernel/fs/user.cpp b/kernel/fs/user.cpp index b7b9254c..2921d279 100644 --- a/kernel/fs/user.cpp +++ b/kernel/fs/user.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. + * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017 Jonas 'Sortie' Termansen. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -204,11 +204,17 @@ public: virtual int truncate(ioctx_t* ctx, off_t length); virtual off_t lseek(ioctx_t* ctx, off_t offset, int whence); virtual ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count); - virtual ssize_t pread(ioctx_t* ctx, uint8_t* buf, size_t count, - off_t off); + virtual ssize_t readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt); + virtual ssize_t pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off); + virtual ssize_t preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); virtual ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count); + virtual ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); virtual ssize_t pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off); + virtual ssize_t pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); + virtual int utimens(ioctx_t* ctx, const struct timespec* times); virtual int isatty(ioctx_t* ctx); virtual ssize_t readdirents(ioctx_t* ctx, struct dirent* dirent, @@ -242,8 +248,10 @@ public: virtual int connect(ioctx_t* ctx, const uint8_t* addr, size_t addrlen); virtual int listen(ioctx_t* ctx, int backlog); virtual ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags); + virtual ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags); virtual ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags); + virtual ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags); virtual int getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr); virtual int setsockopt(ioctx_t* ctx, int level, int option_name, @@ -898,6 +906,32 @@ ssize_t Unode::read(ioctx_t* ctx, uint8_t* buf, size_t count) return ret; } +ssize_t Unode::readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + ssize_t sofar = 0; + for ( int 
i = 0; i < iovcnt && sofar < SSIZE_MAX; i++ ) + { + size_t maxcount = SSIZE_MAX - sofar; + uint8_t* buf = (uint8_t*) iov[i].iov_base; + size_t count = iov[i].iov_len; + if ( maxcount < count ) + count = maxcount; + int old_dflags = ctx->dflags; + if ( sofar ) + ctx->dflags |= O_NONBLOCK; + ssize_t amount = read(ctx, buf, count); + ctx->dflags = old_dflags; + if ( amount < 0 ) + return sofar ? sofar : -1; + if ( amount == 0 ) + break; + sofar += amount; + if ( (size_t) amount < count ) + break; + } + return sofar; +} + ssize_t Unode::pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off) { Channel* channel = server->Connect(ctx); @@ -921,6 +955,36 @@ ssize_t Unode::pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off) return ret; } +ssize_t Unode::preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) +{ + ssize_t sofar = 0; + for ( int i = 0; i < iovcnt && sofar < SSIZE_MAX; i++ ) + { + size_t maxcount = SSIZE_MAX - sofar; + uint8_t* buf = (uint8_t*) iov[i].iov_base; + size_t count = iov[i].iov_len; + if ( maxcount < count ) + count = maxcount; + off_t offset; + if ( __builtin_add_overflow(off, sofar, &offset) ) + return sofar ? sofar : (errno = EOVERFLOW, -1); + int old_dflags = ctx->dflags; + if ( sofar ) + ctx->dflags |= O_NONBLOCK; + ssize_t amount = pread(ctx, buf, count, offset); + ctx->dflags = old_dflags; + if ( amount < 0 ) + return sofar ? 
sofar : -1; + if ( amount == 0 ) + break; + sofar += amount; + if ( (size_t) amount < count ) + break; + } + return sofar; +} + ssize_t Unode::write(ioctx_t* ctx, const uint8_t* buf, size_t count) { Channel* channel = server->Connect(ctx); @@ -945,6 +1009,32 @@ ssize_t Unode::write(ioctx_t* ctx, const uint8_t* buf, size_t count) return ret; } +ssize_t Unode::writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + ssize_t sofar = 0; + for ( int i = 0; i < iovcnt && sofar < SSIZE_MAX; i++ ) + { + size_t maxcount = SSIZE_MAX - sofar; + const uint8_t* buf = (uint8_t*) iov[i].iov_base; + size_t count = iov[i].iov_len; + if ( maxcount < count ) + count = maxcount; + int old_dflags = ctx->dflags; + if ( sofar ) + ctx->dflags |= O_NONBLOCK; + ssize_t amount = write(ctx, buf, count); + ctx->dflags = old_dflags; + if ( amount < 0 ) + return sofar ? sofar : -1; + if ( amount == 0 ) + break; + sofar += amount; + if ( (size_t) amount < count ) + break; + } + return sofar; +} + ssize_t Unode::pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off) { Channel* channel = server->Connect(ctx); @@ -970,6 +1060,36 @@ ssize_t Unode::pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off) return ret; } +ssize_t Unode::pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) +{ + ssize_t sofar = 0; + for ( int i = 0; i < iovcnt && sofar < SSIZE_MAX; i++ ) + { + size_t maxcount = SSIZE_MAX - sofar; + uint8_t* buf = (uint8_t*) iov[i].iov_base; + size_t count = iov[i].iov_len; + if ( maxcount < count ) + count = maxcount; + off_t offset; + if ( __builtin_add_overflow(off, sofar, &offset) ) + return sofar ? sofar : (errno = EOVERFLOW, -1); + int old_dflags = ctx->dflags; + if ( sofar ) + ctx->dflags |= O_NONBLOCK; + ssize_t amount = pwrite(ctx, buf, count, offset); + ctx->dflags = old_dflags; + if ( amount < 0 ) + return sofar ? 
sofar : -1; + if ( amount == 0 ) + break; + sofar += amount; + if ( (size_t) amount < count ) + break; + } + return sofar; +} + int Unode::utimens(ioctx_t* ctx, const struct timespec* times) { Channel* channel = server->Connect(ctx); @@ -1368,12 +1488,23 @@ ssize_t Unode::recv(ioctx_t* /*ctx*/, uint8_t* /*buf*/, size_t /*count*/, return errno = ENOTSOCK, -1; } +ssize_t Unode::recvmsg(ioctx_t* /*ctx*/, struct msghdr* /*msg*/, int /*flags*/) +{ + return errno = ENOTSOCK, -1; +} + ssize_t Unode::send(ioctx_t* /*ctx*/, const uint8_t* /*buf*/, size_t /*count*/, int /*flags*/) { return errno = ENOTSOCK, -1; } +ssize_t Unode::sendmsg(ioctx_t* /*ctx*/, const struct msghdr* /*msg*/, + int /*flags*/) +{ + return errno = ENOTSOCK, -1; +} + int Unode::getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr) { diff --git a/kernel/include/sortix/kernel/descriptor.h b/kernel/include/sortix/kernel/descriptor.h index f995d219..7cfa4bf5 100644 --- a/kernel/include/sortix/kernel/descriptor.h +++ b/kernel/include/sortix/kernel/descriptor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. + * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017 Jonas 'Sortie' Termansen. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -30,6 +30,8 @@ #include struct dirent; +struct iovec; +struct msghdr; struct stat; struct statvfs; struct termios; @@ -64,9 +66,15 @@ public: int truncate(ioctx_t* ctx, off_t length); off_t lseek(ioctx_t* ctx, off_t offset, int whence); ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count); + ssize_t readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt); ssize_t pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off); + ssize_t preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count); + ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); ssize_t pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off); + ssize_t pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); int utimens(ioctx_t* ctx, const struct timespec* times); int isatty(ioctx_t* ctx); ssize_t readdirents(ioctx_t* ctx, struct dirent* dirent, size_t size); @@ -92,7 +100,9 @@ public: int connect(ioctx_t* ctx, const uint8_t* addr, size_t addrlen); int listen(ioctx_t* ctx, int backlog); ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags); + ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags); ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags); + ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags); int getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr); int setsockopt(ioctx_t* ctx, int level, int option_name, diff --git a/kernel/include/sortix/kernel/fcache.h b/kernel/include/sortix/kernel/fcache.h index bb3c92f0..2a182022 100644 --- a/kernel/include/sortix/kernel/fcache.h +++ b/kernel/include/sortix/kernel/fcache.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 Jonas 'Sortie' Termansen. 
+ * Copyright (c) 2013, 2014, 2017 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -29,6 +29,8 @@ #include #include +struct iovec; + namespace Sortix { struct ioctx_struct; @@ -102,8 +104,10 @@ public: FileCache(/*FileCacheBackend* backend = NULL*/); ~FileCache(); int sync(ioctx_t* ctx); - ssize_t pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off); - ssize_t pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off); + ssize_t preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); + ssize_t pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); int truncate(ioctx_t* ctx, off_t length); off_t lseek(ioctx_t* ctx, off_t offset, int whence); //bool ChangeBackend(FileCacheBackend* backend, bool sync_old); diff --git a/kernel/include/sortix/kernel/inode.h b/kernel/include/sortix/kernel/inode.h index 8133a8df..6b53c032 100644 --- a/kernel/include/sortix/kernel/inode.h +++ b/kernel/include/sortix/kernel/inode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. + * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017 Jonas 'Sortie' Termansen. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -30,6 +30,8 @@ #include struct dirent; +struct iovec; +struct msghdr; struct stat; struct statvfs; struct termios; @@ -62,11 +64,19 @@ public: virtual int truncate(ioctx_t* ctx, off_t length) = 0; virtual off_t lseek(ioctx_t* ctx, off_t offset, int whence) = 0; virtual ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count) = 0; + virtual ssize_t readv(ioctx_t* ctx, const struct iovec* iov, + int iovcnt) = 0; virtual ssize_t pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off) = 0; + virtual ssize_t preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) = 0; virtual ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count) = 0; + virtual ssize_t writev(ioctx_t* ctx, const struct iovec* iov, + int iovcnt) = 0; virtual ssize_t pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off) = 0; + virtual ssize_t pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) = 0; virtual int utimens(ioctx_t* ctx, const struct timespec* times) = 0; virtual int isatty(ioctx_t* ctx) = 0; virtual ssize_t readdirents(ioctx_t* ctx, struct dirent* dirent, @@ -100,8 +110,11 @@ public: virtual int connect(ioctx_t* ctx, const uint8_t* addr, size_t addrlen) = 0; virtual int listen(ioctx_t* ctx, int backlog) = 0; virtual ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags) = 0; + virtual ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags) = 0; virtual ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags) = 0; + virtual ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, + int flags) = 0; virtual int getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr) = 0; virtual int setsockopt(ioctx_t* ctx, int level, int option_name, @@ -144,6 +157,7 @@ protected: struct timespec stat_ctim; blksize_t stat_blksize; blkcnt_t 
stat_blocks; + bool supports_iovec; public: AbstractInode(); @@ -158,10 +172,16 @@ public: virtual int truncate(ioctx_t* ctx, off_t length); virtual off_t lseek(ioctx_t* ctx, off_t offset, int whence); virtual ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count); + virtual ssize_t readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt); virtual ssize_t pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off); + virtual ssize_t preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); virtual ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count); + virtual ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); virtual ssize_t pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off); + virtual ssize_t pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); virtual int utimens(ioctx_t* ctx, const struct timespec* times); virtual int isatty(ioctx_t* ctx); virtual ssize_t readdirents(ioctx_t* ctx, struct dirent* dirent, @@ -195,8 +215,10 @@ public: virtual int connect(ioctx_t* ctx, const uint8_t* addr, size_t addrlen); virtual int listen(ioctx_t* ctx, int backlog); virtual ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags); + virtual ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags); virtual ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags); + virtual ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags); virtual int getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr); virtual int setsockopt(ioctx_t* ctx, int level, int option_name, diff --git a/kernel/include/sortix/kernel/pipe.h b/kernel/include/sortix/kernel/pipe.h index df7db068..effd53da 100644 --- a/kernel/include/sortix/kernel/pipe.h +++ b/kernel/include/sortix/kernel/pipe.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, 2013, 2014 Jonas 'Sortie' Termansen. + * Copyright (c) 2011, 2012, 2013, 2014, 2017 Jonas 'Sortie' Termansen. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -25,6 +25,9 @@ #include #include +struct msghdr; +struct iovec; + namespace Sortix { class PipeChannel; @@ -40,8 +43,12 @@ public: bool SetSIGPIPEDelivery(bool deliver_sigpipe); size_t Size(); bool Resize(size_t new_size); - ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count); - ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count); + ssize_t readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt); + ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags); + ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags); + ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags); + ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags); + ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); int poll(ioctx_t* ctx, PollNode* node); private: diff --git a/kernel/include/sortix/kernel/vnode.h b/kernel/include/sortix/kernel/vnode.h index 476aea7e..d7874e12 100644 --- a/kernel/include/sortix/kernel/vnode.h +++ b/kernel/include/sortix/kernel/vnode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. + * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017 Jonas 'Sortie' Termansen. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -29,6 +29,8 @@ #include struct dirent; +struct iovec; +struct msghdr; struct stat; struct statvfs; struct termios; @@ -61,9 +63,15 @@ public: int truncate(ioctx_t* ctx, off_t length); off_t lseek(ioctx_t* ctx, off_t offset, int whence); ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count); + ssize_t readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt); ssize_t pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off); + ssize_t preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count); + ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); ssize_t pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off); + ssize_t pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off); int utimens(ioctx_t* ctx, const struct timespec* times); int isatty(ioctx_t* ctx); ssize_t readdirents(ioctx_t* ctx, struct dirent* dirent, size_t size, @@ -90,7 +98,9 @@ public: int connect(ioctx_t* ctx, const uint8_t* addr, size_t addrlen); int listen(ioctx_t* ctx, int backlog); ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags); + ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags); ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags); + ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags); int getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr); int setsockopt(ioctx_t* ctx, int level, int option_name, diff --git a/kernel/include/sortix/limits.h b/kernel/include/sortix/limits.h index 60f7e7db..5699797f 100644 --- a/kernel/include/sortix/limits.h +++ b/kernel/include/sortix/limits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Jonas 'Sortie' Termansen. + * Copyright (c) 2014, 2016, 2017 Jonas 'Sortie' Termansen. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -25,6 +25,7 @@ #if __USE_SORTIX || __USE_POSIX #define HOST_NAME_MAX 255 #define TTY_NAME_MAX 32 +#define IOV_MAX 1024 #endif #endif diff --git a/kernel/inode.cpp b/kernel/inode.cpp index a1c9d2ec..abc5190c 100644 --- a/kernel/inode.cpp +++ b/kernel/inode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. + * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,7 +17,10 @@ * Interfaces and utility classes for implementing inodes. */ +#include + #include +#include #include #include @@ -49,6 +52,7 @@ AbstractInode::AbstractInode() stat_mtim = Time::Get(CLOCK_REALTIME); stat_blksize = 0; stat_blocks = 0; + supports_iovec = false; } AbstractInode::~AbstractInode() @@ -147,32 +151,186 @@ off_t AbstractInode::lseek(ioctx_t* /*ctx*/, off_t /*offset*/, int /*whence*/) return errno = EBADF, -1; } -ssize_t AbstractInode::read(ioctx_t* /*ctx*/, uint8_t* /*buf*/, - size_t /*count*/) +ssize_t AbstractInode::read(ioctx_t* ctx, uint8_t* buf, size_t count) { - return errno = EBADF, -1; + if ( !supports_iovec ) + return errno = EBADF, -1; + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = (void*) buf; + iov.iov_len = count; + return readv(ctx, &iov, 1); } -ssize_t AbstractInode::pread(ioctx_t* /*ctx*/, uint8_t* /*buf*/, - size_t /*count*/, off_t /*off*/) +ssize_t AbstractInode::readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt) { - if ( inode_type == INODE_TYPE_STREAM || inode_type == INODE_TYPE_TTY ) - return errno = ESPIPE, -1; - return errno = EBADF, -1; + if ( supports_iovec ) + return errno = EBADF, -1; + ssize_t sofar = 0; + for ( int i = 0; i < iovcnt && sofar < SSIZE_MAX; i++ ) + { + 
size_t maxcount = SSIZE_MAX - sofar; + uint8_t* buf = (uint8_t*) iov[i].iov_base; + size_t count = iov[i].iov_len; + if ( maxcount < count ) + count = maxcount; + int old_dflags = ctx->dflags; + if ( sofar ) + ctx->dflags |= O_NONBLOCK; + ssize_t amount = read(ctx, buf, count); + ctx->dflags = old_dflags; + if ( amount < 0 ) + return sofar ? sofar : -1; + if ( amount == 0 ) + break; + sofar += amount; + if ( (size_t) amount < count ) + break; + } + return sofar; } -ssize_t AbstractInode::write(ioctx_t* /*ctx*/, const uint8_t* /*buf*/, - size_t /*count*/) +ssize_t AbstractInode::pread(ioctx_t* ctx, uint8_t* buf, size_t count, + off_t off) { - return errno = EBADF, -1; + if ( !supports_iovec ) + { + if ( inode_type == INODE_TYPE_STREAM || inode_type == INODE_TYPE_TTY ) + return errno = ESPIPE, -1; + return errno = EBADF, -1; + } + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = (void*) buf; + iov.iov_len = count; + return preadv(ctx, &iov, 1, off); } -ssize_t AbstractInode::pwrite(ioctx_t* /*ctx*/, const uint8_t* /*buf*/, - size_t /*count*/, off_t /*off*/) +ssize_t AbstractInode::preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) { - if ( inode_type == INODE_TYPE_STREAM || inode_type == INODE_TYPE_TTY ) - return errno = ESPIPE, -1; - return errno = EBADF, -1; + if ( supports_iovec ) + { + if ( inode_type == INODE_TYPE_STREAM || inode_type == INODE_TYPE_TTY ) + return errno = ESPIPE, -1; + return errno = EBADF, -1; + } + ssize_t sofar = 0; + for ( int i = 0; i < iovcnt && sofar < SSIZE_MAX; i++ ) + { + size_t maxcount = SSIZE_MAX - sofar; + uint8_t* buf = (uint8_t*) iov[i].iov_base; + size_t count = iov[i].iov_len; + if ( maxcount < count ) + count = maxcount; + off_t offset; + if ( __builtin_add_overflow(off, sofar, &offset) ) + return sofar ? 
sofar : (errno = EOVERFLOW, -1); + int old_dflags = ctx->dflags; + if ( sofar ) + ctx->dflags |= O_NONBLOCK; + ssize_t amount = pread(ctx, buf, count, offset); + ctx->dflags = old_dflags; + if ( amount < 0 ) + return sofar ? sofar : -1; + if ( amount == 0 ) + break; + sofar += amount; + if ( (size_t) amount < count ) + break; + } + return sofar; +} + +ssize_t AbstractInode::write(ioctx_t* ctx, const uint8_t* buf, size_t count) +{ + if ( !supports_iovec ) + return errno = EBADF, -1; + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = (void*) buf; + iov.iov_len = count; + return writev(ctx, &iov, 1); +} + +ssize_t AbstractInode::writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + if ( supports_iovec ) + return errno = EBADF, -1; + ssize_t sofar = 0; + for ( int i = 0; i < iovcnt && sofar < SSIZE_MAX; i++ ) + { + size_t maxcount = SSIZE_MAX - sofar; + const uint8_t* buf = (uint8_t*) iov[i].iov_base; + size_t count = iov[i].iov_len; + if ( maxcount < count ) + count = maxcount; + int old_dflags = ctx->dflags; + if ( sofar ) + ctx->dflags |= O_NONBLOCK; + ssize_t amount = write(ctx, buf, count); + ctx->dflags = old_dflags; + if ( amount < 0 ) + return sofar ? 
sofar : -1; + if ( amount == 0 ) + break; + sofar += amount; + if ( (size_t) amount < count ) + break; + } + return sofar; +} + +ssize_t AbstractInode::pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, + off_t off) +{ + if ( !supports_iovec ) + { + if ( inode_type == INODE_TYPE_STREAM || inode_type == INODE_TYPE_TTY ) + return errno = ESPIPE, -1; + return errno = EBADF, -1; + } + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = (void*) buf; + iov.iov_len = count; + return pwritev(ctx, &iov, 1, off); +} + +ssize_t AbstractInode::pwritev(ioctx_t* ctx, const struct iovec* iov, + int iovcnt, off_t off) +{ + if ( supports_iovec ) + { + if ( inode_type == INODE_TYPE_STREAM || inode_type == INODE_TYPE_TTY ) + return errno = ESPIPE, -1; + return errno = EBADF, -1; + } + ssize_t sofar = 0; + for ( int i = 0; i < iovcnt && sofar < SSIZE_MAX; i++ ) + { + size_t maxcount = SSIZE_MAX - sofar; + uint8_t* buf = (uint8_t*) iov[i].iov_base; + size_t count = iov[i].iov_len; + if ( maxcount < count ) + count = maxcount; + off_t offset; + if ( __builtin_add_overflow(off, sofar, &offset) ) + return sofar ? sofar : (errno = EOVERFLOW, -1); + int old_dflags = ctx->dflags; + if ( sofar ) + ctx->dflags |= O_NONBLOCK; + ssize_t amount = pwrite(ctx, buf, count, offset); + ctx->dflags = old_dflags; + if ( amount < 0 ) + return sofar ? 
sofar : -1; + if ( amount == 0 ) + break; + sofar += amount; + if ( (size_t) amount < count ) + break; + } + return sofar; } int AbstractInode::utimens(ioctx_t* /*ctx*/, const struct timespec* times) @@ -383,12 +541,24 @@ ssize_t AbstractInode::recv(ioctx_t* /*ctx*/, uint8_t* /*buf*/, return errno = ENOTSOCK, -1; } +ssize_t AbstractInode::recvmsg(ioctx_t* /*ctx*/, struct msghdr* /*msg*/, + int /*flags*/) +{ + return errno = ENOTSOCK, -1; +} + ssize_t AbstractInode::send(ioctx_t* /*ctx*/, const uint8_t* /*buf*/, size_t /*count*/, int /*flags*/) { return errno = ENOTSOCK, -1; } +ssize_t AbstractInode::sendmsg(ioctx_t* /*ctx*/, const struct msghdr* /*msg*/, + int /*flags*/) +{ + return errno = ENOTSOCK, -1; +} + int AbstractInode::getsockopt(ioctx_t* /*ctx*/, int /*level*/, int /*option_name*/, void* /*option_value*/, size_t* /*option_size_ptr*/) { diff --git a/kernel/io.cpp b/kernel/io.cpp index da4f3930..273b6e2a 100644 --- a/kernel/io.cpp +++ b/kernel/io.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. + * Copyright (c) 2011-2017 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -786,165 +786,40 @@ ssize_t sys_send(int fd, const void* buffer, size_t count, int flags) return desc->send(&ctx, (const uint8_t*) buffer, count, flags); } -// TODO: We need to move these vector operations into the file descriptors or -// inodes themselves to ensure that they are atomic. Currently these -// operations may overlap and cause nasty bugs/race conditions when -// multiple threads concurrently operates on a file. -// TODO: There is quite a bit of boiler plate code here. Can we do better? 
- -static struct iovec* FetchIOV(const struct iovec* user_iov, int iovcnt) -{ - if ( iovcnt < 0 ) - return errno = EINVAL, (struct iovec*) NULL; - struct iovec* ret = new struct iovec[iovcnt]; - if ( !ret ) - return NULL; - if ( !CopyFromUser(ret, user_iov, sizeof(struct iovec) * (size_t) iovcnt) ) - { - delete[] ret; - return NULL; - } - return ret; -} - -ssize_t sys_readv(int fd, const struct iovec* user_iov, int iovcnt) +ssize_t sys_readv(int fd, const struct iovec* iov, int iovcnt) { Ref desc = CurrentProcess()->GetDescriptor(fd); if ( !desc ) return -1; ioctx_t ctx; SetupUserIOCtx(&ctx); - struct iovec* iov = FetchIOV(user_iov, iovcnt); - if ( !iov ) - return -1; - ssize_t so_far = 0; - for ( int i = 0; i < iovcnt && so_far != SSIZE_MAX; i++ ) - { - uint8_t* buffer = (uint8_t*) iov[i].iov_base; - size_t amount = iov[i].iov_len; - ssize_t max_left = SSIZE_MAX - so_far; - if ( (size_t) max_left < amount ) - amount = (size_t) max_left; - ssize_t num_bytes = desc->read(&ctx, buffer, amount); - if ( num_bytes < 0 ) - { - delete[] iov; - return so_far ? so_far : -1; - } - if ( num_bytes == 0 ) - break; - so_far += num_bytes; - - // TODO: Is this the correct behavior? 
- if ( (size_t) num_bytes != amount ) - break; - } - delete[] iov; - return so_far; + return desc->readv(&ctx, iov, iovcnt); } -ssize_t sys_preadv(int fd, const struct iovec* user_iov, int iovcnt, off_t offset) +ssize_t sys_preadv(int fd, const struct iovec* iov, int iovcnt, off_t offset) { Ref desc = CurrentProcess()->GetDescriptor(fd); if ( !desc ) return -1; ioctx_t ctx; SetupUserIOCtx(&ctx); - struct iovec* iov = FetchIOV(user_iov, iovcnt); - if ( !iov ) - return -1; - ssize_t so_far = 0; - for ( int i = 0; i < iovcnt && so_far != SSIZE_MAX; i++ ) - { - uint8_t* buffer = (uint8_t*) iov[i].iov_base; - size_t amount = iov[i].iov_len; - ssize_t max_left = SSIZE_MAX - so_far; - if ( (size_t) max_left < amount ) - amount = (size_t) max_left; - ssize_t num_bytes = desc->pread(&ctx, buffer, amount, offset + so_far); - if ( num_bytes < 0 ) - { - delete[] iov; - return so_far ? so_far : -1; - } - if ( num_bytes == 0 ) - break; - so_far += num_bytes; - - // TODO: Is this the correct behavior? - if ( (size_t) num_bytes != amount ) - break; - } - delete[] iov; - return so_far; + return desc->preadv(&ctx, iov, iovcnt, offset); } -ssize_t sys_writev(int fd, const struct iovec* user_iov, int iovcnt) +ssize_t sys_writev(int fd, const struct iovec* iov, int iovcnt) { Ref desc = CurrentProcess()->GetDescriptor(fd); if ( !desc ) return -1; ioctx_t ctx; SetupUserIOCtx(&ctx); - struct iovec* iov = FetchIOV(user_iov, iovcnt); - if ( !iov ) - return -1; - ssize_t so_far = 0; - for ( int i = 0; i < iovcnt && so_far != SSIZE_MAX; i++ ) - { - const uint8_t* buffer = (const uint8_t*) iov[i].iov_base; - size_t amount = iov[i].iov_len; - ssize_t max_left = SSIZE_MAX - so_far; - if ( (size_t) max_left < amount ) - amount = (size_t) max_left; - ssize_t num_bytes = desc->write(&ctx, buffer, amount); - if ( num_bytes < 0 ) - { - delete[] iov; - return so_far ? so_far : -1; - } - if ( num_bytes == 0 ) - break; - so_far += num_bytes; - - // TODO: Is this the correct behavior? 
- if ( (size_t) num_bytes != amount ) - break; - } - delete[] iov; - return so_far; + return desc->writev(&ctx, iov, iovcnt); } -ssize_t sys_pwritev(int fd, const struct iovec* user_iov, int iovcnt, off_t offset) +ssize_t sys_pwritev(int fd, const struct iovec* iov, int iovcnt, off_t offset) { Ref desc = CurrentProcess()->GetDescriptor(fd); if ( !desc ) return -1; ioctx_t ctx; SetupUserIOCtx(&ctx); - struct iovec* iov = FetchIOV(user_iov, iovcnt); - if ( !iov ) - return -1; - ssize_t so_far = 0; - for ( int i = 0; i < iovcnt && so_far != SSIZE_MAX; i++ ) - { - const uint8_t* buffer = (const uint8_t*) iov[i].iov_base; - size_t amount = iov[i].iov_len; - ssize_t max_left = SSIZE_MAX - so_far; - if ( (size_t) max_left < amount ) - amount = (size_t) max_left; - ssize_t num_bytes = desc->pwrite(&ctx, buffer, amount, offset + so_far); - if ( num_bytes < 0 ) - { - delete[] iov; - return so_far ? so_far : -1; - } - if ( num_bytes == 0 ) - break; - so_far += num_bytes; - - // TODO: Is this the correct behavior? - if ( (size_t) num_bytes != amount ) - break; - } - delete[] iov; - return so_far; + return desc->pwritev(&ctx, iov, iovcnt, offset); } int sys_mkpartition(int fd, off_t start, off_t length, int flags) @@ -979,45 +854,22 @@ int sys_mkpartition(int fd, off_t start, off_t length, int flags) return CurrentProcess()->GetDTable()->Allocate(partition_desc, fdflags); } -ssize_t sys_sendmsg(int fd, const struct msghdr* user_msg, int flags) +ssize_t sys_sendmsg(int fd, const struct msghdr* msg, int flags) { - struct msghdr msg; - if ( !CopyFromUser(&msg, user_msg, sizeof(msg)) ) - return -1; - // TODO: MSG_DONTWAIT and MSG_NOSIGNAL aren't actually supported here! 
- if ( flags & ~(MSG_EOR | MSG_DONTWAIT | MSG_NOSIGNAL) ) - return errno = EINVAL, -1; - if ( msg.msg_name ) - return errno = EINVAL, -1; - if ( msg.msg_control && msg.msg_controllen ) - return errno = EINVAL, -1; - return sys_writev(fd, msg.msg_iov, msg.msg_iovlen); -} - -ssize_t sys_recvmsg(int fd, struct msghdr* user_msg, int flags) -{ - struct msghdr msg; - if ( !CopyFromUser(&msg, user_msg, sizeof(msg)) ) - return -1; - if ( flags & ~(MSG_CMSG_CLOEXEC | MSG_DONTWAIT) ) - return errno = EINVAL, -1; - if ( msg.msg_name ) - return errno = EINVAL, -1; Ref desc = CurrentProcess()->GetDescriptor(fd); if ( !desc ) return -1; + ioctx_t ctx; SetupUserIOCtx(&ctx); + return desc->sendmsg(&ctx, msg, flags); +} - // TODO: This is not atomic. - int old_flags = desc->GetFlags(); - desc->SetFlags(old_flags | O_NONBLOCK); - ssize_t result = sys_readv(fd, msg.msg_iov, msg.msg_iovlen); - desc->SetFlags(old_flags); - - msg.msg_flags = 0; - if ( !CopyToUser(&user_msg->msg_flags, &msg.msg_flags, sizeof(msg.msg_flags)) ) +ssize_t sys_recvmsg(int fd, struct msghdr* msg, int flags) +{ + Ref desc = CurrentProcess()->GetDescriptor(fd); + if ( !desc ) return -1; - - return result; + ioctx_t ctx; SetupUserIOCtx(&ctx); + return desc->recvmsg(&ctx, msg, flags); } int sys_getsockopt(int fd, int level, int option_name, diff --git a/kernel/net/fs.cpp b/kernel/net/fs.cpp index f16dba44..661ef436 100644 --- a/kernel/net/fs.cpp +++ b/kernel/net/fs.cpp @@ -90,10 +90,14 @@ public: virtual int connect(ioctx_t* ctx, const uint8_t* addr, size_t addrsize); virtual int listen(ioctx_t* ctx, int backlog); virtual ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags); + virtual ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags); virtual ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags); + virtual ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags); virtual ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count); + virtual ssize_t readv(ioctx_t* ctx, 
const struct iovec* iov, int iovcnt); virtual ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count); + virtual ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); virtual int poll(ioctx_t* ctx, PollNode* node); virtual int getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr); @@ -171,6 +175,7 @@ StreamSocket::StreamSocket(uid_t owner, gid_t group, mode_t mode, this->socket_lock = KTHREAD_MUTEX_INITIALIZER; this->pending_cond = KTHREAD_COND_INITIALIZER; this->accepted_cond = KTHREAD_COND_INITIALIZER; + this->supports_iovec = true; } StreamSocket::~StreamSocket() @@ -254,21 +259,37 @@ int StreamSocket::listen(ioctx_t* /*ctx*/, int /*backlog*/) } ssize_t StreamSocket::recv(ioctx_t* ctx, uint8_t* buf, size_t count, - int /*flags*/) + int flags) { ScopedLock lock(&socket_lock); if ( !is_connected ) return errno = ENOTCONN, -1; - return incoming.read(ctx, buf, count); + return incoming.recv(ctx, buf, count, flags); +} + +ssize_t StreamSocket::recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags) +{ + ScopedLock lock(&socket_lock); + if ( !is_connected ) + return errno = ENOTCONN, -1; + return incoming.recvmsg(ctx, msg, flags); } ssize_t StreamSocket::send(ioctx_t* ctx, const uint8_t* buf, size_t count, - int /*flags*/) + int flags) { ScopedLock lock(&socket_lock); if ( !is_connected ) return errno = ENOTCONN, -1; - return outgoing.write(ctx, buf, count); + return outgoing.send(ctx, buf, count, flags); +} + +ssize_t StreamSocket::sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags) +{ + ScopedLock lock(&socket_lock); + if ( !is_connected ) + return errno = ENOTCONN, -1; + return outgoing.sendmsg(ctx, msg, flags); } ssize_t StreamSocket::read(ioctx_t* ctx, uint8_t* buf, size_t count) @@ -276,11 +297,27 @@ ssize_t StreamSocket::read(ioctx_t* ctx, uint8_t* buf, size_t count) return recv(ctx, buf, count, 0); } +ssize_t StreamSocket::readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + ScopedLock 
lock(&socket_lock); + if ( !is_connected ) + return errno = ENOTCONN, -1; + return incoming.readv(ctx, iov, iovcnt); +} + ssize_t StreamSocket::write(ioctx_t* ctx, const uint8_t* buf, size_t count) { return send(ctx, buf, count, 0); } +ssize_t StreamSocket::writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + ScopedLock lock(&socket_lock); + if ( !is_connected ) + return errno = ENOTCONN, -1; + return outgoing.writev(ctx, iov, iovcnt); +} + int StreamSocket::poll(ioctx_t* ctx, PollNode* node) { if ( is_connected ) diff --git a/kernel/pipe.cpp b/kernel/pipe.cpp index e9fc7422..5e4d3f50 100644 --- a/kernel/pipe.cpp +++ b/kernel/pipe.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011, 2012, 2013, 2014, 2015 Jonas 'Sortie' Termansen. + * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -19,10 +19,14 @@ #include #include +#include #include #include #include +#ifndef IOV_MAX +#include +#endif #include #include #include @@ -60,8 +64,14 @@ public: size_t WriteSize(); bool ReadResize(size_t new_size); bool WriteResize(size_t new_size); - ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count); - ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count); + ssize_t readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt); + ssize_t recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags); + ssize_t recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags); + ssize_t recvmsg_internal(ioctx_t* ctx, struct msghdr* msg, int flags); + ssize_t send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags); + ssize_t sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags); + ssize_t sendmsg_internal(ioctx_t* ctx, const struct msghdr* msg, int flags); + ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); int read_poll(ioctx_t* ctx, PollNode* node); int write_poll(ioctx_t* ctx, PollNode* 
node); @@ -139,21 +149,83 @@ void PipeChannel::CloseWriting() delete this; } -ssize_t PipeChannel::read(ioctx_t* ctx, uint8_t* buf, size_t count) +ssize_t PipeChannel::recv(ioctx_t* ctx, uint8_t* buf, size_t count, + int flags) { - if ( SSIZE_MAX < count ) - count = SSIZE_MAX; + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = (void*) buf; + iov.iov_len = count; + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + return recvmsg_internal(ctx, &msg, flags); +} + +ssize_t PipeChannel::readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = (struct iovec*) iov; + msg.msg_iovlen = iovcnt; + return recvmsg_internal(ctx, &msg, 0); +} + +ssize_t PipeChannel::recvmsg(ioctx_t* ctx, struct msghdr* msg_ptr, int flags) +{ + struct msghdr msg; + if ( !ctx->copy_from_src(&msg, msg_ptr, sizeof(msg)) ) + return -1; + if ( msg.msg_iovlen < 0 || IOV_MAX < msg.msg_iovlen ) + return errno = EINVAL, -1; + size_t iov_size = msg.msg_iovlen * sizeof(struct iovec); + struct iovec* iov = new struct iovec[msg.msg_iovlen]; + if ( !iov ) + return -1; + if ( !ctx->copy_from_src(iov, msg.msg_iov, iov_size) ) + return delete[] iov, -1; + msg.msg_iov = iov; + ssize_t result = recvmsg_internal(ctx, &msg, flags); + delete[] iov; + if ( !ctx->copy_to_dest(msg_ptr, &msg, sizeof(msg)) ) + return -1; + return result; +} + +ssize_t PipeChannel::recvmsg_internal(ioctx_t* ctx, struct msghdr* msg, + int flags) +{ + if ( flags & ~(MSG_PEEK | MSG_WAITALL) ) + return errno = EINVAL, -1; Thread* this_thread = CurrentThread(); this_thread->yield_to_tid = sender_system_tid; ScopedLockSignal lock(&pipelock); if ( !lock.IsAcquired() ) return errno = EINTR, -1; - size_t so_far = 0; - while ( count ) + ssize_t so_far = 0; + size_t peeked = 0; + int iov_i = 0; + size_t iov_offset = 0; + while ( iov_i < msg->msg_iovlen && so_far < SSIZE_MAX ) { - receiver_system_tid = 
this_thread->system_tid; - while ( anywriting && !bufferused ) + size_t maxcount = SSIZE_MAX - so_far; + struct iovec* iov = &msg->msg_iov[iov_i]; + uint8_t* buf = (uint8_t*) iov->iov_base + iov_offset; + size_t count = iov->iov_len - iov_offset; + if ( maxcount < count ) + count = maxcount; + if ( count == 0 ) { + iov_i++; + iov_offset = 0; + continue; + } + receiver_system_tid = this_thread->system_tid; + while ( anywriting && bufferused <= peeked ) + { + if ( (flags & MSG_PEEK) && so_far ) + return so_far; this_thread->yield_to_tid = sender_system_tid; if ( pledged_read ) { @@ -164,7 +236,7 @@ ssize_t PipeChannel::read(ioctx_t* ctx, uint8_t* buf, size_t count) pledged_write--; continue; } - if ( so_far ) + if ( !(flags & MSG_WAITALL) && so_far ) return so_far; if ( ctx->dflags & O_NONBLOCK ) return errno = EWOULDBLOCK, -1; @@ -172,44 +244,115 @@ ssize_t PipeChannel::read(ioctx_t* ctx, uint8_t* buf, size_t count) bool interrupted = !kthread_cond_wait_signal(&readcond, &pipelock); pledged_write--; if ( interrupted ) - return errno = EINTR, -1; + return so_far ? so_far : (errno = EINTR, -1); } - if ( !bufferused && !anywriting ) - return (ssize_t) so_far; + size_t used = bufferused - peeked; + if ( !used && !anywriting ) + return so_far; size_t amount = count; - if ( bufferused < amount ) - amount = bufferused; - size_t linear = buffersize - bufferoffset; + if ( used < amount ) + amount = used; + size_t offset = bufferoffset; + if ( peeked ) + offset = (bufferoffset + peeked) % buffersize; + size_t linear = buffersize - offset; if ( linear < amount ) amount = linear; assert(amount); - if ( !ctx->copy_to_dest(buf, buffer + bufferoffset, amount) ) - return so_far ? (ssize_t) so_far : -1; - bufferoffset = (bufferoffset + amount) % buffersize; - bufferused -= amount; - buf += amount; - count -= amount; + if ( !ctx->copy_to_dest(buf, buffer + offset, amount) ) + return so_far ? 
so_far : -1; so_far += amount; - kthread_cond_broadcast(&writecond); - read_poll_channel.Signal(ReadPollEventStatus()); - write_poll_channel.Signal(WritePollEventStatus()); + if ( flags & MSG_PEEK ) + peeked += amount; + else + { + bufferoffset = (bufferoffset + amount) % buffersize; + bufferused -= amount; + kthread_cond_broadcast(&writecond); + read_poll_channel.Signal(ReadPollEventStatus()); + write_poll_channel.Signal(WritePollEventStatus()); + } + iov_offset += amount; + if ( iov_offset == iov->iov_len ) + { + iov_i++; + iov_offset = 0; + } } - return (ssize_t) so_far; + return so_far; } -ssize_t PipeChannel::write(ioctx_t* ctx, const uint8_t* buf, size_t count) +ssize_t PipeChannel::send(ioctx_t* ctx, const uint8_t* buf, size_t count, + int flags) { - if ( SSIZE_MAX < count ) - count = SSIZE_MAX; + struct iovec iov; + memset(&iov, 0, sizeof(iov)); + iov.iov_base = (void*) buf; + iov.iov_len = count; + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + return sendmsg_internal(ctx, &msg, flags); +} + +ssize_t PipeChannel::writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = (struct iovec*) iov; + msg.msg_iovlen = iovcnt; + return sendmsg_internal(ctx, &msg, 0); +} + +ssize_t PipeChannel::sendmsg(ioctx_t* ctx, const struct msghdr* msg_ptr, + int flags) +{ + struct msghdr msg; + if ( !ctx->copy_from_src(&msg, msg_ptr, sizeof(msg)) ) + return -1; + if ( msg.msg_iovlen < 0 || IOV_MAX < msg.msg_iovlen ) + return errno = EINVAL, -1; + size_t iov_size = msg.msg_iovlen * sizeof(struct iovec); + struct iovec* iov = new struct iovec[msg.msg_iovlen]; + if ( !iov ) + return -1; + if ( !ctx->copy_from_src(iov, msg.msg_iov, iov_size) ) + return delete[] iov, -1; + msg.msg_iov = iov; + ssize_t result = sendmsg_internal(ctx, &msg, flags); + delete[] iov; + return result; +} + +ssize_t PipeChannel::sendmsg_internal(ioctx_t* ctx, const struct msghdr* msg, + 
int flags) +{ + if ( flags & ~(MSG_WAITALL | MSG_NOSIGNAL) ) + return errno = EINVAL, -1; Thread* this_thread = CurrentThread(); this_thread->yield_to_tid = receiver_system_tid; ScopedLockSignal lock(&pipelock); if ( !lock.IsAcquired() ) return errno = EINTR, -1; sender_system_tid = this_thread->system_tid; - size_t so_far = 0; - while ( count ) + ssize_t so_far = 0; + int iov_i = 0; + size_t iov_offset = 0; + while ( iov_i < msg->msg_iovlen && so_far < SSIZE_MAX ) { + size_t maxcount = SSIZE_MAX - so_far; + struct iovec* iov = &msg->msg_iov[iov_i]; + const uint8_t* buf = (const uint8_t*) iov->iov_base + iov_offset; + size_t count = iov->iov_len - iov_offset; + if ( maxcount < count ) + count = maxcount; + if ( count == 0 ) + { + iov_i++; + iov_offset = 0; + continue; + } sender_system_tid = this_thread->system_tid; while ( anyreading && bufferused == buffersize ) { @@ -223,7 +366,7 @@ ssize_t PipeChannel::write(ioctx_t* ctx, const uint8_t* buf, size_t count) pledged_read--; continue; } - if ( so_far ) + if ( so_far && !(flags & MSG_WAITALL) ) return so_far; if ( ctx->dflags & O_NONBLOCK ) return errno = EWOULDBLOCK, -1; @@ -236,8 +379,8 @@ ssize_t PipeChannel::write(ioctx_t* ctx, const uint8_t* buf, size_t count) if ( !anyreading ) { if ( so_far ) - return (ssize_t) so_far; - if ( is_sigpipe_enabled ) + return so_far; + if ( is_sigpipe_enabled && !(flags & MSG_NOSIGNAL) ) CurrentThread()->DeliverSignal(SIGPIPE); return errno = EPIPE, -1; } @@ -250,16 +393,20 @@ ssize_t PipeChannel::write(ioctx_t* ctx, const uint8_t* buf, size_t count) amount = linear; assert(amount); if ( !ctx->copy_from_src(buffer + writeoffset, buf, amount) ) - return so_far ? (ssize_t) so_far : -1; + return so_far ? 
so_far : -1; bufferused += amount; - buf += amount; - count -= amount; so_far += amount; kthread_cond_broadcast(&readcond); read_poll_channel.Signal(ReadPollEventStatus()); write_poll_channel.Signal(WritePollEventStatus()); + iov_offset += amount; + if ( iov_offset == iov->iov_len ) + { + iov_i++; + iov_offset = 0; + } } - return (ssize_t) so_far; + return so_far; } short PipeChannel::ReadPollEventStatus() @@ -405,21 +552,62 @@ void PipeEndpoint::Disconnect() reading = false; } -ssize_t PipeEndpoint::read(ioctx_t* ctx, uint8_t* buf, size_t count) +ssize_t PipeEndpoint::recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags) { if ( !reading ) return errno = EBADF, -1; - ssize_t result = channel->read(ctx, buf, count); + ssize_t result = channel->recv(ctx, buf, count, flags); CurrentThread()->yield_to_tid = 0; Scheduler::ScheduleTrueThread(); return result; } -ssize_t PipeEndpoint::write(ioctx_t* ctx, const uint8_t* buf, size_t count) +ssize_t PipeEndpoint::recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags) +{ + if ( !reading ) + return errno = EBADF, -1; + ssize_t result = channel->recvmsg(ctx, msg, flags); + CurrentThread()->yield_to_tid = 0; + Scheduler::ScheduleTrueThread(); + return result; +} + +ssize_t PipeEndpoint::send(ioctx_t* ctx, const uint8_t* buf, size_t count, + int flags) { if ( reading ) return errno = EBADF, -1; - ssize_t result = channel->write(ctx, buf, count); + ssize_t result = channel->send(ctx, buf, count, flags); + CurrentThread()->yield_to_tid = 0; + Scheduler::ScheduleTrueThread(); + return result; +} + +ssize_t PipeEndpoint::sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags) +{ + if ( reading ) + return errno = EBADF, -1; + ssize_t result = channel->sendmsg(ctx, msg, flags); + CurrentThread()->yield_to_tid = 0; + Scheduler::ScheduleTrueThread(); + return result; +} + +ssize_t PipeEndpoint::readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + if ( !reading ) + return errno = EBADF, -1; + ssize_t result = 
channel->readv(ctx, iov, iovcnt); + CurrentThread()->yield_to_tid = 0; + Scheduler::ScheduleTrueThread(); + return result; +} + +ssize_t PipeEndpoint::writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + if ( reading ) + return errno = EBADF, -1; + ssize_t result = channel->writev(ctx, iov, iovcnt); CurrentThread()->yield_to_tid = 0; Scheduler::ScheduleTrueThread(); return result; @@ -462,8 +650,8 @@ class PipeNode : public AbstractInode public: PipeNode(dev_t dev, uid_t owner, gid_t group, mode_t mode); virtual ~PipeNode(); - virtual ssize_t read(ioctx_t* ctx, uint8_t* buf, size_t count); - virtual ssize_t write(ioctx_t* ctx, const uint8_t* buf, size_t count); + virtual ssize_t readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt); + virtual ssize_t writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt); virtual int poll(ioctx_t* ctx, PollNode* node); public: @@ -488,20 +676,21 @@ PipeNode::PipeNode(dev_t dev, uid_t owner, gid_t group, mode_t mode) this->stat_gid = group; this->type = S_IFCHR; this->stat_mode = (mode & S_SETABLE) | this->type; + supports_iovec = true; } PipeNode::~PipeNode() { } -ssize_t PipeNode::read(ioctx_t* ctx, uint8_t* buf, size_t count) +ssize_t PipeNode::readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt) { - return endpoint.read(ctx, buf, count); + return endpoint.readv(ctx, iov, iovcnt); } -ssize_t PipeNode::write(ioctx_t* ctx, const uint8_t* buf, size_t count) +ssize_t PipeNode::writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt) { - return endpoint.write(ctx, buf, count); + return endpoint.writev(ctx, iov, iovcnt); } int PipeNode::poll(ioctx_t* ctx, PollNode* node) diff --git a/kernel/vnode.cpp b/kernel/vnode.cpp index 25de8937..acc7a677 100644 --- a/kernel/vnode.cpp +++ b/kernel/vnode.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, 2014, 2015, 2016 Jonas 'Sortie' Termansen. + * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017 Jonas 'Sortie' Termansen. 
* * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -256,21 +256,43 @@ ssize_t Vnode::read(ioctx_t* ctx, uint8_t* buf, size_t count) return inode->read(ctx, buf, count); } +ssize_t Vnode::readv(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + return inode->readv(ctx, iov, iovcnt); +} + ssize_t Vnode::pread(ioctx_t* ctx, uint8_t* buf, size_t count, off_t off) { return inode->pread(ctx, buf, count, off); } +ssize_t Vnode::preadv(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) +{ + return inode->preadv(ctx, iov, iovcnt, off); +} + ssize_t Vnode::write(ioctx_t* ctx, const uint8_t* buf, size_t count) { return inode->write(ctx, buf, count); } +ssize_t Vnode::writev(ioctx_t* ctx, const struct iovec* iov, int iovcnt) +{ + return inode->writev(ctx, iov, iovcnt); +} + ssize_t Vnode::pwrite(ioctx_t* ctx, const uint8_t* buf, size_t count, off_t off) { return inode->pwrite(ctx, buf, count, off); } +ssize_t Vnode::pwritev(ioctx_t* ctx, const struct iovec* iov, int iovcnt, + off_t off) +{ + return inode->pwritev(ctx, iov, iovcnt, off); +} + int Vnode::utimens(ioctx_t* ctx, const struct timespec* times) { return inode->utimens(ctx, times); @@ -397,11 +419,21 @@ ssize_t Vnode::recv(ioctx_t* ctx, uint8_t* buf, size_t count, int flags) return inode->recv(ctx, buf, count, flags); } +ssize_t Vnode::recvmsg(ioctx_t* ctx, struct msghdr* msg, int flags) +{ + return inode->recvmsg(ctx, msg, flags); +} + ssize_t Vnode::send(ioctx_t* ctx, const uint8_t* buf, size_t count, int flags) { return inode->send(ctx, buf, count, flags); } +ssize_t Vnode::sendmsg(ioctx_t* ctx, const struct msghdr* msg, int flags) +{ + return inode->sendmsg(ctx, msg, flags); +} + int Vnode::getsockopt(ioctx_t* ctx, int level, int option_name, void* option_value, size_t* option_size_ptr) { diff --git a/regress/Makefile b/regress/Makefile index c8db76be..1873c296 100644 --- a/regress/Makefile +++ 
b/regress/Makefile @@ -16,6 +16,7 @@ regress \ TESTS:=\ test-fmemopen \ +test-pipe-one-byte \ test-pthread-argv \ test-pthread-basic \ test-pthread-main-exit \ diff --git a/regress/test-pipe-one-byte.c b/regress/test-pipe-one-byte.c new file mode 100644 index 00000000..d2399122 --- /dev/null +++ b/regress/test-pipe-one-byte.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 Jonas 'Sortie' Termansen. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * test-pipe-one-byte.c + * Tests whether a single byte written to a pipe can be read by the other end. + */ + +#include + +#include +#include +#include +#include + +#include "test.h" + +int main(void) +{ + int fds[2]; + test_assert(pipe(fds) == 0); + pid_t pid = fork(); + test_assert(0 <= pid); + if ( pid == 0 ) + { + close(fds[0]); + char c = 'X'; + test_assert(write(fds[1], &c, 1) == 1); + while (1) + sleep(1000); + } + close(fds[1]); + char c; + test_assert(read(fds[0], &c, 1) == 1); + test_assert(c == 'X'); + kill(pid, SIGKILL); + int status; + waitpid(pid, &status, 0); + return 0; +}