[Libsysio-commit] HEAD: libsysio/drivers/native fs_native.c
Brought to you by:
lward
From: Lee W. <lw...@us...> - 2004-02-06 20:10:45
|
Update of /cvsroot/libsysio/libsysio/drivers/native In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv29990/drivers/native Modified Files: fs_native.c Log Message: Merging strided-IO branch + Strided-IO infrastructure. Internals altered to move data based on multiple targeted regions in both the file address space and local memory. + Added [i]{read,write}x, calls to perform extent-based or strided-IO directly. + Many bug fixes + Many config fixes + --with-zero-sum-memory; A config option that causes the shutdown code to carefully release *all* memory acquired from the heap, by the library. Useful for debugging tasks such as leak detection. Index: fs_native.c =================================================================== RCS file: /cvsroot/libsysio/libsysio/drivers/native/fs_native.c,v retrieving revision 1.29 retrieving revision 1.30 diff -u -w -b -B -p -r1.29 -r1.30 --- fs_native.c 26 Jan 2004 16:34:53 -0000 1.29 +++ fs_native.c 6 Feb 2004 20:07:29 -0000 1.30 @@ -9,7 +9,7 @@ * terms of the GNU Lesser General Public License * (see cit/LGPL or http://www.gnu.org/licenses/lgpl.html) * - * Cplant(TM) Copyright 1998-2003 Sandia Corporation. + * Cplant(TM) Copyright 1998-2004 Sandia Corporation. * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive * license for use of this work by or on behalf of the US Government. * Export of this program may require a license from the United States @@ -66,11 +66,15 @@ #endif #include <utime.h> #include <sys/queue.h> +#if !(defined(REDSTORM) || defined(MAX_IOVEC)) +#include <limits.h> +#endif #include "sysio.h" #include "fs.h" #include "mount.h" #include "inode.h" +#include "xtio.h" #include "fs_native.h" @@ -78,17 +82,6 @@ #include <sys/uio.h> #endif -#ifdef REDSTORM -/* - * The cnos IO routines on Red Storm can't do scatter/gather IO. We - * must use a different interface, then. The doio() routine will loop - * over the vector entries. 
- */ -typedef ssize_t (*iof)(int, void *, size_t, _SYSIO_OFF_T); -#else -typedef ssize_t (*iof)(int, const struct iovec *, int); -#endif - /* * Local host file system driver. */ @@ -183,12 +176,22 @@ struct native_inode_identifier { * system objects. */ struct native_inode { + unsigned + ni_seekok : 1; /* can seek? */ struct native_inode_identifier ni_ident; /* unique identifier */ struct file_identifier ni_fileid; /* ditto */ int ni_fd; /* host fildes */ int ni_oflags; /* flags, from open */ unsigned ni_nopens; /* soft ref count */ - off_t ni_fpos; /* current pos */ + _SYSIO_OFF_T ni_fpos; /* current pos */ +}; + +/* + * Native IO path arguments. + */ +struct native_io { + char nio_op; /* 'r' or 'w' */ + struct native_inode *nio_nino; /* native ino */ }; static int native_inop_lookup(struct pnode *pno, @@ -215,10 +218,8 @@ static int native_inop_close(struct inod static int native_inop_link(struct pnode *old, struct pnode *new); static int native_inop_unlink(struct pnode *pno); static int native_inop_rename(struct pnode *old, struct pnode *new); -static int native_inop_ipreadv(struct inode *ino, - struct ioctx *ioctx); -static int native_inop_ipwritev(struct inode *ino, - struct ioctx *ioctx); +static int native_inop_read(struct inode *ino, struct ioctx *ioctx); +static int native_inop_write(struct inode *ino, struct ioctx *ioctx); static int native_inop_iodone(struct ioctx *ioctx); static int native_inop_fcntl(struct inode *ino, int cmd, va_list ap); static int native_inop_sync(struct inode *ino); @@ -248,8 +249,8 @@ static struct inode_ops native_i_ops = { native_inop_link, native_inop_unlink, native_inop_rename, - native_inop_ipreadv, - native_inop_ipwritev, + native_inop_read, + native_inop_write, native_inop_iodone, native_inop_fcntl, native_inop_sync, @@ -841,40 +842,37 @@ out: } static int -native_pos(int fd, -#if _LARGEFILE64_SOURCE - loff_t *offset -#else - _SYSIO_OFF_T *offset -#endif - ) +native_pos(int fd, _SYSIO_OFF_T *offset, int whence) { + 
_SYSIO_OFF_T off; assert(fd >= 0); assert(*offset >= 0); + off = *offset; #if _LARGEFILE64_SOURCE && defined(SYS__llseek) { int err; err = syscall(SYS__llseek, (unsigned int)fd, - (unsigned int)(*offset >> 32), - (unsigned int)*offset, - offset, - SEEK_SET); + (unsigned int)(off >> 32), + (unsigned int)off, + &off, + whence); if (err == -1) return -errno; } #else - *offset = + off = syscall(SYS_lseek, fd, - *offset, - SEEK_SET); - if (*offset == -1) + off, + whence); + if (off == -1) return -errno; #endif + *offset = off; return 0; } @@ -889,12 +886,8 @@ native_getdirentries(struct inode *ino, struct native_inode *nino = I2NI(ino); int err; #ifndef SYS_getdirentries -#if _LARGEFILE64_SOURCE - loff_t result; -#else _SYSIO_OFF_T result; #endif -#endif ssize_t cc; assert(nino->ni_fd >= 0); @@ -902,7 +895,7 @@ native_getdirentries(struct inode *ino, #ifndef SYS_getdirentries result = *basep; if (*basep != nino->ni_fpos) { - err = native_pos(nino->ni_fd, &result); + err = native_pos(nino->ni_fd, &result, SEEK_SET); if (err) return err; } @@ -1013,6 +1006,9 @@ native_inop_open(struct pnode *pno, int flags &= ~O_WRONLY; flags |= O_RDWR; } +#ifdef O_LARGEFILE + flags |= O_LARGEFILE; +#endif fd = syscall(SYS_open, path, flags, mode); if (!pno->p_base->pb_ino && fd >= 0) { int err; @@ -1059,6 +1055,12 @@ native_inop_open(struct pnode *pno, int */ nino->ni_fpos = 0; nino->ni_fd = fd; + /* + * Need to know whether we can seek on this + * descriptor. + */ + nino->ni_seekok = + native_pos(nino->ni_fd, &nino->ni_fpos, SEEK_CUR) != 0 ? 0 : 1; return 0; } @@ -1146,183 +1148,251 @@ native_inop_unlink(struct pnode *pno) return err; } -/* - * A helper function performing the real IO operation work. - * - * We don't really have async IO. We'll just perform the function - * now. 
- */ static int -doio(iof f, - struct inode *ino, - struct ioctx *ioctx) +native_inop_rename(struct pnode *old, struct pnode *new) { - struct native_inode *nino = I2NI(ino); -#ifndef REDSTORM -#if _LARGEFILE64_SOURCE - loff_t result; -#else - _SYSIO_OFF_T result; -#endif -#endif + int err; + char *opath, *npath; - assert(nino->ni_fd >= 0); + opath = _sysio_pb_path(old->p_base, '/'); + npath = _sysio_pb_path(new->p_base, '/'); + if (!(opath && npath)) { + err = -ENOMEM; + goto out; + } - if ((ioctx->ioctx_iovlen && (int )ioctx->ioctx_iovlen < 0) || - !(S_ISREG(ino->i_mode) || - S_ISCHR(ino->i_mode) || - S_ISSOCK(ino->i_mode) || - S_ISFIFO(ino->i_mode))) - return -EINVAL; + err = syscall(SYS_rename, opath, npath); + +out: + if (opath) + free(opath); + if (npath) + free(npath); + + return err; +} + +static ssize_t +dopio(void *buf, size_t count, _SYSIO_OFF_T off, struct native_io *nio) +{ -#ifndef REDSTORM /* - * This implementation first positions the real system descriptor, then - * performs the operation. This is not atomic. - * - * An alternative, more complex, less efficient but atomic, - * implementation might consider each entry of the iovec separately. - * Then, the system implementations of the POSIX p{reaad,write} calls - * could be used. - * * Avoid the reposition call if we're already at the right place. * Allows us to access pipes and fifos. */ - result = ioctx->ioctx_offset; - if (ioctx->ioctx_offset != nino->ni_fpos) { + if (off != nio->nio_nino->ni_fpos) { int err; - err = native_pos(nino->ni_fd, &result); - if (err) { - ioctx->ioctx_cc = -1; - ioctx->ioctx_errno = -err; - goto out; + err = native_pos(nio->nio_nino->ni_fd, &off, SEEK_SET); + if (err) + return err; + nio->nio_nino->ni_fpos = off; } - nino->ni_fpos = result; + + return syscall(nio->nio_op == 'r' ? SYS_read : SYS_write, + nio->nio_nino->ni_fd, + buf, + count); } -#endif - /* - * Call the appropriate (read/write) IO function to - * transfer the data now. 
- */ -#ifdef REDSTORM +static ssize_t +doiov(const struct iovec *iov, + int count, + _SYSIO_OFF_T off, + ssize_t limit, + struct native_io *nio) { - size_t count = ioctx->ioctx_iovlen; - struct iovec *iov = ioctx->ioctx_iovec; ssize_t cc; - nino->ni_fpos = ioctx->ioctx_offset; - while (count) { - cc = - (*f)(nino->ni_fd, - iov->iov_base, - iov->iov_len, - nino->ni_fpos); - if (cc < 0) { - if (ioctx->ioctx_cc) { +#if !(defined(REDSTORM) || defined(MAX_IOVEC)) +#define MAX_IOVEC INT_MAX +#endif + + if (count <= 0) + return -EINVAL; + /* - * No data written at all. Return - * error. + * Avoid the reposition call if we're already at the right place. + * Allows us to access pipes and fifos. */ - ioctx->ioctx_cc = -1; - } - break; - } - ioctx->ioctx_cc += cc; - count--, iov++; - } - } -#else /* !defined(REDSTORM) */ - ioctx->ioctx_cc = - (*f)(nino->ni_fd, ioctx->ioctx_iovec, ioctx->ioctx_iovlen); -#endif /* defined(REDSTORM) */ - if (ioctx->ioctx_cc < 0) - ioctx->ioctx_errno = errno; - if (ioctx->ioctx_cc > 0) - nino->ni_fpos += ioctx->ioctx_cc; + if (off != nio->nio_nino->ni_fpos) { + int err; -out: - ioctx->ioctx_done = 1; - return 0; + err = native_pos(nio->nio_nino->ni_fd, &off, SEEK_SET); + if (err) + return err; + nio->nio_nino->ni_fpos = off; } /* - * Helper function passed to doio(), above, to accomplish a real readv. + * The {read,write}v is safe as this routine is only ever called + * by _sysio_enumerate_extents() and that routine is exact. It never + * passes iovectors including tails. */ -#ifdef REDSTORM -static ssize_t -native_read(int fd, void *buf, size_t count, _SYSIO_OFF_T offset) -{ - - return syscall(SYS_pread, fd, buf, count, offset); -} -#else -static ssize_t -native_read(int fd, const struct iovec *vector, int count) -{ + cc = +#ifndef REDSTORM + count <= MAX_IOVEC + ? syscall(nio->nio_op == 'r' ? 
SYS_readv : SYS_writev, + nio->nio_nino->ni_fd, + iov, + count) + : +#endif + _sysio_enumerate_iovec(iov, + count, + off, + limit, + (ssize_t (*)(void *, + size_t, + _SYSIO_OFF_T, + void *))dopio, + nio); + if (cc > 0) + nio->nio_nino->ni_fpos += cc; + return cc; - return syscall(SYS_readv, fd, vector, count); -} +#if !(defined(REDSTORM) || defined(MAX_IOVEC)) +#undef MAX_IOVEC #endif +} +#if 0 static int -native_inop_rename(struct pnode *old, struct pnode *new) +lockop_all(struct native_inode *nino, + struct intnl_xtvec *xtv, + size_t count, + short op) { + struct flock flock; int err; - char *opath, *npath; - opath = _sysio_pb_path(old->p_base, '/'); - npath = _sysio_pb_path(new->p_base, '/'); - if (!(opath && npath)) { - err = -ENOMEM; - goto out; + if (!count) + return -EINVAL; + flock.l_type = op; + flock.l_whence = SEEK_SET; + while (count--) { + flock.l_start = xtv->xtv_off; + flock.l_len = xtv->xtv_len; + xtv++; + err = + syscall( +#if !_LARGEFILE64_SOURCE + SYS_fcntl64 +#else + SYS_fcntl +#endif + , + nino->ni_fd, + F_SETLK, + &flock); + if (err != 0) + return -errno; + } + return 0; } - err = syscall(SYS_rename, opath, npath); - -out: - if (opath) - free(opath); - if (npath) - free(npath); +static int +order_xtv(const struct intnl_xtvec *xtv1, const struct intnl_xtvec *xtv2) +{ - return err; + if (xtv1->xtv_off < xtv2->xtv_off) + return -1; + if (xtv1->xtv_off > xtv2->xtv_off) + return 1; + return 0; } +#endif static int -native_inop_ipreadv(struct inode *ino, - struct ioctx *ioctx) +doio(char op, struct ioctx *ioctx) { + struct native_inode *nino; +#if 0 + int dolocks; + struct intnl_xtvec *oxtv; + int err; +#endif + struct native_io arguments; + ssize_t cc; +#if 0 + struct intnl_xtvec *front, *rear, tmp; +#endif - return doio(native_read, ino, ioctx); + nino = I2NI(ioctx->ioctx_ino); +#if 0 + dolocks = ioctx->ioctx_xtvlen > 1 && nino->ni_seekok; + if (dolocks) { + /* + * Must lock the regions (in order!) 
since we can't do + * strided-IO as a single atomic operation. + */ + oxtv = malloc(ioctx->ioctx_xtvlen * sizeof(struct intnl_xtvec)); + if (!oxtv) + return -ENOMEM; + (void )memcpy(oxtv, + ioctx->ioctx_xtv, + ioctx->ioctx_xtvlen * sizeof(struct intnl_xtvec)); + qsort(oxtv, + ioctx->ioctx_xtvlen, + sizeof(struct intnl_xtvec), + (int (*)(const void *, const void *))order_xtv); + err = + lockop_all(nino, + oxtv, ioctx->ioctx_xtvlen, + op == 'r' ? F_RDLCK : F_WRLCK); + if (err) { + free(oxtv); + return err; } - + } +#endif + arguments.nio_op = op; + arguments.nio_nino = nino; + cc = + _sysio_enumerate_extents(ioctx->ioctx_xtv, ioctx->ioctx_xtvlen, + ioctx->ioctx_iov, ioctx->ioctx_iovlen, + (ssize_t (*)(const struct iovec *, + int, + _SYSIO_OFF_T, + ssize_t, + void *))doiov, + &arguments); +#if 0 + if (dolocks) { /* - * Helper function passed to doio(), above, to accomplish a real writev. + * Must unlock in reverse order. */ -#ifdef REDSTORM -static ssize_t -native_write(int fd, void *buf, size_t count, _SYSIO_OFF_T offset) -{ - - return syscall(SYS_pwrite, fd, buf, count, offset); + front = oxtv; + rear = front + ioctx->ioctx_xtvlen - 1; + while (front < rear) { + tmp = *front; + *front++ = *rear; + *rear-- = tmp; } -#else -static ssize_t -native_write(int fd, const struct iovec *vector, int count) + if (lockop_all(nino, oxtv, ioctx->ioctx_xtvlen, F_UNLCK) != 0) + abort(); + free(oxtv); + } +#endif + if ((ioctx->ioctx_cc = cc) < 0) { + ioctx->ioctx_errno = -ioctx->ioctx_cc; + ioctx->ioctx_cc = -1; + } + return 0; +} + +static int +native_inop_read(struct inode *ino __IS_UNUSED, struct ioctx *ioctx) { - return syscall(SYS_writev, fd, vector, count); + return doio('r', ioctx); } -#endif static int -native_inop_ipwritev(struct inode *ino, - struct ioctx *ioctx) +native_inop_write(struct inode *ino __IS_UNUSED, struct ioctx *ioctx) { - return doio(native_write, ino, ioctx); + return doio('w', ioctx); } static int @@ -1361,9 +1431,9 @@ native_inop_fcntl(struct inode *ino, 
arg = va_arg(ap, long); return syscall(SYS_fcntl, nino->ni_fd, cmd, arg); default: - abort(); + break; } - return -1; + return -EINVAL; } static int |