From: John M. <mu...@no...> - 2009-01-14 14:16:55
|
On 14-Jan-09, at 6:18 AM, Miklos Szeredi wrote:
> On Tue, 13 Jan 2009, Roman Shaposhnik wrote:
>> On Jan 12, 2009, at 3:01 AM, Miklos Szeredi wrote:
>>> On Fri, 09 Jan 2009, Roman V. Shaposhnik wrote:
>>>> True. Btw, is it possible to asynchronously notify in-kernel FUSE
>>>> that a file has changed and the cache has to be invalidated?
>>>> Suppose
>>>> my user-level app gets notified asynchronously too. How am I
>>>> supposed
>>>> to pass these events up the chain?
>>>
>>> Not yet, but such an API is planned.
>>
>> Interesting. Are there any public discussions of what it might look
>> like?
>
> John Muir had patches implementing this. Let me know if you can't
> find it in the list archives, I'll try to dig them out then.

I am in fact planning to redo those patches to work against the latest kernel module and user-space libraries within the next few months. I'm hoping that, starting with those patches, we can come up with an implementation which could be included in the kernel in the near future.

Miklos, based on the original patches (attached for your convenience), were there changes that you wanted to see in terms of naming convention or otherwise?

Also, should I attempt to implement binary compatibility, or can we talk about moving to FUSE 3.0 for this API?

Regards,

John.

Hopefully the patch doesn't get mangled...
diff -Ndurp old/kernel/dev.c new/kernel/dev.c --- old/kernel/dev.c 2008-07-06 19:24:45.000000000 -0400 +++ new/kernel/dev.c 2008-07-06 19:33:21.000016000 -0400 @@ -828,6 +828,97 @@ static int copy_out_args(struct fuse_cop out->page_zeroing); } +static int reverse_invalidate_inode(struct super_block *sb, + struct fuse_copy_state *cs, + struct fuse_reverse_header *rev) +{ + struct fuse_invalidate_in inv; + int err; + + if (rev->numargs != 1 || + rev->arglen[0] != sizeof(struct fuse_invalidate_in)) { + fuse_copy_finish(cs); + return -EINVAL; + } + + err = fuse_copy_one(cs, &inv, sizeof(inv)); + fuse_copy_finish(cs); + if (err < 0) + return err; + + return fuse_reverse_invalidate_inode(sb, rev->nodeid, + inv.offset, inv.len); +} + +static int reverse_invalidate_entry(struct super_block *sb, + struct fuse_copy_state *cs, + struct fuse_reverse_header *rev) +{ + struct qstr name; + int err; + char buf[FUSE_NAME_MAX+1]; + + if (rev->numargs != 1) { + fuse_copy_finish(cs); + return -EINVAL; + } + + if (rev->arglen[0] > FUSE_NAME_MAX) { + fuse_copy_finish(cs); + return -ENAMETOOLONG; + } + + name.name = buf; + name.len = rev->arglen[0] - 1; + err = fuse_copy_one(cs, buf, rev->arglen[0]); + fuse_copy_finish(cs); + if (err < 0) + return err; + name.hash = full_name_hash(name.name, name.len); + + return fuse_reverse_invalidate_entry(sb, rev->nodeid, &name); +} + +static int reverse_operation(struct fuse_conn *fc, + struct fuse_copy_state *cs, + unsigned nbytes) +{ + int err; + struct fuse_reverse_header rev; + struct super_block *sb; + + down_read(&fc->killsb); + err = -ENOENT; + sb = fc->sb; + if (!sb) + goto err_finish; + + memset(&rev, 0, sizeof(rev)); + err = fuse_copy_one(cs, &rev.opcode, + sizeof(struct fuse_reverse_header) - + offsetof(struct fuse_reverse_header, opcode)); + if (err) + goto err_finish; + + switch(rev.opcode) { + case FUSE_INVAL_INODE: + err = reverse_invalidate_inode(sb, cs, &rev); + goto err_unlock; + case FUSE_INVAL_ENTRY: + err = 
reverse_invalidate_entry(sb, cs, &rev); + goto err_unlock; + default: + err = -ENOSYS; + break; + } + + err_finish: + fuse_copy_finish(cs); + err_unlock: + up_read(&fc->killsb); + return err ? err : nbytes; +} + /* * Write a single reply to a request. First the header is copied from * the write buffer. The request is then searched on the processing @@ -855,10 +946,12 @@ static ssize_t fuse_dev_writev(struct fi if (err) goto err_finish; err = -EINVAL; - if (!oh.unique || oh.error <= -1000 || oh.error > 0 || - oh.len != nbytes) + if (oh.error <= -1000 || oh.error > 0 || oh.len != nbytes) goto err_finish; + if (oh.unique == 0) + return reverse_operation(fc, &cs, nbytes); + spin_lock(&fc->lock); err = -ENOENT; if (!fc->connected) @@ -1038,6 +1131,7 @@ void fuse_abort_conn(struct fuse_conn *f wake_up_all(&fc->blocked_waitq); kill_fasync(&fc->fasync, SIGIO, POLL_IN); } + fc->sb = NULL; spin_unlock(&fc->lock); } diff -Ndurp old/kernel/dir.c new/kernel/dir.c --- old/kernel/dir.c 2008-07-06 19:28:52.000000000 -0400 +++ new/kernel/dir.c 2008-07-06 19:35:18.000019000 -0400 @@ -758,6 +758,46 @@ static int fuse_do_getattr(struct inode return err; } +int fuse_reverse_invalidate_entry(struct super_block *sb, + u64 parent_nodeid, + struct qstr *name) +{ + int err = -ENOTDIR; + struct inode *parent; + struct dentry *dir; + struct dentry *entry; + + parent = ilookup5(sb, fuse_nodeid_hash(parent_nodeid), + fuse_inode_eq, &parent_nodeid); + if (!parent) + return -ENOENT; + + down(&parent->i_sem); + if (!S_ISDIR(parent->i_mode)) + goto error_nodir; + + err = -ENOENT; + dir = d_find_alias(parent); + if (!dir) + goto error_nodir; + + entry = d_lookup(dir, name); + if (!entry) + goto error_noentry; + + err = 0; + fuse_invalidate_entry(entry); + dput(entry); + + error_noentry: + dput(dir); + + error_nodir: + up(&parent->i_sem); + iput(parent); + return err; +} + /* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities over the requester process. 
This diff -Ndurp old/kernel/fuse_i.h new/kernel/fuse_i.h --- old/kernel/fuse_i.h 2008-07-06 19:14:08.000000000 -0400 +++ new/kernel/fuse_i.h 2008-07-06 19:33:21.000046000 -0400 @@ -483,6 +483,14 @@ struct fuse_conn { /** Version counter for attribute changes */ u64 attr_version; + + /** Super block for this connection. Retrieve while holding a + read lock on the killsb rw_semaphore. */ + struct super_block *sb; + + /** Read/write semaphore to ensure that the kill_sb will wait + for all reverse operations to finish. */ + struct rw_semaphore killsb; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -528,6 +536,11 @@ extern struct export_operations fuse_exp #endif /** + * Inode to nodeid comparison. + */ +int fuse_inode_eq(struct inode *inode, void *_nodeidp); + +/** * Get a filled in inode */ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, @@ -687,3 +700,16 @@ int fuse_valid_type(int m); * Is task allowed to perform filesystem operation? */ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task); + +/** + * File-system tells the kernel to invalidate cache for the given node id. + */ +int fuse_reverse_invalidate_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len); + +/** + * File-system tells the kernel that dentry is gone. + */ +int fuse_reverse_invalidate_entry(struct super_block *sb, + u64 parent_nodeid, + struct qstr *name); diff -Ndurp old/kernel/fuse_kernel.h new/kernel/fuse_kernel.h --- old/kernel/fuse_kernel.h 2008-07-06 16:47:04.000000000 -0400 +++ new/kernel/fuse_kernel.h 2008-07-06 19:33:21.000061000 -0400 @@ -128,6 +128,11 @@ struct fuse_file_lock { */ #define FUSE_RELEASE_FLUSH (1 << 0) +/** + * Operations originating from user space start at this value. 
+ */ +#define FUSE_REV_OPCODE_MIN 0x80000000 + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -167,6 +172,8 @@ enum fuse_opcode { FUSE_DESTROY = 38, FUSE_LOOKUP_PARENT = 39, FUSE_ILOOKUP = 40, + FUSE_INVAL_INODE = FUSE_REV_OPCODE_MIN, + FUSE_INVAL_ENTRY = FUSE_REV_OPCODE_MIN + 1, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -376,3 +383,19 @@ struct fuse_dirent { #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1)) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) +#define FUSE_REVERSE_MAX_ARGS 3 + +struct fuse_reverse_header { + struct fuse_out_header oh; + __u32 opcode; + __u32 padding; + __u64 unique; + __u64 nodeid; + __u32 numargs; + __u32 arglen[FUSE_REVERSE_MAX_ARGS]; +}; + +struct fuse_invalidate_in { + __s64 offset; + __s64 len; +}; diff -Ndurp old/kernel/inode.c new/kernel/inode.c --- old/kernel/inode.c 2008-07-06 19:32:57.000000000 -0400 +++ new/kernel/inode.c 2008-07-06 19:34:20.000012000 -0400 @@ -202,7 +202,7 @@ static void fuse_init_inode(struct inode BUG(); } -static int fuse_inode_eq(struct inode *inode, void *_nodeidp) +int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) @@ -255,6 +255,33 @@ struct inode *fuse_iget(struct super_blo return inode; } +int fuse_reverse_invalidate_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len) +{ + struct inode *inode; +/* pgoff_t pg_start; + pgoff_t pg_end; */ + + inode = ilookup5(sb, fuse_nodeid_hash(nodeid), fuse_inode_eq, &nodeid); + if (!inode) + return -ENOENT; + + if (offset >= 0) { +/* Implement similar to the following with invalidate_inode_pages2_range. 
+ pg_start = offset >> PAGE_CACHE_SHIFT; + if (len == 0) + pg_end = ~0UL; + else + pg_end = (offset + len) >> PAGE_CACHE_SHIFT; + invalidate_mapping_pages(inode->i_mapping, pg_start, pg_end); +*/ + invalidate_inode_pages(inode->i_mapping); + } + fuse_invalidate_attr(inode); + iput(inode); + return 0; +} + #ifdef UMOUNT_BEGIN_VFSMOUNT static void fuse_umount_begin(struct vfsmount *vfsmnt, int flags) { @@ -527,6 +554,7 @@ static struct fuse_conn *new_conn(void) fc->blocked = 1; fc->attr_version = 1; get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); + init_rwsem(&fc->killsb); } return fc; } @@ -682,6 +710,7 @@ static int fuse_fill_super(struct super_ fc->user_id = d.user_id; fc->group_id = d.group_id; fc->max_read = d.max_read; + fc->sb = sb; /* Used by get_root_inode() */ sb->s_fs_info = fc; @@ -760,11 +789,24 @@ static struct super_block *fuse_get_sb(s } #endif +static void fuse_kill_anon_super(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_anon_super(sb); +} + static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .get_sb = fuse_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = fuse_kill_anon_super, .fs_flags = FS_HAS_SUBTYPE | FS_SAFE, }; @@ -787,11 +829,24 @@ static struct super_block *fuse_get_sb_b } #endif +static void fuse_kill_block_super(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_block_super(sb); +} + static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .get_sb = fuse_get_sb_blk, - .kill_sb = kill_block_super, + .kill_sb = fuse_kill_block_super, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff -Ndurp old/include/fuse_lowlevel.h new/include/fuse_lowlevel.h --- old/include/fuse_lowlevel.h 2008-07-06 16:47:04.000000000 
-0400 +++ new/include/fuse_lowlevel.h 2008-07-06 19:33:21.000091000 -0400 @@ -1035,6 +1035,35 @@ size_t fuse_add_direntry(fuse_req_t req, off_t off); /* ----------------------------------------------------------- * + * Reverse operations * + * ----------------------------------------------------------- */ + +/** + * Tell the kernel to invalidate cache for an inode + * + * @param ch the channel through which to send the invalidation + * @param ino the inode number + * @param off the offset in the inode where to start invalidating + * or negative to invalidate attributes only + * @param len the amount of cache to invalidate or 0 for all + * @return zero for success, -errno for failure to send operation + */ +int fuse_reverse_invalidate_inode(struct fuse_chan *ch, fuse_ino_t ino, + off_t off, off_t len); + +/** + * Tell the kernel to invalidate a dentry + * + * @param ch the channel through which to send the invalidation + * @param parent inode number + * @param name file name + * @param namelen strlen() of file name + * @return zero for success, -errno for failure to send operation + */ +int fuse_reverse_invalidate_entry(struct fuse_chan *ch, fuse_ino_t parent, + const char *name, size_t namelen); + +/* ----------------------------------------------------------- * * Utility functions * * ----------------------------------------------------------- */ @@ -1238,6 +1267,14 @@ void fuse_session_reset(struct fuse_sess int fuse_session_exited(struct fuse_session *se); /** + * Get the user data provided to the session + * + * @param se the session + * @return the user data + */ +void *fuse_session_data(struct fuse_session *se); + +/** * Enter a single threaded event loop * * @param se the session diff -Ndurp old/include/fuse_kernel.h new/include/fuse_kernel.h --- old/include/fuse_kernel.h 2008-07-06 16:47:04.000000000 -0400 +++ new/include/fuse_kernel.h 2008-07-06 19:33:21.000106000 -0400 @@ -128,6 +128,11 @@ struct fuse_file_lock { */ #define FUSE_RELEASE_FLUSH (1 << 0) 
+/** + * Operations originating from user space start at this value. + */ +#define FUSE_REV_OPCODE_MIN 0x80000000 + enum fuse_opcode { FUSE_LOOKUP = 1, FUSE_FORGET = 2, /* no reply */ @@ -167,6 +172,8 @@ enum fuse_opcode { FUSE_DESTROY = 38, FUSE_LOOKUP_PARENT = 39, FUSE_ILOOKUP = 40, + FUSE_INVAL_INODE = FUSE_REV_OPCODE_MIN, + FUSE_INVAL_ENTRY = FUSE_REV_OPCODE_MIN + 1, }; /* The read buffer is required to be at least 8k, but may be much larger */ @@ -376,3 +383,19 @@ struct fuse_dirent { #define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1)) #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) +#define FUSE_REVERSE_MAX_ARGS 3 + +struct fuse_reverse_header { + struct fuse_out_header oh; + __u32 opcode; + __u32 padding; + __u64 unique; + __u64 nodeid; + __u32 numargs; + __u32 arglen[FUSE_REVERSE_MAX_ARGS]; +}; + +struct fuse_invalidate_in { + __s64 offset; + __s64 len; +}; diff -Ndurp old/lib/fuse_session.c new/lib/fuse_session.c --- old/lib/fuse_session.c 2007-10-16 12:35:23.000000000 -0400 +++ new/lib/fuse_session.c 2008-07-06 19:33:21.000121000 -0400 @@ -120,6 +120,11 @@ int fuse_session_exited(struct fuse_sess return se->exited; } +void *fuse_session_data(struct fuse_session *se) +{ + return se->data; +} + static struct fuse_chan *fuse_chan_new_common(struct fuse_chan_ops *op, int fd, size_t bufsize, void *data, int compat) { diff -Ndurp old/lib/fuse_versionscript new/lib/fuse_versionscript --- old/lib/fuse_versionscript 2007-07-02 07:32:17.000000000 -0400 +++ new/lib/fuse_versionscript 2008-07-06 19:33:21.000136000 -0400 @@ -152,6 +152,8 @@ FUSE_2.7 { fuse_register_module; fuse_reply_iov; fuse_version; + fuse_reverse_invalidate_inode; + fuse_reverse_invalidate_entry; local: *; diff -Ndurp old/lib/fuse_lowlevel.c new/lib/fuse_lowlevel.c --- old/lib/fuse_lowlevel.c 2008-07-06 16:47:04.000000000 -0400 +++ new/lib/fuse_lowlevel.c 2008-07-06 19:33:21.000156000 -0400 @@ -60,6 +60,14 @@ struct fuse_ll { struct 
fuse_req interrupts; pthread_mutex_t lock; int got_destroy; + uint64_t reverse_unique; +}; + +struct fuse_reverse { + struct fuse_ll *f; + struct fuse_chan *ch; + struct fuse_reverse_header h; + struct iovec iov[FUSE_REVERSE_MAX_ARGS + 1]; }; static void convert_stat(const struct stat *stbuf, struct fuse_attr *attr) @@ -1458,3 +1466,109 @@ struct fuse_session *fuse_lowlevel_new_c } __asm__(".symver fuse_lowlevel_new_compat25,fuse_lowlevel_new@FUSE_2.5"); + +static int send_reverse_operation(struct fuse_reverse *rev) +{ + rev->h.oh.len = iov_length(rev->iov, rev->h.numargs + 1); + + return fuse_chan_send(rev->ch, rev->iov, rev->h.numargs + 1); +} + +static uint64_t reverse_operation_unique(struct fuse_ll *f) +{ + uint64_t unique; + if (!f) + return 0; + + pthread_mutex_lock(&f->lock); + unique = f->reverse_unique++; + pthread_mutex_unlock(&f->lock); + return unique; +} + +static int reverse_operation_init(struct fuse_chan *ch, + struct fuse_reverse *rev, + uint32_t opcode, + uint64_t nodeid) +{ + memset(rev, 0, sizeof(*rev)); + rev->ch = ch; + rev->f = (struct fuse_ll *)fuse_session_data(fuse_chan_session(ch)); + rev->h.unique = reverse_operation_unique(rev->f); + rev->iov[0].iov_base = &rev->h; + rev->iov[0].iov_len = sizeof(rev->h); + rev->h.opcode = opcode; + rev->h.nodeid = nodeid; + return 0; +} + +static int reverse_operation_add_arg(struct fuse_reverse *rev, + const void *arg, size_t len) +{ + uint32_t i; + + if (!arg || !len || len > UINT32_MAX) + return -EINVAL; + + if (!rev || rev->h.numargs >= FUSE_REVERSE_MAX_ARGS) + return -EINVAL; + + i = rev->h.numargs++; + rev->h.arglen[i] = len; + rev->iov[i+1].iov_base = (void *)arg; + rev->iov[i+1].iov_len = len; + return 0; +} + +int fuse_reverse_invalidate_inode(struct fuse_chan *ch, fuse_ino_t ino, + off_t off, off_t len) +{ + struct fuse_reverse rev; + struct fuse_invalidate_in in; + int ret; + + ret = reverse_operation_init(ch, &rev, FUSE_INVAL_INODE, ino); + if (ret) + return ret; + + if (rev.f && 
rev.f->debug) + { + printf("unique: %llu, opcode: INVAL_INODE (%u), nodeid: %lu," + " off: %llu, len: %llu", + rev.h.unique, rev.h.opcode, ino, off, len); + fflush(stdout); + } + + in.offset = off; + in.len = len; + ret = reverse_operation_add_arg(&rev, &in, sizeof(in)); + if (ret) + return ret; + + return send_reverse_operation(&rev); +} + +int fuse_reverse_invalidate_entry(struct fuse_chan *ch, + fuse_ino_t parent, + const char *name, size_t namelen) +{ + struct fuse_reverse rev; + int ret; + + ret = reverse_operation_init(ch, &rev, FUSE_INVAL_ENTRY, parent); + if (ret) + return ret; + + if (rev.f && rev.f->debug) + { + printf("unique: %llu, opcode: INVAL_ENTRY (%u), nodeid: %lu, name: %s", + rev.h.unique, rev.h.opcode, parent, name); + fflush(stdout); + } + + ret = reverse_operation_add_arg(&rev, name, namelen + 1); + if (ret) + return ret; + + return send_reverse_operation(&rev); +} |