From: Phil S. <ps...@us...> - 2003-04-20 15:43:30

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv21268/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
b=1111
- add ptlrpc_ldlm_hooks_referenced(), so we can determine in
  ldlm_cleanup if "force" is needed
- add a little bit more debug information to hunt connection leaks

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.16
retrieving revision 1.131.2.17
diff -u -w -b -B -p -r1.131.2.16 -r1.131.2.17
--- ldlm_lockd.c	14 Apr 2003 20:51:56 -0000	1.131.2.16
+++ ldlm_lockd.c	20 Apr 2003 15:43:26 -0000	1.131.2.17
@@ -851,8 +851,13 @@ static int ldlm_cleanup(struct obd_devic
         }

 #ifdef __KERNEL__
-        if (force)
+        if (force) {
                 ptlrpc_put_ldlm_hooks();
+        } else if (ptlrpc_ldlm_hooks_referenced()) {
+                CERROR("Some connections weren't cleaned up; run lconf with "
+                       "--force to forcibly unload.\n");
+                RETURN(-EBUSY);
+        }

         ptlrpc_stop_all_threads(ldlm->ldlm_cb_service);
         ptlrpc_unregister_service(ldlm->ldlm_cb_service);

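The shape of this change is a generic teardown guard: count outstanding
references and refuse to proceed unless the caller explicitly forces it.
A minimal userspace sketch of that pattern -- hook_refcount,
put_all_hooks(), and module_cleanup() are hypothetical stand-ins for the
ptlrpc internals, not the real API:

    #include <errno.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the ptlrpc hook reference count. */
    static int hook_refcount = 2;

    static void put_all_hooks(void)      /* ~ ptlrpc_put_ldlm_hooks() */
    {
            hook_refcount = 0;
    }

    static int hooks_referenced(void)    /* ~ ptlrpc_ldlm_hooks_referenced() */
    {
            return hook_refcount > 0;
    }

    /* Refuse to tear down while references remain, unless forced. */
    static int module_cleanup(int force)
    {
            if (force) {
                    put_all_hooks();
            } else if (hooks_referenced()) {
                    fprintf(stderr, "connections still referenced; "
                            "rerun with --force to unload\n");
                    return -EBUSY;
            }
            /* ... stop threads, unregister services ... */
            return 0;
    }

    int main(void)
    {
            printf("cleanup: %d\n", module_cleanup(0));  /* -EBUSY */
            printf("cleanup: %d\n", module_cleanup(1));  /* 0 */
            return 0;
    }

Returning -EBUSY instead of tearing down anyway keeps a leaked connection
visible rather than silently discarding it.
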
From: Mike S. <sh...@us...> - 2003-04-22 23:17:02

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv2542

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
b=1127: Run expired-lock recovery on a (new) DLM thread, rather than in
the timer context, because it will schedule when it sends ASTs, etc.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.18
retrieving revision 1.131.2.19
diff -u -w -b -B -p -r1.131.2.18 -r1.131.2.19
--- ldlm_lockd.c	22 Apr 2003 20:06:39 -0000	1.131.2.18
+++ ldlm_lockd.c	22 Apr 2003 23:16:58 -0000	1.131.2.19
@@ -28,6 +28,7 @@
 # include <linux/module.h>
 # include <linux/slab.h>
 # include <linux/init.h>
+# include <linux/wait.h>
 #else
 # include <liblustre.h>
 #endif
@@ -53,6 +53,86 @@ static spinlock_t waiting_locks_spinlock
 static struct timer_list waiting_locks_timer;
 static int ldlm_already_setup = 0;

+#ifdef __KERNEL__
+
+static struct expired_lock_thread {
+        wait_queue_head_t elt_waitq;
+        int elt_state;
+        struct list_head elt_expired_locks;
+        spinlock_t elt_lock;
+} expired_lock_thread;
+
+#define ELT_STOPPED 0
+#define ELT_READY 1
+#define ELT_TERMINATE 2
+
+static inline int have_expired_locks(void)
+{
+        int need_to_run;
+
+        spin_lock_bh(&expired_lock_thread.elt_lock);
+        need_to_run = !list_empty(&expired_lock_thread.elt_expired_locks);
+        spin_unlock_bh(&expired_lock_thread.elt_lock);
+
+        RETURN(need_to_run);
+}
+
+static int expired_lock_main(void *arg)
+{
+        struct list_head *expired = &expired_lock_thread.elt_expired_locks;
+        struct l_wait_info lwi = { 0 };
+        unsigned long flags;
+
+        ENTRY;
+        lock_kernel();
+        daemonize();
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
+        sigfillset(&current->blocked);
+        recalc_sigpending();
+#else
+        spin_lock_irqsave(&current->sigmask_lock, flags);
+        sigfillset(&current->blocked);
+        recalc_sigpending(current);
+        spin_unlock_irqrestore(&current->sigmask_lock, flags);
+#endif
+
+        strcpy(current->comm, "ldlm_elt");
+        unlock_kernel();
+
+        expired_lock_thread.elt_state = ELT_READY;
+        wake_up(&expired_lock_thread.elt_waitq);
+
+        while (1) {
+                l_wait_event(expired_lock_thread.elt_waitq,
+                             have_expired_locks() ||
+                             expired_lock_thread.elt_state == ELT_TERMINATE,
+                             &lwi);
+
+                spin_lock_bh(&expired_lock_thread.elt_lock);
+                while (!list_empty(expired)) {
+                        struct ldlm_lock *lock = list_entry(expired->next,
+                                                            struct ldlm_lock,
+                                                            l_pending_chain);
+                        spin_unlock_bh(&expired_lock_thread.elt_lock);
+
+                        ptlrpc_fail_export(lock->l_export);
+
+                        spin_lock_bh(&expired_lock_thread.elt_lock);
+                }
+                spin_unlock_bh(&expired_lock_thread.elt_lock);
+
+                if (expired_lock_thread.elt_state == ELT_TERMINATE)
+                        break;
+        }
+
+        expired_lock_thread.elt_state = ELT_STOPPED;
+        wake_up(&expired_lock_thread.elt_waitq);
+        RETURN(0);
+}
+
+#endif /* __KERNEL__ */
+
 static void waiting_locks_callback(unsigned long unused)
 {
         struct ldlm_lock *lock;
@@ -77,9 +157,11 @@ static void waiting_locks_callback(unsig
                 LDLM_DEBUG(lock, "timer expired");

                 /* ptlrpc_fail_export must be called with this lock released */
-                spin_unlock_bh(&waiting_locks_spinlock);
-                ptlrpc_fail_export(lock->l_export);
-                spin_lock_bh(&waiting_locks_spinlock);
+                spin_lock_bh(&expired_lock_thread.elt_lock);
+                list_del(&lock->l_pending_chain);
+                list_add(&lock->l_pending_chain,
+                         &expired_lock_thread.elt_expired_locks);
+                spin_unlock_bh(&expired_lock_thread.elt_lock);
         }

         spin_unlock_bh(&waiting_locks_spinlock);
@@ -109,6 +191,8 @@ static int ldlm_add_waiting_lock(struct
         }
         list_add_tail(&lock->l_pending_chain, &waiting_locks_list); /* FIFO */
         spin_unlock_bh(&waiting_locks_spinlock);
+        /* We drop this ref when we get removed from the list. */
+        class_export_get(lock->l_export);
         RETURN(1);
 }
@@ -146,6 +230,8 @@ int ldlm_del_waiting_lock(struct ldlm_lo
         }
         list_del_init(&lock->l_pending_chain);
         spin_unlock_bh(&waiting_locks_spinlock);
+        /* We got this ref when we were added to the list. */
+        class_export_put(lock->l_export);
         RETURN(1);
 }
@@ -816,6 +902,19 @@ static int ldlm_setup(struct obd_device
                 }
         }

+        rc = kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS);
+        if (rc < 0) {
+                CERROR("Cannot start ldlm expired-lock thread: %d\n", rc);
+                GOTO(out_thread, rc);
+        }
+
+        INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
+        spin_lock_init(&expired_lock_thread.elt_lock);
+        expired_lock_thread.elt_state = ELT_STOPPED;
+        init_waitqueue_head(&expired_lock_thread.elt_waitq);
+
+        wait_event(expired_lock_thread.elt_waitq,
+                   expired_lock_thread.elt_state == ELT_READY);
 #endif
         INIT_LIST_HEAD(&waiting_locks_list);
         spin_lock_init(&waiting_locks_spinlock);
@@ -865,6 +964,11 @@ static int ldlm_cleanup(struct obd_devic
         ptlrpc_stop_all_threads(ldlm->ldlm_cancel_service);
         ptlrpc_unregister_service(ldlm->ldlm_cancel_service);
         ldlm_proc_cleanup(obddev);
+
+        expired_lock_thread.elt_state = ELT_TERMINATE;
+        wake_up(&expired_lock_thread.elt_waitq);
+        wait_event(expired_lock_thread.elt_waitq,
+                   expired_lock_thread.elt_state == ELT_STOPPED);

         inter_module_unregister("ldlm_namespace_cleanup");
         inter_module_unregister("ldlm_cli_cancel_unused");

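The reason for the new thread is the usual kernel constraint: timer
callbacks run in softirq context and must not sleep, while
ptlrpc_fail_export() sends ASTs and can block. The fix is the standard
split: the timer only moves expired locks onto a list and wakes a worker,
and the worker, running in process context, does the blocking work. A
userspace pthreads sketch of the same producer/consumer shape (all names
hypothetical, not the kernel API):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Hypothetical work item standing in for an expired ldlm_lock. */
    struct work { struct work *next; int id; };

    static struct work *expired_list;          /* ~ elt_expired_locks */
    static pthread_mutex_t elt_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  elt_waitq = PTHREAD_COND_INITIALIZER;
    static int terminate;

    /* "Timer" side: may not sleep, so it only queues the item and wakes
     * the thread -- the moral equivalent of waiting_locks_callback(). */
    static void timer_expired(struct work *w)
    {
            pthread_mutex_lock(&elt_lock);
            w->next = expired_list;
            expired_list = w;
            pthread_cond_signal(&elt_waitq);   /* ~ wake_up(&elt_waitq) */
            pthread_mutex_unlock(&elt_lock);
    }

    /* Thread side: free to sleep, so it can do the expensive recovery
     * work -- the moral equivalent of expired_lock_main(). */
    static void *expired_lock_main(void *arg)
    {
            (void)arg;
            pthread_mutex_lock(&elt_lock);
            while (!terminate) {
                    while (expired_list) {
                            struct work *w = expired_list;
                            expired_list = w->next;
                            /* Drop the lock around the blocking work,
                             * just as the kernel thread does. */
                            pthread_mutex_unlock(&elt_lock);
                            printf("failing export for lock %d\n", w->id);
                            pthread_mutex_lock(&elt_lock);
                    }
                    pthread_cond_wait(&elt_waitq, &elt_lock);
            }
            pthread_mutex_unlock(&elt_lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;
            struct work w = { NULL, 1 };

            pthread_create(&t, NULL, expired_lock_main, NULL);
            timer_expired(&w);
            sleep(1);

            pthread_mutex_lock(&elt_lock);
            terminate = 1;
            pthread_cond_signal(&elt_waitq);
            pthread_mutex_unlock(&elt_lock);
            pthread_join(t, NULL);
            return 0;
    }

Because queueing always happens under the lock and the consumer re-checks
the list after every wakeup, a wakeup that races with the drain loop is
harmless.
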
From: Phil S. <ps...@us...> - 2003-04-23 03:13:38

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv20701

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
if we receive an invalid export, print the sending NID

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.19
retrieving revision 1.131.2.20
diff -u -w -b -B -p -r1.131.2.19 -r1.131.2.20
--- ldlm_lockd.c	22 Apr 2003 23:16:58 -0000	1.131.2.19
+++ ldlm_lockd.c	23 Apr 2003 03:13:31 -0000	1.131.2.20
@@ -681,9 +681,10 @@ static int ldlm_callback_handler(struct
         if (req->rq_export == NULL) {
                 struct ldlm_request *dlm_req;

-                CERROR("operation %d with bad export (ptl req %d/rep %d)\n",
-                       req->rq_reqmsg->opc, req->rq_request_portal,
-                       req->rq_reply_portal);
+                CERROR("operation %d from nid 0x%x with bad export "
+                       "(ptl req %d/rep %d)\n", req->rq_reqmsg->opc,
+                       req->rq_connection->c_peer.peer_nid,
+                       req->rq_request_portal, req->rq_reply_portal);
                 CERROR("--> export addr: "LPX64", cookie: "LPX64"\n",
                        req->rq_reqmsg->handle.addr,
                        req->rq_reqmsg->handle.cookie);

From: Phil S. <ps...@us...> - 2003-04-23 03:14:27

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv20898

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
oops, NID is LPU64

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.20
retrieving revision 1.131.2.21
diff -u -w -b -B -p -r1.131.2.20 -r1.131.2.21
--- ldlm_lockd.c	23 Apr 2003 03:13:31 -0000	1.131.2.20
+++ ldlm_lockd.c	23 Apr 2003 03:14:17 -0000	1.131.2.21
@@ -681,7 +681,7 @@ static int ldlm_callback_handler(struct
         if (req->rq_export == NULL) {
                 struct ldlm_request *dlm_req;

-                CERROR("operation %d from nid 0x%x with bad export "
+                CERROR("operation %d from nid "LPU64" with bad export "
                        "(ptl req %d/rep %d)\n", req->rq_reqmsg->opc,
                        req->rq_connection->c_peer.peer_nid,
                        req->rq_request_portal, req->rq_reply_portal);

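The underlying issue: a Portals NID is a __u64, and "0x%x" consumes only
an unsigned int from the varargs, truncating the value and, on some ABIs,
misaligning every argument after it. Lustre's LPU64/LPX64 macros expand to
the right 64-bit length modifier per platform; standard C99 gets the same
effect with <inttypes.h>. A small userspace illustration using
PRIu64/PRIx64 in place of the Lustre macros, with a fabricated NID value:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t nid = 0x1234567890abcdefULL;  /* fabricated 64-bit NID */

            /* Wrong: "%x" expects unsigned int, so the value is truncated
             * (the cast here only makes the truncation well-defined). */
            printf("bad:  0x%x\n", (unsigned int)nid);

            /* Right: a 64-bit conversion, which is what LPU64/LPX64
             * expand to on each platform. */
            printf("good: %" PRIu64 " (0x%" PRIx64 ")\n", nid, nid);
            return 0;
    }
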
From: Mike S. <sh...@us...> - 2003-04-23 16:23:30

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv30306

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
Important: initialize the expired_lock_thread structures _before_
starting the thread.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.21
retrieving revision 1.131.2.22
diff -u -w -b -B -p -r1.131.2.21 -r1.131.2.22
--- ldlm_lockd.c	23 Apr 2003 03:14:17 -0000	1.131.2.21
+++ ldlm_lockd.c	23 Apr 2003 16:23:26 -0000	1.131.2.22
@@ -42,6 +42,10 @@ extern struct list_head ldlm_namespace_l
 extern int (*mds_reint_p)(int offset, struct ptlrpc_request *req);
 extern int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req);

+static int ldlm_already_setup = 0;
+
+#ifdef __KERNEL__
+
 inline unsigned long round_timeout(unsigned long timeout)
 {
         return ((timeout / HZ) + 1) * HZ;
@@ -51,9 +55,6 @@ inline unsigned long round_timeout(unsig
 static struct list_head waiting_locks_list;
 static spinlock_t waiting_locks_spinlock;
 static struct timer_list waiting_locks_timer;
-static int ldlm_already_setup = 0;
-
-#ifdef __KERNEL__

 static struct expired_lock_thread {
         wait_queue_head_t elt_waitq;
@@ -131,8 +132,6 @@ static int expired_lock_main(void *arg)
         RETURN(0);
 }

-#endif /* __KERNEL__ */
-
 static void waiting_locks_callback(unsigned long unused)
 {
         struct ldlm_lock *lock;
@@ -235,6 +234,20 @@ int ldlm_del_waiting_lock(struct ldlm_lo
         RETURN(1);
 }

+#else /* !__KERNEL__ */
+
+static int ldlm_add_waiting_lock(struct ldlm_lock *lock)
+{
+        RETURN(1);
+}
+
+int ldlm_del_waiting_lock(struct ldlm_lock *lock)
+{
+        RETURN(0);
+}
+
+#endif /* __KERNEL__ */
+
 static inline void ldlm_failed_ast(struct ldlm_lock *lock, int rc,
                                    char *ast_type)
 {
@@ -903,16 +916,16 @@ static int ldlm_setup(struct obd_device
                 }
         }

+        INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
+        spin_lock_init(&expired_lock_thread.elt_lock);
+        expired_lock_thread.elt_state = ELT_STOPPED;
+        init_waitqueue_head(&expired_lock_thread.elt_waitq);
+
         rc = kernel_thread(expired_lock_main, NULL, CLONE_VM | CLONE_FS);
         if (rc < 0) {
                 CERROR("Cannot start ldlm expired-lock thread: %d\n", rc);
                 GOTO(out_thread, rc);
         }
-
-        INIT_LIST_HEAD(&expired_lock_thread.elt_expired_locks);
-        spin_lock_init(&expired_lock_thread.elt_lock);
-        expired_lock_thread.elt_state = ELT_STOPPED;
-        init_waitqueue_head(&expired_lock_thread.elt_waitq);

         wait_event(expired_lock_thread.elt_waitq,
                    expired_lock_thread.elt_state == ELT_READY);

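The bug being fixed is a startup race: kernel_thread() can begin running
expired_lock_main() -- which immediately uses elt_lock and elt_waitq --
before ldlm_setup() has initialized them. The rule is to fully initialize
shared state before publishing it to another thread. A pthreads sketch of
the correct ordering, with hypothetical names echoing the kernel structs:

    #include <pthread.h>
    #include <stdio.h>

    /* Hypothetical shared state, analogous to expired_lock_thread. */
    struct elt {
            pthread_mutex_t lock;
            pthread_cond_t  waitq;
            int             state;
    };

    static struct elt elt;

    static void *thread_main(void *arg)
    {
            (void)arg;
            /* The first thing the thread does is touch elt, so elt must
             * be fully initialized before the thread is created; in the
             * kernel case, expired_lock_main() used elt_waitq/elt_lock
             * immediately. */
            pthread_mutex_lock(&elt.lock);
            elt.state = 1;                        /* ~ ELT_READY */
            pthread_cond_signal(&elt.waitq);
            pthread_mutex_unlock(&elt.lock);
            return NULL;
    }

    int main(void)
    {
            pthread_t t;

            /* Initialize *before* starting the thread, never after. */
            pthread_mutex_init(&elt.lock, NULL);
            pthread_cond_init(&elt.waitq, NULL);
            elt.state = 0;                        /* ~ ELT_STOPPED */

            pthread_create(&t, NULL, thread_main, NULL);

            pthread_mutex_lock(&elt.lock);
            while (elt.state != 1)
                    pthread_cond_wait(&elt.waitq, &elt.lock);
            pthread_mutex_unlock(&elt.lock);

            printf("thread ready\n");
            pthread_join(t, NULL);
            return 0;
    }
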
From: Phil S. <ps...@us...> - 2003-04-23 22:12:12

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv5760

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
Make "ldlm_del_waiting_lock" a no-op on the client side, because there
is no waiting locks list.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.22
retrieving revision 1.131.2.23
diff -u -w -b -B -p -r1.131.2.22 -r1.131.2.23
--- ldlm_lockd.c	23 Apr 2003 16:23:26 -0000	1.131.2.22
+++ ldlm_lockd.c	23 Apr 2003 22:12:07 -0000	1.131.2.23
@@ -203,8 +203,12 @@ static int ldlm_add_waiting_lock(struct
 int ldlm_del_waiting_lock(struct ldlm_lock *lock)
 {
         struct list_head *list_next;
-
         ENTRY;
+
+        if (lock->l_export == NULL) {
+                /* We don't have a "waiting locks list" on clients. */
+                RETURN(0);
+        }

         spin_lock_bh(&waiting_locks_spinlock);

From: Robert R. <rr...@us...> - 2003-04-24 01:38:21

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv13228/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
b=1075
* added --failover option to lconf and lctl cleanup
* failover flag added to obd_disconnect and obd_cleanup
* if failover == 1, then MDS and OST will not update the client state
  in last_rcvd.
* Also changes to lconf to add the --group support to MDS.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.23
retrieving revision 1.131.2.24
diff -u -w -b -B -p -r1.131.2.23 -r1.131.2.24
--- ldlm_lockd.c	23 Apr 2003 22:12:07 -0000	1.131.2.23
+++ ldlm_lockd.c	24 Apr 2003 01:37:48 -0000	1.131.2.24
@@ -957,7 +957,7 @@ static int ldlm_setup(struct obd_device
         return rc;
 }

-static int ldlm_cleanup(struct obd_device *obddev, int force)
+static int ldlm_cleanup(struct obd_device *obddev, int force, int failover)
 {
         struct ldlm_obd *ldlm = &obddev->u.ldlm;
         ENTRY;

From: Mei <me...@us...> - 2003-04-26 09:25:32

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv27150

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
fix compilation for liblustre

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.24
retrieving revision 1.131.2.25
diff -u -w -b -B -p -r1.131.2.24 -r1.131.2.25
--- ldlm_lockd.c	24 Apr 2003 01:37:48 -0000	1.131.2.24
+++ ldlm_lockd.c	26 Apr 2003 09:25:22 -0000	1.131.2.25
@@ -933,12 +933,13 @@ static int ldlm_setup(struct obd_device
         wait_event(expired_lock_thread.elt_waitq,
                    expired_lock_thread.elt_state == ELT_READY);
-#endif
+
         INIT_LIST_HEAD(&waiting_locks_list);
         spin_lock_init(&waiting_locks_spinlock);
         waiting_locks_timer.function = waiting_locks_callback;
         waiting_locks_timer.data = 0;
         init_timer(&waiting_locks_timer);
+#endif

         ldlm_already_setup = 1;

From: Phil S. <ps...@us...> - 2003-05-05 21:35:18

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv10244

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
Print rq_repmsg->status instead of rq_status, to get a useful error code

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.26
retrieving revision 1.131.2.27
diff -u -w -b -B -p -r1.131.2.26 -r1.131.2.27
--- ldlm_lockd.c	27 Apr 2003 17:21:48 -0000	1.131.2.26
+++ ldlm_lockd.c	5 May 2003 21:35:14 -0000	1.131.2.27
@@ -328,7 +328,7 @@ int ldlm_server_blocking_ast(struct ldlm
                 ldlm_failed_ast(lock, rc, "blocking");
         } else if (rc) {
                 CERROR("client returned %d from blocking AST for lock %p\n",
-                       req->rq_status, lock);
+                       req->rq_repmsg->status, lock);
                 LDLM_DEBUG(lock, "client returned error %d from blocking AST",
                            req->rq_status);
                 ldlm_lock_cancel(lock);

From: Robert R. <rr...@us...> - 2003-05-13 00:46:12

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv25471/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
- add ptlrpc_dump_connections() for debugging ldlm cleanup problems
- clean up the import's export in ptlrpc_import_connect if the connect
  fails; this prevents an obd_refcount leak on the client during a
  failed mount.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.27
retrieving revision 1.131.2.28
diff -u -w -b -B -p -r1.131.2.27 -r1.131.2.28
--- ldlm_lockd.c	5 May 2003 21:35:14 -0000	1.131.2.27
+++ ldlm_lockd.c	13 May 2003 00:45:59 -0000	1.131.2.28
@@ -972,6 +972,7 @@ static int ldlm_cleanup(struct obd_devic
         } else if (ptlrpc_ldlm_hooks_referenced()) {
                 CERROR("Some connections weren't cleaned up; run lconf with "
                        "--force to forcibly unload.\n");
+                ptlrpc_dump_connections();
                 RETURN(-EBUSY);
         }

From: Hariharan T. <th...@us...> - 2003-05-16 05:56:15

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv12069/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
Added lprocfs counters for obd_ops stats, obdfilter and ptlrpc services.
Bugzilla#1107.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.28
retrieving revision 1.131.2.29
diff -u -w -b -B -p -r1.131.2.28 -r1.131.2.29
--- ldlm_lockd.c	13 May 2003 00:45:59 -0000	1.131.2.28
+++ ldlm_lockd.c	16 May 2003 05:55:40 -0000	1.131.2.29
@@ -876,7 +876,7 @@ static int ldlm_setup(struct obd_device
                 ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
                                 LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL,
                                 LDLM_CB_REPLY_PORTAL,
-                                ldlm_callback_handler, "ldlm_cbd");
+                                ldlm_callback_handler, "ldlm_cbd", obddev);

         if (!ldlm->ldlm_cb_service) {
                 CERROR("failed to start service\n");
@@ -887,7 +887,7 @@ static int ldlm_setup(struct obd_device
                 ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
                                 LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL,
                                 LDLM_CANCEL_REPLY_PORTAL,
-                                ldlm_cancel_handler, "ldlm_canceld");
+                                ldlm_cancel_handler, "ldlm_canceld", obddev);

         if (!ldlm->ldlm_cancel_service) {
                 CERROR("failed to start service\n");

From: Mike S. <sh...@us...> - 2003-05-16 14:49:50

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv28479

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
b=1289: need to wake the expired-locks thread when we add an expired
lock to its work list.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.29
retrieving revision 1.131.2.30
diff -u -w -b -B -p -r1.131.2.29 -r1.131.2.30
--- ldlm_lockd.c	16 May 2003 05:55:40 -0000	1.131.2.29
+++ ldlm_lockd.c	16 May 2003 14:49:46 -0000	1.131.2.30
@@ -155,12 +155,12 @@ static void waiting_locks_callback(unsig
                 LDLM_DEBUG(lock, "timer expired");

-                /* ptlrpc_fail_export must be called with this lock released */
                 spin_lock_bh(&expired_lock_thread.elt_lock);
                 list_del(&lock->l_pending_chain);
                 list_add(&lock->l_pending_chain,
                          &expired_lock_thread.elt_expired_locks);
                 spin_unlock_bh(&expired_lock_thread.elt_lock);
+                wake_up(&expired_lock_thread.elt_waitq);
         }

         spin_unlock_bh(&waiting_locks_spinlock);

From: Phil S. <ps...@us...> - 2003-05-19 06:29:25

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv30166/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
merge b_ad (cleaned up a bit) into b_devel; entirely 2.5 compat fixes

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.30
retrieving revision 1.131.2.31
diff -u -w -b -B -p -r1.131.2.30 -r1.131.2.31
--- ldlm_lockd.c	16 May 2003 14:49:46 -0000	1.131.2.30
+++ ldlm_lockd.c	19 May 2003 06:29:21 -0000	1.131.2.31
@@ -86,19 +86,13 @@ static int expired_lock_main(void *arg)
         ENTRY;
         lock_kernel();
-        daemonize();
+        kportal_daemonize("ldlm_elt");

-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
+        SIGNAL_MASK_LOCK(current, flags);
         sigfillset(&current->blocked);
-        recalc_sigpending();
-#else
-        spin_lock_irqsave(&current->sigmask_lock, flags);
-        sigfillset(&current->blocked);
-        recalc_sigpending(current);
-        spin_unlock_irqrestore(&current->sigmask_lock, flags);
-#endif
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);

-        strcpy(current->comm, "ldlm_elt");
         unlock_kernel();

         expired_lock_thread.elt_state = ELT_READY;

From: Eric B. <ee...@us...> - 2003-05-23 18:41:42

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv19138/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
* Removed redundant parameter to ptlrpc_{error,reply}
* Reply messages set msg->opc to match the request. Client checks this
  for sanity. Makes tcpdump tractable and should help in other debug
  situations.
* Made mismatched req/rep transnos on replay a protocol error rather
  than LASSERT (it LBUGs for the time being).

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.32
retrieving revision 1.131.2.33
diff -u -w -b -B -p -r1.131.2.32 -r1.131.2.33
--- ldlm_lockd.c	22 May 2003 18:50:46 -0000	1.131.2.32
+++ ldlm_lockd.c	23 May 2003 18:41:08 -0000	1.131.2.33
@@ -566,7 +566,7 @@ int ldlm_handle_cancel(struct ptlrpc_req
                 req->rq_status = 0;
         }

-        if (ptlrpc_reply(req->rq_svc, req) != 0)
+        if (ptlrpc_reply(req) != 0)
                 LBUG();

         if (lock) {
@@ -674,7 +674,7 @@ static int ldlm_callback_reply(struct pt
                              &req->rq_repmsg);
         if (rc)
                 return rc;
-        return ptlrpc_reply(req->rq_svc, req);
+        return ptlrpc_reply(req);
 }

 static int ldlm_callback_handler(struct ptlrpc_request *req)

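Echoing the request opcode into the reply makes packet dumps
self-describing and gives the client a cheap integrity check. A sketch of
what such a client-side check might look like, using deliberately
simplified struct shapes (the real lustre_msg and ptlrpc_request carry far
more fields, and the -EPROTO choice here is illustrative):

    #include <errno.h>

    /* Simplified message/request shapes mirroring the fields the diffs
     * above reference. */
    struct lustre_msg { unsigned int opc; int status; };
    struct ptlrpc_request {
            struct lustre_msg *rq_reqmsg;
            struct lustre_msg *rq_repmsg;
    };

    /* Client-side sanity check: a reply whose opc doesn't match the
     * request indicates a confused or corrupted peer. */
    static int check_reply_opc(struct ptlrpc_request *req)
    {
            if (req->rq_repmsg->opc != req->rq_reqmsg->opc)
                    return -EPROTO;   /* protocol error, not an LBUG */
            return 0;
    }

    int main(void)
    {
            struct lustre_msg rq = { 101, 0 }, rp = { 101, 0 };
            struct ptlrpc_request req = { &rq, &rp };
            return check_reply_opc(&req);   /* 0: opcodes match */
    }

Treating the mismatch as a protocol error rather than an assertion keeps
one bad peer from taking down the whole node.
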
From: Mike S. <sh...@us...> - 2003-05-29 13:58:58

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv32644

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
Phil noticed that I'd been stupid, and accidentally removed some needed
return statements in my last checkin. Better now!

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.37
retrieving revision 1.131.2.38
diff -u -w -b -B -p -r1.131.2.37 -r1.131.2.38
--- ldlm_lockd.c	28 May 2003 21:17:24 -0000	1.131.2.37
+++ ldlm_lockd.c	29 May 2003 13:37:56 -0000	1.131.2.38
@@ -186,6 +186,7 @@ static int ldlm_add_waiting_lock(struct
         spin_unlock_bh(&waiting_locks_spinlock);
         /* We drop this ref when we get removed from the list. */
         class_export_get(lock->l_export);
+        return 0;
 }

 /*
@@ -200,6 +201,7 @@ int ldlm_del_waiting_lock(struct ldlm_lo
         if (lock->l_export == NULL) {
                 /* We don't have a "waiting locks list" on clients. */
                 LDLM_DEBUG(lock, "client lock: no-op");
+                return 0;
         }

         spin_lock_bh(&waiting_locks_spinlock);
@@ -207,7 +209,7 @@ int ldlm_del_waiting_lock(struct ldlm_lo
         if (list_empty(&lock->l_pending_chain)) {
                 spin_unlock_bh(&waiting_locks_spinlock);
                 LDLM_DEBUG(lock, "wasn't waiting");
-                RETURN(0);
+                return 0;
         }

         list_next = lock->l_pending_chain.next;
@@ -229,6 +231,7 @@ int ldlm_del_waiting_lock(struct ldlm_lo
         /* We got this ref when we were added to the list. */
         class_export_put(lock->l_export);
         LDLM_DEBUG(lock, "removed");
+        return 1;
 }

 #else /* !__KERNEL__ */

From: Mike S. <sh...@us...> - 2003-05-29 16:43:09

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv10993

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
returning 0, meant 1; probably harmless, but hey

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.38
retrieving revision 1.131.2.39
diff -u -w -b -B -p -r1.131.2.38 -r1.131.2.39
--- ldlm_lockd.c	29 May 2003 13:37:56 -0000	1.131.2.38
+++ ldlm_lockd.c	29 May 2003 16:43:04 -0000	1.131.2.39
@@ -186,7 +186,7 @@ static int ldlm_add_waiting_lock(struct
         spin_unlock_bh(&waiting_locks_spinlock);
         /* We drop this ref when we get removed from the list. */
         class_export_get(lock->l_export);
-        return 0;
+        return 1;
 }

 /*

From: Phil S. <ps...@us...> - 2003-06-02 04:02:18

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv14395/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
- print the error code if portals_do_debug_dumplog fails
- demote "mmap readpage - check locks" message to a CDEBUG
- don't try to set mtime on OSTs if there's no LSM (file not yet opened)
- print which client (nid) returned an error from a blocking AST

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.39
retrieving revision 1.131.2.40
diff -u -w -b -B -p -r1.131.2.39 -r1.131.2.40
--- ldlm_lockd.c	29 May 2003 16:43:04 -0000	1.131.2.39
+++ ldlm_lockd.c	2 Jun 2003 04:02:14 -0000	1.131.2.40
@@ -323,7 +323,8 @@ int ldlm_server_blocking_ast(struct ldlm
         ldlm_del_waiting_lock(lock);
         ldlm_failed_ast(lock, rc, "blocking");
         } else if (rc) {
-                CERROR("client returned %d from blocking AST for lock %p\n",
+                CERROR("client (nid "LPU64") returned %d from blocking AST for "
+                       "lock %p\n", req->rq_connection->c_peer.peer_nid,
                        req->rq_repmsg->status, lock);
                 LDLM_DEBUG(lock, "client returned error %d from blocking AST",
                            req->rq_status);

From: Phil S. <ps...@us...> - 2003-06-05 15:44:10

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv32031

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
reduce noise in ldlm_server_blocking_ast: two of these error codes are
common, acceptable races or other conditions. Only CERROR if we get
something different.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.41
retrieving revision 1.131.2.42
diff -u -w -b -B -p -r1.131.2.41 -r1.131.2.42
--- ldlm_lockd.c	4 Jun 2003 03:56:46 -0000	1.131.2.41
+++ ldlm_lockd.c	5 Jun 2003 15:44:04 -0000	1.131.2.42
@@ -318,8 +318,21 @@ int ldlm_server_blocking_ast(struct ldlm
                 ldlm_del_waiting_lock(lock);
                 ldlm_failed_ast(lock, rc, "blocking");
         } else if (rc) {
-                CERROR("client (nid "LPU64") returned %d from blocking AST for "
-                       "lock %p\n", req->rq_connection->c_peer.peer_nid,
+                if (rc == -EINVAL)
+                        CDEBUG(D_DLMTRACE, "client (nid "LPU64") returned %d "
+                               "from blocking AST for lock %p--normal race\n",
+                               req->rq_connection->c_peer.peer_nid,
+                               req->rq_repmsg->status, lock);
+                else if (rc == -ENOTCONN)
+                        CDEBUG(D_DLMTRACE, "client (nid "LPU64") returned %d "
+                               "from blocking AST for lock %p--this client was "
+                               "probably rebooted while it held a lock, nothing"
+                               " serious\n",req->rq_connection->c_peer.peer_nid,
+                               req->rq_repmsg->status, lock);
+                else
+                        CDEBUG(D_ERROR, "client (nid "LPU64") returned %d "
+                               "from blocking AST for lock %p\n",
+                               req->rq_connection->c_peer.peer_nid,
                        req->rq_repmsg->status, lock);
                 LDLM_DEBUG(lock, "client returned error %d from blocking AST",
                            req->rq_status);

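The underlying technique is to classify the return code before picking a
log level, so expected races don't show up as errors. A userspace sketch
of the same triage; log_msg() and the level enum are hypothetical
stand-ins for CDEBUG and its D_* masks:

    #include <errno.h>
    #include <stdio.h>

    enum level { DEBUG, ERROR };

    /* Hypothetical logger standing in for CDEBUG(D_DLMTRACE/D_ERROR). */
    static void log_msg(enum level lvl, const char *msg, int rc)
    {
            fprintf(stderr, "[%s] %s (rc=%d)\n",
                    lvl == ERROR ? "error" : "debug", msg, rc);
    }

    /* Triage an AST reply code: two codes are expected races, the rest
     * are real problems. */
    static void report_blocking_ast_rc(int rc)
    {
            if (rc == -EINVAL)
                    log_msg(DEBUG, "blocking AST raced with a cancel", rc);
            else if (rc == -ENOTCONN)
                    log_msg(DEBUG, "client rebooted while holding a lock", rc);
            else if (rc)
                    log_msg(ERROR, "client failed a blocking AST", rc);
    }

    int main(void)
    {
            report_blocking_ast_rc(-EINVAL);    /* debug */
            report_blocking_ast_rc(-ENOTCONN);  /* debug */
            report_blocking_ast_rc(-EIO);       /* error */
            return 0;
    }
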
From: Phil S. <ps...@us...> - 2003-06-09 03:56:18

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv24063/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
It is very common, if a node is rebooted with a lock held, to receive
messages to the ldlm_callback_handler with an invalid export; quiet
these down to D_RPCTRACE messages instead of CERRORs.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.42
retrieving revision 1.131.2.43
diff -u -w -b -B -p -r1.131.2.42 -r1.131.2.43
--- ldlm_lockd.c	5 Jun 2003 15:44:04 -0000	1.131.2.42
+++ ldlm_lockd.c	9 Jun 2003 03:56:14 -0000	1.131.2.43
@@ -703,16 +703,17 @@ static int ldlm_callback_handler(struct
         if (req->rq_export == NULL) {
                 struct ldlm_request *dlm_req;

-                CERROR("operation %d from nid "LPU64" with bad export "
-                       "(ptl req %d/rep %d)\n", req->rq_reqmsg->opc,
-                       req->rq_connection->c_peer.peer_nid,
+                CDEBUG(D_RPCTRACE, "operation %d from nid "LPU64" with bad "
+                       "export cookie "LPX64" (ptl req %d/rep %d); this is "
+                       "normal if this node rebooted with a lock held\n",
+                       req->rq_reqmsg->opc, req->rq_connection->c_peer.peer_nid,
+                       req->rq_reqmsg->handle.cookie,
                        req->rq_request_portal, req->rq_reply_portal);
-                CERROR("--> export cookie: "LPX64"\n",
-                       req->rq_reqmsg->handle.cookie);
+
                 dlm_req = lustre_swab_reqbuf(req, 0, sizeof (*dlm_req),
                                              lustre_swab_ldlm_request);
                 if (dlm_req != NULL)
-                        CERROR("--> lock cookie: "LPX64"\n",
+                        CDEBUG(D_RPCTRACE, "--> lock cookie: "LPX64"\n",
                                dlm_req->lock_handle1.cookie);

                 ldlm_callback_reply(req, -ENOTCONN);

From: Mike S. <sh...@us...> - 2003-07-13 09:33:19

Update of /cvsroot/lustre/lustre/ldlm
In directory sc8-pr-cvs1:/tmp/cvs-serv32044/ldlm

Modified Files:
      Tag: b_devel
        ldlm_lockd.c
Log Message:
Whine to console if we take > obd_timeout to service a request, or
grant a lock.

Index: ldlm_lockd.c
===================================================================
RCS file: /cvsroot/lustre/lustre/ldlm/ldlm_lockd.c,v
retrieving revision 1.131.2.46
retrieving revision 1.131.2.47
diff -u -w -b -B -p -r1.131.2.46 -r1.131.2.47
--- ldlm_lockd.c	5 Jul 2003 22:46:20 -0000	1.131.2.46
+++ ldlm_lockd.c	13 Jul 2003 09:33:10 -0000	1.131.2.47
@@ -347,10 +347,19 @@ int ldlm_server_blocking_ast(struct ldlm
         RETURN(rc);
 }

+/* XXX copied from ptlrpc/service.c */
+static long timeval_sub(struct timeval *large, struct timeval *small)
+{
+        return (large->tv_sec - small->tv_sec) * 1000000 +
+               (large->tv_usec - small->tv_usec);
+}
+
 int ldlm_server_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 {
         struct ldlm_request *body;
         struct ptlrpc_request *req;
+        struct timeval granted_time;
+        long total_enqueue_wait;
         int rc = 0, size = sizeof(*body);
         ENTRY;
@@ -359,6 +368,12 @@ int ldlm_server_completion_ast(struct ld
                 RETURN(-EINVAL);
         }

+        do_gettimeofday(&granted_time);
+        total_enqueue_wait = timeval_sub(&granted_time, &lock->l_enqueued_time);
+
+        if (total_enqueue_wait / 1000000 > obd_timeout)
+                LDLM_ERROR(lock, "enqueue wait took %ldus", total_enqueue_wait);
+
         req = ptlrpc_prep_req(lock->l_export->exp_ldlm_data.led_import,
                               LDLM_CP_CALLBACK, 1, &size, NULL);
         if (!req)
@@ -370,7 +385,8 @@ int ldlm_server_completion_ast(struct ld
         body->lock_flags = flags;

         ldlm_lock2desc(lock, &body->lock_desc);
-        LDLM_DEBUG(lock, "server preparing completion AST");
+        LDLM_DEBUG(lock, "server preparing completion AST (after %ldus wait)",
+                   total_enqueue_wait);

         req->rq_replen = lustre_msg_size(0, NULL);
         req->rq_level = LUSTRE_CONN_RECOVER;
@@ -447,6 +463,7 @@ int ldlm_handle_enqueue(struct ptlrpc_re
         if (!lock)
                 GOTO(out, err = -ENOMEM);

+        do_gettimeofday(&lock->l_enqueued_time);
         memcpy(&lock->l_remote_handle, &dlm_req->lock_handle1,
                sizeof(lock->l_remote_handle));
         LDLM_DEBUG(lock, "server-side enqueue handler, new lock created");

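The measurement itself is simple: stamp the lock at enqueue time, subtract
at grant time, and complain if the wait exceeded the timeout. A userspace
equivalent using gettimeofday(); obd_timeout is replaced by a local
constant, and note that the microsecond difference lives in a long, which
on 32-bit platforms overflows after roughly 35 minutes -- acceptable for a
"did we blow past the timeout?" check:

    #include <stdio.h>
    #include <sys/time.h>
    #include <unistd.h>

    /* Microsecond difference between two timevals, as in the diff above. */
    static long timeval_sub(struct timeval *large, struct timeval *small)
    {
            return (large->tv_sec - small->tv_sec) * 1000000 +
                   (large->tv_usec - small->tv_usec);
    }

    int main(void)
    {
            struct timeval enqueued, granted;
            long wait_us;
            const long timeout_sec = 100;   /* stand-in for obd_timeout */

            gettimeofday(&enqueued, NULL);  /* ~ ldlm_handle_enqueue() stamp */
            usleep(50000);                  /* pretend the grant took 50ms */
            gettimeofday(&granted, NULL);   /* ~ completion-AST time */

            wait_us = timeval_sub(&granted, &enqueued);
            if (wait_us / 1000000 > timeout_sec)
                    fprintf(stderr, "enqueue wait took %ldus\n", wait_us);
            else
                    printf("enqueue wait: %ldus\n", wait_us);
            return 0;
    }
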