[Scst-svn] SF.net SVN: scst: [301] trunk

SourceForge Headquarters 225 Broadway Suite 1600 San Diego, CA 92101 +1 (858) 422-6466

Revision: 301
          http://scst.svn.sourceforge.net/scst/?rev=301&view=rev
Author:   vlnb
Date:     2008-03-11 12:09:30 -0700 (Tue, 11 Mar 2008)

Log Message:
-----------
 - Fixed race on TM leading to crashes
 - Connection close improved to be less agressive and honor TCP TIME_WATE state
 - Docs updates

Modified Paths:
--------------
    trunk/iscsi-scst/kernel/conn.c
    trunk/iscsi-scst/kernel/iscsi.h
    trunk/iscsi-scst/kernel/nthread.c
    trunk/iscsi-scst/kernel/session.c
    trunk/iscsi-scst/kernel/target.c
    trunk/scst/README
    trunk/scst/include/scsi_tgt.h
    trunk/scst/src/scst_targ.c

Modified: trunk/iscsi-scst/kernel/conn.c
===================================================================

--- trunk/iscsi-scst/kernel/conn.c	2008-03-11 14:27:13 UTC (rev 300)
+++ trunk/iscsi-scst/kernel/conn.c	2008-03-11 19:09:30 UTC (rev 301)
@@ -158,11 +158,14 @@
 	return;
 }
 
-void __mark_conn_closed(struct iscsi_conn *conn, bool force)
+void __mark_conn_closed(struct iscsi_conn *conn, int flags)
 {
 	spin_lock_bh(&iscsi_rd_lock);
 	conn->closing = 1;
-	conn->force_close = force;
+	if (flags & ISCSI_CONN_ACTIVE_CLOSE)
+		conn->active_close = 1;
+	if (flags & ISCSI_CONN_DELETING)
+		conn->deleting = 1;
 	spin_unlock_bh(&iscsi_rd_lock);
 
 	iscsi_make_conn_rd_active(conn);
@@ -170,7 +173,7 @@
 
 void mark_conn_closed(struct iscsi_conn *conn)
 {
-	__mark_conn_closed(conn, 0);
+	__mark_conn_closed(conn, ISCSI_CONN_ACTIVE_CLOSE);
 }
 
 static void iscsi_state_change(struct sock *sk)
@@ -184,7 +187,7 @@
 			PRINT_ERROR("Connection with initiator %s (%p) "
 				"unexpectedly closed!",
 				conn->session->initiator_name, conn);
-			__mark_conn_closed(conn, 1);
+			__mark_conn_closed(conn, 0);
 		}
 	} else
 		iscsi_make_conn_rd_active(conn);
@@ -252,7 +255,7 @@
 					"%s (SID %Lx), closing connection",
 					conn->session->initiator_name,
 					conn->session->sid);
-				__mark_conn_closed(conn, 1);
+				mark_conn_closed(conn);
 			}
 		} else {
 			TRACE_DBG("Restarting timer on %ld (conn %p)",
@@ -438,7 +441,7 @@
 	PRINT_INFO("Deleting connection with initiator %s (%p)",
 		conn->session->initiator_name, conn);
 
-	mark_conn_closed(conn);
+	__mark_conn_closed(conn, ISCSI_CONN_ACTIVE_CLOSE|ISCSI_CONN_DELETING);
 
 	return 0;
 }

Modified: trunk/iscsi-scst/kernel/iscsi.h
===================================================================
--- trunk/iscsi-scst/kernel/iscsi.h	2008-03-11 14:27:13 UTC (rev 300)
+++ trunk/iscsi-scst/kernel/iscsi.h	2008-03-11 19:09:30 UTC (rev 301)
@@ -115,7 +115,7 @@
 
 	struct completion unreg_compl;
 
-	/* Both don't need any protection */
+	/* All don't need any protection */
 	char *initiator_name;
 	unsigned int shutting_down:1;
 	u64 sid;
@@ -185,11 +185,13 @@
 	int hdigest_type;
 	int ddigest_type;
 
-	/* All 4 protected by iscsi_rd_lock */
+	/* All 5 protected by iscsi_rd_lock */
 	unsigned short rd_state;
 	unsigned short rd_data_ready:1;
-	unsigned short closing:1; /* Let's save some cache footprint by putting it here */
-	unsigned short force_close:1; /* Let's save some cache footprint by putting it here */
+	/* Let's save some cache footprint by putting them here */
+	unsigned short closing:1;
+	unsigned short active_close:1;
+	unsigned short deleting:1;
 
 	struct list_head rd_list_entry;
 
@@ -353,7 +355,11 @@
 extern int conn_add(struct iscsi_session *, struct conn_info *);
 extern int conn_del(struct iscsi_session *, struct conn_info *);
 extern int conn_free(struct iscsi_conn *);
-extern void __mark_conn_closed(struct iscsi_conn *, bool);
+
+#define ISCSI_CONN_ACTIVE_CLOSE		1
+#define ISCSI_CONN_DELETING		2
+extern void __mark_conn_closed(struct iscsi_conn *, int);
+
 extern void mark_conn_closed(struct iscsi_conn *);
 extern void iscsi_make_conn_wr_active(struct iscsi_conn *);
 extern void conn_info_show(struct seq_file *, struct iscsi_session *);

Modified: trunk/iscsi-scst/kernel/nthread.c
===================================================================
--- trunk/iscsi-scst/kernel/nthread.c	2008-03-11 14:27:13 UTC (rev 300)
+++ trunk/iscsi-scst/kernel/nthread.c	2008-03-11 19:09:30 UTC (rev 301)
@@ -61,7 +61,8 @@
 
 	TRACE_ENTRY();
 
-	TRACE_CONN_CLOSE("conn %p, sk_state %d", conn, conn->sock->sk->sk_state);
+	TRACE_CONN_CLOSE_DBG("conn %p, sk_state %d", conn,
+		conn->sock->sk->sk_state);
 
 	if (conn->sock->sk->sk_state != TCP_CLOSE) {
 		TRACE_CONN_CLOSE_DBG("conn %p, skipping", conn);
@@ -173,7 +174,7 @@
 
 	TRACE_ENTRY();
 
-	TRACE_CONN_CLOSE("sess %p (scst_sess %p)", sess, scst_sess);
+	TRACE_CONN_CLOSE_DBG("sess %p (scst_sess %p)", sess, scst_sess);
 
 	sess->shutting_down = 1;
 	complete_all(&sess->unreg_compl);
@@ -188,6 +189,8 @@
 	struct iscsi_session *session = conn->session;
 	struct iscsi_target *target = conn->target;
 	unsigned long start_waiting = jiffies;
+	unsigned long shut_start_waiting = start_waiting;
+	bool pending_reported = 0, wait_expired = 0, shut_expired = 0;
 
 	TRACE_ENTRY();
 
@@ -198,12 +201,12 @@
 
 	sBUG_ON(!conn->closing);
 
-	if (conn->force_close) {
+	if (conn->active_close) {
+		/* We want all our already send operations to complete */
+		conn->sock->ops->shutdown(conn->sock, RCV_SHUTDOWN);
+	} else {
 		conn->sock->ops->shutdown(conn->sock,
 			RCV_SHUTDOWN|SEND_SHUTDOWN);
-	} else {
-		/* We want all our already send operations to complete */
-		conn->sock->ops->shutdown(conn->sock, RCV_SHUTDOWN);
 	}
 
 	/*
@@ -251,7 +254,7 @@
 			struct list_head *pending_list = &session->pending_list;
 	 		int req_freed;
 
-	 		TRACE_CONN_CLOSE("Disposing pending commands on "
+	 		TRACE_CONN_CLOSE_DBG("Disposing pending commands on "
 	 			"connection %p (conn_ref_cnt=%d)", conn,
 	 			atomic_read(&conn->conn_ref_cnt));
 
@@ -267,13 +270,13 @@
 				req_freed = 0;
 				list_for_each_entry(cmnd, pending_list,
 							pending_list_entry) {
-					TRACE_CONN_CLOSE("Pending cmd %p"
+					TRACE_CONN_CLOSE_DBG("Pending cmd %p"
 						"(conn %p, cmd_sn %u, exp_cmd_sn %u)",
 						cmnd, conn, cmnd->pdu.bhs.sn,
 						session->exp_cmd_sn);
 					if ((cmnd->conn == conn) &&
 					    (session->exp_cmd_sn == cmnd->pdu.bhs.sn)) {
-						TRACE_CONN_CLOSE("Freeing pending cmd %p",
+						TRACE_CONN_CLOSE_DBG("Freeing pending cmd %p",
 							cmnd);
 
 						list_del(&cmnd->pending_list_entry);
@@ -294,13 +297,16 @@
 			spin_unlock(&session->sn_lock);
 
 			if (time_after(jiffies, start_waiting + 10*HZ)) {
-				TRACE_CONN_CLOSE("%s", "Pending wait time expired");
+				if (!pending_reported) {
+					TRACE_CONN_CLOSE("%s", "Pending wait time expired");
+					pending_reported = 1;
+				}
 				spin_lock(&session->sn_lock);
 				do {
 					req_freed = 0;
 					list_for_each_entry(cmnd, pending_list,
 							pending_list_entry) {
-						TRACE_CONN_CLOSE("Pending cmd %p"
+						TRACE_CONN_CLOSE_DBG("Pending cmd %p"
 							"(conn %p, cmd_sn %u, exp_cmd_sn %u)",
 							cmnd, conn, cmnd->pdu.bhs.sn,
 							session->exp_cmd_sn);
@@ -330,22 +336,28 @@
 
 		iscsi_make_conn_wr_active(conn);
 
-		if (time_after(jiffies, start_waiting + 7*HZ)) {
+		if (time_after(jiffies, start_waiting + 10*HZ) && !wait_expired) {
 			TRACE_CONN_CLOSE("Wait time expired (conn %p, "
 				"sk_state %d)", conn, conn->sock->sk->sk_state);
 			conn->sock->ops->shutdown(conn->sock, SEND_SHUTDOWN);
+			wait_expired = 1;
+			shut_start_waiting = jiffies;
 		}
 
-		if (time_after(jiffies, start_waiting + 15*HZ)) {
-			TRACE_CONN_CLOSE("Wait time after shutdown expired "
-				"(conn %p, sk_state %d)", conn,
-				conn->sock->sk->sk_state);
-			conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0);
-		}
+		if (conn->deleting) {
+			if (wait_expired && !shut_expired &&
+			    time_after(jiffies, shut_start_waiting + 10*HZ)) {
+				TRACE_CONN_CLOSE("Wait time after shutdown expired "
+					"(conn %p, sk_state %d)", conn,
+					conn->sock->sk->sk_state);
+				conn->sock->sk->sk_prot->disconnect(conn->sock->sk, 0);
+				shut_expired = 1;
+			}
+			msleep(200);
+		} else
+			msleep(1000);
 
-		msleep(200);
-
-		TRACE_CONN_CLOSE("conn %p, conn_ref_cnt %d left, wr_state %d, "
+		TRACE_CONN_CLOSE_DBG("conn %p, conn_ref_cnt %d left, wr_state %d, "
 			"exp_cmd_sn %u", conn, atomic_read(&conn->conn_ref_cnt),
 			conn->wr_state, session->exp_cmd_sn);
 #ifdef DEBUG
@@ -427,8 +439,8 @@
 		if (t && (atomic_read(&conn->conn_ref_cnt) == 0))
 			break;
 
-		TRACE_CONN_CLOSE("Waiting for wr thread (conn %p), wr_state %x",
-			conn, conn->wr_state);
+		TRACE_CONN_CLOSE_DBG("Waiting for wr thread (conn %p), "
+			"wr_state %x", conn, conn->wr_state);
 		msleep(50);
 	}
 

Modified: trunk/iscsi-scst/kernel/session.c
===================================================================
--- trunk/iscsi-scst/kernel/session.c	2008-03-11 14:27:13 UTC (rev 300)
+++ trunk/iscsi-scst/kernel/session.c	2008-03-11 19:09:30 UTC (rev 301)
@@ -177,8 +177,9 @@
 	struct iscsi_session *session;
 
 	list_for_each_entry(session, &target->session_list, session_list_entry) {
-		seq_printf(seq, "\tsid:%llu initiator:%s\n",
-			   (unsigned long long) session->sid, session->initiator_name);
+		seq_printf(seq, "\tsid:%llu initiator:%s shutting down %d\n",
+			   (unsigned long long)session->sid,
+			   session->initiator_name, session->shutting_down);
 		conn_info_show(seq, session);
 	}
 }

Modified: trunk/iscsi-scst/kernel/target.c
===================================================================
--- trunk/iscsi-scst/kernel/target.c	2008-03-11 14:27:13 UTC (rev 300)
+++ trunk/iscsi-scst/kernel/target.c	2008-03-11 19:09:30 UTC (rev 301)
@@ -240,7 +240,8 @@
 								conn_list_entry) {
 							TRACE_MGMT_DBG("Mark conn %p "
 								"closing", conn);
-							mark_conn_closed(conn);
+							__mark_conn_closed(conn,
+								ISCSI_CONN_ACTIVE_CLOSE|ISCSI_CONN_DELETING);
 						}
 					} else {
 						TRACE_MGMT_DBG("Freeing session %p "

Modified: trunk/scst/README
===================================================================
--- trunk/scst/README	2008-03-11 14:27:13 UTC (rev 300)
+++ trunk/scst/README	2008-03-11 19:09:30 UTC (rev 301)
@@ -477,6 +477,13 @@
 For example, "echo "open disk1 /vdisks/disk1" >/proc/scsi_tgt/vdisk/vdisk"
 will open file /vdisks/disk1 as virtual FILEIO disk with name "disk1".
 
+CAUTION: If you partitioned/formatted your device with block size X, *NEVER*
+======== ever try to export and then mount it (even accidentally) with another
+         block size. Otherwise you can *instantly* damage it pretty
+	 badly as well as all your data on it. Messages on initiator like:
+	 "attempt to access beyond end of device" is the sign of such
+	 damage.
+
 IMPORTANT: By default for performance reasons VDISK FILEIO devices use write
 =========  back caching policy. This is generally safe from the consistence of
            journaled file systems, laying over them, point of view, but
@@ -743,14 +750,18 @@
 comparing with your target link speed and amount of simultaneously
 queued commands. On some seek intensive workloads even fast disks or
 RAIDs, which able to serve continuous data stream on 500+ MB/s speed,
-can be as slow as 0.3 MB/s. So, simply processing of one or more
-commands takes too long time, hence initiator decides that they are
-stuck on the target and tries to recover. Particularly, it is known that
-the default amount of simultaneously queued commands (48) is sometimes
-too high if you do intensive writes from VMware on a target disk, which
-uses LVM in the snapshot mode. In this case value like 16 or even 8-10
-depending of your backstorage speed could be more appropriate.
+can be as slow as 0.3 MB/s. Another possible cause for that can be
+MD/LVM/RAID on your target as in http://lkml.org/lkml/2008/2/27/96
+(check the whole thread as well).
 
+Thus, in such situations simply processing of one or more commands takes
+too long time, hence initiator decides that they are stuck on the target
+and tries to recover. Particularly, it is known that the default amount
+of simultaneously queued commands (48) is sometimes too high if you do
+intensive writes from VMware on a target disk, which uses LVM in the
+snapshot mode. In this case value like 16 or even 8-10 depending of your
+backstorage speed could be more appropriate.
+
 Unfortunately, currently SCST lacks dynamic I/O flow control, when the
 queue depth on the target is dynamically decreased/increased based on
 how slow/fast the backstorage speed comparing to the target link. So,

Modified: trunk/scst/include/scsi_tgt.h
===================================================================
--- trunk/scst/include/scsi_tgt.h	2008-03-11 14:27:13 UTC (rev 300)
+++ trunk/scst/include/scsi_tgt.h	2008-03-11 19:09:30 UTC (rev 301)
@@ -1288,7 +1288,6 @@
 	unsigned int needs_unblocking:1;
 	unsigned int lun_set:1;		/* set, if lun field is valid */
 	unsigned int cmd_sn_set:1;	/* set, if cmd_sn field is valid */
-	unsigned int nexus_loss_check_active:1; /* set, if nexus loss check is active */
 	unsigned int nexus_loss_check_done:1; /* set, if nexus loss check is done */
 
 	/*

Modified: trunk/scst/src/scst_targ.c
===================================================================
--- trunk/scst/src/scst_targ.c	2008-03-11 14:27:13 UTC (rev 300)
+++ trunk/scst/src/scst_targ.c	2008-03-11 19:09:30 UTC (rev 301)
@@ -3399,7 +3399,7 @@
 
 		if (mcmd->completed) {
 			sBUG_ON(mcmd->nexus_loss_check_done);
-			mcmd->nexus_loss_check_active = 1;
+			mcmd->completed = 0;
 			mcmd->state = SCST_MGMT_CMD_STATE_CHECK_NEXUS_LOSS;
 			TRACE_MGMT_DBG("Adding mgmt cmd %p to active mgmt cmd "
 				"list", mcmd);
@@ -3451,7 +3451,7 @@
 			continue;
 		}
 
-		if (mcmd->completed && !mcmd->nexus_loss_check_active) {
+		if (mcmd->completed) {
 			mcmd->state = SCST_MGMT_CMD_STATE_DONE;
 			TRACE_MGMT_DBG("Adding mgmt cmd %p to active mgmt cmd "
 				"list",	mcmd);
@@ -4354,7 +4354,6 @@
 	}
 
 	mcmd->nexus_loss_check_done = 1;
-	mcmd->nexus_loss_check_active = 0;
 
 	res = scst_set_mcmd_next_state(mcmd);
 


This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.