From: Lars E. <lar...@li...> - 2010-02-27 22:09:02
|
Add a per LUN BlockSize parameter, defaulting to 512, which allows us to specify the SCSI level block size. This also renames the nullio Sectors=<size> to Blocks=<count>, keeping Sectors= around as a compat alias, because the value is no longer a number of sectors but the SCSI block count, now that we can specify the block size. Finally, it adds " blocks:%llu blocksize:%u" to iet_volume_info_show, which makes nullio_data and nullio_show obsolete. This can be used as a testbed for testing initiator io stack robustness for block size != 512, without actually having access to a physical drive or array with these properties. It also allows us better performance on disks and arrays with "physical" block size of 4k and "logical" block size of 512 byte -- which do the read-modify-write cycle internally in firmware for block sizes != 4k. Finally it makes iet usable on top of disks and arrays with both logical and physical block size != 512, which will be shipping soon enough. Signed-off-by: Lars Ellenberg <lar...@li...> --- Makefile | 5 +++++ dkms.conf | 43 +++++++++++++++++++++++-------------------- doc/manpages/ietd.conf.5 | 17 ++++++++++++++--- kernel/block-io.c | 32 +++++++++++++++++++++++++++----- kernel/file-io.c | 2 +- kernel/iscsi.h | 3 ++- kernel/null-io.c | 41 ++++++++++++----------------------------- kernel/volume.c | 29 +++++++++++++++++++++++++++++ patches/compat-2.6.30.patch | 13 +++++++++++++ 9 files changed, 126 insertions(+), 59 deletions(-) create mode 100644 patches/compat-2.6.30.patch diff --git a/Makefile b/Makefile index 0b94f89..f88a2fc 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,11 @@ ifeq ($(call kver_le,2,6,31),1) PATCHES := $(PATCHES) compat-2.6.31.patch endif +# Compatibility patch for kernels <= 2.6.30 +ifeq ($(call kver_le,2,6,30),1) + PATCHES := $(PATCHES) compat-2.6.30.patch +endif + # Compatibility patch for kernels <= 2.6.29 ifeq ($(call kver_le,2,6,29),1) PATCHES := $(PATCHES) compat-2.6.29.patch diff --git a/dkms.conf b/dkms.conf index 
f67226c..51c0e90 100644 --- a/dkms.conf +++ b/dkms.conf @@ -23,33 +23,36 @@ AUTOINSTALL="yes" PATCH[0]="compat-2.6.31.patch" PATCH_MATCH[0]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31)" -PATCH[1]="compat-2.6.29.patch" -PATCH_MATCH[1]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29)" +PATCH[1]="compat-2.6.30.patch" +PATCH_MATCH[1]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30)" -PATCH[2]="compat-2.6.28.patch" -PATCH_MATCH[2]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28)" +PATCH[2]="compat-2.6.29.patch" +PATCH_MATCH[2]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29)" -PATCH[3]="compat-2.6.25-2.6.27.patch" -PATCH_MATCH[3]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27)" +PATCH[3]="compat-2.6.28.patch" +PATCH_MATCH[3]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28)" -PATCH[4]="compat-2.6.24.patch" -PATCH_MATCH[4]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24)" +PATCH[4]="compat-2.6.25-2.6.27.patch" +PATCH_MATCH[4]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27)" -PATCH[5]="compat-2.6.23.patch" -PATCH_MATCH[5]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23)" +PATCH[5]="compat-2.6.24.patch" +PATCH_MATCH[5]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24)" -PATCH[6]="compat-2.6.22.patch" -PATCH_MATCH[6]="2\.6\.(9|14|15|16|17|18|19|20|21|22)" +PATCH[6]="compat-2.6.23.patch" +PATCH_MATCH[6]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23)" -PATCH[7]="compat-2.6.19-2.6.21.patch" -PATCH_MATCH[7]="2\.6\.(9|14|15|16|17|18|19|20|21)" +PATCH[7]="compat-2.6.22.patch" +PATCH_MATCH[7]="2\.6\.(9|14|15|16|17|18|19|20|21|22)" -PATCH[8]="compat-2.6.14-2.6.18.patch" -PATCH_MATCH[8]="2\.6\.(9|14|15|16|17|18)" +PATCH[8]="compat-2.6.19-2.6.21.patch" +PATCH_MATCH[8]="2\.6\.(9|14|15|16|17|18|19|20|21)" -PATCH[9]="compat-sles10sp2.patch" -PATCH_MATCH[9]="2\.6\.16\.60-.*" +PATCH[9]="compat-2.6.14-2.6.18.patch" +PATCH_MATCH[9]="2\.6\.(9|14|15|16|17|18)" -PATCH[10]="compat-rhel4.patch" -PATCH_MATCH[10]="2\.6\.9-.*\.(el|plus\.c4)" 
+PATCH[10]="compat-sles10sp2.patch" +PATCH_MATCH[10]="2\.6\.16\.60-.*" + +PATCH[11]="compat-rhel4.patch" +PATCH_MATCH[11]="2\.6\.9-.*\.(el|plus\.c4)" diff --git a/doc/manpages/ietd.conf.5 b/doc/manpages/ietd.conf.5 index f5f9d6d..b4cf4da 100644 --- a/doc/manpages/ietd.conf.5 +++ b/doc/manpages/ietd.conf.5 @@ -27,7 +27,7 @@ Target iqn.2001\-04.com.example:storage.disk2.sys1.xyz IncomingUser jim othersecret OutgoingUser james yetanothersecret Lun 0 Path=/dev/sdc,Type=fileio - Lun 1 Sectors=10000,Type=nullio + Lun 1 Blocks=10000,BlockSize=4096,Type=nullio Alias Test HeaderDigest None DataDigest None @@ -127,7 +127,7 @@ has to be provided, if there is a .I <username> given. .TP -.B Lun <lun> Path=<device>,Type=(fileio|blockio)[,ScsiId=<scsi_id>][,ScsiSN=<scsi_sn>][,IOMode=(wb|ro)] | Sectors=<size>,Type=nullio +.B Lun <lun> Path=<device>,Type=(fileio|blockio)[,ScsiId=<scsi_id>][,ScsiSN=<scsi_sn>][,IOMode=(wb|ro|wt)][,BlockSize=<size>] | Type=nullio,Blocks=<count>,BlockSize=<size> Parameters after <lun> should not contain any blank space character except the first blank space after <lun> is needed. .br @@ -185,13 +185,24 @@ NOTE: .br .br +You can specify a logical block +.I <size> +of the iSCSI volume. This size must be one of (512, 1024, 2048, 4096). +If BlockSize isn't specified the default is 512 bytes for fileio, +and the logical block size of the lower level device for blockio +(which typically is 512 Byte as well, still). +.br + +.br In .I nullio mode, it defines a mapping between a "Logical Unit Number" .I <lun> and an unnamed virtual device with +.I <count> +blocks of .I <size> -sectors. This is ONLY useful for performance measurement purposes. All writes to this virtual device will be discarded and reads will return random data. +bytes. This is ONLY useful for performance measurement purposes. All writes to this virtual device will be discarded and reads will return random data. 
.TP .B [Alias <aliasname>] This assigns an optional diff --git a/kernel/block-io.c b/kernel/block-io.c index 708f101..8dbf126 100644 --- a/kernel/block-io.c +++ b/kernel/block-io.c @@ -86,7 +86,8 @@ blockio_make_request(struct iet_volume *volume, struct tio *tio, int rw) goto out; } - bio->bi_sector = ppos >> volume->blk_shift; + /* bi_sector is always in units of 512 byte */ + bio->bi_sector = ppos >> 9; bio->bi_bdev = bio_data->bdev; bio->bi_end_io = blockio_bio_endio; bio->bi_private = tio_work; @@ -244,6 +245,7 @@ static match_table_t tokens = { {Opt_path, "Path=%s"}, {Opt_ignore, "Type=%s"}, {Opt_ignore, "IOMode=%s"}, + {Opt_ignore, "BlockSize=%s"}, {Opt_err, NULL}, }; @@ -315,6 +317,7 @@ parse_blockio_params(struct iet_volume *volume, char *params) volume->target->name, volume->lun); err = -EINVAL; } + out: return err; } @@ -350,12 +353,34 @@ blockio_attach(struct iet_volume *volume, char *args) volume->private = bio_data; - if ((err = parse_blockio_params(volume, args)) < 0) { + err = parse_blockio_params(volume, args); + if (!err) { + /* Default to the lower level device logical block size. + * If the user set an explicit block size, + * make sure it is compatible. */ + struct request_queue *q = bdev_get_queue(bio_data->bdev); + unsigned bsz = queue_logical_block_size(q); + unsigned bshift = ilog2(bsz); + if (!volume->blk_shift) + volume->blk_shift = bshift; + else if (volume->blk_shift < bshift) { + eprintk("Target %s, LUN %u: " + "blocksize (%u) < logical block size of %s (%u). 
" + "Try Type=fileio?\n", + volume->target->name, volume->lun, + 1 << volume->blk_shift, + bio_data->path, bsz); + err = -EINVAL; + } + } + if (err < 0) { eprintk("Error attaching Lun %u to Target %s \n", volume->lun, volume->target->name); goto out; } + volume->blk_cnt = bio_data->bdev->bd_inode->i_size >> volume->blk_shift; + /* Assign a vendor id, generate scsi id if none exists */ gen_scsiid(volume, bio_data->bdev->bd_inode); @@ -363,9 +388,6 @@ blockio_attach(struct iet_volume *volume, char *args) ClearLURCache(volume); ClearLUWCache(volume); - volume->blk_shift = SECTOR_SIZE_BITS; - volume->blk_cnt = bio_data->bdev->bd_inode->i_size >> volume->blk_shift; - out: if (err < 0) blockio_detach(volume); diff --git a/kernel/file-io.c b/kernel/file-io.c index fa8d42a..d3657fd 100644 --- a/kernel/file-io.c +++ b/kernel/file-io.c @@ -181,6 +181,7 @@ static match_table_t tokens = { {Opt_path, "Path=%s"}, {Opt_ignore, "Type=%s"}, {Opt_ignore, "IOMode=%s"}, + {Opt_ignore, "BlockSize=%s"}, {Opt_err, NULL}, }; @@ -297,7 +298,6 @@ static int fileio_attach(struct iet_volume *lu, char *args) goto out; } - lu->blk_shift = SECTOR_SIZE_BITS; lu->blk_cnt = inode->i_size >> lu->blk_shift; /* we're using the page cache */ diff --git a/kernel/iscsi.h b/kernel/iscsi.h index 1b6397f..bc693c0 100644 --- a/kernel/iscsi.h +++ b/kernel/iscsi.h @@ -462,7 +462,8 @@ static inline void iscsi_cmnd_set_length(struct iscsi_pdu *pdu) #define cmnd_opcode(cmnd) ((cmnd)->pdu.bhs.opcode & ISCSI_OPCODE_MASK) #define cmnd_scsicode(cmnd) cmnd_hdr(cmnd)->scb[0] -#define SECTOR_SIZE_BITS 9 +/* maximum scsi level block size we allow */ +#define IET_MAX_BLOCK_SIZE 4096 enum cmnd_flags { CMND_hashed, diff --git a/kernel/null-io.c b/kernel/null-io.c index cfa5899..a566ff3 100644 --- a/kernel/null-io.c +++ b/kernel/null-io.c @@ -16,16 +16,17 @@ #include "iscsi_dbg.h" #include "iotype.h" -struct nullio_data { - u64 sectors; -}; - enum { - Opt_sectors, Opt_ignore, Opt_err, + Opt_blk_cnt, Opt_ignore, Opt_err, }; 
static match_table_t tokens = { - {Opt_sectors, "Sectors=%u"}, + /* alias for compatibility with existing setups and documentation */ + {Opt_blk_cnt, "Sectors=%u"}, + /* but actually it is the scsi block count, now that we can + * specify the block size. */ + {Opt_blk_cnt, "Blocks=%u"}, + {Opt_ignore, "BlockSize=%s"}, {Opt_ignore, "Type=%s"}, {Opt_err, NULL}, }; @@ -34,7 +35,6 @@ static int parse_nullio_params(struct iet_volume *volume, char *params) { int err = 0; char *p, *q; - struct nullio_data *data = volume->private; while ((p = strsep(&params, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -43,11 +43,11 @@ static int parse_nullio_params(struct iet_volume *volume, char *params) continue; token = match_token(p, tokens, args); switch (token) { - case Opt_sectors: + case Opt_blk_cnt: q = match_strdup(&args[0]); if (!q) return -ENOMEM; - data->sectors = simple_strtoull(q, NULL, 10); + volume->blk_cnt = simple_strtoull(q, NULL, 10); kfree(q); break; case Opt_ignore: @@ -63,35 +63,25 @@ static int parse_nullio_params(struct iet_volume *volume, char *params) static void nullio_detach(struct iet_volume *lu) { - struct nullio_data *p = lu->private; - - kfree(p); - lu->private = NULL; } static int nullio_attach(struct iet_volume *lu, char *args) { int err = 0; - struct nullio_data *p; if (lu->private) { printk("already attached ? %d\n", lu->lun); return -EBUSY; } - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - lu->private = p; - if ((err = parse_nullio_params(lu, args)) < 0) { eprintk("%d\n", err); goto out; } - lu->blk_shift = SECTOR_SIZE_BITS; - lu->blk_cnt = (p->sectors = p->sectors ? 
: 1 << 27); /* 64 GB */ + /* >= 64 GiB depending on block size */ + if (!lu->blk_cnt) + lu->blk_cnt = 1 << 27; out: if (err < 0) @@ -99,16 +89,9 @@ out: return err; } -void nullio_show(struct iet_volume *lu, struct seq_file *seq) -{ - struct nullio_data *p = lu->private; - seq_printf(seq, " sectors:%llu\n", p->sectors); -} - struct iotype nullio = { .name = "nullio", .attach = nullio_attach, .detach = nullio_detach, - .show = nullio_show, }; diff --git a/kernel/volume.c b/kernel/volume.c index 820a58a..c05e789 100644 --- a/kernel/volume.c +++ b/kernel/volume.c @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/parser.h> +#include <linux/log2.h> #include "iscsi.h" #include "iscsi_dbg.h" @@ -25,18 +26,21 @@ struct iet_volume *volume_lookup(struct iscsi_target *target, u32 lun) enum { Opt_type, Opt_iomode, + Opt_blk_size, Opt_err, }; static match_table_t tokens = { {Opt_type, "Type=%s"}, {Opt_iomode, "IOMode=%s"}, + {Opt_blk_size, "BlockSize=%u"}, {Opt_err, NULL}, }; static int set_iotype(struct iet_volume *volume, char *params) { int err = 0; + unsigned blk_sz; substring_t args[MAX_OPT_ARGS]; char *p, *argp = NULL, *buf = (char *) get_zeroed_page(GFP_USER); @@ -67,6 +71,22 @@ static int set_iotype(struct iet_volume *volume, char *params) SetLUWCache(volume); kfree(argp); break; + case Opt_blk_size: + argp = match_strdup(&args[0]); + if (!argp) { + err = -ENOMEM; + break; + } + blk_sz = simple_strtoull(argp, NULL, 10); + if (is_power_of_2(blk_sz) && + 512 <= blk_sz && blk_sz <= IET_MAX_BLOCK_SIZE) + volume->blk_shift = ilog2(blk_sz); + else { + eprintk("invalid BlockSize=%u\n", blk_sz); + err = -EINVAL; + } + kfree(argp); + break; default: break; } @@ -123,6 +143,13 @@ int volume_add(struct iscsi_target *target, struct volume_info *info) if (ret < 0) goto free_args; + /* Assume 512 byte block size by default. + * Do this after ->attach(), so those routines + * can still distinguish user-supplied vs "default" + * by checking blk_shift == 0. 
*/ + if (!volume->blk_shift) + volume->blk_shift = 9; + INIT_LIST_HEAD(&volume->queue.wait_list); spin_lock_init(&volume->queue.queue_lock); spin_lock_init(&volume->reserve_lock); @@ -257,6 +284,8 @@ static void iet_volume_info_show(struct seq_file *seq, struct iscsi_target *targ else seq_printf(seq, " iomode:wt"); + seq_printf(seq, " blocks:%llu blocksize:%u", + volume->blk_cnt, 1 << volume->blk_shift); if (volume->iotype->show) volume->iotype->show(volume, seq); else diff --git a/patches/compat-2.6.30.patch b/patches/compat-2.6.30.patch new file mode 100644 index 0000000..aea7855 --- /dev/null +++ b/patches/compat-2.6.30.patch @@ -0,0 +1,13 @@ +diff --git a/kernel/block-io.c b/kernel/block-io.c +index 8dbf126..ddbd7fe 100644 +--- a/kernel/block-io.c ++++ b/kernel/block-io.c +@@ -359,7 +359,7 @@ blockio_attach(struct iet_volume *volume, char *args) + * If the user set an explicit block size, + * make sure it is compatible. */ + struct request_queue *q = bdev_get_queue(bio_data->bdev); +- unsigned bsz = queue_logical_block_size(q); ++ unsigned bsz = queue_hardsect_size(q); + unsigned bshift = ilog2(bsz); + if (!volume->blk_shift) + volume->blk_shift = bshift; -- 1.6.3.3 |