From: Lars E. <lar...@li...> - 2010-02-27 12:04:29
|
Add a per-LUN BlockSize parameter, defaulting to 512, which allows us to specify the SCSI-level block size. This also renames the nullio Sectors=<size> parameter to BlockCount=<count>, keeping Sectors= around as a compatibility alias: now that the block size can be specified, the value is a SCSI block count rather than a number of sectors. In addition, it adds " blocks:%llu blocksize:%u" to iet_volume_info_show, which makes nullio_data and nullio_show obsolete. This can be used as a testbed for testing initiator io stack robustness for block size != 512, without actually having access to a physical drive or array with these properties. It also allows us better performance on disks and arrays with "physical" block size of 4k and "logical" block size of 512 bytes -- which do the read-modify-write cycle internally in firmware for block sizes != 4k. Finally, it makes iet usable on top of disks and arrays with both logical and physical block size != 512, which will be shipping soon enough. Signed-off-by: Lars Ellenberg <lar...@li...> --- Makefile | 5 +++++ dkms.conf | 43 +++++++++++++++++++++++-------------------- doc/manpages/ietd.conf.5 | 16 +++++++++++++--- kernel/block-io.c | 13 +++++++++++-- kernel/file-io.c | 2 +- kernel/iscsi.h | 2 -- kernel/null-io.c | 41 ++++++++++++----------------------------- kernel/volume.c | 24 ++++++++++++++++++++++++ patches/compat-2.6.30.patch | 12 ++++++++++++ 9 files changed, 101 insertions(+), 57 deletions(-) create mode 100644 patches/compat-2.6.30.patch diff --git a/Makefile b/Makefile index 0b94f89..f88a2fc 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,11 @@ ifeq ($(call kver_le,2,6,31),1) PATCHES := $(PATCHES) compat-2.6.31.patch endif +# Compatibility patch for kernels <= 2.6.30 +ifeq ($(call kver_le,2,6,30),1) + PATCHES := $(PATCHES) compat-2.6.30.patch +endif + # Compatibility patch for kernels <= 2.6.29 ifeq ($(call kver_le,2,6,29),1) PATCHES := $(PATCHES) compat-2.6.29.patch diff --git a/dkms.conf b/dkms.conf index f67226c..51c0e90 100644 --- 
a/dkms.conf +++ b/dkms.conf @@ -23,33 +23,36 @@ AUTOINSTALL="yes" PATCH[0]="compat-2.6.31.patch" PATCH_MATCH[0]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31)" -PATCH[1]="compat-2.6.29.patch" -PATCH_MATCH[1]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29)" +PATCH[1]="compat-2.6.30.patch" +PATCH_MATCH[1]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30)" -PATCH[2]="compat-2.6.28.patch" -PATCH_MATCH[2]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28)" +PATCH[2]="compat-2.6.29.patch" +PATCH_MATCH[2]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29)" -PATCH[3]="compat-2.6.25-2.6.27.patch" -PATCH_MATCH[3]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27)" +PATCH[3]="compat-2.6.28.patch" +PATCH_MATCH[3]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28)" -PATCH[4]="compat-2.6.24.patch" -PATCH_MATCH[4]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24)" +PATCH[4]="compat-2.6.25-2.6.27.patch" +PATCH_MATCH[4]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24|25|26|27)" -PATCH[5]="compat-2.6.23.patch" -PATCH_MATCH[5]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23)" +PATCH[5]="compat-2.6.24.patch" +PATCH_MATCH[5]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23|24)" -PATCH[6]="compat-2.6.22.patch" -PATCH_MATCH[6]="2\.6\.(9|14|15|16|17|18|19|20|21|22)" +PATCH[6]="compat-2.6.23.patch" +PATCH_MATCH[6]="2\.6\.(9|14|15|16|17|18|19|20|21|22|23)" -PATCH[7]="compat-2.6.19-2.6.21.patch" -PATCH_MATCH[7]="2\.6\.(9|14|15|16|17|18|19|20|21)" +PATCH[7]="compat-2.6.22.patch" +PATCH_MATCH[7]="2\.6\.(9|14|15|16|17|18|19|20|21|22)" -PATCH[8]="compat-2.6.14-2.6.18.patch" -PATCH_MATCH[8]="2\.6\.(9|14|15|16|17|18)" +PATCH[8]="compat-2.6.19-2.6.21.patch" +PATCH_MATCH[8]="2\.6\.(9|14|15|16|17|18|19|20|21)" -PATCH[9]="compat-sles10sp2.patch" -PATCH_MATCH[9]="2\.6\.16\.60-.*" +PATCH[9]="compat-2.6.14-2.6.18.patch" +PATCH_MATCH[9]="2\.6\.(9|14|15|16|17|18)" -PATCH[10]="compat-rhel4.patch" -PATCH_MATCH[10]="2\.6\.9-.*\.(el|plus\.c4)" 
+PATCH[10]="compat-sles10sp2.patch" +PATCH_MATCH[10]="2\.6\.16\.60-.*" + +PATCH[11]="compat-rhel4.patch" +PATCH_MATCH[11]="2\.6\.9-.*\.(el|plus\.c4)" diff --git a/doc/manpages/ietd.conf.5 b/doc/manpages/ietd.conf.5 index f5f9d6d..910b9fc 100644 --- a/doc/manpages/ietd.conf.5 +++ b/doc/manpages/ietd.conf.5 @@ -27,7 +27,7 @@ Target iqn.2001\-04.com.example:storage.disk2.sys1.xyz IncomingUser jim othersecret OutgoingUser james yetanothersecret Lun 0 Path=/dev/sdc,Type=fileio - Lun 1 Sectors=10000,Type=nullio + Lun 1 BlockCount=10000,BlockSize=4096,Type=nullio Alias Test HeaderDigest None DataDigest None @@ -127,7 +127,7 @@ has to be provided, if there is a .I <username> given. .TP -.B Lun <lun> Path=<device>,Type=(fileio|blockio)[,ScsiId=<scsi_id>][,ScsiSN=<scsi_sn>][,IOMode=(wb|ro)] | Sectors=<size>,Type=nullio +.B Lun <lun> Path=<device>,Type=(fileio|blockio)[,ScsiId=<scsi_id>][,ScsiSN=<scsi_sn>][,IOMode=(wb|ro|wt)][,BlockSize=<size>] | Type=nullio,BlockCount=<count>,BlockSize=<size> Parameters after <lun> should not contain any blank space character except the first blank space after <lun> is needed. .br @@ -185,13 +185,23 @@ NOTE: .br .br +You can specify a logical block +.I <size> +of the iSCSI volume. This size must be 512 or greater and must be a power of 2 (512,1024,2048,4096...). +If BlockSize isn't specified the default is 512 bytes for fileio, and the logical block size of the +lower level device for blockio (which typically is 512 Byte as well, still). +.br + +.br In .I nullio mode, it defines a mapping between a "Logical Unit Number" .I <lun> and an unnamed virtual device with +.I <count> +blocks of .I <size> -sectors. This is ONLY useful for performance measurement purposes. All writes to this virtual device will be discarded and reads will return random data. +bytes. This is ONLY useful for performance measurement purposes. All writes to this virtual device will be discarded and reads will return random data. 
.TP .B [Alias <aliasname>] This assigns an optional diff --git a/kernel/block-io.c b/kernel/block-io.c index 708f101..2e6cf8b 100644 --- a/kernel/block-io.c +++ b/kernel/block-io.c @@ -86,7 +86,8 @@ blockio_make_request(struct iet_volume *volume, struct tio *tio, int rw) goto out; } - bio->bi_sector = ppos >> volume->blk_shift; + /* bi_sector is always in units of 512 byte */ + bio->bi_sector = ppos >> 9; bio->bi_bdev = bio_data->bdev; bio->bi_end_io = blockio_bio_endio; bio->bi_private = tio_work; @@ -167,6 +168,14 @@ blockio_open_path(struct iet_volume *volume, const char *path) eprintk("Can't open device %s, error %d\n", path, err); bio_data->bdev = NULL; } else { + /* see Documentation/ABI/testing/sysfs-block */ + unsigned bsz = queue_logical_block_size(bdev_get_queue(bdev)); + unsigned bshift = ilog2(bsz); + if (volume->blk_shift < bshift) { + iprintk("Target %s, LUN %u: overriding block size to %u\n", + volume->target->name, volume->lun, bsz); + volume->blk_shift = bshift; + } bio_data->bdev = bdev; fsync_bdev(bio_data->bdev); } @@ -244,6 +253,7 @@ static match_table_t tokens = { {Opt_path, "Path=%s"}, {Opt_ignore, "Type=%s"}, {Opt_ignore, "IOMode=%s"}, + {Opt_ignore, "BlockSize=%s"}, {Opt_err, NULL}, }; @@ -363,7 +373,6 @@ blockio_attach(struct iet_volume *volume, char *args) ClearLURCache(volume); ClearLUWCache(volume); - volume->blk_shift = SECTOR_SIZE_BITS; volume->blk_cnt = bio_data->bdev->bd_inode->i_size >> volume->blk_shift; out: diff --git a/kernel/file-io.c b/kernel/file-io.c index fa8d42a..d3657fd 100644 --- a/kernel/file-io.c +++ b/kernel/file-io.c @@ -181,6 +181,7 @@ static match_table_t tokens = { {Opt_path, "Path=%s"}, {Opt_ignore, "Type=%s"}, {Opt_ignore, "IOMode=%s"}, + {Opt_ignore, "BlockSize=%s"}, {Opt_err, NULL}, }; @@ -297,7 +298,6 @@ static int fileio_attach(struct iet_volume *lu, char *args) goto out; } - lu->blk_shift = SECTOR_SIZE_BITS; lu->blk_cnt = inode->i_size >> lu->blk_shift; /* we're using the page cache */ diff --git 
a/kernel/iscsi.h b/kernel/iscsi.h index 1b6397f..ce7c13e 100644 --- a/kernel/iscsi.h +++ b/kernel/iscsi.h @@ -462,8 +462,6 @@ static inline void iscsi_cmnd_set_length(struct iscsi_pdu *pdu) #define cmnd_opcode(cmnd) ((cmnd)->pdu.bhs.opcode & ISCSI_OPCODE_MASK) #define cmnd_scsicode(cmnd) cmnd_hdr(cmnd)->scb[0] -#define SECTOR_SIZE_BITS 9 - enum cmnd_flags { CMND_hashed, CMND_queued, diff --git a/kernel/null-io.c b/kernel/null-io.c index cfa5899..17f2480 100644 --- a/kernel/null-io.c +++ b/kernel/null-io.c @@ -16,16 +16,17 @@ #include "iscsi_dbg.h" #include "iotype.h" -struct nullio_data { - u64 sectors; -}; - enum { - Opt_sectors, Opt_ignore, Opt_err, + Opt_blk_cnt, Opt_ignore, Opt_err, }; static match_table_t tokens = { - {Opt_sectors, "Sectors=%u"}, + /* alias for compatibility with existing setups and documentation */ + {Opt_blk_cnt, "Sectors=%u"}, + /* but actually it is the scsi block count, now that we can + * specify the block size. */ + {Opt_blk_cnt, "BlockCount=%u"}, + {Opt_ignore, "BlockSize=%s"}, {Opt_ignore, "Type=%s"}, {Opt_err, NULL}, }; @@ -34,7 +35,6 @@ static int parse_nullio_params(struct iet_volume *volume, char *params) { int err = 0; char *p, *q; - struct nullio_data *data = volume->private; while ((p = strsep(&params, ",")) != NULL) { substring_t args[MAX_OPT_ARGS]; @@ -43,11 +43,11 @@ static int parse_nullio_params(struct iet_volume *volume, char *params) continue; token = match_token(p, tokens, args); switch (token) { - case Opt_sectors: + case Opt_blk_cnt: q = match_strdup(&args[0]); if (!q) return -ENOMEM; - data->sectors = simple_strtoull(q, NULL, 10); + volume->blk_cnt = simple_strtoull(q, NULL, 10); kfree(q); break; case Opt_ignore: @@ -63,35 +63,25 @@ static int parse_nullio_params(struct iet_volume *volume, char *params) static void nullio_detach(struct iet_volume *lu) { - struct nullio_data *p = lu->private; - - kfree(p); - lu->private = NULL; } static int nullio_attach(struct iet_volume *lu, char *args) { int err = 0; - struct 
nullio_data *p; if (lu->private) { printk("already attached ? %d\n", lu->lun); return -EBUSY; } - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - lu->private = p; - if ((err = parse_nullio_params(lu, args)) < 0) { eprintk("%d\n", err); goto out; } - lu->blk_shift = SECTOR_SIZE_BITS; - lu->blk_cnt = (p->sectors = p->sectors ? : 1 << 27); /* 64 GB */ + /* >= 64 GiB depending on block size */ + if (!lu->blk_cnt) + lu->blk_cnt = 1 << 27; out: if (err < 0) @@ -99,16 +89,9 @@ out: return err; } -void nullio_show(struct iet_volume *lu, struct seq_file *seq) -{ - struct nullio_data *p = lu->private; - seq_printf(seq, " sectors:%llu\n", p->sectors); -} - struct iotype nullio = { .name = "nullio", .attach = nullio_attach, .detach = nullio_detach, - .show = nullio_show, }; diff --git a/kernel/volume.c b/kernel/volume.c index 820a58a..17dae4c 100644 --- a/kernel/volume.c +++ b/kernel/volume.c @@ -6,6 +6,7 @@ #include <linux/types.h> #include <linux/parser.h> +#include <linux/log2.h> #include "iscsi.h" #include "iscsi_dbg.h" @@ -25,18 +26,21 @@ struct iet_volume *volume_lookup(struct iscsi_target *target, u32 lun) enum { Opt_type, Opt_iomode, + Opt_blk_size, Opt_err, }; static match_table_t tokens = { {Opt_type, "Type=%s"}, {Opt_iomode, "IOMode=%s"}, + {Opt_blk_size, "BlockSize=%u"}, {Opt_err, NULL}, }; static int set_iotype(struct iet_volume *volume, char *params) { int err = 0; + unsigned blk_sz; substring_t args[MAX_OPT_ARGS]; char *p, *argp = NULL, *buf = (char *) get_zeroed_page(GFP_USER); @@ -44,6 +48,9 @@ static int set_iotype(struct iet_volume *volume, char *params) return -ENOMEM; strncpy(buf, params, PAGE_CACHE_SIZE); + /* assume 512 byte block size by default */ + volume->blk_shift = 9; + while ((p = strsep(&buf, ",")) != NULL) { int token; @@ -67,6 +74,21 @@ static int set_iotype(struct iet_volume *volume, char *params) SetLUWCache(volume); kfree(argp); break; + case Opt_blk_size: + argp = match_strdup(&args[0]); + if (!argp) { + err = -ENOMEM; 
+ break; + } + blk_sz = simple_strtoull(argp, NULL, 10); + if (512 <= blk_sz && blk_sz <= (64 << 10) && is_power_of_2(blk_sz)) + volume->blk_shift = ilog2(blk_sz); + else { + eprintk("invalid BlockSize=%u\n", blk_sz); + err = -EINVAL; + } + kfree(argp); + break; default: break; } @@ -257,6 +279,8 @@ static void iet_volume_info_show(struct seq_file *seq, struct iscsi_target *targ else seq_printf(seq, " iomode:wt"); + seq_printf(seq, " blocks:%llu blocksize:%u", + volume->blk_cnt, 1 << volume->blk_shift); if (volume->iotype->show) volume->iotype->show(volume, seq); else diff --git a/patches/compat-2.6.30.patch b/patches/compat-2.6.30.patch new file mode 100644 index 0000000..1d2e557 --- /dev/null +++ b/patches/compat-2.6.30.patch @@ -0,0 +1,12 @@ +diff -u kernel/block-io.c kernel/block-io.c +--- a/kernel/block-io.c (working copy) ++++ b/kernel/block-io.c (working copy) +@@ -169,7 +169,7 @@ + bio_data->bdev = NULL; + } else { + /* see Documentation/ABI/testing/sysfs-block */ +- unsigned bsz = queue_logical_block_size(bdev_get_queue(bdev)); ++ unsigned bsz = queue_hardsect_size(bdev_get_queue(bdev)); + unsigned bshift = ilog2(bsz); + if (volume->blk_shift < bshift) { + iprintk("Target %s, LUN %u: overriding block size to %u\n", -- 1.6.3.3 |