|
From: Andy P. <at...@us...> - 2002-04-09 12:44:29
|
Update of /cvsroot/linux-vax/kernel-2.4/include/linux/raid
In directory usw-pr-cvs1:/tmp/cvs-serv17906/linux/raid
Modified Files:
md.h md_compatible.h md_k.h md_u.h raid1.h raid5.h
Added Files:
multipath.h
Log Message:
sync 2.4.15 commit 3
--- NEW FILE ---
#ifndef _MULTIPATH_H
#define _MULTIPATH_H
#include <linux/raid/md.h>
/*
* Descriptor for one path of a multipath array.
* struct multipath_private_data embeds a table of MD_SB_DISKS of these
* and additionally keeps a pointer to one of them as 'spare'.
*/
struct multipath_info {
/*
* Identification of the path.
* NOTE(review): the exact numbering scheme behind 'number' and
* 'raid_disk' is set by the users of this struct, not visible here.
*/
int number;
int raid_disk;
kdev_t dev;
/*
* State bits:
* (stored as plain ints; presumably used as 0/1 flags -- confirm
* against the multipath personality implementation)
*/
int operational;
int spare;
int used_slot;
};
/*
* Per-array private state of the multipath personality.
* An mddev_t refers to it through its opaque 'private' pointer, which
* mddev_to_conf() casts back to multipath_conf_t.
*/
struct multipath_private_data {
mddev_t *mddev;
/* fixed-size table of path descriptors, MD_SB_DISKS entries */
struct multipath_info multipaths[MD_SB_DISKS];
int nr_disks;
int raid_disks;
int working_disks;
mdk_thread_t *thread;
struct multipath_info *spare;
md_spinlock_t device_lock;
/* buffer pool */
/* buffer_heads that we have pre-allocated have b_pprev -> &freebh
* and are linked into a stack using b_next
* multipath_bh that are pre-allocated have MPBH_PreAlloc set.
* All these variables are protected by device_lock
*
* NOTE(review): this struct has no 'freebh' member, and 'freer1'
* carries a raid1-style name; the buffer_head sentence above looks
* inherited from raid1.h -- confirm whether it still applies here.
*/
struct multipath_bh *freer1;
int freer1_blocked;
int freer1_cnt;
md_wait_queue_head_t wait_buffer;
};
/* short name used by mddev_to_conf() for the private data type */
typedef struct multipath_private_data multipath_conf_t;
/*
* this is the only point in the RAID code where we violate
* C type safety. mddev->private is an 'opaque' pointer.
*
* Takes a pointer to an mddev_t and yields its 'private' member cast to
* multipath_conf_t *.  The argument is parenthesized so that any pointer
* expression (e.g. mddev_to_conf(base + i)) binds to '->' as a whole
* instead of only its last operand.
*/
#define mddev_to_conf(mddev) ((multipath_conf_t *) (mddev)->private)
/*
* this is our 'private' 'collective' MULTIPATH buffer head.
* it contains information about what kind of IO operations were started
* for this MULTIPATH operation, and about their status:
*
* Instances are chained through 'next_mp'; struct multipath_private_data
* holds a pointer of this type in its 'freer1' member.
*/
struct multipath_bh {
atomic_t remaining; /* 'have we finished' count,
* used from IRQ handlers
*/
int cmd; /* presumably the I/O command (read/write) -- TODO confirm */
unsigned long state; /* holds the MPBH_* bits defined after this struct */
mddev_t *mddev;
struct buffer_head *master_bh; /* presumably the originating request -- TODO confirm */
struct buffer_head bh_req; /* embedded by value, not a pointer */
struct multipath_bh *next_mp; /* next for retry or in free list */
};
/*
* bits for multipath_bh.state
*
* NOTE(review): the values are consecutive (1, 2, 3) rather than powers
* of two, so they look like bit numbers for the set_bit()/test_bit()
* family rather than masks -- confirm at the call sites.
*/
#define MPBH_Uptodate 1
#define MPBH_SyncPhase 2
#define MPBH_PreAlloc 3 /* this was pre-allocated, add to free list */
#endif
Index: md.h
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/include/linux/raid/md.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- md.h 14 Jan 2001 16:48:35 -0000 1.1.1.1
+++ md.h 9 Apr 2002 12:44:17 -0000 1.2
@@ -36,6 +36,7 @@
#include <linux/locks.h>
#include <linux/kernel_stat.h>
#include <asm/io.h>
+#include <linux/completion.h>
#include <linux/raid/md_compatible.h>
/*
@@ -77,10 +78,9 @@
extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors);
extern void md_recover_arrays (void);
extern int md_check_ordering (mddev_t *mddev);
-extern struct gendisk * find_gendisk (kdev_t dev);
extern int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x);
-extern int md_error (kdev_t mddev, kdev_t rdev);
+extern int md_error (mddev_t *mddev, kdev_t rdev);
extern int md_run_setup(void);
extern void md_print_devices (void);
Index: md_compatible.h
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/include/linux/raid/md_compatible.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- md_compatible.h 14 Jan 2001 16:48:38 -0000 1.1.1.1
+++ md_compatible.h 9 Apr 2002 12:44:17 -0000 1.2
@@ -27,12 +27,14 @@
/* 000 */
#define md__get_free_pages(x,y) __get_free_pages(x,y)
-#ifdef __i386__
+#if defined(__i386__) || defined(__x86_64__)
/* 001 */
-extern __inline__ int md_cpu_has_mmx(void)
+static __inline__ int md_cpu_has_mmx(void)
{
return test_bit(X86_FEATURE_MMX, &boot_cpu_data.x86_capability);
}
+#else
+#define md_cpu_has_mmx(x) (0)
#endif
/* 002 */
@@ -51,7 +53,7 @@
#define md_put_user put_user
/* 007 */
-extern inline int md_capable_admin(void)
+static inline int md_capable_admin(void)
{
return capable(CAP_SYS_ADMIN);
}
@@ -60,7 +62,7 @@
#define MD_FILE_TO_INODE(file) ((file)->f_dentry->d_inode)
/* 009 */
-extern inline void md_flush_signals (void)
+static inline void md_flush_signals (void)
{
spin_lock(&current->sigmask_lock);
flush_signals(current);
@@ -68,7 +70,7 @@
}
/* 010 */
-extern inline void md_init_signals (void)
+static inline void md_init_signals (void)
{
current->exit_signal = SIGCHLD;
siginitsetinv(&current->blocked, sigmask(SIGKILL));
Index: md_k.h
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/include/linux/raid/md_k.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- md_k.h 14 Jan 2001 16:48:36 -0000 1.1.1.1
+++ md_k.h 9 Apr 2002 12:44:17 -0000 1.2
@@ -17,17 +17,18 @@
#define MD_RESERVED 0UL
#define LINEAR 1UL
-#define STRIPED 2UL
-#define RAID0 STRIPED
+#define RAID0 2UL
#define RAID1 3UL
#define RAID5 4UL
#define TRANSLUCENT 5UL
#define HSM 6UL
-#define MAX_PERSONALITY 7UL
+#define MULTIPATH 7UL
+#define MAX_PERSONALITY 8UL
-extern inline int pers_to_level (int pers)
+static inline int pers_to_level (int pers)
{
switch (pers) {
+ case MULTIPATH: return -4;
case HSM: return -3;
case TRANSLUCENT: return -2;
case LINEAR: return -1;
@@ -35,12 +36,14 @@
case RAID1: return 1;
case RAID5: return 5;
}
- panic("pers_to_level()");
+ BUG();
+ return MD_RESERVED;
}
-extern inline int level_to_pers (int level)
+static inline int level_to_pers (int level)
{
switch (level) {
+ case -4: return MULTIPATH;
case -3: return HSM;
case -2: return TRANSLUCENT;
case -1: return LINEAR;
@@ -72,7 +75,7 @@
extern dev_mapping_t mddev_map [MAX_MD_DEVS];
-extern inline mddev_t * kdev_to_mddev (kdev_t dev)
+static inline mddev_t * kdev_to_mddev (kdev_t dev)
{
if (MAJOR(dev) != MD_MAJOR)
BUG();
@@ -90,62 +93,62 @@
*/
#define MD_READAHEAD MAX_READAHEAD
-extern inline int disk_faulty(mdp_disk_t * d)
+static inline int disk_faulty(mdp_disk_t * d)
{
return d->state & (1 << MD_DISK_FAULTY);
}
-extern inline int disk_active(mdp_disk_t * d)
+static inline int disk_active(mdp_disk_t * d)
{
return d->state & (1 << MD_DISK_ACTIVE);
}
-extern inline int disk_sync(mdp_disk_t * d)
+static inline int disk_sync(mdp_disk_t * d)
{
return d->state & (1 << MD_DISK_SYNC);
}
-extern inline int disk_spare(mdp_disk_t * d)
+static inline int disk_spare(mdp_disk_t * d)
{
return !disk_sync(d) && !disk_active(d) && !disk_faulty(d);
}
-extern inline int disk_removed(mdp_disk_t * d)
+static inline int disk_removed(mdp_disk_t * d)
{
return d->state & (1 << MD_DISK_REMOVED);
}
-extern inline void mark_disk_faulty(mdp_disk_t * d)
+static inline void mark_disk_faulty(mdp_disk_t * d)
{
d->state |= (1 << MD_DISK_FAULTY);
}
-extern inline void mark_disk_active(mdp_disk_t * d)
+static inline void mark_disk_active(mdp_disk_t * d)
{
d->state |= (1 << MD_DISK_ACTIVE);
}
-extern inline void mark_disk_sync(mdp_disk_t * d)
+static inline void mark_disk_sync(mdp_disk_t * d)
{
d->state |= (1 << MD_DISK_SYNC);
}
-extern inline void mark_disk_spare(mdp_disk_t * d)
+static inline void mark_disk_spare(mdp_disk_t * d)
{
d->state = 0;
}
-extern inline void mark_disk_removed(mdp_disk_t * d)
+static inline void mark_disk_removed(mdp_disk_t * d)
{
d->state = (1 << MD_DISK_FAULTY) | (1 << MD_DISK_REMOVED);
}
-extern inline void mark_disk_inactive(mdp_disk_t * d)
+static inline void mark_disk_inactive(mdp_disk_t * d)
{
d->state &= ~(1 << MD_DISK_ACTIVE);
}
-extern inline void mark_disk_nonsync(mdp_disk_t * d)
+static inline void mark_disk_nonsync(mdp_disk_t * d)
{
d->state &= ~(1 << MD_DISK_SYNC);
}
@@ -170,6 +173,7 @@
mdp_super_t *sb;
unsigned long sb_offset;
+ int alias_device; /* device alias to the same disk */
int faulty; /* if faulty do not issue IO requests */
int desc_nr; /* descriptor index in the superblock */
};
@@ -245,18 +249,19 @@
* number. This will have to change to dynamic allocation
* once we start supporting partitioning of md devices.
*/
-extern inline int mdidx (mddev_t * mddev)
+static inline int mdidx (mddev_t * mddev)
{
return mddev->__minor;
}
-extern inline kdev_t mddev_to_kdev(mddev_t * mddev)
+static inline kdev_t mddev_to_kdev(mddev_t * mddev)
{
return MKDEV(MD_MAJOR, mdidx(mddev));
}
extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
+extern mdp_disk_t *get_spare(mddev_t *mddev);
/*
* iterates through some rdev ringlist. It's safe to remove the
@@ -304,12 +309,12 @@
tmp = tmp->next, tmp->prev != &all_mddevs \
; )
-extern inline int lock_mddev (mddev_t * mddev)
+static inline int lock_mddev (mddev_t * mddev)
{
return down_interruptible(&mddev->reconfig_sem);
}
-extern inline void unlock_mddev (mddev_t * mddev)
+static inline void unlock_mddev (mddev_t * mddev)
{
up(&mddev->reconfig_sem);
}
@@ -322,7 +327,7 @@
void *data;
md_wait_queue_head_t wqueue;
unsigned long flags;
- struct semaphore *sem;
+ struct completion *event;
struct task_struct *tsk;
const char *name;
} mdk_thread_t;
@@ -363,6 +368,31 @@
if (condition) \
break; \
__wait_event_lock_irq(wq, condition, lock); \
+} while (0)
+
+
+#define __wait_disk_event(wq, condition) \
+do { \
+ wait_queue_t __wait; \
+ init_waitqueue_entry(&__wait, current); \
+ \
+ add_wait_queue(&wq, &__wait); \
+ for (;;) { \
+ set_current_state(TASK_UNINTERRUPTIBLE); \
+ if (condition) \
+ break; \
+ run_task_queue(&tq_disk); \
+ schedule(); \
+ } \
+ current->state = TASK_RUNNING; \
+ remove_wait_queue(&wq, &__wait); \
+} while (0)
+
+#define wait_disk_event(wq, condition) \
+do { \
+ if (condition) \
+ break; \
+ __wait_disk_event(wq, condition); \
} while (0)
#endif
Index: md_u.h
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/include/linux/raid/md_u.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- md_u.h 25 Feb 2001 23:14:50 -0000 1.1.1.2
+++ md_u.h 9 Apr 2002 12:44:17 -0000 1.2
@@ -35,6 +35,7 @@
#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27)
#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28)
#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29)
+#define HOT_GENERATE_ERROR _IO (MD_MAJOR, 0x2a)
/* usage */
#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t)
Index: raid1.h
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/include/linux/raid/raid1.h,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- raid1.h 14 Jan 2001 16:48:39 -0000 1.1.1.1
+++ raid1.h 9 Apr 2002 12:44:17 -0000 1.2
@@ -42,7 +42,10 @@
*/
struct buffer_head *freebh;
int freebh_cnt; /* how many are on the list */
+ int freebh_blocked;
struct raid1_bh *freer1;
+ int freer1_blocked;
+ int freer1_cnt;
struct raid1_bh *freebuf; /* each bh_req has a page allocated */
md_wait_queue_head_t wait_buffer;
Index: raid5.h
===================================================================
RCS file: /cvsroot/linux-vax/kernel-2.4/include/linux/raid/raid5.h,v
retrieving revision 1.1.1.2
retrieving revision 1.2
diff -u -r1.1.1.2 -r1.2
--- raid5.h 25 Feb 2001 23:14:50 -0000 1.1.1.2
+++ raid5.h 9 Apr 2002 12:44:17 -0000 1.2
@@ -132,6 +132,7 @@
struct buffer_head *bh_read[MD_SB_DISKS]; /* read request buffers of the MD device */
struct buffer_head *bh_write[MD_SB_DISKS]; /* write request buffers of the MD device */
struct buffer_head *bh_written[MD_SB_DISKS]; /* write request buffers of the MD device that have been scheduled for write */
+ struct page *bh_page[MD_SB_DISKS]; /* saved bh_cache[n]->b_page when reading around the cache */
unsigned long sector; /* sector of this row */
int size; /* buffers size */
int pd_idx; /* parity disk index */
@@ -157,6 +158,32 @@
#define STRIPE_HANDLE 2
#define STRIPE_SYNCING 3
#define STRIPE_INSYNC 4
+#define STRIPE_PREREAD_ACTIVE 5
+#define STRIPE_DELAYED 6
+
+/*
+ * Plugging:
+ *
+ * To improve write throughput, we need to delay the handling of some
+ * stripes until there has been a chance that several write requests
+ * for the one stripe have all been collected.
+ * In particular, any write request that would require pre-reading
+ * is put on a "delayed" queue until there are no stripes currently
+ * in a pre-read phase. Further, if the "delayed" queue is empty when
+ * a stripe is put on it then we "plug" the queue and do not process it
+ * until an unplug call is made. (the tq_disk list is run).
+ *
+ * When preread is initiated on a stripe, we set PREREAD_ACTIVE and add
+ * it to the count of prereading stripes.
+ * When write is initiated, or the stripe refcnt == 0 (just in case) we
+ * clear the PREREAD_ACTIVE flag and decrement the count
+ * Whenever the delayed queue is empty and the device is not plugged, we
+ * move any stripes from delayed to handle and clear the DELAYED flag and set PREREAD_ACTIVE.
+ * In stripe_handle, if we find pre-reading is necessary, we do it if
+ * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue.
+ * HANDLE gets cleared if stripe_handle leave nothing locked.
+ */
+
struct disk_info {
kdev_t dev;
@@ -181,14 +208,21 @@
int max_nr_stripes;
struct list_head handle_list; /* stripes needing handling */
+ struct list_head delayed_list; /* stripes that have plugged requests */
+ atomic_t preread_active_stripes; /* stripes with scheduled io */
/*
* Free stripes pool
*/
atomic_t active_stripes;
struct list_head inactive_list;
md_wait_queue_head_t wait_for_stripe;
-
+ int inactive_blocked; /* release of inactive stripes blocked,
+ * waiting for 25% to be free
+ */
md_spinlock_t device_lock;
+
+ int plugged;
+ struct tq_struct plug_tq;
};
typedef struct raid5_private_data raid5_conf_t;
|