From: Anton I. <ant...@ko...> - 2018-01-27 11:42:25
Thanks, looks correct, +1 Richard, can you add it to the next pull. Thanks in advance, A. On 27/01/18 10:55, Christophe JAILLET wrote: > If 'find_device()' finds something, we set '*error_out' and we should > return an error. However, 'err' is known to be 0 at this point. > > Explicitly return -EINVAL instead. > > While at it, remove the initialization of 'err' at the beginning of the > function and also explicitly return an error code if the first check > fails. > > Fixes: ad1f62ab2bd4 ("High Performance UML Vector Network Driver") > Signed-off-by: Christophe JAILLET <chr...@wa...> > --- > Not sure if correct, but it looks spurious to set 'error_out' and return > 0 (i.e. success) > --- > arch/um/drivers/vector_kern.c | 6 +++--- > 1 file changed, 3 insertions(+), 3 deletions(-) > > diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c > index 3c1e6ad91016..6fab02a126e6 100644 > --- a/arch/um/drivers/vector_kern.c > +++ b/arch/um/drivers/vector_kern.c > @@ -677,7 +677,7 @@ static struct vector_device *find_device(int n) > static int vector_parse(char *str, int *index_out, char **str_out, > char **error_out) > { > - int n, len, err = -EINVAL; > + int n, len, err; > char *start = str; > > len = strlen(str); > @@ -686,7 +686,7 @@ static int vector_parse(char *str, int *index_out, char **str_out, > str++; > if (*str != ':') { > *error_out = "Expected ':' after device number"; > - return err; > + return -EINVAL; > } > *str = '\0'; > > @@ -699,7 +699,7 @@ static int vector_parse(char *str, int *index_out, char **str_out, > str++; > if (find_device(n)) { > *error_out = "Device already configured"; > - return err; > + return -EINVAL; > } > > *index_out = n; |
From: Anton I. <ant...@ko...> - 2018-01-27 11:41:51
Thanks, looks correct, +1 Richard, can you add it to the next pull. Thanks in advance, A. On 27/01/18 10:53, Christophe JAILLET wrote: > Checking the result of the previous 'kstrdup()' call is expected here, so > we should test 'params' and not 'str' (which is known to be non-NULL at > this point) > > Fixes: ad1f62ab2bd4 ("High Performance UML Vector Network Driver") > Signed-off-by: Christophe JAILLET <chr...@wa...> > --- > arch/um/drivers/vector_kern.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c > index d1d53015d572..3c1e6ad91016 100644 > --- a/arch/um/drivers/vector_kern.c > +++ b/arch/um/drivers/vector_kern.c > @@ -723,7 +723,7 @@ static int vector_config(char *str, char **error_out) > */ > > params = kstrdup(params, GFP_KERNEL); > - if (str == NULL) { > + if (params == NULL) { > *error_out = "vector_config failed to strdup string"; > return -ENOMEM; > } |
From: Christophe J. <chr...@wa...> - 2018-01-27 11:12:47
If 'find_device()' finds something, we set '*error_out' and we should return an error. However, 'err' is known to be 0 at this point. Explicitly return -EINVAL instead. While at it, remove the initialization of 'err' at the beginning of the function and also explicitly return an error code if the first check fails. Fixes: ad1f62ab2bd4 ("High Performance UML Vector Network Driver") Signed-off-by: Christophe JAILLET <chr...@wa...> --- Not sure if correct, but it looks spurious to set 'error_out' and return 0 (i.e. success) --- arch/um/drivers/vector_kern.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index 3c1e6ad91016..6fab02a126e6 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -677,7 +677,7 @@ static struct vector_device *find_device(int n) static int vector_parse(char *str, int *index_out, char **str_out, char **error_out) { - int n, len, err = -EINVAL; + int n, len, err; char *start = str; len = strlen(str); @@ -686,7 +686,7 @@ static int vector_parse(char *str, int *index_out, char **str_out, str++; if (*str != ':') { *error_out = "Expected ':' after device number"; - return err; + return -EINVAL; } *str = '\0'; @@ -699,7 +699,7 @@ static int vector_parse(char *str, int *index_out, char **str_out, str++; if (find_device(n)) { *error_out = "Device already configured"; - return err; + return -EINVAL; } *index_out = n; -- 2.14.1 --- L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel antivirus Avast. https://www.avast.com/antivirus |
From: Christophe J. <chr...@wa...> - 2018-01-27 11:11:32
Checking the result of the previous 'kstrdup()' call is expected here, so we should test 'params' and not 'str' (which is known to be non-NULL at this point) Fixes: ad1f62ab2bd4 ("High Performance UML Vector Network Driver") Signed-off-by: Christophe JAILLET <chr...@wa...> --- arch/um/drivers/vector_kern.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c index d1d53015d572..3c1e6ad91016 100644 --- a/arch/um/drivers/vector_kern.c +++ b/arch/um/drivers/vector_kern.c @@ -723,7 +723,7 @@ static int vector_config(char *str, char **error_out) */ params = kstrdup(params, GFP_KERNEL); - if (str == NULL) { + if (params == NULL) { *error_out = "vector_config failed to strdup string"; return -ENOMEM; } -- 2.14.1 --- L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel antivirus Avast. https://www.avast.com/antivirus |
From: Anton I. <ant...@ko...> - 2018-01-05 07:54:57
I think we have a similar patch pending. Thanks for noticing. I will review and double check vs queued up patches. A. On 5 January 2018 07:22:52 GMT+00:00, Wei Yongjun <wei...@hu...> wrote: >Add the missing unlock before return from function vector_net_open() >in the error handling case. > >Fixes: ad1f62ab2bd4 ("High Performance UML Vector Network Driver") >Signed-off-by: Wei Yongjun <wei...@hu...> >--- > arch/um/drivers/vector_kern.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > >diff --git a/arch/um/drivers/vector_kern.c >b/arch/um/drivers/vector_kern.c >index d1d5301..bb83a2d 100644 >--- a/arch/um/drivers/vector_kern.c >+++ b/arch/um/drivers/vector_kern.c >@@ -1156,8 +1156,10 @@ static int vector_net_open(struct net_device >*dev) > struct vector_device *vdevice; > > spin_lock_irqsave(&vp->lock, flags); >- if (vp->opened) >+ if (vp->opened) { >+ spin_unlock_irqrestore(&vp->lock, flags); > return -ENXIO; >+ } > vp->opened = true; > spin_unlock_irqrestore(&vp->lock, flags); -- Sent from my Android device with K-9 Mail. Please excuse my brevity. |
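The bug class here is an early return that skips `spin_unlock_irqrestore()`. A hedged sketch of the alternative single-unlock-site pattern, using made-up names rather than the driver's own private structure:

```c
#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>

/* Made-up device struct for illustration. */
struct example_dev {
	spinlock_t lock;
	bool opened;
};

/* Single unlock site: every exit path, error or not, goes through out_unlock. */
static int example_open_once(struct example_dev *vp)
{
	unsigned long flags;
	int err = 0;

	spin_lock_irqsave(&vp->lock, flags);
	if (vp->opened) {
		err = -ENXIO;
		goto out_unlock;
	}
	vp->opened = true;

out_unlock:
	spin_unlock_irqrestore(&vp->lock, flags);
	return err;
}
```

The posted fix instead unlocks explicitly before the early return, which is equally correct; the goto form simply makes it harder to add another leaking return later.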
From: Richard W. <ri...@no...> - 2017-12-13 20:41:33
Jason, Am Mittwoch, 13. Dezember 2017, 18:22:30 CET schrieb Jason A. Donenfeld: > Recent libcs have gotten a bit more strict, so we actually need to > include the right headers and use the right types. This enables UML to > compile again. > > Signed-off-by: Jason A. Donenfeld <Ja...@zx...> > Cc: st...@vg... > --- > arch/um/os-Linux/file.c | 1 + > arch/um/os-Linux/signal.c | 3 ++- > arch/x86/um/stub_segv.c | 3 ++- > 3 files changed, 5 insertions(+), 2 deletions(-) > > diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c > index 2db18cbbb0ea..c0197097c86e 100644 > --- a/arch/um/os-Linux/file.c > +++ b/arch/um/os-Linux/file.c > @@ -12,6 +12,7 @@ > #include <sys/mount.h> > #include <sys/socket.h> > #include <sys/stat.h> > +#include <sys/sysmacros.h> > #include <sys/un.h> > #include <sys/types.h> > #include <os.h> > diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c > index a86d7cc2c2d8..bf0acb8aad8b 100644 > --- a/arch/um/os-Linux/signal.c > +++ b/arch/um/os-Linux/signal.c > @@ -16,6 +16,7 @@ > #include <os.h> > #include <sysdep/mcontext.h> > #include <um_malloc.h> > +#include <sys/ucontext.h> > > void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = { > [SIGTRAP] = relay_signal, > @@ -159,7 +160,7 @@ static void (*handlers[_NSIG])(int sig, struct siginfo > *si, mcontext_t *mc) = { > > static void hard_handler(int sig, siginfo_t *si, void *p) > { > - struct ucontext *uc = p; > + ucontext_t *uc = p; > mcontext_t *mc = &uc->uc_mcontext; > unsigned long pending = 1UL << sig; > > diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c > index 1518d2805ae8..27361cbb7ca9 100644 > --- a/arch/x86/um/stub_segv.c > +++ b/arch/x86/um/stub_segv.c > @@ -6,11 +6,12 @@ > #include <sysdep/stub.h> > #include <sysdep/faultinfo.h> > #include <sysdep/mcontext.h> > +#include <sys/ucontext.h> > > void __attribute__ ((__section__ (".__syscall_stub"))) > stub_segv_handler(int sig, siginfo_t *info, void *p) > { > - struct ucontext *uc = p; > + ucontext_t *uc = p; > > GET_FAULTINFO_FROM_MC(*((struct faultinfo *) STUB_DATA), > &uc->uc_mcontext); Can you please rebase your fix on linux-next? I have already a very similar fix in the pipe. Thanks, //richard |
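Background for the patch: glibc 2.26 removed `struct ucontext` from its public headers, leaving only the POSIX `ucontext_t` typedef, and `<sys/sysmacros.h>` is now needed for the major()/minor() macros. A minimal userspace sketch of the handler pattern the patch switches to, assuming a Linux/glibc host; it is illustrative and not UML code:

```c
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <ucontext.h>

static volatile sig_atomic_t got_signal;

/* SA_SIGINFO handlers receive the context as 'void *'; cast it to the
 * POSIX ucontext_t instead of the no-longer-exported 'struct ucontext'. */
static void handler(int sig, siginfo_t *si, void *p)
{
	ucontext_t *uc = p;
	mcontext_t *mc = &uc->uc_mcontext;

	(void)si;
	(void)mc;	/* the machine state the UML handlers pass on */
	got_signal = sig;
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGUSR1, &sa, NULL);

	raise(SIGUSR1);
	printf("handled signal %d\n", (int)got_signal);
	return 0;
}
```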
From: Anton I. <ant...@ko...> - 2017-12-11 09:36:38
Both are correct - omissions on my side Please apply. A. On 12/11/17 09:33, Richard Weinberger wrote: > Anton, > > Am Samstag, 9. Dezember 2017, 18:09:17 CET schrieb Anton Ivanov: >> Thanks, >> >> I guess I missed that one. >> >> A. >> >> On 09/12/17 11:49, Dan Carpenter wrote: >>> We need to unlock and restore IRQs on this error path. >>> >>> Fixes: ad1f62ab2bd4 ("High Performance UML Vector Network Driver") >>> Signed-off-by: Dan Carpenter <dan...@or...> >>> >>> diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c >>> index d1d53015d572..bb83a2d22ac2 100644 >>> --- a/arch/um/drivers/vector_kern.c >>> +++ b/arch/um/drivers/vector_kern.c >>> @@ -1156,8 +1156,10 @@ static int vector_net_open(struct net_device *dev) >>> >>> struct vector_device *vdevice; >>> >>> spin_lock_irqsave(&vp->lock, flags); >>> >>> - if (vp->opened) >>> + if (vp->opened) { >>> + spin_unlock_irqrestore(&vp->lock, flags); >>> >>> return -ENXIO; >>> >>> + } >>> >>> vp->opened = true; >>> spin_unlock_irqrestore(&vp->lock, flags); > Please review/ack these patches. > > Thanks, > //richard > > ------------------------------------------------------------------------------ > Check out the vibrant tech community on one of the world's most > engaging tech sites, Slashdot.org! http://sdm.link/slashdot > _______________________________________________ > User-mode-linux-devel mailing list > Use...@li... > https://lists.sourceforge.net/lists/listinfo/user-mode-linux-devel > |
From: Richard W. <ri...@no...> - 2017-12-11 09:33:13
Anton, Am Samstag, 9. Dezember 2017, 18:09:17 CET schrieb Anton Ivanov: > Thanks, > > I guess I missed that one. > > A. > > On 09/12/17 11:49, Dan Carpenter wrote: > > We need to unlock and restore IRQs on this error path. > > > > Fixes: ad1f62ab2bd4 ("High Performance UML Vector Network Driver") > > Signed-off-by: Dan Carpenter <dan...@or...> > > > > diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c > > index d1d53015d572..bb83a2d22ac2 100644 > > --- a/arch/um/drivers/vector_kern.c > > +++ b/arch/um/drivers/vector_kern.c > > @@ -1156,8 +1156,10 @@ static int vector_net_open(struct net_device *dev) > > > > struct vector_device *vdevice; > > > > spin_lock_irqsave(&vp->lock, flags); > > > > - if (vp->opened) > > + if (vp->opened) { > > + spin_unlock_irqrestore(&vp->lock, flags); > > > > return -ENXIO; > > > > + } > > > > vp->opened = true; > > spin_unlock_irqrestore(&vp->lock, flags); Please review/ack these patches. Thanks, //richard |
From: Christoph H. <hc...@ls...> - 2017-12-04 16:29:44
On Sun, Dec 03, 2017 at 10:49:23PM +0100, Richard Weinberger wrote: > Convert the driver to the modern blk-mq framework. > As byproduct we get rid of our open coded restart logic and let > blk-mq handle it. > > Signed-off-by: Richard Weinberger <ri...@no...> > --- > arch/um/drivers/ubd_kern.c | 178 +++++++++++++++++++++++---------------------- > 1 file changed, 93 insertions(+), 85 deletions(-) > > diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c > index b55fe9bf5d3e..deceb8022a28 100644 > --- a/arch/um/drivers/ubd_kern.c > +++ b/arch/um/drivers/ubd_kern.c > @@ -23,6 +23,7 @@ > #include <linux/module.h> > #include <linux/init.h> > #include <linux/blkdev.h> > +#include <linux/blk-mq.h> > #include <linux/ata.h> > #include <linux/hdreg.h> > #include <linux/cdrom.h> > @@ -142,7 +143,6 @@ struct cow { > #define MAX_SG 64 > > struct ubd { > - struct list_head restart; > /* name (and fd, below) of the file opened for writing, either the > * backing or the cow file. */ > char *file; > @@ -156,9 +156,12 @@ struct ubd { > struct cow cow; > struct platform_device pdev; > struct request_queue *queue; > + struct blk_mq_tag_set tag_set; > spinlock_t lock; > +}; > + > +struct ubd_pdu { > struct scatterlist sg[MAX_SG]; > - struct request *request; > int start_sg, end_sg; > sector_t rq_pos; > }; > @@ -182,10 +185,6 @@ struct ubd { > .shared = 0, \ > .cow = DEFAULT_COW, \ > .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ > - .request = NULL, \ > - .start_sg = 0, \ > - .end_sg = 0, \ > - .rq_pos = 0, \ > } > > /* Protected by ubd_lock */ > @@ -196,6 +195,12 @@ static int fake_ide = 0; > static struct proc_dir_entry *proc_ide_root = NULL; > static struct proc_dir_entry *proc_ide = NULL; > > +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, > + const struct blk_mq_queue_data *bd); > +static int ubd_init_request(struct blk_mq_tag_set *set, > + struct request *req, unsigned int hctx_idx, > + unsigned int numa_node); > + > static void make_proc_ide(void) > { > proc_ide_root = proc_mkdir("ide", NULL); > @@ -448,11 +453,8 @@ __uml_help(udb_setup, > " in the boot output.\n\n" > ); > > -static void do_ubd_request(struct request_queue * q); > - > /* Only changed by ubd_init, which is an initcall. */ > static int thread_fd = -1; > -static LIST_HEAD(restart); > > /* Function to read several request pointers at a time > * handling fractional reads if (and as) needed > @@ -510,9 +512,6 @@ static int bulk_req_safe_read( > /* Called without dev->lock held, and only in interrupt context. */ > static void ubd_handler(void) > { > - struct ubd *ubd; > - struct list_head *list, *next_ele; > - unsigned long flags; > int n; > int count; > > @@ -532,23 +531,17 @@ static void ubd_handler(void) > return; > } > for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { > - blk_end_request( > - (*irq_req_buffer)[count]->req, > - BLK_STS_OK, > - (*irq_req_buffer)[count]->length > - ); > - kfree((*irq_req_buffer)[count]); > + struct io_thread_req *io_req = (*irq_req_buffer)[count]; > + int err = io_req->error ? 
BLK_STS_IOERR : BLK_STS_OK; > + > + if (!blk_update_request(io_req->req, err, io_req->length)) > + __blk_mq_end_request(io_req->req, err); > + > + kfree(io_req); > } > } > - reactivate_fd(thread_fd, UBD_IRQ); > > - list_for_each_safe(list, next_ele, &restart){ > - ubd = container_of(list, struct ubd, restart); > - list_del_init(&ubd->restart); > - spin_lock_irqsave(&ubd->lock, flags); > - do_ubd_request(ubd->queue); > - spin_unlock_irqrestore(&ubd->lock, flags); > - } > + reactivate_fd(thread_fd, UBD_IRQ); > } > > static irqreturn_t ubd_intr(int irq, void *dev) > @@ -869,6 +862,7 @@ static void ubd_device_release(struct device *dev) > struct ubd *ubd_dev = dev_get_drvdata(dev); > > blk_cleanup_queue(ubd_dev->queue); > + blk_mq_free_tag_set(&ubd_dev->tag_set); > *ubd_dev = ((struct ubd) DEFAULT_UBD); > } > > @@ -911,6 +905,11 @@ static int ubd_disk_register(int major, u64 size, int unit, > > #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) > > +static const struct blk_mq_ops ubd_mq_ops = { > + .queue_rq = ubd_queue_rq, > + .init_request = ubd_init_request, > +}; Use tables to align the "=" signs? > + blk_mq_start_request(req); > + > + pdu->rq_pos = blk_rq_pos(req); > + pdu->start_sg = 0; > + pdu->end_sg = blk_rq_map_sg(req->q, req, pdu->sg); > > + if (req_op(req) == REQ_OP_FLUSH) { > + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); GFP_ATOMIC in the I/O path looks scary, but it seems the existing code already does this. Any reason not to embdedd the first one into struct request and then use a mempool for the rest? > + if (io_req == NULL) { > + blk_mq_requeue_request(req, true); > + goto done; > } > + prepare_flush_request(req, io_req); > + submit_request(io_req, dev); > > - req = dev->request; > + goto done; > + } If you only call blk_mq_start_request once everything is set up you can just return a BLK_STS_ code from ->queue_rq. |
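Christoph's mempool suggestion, sketched against the mempool API only; the pool name and helpers below are hypothetical and not part of the posted patch, and whether a sleeping allocation is acceptable depends on how the queue is configured, so this is just the shape of the idea:

```c
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mempool.h>

/* Hypothetical pool for the driver's 'struct io_thread_req' records. */
static mempool_t *io_req_pool;

static int io_req_pool_init(void)
{
	/* Preallocate enough elements to cover one full queue depth. */
	io_req_pool = mempool_create_kmalloc_pool(64, sizeof(struct io_thread_req));
	return io_req_pool ? 0 : -ENOMEM;
}

static struct io_thread_req *io_req_get(gfp_t gfp)
{
	/* With a gfp mask that may sleep, mempool_alloc() waits for an
	 * element instead of returning NULL; with GFP_ATOMIC it can still
	 * fail once the preallocated elements are exhausted. */
	return mempool_alloc(io_req_pool, gfp);
}

static void io_req_put(struct io_thread_req *io_req)
{
	mempool_free(io_req, io_req_pool);
}
```

Embedding the first `io_thread_req` in the per-request PDU, as Christoph suggests, would additionally make the common single-segment case allocation-free.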
From: Richard W. <ri...@no...> - 2017-12-03 21:54:22
Christoph, Am Mittwoch, 29. November 2017, 22:46:51 CET schrieb Christoph Hellwig: > On Sun, Nov 26, 2017 at 02:10:53PM +0100, Richard Weinberger wrote: > > MAX_SG is 64, used for blk_queue_max_segments(). This comes from > > a0044bdf60c2 ("uml: batch I/O requests"). Is this still a good/sane > > value for blk-mq? > > blk-mq itself doesn't change the tradeoff. > > > The driver does IO batching, for each request it issues many UML struct > > io_thread_req request to the IO thread on the host side. > > One io_thread_req per SG page. > > Before the conversion the driver used blk_end_request() to indicate that > > a part of the request is done. > > blk_mq_end_request() does not take a length parameter, therefore we can > > only mark the whole request as done. See the new is_last property on the > > driver. > > Maybe there is a way to partially end requests too in blk-mq? > > You can, take a look at scsi_end_request which handles this for blk-mq > and the legacy layer. That being said I wonder if batching really > makes that much sene if you execute each segment separately? Anton did a lot of performance improvements in this area. He has all the details. AFAIK batching brings us more throughput because in UML all IO is done by a different thread and the IPC has a certain overhead. > > Another obstacle with IO batching is that UML IO thread requests can > > fail. Not only due to OOM, also because the pipe between the UML kernel > > process and the host IO thread can return EAGAIN. > > In this case the driver puts the request into a list and retried later > > again when the pipe turns writable. > > I’m not sure whether this restart logic makes sense with blk-mq, maybe > > there is a way in blk-mq to put back a (partial) request? > > blk_mq_requeue_request requeues requests that have been partially > exectuted (or not at all for that matter). Thanks this is what I needed. BTW: How can I know which blk functions are not usable in blk-mq? I didn't realize that I can use blk_update_request(). Thanks, //richard |
From: Richard W. <ri...@no...> - 2017-12-03 21:49:10
Convert the driver to the modern blk-mq framework. As byproduct we get rid of our open coded restart logic and let blk-mq handle it. Signed-off-by: Richard Weinberger <ri...@no...> --- arch/um/drivers/ubd_kern.c | 178 +++++++++++++++++++++++---------------------- 1 file changed, 93 insertions(+), 85 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index b55fe9bf5d3e..deceb8022a28 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/ata.h> #include <linux/hdreg.h> #include <linux/cdrom.h> @@ -142,7 +143,6 @@ struct cow { #define MAX_SG 64 struct ubd { - struct list_head restart; /* name (and fd, below) of the file opened for writing, either the * backing or the cow file. */ char *file; @@ -156,9 +156,12 @@ struct ubd { struct cow cow; struct platform_device pdev; struct request_queue *queue; + struct blk_mq_tag_set tag_set; spinlock_t lock; +}; + +struct ubd_pdu { struct scatterlist sg[MAX_SG]; - struct request *request; int start_sg, end_sg; sector_t rq_pos; }; @@ -182,10 +185,6 @@ struct ubd { .shared = 0, \ .cow = DEFAULT_COW, \ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ - .request = NULL, \ - .start_sg = 0, \ - .end_sg = 0, \ - .rq_pos = 0, \ } /* Protected by ubd_lock */ @@ -196,6 +195,12 @@ static int fake_ide = 0; static struct proc_dir_entry *proc_ide_root = NULL; static struct proc_dir_entry *proc_ide = NULL; +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); +static int ubd_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node); + static void make_proc_ide(void) { proc_ide_root = proc_mkdir("ide", NULL); @@ -448,11 +453,8 @@ __uml_help(udb_setup, " in the boot output.\n\n" ); -static void do_ubd_request(struct request_queue * q); - /* Only changed by ubd_init, which is an initcall. */ static int thread_fd = -1; -static LIST_HEAD(restart); /* Function to read several request pointers at a time * handling fractional reads if (and as) needed @@ -510,9 +512,6 @@ static int bulk_req_safe_read( /* Called without dev->lock held, and only in interrupt context. */ static void ubd_handler(void) { - struct ubd *ubd; - struct list_head *list, *next_ele; - unsigned long flags; int n; int count; @@ -532,23 +531,17 @@ static void ubd_handler(void) return; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - blk_end_request( - (*irq_req_buffer)[count]->req, - BLK_STS_OK, - (*irq_req_buffer)[count]->length - ); - kfree((*irq_req_buffer)[count]); + struct io_thread_req *io_req = (*irq_req_buffer)[count]; + int err = io_req->error ? 
BLK_STS_IOERR : BLK_STS_OK; + + if (!blk_update_request(io_req->req, err, io_req->length)) + __blk_mq_end_request(io_req->req, err); + + kfree(io_req); } } - reactivate_fd(thread_fd, UBD_IRQ); - list_for_each_safe(list, next_ele, &restart){ - ubd = container_of(list, struct ubd, restart); - list_del_init(&ubd->restart); - spin_lock_irqsave(&ubd->lock, flags); - do_ubd_request(ubd->queue); - spin_unlock_irqrestore(&ubd->lock, flags); - } + reactivate_fd(thread_fd, UBD_IRQ); } static irqreturn_t ubd_intr(int irq, void *dev) @@ -869,6 +862,7 @@ static void ubd_device_release(struct device *dev) struct ubd *ubd_dev = dev_get_drvdata(dev); blk_cleanup_queue(ubd_dev->queue); + blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); } @@ -911,6 +905,11 @@ static int ubd_disk_register(int major, u64 size, int unit, #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) +static const struct blk_mq_ops ubd_mq_ops = { + .queue_rq = ubd_queue_rq, + .init_request = ubd_init_request, +}; + static int ubd_add(int n, char **error_out) { struct ubd *ubd_dev = &ubd_devs[n]; @@ -927,15 +926,24 @@ static int ubd_add(int n, char **error_out) ubd_dev->size = ROUND_BLOCK(ubd_dev->size); - INIT_LIST_HEAD(&ubd_dev->restart); - sg_init_table(ubd_dev->sg, MAX_SG); + ubd_dev->tag_set.ops = &ubd_mq_ops; + ubd_dev->tag_set.queue_depth = 64; + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ubd_dev->tag_set.cmd_size = sizeof(struct ubd_pdu); + ubd_dev->tag_set.driver_data = ubd_dev; + ubd_dev->tag_set.nr_hw_queues = 1; - err = -ENOMEM; - ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); - if (ubd_dev->queue == NULL) { - *error_out = "Failed to initialize device queue"; + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); + if (err) goto out; + + ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); + if (IS_ERR(ubd_dev->queue)) { + err = PTR_ERR(ubd_dev->queue); + goto out_cleanup; } + ubd_dev->queue->queuedata = ubd_dev; blk_queue_write_cache(ubd_dev->queue, true, false); @@ -943,7 +951,7 @@ static int ubd_add(int n, char **error_out) err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; - goto out_cleanup; + goto out_cleanup_tags; } if (fake_major != UBD_MAJOR) @@ -961,6 +969,8 @@ static int ubd_add(int n, char **error_out) out: return err; +out_cleanup_tags: + blk_mq_free_tag_set(&ubd_dev->tag_set); out_cleanup: blk_cleanup_queue(ubd_dev->queue); goto out; @@ -1345,80 +1355,78 @@ static void prepare_flush_request(struct request *req, io_req->op = UBD_FLUSH; } -static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) +static void submit_request(struct io_thread_req *io_req, struct ubd *dev) { int n = os_write_file(thread_fd, &io_req, sizeof(io_req)); + if (n != sizeof(io_req)) { if (n != -EAGAIN) - printk("write to io thread failed, " - "errno = %d\n", -n); - else if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); + pr_err("write to io thread failed: %d\n", -n); + blk_mq_requeue_request(io_req->req, true); kfree(io_req); - return false; } - return true; } -/* Called with dev->lock held */ -static void do_ubd_request(struct request_queue *q) +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request *req = bd->rq; + struct ubd *dev = hctx->queue->queuedata; + struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req); struct io_thread_req *io_req; - struct request *req; - while(1){ - struct 
ubd *dev = q->queuedata; - if(dev->request == NULL){ - struct request *req = blk_fetch_request(q); - if(req == NULL) - return; + blk_mq_start_request(req); + + pdu->rq_pos = blk_rq_pos(req); + pdu->start_sg = 0; + pdu->end_sg = blk_rq_map_sg(req->q, req, pdu->sg); - dev->request = req; - dev->rq_pos = blk_rq_pos(req); - dev->start_sg = 0; - dev->end_sg = blk_rq_map_sg(q, req, dev->sg); + if (req_op(req) == REQ_OP_FLUSH) { + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + if (io_req == NULL) { + blk_mq_requeue_request(req, true); + goto done; } + prepare_flush_request(req, io_req); + submit_request(io_req, dev); - req = dev->request; + goto done; + } - if (req_op(req) == REQ_OP_FLUSH) { - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if (io_req == NULL) { - if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_flush_request(req, io_req); - if (submit_request(io_req, dev) == false) - return; + while (pdu->start_sg < pdu->end_sg) { + struct scatterlist *sg = &pdu->sg[pdu->start_sg]; + + io_req = kmalloc(sizeof(struct io_thread_req), + GFP_ATOMIC); + if (io_req == NULL) { + blk_mq_requeue_request(req, true); + goto done; } + prepare_request(req, io_req, + (unsigned long long)pdu->rq_pos << 9, + sg->offset, sg->length, sg_page(sg)); - while(dev->start_sg < dev->end_sg){ - struct scatterlist *sg = &dev->sg[dev->start_sg]; + submit_request(io_req, dev); - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if(io_req == NULL){ - if(list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_request(req, io_req, - (unsigned long long)dev->rq_pos << 9, - sg->offset, sg->length, sg_page(sg)); + pdu->rq_pos += sg->length >> 9; + pdu->start_sg++; + } - if (submit_request(io_req, dev) == false) - return; +done: + return BLK_STS_OK; +} - dev->rq_pos += sg->length >> 9; - dev->start_sg++; - } - dev->end_sg = 0; - dev->request = NULL; - } +static int ubd_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) +{ + struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req); + + sg_init_table(pdu->sg, MAX_SG); + + return 0; } static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -- 2.13.6 |
From: Christoph H. <hc...@ls...> - 2017-11-29 22:03:14
On Sun, Nov 26, 2017 at 02:10:53PM +0100, Richard Weinberger wrote: > MAX_SG is 64, used for blk_queue_max_segments(). This comes from > a0044bdf60c2 ("uml: batch I/O requests"). Is this still a good/sane > value for blk-mq? blk-mq itself doesn't change the tradeoff. > The driver does IO batching, for each request it issues many UML struct > io_thread_req request to the IO thread on the host side. > One io_thread_req per SG page. > Before the conversion the driver used blk_end_request() to indicate that > a part of the request is done. > blk_mq_end_request() does not take a length parameter, therefore we can > only mark the whole request as done. See the new is_last property on the > driver. > Maybe there is a way to partially end requests too in blk-mq? You can, take a look at scsi_end_request which handles this for blk-mq and the legacy layer. That being said I wonder if batching really makes that much sene if you execute each segment separately? > Another obstacle with IO batching is that UML IO thread requests can > fail. Not only due to OOM, also because the pipe between the UML kernel > process and the host IO thread can return EAGAIN. > In this case the driver puts the request into a list and retried later > again when the pipe turns writable. > I’m not sure whether this restart logic makes sense with blk-mq, maybe > there is a way in blk-mq to put back a (partial) request? blk_mq_requeue_request requeues requests that have been partially exectuted (or not at all for that matter). |
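The partial-completion idiom Christoph points at reduces to a couple of calls, which is what the v2 patch above ends up using in ubd_handler(); as a standalone sketch:

```c
#include <linux/blk-mq.h>
#include <linux/blkdev.h>

/*
 * Account for 'bytes' of 'rq'.  blk_update_request() returns true while
 * the request still has bytes outstanding; once the last segment has been
 * accounted for, finish the request.  Sketch only.
 */
static void complete_io_bytes(struct request *rq, blk_status_t error,
			      unsigned int bytes)
{
	if (!blk_update_request(rq, error, bytes))
		__blk_mq_end_request(rq, error);
}
```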
From: Anton I. <ant...@ko...> - 2017-11-26 14:42:38
On 26/11/17 13:56, Richard Weinberger wrote: > Anton, > > please don't crop the CC list. Apologies, I wanted to keep the discussion UML side until we have come up with something. Will not do it again. > > Am Sonntag, 26. November 2017, 14:41:12 CET schrieb Anton Ivanov: >> I need to do some reading on this. >> >> First of all - a stupid question: mq's primary advantage is in >> multi-core systems as it improves io and core utilization. We are still >> single-core in UML and AFAIK this is likely to stay that way, right? > Well, someday blk-mq should completely replace the legacy block interface. > Christoph asked me convert the UML driver. > Also do find corner cases in blk-mq. > >> On 26/11/17 13:10, Richard Weinberger wrote: >>> This is the first attempt to convert the UserModeLinux block driver >>> (UBD) to blk-mq. >>> While the conversion itself is rather trivial, a few questions >>> popped up in my head. Maybe you can help me with them. >>> >>> MAX_SG is 64, used for blk_queue_max_segments(). This comes from >>> a0044bdf60c2 ("uml: batch I/O requests"). Is this still a good/sane >>> value for blk-mq? >>> >>> The driver does IO batching, for each request it issues many UML struct >>> io_thread_req request to the IO thread on the host side. >>> One io_thread_req per SG page. >>> Before the conversion the driver used blk_end_request() to indicate that >>> a part of the request is done. >>> blk_mq_end_request() does not take a length parameter, therefore we can >>> only mark the whole request as done. See the new is_last property on the >>> driver. >>> Maybe there is a way to partially end requests too in blk-mq? >>> >>> Another obstacle with IO batching is that UML IO thread requests can >>> fail. Not only due to OOM, also because the pipe between the UML kernel >>> process and the host IO thread can return EAGAIN. >>> In this case the driver puts the request into a list and retried later >>> again when the pipe turns writable. >>> I’m not sure whether this restart logic makes sense with blk-mq, maybe >>> there is a way in blk-mq to put back a (partial) request? >> This all sounds to me as blk-mq requests need different inter-thread >> IPC. We presently rely on the fact that each request to the IO thread is >> fixed size and there is no natural request grouping coming from upper >> layers. >> >> Unless I am missing something, this looks like we are now getting group >> requests, right? We need to send a group at a time which is not >> processed until the whole group has been received in the IO thread. We >> cans still batch groups though, but should not batch individual >> requests, right? > The question is, do we really need batching at all with blk-mq? > Jeff implemented that 10 years ago. Well, but in that case we need to match our IPC to the existing batching in the block queue, right? So my proposal still stands - I suggest we roll back my batching patch which is no longer needed and change the IPC to match what is coming out of blk-mq :) > >> My first step (before moving to mq) would have been to switch to a unix >> domain socket pair probably using SOCK_SEQPACKET or SOCK_DGRAM. The >> latter for a socket pair will return ENOBUF if you try to push more than >> the receiving side can handle so we should not have IPC message loss. >> This way, we can push request groups naturally instead of relying on a >> "last" flag and keeping track of that for "end of request". > The pipe is currently a socketpair. UML just calls it "pipe". :-( I keep forgetting if we applied that patch or not :) It was a pipe once upon a time and I suggested we change it to a socket pair due to better buffering behavior for lots of small requests. > >> It will be easier to roll back the batching before we do that. Feel free >> to roll back that commit. >> >> Once that is in, the whole batching will need to be redone as it should >> account for variable IPC record size and use sendmmsg/recvmmsg pair - >> same as in the vector IO. I am happy to do the honors on that one :) > Let's see what block guys say. Ack. A. > > Thanks, > //richard > |
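A userspace sketch of the socketpair back-pressure Anton describes (SOCK_SEQPACKET shown; the exact errno on a full receiver, EAGAIN for a non-blocking sender or possibly ENOBUFS in the datagram case he mentions, is worth verifying against the target kernel):

```c
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int fds[2];
	char req[64] = { 0 };	/* stand-in for a fixed-size IPC record */

	/* Datagram-style pair: message boundaries are preserved. */
	if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, fds) < 0) {
		perror("socketpair");
		return 1;
	}
	fcntl(fds[0], F_SETFL, O_NONBLOCK);

	/* Fill the pair without reading; the kernel eventually pushes back. */
	for (;;) {
		if (send(fds[0], req, sizeof(req), 0) < 0) {
			printf("send stopped: %s\n", strerror(errno));
			break;
		}
	}
	close(fds[0]);
	close(fds[1]);
	return 0;
}
```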
From: Richard W. <ri...@no...> - 2017-11-26 13:55:48
Anton, please don't crop the CC list. Am Sonntag, 26. November 2017, 14:41:12 CET schrieb Anton Ivanov: > I need to do some reading on this. > > First of all - a stupid question: mq's primary advantage is in > multi-core systems as it improves io and core utilization. We are still > single-core in UML and AFAIK this is likely to stay that way, right? Well, someday blk-mq should completely replace the legacy block interface. Christoph asked me convert the UML driver. Also do find corner cases in blk-mq. > On 26/11/17 13:10, Richard Weinberger wrote: > > This is the first attempt to convert the UserModeLinux block driver > > (UBD) to blk-mq. > > While the conversion itself is rather trivial, a few questions > > popped up in my head. Maybe you can help me with them. > > > > MAX_SG is 64, used for blk_queue_max_segments(). This comes from > > a0044bdf60c2 ("uml: batch I/O requests"). Is this still a good/sane > > value for blk-mq? > > > > The driver does IO batching, for each request it issues many UML struct > > io_thread_req request to the IO thread on the host side. > > One io_thread_req per SG page. > > Before the conversion the driver used blk_end_request() to indicate that > > a part of the request is done. > > blk_mq_end_request() does not take a length parameter, therefore we can > > only mark the whole request as done. See the new is_last property on the > > driver. > > Maybe there is a way to partially end requests too in blk-mq? > > > > Another obstacle with IO batching is that UML IO thread requests can > > fail. Not only due to OOM, also because the pipe between the UML kernel > > process and the host IO thread can return EAGAIN. > > In this case the driver puts the request into a list and retried later > > again when the pipe turns writable. > > I’m not sure whether this restart logic makes sense with blk-mq, maybe > > there is a way in blk-mq to put back a (partial) request? > > This all sounds to me as blk-mq requests need different inter-thread > IPC. We presently rely on the fact that each request to the IO thread is > fixed size and there is no natural request grouping coming from upper > layers. > > Unless I am missing something, this looks like we are now getting group > requests, right? We need to send a group at a time which is not > processed until the whole group has been received in the IO thread. We > cans still batch groups though, but should not batch individual > requests, right? The question is, do we really need batching at all with blk-mq? Jeff implemented that 10 years ago. > My first step (before moving to mq) would have been to switch to a unix > domain socket pair probably using SOCK_SEQPACKET or SOCK_DGRAM. The > latter for a socket pair will return ENOBUF if you try to push more than > the receiving side can handle so we should not have IPC message loss. > This way, we can push request groups naturally instead of relying on a > "last" flag and keeping track of that for "end of request". The pipe is currently a socketpair. UML just calls it "pipe". :-( > It will be easier to roll back the batching before we do that. Feel free > to roll back that commit. > > Once that is in, the whole batching will need to be redone as it should > account for variable IPC record size and use sendmmsg/recvmmsg pair - > same as in the vector IO. I am happy to do the honors on that one :) Let's see what block guys say. Thanks, //richard |
From: Anton I. <ant...@ko...> - 2017-11-26 13:41:21
I need to do some reading on this. First of all - a stupid question: mq's primary advantage is in multi-core systems as it improves io and core utilization. We are still single-core in UML and AFAIK this is likely to stay that way, right? On 26/11/17 13:10, Richard Weinberger wrote: > This is the first attempt to convert the UserModeLinux block driver > (UBD) to blk-mq. > While the conversion itself is rather trivial, a few questions > popped up in my head. Maybe you can help me with them. > > MAX_SG is 64, used for blk_queue_max_segments(). This comes from > a0044bdf60c2 ("uml: batch I/O requests"). Is this still a good/sane > value for blk-mq? > > The driver does IO batching, for each request it issues many UML struct > io_thread_req request to the IO thread on the host side. > One io_thread_req per SG page. > Before the conversion the driver used blk_end_request() to indicate that > a part of the request is done. > blk_mq_end_request() does not take a length parameter, therefore we can > only mark the whole request as done. See the new is_last property on the > driver. > Maybe there is a way to partially end requests too in blk-mq? > > Another obstacle with IO batching is that UML IO thread requests can > fail. Not only due to OOM, also because the pipe between the UML kernel > process and the host IO thread can return EAGAIN. > In this case the driver puts the request into a list and retried later > again when the pipe turns writable. > I’m not sure whether this restart logic makes sense with blk-mq, maybe > there is a way in blk-mq to put back a (partial) request? This all sounds to me as blk-mq requests need different inter-thread IPC. We presently rely on the fact that each request to the IO thread is fixed size and there is no natural request grouping coming from upper layers. Unless I am missing something, this looks like we are now getting group requests, right? We need to send a group at a time which is not processed until the whole group has been received in the IO thread. We cans still batch groups though, but should not batch individual requests, right? My first step (before moving to mq) would have been to switch to a unix domain socket pair probably using SOCK_SEQPACKET or SOCK_DGRAM. The latter for a socket pair will return ENOBUF if you try to push more than the receiving side can handle so we should not have IPC message loss. This way, we can push request groups naturally instead of relying on a "last" flag and keeping track of that for "end of request". It will be easier to roll back the batching before we do that. Feel free to roll back that commit. Once that is in, the whole batching will need to be redone as it should account for variable IPC record size and use sendmmsg/recvmmsg pair - same as in the vector IO. I am happy to do the honors on that one :) Brgds, A. 
> > Signed-off-by: Richard Weinberger <ri...@no...> > --- > arch/um/drivers/ubd_kern.c | 188 ++++++++++++++++++++++++++------------------- > 1 file changed, 107 insertions(+), 81 deletions(-) > > diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c > index 90034acace2a..abbfe0c97418 100644 > --- a/arch/um/drivers/ubd_kern.c > +++ b/arch/um/drivers/ubd_kern.c > @@ -23,6 +23,7 @@ > #include <linux/module.h> > #include <linux/init.h> > #include <linux/blkdev.h> > +#include <linux/blk-mq.h> > #include <linux/ata.h> > #include <linux/hdreg.h> > #include <linux/cdrom.h> > @@ -57,6 +58,7 @@ struct io_thread_req { > unsigned long long cow_offset; > unsigned long bitmap_words[2]; > int error; > + bool is_last; > }; > > > @@ -142,7 +144,6 @@ struct cow { > #define MAX_SG 64 > > struct ubd { > - struct list_head restart; > /* name (and fd, below) of the file opened for writing, either the > * backing or the cow file. */ > char *file; > @@ -156,9 +157,12 @@ struct ubd { > struct cow cow; > struct platform_device pdev; > struct request_queue *queue; > + struct blk_mq_tag_set tag_set; > spinlock_t lock; > +}; > + > +struct ubd_pdu { > struct scatterlist sg[MAX_SG]; > - struct request *request; > int start_sg, end_sg; > sector_t rq_pos; > }; > @@ -182,10 +186,6 @@ struct ubd { > .shared = 0, \ > .cow = DEFAULT_COW, \ > .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ > - .request = NULL, \ > - .start_sg = 0, \ > - .end_sg = 0, \ > - .rq_pos = 0, \ > } > > /* Protected by ubd_lock */ > @@ -196,6 +196,12 @@ static int fake_ide = 0; > static struct proc_dir_entry *proc_ide_root = NULL; > static struct proc_dir_entry *proc_ide = NULL; > > +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, > + const struct blk_mq_queue_data *bd); > +static int ubd_init_request(struct blk_mq_tag_set *set, > + struct request *req, unsigned int hctx_idx, > + unsigned int numa_node); > + > static void make_proc_ide(void) > { > proc_ide_root = proc_mkdir("ide", NULL); > @@ -448,11 +454,8 @@ __uml_help(udb_setup, > " in the boot output.\n\n" > ); > > -static void do_ubd_request(struct request_queue * q); > - > /* Only changed by ubd_init, which is an initcall. */ > static int thread_fd = -1; > -static LIST_HEAD(restart); > > /* Function to read several request pointers at a time > * handling fractional reads if (and as) needed > @@ -510,9 +513,6 @@ static int bulk_req_safe_read( > /* Called without dev->lock held, and only in interrupt context. */ > static void ubd_handler(void) > { > - struct ubd *ubd; > - struct list_head *list, *next_ele; > - unsigned long flags; > int n; > int count; > > @@ -532,23 +532,22 @@ static void ubd_handler(void) > return; > } > for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { > - blk_end_request( > - (*irq_req_buffer)[count]->req, > - BLK_STS_OK, > - (*irq_req_buffer)[count]->length > - ); > - kfree((*irq_req_buffer)[count]); > + struct io_thread_req *io_req = (*irq_req_buffer)[count]; > + > + /* > + * UBD is batching IO, only end the blk mq request > + * if this is the last one. > + */ > + if (io_req->is_last) > + blk_mq_end_request(io_req->req, > + io_req->error ? 
> + BLK_STS_IOERR : BLK_STS_OK); > + > + kfree(io_req); > } > } > - reactivate_fd(thread_fd, UBD_IRQ); > > - list_for_each_safe(list, next_ele, &restart){ > - ubd = container_of(list, struct ubd, restart); > - list_del_init(&ubd->restart); > - spin_lock_irqsave(&ubd->lock, flags); > - do_ubd_request(ubd->queue); > - spin_unlock_irqrestore(&ubd->lock, flags); > - } > + reactivate_fd(thread_fd, UBD_IRQ); > } > > static irqreturn_t ubd_intr(int irq, void *dev) > @@ -858,6 +857,7 @@ static void ubd_device_release(struct device *dev) > struct ubd *ubd_dev = dev_get_drvdata(dev); > > blk_cleanup_queue(ubd_dev->queue); > + blk_mq_free_tag_set(&ubd_dev->tag_set); > *ubd_dev = ((struct ubd) DEFAULT_UBD); > } > > @@ -900,6 +900,11 @@ static int ubd_disk_register(int major, u64 size, int unit, > > #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) > > +static const struct blk_mq_ops ubd_mq_ops = { > + .queue_rq = ubd_queue_rq, > + .init_request = ubd_init_request, > +}; > + > static int ubd_add(int n, char **error_out) > { > struct ubd *ubd_dev = &ubd_devs[n]; > @@ -916,15 +921,24 @@ static int ubd_add(int n, char **error_out) > > ubd_dev->size = ROUND_BLOCK(ubd_dev->size); > > - INIT_LIST_HEAD(&ubd_dev->restart); > - sg_init_table(ubd_dev->sg, MAX_SG); > + ubd_dev->tag_set.ops = &ubd_mq_ops; > + ubd_dev->tag_set.queue_depth = 64; > + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; > + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; > + ubd_dev->tag_set.cmd_size = sizeof(struct ubd_pdu); > + ubd_dev->tag_set.driver_data = ubd_dev; > + ubd_dev->tag_set.nr_hw_queues = 1; > > - err = -ENOMEM; > - ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); > - if (ubd_dev->queue == NULL) { > - *error_out = "Failed to initialize device queue"; > + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); > + if (err) > goto out; > + > + ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); > + if (IS_ERR(ubd_dev->queue)) { > + err = PTR_ERR(ubd_dev->queue); > + goto out_cleanup; > } > + > ubd_dev->queue->queuedata = ubd_dev; > blk_queue_write_cache(ubd_dev->queue, true, false); > > @@ -932,7 +946,7 @@ static int ubd_add(int n, char **error_out) > err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); > if(err){ > *error_out = "Failed to register device"; > - goto out_cleanup; > + goto out_cleanup_tags; > } > > if (fake_major != UBD_MAJOR) > @@ -950,6 +964,8 @@ static int ubd_add(int n, char **error_out) > out: > return err; > > +out_cleanup_tags: > + blk_mq_free_tag_set(&ubd_dev->tag_set); > out_cleanup: > blk_cleanup_queue(ubd_dev->queue); > goto out; > @@ -1342,9 +1358,9 @@ static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) > if (n != sizeof(io_req)) { > if (n != -EAGAIN) > printk("write to io thread failed, " > - "errno = %d\n", -n); > - else if (list_empty(&dev->restart)) > - list_add(&dev->restart, &restart); > + "errno = %d\n", -n); > + > + WARN_ONCE(1, "Failed to submit IO request\n"); > > kfree(io_req); > return false; > @@ -1352,63 +1368,73 @@ static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) > return true; > } > > -/* Called with dev->lock held */ > -static void do_ubd_request(struct request_queue *q) > +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, > + const struct blk_mq_queue_data *bd) > { > + struct request *req = bd->rq; > + struct ubd *dev = hctx->queue->queuedata; > + struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req); > struct io_thread_req *io_req; > - struct request *req; > > - while(1){ > - struct ubd *dev = 
q->queuedata; > - if(dev->request == NULL){ > - struct request *req = blk_fetch_request(q); > - if(req == NULL) > - return; > + blk_mq_start_request(req); > + > + pdu->rq_pos = blk_rq_pos(req); > + pdu->start_sg = 0; > + pdu->end_sg = blk_rq_map_sg(req->q, req, pdu->sg); > > - dev->request = req; > - dev->rq_pos = blk_rq_pos(req); > - dev->start_sg = 0; > - dev->end_sg = blk_rq_map_sg(q, req, dev->sg); > + if (req_op(req) == REQ_OP_FLUSH) { > + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); > + if (io_req == NULL) { > + WARN_ONCE(1, "Out of memory in io_thread_req allocation\n"); > + return BLK_STS_IOERR; > } > + prepare_flush_request(req, io_req); > + if (submit_request(io_req, dev) == false) > + return BLK_STS_IOERR; > + > + if (!pdu->end_sg) > + io_req->is_last = true; > + else > + io_req->is_last = false; > + } > > - req = dev->request; > + while (pdu->start_sg < pdu->end_sg) { > + struct scatterlist *sg = &pdu->sg[pdu->start_sg]; > > - if (req_op(req) == REQ_OP_FLUSH) { > - io_req = kmalloc(sizeof(struct io_thread_req), > - GFP_ATOMIC); > - if (io_req == NULL) { > - if (list_empty(&dev->restart)) > - list_add(&dev->restart, &restart); > - return; > - } > - prepare_flush_request(req, io_req); > - if (submit_request(io_req, dev) == false) > - return; > + io_req = kmalloc(sizeof(struct io_thread_req), > + GFP_ATOMIC); > + if (io_req == NULL) { > + WARN_ONCE(1, "Out of memory in io_thread_req allocation\n"); > + return BLK_STS_IOERR; > } > + prepare_request(req, io_req, > + (unsigned long long)pdu->rq_pos << 9, > + sg->offset, sg->length, sg_page(sg)); > > - while(dev->start_sg < dev->end_sg){ > - struct scatterlist *sg = &dev->sg[dev->start_sg]; > + if (pdu->start_sg + 1 == pdu->end_sg) > + io_req->is_last = true; > + else > + io_req->is_last = false; > > - io_req = kmalloc(sizeof(struct io_thread_req), > - GFP_ATOMIC); > - if(io_req == NULL){ > - if(list_empty(&dev->restart)) > - list_add(&dev->restart, &restart); > - return; > - } > - prepare_request(req, io_req, > - (unsigned long long)dev->rq_pos << 9, > - sg->offset, sg->length, sg_page(sg)); > + if (submit_request(io_req, dev) == false) > + return BLK_STS_IOERR; > > - if (submit_request(io_req, dev) == false) > - return; > - > - dev->rq_pos += sg->length >> 9; > - dev->start_sg++; > - } > - dev->end_sg = 0; > - dev->request = NULL; > + pdu->rq_pos += sg->length >> 9; > + pdu->start_sg++; > } > + > + return BLK_STS_OK; > +} > + > +static int ubd_init_request(struct blk_mq_tag_set *set, > + struct request *req, unsigned int hctx_idx, > + unsigned int numa_node) > +{ > + struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req); > + > + sg_init_table(pdu->sg, MAX_SG); > + > + return 0; > } > > static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) |
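The sendmmsg()/recvmmsg() batching Anton proposes, as a userspace sketch with made-up record contents and sizes; it only illustrates submitting several fixed-size IPC records in one system call and is not the vector driver's code:

```c
#define _GNU_SOURCE
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define BATCH 8

/* Send up to BATCH fixed-size records with a single system call. */
static int send_batch(int fd, void *records, size_t rec_size, unsigned int count)
{
	struct mmsghdr msgs[BATCH];
	struct iovec iovs[BATCH];
	unsigned int i;

	if (count > BATCH)
		count = BATCH;

	memset(msgs, 0, sizeof(msgs));
	for (i = 0; i < count; i++) {
		iovs[i].iov_base = (char *)records + i * rec_size;
		iovs[i].iov_len = rec_size;
		msgs[i].msg_hdr.msg_iov = &iovs[i];
		msgs[i].msg_hdr.msg_iovlen = 1;
	}

	/* Returns how many messages were actually sent (may be < count). */
	return sendmmsg(fd, msgs, count, 0);
}
```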
From: Richard W. <ri...@no...> - 2017-11-26 13:10:55
This is the first attempt to convert the UserModeLinux block driver (UBD) to blk-mq. While the conversion itself is rather trivial, a few questions popped up in my head. Maybe you can help me with them. MAX_SG is 64, used for blk_queue_max_segments(). This comes from a0044bdf60c2 ("uml: batch I/O requests"). Is this still a good/sane value for blk-mq? The driver does IO batching, for each request it issues many UML struct io_thread_req request to the IO thread on the host side. One io_thread_req per SG page. Before the conversion the driver used blk_end_request() to indicate that a part of the request is done. blk_mq_end_request() does not take a length parameter, therefore we can only mark the whole request as done. See the new is_last property on the driver. Maybe there is a way to partially end requests too in blk-mq? Another obstacle with IO batching is that UML IO thread requests can fail. Not only due to OOM, also because the pipe between the UML kernel process and the host IO thread can return EAGAIN. In this case the driver puts the request into a list and retried later again when the pipe turns writable. I’m not sure whether this restart logic makes sense with blk-mq, maybe there is a way in blk-mq to put back a (partial) request? Signed-off-by: Richard Weinberger <ri...@no...> --- arch/um/drivers/ubd_kern.c | 188 ++++++++++++++++++++++++++------------------- 1 file changed, 107 insertions(+), 81 deletions(-) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 90034acace2a..abbfe0c97418 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -23,6 +23,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/blkdev.h> +#include <linux/blk-mq.h> #include <linux/ata.h> #include <linux/hdreg.h> #include <linux/cdrom.h> @@ -57,6 +58,7 @@ struct io_thread_req { unsigned long long cow_offset; unsigned long bitmap_words[2]; int error; + bool is_last; }; @@ -142,7 +144,6 @@ struct cow { #define MAX_SG 64 struct ubd { - struct list_head restart; /* name (and fd, below) of the file opened for writing, either the * backing or the cow file. */ char *file; @@ -156,9 +157,12 @@ struct ubd { struct cow cow; struct platform_device pdev; struct request_queue *queue; + struct blk_mq_tag_set tag_set; spinlock_t lock; +}; + +struct ubd_pdu { struct scatterlist sg[MAX_SG]; - struct request *request; int start_sg, end_sg; sector_t rq_pos; }; @@ -182,10 +186,6 @@ struct ubd { .shared = 0, \ .cow = DEFAULT_COW, \ .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ - .request = NULL, \ - .start_sg = 0, \ - .end_sg = 0, \ - .rq_pos = 0, \ } /* Protected by ubd_lock */ @@ -196,6 +196,12 @@ static int fake_ide = 0; static struct proc_dir_entry *proc_ide_root = NULL; static struct proc_dir_entry *proc_ide = NULL; +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd); +static int ubd_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node); + static void make_proc_ide(void) { proc_ide_root = proc_mkdir("ide", NULL); @@ -448,11 +454,8 @@ __uml_help(udb_setup, " in the boot output.\n\n" ); -static void do_ubd_request(struct request_queue * q); - /* Only changed by ubd_init, which is an initcall. 
*/ static int thread_fd = -1; -static LIST_HEAD(restart); /* Function to read several request pointers at a time * handling fractional reads if (and as) needed @@ -510,9 +513,6 @@ static int bulk_req_safe_read( /* Called without dev->lock held, and only in interrupt context. */ static void ubd_handler(void) { - struct ubd *ubd; - struct list_head *list, *next_ele; - unsigned long flags; int n; int count; @@ -532,23 +532,22 @@ static void ubd_handler(void) return; } for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { - blk_end_request( - (*irq_req_buffer)[count]->req, - BLK_STS_OK, - (*irq_req_buffer)[count]->length - ); - kfree((*irq_req_buffer)[count]); + struct io_thread_req *io_req = (*irq_req_buffer)[count]; + + /* + * UBD is batching IO, only end the blk mq request + * if this is the last one. + */ + if (io_req->is_last) + blk_mq_end_request(io_req->req, + io_req->error ? + BLK_STS_IOERR : BLK_STS_OK); + + kfree(io_req); } } - reactivate_fd(thread_fd, UBD_IRQ); - list_for_each_safe(list, next_ele, &restart){ - ubd = container_of(list, struct ubd, restart); - list_del_init(&ubd->restart); - spin_lock_irqsave(&ubd->lock, flags); - do_ubd_request(ubd->queue); - spin_unlock_irqrestore(&ubd->lock, flags); - } + reactivate_fd(thread_fd, UBD_IRQ); } static irqreturn_t ubd_intr(int irq, void *dev) @@ -858,6 +857,7 @@ static void ubd_device_release(struct device *dev) struct ubd *ubd_dev = dev_get_drvdata(dev); blk_cleanup_queue(ubd_dev->queue); + blk_mq_free_tag_set(&ubd_dev->tag_set); *ubd_dev = ((struct ubd) DEFAULT_UBD); } @@ -900,6 +900,11 @@ static int ubd_disk_register(int major, u64 size, int unit, #define ROUND_BLOCK(n) ((n + ((1 << 9) - 1)) & (-1 << 9)) +static const struct blk_mq_ops ubd_mq_ops = { + .queue_rq = ubd_queue_rq, + .init_request = ubd_init_request, +}; + static int ubd_add(int n, char **error_out) { struct ubd *ubd_dev = &ubd_devs[n]; @@ -916,15 +921,24 @@ static int ubd_add(int n, char **error_out) ubd_dev->size = ROUND_BLOCK(ubd_dev->size); - INIT_LIST_HEAD(&ubd_dev->restart); - sg_init_table(ubd_dev->sg, MAX_SG); + ubd_dev->tag_set.ops = &ubd_mq_ops; + ubd_dev->tag_set.queue_depth = 64; + ubd_dev->tag_set.numa_node = NUMA_NO_NODE; + ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ubd_dev->tag_set.cmd_size = sizeof(struct ubd_pdu); + ubd_dev->tag_set.driver_data = ubd_dev; + ubd_dev->tag_set.nr_hw_queues = 1; - err = -ENOMEM; - ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock); - if (ubd_dev->queue == NULL) { - *error_out = "Failed to initialize device queue"; + err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); + if (err) goto out; + + ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); + if (IS_ERR(ubd_dev->queue)) { + err = PTR_ERR(ubd_dev->queue); + goto out_cleanup; } + ubd_dev->queue->queuedata = ubd_dev; blk_queue_write_cache(ubd_dev->queue, true, false); @@ -932,7 +946,7 @@ static int ubd_add(int n, char **error_out) err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; - goto out_cleanup; + goto out_cleanup_tags; } if (fake_major != UBD_MAJOR) @@ -950,6 +964,8 @@ static int ubd_add(int n, char **error_out) out: return err; +out_cleanup_tags: + blk_mq_free_tag_set(&ubd_dev->tag_set); out_cleanup: blk_cleanup_queue(ubd_dev->queue); goto out; @@ -1342,9 +1358,9 @@ static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) if (n != sizeof(io_req)) { if (n != -EAGAIN) printk("write to io thread failed, " - "errno = %d\n", -n); - else if 
(list_empty(&dev->restart)) - list_add(&dev->restart, &restart); + "errno = %d\n", -n); + + WARN_ONCE(1, "Failed to submit IO request\n"); kfree(io_req); return false; @@ -1352,63 +1368,73 @@ static bool submit_request(struct io_thread_req *io_req, struct ubd *dev) return true; } -/* Called with dev->lock held */ -static void do_ubd_request(struct request_queue *q) +static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { + struct request *req = bd->rq; + struct ubd *dev = hctx->queue->queuedata; + struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req); struct io_thread_req *io_req; - struct request *req; - while(1){ - struct ubd *dev = q->queuedata; - if(dev->request == NULL){ - struct request *req = blk_fetch_request(q); - if(req == NULL) - return; + blk_mq_start_request(req); + + pdu->rq_pos = blk_rq_pos(req); + pdu->start_sg = 0; + pdu->end_sg = blk_rq_map_sg(req->q, req, pdu->sg); - dev->request = req; - dev->rq_pos = blk_rq_pos(req); - dev->start_sg = 0; - dev->end_sg = blk_rq_map_sg(q, req, dev->sg); + if (req_op(req) == REQ_OP_FLUSH) { + io_req = kmalloc(sizeof(struct io_thread_req), GFP_ATOMIC); + if (io_req == NULL) { + WARN_ONCE(1, "Out of memory in io_thread_req allocation\n"); + return BLK_STS_IOERR; } + prepare_flush_request(req, io_req); + if (submit_request(io_req, dev) == false) + return BLK_STS_IOERR; + + if (!pdu->end_sg) + io_req->is_last = true; + else + io_req->is_last = false; + } - req = dev->request; + while (pdu->start_sg < pdu->end_sg) { + struct scatterlist *sg = &pdu->sg[pdu->start_sg]; - if (req_op(req) == REQ_OP_FLUSH) { - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if (io_req == NULL) { - if (list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_flush_request(req, io_req); - if (submit_request(io_req, dev) == false) - return; + io_req = kmalloc(sizeof(struct io_thread_req), + GFP_ATOMIC); + if (io_req == NULL) { + WARN_ONCE(1, "Out of memory in io_thread_req allocation\n"); + return BLK_STS_IOERR; } + prepare_request(req, io_req, + (unsigned long long)pdu->rq_pos << 9, + sg->offset, sg->length, sg_page(sg)); - while(dev->start_sg < dev->end_sg){ - struct scatterlist *sg = &dev->sg[dev->start_sg]; + if (pdu->start_sg + 1 == pdu->end_sg) + io_req->is_last = true; + else + io_req->is_last = false; - io_req = kmalloc(sizeof(struct io_thread_req), - GFP_ATOMIC); - if(io_req == NULL){ - if(list_empty(&dev->restart)) - list_add(&dev->restart, &restart); - return; - } - prepare_request(req, io_req, - (unsigned long long)dev->rq_pos << 9, - sg->offset, sg->length, sg_page(sg)); + if (submit_request(io_req, dev) == false) + return BLK_STS_IOERR; - if (submit_request(io_req, dev) == false) - return; - - dev->rq_pos += sg->length >> 9; - dev->start_sg++; - } - dev->end_sg = 0; - dev->request = NULL; + pdu->rq_pos += sg->length >> 9; + pdu->start_sg++; } + + return BLK_STS_OK; +} + +static int ubd_init_request(struct blk_mq_tag_set *set, + struct request *req, unsigned int hctx_idx, + unsigned int numa_node) +{ + struct ubd_pdu *pdu = blk_mq_rq_to_pdu(req); + + sg_init_table(pdu->sg, MAX_SG); + + return 0; } static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -- 2.13.6 |
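On the restart question above: mainline blk-mq lets queue_rq() return BLK_STS_RESOURCE to hand a request back to the core for a later retry, which could stand in for the old driver-private restart list as long as nothing of the request has been submitted yet. A minimal sketch under that assumption follows; example_queue_rq() and example_submit_whole_request() are hypothetical names used only for illustration, not code from the posted patch.

#include <linux/blk-mq.h>

/* Hypothetical helper that either submits every sub-request or nothing. */
static int example_submit_whole_request(struct request *req);

/*
 * Sketch only, not from the posted conversion.  If the pipe to the host
 * I/O thread returns -EAGAIN before any io_thread_req of this blk-mq
 * request has been written, the request can be handed back to blk-mq
 * instead of being parked on a driver-private restart list.
 */
static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
				     const struct blk_mq_queue_data *bd)
{
	struct request *req = bd->rq;
	int ret;

	blk_mq_start_request(req);

	ret = example_submit_whole_request(req);
	if (ret == -EAGAIN)
		return BLK_STS_RESOURCE;	/* core requeues and retries later */
	if (ret < 0)
		return BLK_STS_IOERR;

	return BLK_STS_OK;
}

This only covers the all-or-nothing case raised in the cover letter; once part of the batch has been written to the pipe, returning BLK_STS_RESOURCE would resubmit the whole request, so the partial-failure path still needs driver-side handling (the posted conversion currently fails such requests with BLK_STS_IOERR).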
From: Richard W. <ri...@no...> - 2017-11-24 09:46:22
|
Linus, On Friday, 24 November 2017 at 04:41:37 CET, Linus Torvalds wrote: > On Thu, Nov 23, 2017 at 4:37 AM, Richard Weinberger <ri...@no...> wrote: > > git://git.infradead.org/linux-ubifs.git tags/upstream-4.15-rc1 > Similarly to the arch/um case, none of this seems to have been in > linux-next, and is sent late in the merge window, so I'm skipping it. It has been in linux-next since next-20171120. I'm sorry for being late and will do a better job next time. If you don't mind, I'll send you a PR with bug fixes for 4.15-rc2. Same for UML. Thanks, //richard |
From: Linus T. <tor...@li...> - 2017-11-24 03:40:13
|
On Thu, Nov 23, 2017 at 4:36 AM, Richard Weinberger <ri...@no...> wrote: > > git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git for-linus-4.15-rc1 I asked people to send me their pull requests early before I was traveling, and this second week I'm only taking fixes, or things that were in linux-next. I _should_ do that every release, just to make sure people actually do put things in linux-next, but this release I'm doing it because I want to know it's independently gone through the build tests etc and isn't some random last-minute stuff. As far as I can tell, none of this was in linux-next 20171117. Linus |
From: Richard W. <ri...@no...> - 2017-11-23 14:36:30
|
Linus, The following changes since commit bebc6082da0a9f5d47a1ea2edc099bf671058bd4: Linux 4.14 (2017-11-12 10:46:13 -0800) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml.git for-linus-4.15-rc1 for you to fetch changes up to 02eb0b11eab56b47bcc36aa04dd522786c8faab9: um: Add missing EXPORT for free_irq_by_fd() (2017-11-22 15:20:56 +0100) ---------------------------------------------------------------- This pull request contains updates for UML: - A new and faster epoll based IRQ controller and NIC driver - Misc fixes and janitorial updates ---------------------------------------------------------------- Anton Ivanov (3): Epoll based IRQ controller High Performance UML Vector Network Driver um: Add missing EXPORT for free_irq_by_fd() Arnd Bergmann (1): um: time: Use timespec64 for persistent clock Geert Uytterhoeven (1): um: Restore symbol versions for __memcpy and memcpy Kees Cook (1): um: net: Convert timers to use timer_setup() Krzysztof Mazur (1): um: Use POSIX ucontext_t instead of struct ucontext arch/um/Kconfig.net | 11 + arch/um/drivers/Makefile | 4 +- arch/um/drivers/chan_kern.c | 53 +- arch/um/drivers/line.c | 2 +- arch/um/drivers/net_kern.c | 13 +- arch/um/drivers/random.c | 11 +- arch/um/drivers/ubd_kern.c | 4 +- arch/um/drivers/vector_kern.c | 1630 +++++++++++++++++++++++++++++++++ + arch/um/drivers/vector_kern.h | 129 +++ arch/um/drivers/vector_transports.c | 458 ++++++++++ arch/um/drivers/vector_user.c | 586 ++++++++++++ arch/um/drivers/vector_user.h | 99 +++ arch/um/include/asm/asm-prototypes.h | 1 + arch/um/include/asm/irq.h | 12 + arch/um/include/shared/irq_user.h | 12 +- arch/um/include/shared/net_kern.h | 2 + arch/um/include/shared/os.h | 17 +- arch/um/kernel/irq.c | 461 ++++++---- arch/um/kernel/time.c | 6 +- arch/um/os-Linux/irq.c | 202 +++-- arch/um/os-Linux/signal.c | 2 +- arch/x86/um/stub_segv.c | 2 +- 22 files changed, 3387 insertions(+), 330 deletions(-) create mode 100644 arch/um/drivers/vector_kern.c create mode 100644 arch/um/drivers/vector_kern.h create mode 100644 arch/um/drivers/vector_transports.c create mode 100644 arch/um/drivers/vector_user.c create mode 100644 arch/um/drivers/vector_user.h create mode 100644 arch/um/include/asm/asm-prototypes.h |
From: <ant...@ko...> - 2017-11-22 13:50:11
|
From: Anton Ivanov <ant...@ca...> Add missing EXPORT for free_irq_by_fd() Signed-off-by: Anton Ivanov <ant...@ca...> --- arch/um/kernel/irq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 980148d56537..6b7f3827d6e4 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -328,6 +328,7 @@ void free_irq_by_fd(int fd) garbage_collect_irq_entries(); spin_unlock_irqrestore(&irq_lock, flags); } +EXPORT_SYMBOL(free_irq_by_fd); static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { -- 2.11.0 |
From: <ant...@ko...> - 2017-11-22 13:50:07
|
Errata vs today's linux-next and, once again, apologies - I checked out yesterday's one by mistake. A. |
From: Anton I. <ant...@ko...> - 2017-11-22 13:33:24
|
Apologies, Broken stupidometer - I checked out next-20171121 instead of next-20171122. I am fixing it; an errata patch will follow shortly. A. On 11/22/17 13:22, Richard Weinberger wrote: > On Wednesday, 22 November 2017 at 14:09:11 CET, anton.ivanov@kot-begemot.co.uk wrote: >> Resending patchset v11 versus Linux Next 20171121 > Erm, you sent me full patches again. > *confused* > > Version 10 is in linux-next; now we have to deal with the fallout. > Otherwise I have to revert. > > Thanks, > //richard > |
From: Richard W. <ri...@si...> - 2017-11-22 13:22:11
|
On Wednesday, 22 November 2017 at 14:09:11 CET, anton.ivanov@kot-begemot.co.uk wrote: > Resending patchset v11 versus Linux Next 20171121 Erm, you sent me full patches again. *confused* Version 10 is in linux-next; now we have to deal with the fallout. Otherwise I have to revert. Thanks, //richard -- sigma star gmbh - Eduard-Bodem-Gasse 6 - 6020 Innsbruck - Austria ATU66964118 - FN 374287y |
From: <ant...@ko...> - 2017-11-22 13:09:30
|
From: Anton Ivanov <ant...@ca...> 1. Removes the need to walk the IRQ/Device list to determine who triggered the IRQ. 2. Improves scalability (up to several times performance improvement for cases with 10s of devices). 3. Improves UML baseline IO performance for one disk + one NIC use case by up to 10%. 4. Introduces write poll triggered IRQs. 5. Prerequisite for introducing high performance mmesg family of functions in network IO. 6. Fixes RNG shutdown which was leaking a file descriptor Signed-off-by: Anton Ivanov <ant...@ca...> --- arch/um/drivers/chan_kern.c | 53 +---- arch/um/drivers/line.c | 2 +- arch/um/drivers/random.c | 11 +- arch/um/drivers/ubd_kern.c | 4 +- arch/um/include/shared/irq_user.h | 12 +- arch/um/include/shared/os.h | 17 +- arch/um/kernel/irq.c | 460 ++++++++++++++++++++++++-------------- arch/um/os-Linux/irq.c | 202 +++++++++-------- 8 files changed, 444 insertions(+), 317 deletions(-) diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index acbe6c67afba..05588f9466c7 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -171,56 +171,19 @@ int enable_chan(struct line *line) return err; } -/* Items are added in IRQ context, when free_irq can't be called, and - * removed in process context, when it can. - * This handles interrupt sources which disappear, and which need to - * be permanently disabled. This is discovered in IRQ context, but - * the freeing of the IRQ must be done later. - */ -static DEFINE_SPINLOCK(irqs_to_free_lock); -static LIST_HEAD(irqs_to_free); - -void free_irqs(void) -{ - struct chan *chan; - LIST_HEAD(list); - struct list_head *ele; - unsigned long flags; - - spin_lock_irqsave(&irqs_to_free_lock, flags); - list_splice_init(&irqs_to_free, &list); - spin_unlock_irqrestore(&irqs_to_free_lock, flags); - - list_for_each(ele, &list) { - chan = list_entry(ele, struct chan, free_list); - - if (chan->input && chan->enabled) - um_free_irq(chan->line->driver->read_irq, chan); - if (chan->output && chan->enabled) - um_free_irq(chan->line->driver->write_irq, chan); - chan->enabled = 0; - } -} - static void close_one_chan(struct chan *chan, int delay_free_irq) { - unsigned long flags; - if (!chan->opened) return; - if (delay_free_irq) { - spin_lock_irqsave(&irqs_to_free_lock, flags); - list_add(&chan->free_list, &irqs_to_free); - spin_unlock_irqrestore(&irqs_to_free_lock, flags); - } - else { - if (chan->input && chan->enabled) - um_free_irq(chan->line->driver->read_irq, chan); - if (chan->output && chan->enabled) - um_free_irq(chan->line->driver->write_irq, chan); - chan->enabled = 0; - } + /* we can safely call free now - it will be marked + * as free and freed once the IRQ stopped processing + */ + if (chan->input && chan->enabled) + um_free_irq(chan->line->driver->read_irq, chan); + if (chan->output && chan->enabled) + um_free_irq(chan->line->driver->write_irq, chan); + chan->enabled = 0; if (chan->ops->close != NULL) (*chan->ops->close)(chan->fd, chan->data); diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 366e57f5e8d6..8d80b27502e6 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -284,7 +284,7 @@ int line_setup_irq(int fd, int input, int output, struct line *line, void *data) if (err) return err; if (output) - err = um_request_irq(driver->write_irq, fd, IRQ_WRITE, + err = um_request_irq(driver->write_irq, fd, IRQ_NONE, line_write_interrupt, IRQF_SHARED, driver->write_irq_name, data); return err; diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 
37c51a6be690..778a0e52d5a5 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -13,6 +13,7 @@ #include <linux/miscdevice.h> #include <linux/delay.h> #include <linux/uaccess.h> +#include <init.h> #include <irq_kern.h> #include <os.h> @@ -154,7 +155,14 @@ static int __init rng_init (void) /* * rng_cleanup - shutdown RNG module */ -static void __exit rng_cleanup (void) + +static void cleanup(void) +{ + free_irq_by_fd(random_fd); + os_close_file(random_fd); +} + +static void __exit rng_cleanup(void) { os_close_file(random_fd); misc_deregister (&rng_miscdev); @@ -162,6 +170,7 @@ static void __exit rng_cleanup (void) module_init (rng_init); module_exit (rng_cleanup); +__uml_exitcall(cleanup); MODULE_DESCRIPTION("UML Host Random Number Generator (RNG) driver"); MODULE_LICENSE("GPL"); diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index b55fe9bf5d3e..d4e8c497ae86 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -1587,11 +1587,11 @@ int io_thread(void *arg) do { res = os_write_file(kernel_fd, ((char *) io_req_buffer) + written, n); - if (res > 0) { + if (res >= 0) { written += res; } else { if (res != -EAGAIN) { - printk("io_thread - read failed, fd = %d, " + printk("io_thread - write failed, fd = %d, " "err = %d\n", kernel_fd, -n); } } diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h index df5633053957..a7a6120f19d5 100644 --- a/arch/um/include/shared/irq_user.h +++ b/arch/um/include/shared/irq_user.h @@ -7,6 +7,7 @@ #define __IRQ_USER_H__ #include <sysdep/ptrace.h> +#include <stdbool.h> struct irq_fd { struct irq_fd *next; @@ -15,10 +16,17 @@ struct irq_fd { int type; int irq; int events; - int current_events; + bool active; + bool pending; + bool purge; }; -enum { IRQ_READ, IRQ_WRITE }; +#define IRQ_READ 0 +#define IRQ_WRITE 1 +#define IRQ_NONE 2 +#define MAX_IRQ_TYPE (IRQ_NONE + 1) + + struct siginfo; extern void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs); diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index d8ddaf9790d2..048ae37eb5aa 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -290,15 +290,16 @@ extern void halt_skas(void); extern void reboot_skas(void); /* irq.c */ -extern int os_waiting_for_events(struct irq_fd *active_fds); -extern int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds); -extern void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, - struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2); -extern void os_free_irq_later(struct irq_fd *active_fds, - int irq, void *dev_id); -extern int os_get_pollfd(int i); -extern void os_set_pollfd(int i, int fd); +extern int os_waiting_for_events_epoll(void); +extern void *os_epoll_get_data_pointer(int index); +extern int os_epoll_triggered(int index, int events); +extern int os_event_mask(int irq_type); +extern int os_setup_epoll(void); +extern int os_add_epoll_fd(int events, int fd, void *data); +extern int os_mod_epoll_fd(int events, int fd, void *data); +extern int os_del_epoll_fd(int fd); extern void os_set_ioignore(void); +extern void os_close_epoll_fd(void); /* sigio.c */ extern int add_sigio_fd(int fd); diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 23cb9350d47e..980148d56537 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -1,4 +1,6 @@ /* + * Copyright (C) 2017 - Cambridge Greys Ltd + * Copyright (C) 2011 - 2014 Cisco Systems Inc * Copyright (C) 2000 - 2007 Jeff Dike 
(jdike@{addtoit,linux.intel}.com) * Licensed under the GPL * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c: @@ -16,243 +18,361 @@ #include <as-layout.h> #include <kern_util.h> #include <os.h> +#include <irq_user.h> -/* - * This list is accessed under irq_lock, except in sigio_handler, - * where it is safe from being modified. IRQ handlers won't change it - - * if an IRQ source has vanished, it will be freed by free_irqs just - * before returning from sigio_handler. That will process a separate - * list of irqs to free, with its own locking, coming back here to - * remove list elements, taking the irq_lock to do so. + +/* When epoll triggers we do not know why it did so + * we can also have different IRQs for read and write. + * This is why we keep a small irq_fd array for each fd - + * one entry per IRQ type */ -static struct irq_fd *active_fds = NULL; -static struct irq_fd **last_irq_ptr = &active_fds; -extern void free_irqs(void); +struct irq_entry { + struct irq_entry *next; + int fd; + struct irq_fd *irq_array[MAX_IRQ_TYPE + 1]; +}; + +static struct irq_entry *active_fds; + +static DEFINE_SPINLOCK(irq_lock); + +static void irq_io_loop(struct irq_fd *irq, struct uml_pt_regs *regs) +{ +/* + * irq->active guards against reentry + * irq->pending accumulates pending requests + * if pending is raised the irq_handler is re-run + * until pending is cleared + */ + if (irq->active) { + irq->active = false; + do { + irq->pending = false; + do_IRQ(irq->irq, regs); + } while (irq->pending && (!irq->purge)); + if (!irq->purge) + irq->active = true; + } else { + irq->pending = true; + } +} void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) { - struct irq_fd *irq_fd; - int n; + struct irq_entry *irq_entry; + struct irq_fd *irq; + + int n, i, j; while (1) { - n = os_waiting_for_events(active_fds); + /* This is now lockless - epoll keeps back-referencesto the irqs + * which have trigger it so there is no need to walk the irq + * list and lock it every time. We avoid locking by turning off + * IO for a specific fd by executing os_del_epoll_fd(fd) before + * we do any changes to the actual data structures + */ + n = os_waiting_for_events_epoll(); + if (n <= 0) { if (n == -EINTR) continue; - else break; + else + break; } - for (irq_fd = active_fds; irq_fd != NULL; - irq_fd = irq_fd->next) { - if (irq_fd->current_events != 0) { - irq_fd->current_events = 0; - do_IRQ(irq_fd->irq, regs); + for (i = 0; i < n ; i++) { + /* Epoll back reference is the entry with 3 irq_fd + * leaves - one for each irq type. 
+ */ + irq_entry = (struct irq_entry *) + os_epoll_get_data_pointer(i); + for (j = 0; j < MAX_IRQ_TYPE ; j++) { + irq = irq_entry->irq_array[j]; + if (irq == NULL) + continue; + if (os_epoll_triggered(i, irq->events) > 0) + irq_io_loop(irq, regs); + if (irq->purge) { + irq_entry->irq_array[j] = NULL; + kfree(irq); + } } } } +} + +static int assign_epoll_events_to_irq(struct irq_entry *irq_entry) +{ + int i; + int events = 0; + struct irq_fd *irq; - free_irqs(); + for (i = 0; i < MAX_IRQ_TYPE ; i++) { + irq = irq_entry->irq_array[i]; + if (irq != NULL) + events = irq->events | events; + } + if (events > 0) { + /* os_add_epoll will call os_mod_epoll if this already exists */ + return os_add_epoll_fd(events, irq_entry->fd, irq_entry); + } + /* No events - delete */ + return os_del_epoll_fd(irq_entry->fd); } -static DEFINE_SPINLOCK(irq_lock); + static int activate_fd(int irq, int fd, int type, void *dev_id) { - struct pollfd *tmp_pfd; - struct irq_fd *new_fd, *irq_fd; + struct irq_fd *new_fd; + struct irq_entry *irq_entry; + int i, err, events; unsigned long flags; - int events, err, n; err = os_set_fd_async(fd); if (err < 0) goto out; - err = -ENOMEM; - new_fd = kmalloc(sizeof(struct irq_fd), GFP_KERNEL); - if (new_fd == NULL) - goto out; + spin_lock_irqsave(&irq_lock, flags); - if (type == IRQ_READ) - events = UM_POLLIN | UM_POLLPRI; - else events = UM_POLLOUT; - *new_fd = ((struct irq_fd) { .next = NULL, - .id = dev_id, - .fd = fd, - .type = type, - .irq = irq, - .events = events, - .current_events = 0 } ); + /* Check if we have an entry for this fd */ err = -EBUSY; - spin_lock_irqsave(&irq_lock, flags); - for (irq_fd = active_fds; irq_fd != NULL; irq_fd = irq_fd->next) { - if ((irq_fd->fd == fd) && (irq_fd->type == type)) { - printk(KERN_ERR "Registering fd %d twice\n", fd); - printk(KERN_ERR "Irqs : %d, %d\n", irq_fd->irq, irq); - printk(KERN_ERR "Ids : 0x%p, 0x%p\n", irq_fd->id, - dev_id); + for (irq_entry = active_fds; + irq_entry != NULL; irq_entry = irq_entry->next) { + if (irq_entry->fd == fd) + break; + } + + if (irq_entry == NULL) { + /* This needs to be atomic as it may be called from an + * IRQ context. + */ + irq_entry = kmalloc(sizeof(struct irq_entry), GFP_ATOMIC); + if (irq_entry == NULL) { + printk(KERN_ERR + "Failed to allocate new IRQ entry\n"); goto out_unlock; } + irq_entry->fd = fd; + for (i = 0; i < MAX_IRQ_TYPE; i++) + irq_entry->irq_array[i] = NULL; + irq_entry->next = active_fds; + active_fds = irq_entry; } - if (type == IRQ_WRITE) - fd = -1; - - tmp_pfd = NULL; - n = 0; + /* Check if we are trying to re-register an interrupt for a + * particular fd + */ - while (1) { - n = os_create_pollfd(fd, events, tmp_pfd, n); - if (n == 0) - break; + if (irq_entry->irq_array[type] != NULL) { + printk(KERN_ERR + "Trying to reregister IRQ %d FD %d TYPE %d ID %p\n", + irq, fd, type, dev_id + ); + goto out_unlock; + } else { + /* New entry for this fd */ + + err = -ENOMEM; + new_fd = kmalloc(sizeof(struct irq_fd), GFP_ATOMIC); + if (new_fd == NULL) + goto out_unlock; - /* - * n > 0 - * It means we couldn't put new pollfd to current pollfds - * and tmp_fds is NULL or too small for new pollfds array. - * Needed size is equal to n as minimum. - * - * Here we have to drop the lock in order to call - * kmalloc, which might sleep. - * If something else came in and changed the pollfds array - * so we will not be able to put new pollfd struct to pollfds - * then we free the buffer tmp_fds and try again. 
+ events = os_event_mask(type); + + *new_fd = ((struct irq_fd) { + .id = dev_id, + .irq = irq, + .type = type, + .events = events, + .active = true, + .pending = false, + .purge = false + }); + /* Turn off any IO on this fd - allows us to + * avoid locking the IRQ loop */ - spin_unlock_irqrestore(&irq_lock, flags); - kfree(tmp_pfd); - - tmp_pfd = kmalloc(n, GFP_KERNEL); - if (tmp_pfd == NULL) - goto out_kfree; - - spin_lock_irqsave(&irq_lock, flags); + os_del_epoll_fd(irq_entry->fd); + irq_entry->irq_array[type] = new_fd; } - *last_irq_ptr = new_fd; - last_irq_ptr = &new_fd->next; - + /* Turn back IO on with the correct (new) IO event mask */ + assign_epoll_events_to_irq(irq_entry); spin_unlock_irqrestore(&irq_lock, flags); - - /* - * This calls activate_fd, so it has to be outside the critical - * section. - */ - maybe_sigio_broken(fd, (type == IRQ_READ)); + maybe_sigio_broken(fd, (type != IRQ_NONE)); return 0; - - out_unlock: +out_unlock: spin_unlock_irqrestore(&irq_lock, flags); - out_kfree: - kfree(new_fd); - out: +out: return err; } -static void free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg) +/* + * Walk the IRQ list and dispose of any unused entries. + * Should be done under irq_lock. + */ + +static void garbage_collect_irq_entries(void) { - unsigned long flags; + int i; + bool reap; + struct irq_entry *walk; + struct irq_entry *previous = NULL; + struct irq_entry *to_free; - spin_lock_irqsave(&irq_lock, flags); - os_free_irq_by_cb(test, arg, active_fds, &last_irq_ptr); - spin_unlock_irqrestore(&irq_lock, flags); + if (active_fds == NULL) + return; + walk = active_fds; + while (walk != NULL) { + reap = true; + for (i = 0; i < MAX_IRQ_TYPE ; i++) { + if (walk->irq_array[i] != NULL) { + reap = false; + break; + } + } + if (reap) { + if (previous == NULL) + active_fds = walk->next; + else + previous->next = walk->next; + to_free = walk; + } else { + to_free = NULL; + } + walk = walk->next; + if (to_free != NULL) + kfree(to_free); + } } -struct irq_and_dev { - int irq; - void *dev; -}; +/* + * Walk the IRQ list and get the descriptor for our FD + */ -static int same_irq_and_dev(struct irq_fd *irq, void *d) +static struct irq_entry *get_irq_entry_by_fd(int fd) { - struct irq_and_dev *data = d; + struct irq_entry *walk = active_fds; - return ((irq->irq == data->irq) && (irq->id == data->dev)); + while (walk != NULL) { + if (walk->fd == fd) + return walk; + walk = walk->next; + } + return NULL; } -static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) -{ - struct irq_and_dev data = ((struct irq_and_dev) { .irq = irq, - .dev = dev }); - free_irq_by_cb(same_irq_and_dev, &data); -} +/* + * Walk the IRQ list and dispose of an entry for a specific + * device, fd and number. Note - if sharing an IRQ for read + * and writefor the same FD it will be disposed in either case. + * If this behaviour is undesirable use different IRQ ids. 
+ */ -static int same_fd(struct irq_fd *irq, void *fd) -{ - return (irq->fd == *((int *)fd)); -} +#define IGNORE_IRQ 1 +#define IGNORE_DEV (1<<1) -void free_irq_by_fd(int fd) +static void do_free_by_irq_and_dev( + struct irq_entry *irq_entry, + unsigned int irq, + void *dev, + int flags +) { - free_irq_by_cb(same_fd, &fd); + int i; + struct irq_fd *to_free; + + for (i = 0; i < MAX_IRQ_TYPE ; i++) { + if (irq_entry->irq_array[i] != NULL) { + if ( + ((flags & IGNORE_IRQ) || + (irq_entry->irq_array[i]->irq == irq)) && + ((flags & IGNORE_DEV) || + (irq_entry->irq_array[i]->id == dev)) + ) { + /* Turn off any IO on this fd - allows us to + * avoid locking the IRQ loop + */ + os_del_epoll_fd(irq_entry->fd); + to_free = irq_entry->irq_array[i]; + irq_entry->irq_array[i] = NULL; + assign_epoll_events_to_irq(irq_entry); + if (to_free->active) + to_free->purge = true; + else + kfree(to_free); + } + } + } } -/* Must be called with irq_lock held */ -static struct irq_fd *find_irq_by_fd(int fd, int irqnum, int *index_out) +void free_irq_by_fd(int fd) { - struct irq_fd *irq; - int i = 0; - int fdi; + struct irq_entry *to_free; + unsigned long flags; - for (irq = active_fds; irq != NULL; irq = irq->next) { - if ((irq->fd == fd) && (irq->irq == irqnum)) - break; - i++; - } - if (irq == NULL) { - printk(KERN_ERR "find_irq_by_fd doesn't have descriptor %d\n", - fd); - goto out; - } - fdi = os_get_pollfd(i); - if ((fdi != -1) && (fdi != fd)) { - printk(KERN_ERR "find_irq_by_fd - mismatch between active_fds " - "and pollfds, fd %d vs %d, need %d\n", irq->fd, - fdi, fd); - irq = NULL; - goto out; + spin_lock_irqsave(&irq_lock, flags); + to_free = get_irq_entry_by_fd(fd); + if (to_free != NULL) { + do_free_by_irq_and_dev( + to_free, + -1, + NULL, + IGNORE_IRQ | IGNORE_DEV + ); } - *index_out = i; - out: - return irq; + garbage_collect_irq_entries(); + spin_unlock_irqrestore(&irq_lock, flags); } -void reactivate_fd(int fd, int irqnum) +static void free_irq_by_irq_and_dev(unsigned int irq, void *dev) { - struct irq_fd *irq; + struct irq_entry *to_free; unsigned long flags; - int i; spin_lock_irqsave(&irq_lock, flags); - irq = find_irq_by_fd(fd, irqnum, &i); - if (irq == NULL) { - spin_unlock_irqrestore(&irq_lock, flags); - return; + to_free = active_fds; + while (to_free != NULL) { + do_free_by_irq_and_dev( + to_free, + irq, + dev, + 0 + ); + to_free = to_free->next; } - os_set_pollfd(i, irq->fd); + garbage_collect_irq_entries(); spin_unlock_irqrestore(&irq_lock, flags); +} - add_sigio_fd(fd); + +void reactivate_fd(int fd, int irqnum) +{ + /** NOP - we do auto-EOI now **/ } void deactivate_fd(int fd, int irqnum) { - struct irq_fd *irq; + struct irq_entry *to_free; unsigned long flags; - int i; + os_del_epoll_fd(fd); spin_lock_irqsave(&irq_lock, flags); - irq = find_irq_by_fd(fd, irqnum, &i); - if (irq == NULL) { - spin_unlock_irqrestore(&irq_lock, flags); - return; + to_free = get_irq_entry_by_fd(fd); + if (to_free != NULL) { + do_free_by_irq_and_dev( + to_free, + irqnum, + NULL, + IGNORE_DEV + ); } - - os_set_pollfd(i, -1); + garbage_collect_irq_entries(); spin_unlock_irqrestore(&irq_lock, flags); - ignore_sigio_fd(fd); } EXPORT_SYMBOL(deactivate_fd); @@ -265,17 +385,28 @@ EXPORT_SYMBOL(deactivate_fd); */ int deactivate_all_fds(void) { - struct irq_fd *irq; - int err; + unsigned long flags; + struct irq_entry *to_free; - for (irq = active_fds; irq != NULL; irq = irq->next) { - err = os_clear_fd_async(irq->fd); - if (err) - return err; - } - /* If there is a signal already queued, after unblocking ignore it */ + 
spin_lock_irqsave(&irq_lock, flags); + /* Stop IO. The IRQ loop has no lock so this is our + * only way of making sure we are safe to dispose + * of all IRQ handlers + */ os_set_ioignore(); - + to_free = active_fds; + while (to_free != NULL) { + do_free_by_irq_and_dev( + to_free, + -1, + NULL, + IGNORE_IRQ | IGNORE_DEV + ); + to_free = to_free->next; + } + garbage_collect_irq_entries(); + spin_unlock_irqrestore(&irq_lock, flags); + os_close_epoll_fd(); return 0; } @@ -353,8 +484,11 @@ void __init init_IRQ(void) irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq); + for (i = 1; i < NR_IRQS; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); + /* Initialize EPOLL Loop */ + os_setup_epoll(); } /* diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c index b9afb74b79ad..365823010346 100644 --- a/arch/um/os-Linux/irq.c +++ b/arch/um/os-Linux/irq.c @@ -1,135 +1,147 @@ /* + * Copyright (C) 2017 - Cambridge Greys Ltd + * Copyright (C) 2011 - 2014 Cisco Systems Inc * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) * Licensed under the GPL */ #include <stdlib.h> #include <errno.h> -#include <poll.h> +#include <sys/epoll.h> #include <signal.h> #include <string.h> #include <irq_user.h> #include <os.h> #include <um_malloc.h> +/* Epoll support */ + +static int epollfd = -1; + +#define MAX_EPOLL_EVENTS 64 + +static struct epoll_event epoll_events[MAX_EPOLL_EVENTS]; + +/* Helper to return an Epoll data pointer from an epoll event structure. + * We need to keep this one on the userspace side to keep includes separate + */ + +void *os_epoll_get_data_pointer(int index) +{ + return epoll_events[index].data.ptr; +} + +/* Helper to compare events versus the events in the epoll structure. + * Same as above - needs to be on the userspace side + */ + + +int os_epoll_triggered(int index, int events) +{ + return epoll_events[index].events & events; +} +/* Helper to set the event mask. + * The event mask is opaque to the kernel side, because it does not have + * access to the right includes/defines for EPOLL constants. + */ + +int os_event_mask(int irq_type) +{ + if (irq_type == IRQ_READ) + return EPOLLIN | EPOLLPRI; + if (irq_type == IRQ_WRITE) + return EPOLLOUT; + return 0; +} + /* - * Locked by irq_lock in arch/um/kernel/irq.c. Changed by os_create_pollfd - * and os_free_irq_by_cb, which are called under irq_lock. 
+ * Initial Epoll Setup */ -static struct pollfd *pollfds = NULL; -static int pollfds_num = 0; -static int pollfds_size = 0; +int os_setup_epoll(void) +{ + epollfd = epoll_create(MAX_EPOLL_EVENTS); + return epollfd; +} -int os_waiting_for_events(struct irq_fd *active_fds) +/* + * Helper to run the actual epoll_wait + */ +int os_waiting_for_events_epoll(void) { - struct irq_fd *irq_fd; - int i, n, err; + int n, err; - n = poll(pollfds, pollfds_num, 0); + n = epoll_wait(epollfd, + (struct epoll_event *) &epoll_events, MAX_EPOLL_EVENTS, 0); if (n < 0) { err = -errno; if (errno != EINTR) - printk(UM_KERN_ERR "os_waiting_for_events:" - " poll returned %d, errno = %d\n", n, errno); + printk( + UM_KERN_ERR "os_waiting_for_events:" + " epoll returned %d, error = %s\n", n, + strerror(errno) + ); return err; } - - if (n == 0) - return 0; - - irq_fd = active_fds; - - for (i = 0; i < pollfds_num; i++) { - if (pollfds[i].revents != 0) { - irq_fd->current_events = pollfds[i].revents; - pollfds[i].fd = -1; - } - irq_fd = irq_fd->next; - } return n; } -int os_create_pollfd(int fd, int events, void *tmp_pfd, int size_tmpfds) -{ - if (pollfds_num == pollfds_size) { - if (size_tmpfds <= pollfds_size * sizeof(pollfds[0])) { - /* return min size needed for new pollfds area */ - return (pollfds_size + 1) * sizeof(pollfds[0]); - } - - if (pollfds != NULL) { - memcpy(tmp_pfd, pollfds, - sizeof(pollfds[0]) * pollfds_size); - /* remove old pollfds */ - kfree(pollfds); - } - pollfds = tmp_pfd; - pollfds_size++; - } else - kfree(tmp_pfd); /* remove not used tmp_pfd */ - - pollfds[pollfds_num] = ((struct pollfd) { .fd = fd, - .events = events, - .revents = 0 }); - pollfds_num++; - - return 0; -} -void os_free_irq_by_cb(int (*test)(struct irq_fd *, void *), void *arg, - struct irq_fd *active_fds, struct irq_fd ***last_irq_ptr2) +/* + * Helper to add a fd to epoll + */ +int os_add_epoll_fd(int events, int fd, void *data) { - struct irq_fd **prev; - int i = 0; - - prev = &active_fds; - while (*prev != NULL) { - if ((*test)(*prev, arg)) { - struct irq_fd *old_fd = *prev; - if ((pollfds[i].fd != -1) && - (pollfds[i].fd != (*prev)->fd)) { - printk(UM_KERN_ERR "os_free_irq_by_cb - " - "mismatch between active_fds and " - "pollfds, fd %d vs %d\n", - (*prev)->fd, pollfds[i].fd); - goto out; - } - - pollfds_num--; - - /* - * This moves the *whole* array after pollfds[i] - * (though it doesn't spot as such)! 
- */ - memmove(&pollfds[i], &pollfds[i + 1], - (pollfds_num - i) * sizeof(pollfds[0])); - if (*last_irq_ptr2 == &old_fd->next) - *last_irq_ptr2 = prev; - - *prev = (*prev)->next; - if (old_fd->type == IRQ_WRITE) - ignore_sigio_fd(old_fd->fd); - kfree(old_fd); - continue; - } - prev = &(*prev)->next; - i++; - } - out: - return; + struct epoll_event event; + int result; + + event.data.ptr = data; + event.events = events | EPOLLET; + result = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event); + if ((result) && (errno == EEXIST)) + result = os_mod_epoll_fd(events, fd, data); + if (result) + printk("epollctl add err fd %d, %s\n", fd, strerror(errno)); + return result; } -int os_get_pollfd(int i) +/* + * Helper to mod the fd event mask and/or data backreference + */ +int os_mod_epoll_fd(int events, int fd, void *data) { - return pollfds[i].fd; + struct epoll_event event; + int result; + + event.data.ptr = data; + event.events = events; + result = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event); + if (result) + printk(UM_KERN_ERR + "epollctl mod err fd %d, %s\n", fd, strerror(errno)); + return result; } -void os_set_pollfd(int i, int fd) +/* + * Helper to delete the epoll fd + */ +int os_del_epoll_fd(int fd) { - pollfds[i].fd = fd; + struct epoll_event event; + int result; + /* This is quiet as we use this as IO ON/OFF - so it is often + * invoked on a non-existent fd + */ + result = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, &event); + return result; } void os_set_ioignore(void) { signal(SIGIO, SIG_IGN); } + +void os_close_epoll_fd(void) +{ + /* Needed so we do not leak an fd when rebooting */ + os_close_file(epollfd); +} -- 2.11.0 |
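For readers who have not used the pattern this series is built on, here is a small self-contained userspace sketch of the same idea: every fd is registered edge-triggered and carries a back-reference in epoll's data.ptr, so the handler that triggered is reached directly instead of by walking a device list. It is only an illustration under those assumptions (plain Linux, a pipe as the event source); none of it is code from the patch.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/epoll.h>

struct handler {
	int fd;
	void (*dispatch)(struct handler *h, unsigned int events);
};

static void echo_dispatch(struct handler *h, unsigned int events)
{
	char buf[64];
	ssize_t n;

	/* Edge-triggered: drain until EAGAIN so no edge is lost. */
	while ((n = read(h->fd, buf, sizeof(buf))) > 0)
		printf("fd %d delivered %zd bytes (events 0x%x)\n",
		       h->fd, n, events);
}

int main(void)
{
	int pfd[2];
	int epfd = epoll_create1(0);
	struct handler h;
	struct epoll_event ev, out[8];

	if (epfd < 0 || pipe(pfd))
		return 1;
	fcntl(pfd[0], F_SETFL, O_NONBLOCK);

	h.fd = pfd[0];
	h.dispatch = echo_dispatch;

	ev.events = EPOLLIN | EPOLLET;	/* edge-triggered, as in the driver */
	ev.data.ptr = &h;		/* back-reference: no list walk needed */
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, pfd[0], &ev))
		return 1;

	write(pfd[1], "ping", 4);

	int n = epoll_wait(epfd, out, 8, 1000);
	for (int i = 0; i < n; i++) {
		struct handler *cur = out[i].data.ptr;

		cur->dispatch(cur, out[i].events);
	}
	return 0;
}

The drain-until-EAGAIN loop is the userspace analogue of the irq->pending/irq->active re-run loop in irq_io_loop(): with EPOLLET an event edge is reported once, so whoever handles it must consume everything that is ready.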
From: <ant...@ko...> - 2017-11-22 13:09:23
|
Resending patchset v11 versus Linux Next 20171121 Best Regards, A. |