You can subscribe to this list here.
| 2009 |
Jan
|
Feb
|
Mar
|
Apr
|
May
(32) |
Jun
(66) |
Jul
(102) |
Aug
(78) |
Sep
(106) |
Oct
(137) |
Nov
(147) |
Dec
(147) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2010 |
Jan
(71) |
Feb
(139) |
Mar
(86) |
Apr
(76) |
May
(57) |
Jun
(10) |
Jul
(12) |
Aug
(6) |
Sep
(8) |
Oct
(12) |
Nov
(12) |
Dec
(18) |
| 2011 |
Jan
(16) |
Feb
(19) |
Mar
(3) |
Apr
(1) |
May
(16) |
Jun
(17) |
Jul
(74) |
Aug
(22) |
Sep
(18) |
Oct
(24) |
Nov
(21) |
Dec
(30) |
| 2012 |
Jan
(31) |
Feb
(16) |
Mar
(22) |
Apr
(25) |
May
(18) |
Jun
(13) |
Jul
(83) |
Aug
(49) |
Sep
(20) |
Oct
(60) |
Nov
(35) |
Dec
(28) |
| 2013 |
Jan
(39) |
Feb
(61) |
Mar
(35) |
Apr
(21) |
May
(45) |
Jun
(56) |
Jul
(20) |
Aug
(9) |
Sep
(10) |
Oct
(31) |
Nov
(8) |
Dec
(4) |
| 2014 |
Jan
(6) |
Feb
(7) |
Mar
(7) |
Apr
(6) |
May
(4) |
Jun
(8) |
Jul
(5) |
Aug
(2) |
Sep
(4) |
Oct
(4) |
Nov
(11) |
Dec
(5) |
| 2015 |
Jan
(4) |
Feb
(4) |
Mar
(3) |
Apr
(4) |
May
(9) |
Jun
(4) |
Jul
(15) |
Aug
(8) |
Sep
(16) |
Oct
(18) |
Nov
(15) |
Dec
(7) |
| 2016 |
Jan
(20) |
Feb
(9) |
Mar
(15) |
Apr
(24) |
May
(16) |
Jun
(28) |
Jul
(22) |
Aug
(23) |
Sep
(18) |
Oct
(30) |
Nov
(40) |
Dec
(9) |
| 2017 |
Jan
(1) |
Feb
(8) |
Mar
(37) |
Apr
(26) |
May
(25) |
Jun
(46) |
Jul
(24) |
Aug
(9) |
Sep
|
Oct
|
Nov
|
Dec
|
|
From: Seiji A. <sei...@hd...> - 2013-10-30 20:42:22
|
Currently irq vector handlers for tracing are registered in both set_intr_gate()
and __trace_alloc_intr_gate() in alloc_intr_gate().
But, we don't need to do that twice.
So, let's delete __trace_alloc_intr_gate().
Signed-off-by: Seiji Aguchi <sei...@hd...>
---
arch/x86/include/asm/desc.h | 22 ----------------------
1 file changed, 22 deletions(-)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 3d73437..50d033a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -392,32 +392,10 @@ static inline void alloc_system_vector(int vector)
}
}
-#ifdef CONFIG_TRACING
-static inline void trace_set_intr_gate(unsigned int gate, void *addr)
-{
- gate_desc s;
-
- pack_gate(&s, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
- write_idt_entry(trace_idt_table, gate, &s);
-}
-
-static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
-{
- trace_set_intr_gate(n, addr);
-}
-#else
-static inline void trace_set_intr_gate(unsigned int gate, void *addr)
-{
-}
-
-#define __trace_alloc_intr_gate(n, addr)
-#endif
-
#define alloc_intr_gate(n, addr) \
do { \
alloc_system_vector(n); \
set_intr_gate(n, addr); \
- __trace_alloc_intr_gate(n, trace_##addr); \
} while (0)
/*
--
1.8.3.1
|
|
From: Seiji A. <sei...@hd...> - 2013-10-30 20:42:21
|
Change from v3: - Separate modifications to make review easy. - Refactor implementations registering exception/irq_vector handers. (Patch 1, 2, 3) This series introduce page fault tracepoints. Detailed descriptions are explained in each patch. Any comments are welcome. Seiji Aguchi (4): Move set_intr_gate() into macro Register exception handler to trace IDT Delete __trace_alloc_intr_gate() Add page fault tracepoints arch/x86/include/asm/desc.h | 57 ++++++++++++++------------------- arch/x86/include/asm/hw_irq.h | 3 ++ arch/x86/include/asm/segment.h | 3 ++ arch/x86/include/asm/trace/exceptions.h | 52 ++++++++++++++++++++++++++++++ arch/x86/include/asm/traps.h | 20 ++++++++++++ arch/x86/kernel/entry_32.S | 10 ++++++ arch/x86/kernel/entry_64.S | 13 +++++++- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/kvm.c | 2 +- arch/x86/kernel/traps.c | 28 ++++++++-------- arch/x86/mm/Makefile | 2 ++ arch/x86/mm/fault.c | 23 +++++++++++++ 12 files changed, 165 insertions(+), 50 deletions(-) create mode 100644 arch/x86/include/asm/trace/exceptions.h -- 1.8.3.1 |
|
From: Seiji A. <sei...@hd...> - 2013-10-30 20:42:20
|
This patch introduces page fault tracepoints to x86 architecture
by switching IDT.
Two events, for user and kernel spaces, are introduced at the beginning
of page fault handler for tracing.
- User space event
There is a request of page fault event for user space as below.
https://lkml.kernel.org/r/1368079520-11015-2-git-send-email-fdeslaur+()+gmail+!+com
https://lkml.kernel.org/r/1368079520-11015-1-git-send-email-fdeslaur+()+gmail+!+com
- Kernel space event:
When we meature an overhead in kernel space for investigating performance
issues, we can check if it comes from the page fault events.
Signed-off-by: Seiji Aguchi <sei...@hd...>
---
arch/x86/include/asm/trace/exceptions.h | 52 +++++++++++++++++++++++++++++++++
arch/x86/mm/Makefile | 2 ++
arch/x86/mm/fault.c | 13 +++++++++
3 files changed, 67 insertions(+)
create mode 100644 arch/x86/include/asm/trace/exceptions.h
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
new file mode 100644
index 0000000..86540c0
--- /dev/null
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -0,0 +1,52 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM exceptions
+
+#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_PAGE_FAULT_H
+
+#include <linux/tracepoint.h>
+
+extern void trace_irq_vector_regfunc(void);
+extern void trace_irq_vector_unregfunc(void);
+
+DECLARE_EVENT_CLASS(x86_exceptions,
+
+ TP_PROTO(unsigned long address, struct pt_regs *regs,
+ unsigned long error_code),
+
+ TP_ARGS(address, regs, error_code),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, address )
+ __field( unsigned long, ip )
+ __field( unsigned long, error_code )
+ ),
+
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->ip = regs->ip;
+ __entry->error_code = error_code;
+ ),
+
+ TP_printk("address=%pf ip=%pf error_code=0x%lx",
+ (void *)__entry->address, (void *)__entry->ip,
+ __entry->error_code) );
+
+#define DEFINE_PAGE_FAULT_EVENT(name) \
+DEFINE_EVENT_FN(x86_exceptions, name, \
+ TP_PROTO(unsigned long address, struct pt_regs *regs, \
+ unsigned long error_code), \
+ TP_ARGS(address, regs, error_code), \
+ trace_irq_vector_regfunc, \
+ trace_irq_vector_unregfunc);
+
+DEFINE_PAGE_FAULT_EVENT(user_page_fault);
+DEFINE_PAGE_FAULT_EVENT(kernel_page_fault);
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE exceptions
+#endif /* _TRACE_PAGE_FAULT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 23d8e5f..6a19ad9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -6,6 +6,8 @@ nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp)
CFLAGS_setup_nx.o := $(nostackp)
+CFLAGS_fault.o := -I$(src)/../include/asm/trace
+
obj-$(CONFIG_X86_PAT) += pat_rbtree.o
obj-$(CONFIG_SMP) += tlb.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index fd3e281..f2730cbc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -20,6 +20,9 @@
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */
+#define CREATE_TRACE_POINTS
+#include <asm/trace/exceptions.h>
+
/*
* Page fault error code bits:
*
@@ -1232,12 +1235,22 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
exception_exit(prev_state);
}
+static void trace_page_fault_entries(struct pt_regs *regs,
+ unsigned long error_code)
+{
+ if (user_mode(regs))
+ trace_user_page_fault(read_cr2(), regs, error_code);
+ else
+ trace_kernel_page_fault(read_cr2(), regs, error_code);
+}
+
dotraplinkage void __kprobes
trace_do_page_fault(struct pt_regs *regs, unsigned long error_code)
{
enum ctx_state prev_state;
prev_state = exception_enter();
+ trace_page_fault_entries(regs, error_code);
__do_page_fault(regs, error_code);
exception_exit(prev_state);
}
--
1.8.3.1
|
|
From: Seiji A. <sei...@hd...> - 2013-10-30 20:42:16
|
Move set_intr_gate() into a macro by removing __alloc_intr_gate().
The purpose is to avoid failing a kernel build after applying
a subsequent patch which changes set_intr_gate() to macro.
Signed-off-by: Seiji Aguchi <sei...@hd...>
---
arch/x86/include/asm/desc.h | 7 +------
1 file changed, 1 insertion(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index b90e5df..d939567 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -395,15 +395,10 @@ static inline void trace_set_intr_gate(unsigned int gate, void *addr)
#define __trace_alloc_intr_gate(n, addr)
#endif
-static inline void __alloc_intr_gate(unsigned int n, void *addr)
-{
- set_intr_gate(n, addr);
-}
-
#define alloc_intr_gate(n, addr) \
do { \
alloc_system_vector(n); \
- __alloc_intr_gate(n, addr); \
+ set_intr_gate(n, addr); \
__trace_alloc_intr_gate(n, trace_##addr); \
} while (0)
--
1.8.3.1
|
|
From: Seiji A. <sei...@hd...> - 2013-10-30 19:27:41
|
Change from v3:
- Introduce EFI_VARS_DATA_SIZE_MAX macro.
- Add some description why a scanning/deleting logic is needed.
Currently, when mounting pstore file system, a read callback of efi_pstore
driver runs mutiple times as below.
- In the first read callback, scan efivar_sysfs_list from head and pass
a kmsg buffer of a entry to an upper pstore layer.
- In the second read callback, rescan efivar_sysfs_list from the entry and pass
another kmsg buffer to it.
- Repeat the scan and pass until the end of efivar_sysfs_list.
In this process, an entry is read across the multiple read function calls.
To avoid race between the read and erasion, the whole process above is
protected by a spinlock, holding in open() and releasing in close().
At the same time, kmemdup() is called to pass the buffer to pstore filesystem
during it.
And then, it causes a following lockdep warning.
To make the dynamic memory allocation runnable without taking spinlock,
holding off a deletion of sysfs entry if it happens while scanning it
via efi_pstore, and deleting it after the scan is completed.
To implement it, this patch introduces two flags, scanning and deleting,
to efivar_entry.
On the code basis, it seems that all the scanning and deleting logic is not
needed because __efivars->lock are not dropped when reading from the EFI
variable store.
But, the scanning and deleting logic is still needed because an efi-pstore and
a pstore filesystem works as follows.
In case an entry(A) is found, the pointer is saved to psi->data.
And efi_pstore_read() passes the entry(A) to a pstore filesystem by
releasing __efivars->lock.
And then, the pstore filesystem calls efi_pstore_read() again and the same
entry(A), which is saved to psi->data, is used for resuming to scan
a sysfs-list.
So, to protect the entry(A), the logic is needed.
[ 1.143710] ------------[ cut here ]------------
[ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
lockdep_trace_alloc+0x104/0x110()
[ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
[ 1.144058] Modules linked in:
[ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
[ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
ffff8800797e9b28
[ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
0000000000000046
[ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
ffff8800797e9b78
[ 1.144058] Call Trace:
[ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
[ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
[ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
[ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
[ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
[ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
[ 1.144058] [<ffffffff815147bb>] ?
efi_pstore_read_func.part.1+0x12b/0x170
[ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
[ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
[ 1.144058] [<ffffffff81514800>] ?
efi_pstore_read_func.part.1+0x170/0x170
[ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
[ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
[ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
[ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
[ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
[ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
[ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
[ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
[ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
[ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
[ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
[ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
[ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
[ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
[ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
[ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
[ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
Signed-off-by: Seiji Aguchi <sei...@hd...>
---
drivers/firmware/efi/efi-pstore.c | 143 +++++++++++++++++++++++++++++++++++---
drivers/firmware/efi/efivars.c | 12 ++--
drivers/firmware/efi/vars.c | 12 +++-
include/linux/efi.h | 4 ++
4 files changed, 155 insertions(+), 16 deletions(-)
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 5002d50..6ce31e9 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
static int efi_pstore_open(struct pstore_info *psi)
{
- efivar_entry_iter_begin();
psi->data = NULL;
return 0;
}
static int efi_pstore_close(struct pstore_info *psi)
{
- efivar_entry_iter_end();
psi->data = NULL;
return 0;
}
@@ -91,19 +89,125 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
__efivar_entry_get(entry, &entry->var.Attributes,
&entry->var.DataSize, entry->var.Data);
size = entry->var.DataSize;
+ memcpy(*cb_data->buf, entry->var.Data,
+ (size_t)min_t(unsigned long, EFIVARS_DATA_SIZE_MAX, size));
- *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
- if (*cb_data->buf == NULL)
- return -ENOMEM;
return size;
}
+/**
+ * efi_pstore_scan_sysfs_enter
+ * @entry: scanning entry
+ * @next: next entry
+ * @head: list head
+ */
+static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
+ struct efivar_entry *next,
+ struct list_head *head)
+{
+ pos->scanning = true;
+ if (&next->list != head)
+ next->scanning = true;
+}
+
+/**
+ * __efi_pstore_scan_sysfs_exit
+ * @entry: deleting entry
+ * @turn_off_scanning: Check if a scanning flag should be turned off
+ */
+static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
+ bool turn_off_scanning)
+{
+ if (entry->deleting) {
+ list_del(&entry->list);
+ efivar_entry_iter_end();
+ efivar_unregister(entry);
+ efivar_entry_iter_begin();
+ } else if (turn_off_scanning)
+ entry->scanning = false;
+}
+
+/**
+ * efi_pstore_scan_sysfs_exit
+ * @pos: scanning entry
+ * @next: next entry
+ * @head: list head
+ * @stop: a flag checking if scanning will stop
+ */
+static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
+ struct efivar_entry *next,
+ struct list_head *head, bool stop)
+{
+ __efi_pstore_scan_sysfs_exit(pos, true);
+ if (stop)
+ __efi_pstore_scan_sysfs_exit(next, &next->list != head);
+}
+
+/**
+ * efi_pstore_sysfs_entry_iter
+ *
+ * @data: function-specific data to pass to callback
+ * @pos: entry to begin iterating from
+ *
+ * You MUST call efivar_enter_iter_begin() before this function, and
+ * efivar_entry_iter_end() afterwards.
+ *
+ * It is possible to begin iteration from an arbitrary entry within
+ * the list by passing @pos. @pos is updated on return to point to
+ * the next entry of the last one passed to efi_pstore_read_func().
+ * To begin iterating from the beginning of the list @pos must be %NULL.
+ */
+static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
+{
+ struct efivar_entry *entry, *n;
+ struct list_head *head = &efivar_sysfs_list;
+ int size = 0;
+
+ if (!*pos) {
+ list_for_each_entry_safe(entry, n, head, list) {
+ efi_pstore_scan_sysfs_enter(entry, n, head);
+
+ size = efi_pstore_read_func(entry, data);
+ efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
+ if (size)
+ break;
+ }
+ *pos = n;
+ return size;
+ }
+
+ list_for_each_entry_safe_from((*pos), n, head, list) {
+ efi_pstore_scan_sysfs_enter((*pos), n, head);
+
+ size = efi_pstore_read_func((*pos), data);
+ efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+ if (size)
+ break;
+ }
+ *pos = n;
+ return size;
+}
+
+/**
+ * efi_pstore_read
+ *
+ * This function returns a size of NVRAM entry logged via efi_pstore_write().
+ * The meaning and behavior of efi_pstore/pstore are as below.
+ *
+ * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
+ * and pstore filesystem will continue reading subsequent entries.
+ * size == 0: Entry was not logged via efi_pstore_write(),
+ * and efi_pstore driver will continue reading subsequent entries.
+ * size < 0: Failed to get data of entry logging via efi_pstore_write(),
+ * and pstore will stop reading entry.
+ */
static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
int *count, struct timespec *timespec,
char **buf, bool *compressed,
struct pstore_info *psi)
{
struct pstore_read_data data;
+ ssize_t size;
data.id = id;
data.type = type;
@@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
data.compressed = compressed;
data.buf = buf;
- return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
- (struct efivar_entry **)&psi->data);
+ *data.buf = kzalloc(EFIVARS_DATA_SIZE_MAX, GFP_KERNEL);
+ if (!*data.buf)
+ return -ENOMEM;
+
+ efivar_entry_iter_begin();
+ size = efi_pstore_sysfs_entry_iter(&data,
+ (struct efivar_entry **)&psi->data);
+ efivar_entry_iter_end();
+ if (size <= 0)
+ kfree(*data.buf);
+ return size;
}
static int efi_pstore_write(enum pstore_type_id type,
@@ -184,9 +297,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
return 0;
}
+ if (entry->scanning) {
+ /*
+ * Skip deletion because this entry will be deleted
+ * after scanning is completed.
+ */
+ entry->deleting = true;
+ } else
+ list_del(&entry->list);
+
/* found */
__efivar_entry_delete(entry);
- list_del(&entry->list);
return 1;
}
@@ -214,10 +335,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
efivar_entry_iter_begin();
found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
- efivar_entry_iter_end();
- if (found)
+ if (found && !entry->scanning) {
+ efivar_entry_iter_end();
efivar_unregister(entry);
+ } else
+ efivar_entry_iter_end();
return 0;
}
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 8a7432a..8c5a61a 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -383,12 +383,16 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
else if (__efivar_entry_delete(entry))
err = -EIO;
- efivar_entry_iter_end();
-
- if (err)
+ if (err) {
+ efivar_entry_iter_end();
return err;
+ }
- efivar_unregister(entry);
+ if (!entry->scanning) {
+ efivar_entry_iter_end();
+ efivar_unregister(entry);
+ } else
+ efivar_entry_iter_end();
/* It's dead Jim.... */
return count;
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 391c67b..b22659c 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
if (!found)
return NULL;
- if (remove)
- list_del(&entry->list);
+ if (remove) {
+ if (entry->scanning) {
+ /*
+ * The entry will be deleted
+ * after scanning is completed.
+ */
+ entry->deleting = true;
+ } else
+ list_del(&entry->list);
+ }
return entry;
}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5f8f176..094ddd0 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -782,6 +782,8 @@ struct efivar_entry {
struct efi_variable var;
struct list_head list;
struct kobject kobj;
+ bool scanning;
+ bool deleting;
};
extern struct list_head efivar_sysfs_list;
@@ -840,6 +842,8 @@ void efivar_run_worker(void);
#if defined(CONFIG_EFI_VARS) || defined(CONFIG_EFI_VARS_MODULE)
int efivars_sysfs_init(void);
+#define EFIVARS_DATA_SIZE_MAX 1024
+
#endif /* CONFIG_EFI_VARS */
#endif /* _LINUX_EFI_H */
--
1.8.2.1
|
|
From: Seiji A. <sei...@hd...> - 2013-10-30 19:19:49
|
> On Fri, 18 Oct, at 10:30:58PM, Seiji Aguchi wrote: > > The scanning and deleting logic is still needed. In case an entry(A) > > is found, the pointer is saved to psi->data. And efi_pstore_read() > > passes the entry(A) to a pstore filesystem by releasing > > __efivars->lock. > > > > And then, the pstore filesystem calls efi_pstore_read() again and the > > same entry(A), which is saved to psi->data, is used for re-scanning a > > sysfs-list. (That is why list_for_each_entry_safe_from () is used in > > efi_pstore_sysfs_entry_iter().) > > > > So, to protect the entry(A), the logic is needed because, in pstore > > filesystem, __efivars->lock Is released. > > Very good point. Thanks, I will add the description in the next patch. Seiji |
|
From: Matt F. <ma...@co...> - 2013-10-30 14:35:41
|
On Fri, 18 Oct, at 10:30:58PM, Seiji Aguchi wrote: > The scanning and deleting logic is still needed. In case an entry(A) > is found, the pointer is saved to psi->data. And efi_pstore_read() > passes the entry(A) to a pstore filesystem by releasing > __efivars->lock. > > And then, the pstore filesystem calls efi_pstore_read() again and the > same entry(A), which is saved to psi->data, is used for re-scanning a > sysfs-list. (That is why list_for_each_entry_safe_from () is used in > efi_pstore_sysfs_entry_iter().) > > So, to protect the entry(A), the logic is needed because, in pstore > filesystem, __efivars->lock Is released. Very good point. -- Matt Fleming, Intel Open Source Technology Center |
|
From: Seiji A. <sei...@hd...> - 2013-10-18 22:31:20
|
Matt,
> It seems to me that because you're no longer dropping __efivars->lock
> when reading from the EFI variable store, you actually don't need all
> the ->scanning and ->deleting logic because anything that sets those
> flags runs to completion while holding the lock.
The scanning and deleting logic is still needed.
In case an entry(A) is found, the pointer is saved to psi->data.
And efi_pstore_read() passes the entry(A) to a pstore filesystem by releasing __efivars->lock.
And then, the pstore filesystem calls efi_pstore_read() again and the same entry(A), which is saved to
psi->data, is used for re-scanning a sysfs-list.
(That is why list_for_each_entry_safe_from () is used in efi_pstore_sysfs_entry_iter().)
So, to protect the entry(A), the logic is needed because, in pstore filesystem, __efivars->lock
Is released.
> Can't the patch be simplified to just allocating data.buf at the
> beginning of efi_pstore_read()?
I think data.buf is simply allocated at the beginning of efi_pstore_read() with this patch v3.
If you are not satisfied with it, could you please show me your pseudo code?
>Also, it would be a good idea to
> introduce a #define for the 1024 magic number, e.g.
>
> #define EFIVARS_DATA_SIZE_MAX 1024
It is good idea. I can fix it.
Seiji
<snip>
+/**
+ * efi_pstore_read
+ *
+ * This function returns a size of NVRAM entry logged via efi_pstore_write().
+ * The meaning and behavior of efi_pstore/pstore are as below.
+ *
+ * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
+ * and pstore filesystem will continue reading subsequent entries.
+ * size == 0: Entry was not logged via efi_pstore_write(),
+ * and efi_pstore driver will continue reading subsequent entries.
+ * size < 0: Failed to get data of entry logging via efi_pstore_write(),
+ * and pstore will stop reading entry.
+ */
static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
int *count, struct timespec *timespec,
char **buf, bool *compressed,
struct pstore_info *psi)
{
struct pstore_read_data data;
+ ssize_t size;
data.id = id;
data.type = type;
@@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
data.compressed = compressed;
data.buf = buf;
- return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
- (struct efivar_entry **)&psi->data);
+ *data.buf = kzalloc(1024, GFP_KERNEL);
+ if (!*data.buf)
+ return -ENOMEM;
+
+ efivar_entry_iter_begin();
+ size = efi_pstore_sysfs_entry_iter(&data,
+ (struct efivar_entry **)&psi->data);
+ efivar_entry_iter_end();
+ if (size <= 0)
+ kfree(*data.buf);
+ return size;
}
<snip>
|
|
From: Seiji A. <sei...@hd...> - 2013-10-17 15:11:34
|
Hi Madper,
Thank you for assisting me.
But, I need to discuss the implementation with Matt more.
After the discussion, I will post v4 and ask you to test it.
Please wait for a while.
Seiji
> -----Original Message-----
> From: Madper Xie [mailto:cx...@re...]
> Sent: Thursday, October 17, 2013 11:07 AM
> To: Madper Xie
> Cc: Seiji Aguchi; lin...@vg...; lin...@vg...; mat...@in...; ton...@in...; Tomoki
> Sekiyama; dle...@li...
> Subject: Re: [PATCH v3] efivars,efi-pstore: Hold off deletion of sysfs entry until, the scan is completed
>
> Hi folks,
> I tested it on my DELL XPS desktop. And it won't show any warnings
> when I mounting pstore and deleting pstore items after this patch
> applied.
>
> Tested-by: Madper Xie
> cx...@re... writes:
>
> > Oops, It seems my mu4e(a email client for emacs)'s auto-indent breaks the
> > patch... I apologize for this...
> >
> > sei...@hd... writes:
> >
> >> Hi Madper,
> >>
> >> I tested this patch on 3.12-rc4.
> >> Could you please send me the log when you failed to apply?
> >>
> >> Seiji
> >>
> >>> -----Original Message-----
> >>> From: Madper Xie [mailto:cx...@re...]
> >>> Sent: Thursday, October 17, 2013 1:54 AM
> >>> To: Seiji Aguchi
> >>> Cc: lin...@vg...; lin...@vg...; mat...@in...; ton...@in...; Tomoki Sekiyama;
> dle-
> >>> de...@li...
> >>> Subject: Re: [PATCH v3] efivars,efi-pstore: Hold off deletion of sysfs entry until, the scan is completed
> >>>
> >>> Howdy Seiji,
> >>> I failed appily this patch to both 3.12-rc2 and 3.12-rc4. Could you
> >>> please let me know which is the right tree for this patch?
> >>>
> >>> Thanks,
> >>> Madper.
> >>> sei...@hd... writes:
> >>>
> >>> > Change from v2:
> >>> > - Move dynamic memory allocation to efi_pstore_read() before holding
> >>> > efivars->lock to protect entry->var.Data.
> >>> > - Access to entry->scanning while holding efivars->lock.
> >>> > - Move a comment about a returned value from efi_pstore_read_func() to
> >>> > efi_pstore_read() because "size < 0" case may happen in efi_pstore_read().
> >>> >
> >>> > Currently, when mounting pstore file system, a read callback of efi_pstore
> >>> > driver runs mutiple times as below.
> >>> >
> >>> > - In the first read callback, scan efivar_sysfs_list from head and pass
> >>> > a kmsg buffer of a entry to an upper pstore layer.
> >>> > - In the second read callback, rescan efivar_sysfs_list from the entry and pass
> >>> > another kmsg buffer to it.
> >>> > - Repeat the scan and pass until the end of efivar_sysfs_list.
> >>> >
> >>> > In this process, an entry is read across the multiple read function calls.
> >>> > To avoid race between the read and erasion, the whole process above is
> >>> > protected by a spinlock, holding in open() and releasing in close().
> >>> >
> >>> > At the same time, kmemdup() is called to pass the buffer to pstore filesystem
> >>> > during it.
> >>> > And then, it causes a following lockdep warning.
> >>> >
> >>> > To make the dynamic memory allocation runnable without taking spinlock,
> >>> > holding off a deletion of sysfs entry if it happens while scanning it
> >>> > via efi_pstore, and deleting it after the scan is completed.
> >>> >
> >>> > To implement it, this patch introduces two flags, scanning and deleting,
> >>> > to efivar_entry.
> >>> >
> >>> > [ 1.143710] ------------[ cut here ]------------
> >>> > [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
> >>> > lockdep_trace_alloc+0x104/0x110()
> >>> > [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
> >>> > [ 1.144058] Modules linked in:
> >>> >
> >>> > [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
> >>> > [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
> >>> > ffff8800797e9b28
> >>> > [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
> >>> > 0000000000000046
> >>> > [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
> >>> > ffff8800797e9b78
> >>> > [ 1.144058] Call Trace:
> >>> > [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
> >>> > [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
> >>> > [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
> >>> > [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
> >>> > [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
> >>> > [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
> >>> > [ 1.144058] [<ffffffff815147bb>] ?
> >>> > efi_pstore_read_func.part.1+0x12b/0x170
> >>> > [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
> >>> > [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
> >>> > [ 1.144058] [<ffffffff81514800>] ?
> >>> > efi_pstore_read_func.part.1+0x170/0x170
> >>> > [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
> >>> > [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
> >>> > [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
> >>> > [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
> >>> > [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
> >>> > [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
> >>> > [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
> >>> > [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
> >>> > [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
> >>> > [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
> >>> > [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
> >>> > [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
> >>> > [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
> >>> > [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
> >>> > [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
> >>> > [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
> >>> > [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
> >>> >
> >>> > Signed-off-by: Seiji Aguchi <sei...@hd...>
> >>> > ---
> >>> > drivers/firmware/efi/efi-pstore.c | 143 +++++++++++++++++++++++++++++++++++---
> >>> > drivers/firmware/efi/efivars.c | 12 ++--
> >>> > drivers/firmware/efi/vars.c | 12 +++-
> >>> > include/linux/efi.h | 2 +
> >>> > 4 files changed, 153 insertions(+), 16 deletions(-)
> >>> >
> >>> > diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
> >>> > index 5002d50..ebd5dbc 100644
> >>> > --- a/drivers/firmware/efi/efi-pstore.c
> >>> > +++ b/drivers/firmware/efi/efi-pstore.c
> >>> > @@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
> >>> >
> >>> > static int efi_pstore_open(struct pstore_info *psi)
> >>> > {
> >>> > - efivar_entry_iter_begin();
> >>> > psi->data = NULL;
> >>> > return 0;
> >>> > }
> >>> >
> >>> > static int efi_pstore_close(struct pstore_info *psi)
> >>> > {
> >>> > - efivar_entry_iter_end();
> >>> > psi->data = NULL;
> >>> > return 0;
> >>> > }
> >>> > @@ -91,19 +89,125 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> >>> > __efivar_entry_get(entry, &entry->var.Attributes,
> >>> > &entry->var.DataSize, entry->var.Data);
> >>> > size = entry->var.DataSize;
> >>> > + memcpy(*cb_data->buf, entry->var.Data, (size_t)min_t(unsigned long,
> >>> > + 1024, size));
> >>> >
> >>> > - *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
> >>> > - if (*cb_data->buf == NULL)
> >>> > - return -ENOMEM;
> >>> > return size;
> >>> > }
> >>> >
> >>> > +/**
> >>> > + * efi_pstore_scan_sysfs_enter
> >>> > + * @entry: scanning entry
> >>> > + * @next: next entry
> >>> > + * @head: list head
> >>> > + */
> >>> > +static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
> >>> > + struct efivar_entry *next,
> >>> > + struct list_head *head)
> >>> > +{
> >>> > + pos->scanning = true;
> >>> > + if (&next->list != head)
> >>> > + next->scanning = true;
> >>> > +}
> >>> > +
> >>> > +/**
> >>> > + * __efi_pstore_scan_sysfs_exit
> >>> > + * @entry: deleting entry
> >>> > + * @turn_off_scanning: Check if a scanning flag should be turned off
> >>> > + */
> >>> > +static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
> >>> > + bool turn_off_scanning)
> >>> > +{
> >>> > + if (entry->deleting) {
> >>> > + list_del(&entry->list);
> >>> > + efivar_entry_iter_end();
> >>> > + efivar_unregister(entry);
> >>> > + efivar_entry_iter_begin();
> >>> > + } else if (turn_off_scanning)
> >>> > + entry->scanning = false;
> >>> > +}
> >>> > +
> >>> > +/**
> >>> > + * efi_pstore_scan_sysfs_exit
> >>> > + * @pos: scanning entry
> >>> > + * @next: next entry
> >>> > + * @head: list head
> >>> > + * @stop: a flag checking if scanning will stop
> >>> > + */
> >>> > +static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
> >>> > + struct efivar_entry *next,
> >>> > + struct list_head *head, bool stop)
> >>> > +{
> >>> > + __efi_pstore_scan_sysfs_exit(pos, true);
> >>> > + if (stop)
> >>> > + __efi_pstore_scan_sysfs_exit(next, &next->list != head);
> >>> > +}
> >>> > +
> >>> > +/**
> >>> > + * efi_pstore_sysfs_entry_iter
> >>> > + *
> >>> > + * @data: function-specific data to pass to callback
> >>> > + * @pos: entry to begin iterating from
> >>> > + *
> >>> > + * You MUST call efivar_enter_iter_begin() before this function, and
> >>> > + * efivar_entry_iter_end() afterwards.
> >>> > + *
> >>> > + * It is possible to begin iteration from an arbitrary entry within
> >>> > + * the list by passing @pos. @pos is updated on return to point to
> >>> > + * the next entry of the last one passed to efi_pstore_read_func().
> >>> > + * To begin iterating from the beginning of the list @pos must be %NULL.
> >>> > + */
> >>> > +static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
> >>> > +{
> >>> > + struct efivar_entry *entry, *n;
> >>> > + struct list_head *head = &efivar_sysfs_list;
> >>> > + int size = 0;
> >>> > +
> >>> > + if (!*pos) {
> >>> > + list_for_each_entry_safe(entry, n, head, list) {
> >>> > + efi_pstore_scan_sysfs_enter(entry, n, head);
> >>> > +
> >>> > + size = efi_pstore_read_func(entry, data);
> >>> > + efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
> >>> > + if (size)
> >>> > + break;
> >>> > + }
> >>> > + *pos = n;
> >>> > + return size;
> >>> > + }
> >>> > +
> >>> > + list_for_each_entry_safe_from((*pos), n, head, list) {
> >>> > + efi_pstore_scan_sysfs_enter((*pos), n, head);
> >>> > +
> >>> > + size = efi_pstore_read_func((*pos), data);
> >>> > + efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
> >>> > + if (size)
> >>> > + break;
> >>> > + }
> >>> > + *pos = n;
> >>> > + return size;
> >>> > +}
> >>> > +
> >>> > +/**
> >>> > + * efi_pstore_read
> >>> > + *
> >>> > + * This function returns a size of NVRAM entry logged via efi_pstore_write().
> >>> > + * The meaning and behavior of efi_pstore/pstore are as below.
> >>> > + *
> >>> > + * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
> >>> > + * and pstore filesystem will continue reading subsequent entries.
> >>> > + * size == 0: Entry was not logged via efi_pstore_write(),
> >>> > + * and efi_pstore driver will continue reading subsequent entries.
> >>> > + * size < 0: Failed to get data of entry logging via efi_pstore_write(),
> >>> > + * and pstore will stop reading entry.
> >>> > + */
> >>> > static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> >>> > int *count, struct timespec *timespec,
> >>> > char **buf, bool *compressed,
> >>> > struct pstore_info *psi)
> >>> > {
> >>> > struct pstore_read_data data;
> >>> > + ssize_t size;
> >>> >
> >>> > data.id = id;
> >>> > data.type = type;
> >>> > @@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> >>> > data.compressed = compressed;
> >>> > data.buf = buf;
> >>> >
> >>> > - return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
> >>> > - (struct efivar_entry **)&psi->data);
> >>> > + *data.buf = kzalloc(1024, GFP_KERNEL);
> >>> > + if (!*data.buf)
> >>> > + return -ENOMEM;
> >>> > +
> >>> > + efivar_entry_iter_begin();
> >>> > + size = efi_pstore_sysfs_entry_iter(&data,
> >>> > + (struct efivar_entry **)&psi->data);
> >>> > + efivar_entry_iter_end();
> >>> > + if (size <= 0)
> >>> > + kfree(*data.buf);
> >>> > + return size;
> >>> > }
> >>> >
> >>> > static int efi_pstore_write(enum pstore_type_id type,
> >>> > @@ -184,9 +297,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
> >>> > return 0;
> >>> > }
> >>> >
> >>> > + if (entry->scanning) {
> >>> > + /*
> >>> > + * Skip deletion because this entry will be deleted
> >>> > + * after scanning is completed.
> >>> > + */
> >>> > + entry->deleting = true;
> >>> > + } else
> >>> > + list_del(&entry->list);
> >>> > +
> >>> > /* found */
> >>> > __efivar_entry_delete(entry);
> >>> > - list_del(&entry->list);
> >>> >
> >>> > return 1;
> >>> > }
> >>> > @@ -214,10 +335,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
> >>> >
> >>> > efivar_entry_iter_begin();
> >>> > found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
> >>> > - efivar_entry_iter_end();
> >>> >
> >>> > - if (found)
> >>> > + if (found && !entry->scanning) {
> >>> > + efivar_entry_iter_end();
> >>> > efivar_unregister(entry);
> >>> > + } else
> >>> > + efivar_entry_iter_end();
> >>> >
> >>> > return 0;
> >>> > }
> >>> > diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
> >>> > index 8a7432a..8c5a61a 100644
> >>> > --- a/drivers/firmware/efi/efivars.c
> >>> > +++ b/drivers/firmware/efi/efivars.c
> >>> > @@ -383,12 +383,16 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
> >>> > else if (__efivar_entry_delete(entry))
> >>> > err = -EIO;
> >>> >
> >>> > - efivar_entry_iter_end();
> >>> > -
> >>> > - if (err)
> >>> > + if (err) {
> >>> > + efivar_entry_iter_end();
> >>> > return err;
> >>> > + }
> >>> >
> >>> > - efivar_unregister(entry);
> >>> > + if (!entry->scanning) {
> >>> > + efivar_entry_iter_end();
> >>> > + efivar_unregister(entry);
> >>> > + } else
> >>> > + efivar_entry_iter_end();
> >>> >
> >>> > /* It's dead Jim.... */
> >>> > return count;
> >>> > diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
> >>> > index 391c67b..b22659c 100644
> >>> > --- a/drivers/firmware/efi/vars.c
> >>> > +++ b/drivers/firmware/efi/vars.c
> >>> > @@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
> >>> > if (!found)
> >>> > return NULL;
> >>> >
> >>> > - if (remove)
> >>> > - list_del(&entry->list);
> >>> > + if (remove) {
> >>> > + if (entry->scanning) {
> >>> > + /*
> >>> > + * The entry will be deleted
> >>> > + * after scanning is completed.
> >>> > + */
> >>> > + entry->deleting = true;
> >>> > + } else
> >>> > + list_del(&entry->list);
> >>> > + }
> >>> >
> >>> > return entry;
> >>> > }
> >>> > diff --git a/include/linux/efi.h b/include/linux/efi.h
> >>> > index 5f8f176..04088fb 100644
> >>> > --- a/include/linux/efi.h
> >>> > +++ b/include/linux/efi.h
> >>> > @@ -782,6 +782,8 @@ struct efivar_entry {
> >>> > struct efi_variable var;
> >>> > struct list_head list;
> >>> > struct kobject kobj;
> >>> > + bool scanning;
> >>> > + bool deleting;
> >>> > };
> >>> >
> >>> > extern struct list_head efivar_sysfs_list;
> >>>
> >>>
> >>> --
> >>> Best,
> >>> Madper Xie.
>
>
> --
> Best,
> Madper Xie.
|
|
From: Madper X. <cx...@re...> - 2013-10-17 15:07:24
|
Hi folks,
I tested it on my DELL XPS desktop. And it won't show any warnings
when I mounting pstore and deleting pstore items after this patch
applied.
Tested-by: Madper Xie
cx...@re... writes:
> Oops, It seems my mu4e(a email client for emacs)'s auto-indent breaks the
> patch... I apologize for this...
>
> sei...@hd... writes:
>
>> Hi Madper,
>>
>> I tested this patch on 3.12-rc4.
>> Could you please send me the log when you failed to apply?
>>
>> Seiji
>>
>>> -----Original Message-----
>>> From: Madper Xie [mailto:cx...@re...]
>>> Sent: Thursday, October 17, 2013 1:54 AM
>>> To: Seiji Aguchi
>>> Cc: lin...@vg...; lin...@vg...; mat...@in...; ton...@in...; Tomoki Sekiyama; dle-
>>> de...@li...
>>> Subject: Re: [PATCH v3] efivars,efi-pstore: Hold off deletion of sysfs entry until, the scan is completed
>>>
>>> Howdy Seiji,
>>> I failed appily this patch to both 3.12-rc2 and 3.12-rc4. Could you
>>> please let me know which is the right tree for this patch?
>>>
>>> Thanks,
>>> Madper.
>>> sei...@hd... writes:
>>>
>>> > Change from v2:
>>> > - Move dynamic memory allocation to efi_pstore_read() before holding
>>> > efivars->lock to protect entry->var.Data.
>>> > - Access to entry->scanning while holding efivars->lock.
>>> > - Move a comment about a returned value from efi_pstore_read_func() to
>>> > efi_pstore_read() because "size < 0" case may happen in efi_pstore_read().
>>> >
>>> > Currently, when mounting pstore file system, a read callback of efi_pstore
>>> > driver runs mutiple times as below.
>>> >
>>> > - In the first read callback, scan efivar_sysfs_list from head and pass
>>> > a kmsg buffer of a entry to an upper pstore layer.
>>> > - In the second read callback, rescan efivar_sysfs_list from the entry and pass
>>> > another kmsg buffer to it.
>>> > - Repeat the scan and pass until the end of efivar_sysfs_list.
>>> >
>>> > In this process, an entry is read across the multiple read function calls.
>>> > To avoid race between the read and erasion, the whole process above is
>>> > protected by a spinlock, holding in open() and releasing in close().
>>> >
>>> > At the same time, kmemdup() is called to pass the buffer to pstore filesystem
>>> > during it.
>>> > And then, it causes a following lockdep warning.
>>> >
>>> > To make the dynamic memory allocation runnable without taking spinlock,
>>> > holding off a deletion of sysfs entry if it happens while scanning it
>>> > via efi_pstore, and deleting it after the scan is completed.
>>> >
>>> > To implement it, this patch introduces two flags, scanning and deleting,
>>> > to efivar_entry.
>>> >
>>> > [ 1.143710] ------------[ cut here ]------------
>>> > [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
>>> > lockdep_trace_alloc+0x104/0x110()
>>> > [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
>>> > [ 1.144058] Modules linked in:
>>> >
>>> > [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
>>> > [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
>>> > ffff8800797e9b28
>>> > [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
>>> > 0000000000000046
>>> > [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
>>> > ffff8800797e9b78
>>> > [ 1.144058] Call Trace:
>>> > [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
>>> > [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
>>> > [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
>>> > [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
>>> > [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
>>> > [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
>>> > [ 1.144058] [<ffffffff815147bb>] ?
>>> > efi_pstore_read_func.part.1+0x12b/0x170
>>> > [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
>>> > [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
>>> > [ 1.144058] [<ffffffff81514800>] ?
>>> > efi_pstore_read_func.part.1+0x170/0x170
>>> > [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
>>> > [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
>>> > [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
>>> > [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
>>> > [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
>>> > [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
>>> > [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
>>> > [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
>>> > [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
>>> > [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
>>> > [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
>>> > [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
>>> > [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
>>> > [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
>>> > [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
>>> > [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
>>> > [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
>>> >
>>> > Signed-off-by: Seiji Aguchi <sei...@hd...>
>>> > ---
>>> > drivers/firmware/efi/efi-pstore.c | 143 +++++++++++++++++++++++++++++++++++---
>>> > drivers/firmware/efi/efivars.c | 12 ++--
>>> > drivers/firmware/efi/vars.c | 12 +++-
>>> > include/linux/efi.h | 2 +
>>> > 4 files changed, 153 insertions(+), 16 deletions(-)
>>> >
>>> > diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
>>> > index 5002d50..ebd5dbc 100644
>>> > --- a/drivers/firmware/efi/efi-pstore.c
>>> > +++ b/drivers/firmware/efi/efi-pstore.c
>>> > @@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
>>> >
>>> > static int efi_pstore_open(struct pstore_info *psi)
>>> > {
>>> > - efivar_entry_iter_begin();
>>> > psi->data = NULL;
>>> > return 0;
>>> > }
>>> >
>>> > static int efi_pstore_close(struct pstore_info *psi)
>>> > {
>>> > - efivar_entry_iter_end();
>>> > psi->data = NULL;
>>> > return 0;
>>> > }
>>> > @@ -91,19 +89,125 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
>>> > __efivar_entry_get(entry, &entry->var.Attributes,
>>> > &entry->var.DataSize, entry->var.Data);
>>> > size = entry->var.DataSize;
>>> > + memcpy(*cb_data->buf, entry->var.Data, (size_t)min_t(unsigned long,
>>> > + 1024, size));
>>> >
>>> > - *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
>>> > - if (*cb_data->buf == NULL)
>>> > - return -ENOMEM;
>>> > return size;
>>> > }
>>> >
>>> > +/**
>>> > + * efi_pstore_scan_sysfs_enter
>>> > + * @entry: scanning entry
>>> > + * @next: next entry
>>> > + * @head: list head
>>> > + */
>>> > +static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
>>> > + struct efivar_entry *next,
>>> > + struct list_head *head)
>>> > +{
>>> > + pos->scanning = true;
>>> > + if (&next->list != head)
>>> > + next->scanning = true;
>>> > +}
>>> > +
>>> > +/**
>>> > + * __efi_pstore_scan_sysfs_exit
>>> > + * @entry: deleting entry
>>> > + * @turn_off_scanning: Check if a scanning flag should be turned off
>>> > + */
>>> > +static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
>>> > + bool turn_off_scanning)
>>> > +{
>>> > + if (entry->deleting) {
>>> > + list_del(&entry->list);
>>> > + efivar_entry_iter_end();
>>> > + efivar_unregister(entry);
>>> > + efivar_entry_iter_begin();
>>> > + } else if (turn_off_scanning)
>>> > + entry->scanning = false;
>>> > +}
>>> > +
>>> > +/**
>>> > + * efi_pstore_scan_sysfs_exit
>>> > + * @pos: scanning entry
>>> > + * @next: next entry
>>> > + * @head: list head
>>> > + * @stop: a flag checking if scanning will stop
>>> > + */
>>> > +static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
>>> > + struct efivar_entry *next,
>>> > + struct list_head *head, bool stop)
>>> > +{
>>> > + __efi_pstore_scan_sysfs_exit(pos, true);
>>> > + if (stop)
>>> > + __efi_pstore_scan_sysfs_exit(next, &next->list != head);
>>> > +}
>>> > +
>>> > +/**
>>> > + * efi_pstore_sysfs_entry_iter
>>> > + *
>>> > + * @data: function-specific data to pass to callback
>>> > + * @pos: entry to begin iterating from
>>> > + *
>>> > + * You MUST call efivar_enter_iter_begin() before this function, and
>>> > + * efivar_entry_iter_end() afterwards.
>>> > + *
>>> > + * It is possible to begin iteration from an arbitrary entry within
>>> > + * the list by passing @pos. @pos is updated on return to point to
>>> > + * the next entry of the last one passed to efi_pstore_read_func().
>>> > + * To begin iterating from the beginning of the list @pos must be %NULL.
>>> > + */
>>> > +static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
>>> > +{
>>> > + struct efivar_entry *entry, *n;
>>> > + struct list_head *head = &efivar_sysfs_list;
>>> > + int size = 0;
>>> > +
>>> > + if (!*pos) {
>>> > + list_for_each_entry_safe(entry, n, head, list) {
>>> > + efi_pstore_scan_sysfs_enter(entry, n, head);
>>> > +
>>> > + size = efi_pstore_read_func(entry, data);
>>> > + efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
>>> > + if (size)
>>> > + break;
>>> > + }
>>> > + *pos = n;
>>> > + return size;
>>> > + }
>>> > +
>>> > + list_for_each_entry_safe_from((*pos), n, head, list) {
>>> > + efi_pstore_scan_sysfs_enter((*pos), n, head);
>>> > +
>>> > + size = efi_pstore_read_func((*pos), data);
>>> > + efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
>>> > + if (size)
>>> > + break;
>>> > + }
>>> > + *pos = n;
>>> > + return size;
>>> > +}
>>> > +
>>> > +/**
>>> > + * efi_pstore_read
>>> > + *
>>> > + * This function returns a size of NVRAM entry logged via efi_pstore_write().
>>> > + * The meaning and behavior of efi_pstore/pstore are as below.
>>> > + *
>>> > + * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
>>> > + * and pstore filesystem will continue reading subsequent entries.
>>> > + * size == 0: Entry was not logged via efi_pstore_write(),
>>> > + * and efi_pstore driver will continue reading subsequent entries.
>>> > + * size < 0: Failed to get data of entry logging via efi_pstore_write(),
>>> > + * and pstore will stop reading entry.
>>> > + */
>>> > static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
>>> > int *count, struct timespec *timespec,
>>> > char **buf, bool *compressed,
>>> > struct pstore_info *psi)
>>> > {
>>> > struct pstore_read_data data;
>>> > + ssize_t size;
>>> >
>>> > data.id = id;
>>> > data.type = type;
>>> > @@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
>>> > data.compressed = compressed;
>>> > data.buf = buf;
>>> >
>>> > - return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
>>> > - (struct efivar_entry **)&psi->data);
>>> > + *data.buf = kzalloc(1024, GFP_KERNEL);
>>> > + if (!*data.buf)
>>> > + return -ENOMEM;
>>> > +
>>> > + efivar_entry_iter_begin();
>>> > + size = efi_pstore_sysfs_entry_iter(&data,
>>> > + (struct efivar_entry **)&psi->data);
>>> > + efivar_entry_iter_end();
>>> > + if (size <= 0)
>>> > + kfree(*data.buf);
>>> > + return size;
>>> > }
>>> >
>>> > static int efi_pstore_write(enum pstore_type_id type,
>>> > @@ -184,9 +297,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
>>> > return 0;
>>> > }
>>> >
>>> > + if (entry->scanning) {
>>> > + /*
>>> > + * Skip deletion because this entry will be deleted
>>> > + * after scanning is completed.
>>> > + */
>>> > + entry->deleting = true;
>>> > + } else
>>> > + list_del(&entry->list);
>>> > +
>>> > /* found */
>>> > __efivar_entry_delete(entry);
>>> > - list_del(&entry->list);
>>> >
>>> > return 1;
>>> > }
>>> > @@ -214,10 +335,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
>>> >
>>> > efivar_entry_iter_begin();
>>> > found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
>>> > - efivar_entry_iter_end();
>>> >
>>> > - if (found)
>>> > + if (found && !entry->scanning) {
>>> > + efivar_entry_iter_end();
>>> > efivar_unregister(entry);
>>> > + } else
>>> > + efivar_entry_iter_end();
>>> >
>>> > return 0;
>>> > }
>>> > diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
>>> > index 8a7432a..8c5a61a 100644
>>> > --- a/drivers/firmware/efi/efivars.c
>>> > +++ b/drivers/firmware/efi/efivars.c
>>> > @@ -383,12 +383,16 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
>>> > else if (__efivar_entry_delete(entry))
>>> > err = -EIO;
>>> >
>>> > - efivar_entry_iter_end();
>>> > -
>>> > - if (err)
>>> > + if (err) {
>>> > + efivar_entry_iter_end();
>>> > return err;
>>> > + }
>>> >
>>> > - efivar_unregister(entry);
>>> > + if (!entry->scanning) {
>>> > + efivar_entry_iter_end();
>>> > + efivar_unregister(entry);
>>> > + } else
>>> > + efivar_entry_iter_end();
>>> >
>>> > /* It's dead Jim.... */
>>> > return count;
>>> > diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
>>> > index 391c67b..b22659c 100644
>>> > --- a/drivers/firmware/efi/vars.c
>>> > +++ b/drivers/firmware/efi/vars.c
>>> > @@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
>>> > if (!found)
>>> > return NULL;
>>> >
>>> > - if (remove)
>>> > - list_del(&entry->list);
>>> > + if (remove) {
>>> > + if (entry->scanning) {
>>> > + /*
>>> > + * The entry will be deleted
>>> > + * after scanning is completed.
>>> > + */
>>> > + entry->deleting = true;
>>> > + } else
>>> > + list_del(&entry->list);
>>> > + }
>>> >
>>> > return entry;
>>> > }
>>> > diff --git a/include/linux/efi.h b/include/linux/efi.h
>>> > index 5f8f176..04088fb 100644
>>> > --- a/include/linux/efi.h
>>> > +++ b/include/linux/efi.h
>>> > @@ -782,6 +782,8 @@ struct efivar_entry {
>>> > struct efi_variable var;
>>> > struct list_head list;
>>> > struct kobject kobj;
>>> > + bool scanning;
>>> > + bool deleting;
>>> > };
>>> >
>>> > extern struct list_head efivar_sysfs_list;
>>>
>>>
>>> --
>>> Best,
>>> Madper Xie.
--
Best,
Madper Xie.
|
|
From: Madper X. <cx...@re...> - 2013-10-17 14:30:53
|
Oops, It seems my mu4e(a email client for emacs)'s auto-indent breaks the
patch... I apologize for this...
sei...@hd... writes:
> Hi Madper,
>
> I tested this patch on 3.12-rc4.
> Could you please send me the log when you failed to apply?
>
> Seiji
>
>> -----Original Message-----
>> From: Madper Xie [mailto:cx...@re...]
>> Sent: Thursday, October 17, 2013 1:54 AM
>> To: Seiji Aguchi
>> Cc: lin...@vg...; lin...@vg...; mat...@in...; ton...@in...; Tomoki Sekiyama; dle-
>> de...@li...
>> Subject: Re: [PATCH v3] efivars,efi-pstore: Hold off deletion of sysfs entry until, the scan is completed
>>
>> Howdy Seiji,
>> I failed appily this patch to both 3.12-rc2 and 3.12-rc4. Could you
>> please let me know which is the right tree for this patch?
>>
>> Thanks,
>> Madper.
>> sei...@hd... writes:
>>
>> > Change from v2:
>> > - Move dynamic memory allocation to efi_pstore_read() before holding
>> > efivars->lock to protect entry->var.Data.
>> > - Access to entry->scanning while holding efivars->lock.
>> > - Move a comment about a returned value from efi_pstore_read_func() to
>> > efi_pstore_read() because "size < 0" case may happen in efi_pstore_read().
>> >
>> > Currently, when mounting pstore file system, a read callback of efi_pstore
>> > driver runs mutiple times as below.
>> >
>> > - In the first read callback, scan efivar_sysfs_list from head and pass
>> > a kmsg buffer of a entry to an upper pstore layer.
>> > - In the second read callback, rescan efivar_sysfs_list from the entry and pass
>> > another kmsg buffer to it.
>> > - Repeat the scan and pass until the end of efivar_sysfs_list.
>> >
>> > In this process, an entry is read across the multiple read function calls.
>> > To avoid race between the read and erasion, the whole process above is
>> > protected by a spinlock, holding in open() and releasing in close().
>> >
>> > At the same time, kmemdup() is called to pass the buffer to pstore filesystem
>> > during it.
>> > And then, it causes a following lockdep warning.
>> >
>> > To make the dynamic memory allocation runnable without taking spinlock,
>> > holding off a deletion of sysfs entry if it happens while scanning it
>> > via efi_pstore, and deleting it after the scan is completed.
>> >
>> > To implement it, this patch introduces two flags, scanning and deleting,
>> > to efivar_entry.
>> >
>> > [ 1.143710] ------------[ cut here ]------------
>> > [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
>> > lockdep_trace_alloc+0x104/0x110()
>> > [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
>> > [ 1.144058] Modules linked in:
>> >
>> > [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
>> > [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
>> > ffff8800797e9b28
>> > [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
>> > 0000000000000046
>> > [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
>> > ffff8800797e9b78
>> > [ 1.144058] Call Trace:
>> > [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
>> > [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
>> > [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
>> > [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
>> > [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
>> > [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
>> > [ 1.144058] [<ffffffff815147bb>] ?
>> > efi_pstore_read_func.part.1+0x12b/0x170
>> > [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
>> > [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
>> > [ 1.144058] [<ffffffff81514800>] ?
>> > efi_pstore_read_func.part.1+0x170/0x170
>> > [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
>> > [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
>> > [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
>> > [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
>> > [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
>> > [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
>> > [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
>> > [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
>> > [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
>> > [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
>> > [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
>> > [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
>> > [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
>> > [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
>> > [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
>> > [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
>> > [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
>> >
>> > Signed-off-by: Seiji Aguchi <sei...@hd...>
>> > ---
>> > drivers/firmware/efi/efi-pstore.c | 143 +++++++++++++++++++++++++++++++++++---
>> > drivers/firmware/efi/efivars.c | 12 ++--
>> > drivers/firmware/efi/vars.c | 12 +++-
>> > include/linux/efi.h | 2 +
>> > 4 files changed, 153 insertions(+), 16 deletions(-)
>> >
>> > diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
>> > index 5002d50..ebd5dbc 100644
>> > --- a/drivers/firmware/efi/efi-pstore.c
>> > +++ b/drivers/firmware/efi/efi-pstore.c
>> > @@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
>> >
>> > static int efi_pstore_open(struct pstore_info *psi)
>> > {
>> > - efivar_entry_iter_begin();
>> > psi->data = NULL;
>> > return 0;
>> > }
>> >
>> > static int efi_pstore_close(struct pstore_info *psi)
>> > {
>> > - efivar_entry_iter_end();
>> > psi->data = NULL;
>> > return 0;
>> > }
>> > @@ -91,19 +89,125 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
>> > __efivar_entry_get(entry, &entry->var.Attributes,
>> > &entry->var.DataSize, entry->var.Data);
>> > size = entry->var.DataSize;
>> > + memcpy(*cb_data->buf, entry->var.Data, (size_t)min_t(unsigned long,
>> > + 1024, size));
>> >
>> > - *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
>> > - if (*cb_data->buf == NULL)
>> > - return -ENOMEM;
>> > return size;
>> > }
>> >
>> > +/**
>> > + * efi_pstore_scan_sysfs_enter
>> > + * @entry: scanning entry
>> > + * @next: next entry
>> > + * @head: list head
>> > + */
>> > +static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
>> > + struct efivar_entry *next,
>> > + struct list_head *head)
>> > +{
>> > + pos->scanning = true;
>> > + if (&next->list != head)
>> > + next->scanning = true;
>> > +}
>> > +
>> > +/**
>> > + * __efi_pstore_scan_sysfs_exit
>> > + * @entry: deleting entry
>> > + * @turn_off_scanning: Check if a scanning flag should be turned off
>> > + */
>> > +static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
>> > + bool turn_off_scanning)
>> > +{
>> > + if (entry->deleting) {
>> > + list_del(&entry->list);
>> > + efivar_entry_iter_end();
>> > + efivar_unregister(entry);
>> > + efivar_entry_iter_begin();
>> > + } else if (turn_off_scanning)
>> > + entry->scanning = false;
>> > +}
>> > +
>> > +/**
>> > + * efi_pstore_scan_sysfs_exit
>> > + * @pos: scanning entry
>> > + * @next: next entry
>> > + * @head: list head
>> > + * @stop: a flag checking if scanning will stop
>> > + */
>> > +static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
>> > + struct efivar_entry *next,
>> > + struct list_head *head, bool stop)
>> > +{
>> > + __efi_pstore_scan_sysfs_exit(pos, true);
>> > + if (stop)
>> > + __efi_pstore_scan_sysfs_exit(next, &next->list != head);
>> > +}
>> > +
>> > +/**
>> > + * efi_pstore_sysfs_entry_iter
>> > + *
>> > + * @data: function-specific data to pass to callback
>> > + * @pos: entry to begin iterating from
>> > + *
>> > + * You MUST call efivar_enter_iter_begin() before this function, and
>> > + * efivar_entry_iter_end() afterwards.
>> > + *
>> > + * It is possible to begin iteration from an arbitrary entry within
>> > + * the list by passing @pos. @pos is updated on return to point to
>> > + * the next entry of the last one passed to efi_pstore_read_func().
>> > + * To begin iterating from the beginning of the list @pos must be %NULL.
>> > + */
>> > +static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
>> > +{
>> > + struct efivar_entry *entry, *n;
>> > + struct list_head *head = &efivar_sysfs_list;
>> > + int size = 0;
>> > +
>> > + if (!*pos) {
>> > + list_for_each_entry_safe(entry, n, head, list) {
>> > + efi_pstore_scan_sysfs_enter(entry, n, head);
>> > +
>> > + size = efi_pstore_read_func(entry, data);
>> > + efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
>> > + if (size)
>> > + break;
>> > + }
>> > + *pos = n;
>> > + return size;
>> > + }
>> > +
>> > + list_for_each_entry_safe_from((*pos), n, head, list) {
>> > + efi_pstore_scan_sysfs_enter((*pos), n, head);
>> > +
>> > + size = efi_pstore_read_func((*pos), data);
>> > + efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
>> > + if (size)
>> > + break;
>> > + }
>> > + *pos = n;
>> > + return size;
>> > +}
>> > +
>> > +/**
>> > + * efi_pstore_read
>> > + *
>> > + * This function returns a size of NVRAM entry logged via efi_pstore_write().
>> > + * The meaning and behavior of efi_pstore/pstore are as below.
>> > + *
>> > + * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
>> > + * and pstore filesystem will continue reading subsequent entries.
>> > + * size == 0: Entry was not logged via efi_pstore_write(),
>> > + * and efi_pstore driver will continue reading subsequent entries.
>> > + * size < 0: Failed to get data of entry logging via efi_pstore_write(),
>> > + * and pstore will stop reading entry.
>> > + */
>> > static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
>> > int *count, struct timespec *timespec,
>> > char **buf, bool *compressed,
>> > struct pstore_info *psi)
>> > {
>> > struct pstore_read_data data;
>> > + ssize_t size;
>> >
>> > data.id = id;
>> > data.type = type;
>> > @@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
>> > data.compressed = compressed;
>> > data.buf = buf;
>> >
>> > - return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
>> > - (struct efivar_entry **)&psi->data);
>> > + *data.buf = kzalloc(1024, GFP_KERNEL);
>> > + if (!*data.buf)
>> > + return -ENOMEM;
>> > +
>> > + efivar_entry_iter_begin();
>> > + size = efi_pstore_sysfs_entry_iter(&data,
>> > + (struct efivar_entry **)&psi->data);
>> > + efivar_entry_iter_end();
>> > + if (size <= 0)
>> > + kfree(*data.buf);
>> > + return size;
>> > }
>> >
>> > static int efi_pstore_write(enum pstore_type_id type,
>> > @@ -184,9 +297,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
>> > return 0;
>> > }
>> >
>> > + if (entry->scanning) {
>> > + /*
>> > + * Skip deletion because this entry will be deleted
>> > + * after scanning is completed.
>> > + */
>> > + entry->deleting = true;
>> > + } else
>> > + list_del(&entry->list);
>> > +
>> > /* found */
>> > __efivar_entry_delete(entry);
>> > - list_del(&entry->list);
>> >
>> > return 1;
>> > }
>> > @@ -214,10 +335,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
>> >
>> > efivar_entry_iter_begin();
>> > found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
>> > - efivar_entry_iter_end();
>> >
>> > - if (found)
>> > + if (found && !entry->scanning) {
>> > + efivar_entry_iter_end();
>> > efivar_unregister(entry);
>> > + } else
>> > + efivar_entry_iter_end();
>> >
>> > return 0;
>> > }
>> > diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
>> > index 8a7432a..8c5a61a 100644
>> > --- a/drivers/firmware/efi/efivars.c
>> > +++ b/drivers/firmware/efi/efivars.c
>> > @@ -383,12 +383,16 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
>> > else if (__efivar_entry_delete(entry))
>> > err = -EIO;
>> >
>> > - efivar_entry_iter_end();
>> > -
>> > - if (err)
>> > + if (err) {
>> > + efivar_entry_iter_end();
>> > return err;
>> > + }
>> >
>> > - efivar_unregister(entry);
>> > + if (!entry->scanning) {
>> > + efivar_entry_iter_end();
>> > + efivar_unregister(entry);
>> > + } else
>> > + efivar_entry_iter_end();
>> >
>> > /* It's dead Jim.... */
>> > return count;
>> > diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
>> > index 391c67b..b22659c 100644
>> > --- a/drivers/firmware/efi/vars.c
>> > +++ b/drivers/firmware/efi/vars.c
>> > @@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
>> > if (!found)
>> > return NULL;
>> >
>> > - if (remove)
>> > - list_del(&entry->list);
>> > + if (remove) {
>> > + if (entry->scanning) {
>> > + /*
>> > + * The entry will be deleted
>> > + * after scanning is completed.
>> > + */
>> > + entry->deleting = true;
>> > + } else
>> > + list_del(&entry->list);
>> > + }
>> >
>> > return entry;
>> > }
>> > diff --git a/include/linux/efi.h b/include/linux/efi.h
>> > index 5f8f176..04088fb 100644
>> > --- a/include/linux/efi.h
>> > +++ b/include/linux/efi.h
>> > @@ -782,6 +782,8 @@ struct efivar_entry {
>> > struct efi_variable var;
>> > struct list_head list;
>> > struct kobject kobj;
>> > + bool scanning;
>> > + bool deleting;
>> > };
>> >
>> > extern struct list_head efivar_sysfs_list;
>>
>>
>> --
>> Best,
>> Madper Xie.
--
Best,
Madper Xie.
|
|
From: Matt F. <ma...@co...> - 2013-10-17 13:18:34
|
On Fri, 11 Oct, at 02:29:07PM, Seiji Aguchi wrote: > Change from v2: > - Move dynamic memory allocation to efi_pstore_read() before holding > efivars->lock to protect entry->var.Data. > - Access to entry->scanning while holding efivars->lock. > - Move a comment about a returned value from efi_pstore_read_func() to > efi_pstore_read() because "size < 0" case may happen in efi_pstore_read(). It seems to me that because you're no longer dropping __efivars->lock when reading from the EFI variable store, you actually don't need all the ->scanning and ->deleting logic because anything that sets those flags runs to completion while holding the lock. Can't the patch be simplified to just allocating data.buf at the beginning of efi_pstore_read()? Also, it would be a good idea to introduce a #define for the 1024 magic number, e.g. #define EFIVARS_DATA_SIZE_MAX 1024 -- Matt Fleming, Intel Open Source Technology Center |
|
From: Seiji A. <sei...@hd...> - 2013-10-17 13:07:54
|
Hi Madper,
I tested this patch on 3.12-rc4.
Could you please send me the log when you failed to apply?
Seiji
> -----Original Message-----
> From: Madper Xie [mailto:cx...@re...]
> Sent: Thursday, October 17, 2013 1:54 AM
> To: Seiji Aguchi
> Cc: lin...@vg...; lin...@vg...; mat...@in...; ton...@in...; Tomoki Sekiyama; dle-
> de...@li...
> Subject: Re: [PATCH v3] efivars,efi-pstore: Hold off deletion of sysfs entry until, the scan is completed
>
> Howdy Seiji,
> I failed appily this patch to both 3.12-rc2 and 3.12-rc4. Could you
> please let me know which is the right tree for this patch?
>
> Thanks,
> Madper.
> sei...@hd... writes:
>
> > Change from v2:
> > - Move dynamic memory allocation to efi_pstore_read() before holding
> > efivars->lock to protect entry->var.Data.
> > - Access to entry->scanning while holding efivars->lock.
> > - Move a comment about a returned value from efi_pstore_read_func() to
> > efi_pstore_read() because "size < 0" case may happen in efi_pstore_read().
> >
> > Currently, when mounting pstore file system, a read callback of efi_pstore
> > driver runs mutiple times as below.
> >
> > - In the first read callback, scan efivar_sysfs_list from head and pass
> > a kmsg buffer of a entry to an upper pstore layer.
> > - In the second read callback, rescan efivar_sysfs_list from the entry and pass
> > another kmsg buffer to it.
> > - Repeat the scan and pass until the end of efivar_sysfs_list.
> >
> > In this process, an entry is read across the multiple read function calls.
> > To avoid race between the read and erasion, the whole process above is
> > protected by a spinlock, holding in open() and releasing in close().
> >
> > At the same time, kmemdup() is called to pass the buffer to pstore filesystem
> > during it.
> > And then, it causes a following lockdep warning.
> >
> > To make the dynamic memory allocation runnable without taking spinlock,
> > holding off a deletion of sysfs entry if it happens while scanning it
> > via efi_pstore, and deleting it after the scan is completed.
> >
> > To implement it, this patch introduces two flags, scanning and deleting,
> > to efivar_entry.
> >
> > [ 1.143710] ------------[ cut here ]------------
> > [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
> > lockdep_trace_alloc+0x104/0x110()
> > [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
> > [ 1.144058] Modules linked in:
> >
> > [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
> > [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
> > ffff8800797e9b28
> > [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
> > 0000000000000046
> > [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
> > ffff8800797e9b78
> > [ 1.144058] Call Trace:
> > [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
> > [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
> > [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
> > [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
> > [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
> > [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
> > [ 1.144058] [<ffffffff815147bb>] ?
> > efi_pstore_read_func.part.1+0x12b/0x170
> > [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
> > [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
> > [ 1.144058] [<ffffffff81514800>] ?
> > efi_pstore_read_func.part.1+0x170/0x170
> > [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
> > [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
> > [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
> > [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
> > [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
> > [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
> > [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
> > [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
> > [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
> > [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
> > [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
> > [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
> > [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
> > [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
> > [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
> > [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
> > [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
> >
> > Signed-off-by: Seiji Aguchi <sei...@hd...>
> > ---
> > drivers/firmware/efi/efi-pstore.c | 143 +++++++++++++++++++++++++++++++++++---
> > drivers/firmware/efi/efivars.c | 12 ++--
> > drivers/firmware/efi/vars.c | 12 +++-
> > include/linux/efi.h | 2 +
> > 4 files changed, 153 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
> > index 5002d50..ebd5dbc 100644
> > --- a/drivers/firmware/efi/efi-pstore.c
> > +++ b/drivers/firmware/efi/efi-pstore.c
> > @@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
> >
> > static int efi_pstore_open(struct pstore_info *psi)
> > {
> > - efivar_entry_iter_begin();
> > psi->data = NULL;
> > return 0;
> > }
> >
> > static int efi_pstore_close(struct pstore_info *psi)
> > {
> > - efivar_entry_iter_end();
> > psi->data = NULL;
> > return 0;
> > }
> > @@ -91,19 +89,125 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> > __efivar_entry_get(entry, &entry->var.Attributes,
> > &entry->var.DataSize, entry->var.Data);
> > size = entry->var.DataSize;
> > + memcpy(*cb_data->buf, entry->var.Data, (size_t)min_t(unsigned long,
> > + 1024, size));
> >
> > - *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
> > - if (*cb_data->buf == NULL)
> > - return -ENOMEM;
> > return size;
> > }
> >
> > +/**
> > + * efi_pstore_scan_sysfs_enter
> > + * @entry: scanning entry
> > + * @next: next entry
> > + * @head: list head
> > + */
> > +static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
> > + struct efivar_entry *next,
> > + struct list_head *head)
> > +{
> > + pos->scanning = true;
> > + if (&next->list != head)
> > + next->scanning = true;
> > +}
> > +
> > +/**
> > + * __efi_pstore_scan_sysfs_exit
> > + * @entry: deleting entry
> > + * @turn_off_scanning: Check if a scanning flag should be turned off
> > + */
> > +static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
> > + bool turn_off_scanning)
> > +{
> > + if (entry->deleting) {
> > + list_del(&entry->list);
> > + efivar_entry_iter_end();
> > + efivar_unregister(entry);
> > + efivar_entry_iter_begin();
> > + } else if (turn_off_scanning)
> > + entry->scanning = false;
> > +}
> > +
> > +/**
> > + * efi_pstore_scan_sysfs_exit
> > + * @pos: scanning entry
> > + * @next: next entry
> > + * @head: list head
> > + * @stop: a flag checking if scanning will stop
> > + */
> > +static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
> > + struct efivar_entry *next,
> > + struct list_head *head, bool stop)
> > +{
> > + __efi_pstore_scan_sysfs_exit(pos, true);
> > + if (stop)
> > + __efi_pstore_scan_sysfs_exit(next, &next->list != head);
> > +}
> > +
> > +/**
> > + * efi_pstore_sysfs_entry_iter
> > + *
> > + * @data: function-specific data to pass to callback
> > + * @pos: entry to begin iterating from
> > + *
> > + * You MUST call efivar_enter_iter_begin() before this function, and
> > + * efivar_entry_iter_end() afterwards.
> > + *
> > + * It is possible to begin iteration from an arbitrary entry within
> > + * the list by passing @pos. @pos is updated on return to point to
> > + * the next entry of the last one passed to efi_pstore_read_func().
> > + * To begin iterating from the beginning of the list @pos must be %NULL.
> > + */
> > +static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
> > +{
> > + struct efivar_entry *entry, *n;
> > + struct list_head *head = &efivar_sysfs_list;
> > + int size = 0;
> > +
> > + if (!*pos) {
> > + list_for_each_entry_safe(entry, n, head, list) {
> > + efi_pstore_scan_sysfs_enter(entry, n, head);
> > +
> > + size = efi_pstore_read_func(entry, data);
> > + efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
> > + if (size)
> > + break;
> > + }
> > + *pos = n;
> > + return size;
> > + }
> > +
> > + list_for_each_entry_safe_from((*pos), n, head, list) {
> > + efi_pstore_scan_sysfs_enter((*pos), n, head);
> > +
> > + size = efi_pstore_read_func((*pos), data);
> > + efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
> > + if (size)
> > + break;
> > + }
> > + *pos = n;
> > + return size;
> > +}
> > +
> > +/**
> > + * efi_pstore_read
> > + *
> > + * This function returns a size of NVRAM entry logged via efi_pstore_write().
> > + * The meaning and behavior of efi_pstore/pstore are as below.
> > + *
> > + * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
> > + * and pstore filesystem will continue reading subsequent entries.
> > + * size == 0: Entry was not logged via efi_pstore_write(),
> > + * and efi_pstore driver will continue reading subsequent entries.
> > + * size < 0: Failed to get data of entry logging via efi_pstore_write(),
> > + * and pstore will stop reading entry.
> > + */
> > static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> > int *count, struct timespec *timespec,
> > char **buf, bool *compressed,
> > struct pstore_info *psi)
> > {
> > struct pstore_read_data data;
> > + ssize_t size;
> >
> > data.id = id;
> > data.type = type;
> > @@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> > data.compressed = compressed;
> > data.buf = buf;
> >
> > - return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
> > - (struct efivar_entry **)&psi->data);
> > + *data.buf = kzalloc(1024, GFP_KERNEL);
> > + if (!*data.buf)
> > + return -ENOMEM;
> > +
> > + efivar_entry_iter_begin();
> > + size = efi_pstore_sysfs_entry_iter(&data,
> > + (struct efivar_entry **)&psi->data);
> > + efivar_entry_iter_end();
> > + if (size <= 0)
> > + kfree(*data.buf);
> > + return size;
> > }
> >
> > static int efi_pstore_write(enum pstore_type_id type,
> > @@ -184,9 +297,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
> > return 0;
> > }
> >
> > + if (entry->scanning) {
> > + /*
> > + * Skip deletion because this entry will be deleted
> > + * after scanning is completed.
> > + */
> > + entry->deleting = true;
> > + } else
> > + list_del(&entry->list);
> > +
> > /* found */
> > __efivar_entry_delete(entry);
> > - list_del(&entry->list);
> >
> > return 1;
> > }
> > @@ -214,10 +335,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
> >
> > efivar_entry_iter_begin();
> > found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
> > - efivar_entry_iter_end();
> >
> > - if (found)
> > + if (found && !entry->scanning) {
> > + efivar_entry_iter_end();
> > efivar_unregister(entry);
> > + } else
> > + efivar_entry_iter_end();
> >
> > return 0;
> > }
> > diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
> > index 8a7432a..8c5a61a 100644
> > --- a/drivers/firmware/efi/efivars.c
> > +++ b/drivers/firmware/efi/efivars.c
> > @@ -383,12 +383,16 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
> > else if (__efivar_entry_delete(entry))
> > err = -EIO;
> >
> > - efivar_entry_iter_end();
> > -
> > - if (err)
> > + if (err) {
> > + efivar_entry_iter_end();
> > return err;
> > + }
> >
> > - efivar_unregister(entry);
> > + if (!entry->scanning) {
> > + efivar_entry_iter_end();
> > + efivar_unregister(entry);
> > + } else
> > + efivar_entry_iter_end();
> >
> > /* It's dead Jim.... */
> > return count;
> > diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
> > index 391c67b..b22659c 100644
> > --- a/drivers/firmware/efi/vars.c
> > +++ b/drivers/firmware/efi/vars.c
> > @@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
> > if (!found)
> > return NULL;
> >
> > - if (remove)
> > - list_del(&entry->list);
> > + if (remove) {
> > + if (entry->scanning) {
> > + /*
> > + * The entry will be deleted
> > + * after scanning is completed.
> > + */
> > + entry->deleting = true;
> > + } else
> > + list_del(&entry->list);
> > + }
> >
> > return entry;
> > }
> > diff --git a/include/linux/efi.h b/include/linux/efi.h
> > index 5f8f176..04088fb 100644
> > --- a/include/linux/efi.h
> > +++ b/include/linux/efi.h
> > @@ -782,6 +782,8 @@ struct efivar_entry {
> > struct efi_variable var;
> > struct list_head list;
> > struct kobject kobj;
> > + bool scanning;
> > + bool deleting;
> > };
> >
> > extern struct list_head efivar_sysfs_list;
>
>
> --
> Best,
> Madper Xie.
|
|
From: Madper X. <cx...@re...> - 2013-10-17 05:54:33
|
Howdy Seiji,
I failed appily this patch to both 3.12-rc2 and 3.12-rc4. Could you
please let me know which is the right tree for this patch?
Thanks,
Madper.
sei...@hd... writes:
> Change from v2:
> - Move dynamic memory allocation to efi_pstore_read() before holding
> efivars->lock to protect entry->var.Data.
> - Access to entry->scanning while holding efivars->lock.
> - Move a comment about a returned value from efi_pstore_read_func() to
> efi_pstore_read() because "size < 0" case may happen in efi_pstore_read().
>
> Currently, when mounting pstore file system, a read callback of efi_pstore
> driver runs mutiple times as below.
>
> - In the first read callback, scan efivar_sysfs_list from head and pass
> a kmsg buffer of a entry to an upper pstore layer.
> - In the second read callback, rescan efivar_sysfs_list from the entry and pass
> another kmsg buffer to it.
> - Repeat the scan and pass until the end of efivar_sysfs_list.
>
> In this process, an entry is read across the multiple read function calls.
> To avoid race between the read and erasion, the whole process above is
> protected by a spinlock, holding in open() and releasing in close().
>
> At the same time, kmemdup() is called to pass the buffer to pstore filesystem
> during it.
> And then, it causes a following lockdep warning.
>
> To make the dynamic memory allocation runnable without taking spinlock,
> holding off a deletion of sysfs entry if it happens while scanning it
> via efi_pstore, and deleting it after the scan is completed.
>
> To implement it, this patch introduces two flags, scanning and deleting,
> to efivar_entry.
>
> [ 1.143710] ------------[ cut here ]------------
> [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
> lockdep_trace_alloc+0x104/0x110()
> [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
> [ 1.144058] Modules linked in:
>
> [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
> [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
> ffff8800797e9b28
> [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
> 0000000000000046
> [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
> ffff8800797e9b78
> [ 1.144058] Call Trace:
> [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
> [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
> [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
> [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
> [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
> [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
> [ 1.144058] [<ffffffff815147bb>] ?
> efi_pstore_read_func.part.1+0x12b/0x170
> [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
> [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
> [ 1.144058] [<ffffffff81514800>] ?
> efi_pstore_read_func.part.1+0x170/0x170
> [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
> [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
> [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
> [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
> [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
> [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
> [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
> [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
> [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
> [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
> [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
> [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
> [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
> [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
> [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
> [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
> [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
>
> Signed-off-by: Seiji Aguchi <sei...@hd...>
> ---
> drivers/firmware/efi/efi-pstore.c | 143 +++++++++++++++++++++++++++++++++++---
> drivers/firmware/efi/efivars.c | 12 ++--
> drivers/firmware/efi/vars.c | 12 +++-
> include/linux/efi.h | 2 +
> 4 files changed, 153 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
> index 5002d50..ebd5dbc 100644
> --- a/drivers/firmware/efi/efi-pstore.c
> +++ b/drivers/firmware/efi/efi-pstore.c
> @@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
>
> static int efi_pstore_open(struct pstore_info *psi)
> {
> - efivar_entry_iter_begin();
> psi->data = NULL;
> return 0;
> }
>
> static int efi_pstore_close(struct pstore_info *psi)
> {
> - efivar_entry_iter_end();
> psi->data = NULL;
> return 0;
> }
> @@ -91,19 +89,125 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> __efivar_entry_get(entry, &entry->var.Attributes,
> &entry->var.DataSize, entry->var.Data);
> size = entry->var.DataSize;
> + memcpy(*cb_data->buf, entry->var.Data, (size_t)min_t(unsigned long,
> + 1024, size));
>
> - *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
> - if (*cb_data->buf == NULL)
> - return -ENOMEM;
> return size;
> }
>
> +/**
> + * efi_pstore_scan_sysfs_enter
> + * @entry: scanning entry
> + * @next: next entry
> + * @head: list head
> + */
> +static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
> + struct efivar_entry *next,
> + struct list_head *head)
> +{
> + pos->scanning = true;
> + if (&next->list != head)
> + next->scanning = true;
> +}
> +
> +/**
> + * __efi_pstore_scan_sysfs_exit
> + * @entry: deleting entry
> + * @turn_off_scanning: Check if a scanning flag should be turned off
> + */
> +static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
> + bool turn_off_scanning)
> +{
> + if (entry->deleting) {
> + list_del(&entry->list);
> + efivar_entry_iter_end();
> + efivar_unregister(entry);
> + efivar_entry_iter_begin();
> + } else if (turn_off_scanning)
> + entry->scanning = false;
> +}
> +
> +/**
> + * efi_pstore_scan_sysfs_exit
> + * @pos: scanning entry
> + * @next: next entry
> + * @head: list head
> + * @stop: a flag checking if scanning will stop
> + */
> +static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
> + struct efivar_entry *next,
> + struct list_head *head, bool stop)
> +{
> + __efi_pstore_scan_sysfs_exit(pos, true);
> + if (stop)
> + __efi_pstore_scan_sysfs_exit(next, &next->list != head);
> +}
> +
> +/**
> + * efi_pstore_sysfs_entry_iter
> + *
> + * @data: function-specific data to pass to callback
> + * @pos: entry to begin iterating from
> + *
> + * You MUST call efivar_enter_iter_begin() before this function, and
> + * efivar_entry_iter_end() afterwards.
> + *
> + * It is possible to begin iteration from an arbitrary entry within
> + * the list by passing @pos. @pos is updated on return to point to
> + * the next entry of the last one passed to efi_pstore_read_func().
> + * To begin iterating from the beginning of the list @pos must be %NULL.
> + */
> +static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
> +{
> + struct efivar_entry *entry, *n;
> + struct list_head *head = &efivar_sysfs_list;
> + int size = 0;
> +
> + if (!*pos) {
> + list_for_each_entry_safe(entry, n, head, list) {
> + efi_pstore_scan_sysfs_enter(entry, n, head);
> +
> + size = efi_pstore_read_func(entry, data);
> + efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
> + if (size)
> + break;
> + }
> + *pos = n;
> + return size;
> + }
> +
> + list_for_each_entry_safe_from((*pos), n, head, list) {
> + efi_pstore_scan_sysfs_enter((*pos), n, head);
> +
> + size = efi_pstore_read_func((*pos), data);
> + efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
> + if (size)
> + break;
> + }
> + *pos = n;
> + return size;
> +}
> +
> +/**
> + * efi_pstore_read
> + *
> + * This function returns a size of NVRAM entry logged via efi_pstore_write().
> + * The meaning and behavior of efi_pstore/pstore are as below.
> + *
> + * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
> + * and pstore filesystem will continue reading subsequent entries.
> + * size == 0: Entry was not logged via efi_pstore_write(),
> + * and efi_pstore driver will continue reading subsequent entries.
> + * size < 0: Failed to get data of entry logging via efi_pstore_write(),
> + * and pstore will stop reading entry.
> + */
> static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> int *count, struct timespec *timespec,
> char **buf, bool *compressed,
> struct pstore_info *psi)
> {
> struct pstore_read_data data;
> + ssize_t size;
>
> data.id = id;
> data.type = type;
> @@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> data.compressed = compressed;
> data.buf = buf;
>
> - return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
> - (struct efivar_entry **)&psi->data);
> + *data.buf = kzalloc(1024, GFP_KERNEL);
> + if (!*data.buf)
> + return -ENOMEM;
> +
> + efivar_entry_iter_begin();
> + size = efi_pstore_sysfs_entry_iter(&data,
> + (struct efivar_entry **)&psi->data);
> + efivar_entry_iter_end();
> + if (size <= 0)
> + kfree(*data.buf);
> + return size;
> }
>
> static int efi_pstore_write(enum pstore_type_id type,
> @@ -184,9 +297,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
> return 0;
> }
>
> + if (entry->scanning) {
> + /*
> + * Skip deletion because this entry will be deleted
> + * after scanning is completed.
> + */
> + entry->deleting = true;
> + } else
> + list_del(&entry->list);
> +
> /* found */
> __efivar_entry_delete(entry);
> - list_del(&entry->list);
>
> return 1;
> }
> @@ -214,10 +335,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
>
> efivar_entry_iter_begin();
> found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
> - efivar_entry_iter_end();
>
> - if (found)
> + if (found && !entry->scanning) {
> + efivar_entry_iter_end();
> efivar_unregister(entry);
> + } else
> + efivar_entry_iter_end();
>
> return 0;
> }
> diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
> index 8a7432a..8c5a61a 100644
> --- a/drivers/firmware/efi/efivars.c
> +++ b/drivers/firmware/efi/efivars.c
> @@ -383,12 +383,16 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
> else if (__efivar_entry_delete(entry))
> err = -EIO;
>
> - efivar_entry_iter_end();
> -
> - if (err)
> + if (err) {
> + efivar_entry_iter_end();
> return err;
> + }
>
> - efivar_unregister(entry);
> + if (!entry->scanning) {
> + efivar_entry_iter_end();
> + efivar_unregister(entry);
> + } else
> + efivar_entry_iter_end();
>
> /* It's dead Jim.... */
> return count;
> diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
> index 391c67b..b22659c 100644
> --- a/drivers/firmware/efi/vars.c
> +++ b/drivers/firmware/efi/vars.c
> @@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
> if (!found)
> return NULL;
>
> - if (remove)
> - list_del(&entry->list);
> + if (remove) {
> + if (entry->scanning) {
> + /*
> + * The entry will be deleted
> + * after scanning is completed.
> + */
> + entry->deleting = true;
> + } else
> + list_del(&entry->list);
> + }
>
> return entry;
> }
> diff --git a/include/linux/efi.h b/include/linux/efi.h
> index 5f8f176..04088fb 100644
> --- a/include/linux/efi.h
> +++ b/include/linux/efi.h
> @@ -782,6 +782,8 @@ struct efivar_entry {
> struct efi_variable var;
> struct list_head list;
> struct kobject kobj;
> + bool scanning;
> + bool deleting;
> };
>
> extern struct list_head efivar_sysfs_list;
--
Best,
Madper Xie.
|
|
From: Seiji A. <sei...@hd...> - 2013-10-16 23:27:05
|
Thank you for reviewing. > > http://marc.info/?l=linux-mm&m=136807959830182&w=2 > > http://marc.info/?l=linux-mm&m=136807959130175&w=2 > > > > For permanence, please use links of the form: > > http://lkml.kernel.org/r/message-id > > (Yes, they currently point to marc.info, but can be redirected to point > to any archive.) I will fix it. > This is needlessly confusing, which is apart of why reviewing this patch > took a lot more time than it should. > > Please break this patch into two: one which sets up the tracing IDT and > one to create the #PF tracepoint. The assumption is, I am assuming, > there will be more. OK. I will divide the patch into two or more to make the review smooth. Seiji > > -hpa |
|
From: H. P. A. <hp...@zy...> - 2013-10-15 23:59:57
|
On 09/09/2013 02:55 PM, Seiji Aguchi wrote: > Change from v2 > - Print entry->ip instead of entry->regs->ip to avoid kernel crash. > - Use %pf instead of 0x%lx to print address and ip. > > This patch introduces page fault tracepoints to x86 architecture > by switching IDT. > > [Use case of page fault events] > > Two events, for user and kernel spaces, are introduced at the beginning of > page fault handler. > > - User space event > There is a request of page fault event for user space as below. > > http://marc.info/?l=linux-mm&m=136807959830182&w=2 > http://marc.info/?l=linux-mm&m=136807959130175&w=2 > For permanence, please use links of the form: http://lkml.kernel.org/r/message-id (Yes, they currently point to marc.info, but can be redirected to point to any archive.) > - Kernel space event: > Overhead in kernel space is measurable by enabling it. > > [Creating IDT] > > A way to create IDT is as below. > > - Introduce set_intr_gate_raw() to register just non-trace handler to IDT. > This is used at boot time which tracing is disabled. > - Make set_intr_gate() macro so that it can register trace handler to > trace IDT and non-trace handler to normal IDT. > This is needlessly confusing, which is apart of why reviewing this patch took a lot more time than it should. Please break this patch into two: one which sets up the tracing IDT and one to create the #PF tracepoint. The assumption is, I am assuming, there will be more. -hpa |
|
From: Seiji A. <sei...@hd...> - 2013-10-11 18:53:29
|
Peter, Any comment? Seiji > -----Original Message----- > From: lin...@vg... [mailto:lin...@vg...] On Behalf Of Seiji Aguchi > Sent: Monday, September 09, 2013 5:56 PM > To: lin...@vg...; x8...@ke... > Cc: hp...@zy...; ro...@go...; mi...@el...; bp...@al...; tg...@li...; fde...@gm...; > rap...@gm...; dle...@li...; Tomoki Sekiyama > Subject: [PATCH v3] Introduce page fault tracepoint > > Change from v2 > - Print entry->ip instead of entry->regs->ip to avoid kernel crash. > - Use %pf instead of 0x%lx to print address and ip. > > This patch introduces page fault tracepoints to x86 architecture > by switching IDT. > > [Use case of page fault events] > > Two events, for user and kernel spaces, are introduced at the beginning of > page fault handler. > > - User space event > There is a request of page fault event for user space as below. > > http://marc.info/?l=linux-mm&m=136807959830182&w=2 > http://marc.info/?l=linux-mm&m=136807959130175&w=2 > > - Kernel space event: > Overhead in kernel space is measurable by enabling it. > > [Creating IDT] > > A way to create IDT is as below. > > - Introduce set_intr_gate_raw() to register just non-trace handler to IDT. > This is used at boot time which tracing is disabled. > - Make set_intr_gate() macro so that it can register trace handler to > trace IDT and non-trace handler to normal IDT. > > Signed-off-by: Seiji Aguchi <sei...@hd...> > --- > arch/x86/include/asm/desc.h | 33 +++++++++++++++++---- > arch/x86/include/asm/hw_irq.h | 14 ++++++++- > arch/x86/include/asm/trace/exceptions.h | 52 +++++++++++++++++++++++++++++++++ > arch/x86/include/asm/traps.h | 22 ++++++++++++++ > arch/x86/kernel/entry_32.S | 10 +++++++ > arch/x86/kernel/entry_64.S | 13 ++++++++- > arch/x86/kernel/head64.c | 2 +- > arch/x86/kernel/irqinit.c | 2 +- > arch/x86/kernel/kvm.c | 2 +- > arch/x86/kernel/traps.c | 28 +++++++++--------- > arch/x86/mm/Makefile | 2 ++ > arch/x86/mm/fault.c | 22 ++++++++++++++ > 12 files changed, 178 insertions(+), 24 deletions(-) > create mode 100644 arch/x86/include/asm/trace/exceptions.h > > diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h > index b90e5df..c04302b 100644 > --- a/arch/x86/include/asm/desc.h > +++ b/arch/x86/include/asm/desc.h > @@ -327,10 +327,28 @@ static inline void write_trace_idt_entry(int entry, const gate_desc *gate) > { > write_idt_entry(trace_idt_table, entry, gate); > } > + > +static inline void _trace_set_gate(int gate, unsigned type, void *addr, > + unsigned dpl, unsigned ist, unsigned seg) > +{ > + gate_desc s; > + > + pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); > + /* > + * does not need to be atomic because it is only done once at > + * setup time > + */ > + write_trace_idt_entry(gate, &s); > +} > #else > static inline void write_trace_idt_entry(int entry, const gate_desc *gate) > { > } > + > +static inline void _trace_set_gate(int gate, unsigned type, void *addr, > + unsigned dpl, unsigned ist, unsigned seg) > +{ > +} > #endif > > static inline void _set_gate(int gate, unsigned type, void *addr, > @@ -353,12 +371,20 @@ static inline void _set_gate(int gate, unsigned type, void *addr, > * Pentium F0 0F bugfix can have resulted in the mapped > * IDT being write-protected. > */ > -static inline void set_intr_gate(unsigned int n, void *addr) > +static inline void set_intr_gate_raw(unsigned int n, void *addr) > { > BUG_ON((unsigned)n > 0xFF); > _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); > } > > +#define set_intr_gate(n, addr) \ > + do { \ > + BUG_ON((unsigned)n > 0xFF); \ > + _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); \ > + _trace_set_gate(n, GATE_INTERRUPT, trace_##addr, 0, 0, \ > + __KERNEL_CS); \ > + } while (0) > + > extern int first_system_vector; > /* used_vectors is BITMAP for irq is not managed by percpu vector_irq */ > extern unsigned long used_vectors[]; > @@ -395,10 +421,7 @@ static inline void trace_set_intr_gate(unsigned int gate, void *addr) > #define __trace_alloc_intr_gate(n, addr) > #endif > > -static inline void __alloc_intr_gate(unsigned int n, void *addr) > -{ > - set_intr_gate(n, addr); > -} > +#define __alloc_intr_gate(n, addr) set_intr_gate(n, addr) > > #define alloc_intr_gate(n, addr) \ > do { \ > diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h > index 92b3bae..c856e69 100644 > --- a/arch/x86/include/asm/hw_irq.h > +++ b/arch/x86/include/asm/hw_irq.h > @@ -89,10 +89,22 @@ extern void trace_reschedule_interrupt(void); > extern void trace_threshold_interrupt(void); > extern void trace_call_function_interrupt(void); > extern void trace_call_function_single_interrupt(void); > +#else /* CONFIG_TRACING */ > +#define trace_apic_timer_interrupt apic_timer_interrupt > +#define trace_x86_platform_ipi x86_platform_ipi > +#define trace_error_interrupt error_interrupt > +#define trace_irq_work_interrupt irq_work_interrupt > +#define trace_spurious_interrupt spurious_interrupt > +#define trace_thermal_interrupt thermal_interrupt > +#define trace_reschedule_interrupt reschedule_interrupt > +#define trace_threshold_interrupt threshold_interrupt > +#define trace_call_function_interrupt call_function_interrupt > +#define trace_call_function_single_interrupt call_function_single_interrupt > +#endif > + > #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt > #define trace_reboot_interrupt reboot_interrupt > #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi > -#endif /* CONFIG_TRACING */ > > /* IOAPIC */ > #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) > diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h > new file mode 100644 > index 0000000..86540c0 > --- /dev/null > +++ b/arch/x86/include/asm/trace/exceptions.h > @@ -0,0 +1,52 @@ > +#undef TRACE_SYSTEM > +#define TRACE_SYSTEM exceptions > + > +#if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) > +#define _TRACE_PAGE_FAULT_H > + > +#include <linux/tracepoint.h> > + > +extern void trace_irq_vector_regfunc(void); > +extern void trace_irq_vector_unregfunc(void); > + > +DECLARE_EVENT_CLASS(x86_exceptions, > + > + TP_PROTO(unsigned long address, struct pt_regs *regs, > + unsigned long error_code), > + > + TP_ARGS(address, regs, error_code), > + > + TP_STRUCT__entry( > + __field( unsigned long, address ) > + __field( unsigned long, ip ) > + __field( unsigned long, error_code ) > + ), > + > + TP_fast_assign( > + __entry->address = address; > + __entry->ip = regs->ip; > + __entry->error_code = error_code; > + ), > + > + TP_printk("address=%pf ip=%pf error_code=0x%lx", > + (void *)__entry->address, (void *)__entry->ip, > + __entry->error_code) ); > + > +#define DEFINE_PAGE_FAULT_EVENT(name) \ > +DEFINE_EVENT_FN(x86_exceptions, name, \ > + TP_PROTO(unsigned long address, struct pt_regs *regs, \ > + unsigned long error_code), \ > + TP_ARGS(address, regs, error_code), \ > + trace_irq_vector_regfunc, \ > + trace_irq_vector_unregfunc); > + > +DEFINE_PAGE_FAULT_EVENT(user_page_fault); > +DEFINE_PAGE_FAULT_EVENT(kernel_page_fault); > + > +#undef TRACE_INCLUDE_PATH > +#define TRACE_INCLUDE_PATH . > +#define TRACE_INCLUDE_FILE exceptions > +#endif /* _TRACE_PAGE_FAULT_H */ > + > +/* This part must be outside protection */ > +#include <trace/define_trace.h> > diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h > index 7036cb6..a400a22 100644 > --- a/arch/x86/include/asm/traps.h > +++ b/arch/x86/include/asm/traps.h > @@ -37,6 +37,25 @@ asmlinkage void machine_check(void); > #endif /* CONFIG_X86_MCE */ > asmlinkage void simd_coprocessor_error(void); > > +#ifdef CONFIG_TRACING > +asmlinkage void trace_page_fault(void); > +#else > +#define trace_page_fault page_fault > +#endif > +#define trace_divide_error divide_error > +#define trace_bounds bounds > +#define trace_invalid_op invalid_op > +#define trace_device_not_available device_not_available > +#define trace_coprocessor_segment_overrun coprocessor_segment_overrun > +#define trace_invalid_TSS invalid_TSS > +#define trace_segment_not_present segment_not_present > +#define trace_general_protection general_protection > +#define trace_spurious_interrupt_bug spurious_interrupt_bug > +#define trace_coprocessor_error coprocessor_error > +#define trace_alignment_check alignment_check > +#define trace_simd_coprocessor_error simd_coprocessor_error > +#define trace_async_page_fault async_page_fault > + > dotraplinkage void do_divide_error(struct pt_regs *, long); > dotraplinkage void do_debug(struct pt_regs *, long); > dotraplinkage void do_nmi(struct pt_regs *, long); > @@ -55,6 +74,9 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); > #endif > dotraplinkage void do_general_protection(struct pt_regs *, long); > dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); > +#ifdef CONFIG_TRACING > +dotraplinkage void trace_do_page_fault(struct pt_regs *, unsigned long); > +#endif > dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); > dotraplinkage void do_coprocessor_error(struct pt_regs *, long); > dotraplinkage void do_alignment_check(struct pt_regs *, long); > diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S > index 2cfbc3a..c9eb4e2 100644 > --- a/arch/x86/kernel/entry_32.S > +++ b/arch/x86/kernel/entry_32.S > @@ -1244,6 +1244,16 @@ return_to_handler: > */ > .pushsection .kprobes.text, "ax" > > +#ifdef CONFIG_TRACING > +ENTRY(trace_page_fault) > + RING0_EC_FRAME > + ASM_CLAC > + pushl_cfi $trace_do_page_fault > + jmp error_code > + CFI_ENDPROC > +END(trace_page_fault) > +#endif > + > ENTRY(page_fault) > RING0_EC_FRAME > ASM_CLAC > diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S > index 1b69951..5136404 100644 > --- a/arch/x86/kernel/entry_64.S > +++ b/arch/x86/kernel/entry_64.S > @@ -1295,6 +1295,17 @@ ENTRY(\sym) > END(\sym) > .endm > > +#ifdef CONFIG_TRACING > +.macro trace_errorentry sym do_sym > +errorentry trace(\sym) trace(\do_sym) > +errorentry \sym \do_sym > +.endm > +#else > +.macro trace_errorentry sym do_sym > +errorentry \sym \do_sym > +.endm > +#endif > + > /* error code is on the stack already */ > .macro paranoiderrorentry sym do_sym > ENTRY(\sym) > @@ -1497,7 +1508,7 @@ zeroentry xen_int3 do_int3 > errorentry xen_stack_segment do_stack_segment > #endif > errorentry general_protection do_general_protection > -errorentry page_fault do_page_fault > +trace_errorentry page_fault do_page_fault > #ifdef CONFIG_KVM_GUEST > errorentry async_page_fault do_async_page_fault > #endif > diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c > index 1be8e43..aebb2bf 100644 > --- a/arch/x86/kernel/head64.c > +++ b/arch/x86/kernel/head64.c > @@ -162,7 +162,7 @@ asmlinkage void __init x86_64_start_kernel(char * real_mode_data) > clear_bss(); > > for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) > - set_intr_gate(i, &early_idt_handlers[i]); > + set_intr_gate_raw(i, &early_idt_handlers[i]); > load_idt((const struct desc_ptr *)&idt_descr); > > copy_bootdata(__va(real_mode_data)); > diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c > index a2a1fbc..2ca2354 100644 > --- a/arch/x86/kernel/irqinit.c > +++ b/arch/x86/kernel/irqinit.c > @@ -206,7 +206,7 @@ void __init native_init_IRQ(void) > i = FIRST_EXTERNAL_VECTOR; > for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { > /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ > - set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); > + set_intr_gate_raw(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); > } > > if (!acpi_ioapic && !of_ioapic) > diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c > index 697b93a..ba202ee 100644 > --- a/arch/x86/kernel/kvm.c > +++ b/arch/x86/kernel/kvm.c > @@ -464,7 +464,7 @@ static struct notifier_block kvm_cpu_notifier = { > > static void __init kvm_apf_trap_init(void) > { > - set_intr_gate(14, &async_page_fault); > + set_intr_gate(14, async_page_fault); > } > > void __init kvm_guest_init(void) > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > index 8c8093b..1c9d0ad 100644 > --- a/arch/x86/kernel/traps.c > +++ b/arch/x86/kernel/traps.c > @@ -713,7 +713,7 @@ void __init early_trap_init(void) > /* int3 can be called from all */ > set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); > #ifdef CONFIG_X86_32 > - set_intr_gate(X86_TRAP_PF, &page_fault); > + set_intr_gate(X86_TRAP_PF, page_fault); > #endif > load_idt(&idt_descr); > } > @@ -721,7 +721,7 @@ void __init early_trap_init(void) > void __init early_trap_pf_init(void) > { > #ifdef CONFIG_X86_64 > - set_intr_gate(X86_TRAP_PF, &page_fault); > + set_intr_gate(X86_TRAP_PF, page_fault); > #endif > } > > @@ -737,30 +737,30 @@ void __init trap_init(void) > early_iounmap(p, 4); > #endif > > - set_intr_gate(X86_TRAP_DE, ÷_error); > + set_intr_gate(X86_TRAP_DE, divide_error); > set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK); > /* int4 can be called from all */ > set_system_intr_gate(X86_TRAP_OF, &overflow); > - set_intr_gate(X86_TRAP_BR, &bounds); > - set_intr_gate(X86_TRAP_UD, &invalid_op); > - set_intr_gate(X86_TRAP_NM, &device_not_available); > + set_intr_gate(X86_TRAP_BR, bounds); > + set_intr_gate(X86_TRAP_UD, invalid_op); > + set_intr_gate(X86_TRAP_NM, device_not_available); > #ifdef CONFIG_X86_32 > set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS); > #else > set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK); > #endif > - set_intr_gate(X86_TRAP_OLD_MF, &coprocessor_segment_overrun); > - set_intr_gate(X86_TRAP_TS, &invalid_TSS); > - set_intr_gate(X86_TRAP_NP, &segment_not_present); > + set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun); > + set_intr_gate(X86_TRAP_TS, invalid_TSS); > + set_intr_gate(X86_TRAP_NP, segment_not_present); > set_intr_gate_ist(X86_TRAP_SS, &stack_segment, STACKFAULT_STACK); > - set_intr_gate(X86_TRAP_GP, &general_protection); > - set_intr_gate(X86_TRAP_SPURIOUS, &spurious_interrupt_bug); > - set_intr_gate(X86_TRAP_MF, &coprocessor_error); > - set_intr_gate(X86_TRAP_AC, &alignment_check); > + set_intr_gate(X86_TRAP_GP, general_protection); > + set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug); > + set_intr_gate(X86_TRAP_MF, coprocessor_error); > + set_intr_gate(X86_TRAP_AC, alignment_check); > #ifdef CONFIG_X86_MCE > set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK); > #endif > - set_intr_gate(X86_TRAP_XF, &simd_coprocessor_error); > + set_intr_gate(X86_TRAP_XF, simd_coprocessor_error); > > /* Reserve all the builtin and the syscall vector: */ > for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) > diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile > index 23d8e5f..6a19ad9 100644 > --- a/arch/x86/mm/Makefile > +++ b/arch/x86/mm/Makefile > @@ -6,6 +6,8 @@ nostackp := $(call cc-option, -fno-stack-protector) > CFLAGS_physaddr.o := $(nostackp) > CFLAGS_setup_nx.o := $(nostackp) > > +CFLAGS_fault.o := -I$(src)/../include/asm/trace > + > obj-$(CONFIG_X86_PAT) += pat_rbtree.o > obj-$(CONFIG_SMP) += tlb.o > > diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c > index 654be4a..f515154 100644 > --- a/arch/x86/mm/fault.c > +++ b/arch/x86/mm/fault.c > @@ -20,6 +20,9 @@ > #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ > #include <asm/fixmap.h> /* VSYSCALL_START */ > > +#define CREATE_TRACE_POINTS > +#include <asm/trace/exceptions.h> > + > /* > * Page fault error code bits: > * > @@ -1230,3 +1233,22 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) > __do_page_fault(regs, error_code); > exception_exit(prev_state); > } > + > +static void trace_page_fault_entries(struct pt_regs *regs, > + unsigned long error_code) > +{ > + if (user_mode(regs)) > + trace_user_page_fault(read_cr2(), regs, error_code); > + else > + trace_kernel_page_fault(read_cr2(), regs, error_code); > +} > + > +dotraplinkage void __kprobes > +trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) > +{ > + enum ctx_state prev_state; > + prev_state = exception_enter(); > + trace_page_fault_entries(regs, error_code); > + __do_page_fault(regs, error_code); > + exception_exit(prev_state); > +} > -- > 1.8.2.1 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to maj...@vg... > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ |
|
From: Seiji A. <sei...@hd...> - 2013-10-11 18:34:38
|
Matt,
I submitted a v3 patch based on my comment below..
Seiji
> -----Original Message-----
> From: lin...@vg... [mailto:lin...@vg...] On Behalf Of Seiji Aguchi
> Sent: Wednesday, October 09, 2013 12:37 PM
> To: Matt Fleming
> Cc: lin...@vg...; lin...@vg...; ton...@in...; mat...@in...; dle-
> de...@li...; Tomoki Sekiyama
> Subject: RE: [RFC][PATCH v2] efivars,efi-pstore: Hold off deletion of sysfs entry until the scan is completed
>
> Thank you for reviewing.
> In my understanding, your point is that all accesses to efivar_entry should be done while holding __efivars->lock.
>
> > > @@ -88,8 +103,9 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> > > return 0;
> > >
> > > entry->var.DataSize = 1024;
> > > - __efivar_entry_get(entry, &entry->var.Attributes,
> > > - &entry->var.DataSize, entry->var.Data);
> > > + efivar_entry_get(entry, &entry->var.Attributes,
> > > + &entry->var.DataSize, entry->var.Data);
> > > +
> > > size = entry->var.DataSize;
> > >
> > > *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
> >
> > This isn't safe to do without holding the __efivars->lock, because
> > there's the potential for someone else to update entry->var.Data and
> > entry->var.DataSize while you're in the middle of copying the data in
> > kmemdup(). This could leak to an information leak, though I think you're
> > safe from an out-of-bounds access because DataSize is never > 1024.
> >
>
> I see...
> Bu, kmemdup() cannot be called while holding the spinlock.
>
> So, for protecting efivar_entry, I will call kzalloc() before holding the lock in efi_pstore_read().
> and use memcpy() in efi_pstore_read_func().
>
> The pseudo code is as below.
>
> static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> struct pstore_info *psi)
> {
> *data.buf = kzalloc(1024, GFP_KERNEL);
> if (!*data.buf)
> return -ENOMEM;
>
> efivar_entry_iter_begin();
> size = efi_pstore_sysfs_entry_iter(&data,
> (struct efivar_entry **)&psi->data);
> efivar_entry_iter_end();
> if (size <= 0)
> kfree(*data.buf);
> return size;
> }
>
> static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> {
> [...]
> entry->var.DataSize = 1024;
> __efivar_entry_get(entry, &entry->var.Attributes,
> &entry->var.DataSize, entry->var.Data);
>
> size = entry->var.DataSize;
> memcpy(*cb_data->buf, entry->var.Data, (size_t)min_t(unsigned long,
> 1024, size));
> return size;
> }
>
>
> > This doesn't look correct to me. You can't access 'entry' outside of the
> > *_iter_begin() and *_iter_end() blocks. You can't do,
> >
> > efivar_entry_iter_end():
> >
> > if (!entry->scanning)
> > efivar_unregister(entry);
> >
> > because 'entry' may have already been freed by another CPU.
>
> I will fix it as follows.
>
> if (!entry->scanning) {
> efivar_entry_iter_end();
> efivar_unregister(entry);
> } else
> efivar_entry_iter_end();
>
> (efivar_unregister(entry) still runs concurrently.
> But, it cannot move inside spinlock because kzalloc() may run while freeing kobject.)
>
> Is it your expectation?
>
> Seiji
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-efi" in
> the body of a message to maj...@vg...
> More majordomo info at http://vger.kernel.org/majordomo-info.html
|
|
From: Seiji A. <sei...@hd...> - 2013-10-11 18:29:46
|
Change from v2:
- Move dynamic memory allocation to efi_pstore_read() before holding
efivars->lock to protect entry->var.Data.
- Access to entry->scanning while holding efivars->lock.
- Move a comment about a returned value from efi_pstore_read_func() to
efi_pstore_read() because "size < 0" case may happen in efi_pstore_read().
Currently, when mounting pstore file system, a read callback of efi_pstore
driver runs mutiple times as below.
- In the first read callback, scan efivar_sysfs_list from head and pass
a kmsg buffer of a entry to an upper pstore layer.
- In the second read callback, rescan efivar_sysfs_list from the entry and pass
another kmsg buffer to it.
- Repeat the scan and pass until the end of efivar_sysfs_list.
In this process, an entry is read across the multiple read function calls.
To avoid race between the read and erasion, the whole process above is
protected by a spinlock, holding in open() and releasing in close().
At the same time, kmemdup() is called to pass the buffer to pstore filesystem
during it.
And then, it causes a following lockdep warning.
To make the dynamic memory allocation runnable without taking spinlock,
holding off a deletion of sysfs entry if it happens while scanning it
via efi_pstore, and deleting it after the scan is completed.
To implement it, this patch introduces two flags, scanning and deleting,
to efivar_entry.
[ 1.143710] ------------[ cut here ]------------
[ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
lockdep_trace_alloc+0x104/0x110()
[ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
[ 1.144058] Modules linked in:
[ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
[ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
ffff8800797e9b28
[ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
0000000000000046
[ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
ffff8800797e9b78
[ 1.144058] Call Trace:
[ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
[ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
[ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
[ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
[ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
[ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
[ 1.144058] [<ffffffff815147bb>] ?
efi_pstore_read_func.part.1+0x12b/0x170
[ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
[ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
[ 1.144058] [<ffffffff81514800>] ?
efi_pstore_read_func.part.1+0x170/0x170
[ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
[ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
[ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
[ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
[ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
[ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
[ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
[ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
[ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
[ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
[ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
[ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
[ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
[ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
[ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
[ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
[ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
Signed-off-by: Seiji Aguchi <sei...@hd...>
---
drivers/firmware/efi/efi-pstore.c | 143 +++++++++++++++++++++++++++++++++++---
drivers/firmware/efi/efivars.c | 12 ++--
drivers/firmware/efi/vars.c | 12 +++-
include/linux/efi.h | 2 +
4 files changed, 153 insertions(+), 16 deletions(-)
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 5002d50..ebd5dbc 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
static int efi_pstore_open(struct pstore_info *psi)
{
- efivar_entry_iter_begin();
psi->data = NULL;
return 0;
}
static int efi_pstore_close(struct pstore_info *psi)
{
- efivar_entry_iter_end();
psi->data = NULL;
return 0;
}
@@ -91,19 +89,125 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
__efivar_entry_get(entry, &entry->var.Attributes,
&entry->var.DataSize, entry->var.Data);
size = entry->var.DataSize;
+ memcpy(*cb_data->buf, entry->var.Data, (size_t)min_t(unsigned long,
+ 1024, size));
- *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
- if (*cb_data->buf == NULL)
- return -ENOMEM;
return size;
}
+/**
+ * efi_pstore_scan_sysfs_enter
+ * @entry: scanning entry
+ * @next: next entry
+ * @head: list head
+ */
+static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
+ struct efivar_entry *next,
+ struct list_head *head)
+{
+ pos->scanning = true;
+ if (&next->list != head)
+ next->scanning = true;
+}
+
+/**
+ * __efi_pstore_scan_sysfs_exit
+ * @entry: deleting entry
+ * @turn_off_scanning: Check if a scanning flag should be turned off
+ */
+static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
+ bool turn_off_scanning)
+{
+ if (entry->deleting) {
+ list_del(&entry->list);
+ efivar_entry_iter_end();
+ efivar_unregister(entry);
+ efivar_entry_iter_begin();
+ } else if (turn_off_scanning)
+ entry->scanning = false;
+}
+
+/**
+ * efi_pstore_scan_sysfs_exit
+ * @pos: scanning entry
+ * @next: next entry
+ * @head: list head
+ * @stop: a flag checking if scanning will stop
+ */
+static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
+ struct efivar_entry *next,
+ struct list_head *head, bool stop)
+{
+ __efi_pstore_scan_sysfs_exit(pos, true);
+ if (stop)
+ __efi_pstore_scan_sysfs_exit(next, &next->list != head);
+}
+
+/**
+ * efi_pstore_sysfs_entry_iter
+ *
+ * @data: function-specific data to pass to callback
+ * @pos: entry to begin iterating from
+ *
+ * You MUST call efivar_enter_iter_begin() before this function, and
+ * efivar_entry_iter_end() afterwards.
+ *
+ * It is possible to begin iteration from an arbitrary entry within
+ * the list by passing @pos. @pos is updated on return to point to
+ * the next entry of the last one passed to efi_pstore_read_func().
+ * To begin iterating from the beginning of the list @pos must be %NULL.
+ */
+static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
+{
+ struct efivar_entry *entry, *n;
+ struct list_head *head = &efivar_sysfs_list;
+ int size = 0;
+
+ if (!*pos) {
+ list_for_each_entry_safe(entry, n, head, list) {
+ efi_pstore_scan_sysfs_enter(entry, n, head);
+
+ size = efi_pstore_read_func(entry, data);
+ efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
+ if (size)
+ break;
+ }
+ *pos = n;
+ return size;
+ }
+
+ list_for_each_entry_safe_from((*pos), n, head, list) {
+ efi_pstore_scan_sysfs_enter((*pos), n, head);
+
+ size = efi_pstore_read_func((*pos), data);
+ efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+ if (size)
+ break;
+ }
+ *pos = n;
+ return size;
+}
+
+/**
+ * efi_pstore_read
+ *
+ * This function returns a size of NVRAM entry logged via efi_pstore_write().
+ * The meaning and behavior of efi_pstore/pstore are as below.
+ *
+ * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
+ * and pstore filesystem will continue reading subsequent entries.
+ * size == 0: Entry was not logged via efi_pstore_write(),
+ * and efi_pstore driver will continue reading subsequent entries.
+ * size < 0: Failed to get data of entry logging via efi_pstore_write(),
+ * and pstore will stop reading entry.
+ */
static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
int *count, struct timespec *timespec,
char **buf, bool *compressed,
struct pstore_info *psi)
{
struct pstore_read_data data;
+ ssize_t size;
data.id = id;
data.type = type;
@@ -112,8 +216,17 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
data.compressed = compressed;
data.buf = buf;
- return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
- (struct efivar_entry **)&psi->data);
+ *data.buf = kzalloc(1024, GFP_KERNEL);
+ if (!*data.buf)
+ return -ENOMEM;
+
+ efivar_entry_iter_begin();
+ size = efi_pstore_sysfs_entry_iter(&data,
+ (struct efivar_entry **)&psi->data);
+ efivar_entry_iter_end();
+ if (size <= 0)
+ kfree(*data.buf);
+ return size;
}
static int efi_pstore_write(enum pstore_type_id type,
@@ -184,9 +297,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
return 0;
}
+ if (entry->scanning) {
+ /*
+ * Skip deletion because this entry will be deleted
+ * after scanning is completed.
+ */
+ entry->deleting = true;
+ } else
+ list_del(&entry->list);
+
/* found */
__efivar_entry_delete(entry);
- list_del(&entry->list);
return 1;
}
@@ -214,10 +335,12 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
efivar_entry_iter_begin();
found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
- efivar_entry_iter_end();
- if (found)
+ if (found && !entry->scanning) {
+ efivar_entry_iter_end();
efivar_unregister(entry);
+ } else
+ efivar_entry_iter_end();
return 0;
}
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 8a7432a..8c5a61a 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -383,12 +383,16 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
else if (__efivar_entry_delete(entry))
err = -EIO;
- efivar_entry_iter_end();
-
- if (err)
+ if (err) {
+ efivar_entry_iter_end();
return err;
+ }
- efivar_unregister(entry);
+ if (!entry->scanning) {
+ efivar_entry_iter_end();
+ efivar_unregister(entry);
+ } else
+ efivar_entry_iter_end();
/* It's dead Jim.... */
return count;
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 391c67b..b22659c 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
if (!found)
return NULL;
- if (remove)
- list_del(&entry->list);
+ if (remove) {
+ if (entry->scanning) {
+ /*
+ * The entry will be deleted
+ * after scanning is completed.
+ */
+ entry->deleting = true;
+ } else
+ list_del(&entry->list);
+ }
return entry;
}
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5f8f176..04088fb 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -782,6 +782,8 @@ struct efivar_entry {
struct efi_variable var;
struct list_head list;
struct kobject kobj;
+ bool scanning;
+ bool deleting;
};
extern struct list_head efivar_sysfs_list;
--
1.8.2.1
|
|
From: Seiji A. <sei...@hd...> - 2013-10-09 16:37:55
|
Thank you for reviewing.
In my understanding, your point is that all accesses to efivar_entry should be done while holding __efivars->lock.
> > @@ -88,8 +103,9 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> > return 0;
> >
> > entry->var.DataSize = 1024;
> > - __efivar_entry_get(entry, &entry->var.Attributes,
> > - &entry->var.DataSize, entry->var.Data);
> > + efivar_entry_get(entry, &entry->var.Attributes,
> > + &entry->var.DataSize, entry->var.Data);
> > +
> > size = entry->var.DataSize;
> >
> > *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
>
> This isn't safe to do without holding the __efivars->lock, because
> there's the potential for someone else to update entry->var.Data and
> entry->var.DataSize while you're in the middle of copying the data in
> kmemdup(). This could leak to an information leak, though I think you're
> safe from an out-of-bounds access because DataSize is never > 1024.
>
I see...
Bu, kmemdup() cannot be called while holding the spinlock.
So, for protecting efivar_entry, I will call kzalloc() before holding the lock in efi_pstore_read().
and use memcpy() in efi_pstore_read_func().
The pseudo code is as below.
static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
struct pstore_info *psi)
{
*data.buf = kzalloc(1024, GFP_KERNEL);
if (!*data.buf)
return -ENOMEM;
efivar_entry_iter_begin();
size = efi_pstore_sysfs_entry_iter(&data,
(struct efivar_entry **)&psi->data);
efivar_entry_iter_end();
if (size <= 0)
kfree(*data.buf);
return size;
}
static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
{
[...]
entry->var.DataSize = 1024;
__efivar_entry_get(entry, &entry->var.Attributes,
&entry->var.DataSize, entry->var.Data);
size = entry->var.DataSize;
memcpy(*cb_data->buf, entry->var.Data, (size_t)min_t(unsigned long,
1024, size));
return size;
}
> This doesn't look correct to me. You can't access 'entry' outside of the
> *_iter_begin() and *_iter_end() blocks. You can't do,
>
> efivar_entry_iter_end():
>
> if (!entry->scanning)
> efivar_unregister(entry);
>
> because 'entry' may have already been freed by another CPU.
I will fix it as follows.
if (!entry->scanning) {
efivar_entry_iter_end();
efivar_unregister(entry);
} else
efivar_entry_iter_end();
(efivar_unregister(entry) still runs concurrently.
But, it cannot move inside spinlock because kzalloc() may run while freeing kobject.)
Is it your expectation?
Seiji
|
|
From: Matt F. <ma...@co...> - 2013-10-07 11:42:37
|
On Fri, 27 Sep, at 04:23:52PM, Seiji Aguchi wrote:
> Change form v1
> - Rebase to 3.12-rc2
>
> Currently, when mounting pstore file system, a read callback of efi_pstore
> driver runs mutiple times as below.
>
> - In the first read callback, scan efivar_sysfs_list from head and pass
> a kmsg buffer of a entry to an upper pstore layer.
> - In the second read callback, rescan efivar_sysfs_list from the entry and pass
> another kmsg buffer to it.
> - Repeat the scan and pass until the end of efivar_sysfs_list.
>
> In this process, an entry is read across the multiple read function calls.
> To avoid race between the read and erasion, the whole process above is
> protected by a spinlock, holding in open() and releasing in close().
>
> At the same time, kmemdup() is called to pass the buffer to pstore filesystem
> during it.
> And then, it causes a following lockdep warning.
>
> To make the read callback runnable without taking spinlok,
> holding off a deletion of sysfs entry if it happens while scanning it
> via efi_pstore, and deleting it after the scan is completed.
>
> To implement it, this patch introduces two flags, scanning and deleting,
> to efivar_entry.
> Also, __efivar_entry_get() is removed because it was used in efi_pstore only.
[...]
> @@ -88,8 +103,9 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> return 0;
>
> entry->var.DataSize = 1024;
> - __efivar_entry_get(entry, &entry->var.Attributes,
> - &entry->var.DataSize, entry->var.Data);
> + efivar_entry_get(entry, &entry->var.Attributes,
> + &entry->var.DataSize, entry->var.Data);
> +
> size = entry->var.DataSize;
>
> *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
This isn't safe to do without holding the __efivars->lock, because
there's the potential for someone else to update entry->var.Data and
entry->var.DataSize while you're in the middle of copying the data in
kmemdup(). This could leak to an information leak, though I think you're
safe from an out-of-bounds access because DataSize is never > 1024.
> +/**
> + * __efi_pstore_scan_sysfs_exit
> + * @entry: deleting entry
> + * @turn_off_scanning: Check if a scanning flag should be turned off
> + */
> +static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
> + bool turn_off_scanning)
> +{
> + if (entry->deleting) {
> + list_del(&entry->list);
> + efivar_entry_iter_end();
> + efivar_unregister(entry);
> + efivar_entry_iter_begin();
> + } else if (turn_off_scanning)
> + entry->scanning = false;
> +}
[...]
> @@ -184,9 +305,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
> return 0;
> }
>
> + if (entry->scanning) {
> + /*
> + * Skip deletion because this entry will be deleted
> + * after scanning is completed.
> + */
> + entry->deleting = true;
> + } else
> + list_del(&entry->list);
> +
> /* found */
> __efivar_entry_delete(entry);
> - list_del(&entry->list);
>
> return 1;
> }
> @@ -216,7 +345,7 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
> found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
> efivar_entry_iter_end();
>
> - if (found)
> + if (found && !entry->scanning)
> efivar_unregister(entry);
>
> return 0;
> diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
> index 8a7432a..831bc5c 100644
> --- a/drivers/firmware/efi/efivars.c
> +++ b/drivers/firmware/efi/efivars.c
> @@ -388,7 +388,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
> if (err)
> return err;
>
> - efivar_unregister(entry);
> + if (!entry->scanning)
> + efivar_unregister(entry);
>
> /* It's dead Jim.... */
> return count;
> diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
> index 391c67b..573ed92 100644
> --- a/drivers/firmware/efi/vars.c
> +++ b/drivers/firmware/efi/vars.c
> @@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
> if (!found)
> return NULL;
>
> - if (remove)
> - list_del(&entry->list);
> + if (remove) {
> + if (entry->scanning) {
> + /*
> + * The entry will be deleted
> + * after scanning is completed.
> + */
> + entry->deleting = true;
> + } else
> + list_del(&entry->list);
> + }
>
> return entry;
> }
This doesn't look correct to me. You can't access 'entry' outside of the
*_iter_begin() and *_iter_end() blocks. You can't do,
efivar_entry_iter_end():
if (!entry->scanning)
efivar_unregister(entry);
because 'entry' may have already been freed by another CPU.
--
Matt Fleming, Intel Open Source Technology Center
|
|
From: Fleming, M. <mat...@in...> - 2013-10-04 16:25:52
|
On 4 October 2013 16:46, Seiji Aguchi <sei...@hd...> wrote: > Are there anyone who can review this bugfix? Sorry I haven't got to it yet (or the previous version). It is on my TODO list. |
|
From: Seiji A. <sei...@hd...> - 2013-10-04 15:47:35
|
Are there anyone who can review this bugfix?
Seiji
> -----Original Message-----
> From: lin...@vg... [mailto:lin...@vg...] On Behalf Of Seiji Aguchi
> Sent: Friday, September 27, 2013 4:24 PM
> To: lin...@vg...; lin...@vg...; ton...@in...; mat...@in...
> Cc: dle...@li...; Tomoki Sekiyama
> Subject: [RFC][PATCH v2] efivars,efi-pstore: Hold off deletion of sysfs entry until the scan is completed
>
> Change form v1
> - Rebase to 3.12-rc2
>
> Currently, when mounting pstore file system, a read callback of efi_pstore
> driver runs mutiple times as below.
>
> - In the first read callback, scan efivar_sysfs_list from head and pass
> a kmsg buffer of a entry to an upper pstore layer.
> - In the second read callback, rescan efivar_sysfs_list from the entry and pass
> another kmsg buffer to it.
> - Repeat the scan and pass until the end of efivar_sysfs_list.
>
> In this process, an entry is read across the multiple read function calls.
> To avoid race between the read and erasion, the whole process above is
> protected by a spinlock, holding in open() and releasing in close().
>
> At the same time, kmemdup() is called to pass the buffer to pstore filesystem
> during it.
> And then, it causes a following lockdep warning.
>
> To make the read callback runnable without taking spinlok,
> holding off a deletion of sysfs entry if it happens while scanning it
> via efi_pstore, and deleting it after the scan is completed.
>
> To implement it, this patch introduces two flags, scanning and deleting,
> to efivar_entry.
> Also, __efivar_entry_get() is removed because it was used in efi_pstore only.
>
> [ 1.143710] ------------[ cut here ]------------
> [ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
> lockdep_trace_alloc+0x104/0x110()
> [ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
> [ 1.144058] Modules linked in:
>
> [ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
> [ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
> ffff8800797e9b28
> [ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
> 0000000000000046
> [ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
> ffff8800797e9b78
> [ 1.144058] Call Trace:
> [ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
> [ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
> [ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
> [ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
> [ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
> [ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
> [ 1.144058] [<ffffffff815147bb>] ?
> efi_pstore_read_func.part.1+0x12b/0x170
> [ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
> [ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
> [ 1.144058] [<ffffffff81514800>] ?
> efi_pstore_read_func.part.1+0x170/0x170
> [ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
> [ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
> [ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
> [ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
> [ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
> [ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
> [ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
> [ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
> [ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
> [ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
> [ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
> [ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
> [ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
> [ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
> [ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
> [ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
> [ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
>
> Signed-off-by: Seiji Aguchi <sei...@hd...>
> ---
> drivers/firmware/efi/efi-pstore.c | 145 +++++++++++++++++++++++++++++++++++---
> drivers/firmware/efi/efivars.c | 3 +-
> drivers/firmware/efi/vars.c | 39 +++-------
> include/linux/efi.h | 4 +-
> 4 files changed, 151 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
> index 5002d50..53001a5 100644
> --- a/drivers/firmware/efi/efi-pstore.c
> +++ b/drivers/firmware/efi/efi-pstore.c
> @@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
>
> static int efi_pstore_open(struct pstore_info *psi)
> {
> - efivar_entry_iter_begin();
> psi->data = NULL;
> return 0;
> }
>
> static int efi_pstore_close(struct pstore_info *psi)
> {
> - efivar_entry_iter_end();
> psi->data = NULL;
> return 0;
> }
> @@ -39,6 +37,23 @@ struct pstore_read_data {
> char **buf;
> };
>
> +/**
> + * efi_pstore_read_func
> + * @entry: reading entry
> + * @data: data of the entry
> + *
> + * This function runs in non-atomic context.
> + *
> + * Also, it returns a size of NVRAM entry logged via efi_pstore_write().
> + * pstore in accordance with the returned value as below.
> + *
> + * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
> + * and pstore filesystem will continue reading subsequent entries.
> + * size == 0: Entry was not logged via efi_pstore_write(),
> + * and efi_pstore driver will continue reading subsequent entries.
> + * size < 0: Failed to get data of entry logging via efi_pstore_write(),
> + * and pstore will stop reading entry.
> + */
> static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> {
> efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
> @@ -88,8 +103,9 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> return 0;
>
> entry->var.DataSize = 1024;
> - __efivar_entry_get(entry, &entry->var.Attributes,
> - &entry->var.DataSize, entry->var.Data);
> + efivar_entry_get(entry, &entry->var.Attributes,
> + &entry->var.DataSize, entry->var.Data);
> +
> size = entry->var.DataSize;
>
> *cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
> @@ -98,12 +114,114 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
> return size;
> }
>
> +/**
> + * efi_pstore_scan_sysfs_enter
> + * @entry: scanning entry
> + * @next: next entry
> + * @head: list head
> + */
> +static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
> + struct efivar_entry *next,
> + struct list_head *head)
> +{
> + pos->scanning = true;
> + if (&next->list != head)
> + next->scanning = true;
> +
> + /*
> + * Release a spin_lock because efi_pstore_read_func() should
> + * run in non-atomic context to allocate buffer dynamically.
> + */
> + efivar_entry_iter_end();
> +}
> +
> +/**
> + * __efi_pstore_scan_sysfs_exit
> + * @entry: deleting entry
> + * @turn_off_scanning: Check if a scanning flag should be turned off
> + */
> +static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
> + bool turn_off_scanning)
> +{
> + if (entry->deleting) {
> + list_del(&entry->list);
> + efivar_entry_iter_end();
> + efivar_unregister(entry);
> + efivar_entry_iter_begin();
> + } else if (turn_off_scanning)
> + entry->scanning = false;
> +}
> +
> +/**
> + * efi_pstore_scan_sysfs_exit
> + * @pos: scanning entry
> + * @next: next entry
> + * @head: list head
> + * @stop: a flag checking if scanning will stop
> + */
> +static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
> + struct efivar_entry *next,
> + struct list_head *head, bool stop)
> +{
> + /* Hold a spinlock to access efivar_entry safely. */
> + efivar_entry_iter_begin();
> + __efi_pstore_scan_sysfs_exit(pos, true);
> + if (stop)
> + __efi_pstore_scan_sysfs_exit(next, &next->list != head);
> +}
> +
> +/**
> + * efi_pstore_sysfs_entry_iter
> + *
> + * @data: function-specific data to pass to callback
> + * @pos: entry to begin iterating from
> + *
> + * You MUST call efivar_enter_iter_begin() before this function, and
> + * efivar_entry_iter_end() afterwards.
> + *
> + * It is possible to begin iteration from an arbitrary entry within
> + * the list by passing @pos. @pos is updated on return to point to
> + * the next entry of the last one passed to efi_pstore_read_func().
> + * To begin iterating from the beginning of the list @pos must be %NULL.
> + */
> +static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
> +{
> + struct efivar_entry *entry, *n;
> + struct list_head *head = &efivar_sysfs_list;
> + int size = 0;
> +
> + if (!*pos) {
> + list_for_each_entry_safe(entry, n, head, list) {
> + efi_pstore_scan_sysfs_enter(entry, n, head);
> +
> + size = efi_pstore_read_func(entry, data);
> + efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
> + if (size)
> + break;
> + }
> + *pos = n;
> + return size;
> + }
> +
> + list_for_each_entry_safe_from((*pos), n, head, list) {
> + efi_pstore_scan_sysfs_enter((*pos), n, head);
> +
> + size = efi_pstore_read_func((*pos), data);
> + efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
> + if (size)
> + break;
> + }
> + *pos = n;
> + return size;
> +}
> +
> static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> int *count, struct timespec *timespec,
> char **buf, bool *compressed,
> struct pstore_info *psi)
> {
> struct pstore_read_data data;
> + ssize_t size;
>
> data.id = id;
> data.type = type;
> @@ -112,8 +230,11 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
> data.compressed = compressed;
> data.buf = buf;
>
> - return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
> - (struct efivar_entry **)&psi->data);
> + efivar_entry_iter_begin();
> + size = efi_pstore_sysfs_entry_iter(&data,
> + (struct efivar_entry **)&psi->data);
> + efivar_entry_iter_end();
> + return size;
> }
>
> static int efi_pstore_write(enum pstore_type_id type,
> @@ -184,9 +305,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
> return 0;
> }
>
> + if (entry->scanning) {
> + /*
> + * Skip deletion because this entry will be deleted
> + * after scanning is completed.
> + */
> + entry->deleting = true;
> + } else
> + list_del(&entry->list);
> +
> /* found */
> __efivar_entry_delete(entry);
> - list_del(&entry->list);
>
> return 1;
> }
> @@ -216,7 +345,7 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
> found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
> efivar_entry_iter_end();
>
> - if (found)
> + if (found && !entry->scanning)
> efivar_unregister(entry);
>
> return 0;
> diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
> index 8a7432a..831bc5c 100644
> --- a/drivers/firmware/efi/efivars.c
> +++ b/drivers/firmware/efi/efivars.c
> @@ -388,7 +388,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
> if (err)
> return err;
>
> - efivar_unregister(entry);
> + if (!entry->scanning)
> + efivar_unregister(entry);
>
> /* It's dead Jim.... */
> return count;
> diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
> index 391c67b..573ed92 100644
> --- a/drivers/firmware/efi/vars.c
> +++ b/drivers/firmware/efi/vars.c
> @@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
> if (!found)
> return NULL;
>
> - if (remove)
> - list_del(&entry->list);
> + if (remove) {
> + if (entry->scanning) {
> + /*
> + * The entry will be deleted
> + * after scanning is completed.
> + */
> + entry->deleting = true;
> + } else
> + list_del(&entry->list);
> + }
>
> return entry;
> }
> @@ -715,33 +723,6 @@ int efivar_entry_size(struct efivar_entry *entry, unsigned long *size)
> EXPORT_SYMBOL_GPL(efivar_entry_size);
>
> /**
> - * __efivar_entry_get - call get_variable()
> - * @entry: read data for this variable
> - * @attributes: variable attributes
> - * @size: size of @data buffer
> - * @data: buffer to store variable data
> - *
> - * The caller MUST call efivar_entry_iter_begin() and
> - * efivar_entry_iter_end() before and after the invocation of this
> - * function, respectively.
> - */
> -int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
> - unsigned long *size, void *data)
> -{
> - const struct efivar_operations *ops = __efivars->ops;
> - efi_status_t status;
> -
> - WARN_ON(!spin_is_locked(&__efivars->lock));
> -
> - status = ops->get_variable(entry->var.VariableName,
> - &entry->var.VendorGuid,
> - attributes, size, data);
> -
> - return efi_status_to_err(status);
> -}
> -EXPORT_SYMBOL_GPL(__efivar_entry_get);
> -
> -/**
> * efivar_entry_get - call get_variable()
> * @entry: read data for this variable
> * @attributes: variable attributes
> diff --git a/include/linux/efi.h b/include/linux/efi.h
> index 5f8f176..1e3388e 100644
> --- a/include/linux/efi.h
> +++ b/include/linux/efi.h
> @@ -782,6 +782,8 @@ struct efivar_entry {
> struct efi_variable var;
> struct list_head list;
> struct kobject kobj;
> + bool scanning;
> + bool deleting;
> };
>
> extern struct list_head efivar_sysfs_list;
> @@ -809,8 +811,6 @@ int __efivar_entry_delete(struct efivar_entry *entry);
> int efivar_entry_delete(struct efivar_entry *entry);
>
> int efivar_entry_size(struct efivar_entry *entry, unsigned long *size);
> -int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
> - unsigned long *size, void *data);
> int efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
> unsigned long *size, void *data);
> int efivar_entry_set(struct efivar_entry *entry, u32 attributes,
> --
> 1.8.2.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to maj...@vg...
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
|
|
From: Seiji A. <sei...@hd...> - 2013-09-27 20:24:06
|
Change form v1
- Rebase to 3.12-rc2
Currently, when mounting pstore file system, a read callback of efi_pstore
driver runs mutiple times as below.
- In the first read callback, scan efivar_sysfs_list from head and pass
a kmsg buffer of a entry to an upper pstore layer.
- In the second read callback, rescan efivar_sysfs_list from the entry and pass
another kmsg buffer to it.
- Repeat the scan and pass until the end of efivar_sysfs_list.
In this process, an entry is read across the multiple read function calls.
To avoid race between the read and erasion, the whole process above is
protected by a spinlock, holding in open() and releasing in close().
At the same time, kmemdup() is called to pass the buffer to pstore filesystem
during it.
And then, it causes a following lockdep warning.
To make the read callback runnable without taking spinlok,
holding off a deletion of sysfs entry if it happens while scanning it
via efi_pstore, and deleting it after the scan is completed.
To implement it, this patch introduces two flags, scanning and deleting,
to efivar_entry.
Also, __efivar_entry_get() is removed because it was used in efi_pstore only.
[ 1.143710] ------------[ cut here ]------------
[ 1.144058] WARNING: CPU: 1 PID: 1 at kernel/lockdep.c:2740
lockdep_trace_alloc+0x104/0x110()
[ 1.144058] DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))
[ 1.144058] Modules linked in:
[ 1.144058] CPU: 1 PID: 1 Comm: systemd Not tainted 3.11.0-rc5 #2
[ 1.144058] 0000000000000009 ffff8800797e9ae0 ffffffff816614a5
ffff8800797e9b28
[ 1.144058] ffff8800797e9b18 ffffffff8105510d 0000000000000080
0000000000000046
[ 1.144058] 00000000000000d0 00000000000003af ffffffff81ccd0c0
ffff8800797e9b78
[ 1.144058] Call Trace:
[ 1.144058] [<ffffffff816614a5>] dump_stack+0x54/0x74
[ 1.144058] [<ffffffff8105510d>] warn_slowpath_common+0x7d/0xa0
[ 1.144058] [<ffffffff8105517c>] warn_slowpath_fmt+0x4c/0x50
[ 1.144058] [<ffffffff8131290f>] ? vsscanf+0x57f/0x7b0
[ 1.144058] [<ffffffff810bbd74>] lockdep_trace_alloc+0x104/0x110
[ 1.144058] [<ffffffff81192da0>] __kmalloc_track_caller+0x50/0x280
[ 1.144058] [<ffffffff815147bb>] ?
efi_pstore_read_func.part.1+0x12b/0x170
[ 1.144058] [<ffffffff8115b260>] kmemdup+0x20/0x50
[ 1.144058] [<ffffffff815147bb>] efi_pstore_read_func.part.1+0x12b/0x170
[ 1.144058] [<ffffffff81514800>] ?
efi_pstore_read_func.part.1+0x170/0x170
[ 1.144058] [<ffffffff815148b4>] efi_pstore_read_func+0xb4/0xe0
[ 1.144058] [<ffffffff81512b7b>] __efivar_entry_iter+0xfb/0x120
[ 1.144058] [<ffffffff8151428f>] efi_pstore_read+0x3f/0x50
[ 1.144058] [<ffffffff8128d7ba>] pstore_get_records+0x9a/0x150
[ 1.158207] [<ffffffff812af25c>] ? selinux_d_instantiate+0x1c/0x20
[ 1.158207] [<ffffffff8128ce30>] ? parse_options+0x80/0x80
[ 1.158207] [<ffffffff8128ced5>] pstore_fill_super+0xa5/0xc0
[ 1.158207] [<ffffffff811ae7d2>] mount_single+0xa2/0xd0
[ 1.158207] [<ffffffff8128ccf8>] pstore_mount+0x18/0x20
[ 1.158207] [<ffffffff811ae8b9>] mount_fs+0x39/0x1b0
[ 1.158207] [<ffffffff81160550>] ? __alloc_percpu+0x10/0x20
[ 1.158207] [<ffffffff811c9493>] vfs_kern_mount+0x63/0xf0
[ 1.158207] [<ffffffff811cbb0e>] do_mount+0x23e/0xa20
[ 1.158207] [<ffffffff8115b51b>] ? strndup_user+0x4b/0xf0
[ 1.158207] [<ffffffff811cc373>] SyS_mount+0x83/0xc0
[ 1.158207] [<ffffffff81673cc2>] system_call_fastpath+0x16/0x1b
[ 1.158207] ---[ end trace 61981bc62de9f6f4 ]---
Signed-off-by: Seiji Aguchi <sei...@hd...>
---
drivers/firmware/efi/efi-pstore.c | 145 +++++++++++++++++++++++++++++++++++---
drivers/firmware/efi/efivars.c | 3 +-
drivers/firmware/efi/vars.c | 39 +++-------
include/linux/efi.h | 4 +-
4 files changed, 151 insertions(+), 40 deletions(-)
diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c
index 5002d50..53001a5 100644
--- a/drivers/firmware/efi/efi-pstore.c
+++ b/drivers/firmware/efi/efi-pstore.c
@@ -18,14 +18,12 @@ module_param_named(pstore_disable, efivars_pstore_disable, bool, 0644);
static int efi_pstore_open(struct pstore_info *psi)
{
- efivar_entry_iter_begin();
psi->data = NULL;
return 0;
}
static int efi_pstore_close(struct pstore_info *psi)
{
- efivar_entry_iter_end();
psi->data = NULL;
return 0;
}
@@ -39,6 +37,23 @@ struct pstore_read_data {
char **buf;
};
+/**
+ * efi_pstore_read_func
+ * @entry: reading entry
+ * @data: data of the entry
+ *
+ * This function runs in non-atomic context.
+ *
+ * Also, it returns a size of NVRAM entry logged via efi_pstore_write().
+ * pstore in accordance with the returned value as below.
+ *
+ * size > 0: Got data of an entry logged via efi_pstore_write() successfully,
+ * and pstore filesystem will continue reading subsequent entries.
+ * size == 0: Entry was not logged via efi_pstore_write(),
+ * and efi_pstore driver will continue reading subsequent entries.
+ * size < 0: Failed to get data of entry logging via efi_pstore_write(),
+ * and pstore will stop reading entry.
+ */
static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
{
efi_guid_t vendor = LINUX_EFI_CRASH_GUID;
@@ -88,8 +103,9 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
return 0;
entry->var.DataSize = 1024;
- __efivar_entry_get(entry, &entry->var.Attributes,
- &entry->var.DataSize, entry->var.Data);
+ efivar_entry_get(entry, &entry->var.Attributes,
+ &entry->var.DataSize, entry->var.Data);
+
size = entry->var.DataSize;
*cb_data->buf = kmemdup(entry->var.Data, size, GFP_KERNEL);
@@ -98,12 +114,114 @@ static int efi_pstore_read_func(struct efivar_entry *entry, void *data)
return size;
}
+/**
+ * efi_pstore_scan_sysfs_enter
+ * @entry: scanning entry
+ * @next: next entry
+ * @head: list head
+ */
+static void efi_pstore_scan_sysfs_enter(struct efivar_entry *pos,
+ struct efivar_entry *next,
+ struct list_head *head)
+{
+ pos->scanning = true;
+ if (&next->list != head)
+ next->scanning = true;
+
+ /*
+ * Release a spin_lock because efi_pstore_read_func() should
+ * run in non-atomic context to allocate buffer dynamically.
+ */
+ efivar_entry_iter_end();
+}
+
+/**
+ * __efi_pstore_scan_sysfs_exit
+ * @entry: deleting entry
+ * @turn_off_scanning: Check if a scanning flag should be turned off
+ */
+static inline void __efi_pstore_scan_sysfs_exit(struct efivar_entry *entry,
+ bool turn_off_scanning)
+{
+ if (entry->deleting) {
+ list_del(&entry->list);
+ efivar_entry_iter_end();
+ efivar_unregister(entry);
+ efivar_entry_iter_begin();
+ } else if (turn_off_scanning)
+ entry->scanning = false;
+}
+
+/**
+ * efi_pstore_scan_sysfs_exit
+ * @pos: scanning entry
+ * @next: next entry
+ * @head: list head
+ * @stop: a flag checking if scanning will stop
+ */
+static void efi_pstore_scan_sysfs_exit(struct efivar_entry *pos,
+ struct efivar_entry *next,
+ struct list_head *head, bool stop)
+{
+ /* Hold a spinlock to access efivar_entry safely. */
+ efivar_entry_iter_begin();
+ __efi_pstore_scan_sysfs_exit(pos, true);
+ if (stop)
+ __efi_pstore_scan_sysfs_exit(next, &next->list != head);
+}
+
+/**
+ * efi_pstore_sysfs_entry_iter
+ *
+ * @data: function-specific data to pass to callback
+ * @pos: entry to begin iterating from
+ *
+ * You MUST call efivar_enter_iter_begin() before this function, and
+ * efivar_entry_iter_end() afterwards.
+ *
+ * It is possible to begin iteration from an arbitrary entry within
+ * the list by passing @pos. @pos is updated on return to point to
+ * the next entry of the last one passed to efi_pstore_read_func().
+ * To begin iterating from the beginning of the list @pos must be %NULL.
+ */
+static int efi_pstore_sysfs_entry_iter(void *data, struct efivar_entry **pos)
+{
+ struct efivar_entry *entry, *n;
+ struct list_head *head = &efivar_sysfs_list;
+ int size = 0;
+
+ if (!*pos) {
+ list_for_each_entry_safe(entry, n, head, list) {
+ efi_pstore_scan_sysfs_enter(entry, n, head);
+
+ size = efi_pstore_read_func(entry, data);
+ efi_pstore_scan_sysfs_exit(entry, n, head, size < 0);
+ if (size)
+ break;
+ }
+ *pos = n;
+ return size;
+ }
+
+ list_for_each_entry_safe_from((*pos), n, head, list) {
+ efi_pstore_scan_sysfs_enter((*pos), n, head);
+
+ size = efi_pstore_read_func((*pos), data);
+ efi_pstore_scan_sysfs_exit((*pos), n, head, size < 0);
+ if (size)
+ break;
+ }
+ *pos = n;
+ return size;
+}
+
static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
int *count, struct timespec *timespec,
char **buf, bool *compressed,
struct pstore_info *psi)
{
struct pstore_read_data data;
+ ssize_t size;
data.id = id;
data.type = type;
@@ -112,8 +230,11 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type,
data.compressed = compressed;
data.buf = buf;
- return __efivar_entry_iter(efi_pstore_read_func, &efivar_sysfs_list, &data,
- (struct efivar_entry **)&psi->data);
+ efivar_entry_iter_begin();
+ size = efi_pstore_sysfs_entry_iter(&data,
+ (struct efivar_entry **)&psi->data);
+ efivar_entry_iter_end();
+ return size;
}
static int efi_pstore_write(enum pstore_type_id type,
@@ -184,9 +305,17 @@ static int efi_pstore_erase_func(struct efivar_entry *entry, void *data)
return 0;
}
+ if (entry->scanning) {
+ /*
+ * Skip deletion because this entry will be deleted
+ * after scanning is completed.
+ */
+ entry->deleting = true;
+ } else
+ list_del(&entry->list);
+
/* found */
__efivar_entry_delete(entry);
- list_del(&entry->list);
return 1;
}
@@ -216,7 +345,7 @@ static int efi_pstore_erase(enum pstore_type_id type, u64 id, int count,
found = __efivar_entry_iter(efi_pstore_erase_func, &efivar_sysfs_list, &edata, &entry);
efivar_entry_iter_end();
- if (found)
+ if (found && !entry->scanning)
efivar_unregister(entry);
return 0;
diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c
index 8a7432a..831bc5c 100644
--- a/drivers/firmware/efi/efivars.c
+++ b/drivers/firmware/efi/efivars.c
@@ -388,7 +388,8 @@ static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
if (err)
return err;
- efivar_unregister(entry);
+ if (!entry->scanning)
+ efivar_unregister(entry);
/* It's dead Jim.... */
return count;
diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c
index 391c67b..573ed92 100644
--- a/drivers/firmware/efi/vars.c
+++ b/drivers/firmware/efi/vars.c
@@ -683,8 +683,16 @@ struct efivar_entry *efivar_entry_find(efi_char16_t *name, efi_guid_t guid,
if (!found)
return NULL;
- if (remove)
- list_del(&entry->list);
+ if (remove) {
+ if (entry->scanning) {
+ /*
+ * The entry will be deleted
+ * after scanning is completed.
+ */
+ entry->deleting = true;
+ } else
+ list_del(&entry->list);
+ }
return entry;
}
@@ -715,33 +723,6 @@ int efivar_entry_size(struct efivar_entry *entry, unsigned long *size)
EXPORT_SYMBOL_GPL(efivar_entry_size);
/**
- * __efivar_entry_get - call get_variable()
- * @entry: read data for this variable
- * @attributes: variable attributes
- * @size: size of @data buffer
- * @data: buffer to store variable data
- *
- * The caller MUST call efivar_entry_iter_begin() and
- * efivar_entry_iter_end() before and after the invocation of this
- * function, respectively.
- */
-int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
- unsigned long *size, void *data)
-{
- const struct efivar_operations *ops = __efivars->ops;
- efi_status_t status;
-
- WARN_ON(!spin_is_locked(&__efivars->lock));
-
- status = ops->get_variable(entry->var.VariableName,
- &entry->var.VendorGuid,
- attributes, size, data);
-
- return efi_status_to_err(status);
-}
-EXPORT_SYMBOL_GPL(__efivar_entry_get);
-
-/**
* efivar_entry_get - call get_variable()
* @entry: read data for this variable
* @attributes: variable attributes
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5f8f176..1e3388e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -782,6 +782,8 @@ struct efivar_entry {
struct efi_variable var;
struct list_head list;
struct kobject kobj;
+ bool scanning;
+ bool deleting;
};
extern struct list_head efivar_sysfs_list;
@@ -809,8 +811,6 @@ int __efivar_entry_delete(struct efivar_entry *entry);
int efivar_entry_delete(struct efivar_entry *entry);
int efivar_entry_size(struct efivar_entry *entry, unsigned long *size);
-int __efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
- unsigned long *size, void *data);
int efivar_entry_get(struct efivar_entry *entry, u32 *attributes,
unsigned long *size, void *data);
int efivar_entry_set(struct efivar_entry *entry, u32 attributes,
--
1.8.2.1
|
|
From: Martin H. <mar...@ft...> - 2013-09-26 20:29:38
|
Ahoj! Před několika měsíci mi jeden z mých známých doporučil jednu užitečnou webovou stránku, kde je předveden neobyčejně jednoduchý online zdroj výdělku peněz. Společně jsme tento způsob otestovali a skutečně výborně funguje! První dny jsme ani nechtěli věřit, že touto metodou je možno tak rychle získat velké částky. Použitím této metody vydělávám v současnosti za týden tolik, jako dříve za celý měsíc v zaměstnání. Seznamte se s touto metodou a používejte ji: http://rychlepenize24.net/tajemstvi Není to vždy lehké, avšak jednoduché a přináší to dobrý výdělek. http://rychlepenize24.net/tajemstvi S pozdravem, Martin Havelka Odhlásit: http://ftlbmw.com/00cz/unsubscribe.php?M=1783164&C=02e4a67c652690e61507a7a56dffe13d&L=32&N=5 |