|
From: Stephan S. <sp...@bf...> - 2002-05-22 06:42:37
|
Hi Haris,
A few months ago we had a similar problem. We also run an RDBMS (not
Oracle, though) with linux-abi and about twice a week all SCO binaries
would die with segfault at the same time. We traced the problem to an
error in the standard kernel (even without abi). When programs modify
their local descriptor table (LDT, a terrible 386ism), the kernel will
not correctly change the LDT on a particular CPU under certain
circumstances. The problem arises on SMP kernels when you have (emulated
abi) programs that really need their LDT and also run programs that modify
their LDT.
A kernel modified with the appended patch has been running here on a
dual Athlon box for about two months without any problems. The patch is
against 2.4.19-pre4 but should work on 2.4.18 as well.
Hope this helps.
Stephan
diff -ur linux-2.4.19-pre4-ppro.orig/arch/i386/kernel/ldt.c linux-2.4.19-=
pre4-ppro/arch/i386/kernel/ldt.c
--- linux-2.4.19-pre4-ppro.orig/arch/i386/kernel/ldt.c Wed Oct 17 23:46:2=
9 2001
+++ linux-2.4.19-pre4-ppro/arch/i386/kernel/ldt.c Fri Mar 22 07:23:21 200=
2
@@ -101,7 +101,6 @@
memset(segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
wmb();
mm->context.segments =3D segments;
- mm->context.cpuvalid =3D 1UL << smp_processor_id();
load_LDT(mm);
}
=20
diff -ur linux-2.4.19-pre4-ppro.orig/arch/i386/kernel/process.c linux-2.4=
.19-pre4-ppro/arch/i386/kernel/process.c
--- linux-2.4.19-pre4-ppro.orig/arch/i386/kernel/process.c Mon Feb 25 20:=
37:53 2002
+++ linux-2.4.19-pre4-ppro/arch/i386/kernel/process.c Fri Mar 22 07:23:08=
2002
@@ -569,7 +569,6 @@
memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
}
new_mm->context.segments =3D ldt;
- new_mm->context.cpuvalid =3D ~0UL; /* valid on all CPU's - they can't h=
ave stale data */
}
=20
/*
diff -ur linux-2.4.19-pre4-ppro.orig/include/asm-i386/mmu.h linux-2.4.19-=
pre4-ppro/include/asm-i386/mmu.h
--- linux-2.4.19-pre4-ppro.orig/include/asm-i386/mmu.h Thu Jul 26 03:03:0=
4 2001
+++ linux-2.4.19-pre4-ppro/include/asm-i386/mmu.h Fri Mar 22 07:22:52 200=
2
@@ -7,7 +7,6 @@
*/
typedef struct {=20
void *segments;
- unsigned long cpuvalid;
} mm_context_t;
=20
#endif
diff -ur linux-2.4.19-pre4-ppro.orig/include/asm-i386/mmu_context.h linux=
-2.4.19-pre4-ppro/include/asm-i386/mmu_context.h
--- linux-2.4.19-pre4-ppro.orig/include/asm-i386/mmu_context.h Fri Mar 22=
07:16:02 2002
+++ linux-2.4.19-pre4-ppro/include/asm-i386/mmu_context.h Fri Mar 22 08:1=
6:50 2002
@@ -27,20 +27,31 @@
=20
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *n=
ext, struct task_struct *tsk, unsigned cpu)
{
+ void *new_ldt;
+ void *old_ldt;
+
+ /*
+ * Re-load LDT if necessary
+ */
+ new_ldt =3D default_ldt;
+ if (next->context.segments)
+ new_ldt =3D next->context.segments;
+ asm ("movb 7(%1),%%ah\n"
+ "movb 4(%1),%%al\n"
+ "shl $16,%%eax\n"
+ "movw 2(%1),%%ax" : "=3D&a"(old_ldt) :
+ "r"(gdt_table + __LDT(cpu)));
+ if (old_ldt !=3D new_ldt)
+ load_LDT(next);
+
if (prev !=3D next) {
/* stop flush ipis for the previous mm */
clear_bit(cpu, &prev->cpu_vm_mask);
- /*
- * Re-load LDT if necessary
- */
- if (prev->context.segments !=3D next->context.segments)
- load_LDT(next);
#ifdef CONFIG_SMP
cpu_tlbstate[cpu].state =3D TLBSTATE_OK;
cpu_tlbstate[cpu].active_mm =3D next;
#endif
set_bit(cpu, &next->cpu_vm_mask);
- set_bit(cpu, &next->context.cpuvalid);
/* Re-load page tables */
asm volatile("movl %0,%%cr3": :"r" (__pa(next->pgd)));
}
@@ -55,8 +66,6 @@
*/
local_flush_tlb();
}
- if (!test_and_set_bit(cpu, &next->context.cpuvalid))
- load_LDT(next);
}
#endif
}
--
Stephan Springl BFW Werner Völk GmbH
sp...@bf... Büro für Wärmemeßgeräte
+49 89 82917-452 Drosselgasse 5
82166 Gräfelfing/München
|