From: Itsuro O. <od...@us...> - 2006-02-14 05:30:35
Update of /cvsroot/mkdump/mkexec/3.0/2.6/arch/x86_64/kernel
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12378/3.0/2.6/arch/x86_64/kernel

Added Files:
	machine_mkexec.c minik_dump.c start_new_kernel.S x86_64-setup-32.S
Log Message:
register for 3.0: based on 2.1 mkexec-2_0-linux-2_6-2_r

--- NEW FILE: minik_dump.c ---
/*
 * arch/x86_64/kernel/minik_dump.c
 *
 * $Id: minik_dump.c,v 1.1 2006/02/14 05:30:26 odaodab Exp $
 *
 * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION.
 * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K.
 *
 * This file is part of Mkdump.
 *
 * Mkdump is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation (version 2 of the License).
 *
 * Mkdump is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Mkdump; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/time.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/mkexec.h>
#include <linux/minik_param.h>
#include <linux/cpus.h>
#include <asm/proto.h>
#include <asm/processor.h>
#include <asm/e820.h>
#include <asm/hardirq.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/minik_dump.h>

static struct dump_header *dhp;

extern void mkexec_exec(void);

static void mem_seg_init(struct mem_seg *mem_segp)
{
	pg_data_t *pgdat;
	int i;
	unsigned long avoid_pfn = 0;

	mem_segp->page_size = PAGE_SIZE;

	/* XXX: intention: avoid the reserved area around 4GB
	 * (is there a write-only area?).
	 * This code is uncertain for NUMA. */
	for (i = 0; i < e820.nr_map; i++) {
		if (e820.map[i].type == E820_RESERVED &&
		    e820.map[i].addr > 0xfffff) {
			avoid_pfn = e820.map[i].addr >> PAGE_SHIFT;
			break;
		}
	}

	pgdat = pgdat_list;
	i = 0;
	if (avoid_pfn && avoid_pfn < pfn_pgdat(pgdat) + size_pgdat(pgdat)) {
		/* assume start at 0 */
		mem_segp->seg_list[i].seg_start_pfn = 0;
		mem_segp->seg_list[i].seg_size_pfn = avoid_pfn;
		i++;
		if (size_pgdat(pgdat) > 0x100000UL) { /* over 4GB */
			mem_segp->seg_list[i].seg_start_pfn = 0x100000UL;
			mem_segp->seg_list[i].seg_size_pfn =
				size_pgdat(pgdat) - 0x100000UL;
			i++;
		}
	} else {
		mem_segp->seg_list[i].seg_start_pfn = 0;
		mem_segp->seg_list[i].seg_size_pfn = size_pgdat(pgdat);
		i++;
	}
	for (pgdat = next_pgdat(pgdat); pgdat && i < MAX_MEM_SEG;
	     pgdat = next_pgdat(pgdat), i++) {
		mem_segp->seg_list[i].seg_start_pfn = pfn_pgdat(pgdat);
		mem_segp->seg_list[i].seg_size_pfn = size_pgdat(pgdat);
	}
	mem_segp->seg_num = i;
}

/*
 * init_dump_header is called when the dump mini-kernel is loaded.
 */
void init_dump_header(struct kimage *image)
{
	unsigned long *addrp;
	long size;
	int i;

	if (image->minik_type == MINIK_V1) {
		dhp = (struct dump_header *)
			(__va(image->reserve_mem[0].base_pa) + PAGE_SIZE * 2);
	} else {
		dhp = (struct dump_header *)
			page_address(image->dump_header_pages);
	}

	strncpy(dhp->dh_version.dv_magic, DUMP_MAGIC, DUMP_MAGIC_LEN);
	dhp->dh_version.dv_version = DUMP_VERSION;
	dhp->dh_version.dv_arch = DUMP_ARCH_X8664;
	/* dv_unique is set later */
	/* dh_dump_cpu N/A */
	memset(dhp->dh_tasks, 0, sizeof(dhp->dh_tasks));
	memset(dhp->dh_regs, 0, sizeof(dhp->dh_regs));
	memset(dhp->dh_panic_string, 0, sizeof(dhp->dh_panic_string));
	/* dh_time N/A */
	dhp->dh_utsname = system_utsname;

	mem_seg_init((void *)((u8 *)dhp + PAGE_SIZE));

	if (image->minik_type == MINIK_V1) {
		addrp = (unsigned long *)
			(__va(image->reserve_mem[0].base_pa) + PAGE_SIZE);
		for (i = 0; i < image->num_minik_mem; i++) {
			size = image->reserve_mem[i].size_bytes;
			/* must be a multiple of 4MB! */
			while (size > 0) {
				*addrp = image->reserve_mem[i].base_pa +
					image->reserve_mem[i].size_bytes - size;
				addrp++;
				size -= MINIK_SEG_SIZE;
			}
			/* ODA: should check addrp range */
		}
		*addrp = 0;
	}
}

/*
 * get the current context
 * (copied from LKCD)
 */
static inline void get_current_regs(struct pt_regs *regs)
{
	unsigned seg;

	__asm__ __volatile__("movq %%r15,%0" : "=m"(regs->r15));
	__asm__ __volatile__("movq %%r14,%0" : "=m"(regs->r14));
	__asm__ __volatile__("movq %%r13,%0" : "=m"(regs->r13));
	__asm__ __volatile__("movq %%r12,%0" : "=m"(regs->r12));
	__asm__ __volatile__("movq %%r11,%0" : "=m"(regs->r11));
	__asm__ __volatile__("movq %%r10,%0" : "=m"(regs->r10));
	__asm__ __volatile__("movq %%r9,%0" : "=m"(regs->r9));
	__asm__ __volatile__("movq %%r8,%0" : "=m"(regs->r8));
	__asm__ __volatile__("movq %%rbx,%0" : "=m"(regs->rbx));
	__asm__ __volatile__("movq %%rcx,%0" : "=m"(regs->rcx));
	__asm__ __volatile__("movq %%rdx,%0" : "=m"(regs->rdx));
	__asm__ __volatile__("movq %%rsi,%0" : "=m"(regs->rsi));
	__asm__ __volatile__("movq %%rdi,%0" : "=m"(regs->rdi));
	__asm__ __volatile__("movq %%rbp,%0" : "=m"(regs->rbp));
	__asm__ __volatile__("movq %%rax,%0" : "=m"(regs->rax));
	__asm__ __volatile__("movq %%rsp,%0" : "=m"(regs->rsp));
	__asm__ __volatile__("movl %%ss, %0" : "=r"(seg));
	regs->ss = (unsigned long)seg;
	__asm__ __volatile__("movl %%cs, %0" : "=r"(seg));
	regs->cs = (unsigned long)seg;
	__asm__ __volatile__("pushfq; popq %0" : "=m"(regs->eflags));
	regs->rip = (unsigned long)current_text_addr();
}

#ifdef MKEXEC_NO_PATCH

#ifdef CONFIG_X86_IO_APIC
extern void disable_IO_APIC(void);
extern spinlock_t ioapic_lock;
#endif
#ifdef CONFIG_X86_LOCAL_APIC
extern void disconnect_bsp_APIC(void);
#endif

static void mkexecreboot(void)
{
#if defined(CONFIG_X86_LOCAL_APIC)
	if (cpu_has_apic) {
		disable_local_APIC();
	}
#endif
#if defined(CONFIG_X86_IO_APIC)
	spin_lock_init(&ioapic_lock);	/* force the lock to succeed */
	disable_IO_APIC();
#elif defined(CONFIG_X86_LOCAL_APIC)
	disconnect_bsp_APIC();
#endif
	mkexec_exec();
}

#else /* MKEXEC_NO_PATCH */

#ifdef CONFIG_X86_IO_APIC
static void ioapic_mkexec_restore_once(void)
{
	static int tried[NR_CPUS];
	int cpu = get_processor_id();

	/* We may crash inside ioapic_mkexec_restore(). */
	if (tried[cpu])
		return;
	tried[cpu] = 1;
	spin_lock_init(&ioapic_lock);	/* Force success of locking it inside. */
	ioapic_mkexec_restore();	/* errors ignored */
}
#endif

#ifdef CONFIG_X86_LOCAL_APIC
void lapic_mkexec_restore_once(void)
{
	static int tried[NR_CPUS];
	int cpu = get_processor_id();

	if (!cpu_has_apic)
		return;
	/* We may crash inside lapic_mkexec_restore(). */
	if (tried[cpu])
		return;
	tried[cpu] = 1;
	lapic_mkexec_restore();		/* errors ignored */
}
#endif

static void mkexecreboot(void)
{
	/* Do not call disable_IO_APIC() or disable_local_APIC() here,
	 * as we may not imitate the BIOS legacy IRQ0 settings properly.
	 * Otherwise the minik may later hang on: Calibrating delay loop...
	 */
#ifdef CONFIG_X86_IO_APIC
	ioapic_mkexec_restore_once();
#endif /* CONFIG_X86_IO_APIC */
#ifdef CONFIG_X86_LOCAL_APIC
	lapic_mkexec_restore_once();
#endif
	mkexec_exec();
}

#endif /* MKEXEC_NO_PATCH */

#ifdef CONFIG_SMP
static atomic_t waiting_for_dump_ipi;
static int save_done[NR_CPUS];
static int reboot_cpu = 0;

void mkdump_send_nmi(void);

static void wait_and_mkexecreboot(void)
{
	int i;

	for (i = 0; i < 1000000000; i++) {
		if (atomic_read(&waiting_for_dump_ipi) == 0) {
			break;
		}
	}
	mkexecreboot();
}

static int mkdump_nmi_callback(struct pt_regs *regs, int cpu)
{
	if (save_done[cpu]) {
		/* already in the dump anyway; suppress the default NMI handler */
		return 1;
	}

	dhp->dh_tasks[cpu] = (unsigned long)current;
	dhp->dh_regs[cpu] = *regs;
	save_done[cpu] = 1;
	atomic_dec(&waiting_for_dump_ipi);

	if (cpu == reboot_cpu) {
		wait_and_mkexecreboot();
	} else {
		stop_this_cpu_safe(NULL);
	}
	return 1;
}

static void stop_other_cpus(void)
{
	int i;
	int other_cpus = num_online_cpus() - 1;

	if (other_cpus < 1) {
		/* The other CPUs are not online and we do not need to stop
		 * them, at least as long as 'cpu_online_map' is valid. */
		return;
	}

	/* always boot from 0, but if 0 is not online... */
	if (!cpu_isset(reboot_cpu, cpu_online_map)) {
		reboot_cpu = smp_processor_id();
	}

	atomic_set(&waiting_for_dump_ipi, other_cpus);
	for (i = 0; i < NR_CPUS; i++) {
		save_done[i] = 0;
	}
	set_nmi_callback(mkdump_nmi_callback);
	wmb();

	mkdump_send_nmi();

	if (smp_processor_id() == reboot_cpu) {
		wait_and_mkexecreboot();
	} else {
		stop_this_cpu_safe(NULL);
	}
}
#endif

/*
 * start_dump is called when a dump occurs:
 * save the context, stop the other CPUs and boot the mini kernel.
 */
void start_dump(char *panic_str, struct pt_regs *regs, int cpu)
{
	struct pt_regs cur_reg;

	if (regs == NULL) {
		get_current_regs(&cur_reg);
		regs = &cur_reg;
	}

	dhp->dh_version.dv_unique = (u32)xtime.tv_sec;
	dhp->dh_dump_cpu = cpu;
#ifdef BACKPORT_24
	dhp->dh_time.tv_sec = xtime.tv_sec;
	dhp->dh_time.tv_nsec = 1000 * xtime.tv_usec;
#else
	dhp->dh_time = xtime;
#endif
	dhp->dh_tasks[cpu] = (unsigned long)current;
	dhp->dh_regs[cpu] = *regs;
	strncpy(dhp->dh_panic_string, panic_str, DUMP_PANIC_LEN);

#ifdef CONFIG_SMP
	/* stop_other_cpus() can return. */
	stop_other_cpus();
#endif
	mkexecreboot();
}
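
[Editor's note: for context, a minimal sketch of how start_dump() is meant to be
entered from the crashed kernel. This caller is not part of the commit, and the
hook name is an assumption; only start_dump() and its signature come from the
file above.]

/* Hypothetical hook (assumed name: mkdump_panic_hook) illustrating the
 * intended entry into start_dump().  Passing regs == NULL makes
 * start_dump() capture the current context via get_current_regs(). */
static void mkdump_panic_hook(char *msg)
{
	start_dump(msg, NULL, smp_processor_id());
	/* not reached: mkexecreboot() boots the mini kernel */
}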
--- NEW FILE: start_new_kernel.S ---
/*
 * arch/x86_64/kernel/start_new_kernel.S
 *
 * $Id: start_new_kernel.S,v 1.1 2006/02/14 05:30:26 odaodab Exp $
 *
 * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION.
 * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K.
 *
 * This file is part of Mkdump.
 */
/*
 * Some code was derived from kexec (relocate_kernel.S):
 *
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2004 Eric Biederman <ebi...@xm...>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/linkage.h>

/*
 * Must be relocatable PIC code callable as a C function; once it
 * starts, it can not use the previous process's stack.
 */
	.globl start_new_kernel
	.code64
start_new_kernel:
	/* %rdi indirection_page
	 * %rsi reboot_code_buffer
	 * %rdx start address
	 * %rcx page_table
	 * %r8  arg5
	 * %r9  arg6
	 */

	/* zero out flags, and disable interrupts */
	pushq $0
	popfq

	/* set a new stack at the bottom of our page... */
	lea   4096(%rsi), %rsp

	/* store the parameters back on the stack */
	pushq %rdx	/* store the start address */

	/* Set cr0 to a known state:
	 * 31 1 == Paging enabled
	 * 18 0 == Alignment check disabled
	 * 16 0 == Write protect disabled
	 *  3 0 == No task switch
	 *  2 0 == Don't do FP software emulation.
	 *  0 1 == Protected mode enabled
	 */
	movq %cr0, %rax
	andq $~((1<<18)|(1<<16)|(1<<3)|(1<<2)), %rax
	orl  $((1<<31)|(1<<0)), %eax
	movq %rax, %cr0

	/* Set cr4 to a known state:
	 * 10 0 == xmm exceptions disabled
	 *  9 0 == xmm registers instructions disabled
	 *  8 0 == performance monitoring counter disabled
	 *  7 0 == page global disabled
	 *  6 0 == machine check exceptions disabled
	 *  5 1 == physical address extension enabled
	 *  4 0 == page size extensions disabled
	 *  3 0 == Debug extensions disabled
	 *  2 0 == Time stamp disable (disabled)
	 *  1 0 == Protected mode virtual interrupts disabled
	 *  0 0 == VME disabled
	 */
	movq $((1<<5)), %rax
	movq %rax, %cr4

	jmp 1f
1:

	/* Switch to the identity mapped page tables,
	 * and flush the TLB.
	 */
	movq %rcx, %cr3

	/* set all of the registers to known values */
	/* leave %rsp alone */

	xorq %rax, %rax
	xorq %rbx, %rbx
	xorq %rcx, %rcx
	xorq %rdx, %rdx
	xorq %rsi, %rsi
//	xorq %rdi, %rdi
	xorq %rbp, %rbp
	xorq %r8,  %r8
	xorq %r9,  %r9
	xorq %r10, %r10
	xorq %r11, %r11
	xorq %r12, %r12
	xorq %r13, %r13
	xorq %r14, %r14
	xorq %r15, %r15

	ret
start_new_kernel_end:

	.globl start_new_kernel_size
start_new_kernel_size:
	.quad start_new_kernel_end - start_new_kernel
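
[Editor's note: the cr0/cr4 comment blocks above give the bit positions in
prose. As a quick cross-check, a standalone C sketch spelling out the same
masks; this program is an assumption for illustration, not part of the commit.]

#include <stdio.h>

int main(void)
{
	/* bits cleared in cr0: AM(18), WP(16), TS(3), EM(2) */
	unsigned long cr0_clear = (1UL << 18) | (1UL << 16) | (1UL << 3) | (1UL << 2);
	/* bits set in cr0: PG(31), PE(0) */
	unsigned long cr0_set = (1UL << 31) | (1UL << 0);
	/* cr4 is loaded with PAE(5) only */
	unsigned long cr4 = 1UL << 5;

	printf("cr0 &= ~%#lx; cr0 |= %#lx; cr4 = %#lx\n",
	       cr0_clear, cr0_set, cr4);
	return 0;
}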
--- NEW FILE: machine_mkexec.c ---
/*
 * arch/x86_64/kernel/machine_mkexec.c
 *
 * $Id: machine_mkexec.c,v 1.1 2006/02/14 05:30:26 odaodab Exp $
 *
 * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION.
 * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K.
 *
 * This file is part of Mkdump.
 */
/*
 * Some code was derived from kexec (machine_kexec.c):
 *
 * machine_kexec.c - handle transition of Linux booting another kernel
 * Copyright (C) 2002-2004 Eric Biederman <ebi...@xm...>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/mm.h>
#include <linux/mkexec.h>
#include <linux/delay.h>
#include <linux/string.h>
#include <linux/reboot.h>
#include <linux/version.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#ifndef BACKPORT_24
#include <asm/tlbflush.h>
#endif
#include <asm/mmu_context.h>
#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/hw_irq.h>
#include <asm/proto.h>
#include <asm/mkexec-x86_64.h>

#define LEVEL0_SIZE (1UL << 12UL)
#define LEVEL1_SIZE (1UL << 21UL)
#define LEVEL2_SIZE (1UL << 30UL)
#define LEVEL3_SIZE (1UL << 39UL)
#define LEVEL4_SIZE (1UL << 48UL)

#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE)
#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

static void init_level2_page(uint64_t *level2p, unsigned long addr)
{
	unsigned long end_addr;

	addr &= PAGE_MASK;
	addr &= LEVEL4_SIZE - 1;
	end_addr = addr + LEVEL2_SIZE;
	while (addr < end_addr) {
#if 0
		printk("init_level2_page: addr=0x%lx\n", addr);
#endif
		*(level2p++) = addr | L1_ATTR;
		addr += LEVEL1_SIZE;
	}
}

static int init_level3_page(struct kimage *image, uint64_t *level3p,
			    unsigned long addr, unsigned long first_addr,
			    unsigned long last_addr, unsigned long pa_delta)
{
	unsigned long end_addr;
	int result;

#if 0
	printk("init_level3_page: addr=0x%lx, first_addr=0x%lx, last_addr=0x%lx\n",
	       addr, first_addr, last_addr);
#endif
	result = 0;
	end_addr = addr + LEVEL3_SIZE;
	for (; addr < end_addr && addr < last_addr;
	     level3p++, addr += LEVEL2_SIZE) {
		struct page *page;
		uint64_t *level2p;

		if (addr + LEVEL2_SIZE <= first_addr)
			continue;
		if (image->num_control_page >= ARRAY_SIZE(image->control_page)) {
			result = -ENOBUFS;
			goto out;
		}
		page = alloc_pages(GFP_KERNEL | GFP_DMA, 0);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
//		page->mapping = NULL;
//		page->private = 0;
//		SetPageReserved(page);
		image->control_page[image->num_control_page++] = page;
		level2p = (uint64_t *)page_address(page);
		init_level2_page(level2p, addr + pa_delta);
		*level3p = __pa(level2p) | L2_ATTR;
	}
out:
	return result;
}

static int init_level4_page(struct kimage *image, uint64_t *level4p,
			    unsigned long first_addr, unsigned long last_addr,
			    unsigned long pa_delta)
{
	int result;
	unsigned long addr;

	result = 0;
	/* x86_64 handles/pages only 48 bits! */
	first_addr &= LEVEL4_SIZE - 1;
	last_addr &= LEVEL4_SIZE - 1;
	for (addr = 0; addr < last_addr; level4p++, addr += LEVEL3_SIZE) {
		struct page *page;
		uint64_t *level3p;

#if 0
		printk("init_level4_page: addr=0x%lx, first_addr=0x%lx, last_addr=0x%lx\n",
		       addr, first_addr, last_addr);
#endif
		if (addr + LEVEL3_SIZE <= first_addr)
			continue;
		if (image->num_control_page >= ARRAY_SIZE(image->control_page)) {
			result = -ENOBUFS;
			goto out;
		}
		page = alloc_pages(GFP_KERNEL | GFP_DMA, 0);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
//		page->mapping = NULL;
//		page->private = 0;
//		SetPageReserved(page);
		image->control_page[image->num_control_page++] = page;
		level3p = (uint64_t *)page_address(page);
		memset(level3p, 0, PAGE_SIZE);
		result = init_level3_page(image, level3p, addr, first_addr,
					  last_addr, pa_delta);
		if (result) {
			goto out;
		}
		*level4p = __pa(level3p) | L3_ATTR;
	}
out:
	return result;
}

static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
	uint64_t *level4p;
	int result;

	level4p = (uint64_t *)__va(start_pgtable);
	memset(level4p, 0, PAGE_SIZE);
	result = init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT, 0);
	if (result)
		return result;
	/* Be aware the '__PAGE_OFFSET' mapping is missing here - not required. */
	/* In fact '__START_KERNEL_map' is also not needed; sometimes, somehow,
	 * the CPU has it cached. */
	return init_level4_page(image, level4p, __START_KERNEL_map,
				LEVEL4_SIZE - 1,
				/* __pa() adjusts by physical_start_offset */
				-__START_KERNEL_map + __pa(__START_KERNEL_map));
}

static void set_idt(void *newidt_va, __u16 limit)
{
	unsigned char curidt[10];

	/* x86-64 supports unaligned loads & stores */
	(*(__u16 *)(curidt)) = limit;
	(*(__u64 *)(curidt + 2)) = __pa(newidt_va);

	__asm__ __volatile__ (
		"lidt %0\n"
		: "=m" (curidt)
		);
};

static void set_gdt(void *newgdt_va, __u16 limit)
{
	unsigned char curgdt[10];

	/* x86-64 supports unaligned loads & stores */
	(*(__u16 *)(curgdt)) = limit;
	(*(__u64 *)(curgdt + 2)) = __pa(newgdt_va);

	__asm__ __volatile__ (
		"lgdt %0\n"
		: "=m" (curgdt)
		);
};

#define __STR(X) #X
#define STR(X) __STR(X)

static void load_segments(void)
{
	__asm__ __volatile__ (
		"\tmovl $"STR(__KERNEL_DS)",%eax\n"
		"\tmovl %eax,%ds\n"
		"\tmovl %eax,%es\n"
		"\tmovl %eax,%ss\n"
		"\tmovl %eax,%fs\n"
		"\tmovl %eax,%gs\n"
		);
#undef STR
#undef __STR
}

int machine_mkexec_prepare(struct kimage *image)
{
	unsigned long start_pgtable, control_code_buffer;
	int result;
	struct page *page;

	if (image->num_control_page >= ARRAY_SIZE(image->control_page))
		return -ENOBUFS;

	/* Calculate the offsets */
	page = alloc_pages(GFP_KERNEL | GFP_DMA, 0);
	if (!page)
		return -ENOMEM;
//	page->mapping = NULL;
//	page->private = 0;
//	SetPageReserved(page);
	image->control_page[image->num_control_page++] = page;
	image->control_code_page = page;
	start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
	control_code_buffer = image->new_kernel_paddr;

	/* Setup the identity mapped 64bit page table */
	result = init_pgtable(image, start_pgtable);
	if (result) {
		int num;

		for (num = image->num_control_page - 1; num >= 0; num--) {
			__free_pages(image->control_page[num], 0);
		}
		image->num_control_page = 0;
		return result;
	}

	/* Place the code in the reboot code buffer */
//	memcpy(__va(control_code_buffer), start_new_kernel, start_new_kernel_size);
//	image->reboot_code_addr = control_code_buffer;

	return 0;
}

void machine_kexec_cleanup(struct kimage *image)
{
	return;
}

typedef asmlinkage void (*start_new_kernel_t)(
	unsigned long indirection_page,
	unsigned long reboot_code_buffer,
	unsigned long start_address,
	unsigned long page_table);

extern const unsigned char start_new_kernel[];
extern void start_new_kernel_end(void);
extern const unsigned int start_new_kernel_size;

/*
 * Do not allocate memory (or fail in any way) in machine_mkexec().
 * We are past the point of no return, committed to rebooting now.
 */
void machine_mkexec(struct kimage *image)
{
	unsigned long control_code_buffer;
	unsigned long start_pgtable;
	start_new_kernel_t rnk;

	/* Interrupts aren't acceptable while we reboot */
	local_irq_disable();

	/* Calculate the offsets */
	start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
	control_code_buffer = image->reboot_code_addr;

	/* Set the low half of the page table to my identity mapped
	 * page table for kexec.  Leave the high half pointing at the
	 * kernel pages.  Don't bother to flush the global pages
	 * as that will happen when I fully switch to my identity mapped
	 * page table anyway.
	 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12)
	load_cr3(__va(start_pgtable));
#else
	memcpy((void *)read_pda(level4_pgt), __va(start_pgtable), PAGE_SIZE/2);
#endif
	__flush_tlb();

	/* The segment registers are funny things.  They are automatically
	 * loaded from a table, in memory, whenever you set them to a
	 * specific selector, but this table is never accessed again
	 * unless you set the segment to a different selector.
	 *
	 * The more common model is a cache where the behind-the-scenes
	 * work is done, but the contents are also dropped at arbitrary
	 * times.
	 *
	 * I take advantage of this here by force loading the segments,
	 * before I zap the gdt with an invalid value.
	 */
	load_segments();
	/* The gdt & idt are now invalid.
	 * If you want to load them you must set up your own idt & gdt.
	 */
	set_gdt(phys_to_virt(0), 0);
	set_idt(phys_to_virt(0), 0);

	/* now call it */
	rnk = (start_new_kernel_t)image->reboot_code_addr;
	(*rnk)(image->new_kernel_paddr, image->reboot_code_addr,
	       image->start + image->new_kernel_paddr, start_pgtable);
}
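
[Editor's note: on the identity map built by init_pgtable() above: each
top-level entry spans LEVEL3_SIZE, each third-level entry LEVEL2_SIZE, and the
leaves are 2 MB pages (L1_ATTR sets _PAGE_PSE). A standalone C sketch showing
how an address decomposes into those table indices; this program is an
assumption for illustration, not part of the commit.]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0x12345678000ULL;	/* any address below 2^48 */

	/* 512 entries per table, as filled by init_level{4,3,2}_page() */
	unsigned pml4 = (addr >> 39) & 0x1ff;	/* entry spans LEVEL3_SIZE */
	unsigned pud  = (addr >> 30) & 0x1ff;	/* entry spans LEVEL2_SIZE */
	unsigned pmd  = (addr >> 21) & 0x1ff;	/* 2 MB page (PSE leaf)    */

	printf("pml4[%u] -> pud[%u] -> pmd[%u] identity-maps %#llx\n",
	       pml4, pud, pmd,
	       (unsigned long long)(addr & ~((1ULL << 21) - 1)));
	return 0;
}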
--- NEW FILE: x86_64-setup-32.S ---
/*
 * arch/x86_64/kernel/x86_64-setup-32.S
 *
 * mkexec: Linux boots Linux (Mini kernel)
 *
 * $Id: x86_64-setup-32.S,v 1.1 2006/02/14 05:30:26 odaodab Exp $
 *
 * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION.
 * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K.
 *
 * This file is part of Mkdump.
 */
/*
 * Some code was derived from kexec:
 *
 * kexec: Linux boots Linux
 *
 * Copyright (C) 2003,2004 Eric Biederman (ebi...@xm...)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation (version 2 of the License).
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

	.data
	.equ MSR_K6_EFER, 0xC0000080
	.equ EFER_LME,    0x00000100
	.equ X86_CR4_PAE, 0x00000020
	.equ CR0_PG,      0x80000000

	.globl setup32_start, setup32_end, setup32_size
	.globl setup32_regs
setup32_start:
_reloc = .
	.code64
	/* Compute where I am running at */
	leaq setup32_start(%rip), %rbx

	/* Relocate the code */
	addq %rbx, lreloc1(%rip)
	addl %ebx, reloc1(%rip)
	addl %ebx, reloc2(%rip)
	addl %ebx, reloc3(%rip)

	/* Setup a gdt that should be preserved.
	 * This also acts as a serializing instruction, ensuring
	 * my self modifying code works.
	 */
	lgdt gdt(%rip)

	/* Switch to 32bit compatibility mode */
	ljmp *lm_exit_addr(%rip)
lm_exit:
	.code32
	/* addl %edi, ebx - _reloc(%ebx) */
	addl %edi, eip - _reloc(%ebx)
	addl %edi, esp - _reloc(%ebx)
	addl %edi, esi - _reloc(%ebx)

	/* Disable paging */
	movl %cr0, %eax
	andl $~CR0_PG, %eax
	movl %eax, %cr0

	/* Disable long mode */
	movl $MSR_K6_EFER, %ecx
	rdmsr
	andl $~EFER_LME, %eax
	wrmsr

	/* Disable PAE */
	xorl %eax, %eax
	movl %eax, %cr4

	/* load the data segments */
	movl $0x18, %eax	/* data segment */
	movl %eax, %ds
	movl %eax, %es
	movl %eax, %ss
	movl %eax, %fs
	movl %eax, %gs

	/* Load the registers */
	movl eax - _reloc(%ebx), %eax
	movl ecx - _reloc(%ebx), %ecx
	movl edx - _reloc(%ebx), %edx
	movl esi - _reloc(%ebx), %esi
	movl edi - _reloc(%ebx), %edi
	movl esp - _reloc(%ebx), %esp
	movl ebp - _reloc(%ebx), %ebp

	/*
	 * 5:
	 * jmp 5b
	 */
	.byte 0x8b, 0x1d	# movl ebx, %ebx
reloc1:
	.long ebx - _reloc
	nop
	nop

	/*
	 * 5:
	 * jmp 5b
	 */
	.byte 0xff, 0x25	# jmpl *(eip)
reloc2:
	.long eip - _reloc
	nop
	nop

	.balign 4
setup32_regs:
eax:	.long 0x00000000
ebx:	.long 0x00000000
ecx:	.long 0x00000000
edx:	.long 0x00000000
esi:	.long 0x00000000
edi:	.long 0x00000000
esp:	.long 0x00000000
ebp:	.long 0x00000000
eip:	.long 0x00000000

	.balign 16
gdt:
	/* 0x00 unusable segment
	 * 0x08 unused
	 * so use them as the gdt ptr
	 */
	.word gdt_end - gdt - 1
lreloc1:
	.quad gdt - _reloc
	.word 0, 0, 0

	/* Documented linux kernel segments */
	/* 0x10 4GB flat code segment */
	.word 0xFFFF, 0x0000, 0x9A00, 0x00CF
	/* 0x18 4GB flat data segment */
	.word 0xFFFF, 0x0000, 0x9200, 0x00CF

	/* 0x20 dummy */
	.word 0x0000, 0x0000, 0x0000, 0x0000
	/* 0x28 dummy */
	.word 0x0000, 0x0000, 0x0000, 0x0000
	/* 0x30 dummy */
	.word 0x0000, 0x0000, 0x0000, 0x0000
	/* 0x38 dummy */
	.word 0x0000, 0x0000, 0x0000, 0x0000
	/* 0x40 dummy */
	.word 0x0000, 0x0000, 0x0000, 0x0000
	/* 0x48 dummy */
	.word 0x0000, 0x0000, 0x0000, 0x0000
	/* 0x50 dummy */
	.word 0x0000, 0x0000, 0x0000, 0x0000
	/* 0x58 dummy */
	.word 0x0000, 0x0000, 0x0000, 0x0000

	/* Segments used by the 2.5.x kernel */
	/* 0x60 4GB flat code segment */
	.word 0xFFFF, 0x0000, 0x9A00, 0x00CF
	/* 0x68 4GB flat data segment */
	.word 0xFFFF, 0x0000, 0x9200, 0x00CF
gdt_end:

lm_exit_addr:
reloc3:
	.long lm_exit - _reloc
	.long 0x10
reloc4:
	.quad 0

setup32_end:

setup32_size:
	.long setup32_end - setup32_start
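
[Editor's note: to show how the blob above is meant to be consumed:
setup32_regs is nine 32-bit slots (eax through eip) that a loader patches
before copying the code into place. A hypothetical loader-side sketch follows;
the struct name and patch_setup32() are assumptions, only the setup32_*
symbols come from the file.]

#include <stdint.h>
#include <string.h>

/* Symbols exported by x86_64-setup-32.S via .globl */
extern char setup32_start[], setup32_regs[];
extern uint32_t setup32_size;

struct regs32 {		/* mirrors the setup32_regs layout */
	uint32_t eax, ebx, ecx, edx, esi, edi, esp, ebp, eip;
};

/* Copy the setup32 blob to 'dest' and patch its register block so the
 * 32-bit code ends up jumping to 'entry' with its stack at 'stack'. */
static void patch_setup32(void *dest, uint32_t entry, uint32_t stack)
{
	struct regs32 r = { 0 };

	r.eip = entry;
	r.esp = stack;
	memcpy(dest, setup32_start, setup32_size);
	memcpy((char *)dest + (setup32_regs - setup32_start), &r, sizeof(r));
}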