From: Itsuro O. <od...@us...> - 2006-02-14 05:30:36
|
Update of /cvsroot/mkdump/mkexec/3.0/2.6/arch/i386/kernel In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv12378/3.0/2.6/arch/i386/kernel Added Files: machine_mkexec.c minik_dump.c mkexec-vmlinux.c mkexec-x86.c start_new_kernel.S x86-setup-32.S Log Message: register for 3.0: based on 2.1 mkexec-2_0-linux-2_6-2_r --- NEW FILE: mkexec-vmlinux.c --- /* * arch/i386/kernel/mkexec-vmlinux.c * * mkexec: Linux boots Linux(Mini kernel) * * $Id: mkexec-vmlinux.c,v 1.1 2006/02/14 05:30:26 odaodab Exp $ * * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION. * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K. * * This file is part of Mkdump. */ /* * Some codes were derived from kexec : * * kexec: Linux boots Linux * * Copyright (C) 2003,2004 Eric Biederman (ebi...@xm...) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation (version 2 of the License). * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #include <linux/config.h> #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/errno.h> #include <linux/unistd.h> #include <linux/proc_fs.h> #include <linux/ide.h> #include <linux/vmalloc.h> #include <linux/file.h> #include <asm/e820.h> #include <linux/mkexec.h> #include <asm/x86-linux.h> #include <asm/mkexec.h> #include <asm/mkexec-x86.h> extern char saved_command_line[]; int setup_linux_parameters(struct x86_linux_param_header *real_mode) { int i; /* Default screen size */ real_mode->orig_x = 0; real_mode->orig_y = 0; real_mode->orig_video_page = 0; real_mode->orig_video_mode = 0; real_mode->orig_video_cols = 80; real_mode->orig_video_lines = 25; real_mode->orig_video_ega_bx = 0; real_mode->orig_video_isVGA = 1; real_mode->orig_video_points = 16; /* Fill in the memsize later */ real_mode->ext_mem_k = 0; real_mode->alt_mem_k = 0; real_mode->e820_map_nr = 0; /* Default APM info */ memset(&real_mode->apm_bios_info, 0, sizeof(real_mode->apm_bios_info)); /* Default drive info */ memset(&real_mode->drive_info, 0, sizeof(real_mode->drive_info)); /* Default sysdesc table */ real_mode->sys_desc_table.length = 0; /* default yes: this can be overridden on the command line */ real_mode->mount_root_rdonly = 0xFFFF; /* default /dev/hda * this can be overrident on the command line if necessary. 
*/ real_mode->root_dev = (0x3 <<8)| 0; /* another safe default */ real_mode->aux_device_info = 0; /* Fill in the memory info */ real_mode->e820_map_nr = e820.nr_map; memcpy(real_mode->e820_map, e820.map, sizeof(struct e820entry) * e820.nr_map); for(i = 0; i < e820.nr_map; i++) { if (real_mode->e820_map[i].type != RANGE_RAM) continue; if ((real_mode->e820_map[i].addr <= 0x100000) && real_mode->e820_map[i].addr + real_mode->e820_map[i].size > 0x100000) { unsigned long long mem_k = ((real_mode->e820_map[i].addr + real_mode->e820_map[i].size) >> 10) - 0x100000; real_mode->ext_mem_k = mem_k; real_mode->alt_mem_k = mem_k; if (mem_k > 0xfc00) { real_mode->ext_mem_k = 0xfc00; /* 64M */ } if (mem_k > 0xffffffff) { real_mode->alt_mem_k = 0xffffffff; } } } real_mode->setup_sects = 4; memcpy(real_mode->header_magic, "HdrS",4); real_mode->protocol_version = 0x0202; // real_mode->initrd_addr_max = 0x37ffffff; return 0; } struct buffer { char *tail; char *end; }; static void buffer_append(struct buffer *buffer, const char *fmt, ...) __attribute__ ((format (printf, 2, 3))); static void buffer_append(struct buffer *buffer, const char *fmt, ...) { va_list ap; ssize_t remains; remains = buffer->end - buffer->tail; if (remains <= 1) return; va_start(ap, fmt); buffer->tail += vsnprintf(buffer->tail, remains, fmt, ap); va_end(ap); } static void buffer_append_memparse(struct buffer *buffer, unsigned long long val) { const char *s = ""; /****/ if (!(val & ((1 << 30) - 1))) { val >>= 30; s = "G"; } else if (!(val & ((1 << 20) - 1))) { val >>= 20; s = "M"; } else if (!(val & ((1 << 10) - 1))) { val >>= 10; s = "K"; } /* Decimal is ugly but it is shorter, we need space! 
*/ buffer_append(buffer, "%llu%s", val, s); } static void buffer_append_crashmem(struct buffer *buffer, struct kimage *image) { int i; int size; if (image->minik_type == MINIK_V1) { size = 0; for (i = 0; i < image->num_minik_mem; i++) { size += image->reserve_mem[i].size_bytes; } size = size / (1024 * 1024); buffer_append(buffer, " mem=%dM", size); return; } if (image->reserve_mem_dma.size_bytes > 0) { buffer_append(buffer, " memmap="); buffer_append_memparse(buffer, image->reserve_mem_dma.size_bytes); buffer_append(buffer, "@"); buffer_append_memparse(buffer, image->reserve_mem_dma.base_pa); } for (i = 0; i < image->num_minik_mem; i++) { buffer_append(buffer, " memmap="); buffer_append_memparse(buffer, image->reserve_mem[i].size_bytes); buffer_append(buffer, "@"); buffer_append_memparse(buffer, image->reserve_mem[i].base_pa); } } static void string_arg_strip(char *string, const char *arg) { char *s, *s2; s = string; while ((s = strstr(s, arg))) { if (s > string && s[-1] != ' ') continue; s2 = s + strlen(arg); while (*s2 && *s2 != ' ') s2++; while (*s2 == ' ') s2++; memmove(s, s2, strlen(s2) + 1); } } int pimage_create(struct kimage *image) { struct x86_linux_param_header *real_mode; int setup_sects; int command_line_len; int command_line_off; int kern32_size; int setup32_off; char *cmdline; char *start32; unsigned long entry; size_t size; char image_command_line[COMMAND_LINE_SIZE]; struct buffer image_command_line_buffer = { .end = image_command_line + sizeof(image_command_line), }; /* * Prepare the minik command line arguments. */ /* Copy the current kernel's commandline. */ strncpy(image_command_line, saved_command_line, sizeof(image_command_line)); image_command_line[sizeof(image_command_line) - 1] = 0; /* Do not: append " crashkernel=no" * as the commandline may get too big to fit in. * Drop the original parameter instead. 
*/ string_arg_strip(image_command_line, "crashdma="); string_arg_strip(image_command_line, "crashmain="); image_command_line_buffer.tail = image_command_line + strlen(image_command_line); if (image->minik_type == MINIK_V1) { buffer_append(&image_command_line_buffer, " dump_dev=%s maxcpus=1 noapic irqpoll", mkexec_inf.ddev); } else { buffer_append(&image_command_line_buffer, " dump_dev=%s" " dump_pfn=0x%lx" /* BSP causes problems when starting secondary CPU in minik. */ " maxcpus=1" /* FIXME: mkexec/SMP+minik/SMPmaxcpu=1 locks on no timer on 2.6.12 */ " noapic" /* Device-shared interrupts may get stuck. */ " irqpoll" " memmap=exactmap" /* * minik locks up if no 4K page is available below 1MB. * Some untested driver may require some addition low memory pages. * These values are just random choice in the 640KB range. */ " memmap=64K@192K" , mkexec_inf.ddev, /* " dump_dev=%s" */ page_to_pfn(image->dump_header_pages)); /* " dump_pfn=0x%lx" */ } buffer_append_crashmem(&image_command_line_buffer, image); /* Keep 'mkexec_inf.parm' last to make the parameters overridable by user. */ buffer_append(&image_command_line_buffer, " %s", mkexec_inf.parm); #if 0 /* debug */ printk("minik commandline: %s\n", image_command_line); #endif if (image_command_line_buffer.tail >= image_command_line_buffer.end - 1) { /* Re-termination should not be needed here. */ image_command_line_buffer.end[-1] = 0; printk("mkexec: minik commandline too long, the fitted part: %s\n", image_command_line); return -1; } setup_sects = 4; kern32_size = (setup_sects +1) *512; // /* Can not get kernel version.(setup sector has version information.) 
*/ // kernel_version = ((unsigned char *)&header) + 512 + header.kver_addr; /* The x86 code segment */ command_line_off = kern32_size; /* The 32bit entry point */ command_line_len = (image_command_line_buffer.tail + 1) - image_command_line; size = (command_line_off + command_line_len+16-1) & ~(16-1) ; setup32_off = (size + 3) & ~3; /* 4 byte align */ size = setup32_off + setup32_size; image->segment[0].memsz = image->segment[0].bufsz = size; image->segment[0].buf = (char *)((unsigned long)\ kmap(pfn_to_page(image->new_kernel_paddr >> PAGE_SHIFT)) + image->segment[0].mem); real_mode = image->segment[0].buf; cmdline = (char *)image->segment[0].buf + command_line_off; start32 = (char *)image->segment[0].buf + setup32_off; entry = (unsigned long)image->segment[0].mem + setup32_off; memset(image->segment[0].buf, 0, image->segment[0].bufsz); kunmap(pfn_to_page(image->new_kernel_paddr>> PAGE_SHIFT)); /* * Initialize the param_header with bootloader information. */ /* The location of the command line */ real_mode->cl_magic = CL_MAGIC_VALUE; real_mode->cl_offset = command_line_off; // if (header.protocol_version >= 0x0202) { // real_mode->cmd_line_ptr = 0x90000 + command_line_off; // } /* Provide absolute physical address of the commandline, it is not relocated. */ real_mode->cmd_line_ptr = __pa(cmdline); real_mode->heap_end_ptr = image->segment[0].bufsz + 1024; /* The loader type */ real_mode->loader_type = LOADER_TYPE_UNKNOWN; /* The ramdisk */ real_mode->initrd_start = 0; real_mode->initrd_size = 0; /* * Initialize the 32bit start information. 
*/ setup32_regs.eax = 0; /* unused */ setup32_regs.ebx = 0; /* 0 == boot not AP processor start */ setup32_regs.ecx = 0; /* unused */ setup32_regs.edx = 0; /* unused */ setup32_regs.esi = (unsigned long)image->segment[0].mem; /* kernel parameters */ setup32_regs.edi = 0; /* unused */ setup32_regs.esp = (unsigned long)image->segment[0].mem; /* stack, unused */ setup32_regs.ebp = 0; /* unused */ setup32_regs.eip = (unsigned long)image->segment[1].mem; /* kernel entry point */ /* * Copy it all into the startup vector */ strcpy(cmdline, image_command_line); memcpy(start32, setup32_start, setup32_size); /* Fill in the information BIOS calls would normally provide. */ if (setup_linux_parameters(real_mode) < 0) { return -1; } image->start = entry; return 0; } --- NEW FILE: x86-setup-32.S --- /* * arch/i386/kernel/x86-setup-32.S * * mkexec: Linux boots Linux(Mini kernel) * * $Id: x86-setup-32.S,v 1.1 2006/02/14 05:30:26 odaodab Exp $ * * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION. * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K. * * This file is part of Mkdump. */ /* * Some codes were derived from kexec : * * kexec: Linux boots Linux * * Copyright (C) 2003,2004 Eric Biederman (ebi...@xm...) * * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation (version 2 of the License). * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program ; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ .data .code32 .globl setup32_start, setup32_end, setup32_size .globl setup32_regs setup32_start: _reloc = . 
/* Compute where I am running at */
	pushl	%ebx
	call	1f
1:	popl	%ebx
	subl	$(1b - _reloc), %ebx

	/* align the code, I feel better when word load & store instructions
	 * are aligned.
	 */
	nop

	/* Relocate the code */
	addl	%ebx, reloc1 - _reloc(%ebx)
	addl	%ebx, reloc2 - _reloc(%ebx)
	addl	%ebx, reloc3 - _reloc(%ebx)

	/*
	 * The %ebx value pushed on entry is popped into reloc4 and added to
	 * the stored ebx/eip/esp/esi register images.
	 */
	popl	reloc4 - _reloc(%ebx)
	pushl	%eax
	movl	reloc4 - _reloc(%ebx), %eax
	addl	%eax, ebx - _reloc(%ebx)
	addl	%eax, eip - _reloc(%ebx)
	addl	%eax, esp - _reloc(%ebx)
	addl	%eax, esi - _reloc(%ebx)
	popl	%eax

	/* Make certain the special registers are in a sane state.
	 * The kernel should have done this but...
	 */
	call	x86_setup_state

	/* Setup a gdt that should be preserved */
	/* This also acts as a serializing instruction ensuring
	 * my self modifying code works.
	 */
	lgdt	gdt - _reloc(%ebx)

	/* load the data segments */
	movl	$0x18, %eax	/* data segment */
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs

	/* load the code segment */
	leal	1f - _reloc(%ebx), %eax
	pushl	$0x10		/* code segment */
	pushl	%eax		/* destination address */
	lret
1:
	/* Load the registers */
	movl	eax - _reloc(%ebx), %eax
	movl	ecx - _reloc(%ebx), %ecx
	movl	edx - _reloc(%ebx), %edx
	movl	esi - _reloc(%ebx), %esi
	movl	edi - _reloc(%ebx), %edi
	movl	esp - _reloc(%ebx), %esp
	movl	ebp - _reloc(%ebx), %ebp

	/*
	 * The next two instructions are hand-encoded so that their 32bit
	 * address operands carry the labels reloc1/reloc2 and can be patched
	 * by the relocation code above.
	 */
	.byte	0x8b, 0x1d	# movl ebx, %ebx
reloc1:
	.long	ebx - _reloc

	nop
	nop

	.byte	0xff, 0x25	# jmpl *(eip)
reloc2:
	.long	eip - _reloc

	.balign 4
setup32_regs:
eax:	.long 0x00000000
ebx:	.long 0x00000000
ecx:	.long 0x00000000
edx:	.long 0x00000000
esi:	.long 0x00000000
edi:	.long 0x00000000
esp:	.long 0x00000000
ebp:	.long 0x00000000
eip:	.long 0x00000000

gdt:
	/* 0x00 unusable segment so used as the gdt ptr */
	.word	gdt_end - gdt - 1
reloc3:
	.long	gdt - _reloc
	.word	0

	/* 0x08 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000

	/* Documented linux kernel segments */
	/* 0x10 4GB flat code segment */
	.word	0xFFFF, 0x0000, 0x9A00, 0x00CF
	/* 0x18 4GB flat data segment */
	.word	0xFFFF, 0x0000, 0x9200, 0x00CF

	/* 0x20 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000
	/* 0x28 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000
	/* 0x30 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000
	/* 0x38 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000
	/* 0x40 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000
	/* 0x48 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000
	/* 0x50 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000
	/* 0x58 dummy */
	.word	0x0000, 0x0000, 0x0000, 0x000

	/* Segments used by the 2.5.x kernel */
	/* 0x60 4GB flat code segment */
	.word	0xFFFF, 0x0000, 0x9A00, 0x00CF
	/* 0x68 4GB flat data segment */
	.word	0xFFFF, 0x0000, 0x9200, 0x00CF
gdt_end:

reloc4:
	.long	0

/* #include "x86-setup-state.S" */
	.code32

	/* Make certain the special registers are in a sane state.
	 * The kernel should have done this but...
	 */
x86_setup_state:
	/* Don't modify any registers... */
	pushl	%eax

	/* clear special bits in %cr4 */
	movl	%cr4, %eax
	andl	$0, %eax
	movl	%eax, %cr4

	popl	%eax
	ret
/* #include "x86-setup-state.S" END */

setup32_end:
setup32_size:
	.long	setup32_end - setup32_start

--- NEW FILE: mkexec-x86.c ---
/*
 * arch/i386/kernel/mkexec-x86.c
 *
 * mkexec: Linux boots Linux(Mini kernel)
 *
 * $Id: mkexec-x86.c,v 1.1 2006/02/14 05:30:26 odaodab Exp $
 *
 * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION.
 * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K.
 *
 * This file is part of Mkdump.
 */
/*
 * Some codes were derived from kexec :
 *
 * kexec: Linux boots Linux
 *
 * Copyright (C) 2003,2004 Eric Biederman (ebi...@xm...)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation (version 2 of the License).
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include <linux/config.h> #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/ide.h> #include <linux/fs.h> #include <linux/vmalloc.h> #include <linux/mkexec.h> #include <asm/mkexec.h> #include <asm/mkexec-x86.h> #include <asm/x86-linux.h> #define MAX_MEMORY_RANGES 20 #define MAX_LINE 160 #define MKEXEC_READ_BUF_SIZE 2048 static struct memory_range memory_range[MAX_MEMORY_RANGES]; extern struct mexec_mod_t mexec_inf; /* Return a sorted list of memory ranges. */ int get_memory_ranges(struct memory_range **range, int *ranges) { const char iomem[]= "/proc/iomem"; int memory_ranges = 0; char line[MAX_LINE]; char *read_mem, *end_ptr; ssize_t read_size; char *oc, *ic, *line_max; struct file *file = 0; char * tmp = 0; int i; int sts = 0; int next_read; mm_segment_t old_fs = get_fs(); /* get read buffer */ read_mem = vmalloc(MKEXEC_READ_BUF_SIZE); if (read_mem < 0) { printk("mexec_mod: vmalloc NG.\n"); return -1 ; } set_fs(KERNEL_DS); tmp = getname(iomem); if (IS_ERR(tmp)) { printk("mexec_mod: can't open %s.\n", iomem); vfree(read_mem); set_fs(old_fs); return -1 ; } file = filp_open(tmp, O_RDONLY, 0); putname(tmp); if (IS_ERR(file)) { printk("mexec_mod: can't open %s.\n", iomem); vfree(read_mem); set_fs(old_fs); return -1 ; } oc = line; line_max = oc + MAX_LINE; for(i=0; sts != -1;i++){ next_read = 0; memset(read_mem, 0, MKEXEC_READ_BUF_SIZE); if(default_llseek(file, MKEXEC_READ_BUF_SIZE * i, 0) < 0){ printk("default_llseek NG.\n"); read_size = 0; sts = -1; goto err; } if((read_size = vfs_read(file, read_mem, MKEXEC_READ_BUF_SIZE, &file->f_pos)) < 0){ printk("mini-kernel load NG(can't read %s)\n", iomem); printk("--- i=%d read_size=0x%x---\n", i, read_size); sts = -1; goto err; } /* EOF */ if 
(read_size == 0){ break; } end_ptr = read_mem + read_size; for(ic=read_mem; ; ){ unsigned long long start, end; char *str; int type; int consumed; int count; if (memory_ranges >= MAX_MEMORY_RANGES) break; for (; ; ic++, oc++){ if (end_ptr == ic){ next_read = 1; break; } if (line_max == oc){ printk("%s:too many characters of one line.\n", iomem); sts = -1; goto err; } *oc = *ic; if (*ic=='\0' || *ic==0x0a){ *oc++ = '\0'; ic++; break; } } if(next_read == 1){ break; } count = sscanf(line, "%Lx-%Lx : %n", &start, &end, &consumed); if (count != 2) { continue; } str = line + consumed; end = end + 1; #if 0 printk("%016Lx-%016Lx : %s\n", start, end, str); #endif if (memcmp(str, "System RAM", 10) == 0) { type = RANGE_RAM; } else if (memcmp(str, "reserved", 8) == 0) { type = RANGE_RESERVED; } else if (memcmp(str, "ACPI Tables", 11) == 0) { type = RANGE_ACPI; } else if (memcmp(str, "ACPI Non-volatile Storage", 25) == 0) { type = RANGE_ACPI_NVS; } else { oc = line; continue; } memory_range[memory_ranges].start = start; memory_range[memory_ranges].end = end; memory_range[memory_ranges].type = type; #if 0 printk("*** OK *** %016Lx-%016Lx : %x\n", start, end, type); #endif memory_ranges++; oc = line; } } err: set_fs(old_fs); filp_close(file, NULL); vfree(read_mem); *range = memory_range; *ranges = memory_ranges; return 0; } --- NEW FILE: machine_mkexec.c --- /* * arch/i386/kernel/machine_mkexec.c * * $Id: machine_mkexec.c,v 1.1 2006/02/14 05:30:26 odaodab Exp $ * * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION. * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K. * * This file is part of Mkdump. */ /* * Some codes were derived from kexec(machine_kexec.c) : * * machine_kexec.c - handle transition of Linux booting another kernel * Copyright (C) 2002-2004 Eric Biederman <ebi...@xm...> * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. 
*/ #include <linux/mm.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/mmu_context.h> #include <asm/io.h> #include <asm/apic.h> #include <asm/cpufeature.h> #include <linux/mkexec.h> #include <asm/mkexec-x86.h> static int identity_map_page(unsigned long addr) { int error = 0; pgd_t *dir; #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12) pud_t *pud; #endif pmd_t *pmd; pte_t *pte; dir = pgd_offset(&init_mm, addr); spin_lock(&init_mm.page_table_lock); #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 12) pud = pud_alloc(&init_mm, dir, addr); pmd = pmd_alloc(&init_mm, pud, addr); #else pmd = pmd_alloc(&init_mm, dir, addr); #endif if (pmd == NULL) { error = -ENOMEM; goto out; } pte = pte_alloc_kernel(&init_mm, pmd, addr); if (pte == NULL) { error = -ENOMEM; goto out; } #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 8) set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); #else set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); #endif out: spin_unlock(&init_mm.page_table_lock); return error; } static void set_idt(void *newidt, __u16 limit) { unsigned char curidt[6]; /* ia32 supports unaliged loads & stores */ (*(__u16 *)(curidt)) = limit; (*(__u32 *)(curidt +2)) = (unsigned long)(newidt); __asm__ __volatile__ ( "lidt %0\n" : "=m" (curidt) ); }; static void set_gdt(void *newgdt, __u16 limit) { unsigned char curgdt[6]; /* ia32 supports unaligned loads & stores */ (*(__u16 *)(curgdt)) = limit; (*(__u32 *)(curgdt +2)) = (unsigned long)(newgdt); __asm__ __volatile__ ( "lgdt %0\n" : "=m" (curgdt) ); }; static void load_segments(void) { #define __STRX(X) #X #define STRX(X) __STRX(X) __asm__ __volatile__ ( "\tljmp $"STRX(__KERNEL_CS)",$1f\n" "\t1:\n" "\tmovl $"STRX(__KERNEL_DS)",%eax\n" "\tmovl %eax,%ds\n" "\tmovl %eax,%es\n" "\tmovl %eax,%fs\n" "\tmovl %eax,%gs\n" "\tmovl %eax,%ss\n" ); #undef STRX #undef __STRX } typedef asmlinkage void (*start_new_kernel_t)( unsigned long indirection_page, unsigned long reboot_code_buffer, unsigned long 
start_address, unsigned int has_pae); const extern unsigned char start_new_kernel[]; extern void start_new_kernel_end(void); const extern unsigned int start_new_kernel_size; /* * Do what every setup is needed on image and the * reboot code buffer to allow us to avoid allocations * later. Currently nothing. */ int machine_mkexec_prepare(struct kimage *image) { return identity_map_page(image->reboot_code_addr); } void machine_mkexec_cleanup(struct kimage *image) { } /* * Do not allocate memory (or fail in any way) in machine_kexec(). * We are past the point of no return, committed to rebooting now. */ void machine_mkexec(struct kimage *image) { start_new_kernel_t rnk; /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); /* switch to an mm where the reboot_code_buffer is identity mapped */ load_cr3(init_mm.pgd); /* The segment registers are funny things, they are * automatically loaded from a table, in memory wherever you * set them to a specific selector, but this table is never * accessed again you set the segment to a different selector. * * The more common model is are caches where the behide * the scenes work is done, but is also dropped at arbitrary * times. * * I take advantage of this here by force loading the * segments, before I zap the gdt with an invalid value. */ load_segments(); /* The gdt & idt are now invalid. * If you want to load them you must set up your own idt & gdt. */ set_gdt(phys_to_virt(0),0); set_idt(phys_to_virt(0),0); /* now call it */ rnk = (start_new_kernel_t)image->reboot_code_addr; (*rnk)(image->new_kernel_paddr, image->reboot_code_addr, image->start+image->new_kernel_paddr, cpu_has_pae); } --- NEW FILE: minik_dump.c --- /* * arch/i386/kernel/minik_dump.c * * $Id: minik_dump.c,v 1.1 2006/02/14 05:30:26 odaodab Exp $ * * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION. * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K. * * This file is part of Mkdump. 
* * Mkdump is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation (version 2 of the License). * * Mkdump is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Mkdump; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include <linux/init.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/smp.h> #include <linux/irq.h> #include <linux/time.h> #include <linux/sched.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/delay.h> #include <linux/ptrace.h> #include <linux/utsname.h> #include <linux/mkexec.h> #include <linux/minik_param.h> #include <linux/cpus.h> #include <asm/processor.h> #include <asm/smp.h> #include <asm/e820.h> #include <asm/hardirq.h> #include <asm/nmi.h> #include <asm/apic.h> #include <asm/io_apic.h> #include <asm/minik_dump.h> static struct dump_header *dhp; extern void mkexec_exec(void); static void mem_seg_init(struct mem_seg *mem_segp) { pg_data_t *pgdat; int i; unsigned long avoid_pfn = 0; mem_segp->page_size = PAGE_SIZE; /* XXX: intension: avoid reserved area around 4GB (is there write only area ?) 
*/ /* this code is uncertain for NUMA */ for (i = 0; i < e820.nr_map; i++) { if (e820.map[i].type == E820_RESERVED && e820.map[i].addr > 0xfffff) { avoid_pfn = e820.map[i].addr >> PAGE_SHIFT; break; } } pgdat = pgdat_list; i = 0; if (avoid_pfn && avoid_pfn < pfn_pgdat(pgdat) + size_pgdat(pgdat)) { /* assume start to 0 */ mem_segp->seg_list[i].seg_start_pfn = 0; mem_segp->seg_list[i].seg_size_pfn = avoid_pfn; i++; if (size_pgdat(pgdat) > 0x100000UL) { /* over 4GB */ mem_segp->seg_list[i].seg_start_pfn = 0x100000UL; mem_segp->seg_list[i].seg_size_pfn = size_pgdat(pgdat) - 0x100000UL; i++; } } else { mem_segp->seg_list[i].seg_start_pfn = 0; mem_segp->seg_list[i].seg_size_pfn = size_pgdat(pgdat); i++; } for (pgdat = next_pgdat(pgdat); pgdat && i < MAX_MEM_SEG; pgdat = next_pgdat(pgdat), i++) { mem_segp->seg_list[i].seg_start_pfn = pfn_pgdat(pgdat); mem_segp->seg_list[i].seg_size_pfn = size_pgdat(pgdat); } mem_segp->seg_num = i; } /* * init_dump_header called when dump-mini-kernel load. */ void init_dump_header(struct kimage *image) { unsigned long *addrp; long size; int i; if (image->minik_type == MINIK_V1) { dhp = (struct dump_header *)(__va(image->reserve_mem[0].base_pa) + PAGE_SIZE * 2); } else { dhp = (struct dump_header *)page_address(image->dump_header_pages); } strncpy(dhp->dh_version.dv_magic, DUMP_MAGIC, DUMP_MAGIC_LEN); dhp->dh_version.dv_version = DUMP_VERSION; dhp->dh_version.dv_arch = DUMP_ARCH_I386; /* dv_unique set later */ /* dh_dump_cpu N/A */ memset(dhp->dh_tasks, 0, sizeof(dhp->dh_tasks)); memset(dhp->dh_regs, 0, sizeof(dhp->dh_regs)); memset(dhp->dh_panic_string, 0, sizeof(dhp->dh_panic_string)); /* dh_time N/A */ dhp->dh_utsname = system_utsname; mem_seg_init((void *)((u8 *)dhp + PAGE_SIZE)); if (image->minik_type == MINIK_V1) { addrp = (unsigned long *)(__va(image->reserve_mem[0].base_pa) + PAGE_SIZE); for (i = 0; i < image->num_minik_mem; i++) { size = image->reserve_mem[i].size_bytes; /* must be multiple 4MB ! 
*/ while (size > 0) { *addrp = image->reserve_mem[i].base_pa + image->reserve_mem[i].size_bytes - size; addrp++; size -= MINIK_SEG_SIZE; } /* ODA: should check addrp range */ } *addrp = 0; } } /* * get current context * (copy from LKCD) */ static inline void get_current_regs(struct pt_regs *regs) { __asm__ __volatile__("movl %%ebx,%0" : "=m"(regs->ebx)); __asm__ __volatile__("movl %%ecx,%0" : "=m"(regs->ecx)); __asm__ __volatile__("movl %%edx,%0" : "=m"(regs->edx)); __asm__ __volatile__("movl %%esi,%0" : "=m"(regs->esi)); __asm__ __volatile__("movl %%edi,%0" : "=m"(regs->edi)); __asm__ __volatile__("movl %%ebp,%0" : "=m"(regs->ebp)); __asm__ __volatile__("movl %%eax,%0" : "=m"(regs->eax)); __asm__ __volatile__("movl %%esp,%0" : "=m"(regs->esp)); __asm__ __volatile__("movw %%ss, %%ax;" :"=a"(regs->xss)); __asm__ __volatile__("movw %%cs, %%ax;" :"=a"(regs->xcs)); __asm__ __volatile__("movw %%ds, %%ax;" :"=a"(regs->xds)); __asm__ __volatile__("movw %%es, %%ax;" :"=a"(regs->xes)); __asm__ __volatile__("pushfl; popl %0" :"=m"(regs->eflags)); regs->eip = (unsigned long)current_text_addr(); } #ifdef MKEXEC_NO_PATCH #ifdef CONFIG_X86_IO_APIC extern void disable_IO_APIC(void); extern spinlock_t ioapic_lock; #endif #ifdef CONFIG_X86_LOCAL_APIC void disconnect_bsp_APIC(void); #endif static void mkexecreboot(void) { #if defined(CONFIG_X86_LOCAL_APIC) if (cpu_has_apic) { disable_local_APIC(); } #endif #if defined(CONFIG_X86_IO_APIC) spin_lock_init(&ioapic_lock); /* force to lock success */ disable_IO_APIC(); #elif defined(CONFIG_X86_LOCAL_APIC) disconnect_bsp_APIC(); #endif mkexec_exec(); } void mkdump_send_nmi(void) { unsigned int cfg; /* * if there are no other CPUs in the system then we get an APIC send * error if we try to broadcast, thus avoid sending IPIs in this case. */ if (!(num_online_cpus() > 1)) return; /* * Wait for idle. 
*/ apic_wait_icr_idle(); /* * No need to touch the target chip field */ cfg = APIC_DM_FIXED | APIC_DEST_ALLBUT | APIC_DM_NMI | APIC_DEST_LOGICAL; /* * Send the IPI. The write to APIC_ICR fires this off. */ apic_write_around(APIC_ICR, cfg); return; } #else /* MKEXEC_NO_PATCH */ #ifdef CONFIG_X86_IO_APIC static void ioapic_mkexec_restore_once(void) { static int tried[NR_CPUS]; int cpu = get_processor_id(); /* We may crash inside: ioapic_mkexec_restore() */ if (tried[cpu]) return; tried[cpu] = 1; spin_lock_init(&ioapic_lock); /* Force success of locking it inside. */ ioapic_mkexec_restore(); /* errors ignored */ } #endif #ifdef CONFIG_X86_LOCAL_APIC void lapic_mkexec_restore_once(void) { static int tried[NR_CPUS]; int cpu = get_processor_id(); if (!cpu_has_apic) return; /* We may crash inside: lapic_mkexec_restore() */ if (tried[cpu]) return; tried[cpu] = 1; lapic_mkexec_restore(); /* errors ignored */ } #endif static void mkexecreboot(void) { /* Do not: disable_IO_APIC(); * or: disable_local_APIC(); * as we may not imitate the BIOS legacy IRQ0 settings properly. * Later minik may hang-up on: Calibrating delay loop... */ #ifdef CONFIG_X86_IO_APIC ioapic_mkexec_restore_once(); #endif /* CONFIG_X86_IO_APIC */ #ifdef CONFIG_X86_LOCAL_APIC lapic_mkexec_restore_once(); #endif mkexec_exec(); } #endif /* MKEXEC_NO_PATCH */ #ifdef CONFIG_SMP static atomic_t waiting_for_dump_ipi; static int save_done[NR_CPUS]; static int reboot_cpu = 0; void mkdump_send_nmi(void); static void wait_and_mkexecreboot(void) { int i; for (i = 0; i < 1000000000; i++) { if (atomic_read(&waiting_for_dump_ipi) == 0) { break; } } mkexecreboot(); } static int mkdump_nmi_callback(struct pt_regs *regs, int fcpu) { /* 'fcpu' came from: do_nmi()->smp_processor_id() */ int cpu = get_processor_id(); if (save_done[cpu]) { return 1; /* anyway now in dump, suppress default nmi handler */ } dhp->dh_tasks[cpu] = (cpu == fcpu ? 
(unsigned long)current : 0); dhp->dh_regs[cpu] = *regs; if (!(regs->xcs & 3)) { /* IA-32 Intel(R) Architecture Software Developer's Manual, Volume 3: System Programming Guide * <http://www.intel.com/design/pentium4/manuals/253668.htm> * Chapter 5.12.1 Exception- or Interrupt-Handler Procedures: * No ss/esp saved on stack on no privilege change. */ /* AMD64 Architecture Programmer's Manual Volume 2: System Programming * <http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf> * Chapter 8.9.3 Interrupt Stack Frame: * It does not apply to x86_64 as ss/rsp is always saved there. */ dhp->dh_regs[cpu].esp = (unsigned long) (®s->esp); dhp->dh_regs[cpu].xss = __KERNEL_DS; } save_done[cpu] = 1; atomic_dec(&waiting_for_dump_ipi); if (cpu == reboot_cpu) { wait_and_mkexecreboot(); } else { stop_this_cpu_safe(NULL); } return 1; } static void stop_other_cpus(int cpu) { int i; int other_cpus = num_online_cpus() - 1; if (other_cpus < 1) { /* Other CPUs are not online and we do not need to stop them. * At least as long as 'cpu_online_map' is valid. */ return; } /* always boot from 0. but if 0 is not online... */ if (!cpu_isset(reboot_cpu, cpu_online_map)) { reboot_cpu = cpu; } atomic_set(&waiting_for_dump_ipi, other_cpus); for (i = 0; i < NR_CPUS; i++) { save_done[i] = 0; } set_nmi_callback(mkdump_nmi_callback); wmb(); mkdump_send_nmi(); if (cpu == reboot_cpu) { wait_and_mkexecreboot(); } else { stop_this_cpu_safe(NULL); } } #endif /* * start_dump called when dump occur. 
* save context, stop other cpus and boot mini kernel */ void start_dump(char *panic_str, struct pt_regs *regs, int cpu) { struct pt_regs cur_reg; int fcpu = smp_processor_id(); if (regs == NULL) { get_current_regs(&cur_reg); } else { cur_reg = *regs; /* IA-32 Intel(R) Architecture Software Developer's Manual, Volume 3: System Programming Guide * <http://www.intel.com/design/pentium4/manuals/253668.htm> * Chapter 5.12.1 Exception- or Interrupt-Handler Procedures: * No ss/esp saved on stack on no privilege change. */ /* AMD64 Architecture Programmer's Manual Volume 2: System Programming * <http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/24593.pdf> * Chapter 8.9.3 Interrupt Stack Frame: * It does not apply to x86_64 as ss/rsp is always saved there. */ if (!(regs->xcs & 3)) { cur_reg.esp = (unsigned long) (®s->esp); cur_reg.xss = __KERNEL_DS; } } dhp->dh_version.dv_unique = (u32)xtime.tv_sec; dhp->dh_dump_cpu = cpu; #if !defined(BACKPORT_24) dhp->dh_time = xtime; #else dhp->dh_time.tv_sec = xtime.tv_sec; dhp->dh_time.tv_nsec = 1000 * xtime.tv_usec; #endif dhp->dh_tasks[cpu] = (cpu == fcpu ? (unsigned long)current : 0); dhp->dh_regs[cpu] = cur_reg; strncpy(dhp->dh_panic_string, panic_str, DUMP_PANIC_LEN); #ifdef CONFIG_SMP /* stop_other_cpus() can return. */ stop_other_cpus(cpu); #endif mkexecreboot(); } --- NEW FILE: start_new_kernel.S --- /* * arch/i386/kernel/start_new_kernel.S * * $Id: start_new_kernel.S,v 1.1 2006/02/14 05:30:26 odaodab Exp $ * * Portions Copyright (C) 2004-2005 NTT DATA CORPORATION. * Portions Copyright (C) 2004-2005 VA Linux Systems Japan K.K. * * This file is part of Mkdump. */ /* * Some codes were derived from kexec(relocate_kernel.S) : * * relocate_kernel.S - put the kernel image in place to boot * Copyright (C) 2002-2003 Eric Biederman <ebi...@xm...> * * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. 
*/

#include <linux/linkage.h>

	/*
	 * Must be relocatable PIC code callable as a C function, that once
	 * it starts can not use the previous processes stack.
	 */
	.globl start_new_kernel
start_new_kernel:
	/* read the arguments and say goodbye to the stack */
	movl	4(%esp), %ebx	/* new address */
	movl	8(%esp), %ebp	/* reboot_code_buffer */
	movl	12(%esp), %edx	/* start address */
	movl	16(%esp), %ecx	/* cpu_has_pae */

	/* zero out flags, and disable interrupts */
	pushl	$0
	popfl

	/* set a new stack at the bottom of our page... */
	lea	4096(%ebp), %esp

	/* store the parameters back on the stack */
	pushl	%edx		/* store the start address */

	/* Set cr0 to a known state:
	 * 31 0 == Paging disabled
	 * 18 0 == Alignment check disabled
	 * 16 0 == Write protect disabled
	 * 3  0 == No task switch
	 * 2  0 == Don't do FP software emulation.
	 * 0  1 == Protected mode enabled
	 */
	movl	%cr0, %eax
	andl	$~((1<<31)|(1<<18)|(1<<16)|(1<<3)|(1<<2)), %eax
	orl	$(1<<0), %eax
	movl	%eax, %cr0

	/* clear cr4 if applicable */
	testl	%ecx, %ecx
	jz	1f
	/* Set cr4 to a known state:
	 * Setting everything to zero seems safe.
	 */
	movl	%cr4, %eax
	andl	$0, %eax
	movl	%eax, %cr4

	jmp	1f
1:

	/* Flush the TLB (needed?) */
	xorl	%eax, %eax
	movl	%eax, %cr3

	/* set all of the registers to known values */
	/* leave %esp alone */

	xorl	%eax, %eax
	/* xorl	%ebx, %ebx *//* new address */
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	/* 'ret' pops the start address pushed above and jumps to it. */
	ret
start_new_kernel_end:

	.globl start_new_kernel_size
start_new_kernel_size:
	.long	start_new_kernel_end - start_new_kernel
|