Yanyg - SAN Software Engineer

Linux process segfault at xx error N in IP

目录

进程异常退出后,查看内核日志(/var/log/messages),可以看到如下信息:

Dec 29 18:33:58 t430 kernel: [34217.713018] test_common[29453]: segfault at 7f0484aef1d0 ip 0000557b7c60c5c7 sp 00007ffc3a9f38a0 error 4 in test_common[557b7c609000+43000]

1 error分析

Segfault一般是由非法内存访问导致的(例如访问越界、访问未映射的地址、写只读内存等)。日志中的error数值是一个按bit位解释的错误码,低3个bit的含义如下:

bit 含义
0 0表示对应地址没有page,1表示保护异常
1 0表示读访问异常,1表示写访问异常
2 0表示内核态访问异常,1表示用户态访问异常

error=4(0b100)表示用户态读访问的地址没有对应的page(地址无效);error=6(0b110)表示用户态写访问的地址没有对应的page(地址无效)。同理,error=7(0b111)表示用户态写访问触发了保护异常(例如写只读内存)。

2 参考代码

linux/arch/x86/include/asm/traps.h:

/*
 * x86 page fault error code, one flag per bit.
 *
 *   bit   flag            when clear             when set
 *   ---   -------------   --------------------   -----------------------------
 *    0    X86_PF_PROT     no page found          protection fault
 *    1    X86_PF_WRITE    read access            write access
 *    2    X86_PF_USER     kernel-mode access     user-mode access
 *    3    X86_PF_RSVD     -                      use of reserved bit detected
 *    4    X86_PF_INSTR    -                      fault was an instruction fetch
 *    5    X86_PF_PK       -                      protection keys block access
 *
 * The flags combine by OR, e.g. X86_PF_USER | X86_PF_WRITE == 6.
 */
enum x86_pf_error_code {
        X86_PF_PROT     = 0x01,         /* bit 0 */
        X86_PF_WRITE    = 0x02,         /* bit 1 */
        X86_PF_USER     = 0x04,         /* bit 2 */
        X86_PF_RSVD     = 0x08,         /* bit 3 */
        X86_PF_INSTR    = 0x10,         /* bit 4 */
        X86_PF_PK       = 0x20,         /* bit 5 */
};

linux/arch/x86/mm/fault.c:

/*
 * Print the "<comm>[<pid>]: segfault at <address> ip <ip> sp <sp> error <code>"
 * line for a task, followed by " in " plus whatever print_vma_addr() emits
 * for the instruction pointer, a newline, and the output of show_opcodes().
 *
 * @regs:       register state; regs->ip and regs->sp are printed
 * @error_code: page fault error code, printed in hex
 * @address:    faulting address, printed in hex
 * @tsk:        task the message is about
 *
 * Nothing is printed when unhandled_signal(tsk, SIGSEGV) is false or when
 * printk_ratelimit() refuses the message.
 */
static inline void
show_signal_msg(struct pt_regs *regs, unsigned long error_code,
                unsigned long address, struct task_struct *tsk)
{
        const char *loglvl;

        /*
         * Keep this order: the rate limiter is only consulted once
         * unhandled_signal() has returned true.
         */
        if (!unhandled_signal(tsk, SIGSEGV) || !printk_ratelimit())
                return;

        /* pid 1 (and anything not above it) is logged at KERN_EMERG. */
        if (task_pid_nr(tsk) > 1)
                loglvl = KERN_INFO;
        else
                loglvl = KERN_EMERG;

        printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
               loglvl, tsk->comm, task_pid_nr(tsk), address,
               (void *)regs->ip, (void *)regs->sp, error_code);

        /* Continuation pieces of the same log line. */
        print_vma_addr(KERN_CONT " in ", regs->ip);
        printk(KERN_CONT "\n");

        show_opcodes(regs, loglvl);
}