系统调用实现（Printf函数为例）

调用程序时，CPU会将当前代码段的CPL（位于CS中）与目标段的DPL（位于gdt中）进行比较，如果权限不够则无法执行，所以我们无法在用户态直接访问并执行某些指令。而通过系统调用可以从用户态转变为内核态，执行相关程序。实现的方法为0x80中断，它将CS中的CPL改变为0。

以printf函数为例，其本身调用了write函数。write函数的源代码为：

// linux/lib/write.c

#define __LIBRARY__
#include <unistd.h>

/* Instantiates write() through the _syscall3 macro: return type int, parameters (int fd, const char *buf, off_t count). */
_syscall3(int,write,int,fd,const char *,buf,off_t,count)

// linux/include/unistd.h

/* System call number for write; _syscall3 forms this name as __NR_##name and passes it in EAX. */
#define __NR_write	4

/*
 * Defines a three-argument system call wrapper named `name`.
 *
 * The generated function loads __NR_<name> into EAX (the "0" constraint
 * reuses the register of output operand 0) and a, b, c into EBX, ECX, EDX,
 * executes `int $0x80`, and reads EAX back into __res.
 *
 * Returns __res cast to `type` when it is non-negative; otherwise stores
 * -__res in errno and returns -1.
 */
#define _syscall3(type,name,atype,a,btype,b,ctype,c) \
type name(atype a,btype b,ctype c) \
{ \
long __res; \
__asm__ volatile ("int $0x80" \
	: "=a" (__res) \
	: "0" (__NR_##name),"b" ((long)(a)),"c" ((long)(b)),"d" ((long)(c))); \
if (__res>=0) \
	return (type) __res; \
errno=-__res; \
return -1; \
}

经过宏展开为：

/*
 * write() as obtained by expanding
 * _syscall3(int,write,int,fd,const char *,buf,off_t,count).
 *
 * Executes int $0x80 with __NR_write in EAX and fd, buf, count in
 * EBX, ECX, EDX. Returns the value left in EAX when it is non-negative;
 * otherwise sets errno to -__res and returns -1.
 */
int write(int fd, const char * buf, off_t count)
{
    long __res;
    __asm__ volatile (
        "int $0x80"
        : "=a" (__res)              /* output: EAX is copied into __res */
        : "0" (__NR_write),         /* input: same register as operand 0 (EAX) */
          "b" ((long)(fd)),         /* EBX */
          "c" ((long)(buf)),        /* ECX */
          "d" ((long)(count))       /* EDX */
    );
    /* A non-negative result is returned as-is; a negative one becomes errno. */
    if (__res >= 0) { return (int)__res; }
    errno = -__res;
    return -1;
}

其中汇编代码的意思是：将__NR_write放入EAX，fd放入EBX，buf放入ECX，count放入EDX，执行int $0x80中断，然后将EAX放入__res。

__NR_##name（这里是__NR_write）被称为系统调用号，在unistd.h中已经宏定义。因为所有系统调用都通过int 0x80进入内核，所以需要用这个调用号来区分内核中要执行的对应处理函数。

0x80中断执行的程序是加载OS核心文件时已经配置好了的。在执行system中的main时，执行了sched_init函数，设置了int 0x80执行system_call函数：

// linux/kernel/sched.c

void sched_init(void)
{
    ...
    set_system_gate(0x80,&system_call);
}

// linux/include/asm/system.h

/*
 * Writes an 8-byte gate descriptor at gate_addr.
 *
 * On entry EAX = 0x00080000 and EDX = addr.
 *   movw %dx,%ax  -> EAX = 0x0008 in the high word, low 16 bits of addr in the low word
 *   movw %0,%dx   -> EDX = high 16 bits of addr in the high word,
 *                    0x8000+(dpl<<13)+(type<<8) in the low word
 *   movl %eax,%1  -> stores EAX into bytes 0..3 of the descriptor
 *   movl %edx,%2  -> stores EDX into bytes 4..7 of the descriptor
 */
#define _set_gate(gate_addr,type,dpl,addr) \
__asm__ ("movw %%dx,%%ax\n\t" \
	"movw %0,%%dx\n\t" \
	"movl %%eax,%1\n\t" \
	"movl %%edx,%2" \
	: \
	: "i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
	"o" (*((char *) (gate_addr))), \
	"o" (*(4+(char *) (gate_addr))), \
	"d" ((char *) (addr)),"a" (0x00080000))

/* Fills entry n of idt via _set_gate with type 15 and DPL 3, using addr as the handler address. */
#define set_system_gate(n,addr) \
	_set_gate(&idt[n],15,3,addr)

经过宏展开为：

gate_addr是idt[0x80]（即idt[128]）的地址，addr是system_call中断处理程序的地址。

/*
 * Expansion of _set_gate(gate_addr, 15, 3, addr) as used by set_system_gate.
 *
 * On entry EAX = 0x00080000 and EDX = addr. After the two movw instructions
 * EAX holds 0x0008 in its high word and the low 16 bits of addr in its low
 * word, while EDX holds the high 16 bits of addr in its high word and the
 * flags word 0x8000+(3<<13)+(15<<8) in its low word. The two movl
 * instructions store EAX at gate_addr and EDX at gate_addr+4.
 *
 * In the flags word, 3<<13 is the DPL field and 15<<8 the gate type; 0x8000
 * is the descriptor's top bit (the "present" bit per the x86 gate layout).
 */
__asm__(
    "movw %%dx,%%ax\n\t"    /* AX <- low 16 bits of addr */
    "movw %0,%%dx\n\t"      /* DX <- flags word (operand 0) */
    "movl %%eax,%1\n\t"     /* descriptor bytes 0..3 <- EAX */
    "movl %%edx,%2"         /* descriptor bytes 4..7 <- EDX */
    :
    : "i" ((short) (0x8000+(3<<13)+(15<<8))),
      "o" (*((char *) (gate_addr))),
      "o" (*(4+(char *) (gate_addr))),
      "d" ((char *) (addr)),
      "a" (0x00080000)
);

其实就是设置了以下的idt表：

可以看到，0x80中断对应的idt表项的DPL被设置为3，而用户态的CPL为3，所以我们有权限通过该中断跳转执行。执行时，段选择符CS被设置为0x0008（内核代码段），此时CPL变为0，通过gdt表找到内核代码段，IP为system_call的地址。

system_call函数主要执行的内容（在调用write时，我们的系统调用号已经被放在EAX中）：

# linux/kernel/system_call.s

# system_call: handler installed for int 0x80 by set_system_gate.
# Expects the system call number in eax and its arguments in ebx, ecx, edx.
nr_system_calls = 72        # total number of system calls in the Linux 0.11 kernel
.globl system_call		# export the entry point
system_call:
	# Unsigned compare: a call number above nr_system_calls-1 goes to bad_sys_call
	# (not shown here; per the original note it puts -1 in eax and exits).
	cmpl $nr_system_calls-1,%eax
	ja bad_sys_call
	push %ds                        # save the original segment register values
	push %es
	push %fs
	pushl %edx
	pushl %ecx		# push %ebx,%ecx,%edx as parameters
	pushl %ebx		# to the system call
	
	# point ds and es at 0x10, the kernel data segment
	movl $0x10,%edx
	mov %dx,%ds
	mov %dx,%es
	
	movl $0x17,%edx		# fs points to local data space
	mov %dx,%fs
	call sys_call_table(,%eax,4)        # indirect call through entry eax of sys_call_table (4 bytes per entry)
	pushl %eax		# push the value left in eax by the called function
...

sys_call_table+4*%eax处存放的就是相应系统调用处理函数的入口地址（每个表项占4字节）。调用write时EAX为__NR_write（即4），对应sys_call_table中下标为4的sys_write，所以这里的call sys_call_table(,%eax,4) 就是call sys_write。

// linux/include/linux/sched.h
/* fn_ptr: pointer to a function returning int; the empty parameter list leaves its arguments unspecified. */
typedef int (*fn_ptr)();

// linux/include/linux/sys.h
fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write, sys_open, sys_close, ...};

所以，整体调用printf函数的过程是：