Linux内核bind系统调用源码分析

发布时间 2024-01-01 11:11:46作者: 划水的猫

一、环境说明

内核版本:Linux 3.10

内核源码地址:https://elixir.bootlin.com/linux/v3.10/source (包含各个版本内核源码,且网页可全局搜索函数)

二、应用层-bind()函数

将socket套接字绑定到指定的本地地址(IP地址和端口)。

/*
 * Application-layer usage of bind():
 *   int bind(int sockfd, const struct sockaddr *myaddr, socklen_t addrlen);
 *
 * sockfd:  socket descriptor returned by socket()
 * myaddr:  pointer to a protocol-specific address structure
 * addrlen: length of that address structure
 * return:  0 on success, -1 on error
 */
#include <sys/socket.h>
struct sockaddr_in sock_addr;
/* Zero the whole structure before filling in individual fields. */
memset(&sock_addr,0,sizeof(sock_addr));
sock_addr.sin_family = AF_INET;
/* Address and port are converted to network byte order with htonl()/htons(). */
sock_addr.sin_addr.s_addr = htonl(INADDR_ANY);
sock_addr.sin_port = htons(SERVER_PORT);
// int bind(int sockfd, const struct sockaddr *myaddr, socklen_t addrlen);
err = bind(sockfd,(struct sockaddr*)(&sock_addr),sizeof(sock_addr));

三、BSD Socket层-sys_socketcall()函数

网络栈专用操作函数集的总入口函数,主要负责分发请求:根据call参数调用对应的底层函数进行处理(注意:socketcall只在部分体系结构上使用,例如32位x86;在x86-64等体系结构上,bind有独立的系统调用号,会直接进入sys_bind):

// file: net/socket.c
/*
 * socketcall - multiplexed entry point for the socket system calls.
 *
 * Dispatches on @call to the matching sys_*() handler and returns that
 * handler's result in err. a0, a1 and a[] are set up in the part of the
 * function elided from this excerpt; presumably they hold the arguments
 * read from the user-space @args array.
 */
SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
    ......
    switch (call) {
    case SYS_SOCKET:
        err = sys_socket(a0, a1, a[2]);
        break;
    case SYS_BIND:
        /* bind(): a1 is passed on as a user-space sockaddr pointer, a[2] as its length */
        err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]);
        break;
    case SYS_CONNECT:
        err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]);
        break;
    case SYS_LISTEN:
        err = sys_listen(a0, a1);
        break;
    case SYS_ACCEPT:
        /* accept() is routed to sys_accept4() with a fourth argument of 0 */
        err = sys_accept4(a0, (struct sockaddr __user *)a1, (int __user *)a[2], 0);
        break;
    ......

    }
    return err;
}

 四、sys_bind()函数

// file: net/socket.c
/*
 * bind - kernel entry point of the bind() system call.
 *
 * @fd:      file descriptor of the socket to bind
 * @umyaddr: user-space pointer to the address to bind to
 * @addrlen: length of the address at @umyaddr
 *
 * Looks up the socket behind @fd, copies the address into a kernel buffer,
 * runs the security hook and finally calls the socket's own bind operation.
 * Returns the result of the first step that fails, otherwise the result of
 * sock->ops->bind().
 */
SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
{
    struct socket *sock;
    struct sockaddr_storage address;
    int err, fput_needed;

    sock = sockfd_lookup_light(fd, &err, &fput_needed); /* find the struct socket for fd; err is passed by pointer, presumably set on failure */
    if (sock) {
        err = move_addr_to_kernel(umyaddr, addrlen, &address); /* copy the address from the user buffer into the kernel buffer: umyaddr -> address */
        if (err >= 0) {
            err = security_socket_bind(sock,(struct sockaddr *)&address, addrlen); /* security-module (e.g. SELinux) hook; not analysed here */
            if (!err)
                err = sock->ops->bind(sock,(struct sockaddr *) &address, addrlen); /* call the bind operation of this socket's ops table */
        }
        /* release sock->file according to fput_needed (presumably the reference taken by sockfd_lookup_light) */
        fput_light(sock->file, fput_needed);
    }
    return err;
}

在上一篇文章中(socket系统调用分析),我们分析了sock->ops = answer->ops,而answer对应的结构:

// file: net/ipv4/af_inet.c
/*
 * Table of inet_protosw entries. The entry shown here is the one for
 * SOCK_STREAM/IPPROTO_TCP: it supplies tcp_prot as .prot and
 * inet_stream_ops as .ops. The remaining entries are elided.
 */
static struct inet_protosw inetsw_array[] =
{
    {
        .type =       SOCK_STREAM,
        .protocol =   IPPROTO_TCP,
        .prot =       &tcp_prot,
        .ops =        &inet_stream_ops, /* becomes sock->ops for such sockets */
        .no_check =   0,
        .flags =      INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK,
    },
    ......
}
// file: net/ipv4/af_inet.c
/*
 * proto_ops table referenced by the SOCK_STREAM/IPPROTO_TCP entry of
 * inetsw_array. Only the first members are shown; the rest is elided.
 */
const struct proto_ops inet_stream_ops = {
    .family           = PF_INET,
    .owner           = THIS_MODULE,
    .release       = inet_release,
    .bind           = inet_bind, /* sock->ops->bind() in sys_bind ends up here */
    .connect       = inet_stream_connect,
    .socketpair       = sock_no_socketpair,
    .accept           = inet_accept,
    .getname       = inet_getname,
    .poll           = tcp_poll,
    .ioctl           = inet_ioctl,
    .listen           = inet_listen,
    ......
};

因此,sock->ops->bind最终调用的是inet_bind函数。

五、inet_bind()函数

inet_bind()函数的主要工作:
1. 若具体协议实现了自己的bind函数(如RAW),则直接调用该函数并返回;
2. 校验地址长度、地址族、地址类型以及绑定特权端口的权限;
3. 把传入的struct sockaddr中的地址赋值给struct inet_sock(四元组中的源IP、源端口由此确定);
4. 调用get_port校验端口是否冲突、是否可绑定,成功后记录源端口。

// file: net/ipv4/af_inet.c
/*
 * inet_bind - bind an AF_INET socket to a local address and port.
 *
 * @sock:     socket being bound
 * @uaddr:    address to bind to, interpreted as struct sockaddr_in
 * @addr_len: length of @uaddr
 *
 * If the protocol provides its own bind function, that function is called
 * and its result is returned. Otherwise the address is validated, stored in
 * the inet_sock and the port is reserved through sk->sk_prot->get_port().
 *
 * Returns 0 on success or a negative error code:
 *   -EINVAL        @addr_len too short, or the socket is not in TCP_CLOSE
 *                  state / already has a port number
 *   -EAFNOSUPPORT  family is neither AF_INET nor AF_UNSPEC with INADDR_ANY
 *   -EADDRNOTAVAIL address is not local/multicast/broadcast and non-local
 *                  bind is not enabled
 *   -EACCES        port below PROT_SOCK without CAP_NET_BIND_SERVICE
 *   -EADDRINUSE    get_port() refused the port
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
    struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
    struct sock *sk = sock->sk;
    struct inet_sock *inet = inet_sk(sk);
    struct net *net = sock_net(sk);
    unsigned short snum;
    int chk_addr_ret;
    int err;

    /* If the socket has its own bind function then use it. (RAW) */
    if (sk->sk_prot->bind) { 
        err = sk->sk_prot->bind(sk, uaddr, addr_len);
        goto out;
    }
    err = -EINVAL;
    if (addr_len < sizeof(struct sockaddr_in))
        goto out;

    if (addr->sin_family != AF_INET) {
        /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET)
         * only if s_addr is INADDR_ANY.
         */
        err = -EAFNOSUPPORT;
        if (addr->sin_family != AF_UNSPEC ||
            addr->sin_addr.s_addr != htonl(INADDR_ANY))
            goto out;
    }

    /* Classify the requested address; the result is compared against RTN_* below. */
    chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);

    /* Not specified by any standard per-se, however it breaks too
     * many applications when removed.  It is unfortunate since
     * allowing applications to make a non-local bind solves
     * several problems with systems using dynamic addressing.
     * (ie. your servers still start up even if your ISDN link
     *  is temporarily down)
     */
    err = -EADDRNOTAVAIL;
    if (!sysctl_ip_nonlocal_bind &&
        !(inet->freebind || inet->transparent) &&
        addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
        chk_addr_ret != RTN_LOCAL &&
        chk_addr_ret != RTN_MULTICAST &&
        chk_addr_ret != RTN_BROADCAST)
        goto out;

    /* Requested port in host byte order; a non-zero port below PROT_SOCK
     * requires CAP_NET_BIND_SERVICE.
     */
    snum = ntohs(addr->sin_port);
    err = -EACCES;
    if (snum && snum < PROT_SOCK &&
        !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
        goto out;

    /*      We keep a pair of addresses. rcv_saddr is the one
     *      used by hash lookups, and saddr is used for transmit.
     *
     *      In the BSD API these are the same except where it
     *      would be illegal to use them (multicast/broadcast) in
     *      which case the sending device address is used.
     */
    lock_sock(sk);

    /* Check these errors (active socket, double bind). */
    err = -EINVAL;
    if (sk->sk_state != TCP_CLOSE || inet->inet_num) /* bind requires TCP_CLOSE state and no port number set yet */
        goto out_release_sock;

    inet->inet_rcv_saddr = inet->inet_saddr = addr->sin_addr.s_addr; /* bind the local address */
    if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
        inet->inet_saddr = 0;  /* Use device */

    /* Make sure we are allowed to bind here. */
    if (sk->sk_prot->get_port(sk, snum)) { /* check whether the port conflicts and can be bound */
        inet->inet_saddr = inet->inet_rcv_saddr = 0; /* undo the address binding done above */
        err = -EADDRINUSE;
        goto out_release_sock;
    }

    if (inet->inet_rcv_saddr)
        sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
    if (snum)
        sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
    inet->inet_sport = htons(inet->inet_num); /* bind the local port; inet_num is presumably filled in by get_port() */
    inet->inet_daddr = 0; /* clear the destination address */
    inet->inet_dport = 0; /* clear the destination port */
    sk_dst_reset(sk);
    err = 0;
out_release_sock:
    release_sock(sk);
out:
    return err;
}

在上一篇文章中(socket系统调用分析),我们分析了sk->sk_prot = answer->prot,而answer对应的结构:

// file: net/ipv4/af_inet.c
/*
 * Table of inet_protosw entries (same excerpt as shown for sock->ops).
 * The SOCK_STREAM/IPPROTO_TCP entry supplies tcp_prot as .prot, which is
 * what sk->sk_prot refers to in inet_bind(). The remaining entries are
 * elided.
 */
static struct inet_protosw inetsw_array[] =
{
    {
        .type =       SOCK_STREAM,
        .protocol =   IPPROTO_TCP,
        .prot =       &tcp_prot, /* becomes sk->sk_prot for such sockets */
        .ops =        &inet_stream_ops,
        .no_check =   0,
        .flags =      INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK,
    },
    ......
}
// file: net/ipv4/tcp_ipv4.c
/*
 * struct proto for TCP, referenced by the SOCK_STREAM/IPPROTO_TCP entry of
 * inetsw_array. Only the first members are shown; no .bind member appears
 * in this excerpt, while .get_port is set to inet_csk_get_port.
 */
struct proto tcp_prot = {
    .name            = "TCP",
    .owner            = THIS_MODULE,
    .close            = tcp_close,
    .connect        = tcp_v4_connect,
    .disconnect        = tcp_disconnect,
    .accept            = inet_csk_accept,
    .ioctl            = tcp_ioctl,
    .init            = tcp_v4_init_sock,
    .get_port        = inet_csk_get_port, /* called as sk->sk_prot->get_port() from inet_bind() */
    ......
}

对于TCP,tcp_prot未设置bind函数,因此sk->sk_prot->bind为NULL,inet_bind()开头的if分支不会执行,而是继续执行其后的通用绑定逻辑。

sk->sk_prot->get_port对应的则是inet_csk_get_port()函数,inet_csk_get_port()主要是校验端口是否冲突,是否可绑定。当未指定端口(snum为0)时,它会调用inet_get_local_port_range()函数获取内核设置的端口号范围(对应内核参数/proc/sys/net/ipv4/ip_local_port_range),并从该范围内自动选择一个可用端口。

六、bind代码流程图