Linux 下epoll 网络模型

为什么需要epoll?

基于select 的I/O 复用技术速度过慢,从代码上分析,最主要的两点是

  1. 每次调用select 函数是都需要向改函数传递对象信息
  2. 需要遍历所有文件描述符才能获取有变化的文件描述符

epoll 不需要以上两点操作

epoll 函数介绍

epoll_create

/* Creates an epoll instance.  Returns an fd for the new instance.
   The "size" parameter is a hint specifying the number of file
   descriptors to be associated with the new instance.  The fd
   returned by epoll_create() should be closed with close().  */
extern int epoll_create (int __size) __THROW;

epoll_ctl

/* Manipulate an epoll instance "epfd". Returns 0 in case of success,
   -1 in case of error ( the "errno" variable will contain the
   specific error code ) The "op" parameter is one of the EPOLL_CTL_*
   constants defined above. The "fd" parameter is the target of the
   operation. The "event" parameter describes which events the caller
   is interested in and any associated user data.  */
extern int epoll_ctl (int __epfd, int __op, int __fd,
		      struct epoll_event *__event) __THROW;

epoll_wait

/* Wait for events on an epoll instance "epfd". Returns the number of
   triggered events returned in "events" buffer. Or -1 in case of
   error with the "errno" variable set to the specific error code. The
   "events" parameter is a buffer that will contain triggered
   events. The "maxevents" is the maximum number of events to be
   returned ( usually size of "events" ). The "timeout" parameter
   specifies the maximum wait time in milliseconds (-1 == infinite).

   This function is a cancellation point and therefore not marked with
   __THROW.  */
extern int epoll_wait (int __epfd, struct epoll_event *__events,
		       int __maxevents, int __timeout);

epoll_event

struct epoll_event
{
  uint32_t events;	/* Epoll events */
  epoll_data_t data;	/* User data variable */
} __EPOLL_PACKED;

epoll_data_t

typedef union epoll_data
{
  void *ptr;
  int fd;
  uint32_t u32;
  uint64_t u64;
} epoll_data_t;

利用epoll I/O 复用的服务端

服务端

  1. 建立套接字
  2. 绑定端口
  3. 监听客户端请求状态
  4. 设置epollfd
  5. 调用epoll_wait
    • 如果是服务端套接字发生变化,说明有新的连接,调用accept 函数受理
    • 如果不是服务端套接字发生变化,则响应客户端
  6. 关闭

代码如下:

epoll.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/epoll.h>

#define BUF_SIZE 100
#define EPOLL_SIZE 50

void error_handling(char *message)
{
    fputs(message, stderr);
    fputc('
', stderr);
    exit(1);
}
int main(int argc, char *argv[])
{
    int serv_sock, clnt_sock;
    struct sockaddr_in serv_adr, clnt_adr;
    socklen_t adr_sz;
    int str_len, i;
    char buf[BUF_SIZE];

    struct epoll_event *ep_events;
    struct epoll_event event;
    int epfd, event_cnt;

    if (argc != 2)
    {
        printf("Usage : %s <port> 
", argv[0]);
        exit(1);
    }
    serv_sock = socket(PF_INET, SOCK_STREAM, 0);
    memset(&serv_adr, 0, sizeof(serv_adr));
    serv_adr.sin_family = AF_INET;
    serv_adr.sin_addr.s_addr = htonl(INADDR_ANY);
    serv_adr.sin_port = htons(atoi(argv[1]));

    if (bind(serv_sock, (struct sockaddr *)&serv_adr, sizeof(serv_adr)) == -1)
        error_handling("bind error");
    if (listen(serv_sock, 5) == -1)
        error_handling("listen error");

    epfd = epoll_create(EPOLL_SIZE); //可忽略这个参数,填入的参数为操作系统参考
    ep_events = malloc(sizeof(struct epoll_event) * EPOLL_SIZE);

    event.events = EPOLLIN; //读取数据
    event.data.fd = serv_sock;
    epoll_ctl(epfd, EPOLL_CTL_ADD, serv_sock, &event); //epoll例程epfd 中添加文件描述符 serv_sock,目的是监听 enevt 中的事件

    while (1)
    {
        event_cnt = epoll_wait(epfd, ep_events, EPOLL_SIZE, -1); //获取改变了的文件描述符,返回数量
        if (event_cnt == -1)
        {
            puts("epoll_wait error");
            break;
        }

        for (i = 0; i < event_cnt; i++)
        {
            if (ep_events[i].data.fd == serv_sock) //客户端请求连接
            {
                adr_sz = sizeof(clnt_adr);
                clnt_sock = accept(serv_sock, (struct sockaddr *)&clnt_adr, &adr_sz);
                event.events = EPOLLIN;
                event.data.fd = clnt_sock; //把客户端套接字添加进去
                epoll_ctl(epfd, EPOLL_CTL_ADD, clnt_sock, &event);
                printf("connected client : %d 
", clnt_sock);
            }
            else //客户端套接字
            {
                str_len = read(ep_events[i].data.fd, buf, BUF_SIZE);
                if (str_len == 0)
                {
                    epoll_ctl(epfd, EPOLL_CTL_DEL, ep_events[i].data.fd, NULL); //从epoll中删除套接字
                    close(ep_events[i].data.fd);
                    printf("closed client : %d 
", ep_events[i].data.fd);
                }
                else
                {
                    printf("Message from client: %s
", buf); 
                    write(ep_events[i].data.fd, buf, str_len);
                }
            }
        }
    }
    close(serv_sock);
    close(epfd);

    return 0;
}


client.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>

#define BUF_SIZE 100
void error_handling(char *message)
{
    fputs(message, stderr);
    fputc('
', stderr);
    exit(1);
}

int main(int argc, char *argv[])
{
    int sock;
    char message[BUF_SIZE]="Hello World";
    int str_len;
    struct sockaddr_in serv_adr;

    if (argc != 3)
    {
        printf("Usage : %s <IP> <port>
", argv[0]);
        exit(1);
    }

    sock = socket(PF_INET, SOCK_STREAM, 0);
    if (sock == -1)
        error_handling("socket error");

    memset(&serv_adr, 0, sizeof(serv_adr));
    serv_adr.sin_family = AF_INET;
    serv_adr.sin_addr.s_addr = inet_addr(argv[1]);
    serv_adr.sin_port = htons(atoi(argv[2]));

    if (connect(sock, (struct sockaddr *)&serv_adr, sizeof(serv_adr)) == -1)
        error_handling("connect error!");
    else
        puts("Connected...........");

  
    write(sock, message, strlen(message));
    str_len = read(sock, message, BUF_SIZE - 1);
    message[str_len] = 0;
    printf("Message from server: %s
", message); 
    close(sock);
    return 0;
}

运行结果

connected client : 5 
Message from client: Hello World
closed client : 5 

Connected...........
Message from server: Hello World

epoll 的两种触发模式

条件触发和边缘触发的区别在于发生时间的时间点

1.条件触发 LT

epoll 默认以条件触发方式工作

条件触发时只要输入缓冲区任有数据需要读取,就会注册新的事件

测试代码:

ltServer.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/epoll.h>

#define BUF_SIZE 3
#define EPOLL_SIZE 50
void error_handling(char *message)
{
    fputs(message, stderr);
    fputc('
', stderr);
    exit(1);
}


int main(int argc, char *argv[])
{
    int serv_sock, clnt_sock;
    struct sockaddr_in serv_adr, clnt_adr;
    socklen_t adr_sz;
    int str_len, i;
    char buf[BUF_SIZE];

    struct epoll_event *ep_events;
    struct epoll_event event;
    int epfd, event_cnt;

    if (argc != 2)
    {
        printf("Usage : %s <port> 
", argv[0]);
        exit(1);
    }
    serv_sock = socket(PF_INET, SOCK_STREAM, 0);
    memset(&serv_adr, 0, sizeof(serv_adr));
    serv_adr.sin_family = AF_INET;
    serv_adr.sin_addr.s_addr = htonl(INADDR_ANY);
    serv_adr.sin_port = htons(atoi(argv[1]));

    if (bind(serv_sock, (struct sockaddr *)&serv_adr, sizeof(serv_adr)) == -1)
        error_handling("bind error");
    if (listen(serv_sock, 5) == -1)
        error_handling("listen error");

    epfd = epoll_create(EPOLL_SIZE); 
    ep_events = malloc(sizeof(struct epoll_event) * EPOLL_SIZE);

    event.events = EPOLLIN; 
    event.data.fd = serv_sock;
    epoll_ctl(epfd, EPOLL_CTL_ADD, serv_sock, &event); 

    while (1)
    {
        event_cnt = epoll_wait(epfd, ep_events, EPOLL_SIZE, -1); //获取改变了的文件描述符,返回数量

        if (event_cnt == -1)
        {
            puts("epoll_wait error");
            break;
        }

        puts("call epoll_wait");
        for (i = 0; i < event_cnt; i++)
        {

            if (ep_events[i].data.fd == serv_sock) 
            {
                adr_sz = sizeof(clnt_adr);
                clnt_sock = accept(serv_sock, (struct sockaddr *)&clnt_adr, &adr_sz);
                event.events = EPOLLIN;
                event.data.fd = clnt_sock; 
                epoll_ctl(epfd, EPOLL_CTL_ADD, clnt_sock, &event);
                printf("connected client : %d 
", clnt_sock);
            }
            else 
            {
                str_len = read(ep_events[i].data.fd, buf, BUF_SIZE);
                if (str_len == 0)
                {
                    epoll_ctl(epfd, EPOLL_CTL_DEL, ep_events[i].data.fd, NULL); //从epoll中删除套接字
                    close(ep_events[i].data.fd);
                    printf("closed client : %d 
", ep_events[i].data.fd);
                }
                else
                {
                    printf("Message from client: %s
", buf); 
                    write(ep_events[i].data.fd, buf, str_len);
                }
            }
        }
    }
    close(serv_sock);
    close(epfd);

    return 0;
}


client.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>

#define BUF_SIZE 1024
void error_handling(char *message)
{
    fputs(message, stderr);
    fputc('
', stderr);
    exit(1);
}

int main(int argc, char *argv[])
{
    //sock
    int sock;
    char message[BUF_SIZE];
    int str_len;
    struct sockaddr_in serv_adr;

    if (argc != 3)
    {
        printf("Usage : %s <IP> <port>
", argv[0]);
        exit(1);
    }

    sock = socket(PF_INET, SOCK_STREAM, 0);
    if (sock == -1)
        error_handling("socket error");

    memset(&serv_adr, 0, sizeof(serv_adr));
    serv_adr.sin_family = AF_INET;
    serv_adr.sin_addr.s_addr = inet_addr(argv[1]);
    serv_adr.sin_port = htons(atoi(argv[2]));

    if (connect(sock, (struct sockaddr *)&serv_adr, sizeof(serv_adr)) == -1)
        error_handling("connect error!");
    else
        puts("Connected...........");

    while (1)
    {
        fputs("Input message: ", stdout);
        fgets(message, BUF_SIZE, stdin);
        write(sock, message, strlen(message));
        str_len = read(sock, message, BUF_SIZE - 1);
        message[str_len] = 0;
        printf("Message from server: %s
", message);
    }
    close(sock);
    return 0;
}


测试结果

call epoll_wait
connected client : 5 
call epoll_wait
Message from client: abc
call epoll_wait
Message from client: def
call epoll_wait
Message from client: g


Connected...........
Input message: abcdefg
Message from server: abcdefg

2.边缘触发

在边缘触发模式中输入缓冲收到数据时仅注册一次该事件。
也就是说会一次性读完所有数据或者一次性写完全部数据,那么在使用边缘触发时许将套接字改为非阻塞模式,否则可能因为较长的I/O 时间引起服务的卡顿。

测试代码:

etServer.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <sys/epoll.h>
#include <fcntl.h>
#include <errno.h>

#define BUF_SIZE 3 
#define EPOLL_SIZE 50
void error_handling(char *message)
{
    fputs(message, stderr);
    fputc('
', stderr);
    exit(1);
}
void setnonblockingmode(int fd)
{
    int flag = fcntl(fd, F_GETFL, 0);
    fcntl(fd, F_SETFL, flag | O_NONBLOCK);
}

int main(int argc, char *argv[])
{
    int serv_sock, clnt_sock;
    struct sockaddr_in serv_adr, clnt_adr;
    socklen_t adr_sz;
    int str_len, i;
    char buf[BUF_SIZE];

    struct epoll_event *ep_events;
    struct epoll_event event;
    int epfd, event_cnt;

    if (argc != 2)
    {
        printf("Usage : %s <port> 
", argv[0]);
        exit(1);
    }
    serv_sock = socket(PF_INET, SOCK_STREAM, 0);
    memset(&serv_adr, 0, sizeof(serv_adr));
    serv_adr.sin_family = AF_INET;
    serv_adr.sin_addr.s_addr = htonl(INADDR_ANY);
    serv_adr.sin_port = htons(atoi(argv[1]));

    if (bind(serv_sock, (struct sockaddr *)&serv_adr, sizeof(serv_adr)) == -1)
        error_handling("bind error");
    if (listen(serv_sock, 5) == -1)
        error_handling("listen error");

    epfd = epoll_create(EPOLL_SIZE); 
    ep_events = malloc(sizeof(struct epoll_event) * EPOLL_SIZE);

    setnonblockingmode(serv_sock);
    event.events = EPOLLIN; 
    event.data.fd = serv_sock;
    epoll_ctl(epfd, EPOLL_CTL_ADD, serv_sock, &event); 
    while (1)
    {
        event_cnt = epoll_wait(epfd, ep_events, EPOLL_SIZE, -1); 
        if (event_cnt == -1)
        {
            puts("epoll_wait error");
            break;
        }

        puts("call epoll_wait");
        for (i = 0; i < event_cnt; i++)
        {
            if (ep_events[i].data.fd == serv_sock) 
            {
                adr_sz = sizeof(clnt_adr);
                clnt_sock = accept(serv_sock, (struct sockaddr *)&clnt_adr, &adr_sz);
                setnonblockingmode(clnt_sock);    
                event.events = EPOLLIN | EPOLLET; 
                event.data.fd = clnt_sock;       
                epoll_ctl(epfd, EPOLL_CTL_ADD, clnt_sock, &event);
                printf("connected client : %d 
", clnt_sock);
            }
            else 
            {
                while (1)
                {
                    str_len = read(ep_events[i].data.fd, buf, BUF_SIZE);
                    if (str_len == 0)
                    {
                        epoll_ctl(epfd, EPOLL_CTL_DEL, ep_events[i].data.fd, NULL); //从epoll中删除套接字
                        close(ep_events[i].data.fd);
                        printf("closed client : %d 
", ep_events[i].data.fd);
                        break;
                    }
                    else if (str_len < 0)
                    {
                        if (errno == EAGAIN) //read 返回-1 且 errno 值为 EAGAIN ,意味读取了输入缓冲的全部数据
                            break;
                    }
                    else
                    {
                        printf("Message from client: %s
", buf); 
                        write(ep_events[i].data.fd, buf, str_len);
                    }
                }
            }
        }
    }
    close(serv_sock);
    close(epfd);

    return 0;
}


client.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>

#define BUF_SIZE 1024
void error_handling(char *message)
{
    fputs(message, stderr);
    fputc('
', stderr);
    exit(1);
}

int main(int argc, char *argv[])
{
    //sock
    int sock;
    char message[BUF_SIZE];
    int str_len;
    struct sockaddr_in serv_adr;

    if (argc != 3)
    {
        printf("Usage : %s <IP> <port>
", argv[0]);
        exit(1);
    }

    sock = socket(PF_INET, SOCK_STREAM, 0);
    if (sock == -1)
        error_handling("socket error");

    memset(&serv_adr, 0, sizeof(serv_adr));
    serv_adr.sin_family = AF_INET;
    serv_adr.sin_addr.s_addr = inet_addr(argv[1]);
    serv_adr.sin_port = htons(atoi(argv[2]));

    if (connect(sock, (struct sockaddr *)&serv_adr, sizeof(serv_adr)) == -1)
        error_handling("connect error!");
    else
        puts("Connected...........");

    while (1)
    {
        fputs("Input message: ", stdout);
        fgets(message, BUF_SIZE, stdin);
        write(sock, message, strlen(message));
        str_len = read(sock, message, BUF_SIZE - 1);
        message[str_len] = 0;
        printf("Message from server: %s
", message);
    }
    close(sock);
    return 0;
}


测试结果

call epoll_wait
connected client : 5 
call epoll_wait
Message from client: abc
Message from client: def
Message from client: fg
call epoll_wait
closed client : 5 

Connected...........
Input message: abcdeffg
Message from server: abcdeffg
Input message: ^C


边缘触发

分离了接受数据和处理数据的时间点 (一次性接受完数据,当有多个类别数据传输时可以保持各个类别数据的独立性,完整性由TCP保证)