Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d1da249

Browse files
committed
Scheduler: new REUSEPORT balancing method/architecture.
Starting with Linux Kernel 3.9, there is a new TCP/UDP socket option that allows multiple threads (or any instance under the same process context) to bind the same port and address. This patch enables the SO_REUSEPORT socket option if the running Kernel is >= 3.9. In that mode each worker thread creates its own socket bound to the same address. With this implementation we reduce the number of system calls involved when a new connection arrives, avoid looking up the least loaded thread, and allow the Kernel to perform better scheduling on SMP systems that need to scale. If the detected Kernel is older than 3.9, the old Fair Balancing mechanism is used. Signed-off-by: Eduardo Silva <[email protected]>
1 parent 2787854 commit d1da249

14 files changed

Lines changed: 134 additions & 22 deletions

File tree

plugins/liana/liana.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ int _mkp_network_io_bind(int socket_fd, const struct sockaddr *addr, socklen_t a
181181

182182
ret = bind(socket_fd, addr, addrlen);
183183
if( ret == -1 ) {
184+
perror("bind");
184185
mk_warn("Error binding socket");
185186
return ret;
186187
}
@@ -211,7 +212,7 @@ int _mkp_network_io_bind(int socket_fd, const struct sockaddr *addr, socklen_t a
211212
return ret;
212213
}
213214

214-
int _mkp_network_io_server(int port, char *listen_addr)
215+
int _mkp_network_io_server(int port, char *listen_addr, int reuse_port)
215216
{
216217
int socket_fd = -1;
217218
int ret;
@@ -244,8 +245,14 @@ int _mkp_network_io_server(int port, char *listen_addr)
244245

245246
mk_api->socket_set_tcp_nodelay(socket_fd);
246247
mk_api->socket_reset(socket_fd);
247-
ret = _mkp_network_io_bind(socket_fd, rp->ai_addr, rp->ai_addrlen, MK_SOMAXCONN);
248248

249+
/* Check if reuse port can be enabled on this socket */
250+
if (reuse_port == MK_TRUE &&
251+
mk_kernel_runver >= MK_KERNEL_VERSION(3, 9, 0)) {
252+
mk_api->socket_set_tcp_reuseport(socket_fd);
253+
}
254+
255+
ret = _mkp_network_io_bind(socket_fd, rp->ai_addr, rp->ai_addrlen, MK_SOMAXCONN);
249256
if(ret == -1) {
250257
mk_err("Cannot listen on %s:%i\n", listen_addr, port);
251258
continue;
@@ -259,4 +266,3 @@ int _mkp_network_io_server(int port, char *listen_addr)
259266

260267
return socket_fd;
261268
}
262-

src/include/MKPlugin.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "mk_http.h"
3333
#include "mk_file.h"
3434
#include "mk_socket.h"
35+
#include "mk_kernel.h"
3536
#include "mk_macros.h"
3637

3738
/* global vars */
@@ -77,7 +78,7 @@ int MK_EXPORT _mkp_network_io_send_file(int socket_fd, int file_fd, off_t *file_
7778
int MK_EXPORT _mkp_network_io_create_socket(int domain, int type, int protocol);
7879
int MK_EXPORT _mkp_network_io_bind(int socket_fd, const struct sockaddr *addr,
7980
socklen_t addrlen, int backlog);
80-
int MK_EXPORT _mkp_network_io_server(int port, char *listen_addr);
81+
int MK_EXPORT _mkp_network_io_server(int port, char *listen_addr, int reuse_port);
8182
int MK_EXPORT _mkp_event_read(int sockfd);
8283
int MK_EXPORT _mkp_event_write(int sockfd);
8384
int MK_EXPORT _mkp_event_error(int sockfd);

src/include/mk_config.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
#define O_NOATIME 01000000
3333
#endif
3434

35-
#define M_DEFAULT_CONFIG_FILE "monkey.conf"
35+
#define MK_DEFAULT_CONFIG_FILE "monkey.conf"
3636
#define MK_DEFAULT_MIMES_CONF_FILE "monkey.mime"
3737
#define MK_DEFAULT_PLUGIN_LOAD_CONF_FILE "plugins.load"
3838
#define MK_DEFAULT_SITES_CONF_DIR "sites/"
@@ -86,6 +86,7 @@ struct server_config
8686
int8_t fdt; /* is FDT enabled ? */
8787
int8_t is_daemon;
8888
int8_t is_seteuid;
89+
int8_t scheduler_mode; /* Scheduler balancing mode */
8990

9091
char *serverconf; /* path to configuration files */
9192
char *listen_addr;

src/include/mk_epoll.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ struct epoll_state_index
8787

8888
/* Monkey epoll calls */
8989
int mk_epoll_create();
90-
void *mk_epoll_init(int efd, int max_events);
90+
void *mk_epoll_init(int server_fd, int efd, int max_events);
9191
struct epoll_state *mk_epoll_state_get(int fd);
9292

9393
int mk_epoll_add(int efd, int fd, int mode, unsigned int behavior);

src/include/mk_plugin.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ struct plugin_network_io
110110
int (*send_file) (int, int, off_t *, size_t);
111111
int (*create_socket) (int, int, int);
112112
int (*bind) (int, const struct sockaddr *addr, socklen_t, int);
113-
int (*server) (int, char *);
113+
int (*server) (int, char *, int);
114114
};
115115

116116
struct plugin
@@ -177,6 +177,7 @@ struct plugin_api
177177
int (*socket_reset) (int);
178178
int (*socket_set_tcp_fastopen) (int);
179179
int (*socket_set_tcp_nodelay) (int);
180+
int (*socket_set_tcp_reuseport) (int);
180181
int (*socket_connect) (char *, int);
181182
int (*socket_set_nonblocking) (int);
182183
int (*socket_create) ();

src/include/mk_scheduler.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,20 @@
3636
#define MK_SCHEDULER_CONN_PROCESS 1
3737
#define MK_SCHEDULER_SIGNAL_DEADBEEF 0xDEADBEEF
3838

39+
/*
40+
* Scheduler balancing mode:
41+
*
42+
* - Fair Balancing: use a single socket and, upon accepting
43+
* new connections, look up the least loaded thread and
44+
* assign the socket to that specific epoll queue.
45+
*
46+
* - ReusePort: Use new Linux Kernel 3.9 feature that
47+
* allows threads to share a bound address on a listener
48+
* socket. We let the Kernel decide how to balance.
49+
*/
50+
#define MK_SCHEDULER_FAIR_BALANCING 0
51+
#define MK_SCHEDULER_REUSEPORT 1
52+
3953
extern __thread struct rb_root *cs_list;
4054

4155
struct sched_connection
@@ -55,6 +69,9 @@ struct sched_list_node
5569
unsigned long long accepted_connections;
5670
unsigned long long closed_connections;
5771

72+
/* Only used in MK_SCHEDULER_REUSEPORT mode */
73+
int server_fd;
74+
5875
/*
5976
* Red-Black tree queue to perform fast lookup over
6077
* the scheduler busy queue
@@ -93,6 +110,7 @@ struct sched_list_node *sched_list;
93110
/* Struct under thread context */
94111
typedef struct
95112
{
113+
int server_fd;
96114
int epoll_fd;
97115
int epoll_max_events;
98116
int max_events;
@@ -104,6 +122,7 @@ typedef struct
104122

105123
pthread_key_t MK_EXPORT worker_sched_node;
106124
extern pthread_mutex_t mutex_worker_init;
125+
pthread_mutex_t mutex_port_init;
107126

108127
void mk_sched_init();
109128
int mk_sched_launch_thread(int max_events, pthread_t *tout, mklib_ctx ctx);
@@ -129,6 +148,7 @@ void mk_sched_update_thread_status(struct sched_list_node *sched,
129148

130149
int mk_sched_check_timeouts(struct sched_list_node *sched);
131150
int mk_sched_add_client(int remote_fd);
151+
int mk_sched_add_client_reuseport(int remote_fd, struct sched_list_node *sched);
132152
int mk_sched_register_client(int remote_fd, struct sched_list_node *sched);
133153
int mk_sched_remove_client(struct sched_list_node *sched, int remote_fd);
134154
struct sched_connection *mk_sched_get_connection(struct sched_list_node

src/include/mk_socket.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
#define SOCK_NONBLOCK 04000
3333
#endif
3434

35+
#ifndef SO_REUSEPORT
36+
#define SO_REUSEPORT 15
37+
#endif
38+
3539
/*
3640
* TCP_FASTOPEN: as this is a very new option in the Linux Kernel, the value is
3741
* not yet exported and can be missing, lets make sure is available for all
@@ -52,14 +56,15 @@ int mk_socket_set_cork_flag(int fd, int state);
5256
int mk_socket_set_tcp_fastopen(int sockfd);
5357
int mk_socket_set_tcp_nodelay(int sockfd);
5458
int mk_socket_set_tcp_defer_accept(int sockfd);
59+
int mk_socket_set_tcp_reuseport(int sockfd);
5560
int mk_socket_set_nonblocking(int sockfd);
5661

5762
int mk_socket_close(int socket);
5863

5964
int mk_socket_create(void);
6065
int mk_socket_connect(char *host, int port);
6166
int mk_socket_reset(int socket);
62-
int mk_socket_server(int port, char *listen_addr);
67+
int mk_socket_server(int port, char *listen_addr, int reuse_port);
6368

6469
int mk_socket_accept(int server_fd);
6570
int mk_socket_sendv(int socket_fd, struct mk_iov *mk_io);

src/mk_config.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,15 @@ void mk_config_set_init_values(void)
585585
config->index_files = NULL;
586586
config->user_dir = NULL;
587587

588-
/* TCP Auto Corking: only available on Linux >= 3.14.0 */
588+
/* TCP REUSEPORT: available on Linux >= 3.9 */
589+
if (mk_kernel_runver >= MK_KERNEL_VERSION(3, 9, 0)) {
590+
config->scheduler_mode = MK_SCHEDULER_REUSEPORT;
591+
}
592+
else {
593+
config->scheduler_mode = MK_SCHEDULER_FAIR_BALANCING;
594+
}
595+
596+
/* TCP Auto Corking: only available on Linux >= 3.14 */
589597
if (mk_kernel_runver >= MK_KERNEL_VERSION(3, 14, 0) &&
590598
mk_socket_tcp_autocorking() == MK_TRUE) {
591599
config->corking = MK_FALSE;

src/mk_epoll.c

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "mk_utils.h"
4242
#include "mk_macros.h"
4343
#include "mk_linuxtrace.h"
44+
#include "mk_scheduler.h"
4445

4546
static __thread struct epoll_state_index mk_epoll_state_k;
4647

@@ -213,12 +214,12 @@ int mk_epoll_create()
213214
return efd;
214215
}
215216

216-
void *mk_epoll_init(int efd, int max_events)
217+
void *mk_epoll_init(int server_fd, int efd, int max_events)
217218
{
218219
int i, fd, ret = -1;
219220
int num_fds;
220221
int fds_timeout;
221-
222+
int remote_fd;
222223
struct epoll_event *events;
223224
struct sched_list_node *sched;
224225

@@ -254,6 +255,23 @@ void *mk_epoll_init(int efd, int max_events)
254255
}
255256
}
256257

258+
/* New connection under MK_SCHEDULER_REUSEPORT mode */
259+
if (fd == server_fd) {
260+
remote_fd = mk_socket_accept(server_fd);
261+
if (mk_unlikely(remote_fd == -1)) {
262+
#ifdef TRACE
263+
MK_TRACE("Could not accept connection");
264+
#endif
265+
continue;
266+
}
267+
#ifdef TRACE
268+
MK_TRACE("New connection arrived: FD %i", remote_fd);
269+
#endif
270+
/* Register new connection into the scheduler */
271+
mk_sched_add_client_reuseport(remote_fd, sched);
272+
mk_sched_register_client(remote_fd, sched);
273+
fd = remote_fd;
274+
}
257275
ret = mk_conn_read(fd);
258276
}
259277
else if (events[i].events & EPOLLOUT) {

src/mk_plugin.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,7 @@ void mk_plugin_init()
387387
api->socket_connect = mk_socket_connect;
388388
api->socket_reset = mk_socket_reset;
389389
api->socket_set_tcp_fastopen = mk_socket_set_tcp_fastopen;
390+
api->socket_set_tcp_reuseport = mk_socket_set_tcp_reuseport;
390391
api->socket_set_tcp_nodelay = mk_socket_set_tcp_nodelay;
391392
api->socket_set_nonblocking = mk_socket_set_nonblocking;
392393
api->socket_create = mk_socket_create;

0 commit comments

Comments
 (0)