大规模套接字操作的分段故障
segmentation fault of massive sockets operation
这个问题困扰了我几个星期,我在网上找不到任何解决方案。所以我必须向你们大师们提出一个新问题。
我试图在大量套接字上读/写,请参阅下面的测试代码。当套接字数低于 1500 时,它表现正常。当套接字数超过 1500 时,程序将意外崩溃。我知道我应该使用命令ulimit -n 32768
来增加打开的文件数量限制。但是程序仍然无法正常运行。
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <netdb.h>
#include <errno.h>
#include <malloc.h>
#include <string.h>
int main(int argc, char* argv[])
{
if (argc!=2)
{
printf("usage: test <number of sockets>n");
return -1;
}
int socketsNum=atoi(argv[1]);
if (socketsNum<=0)
{
printf("error: invalid sockets numbern");
return -1;
}
int *socketHandles=(int*)malloc(sizeof(int)*socketsNum);
if (socketHandles==NULL)
{
printf("error: failed to alloc socket handle memoryn");
return -1;
}
for (int i=0;i<socketsNum;i++)
{
socketHandles[i]=-1;
}
printf("creating %d sockets ...n",socketsNum);
int createdSocketsNum=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socket(AF_INET,SOCK_DGRAM,IPPROTO_UDP);
if (socketHandle==-1)
{
int lastError=errno;
printf("warning: socket() failed: index: %d, error: %dn",i+1,lastError);
continue;
}
sockaddr_in sockAddr; // 0.0.0.0:0
memset(&sockAddr,0,sizeof(sockAddr));
sockAddr.sin_family = AF_INET;
sockAddr.sin_addr.s_addr = htonl(INADDR_ANY);
sockAddr.sin_port = htons(0);
if (bind( socketHandle, (sockaddr*) &sockAddr, sizeof(sockAddr)) == -1)
{
int lastError=errno;
printf("warning: bind() failed: index: %d, error: %dn",i+1,lastError);
close(socketHandle);
continue;
}
socketHandles[i]=socketHandle;
createdSocketsNum++;
}
printf("created %d sockets.n",createdSocketsNum);
//test reading;
printf("testing reading ...n");
int readableNumber=0;
int unreadableNumber=0;
int readingSkippedNumber=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
readingSkippedNumber++;
continue;
}
fd_set rset;
FD_ZERO(&rset);
FD_SET(socketHandle, &rset);
struct timeval timeout = {0, 0};
int retCode=select(socketHandle + 1, &rset, NULL, NULL, &timeout);
if (retCode==-1)
{
int lastError=errno;
printf("warning: select() failed: index: %d, error: %dn",i+1,lastError);
}
else if (retCode==0)
{
unreadableNumber++;
}
else
{
readableNumber++;
}
}
printf("readable: %d, unreadable: %d, skipped: %d, total: %dn",readableNumber,unreadableNumber,readingSkippedNumber,socketsNum);
//test writing
printf("testing writing ...n");
int writableNumber=0;
int unwritableNumber=0;
int writingSkippedNumber=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
writingSkippedNumber++;
continue;
}
fd_set wset;
FD_ZERO(&wset);
FD_SET(socketHandle, &wset);
struct timeval timeout = {0, 0};
int retCode=select(socketHandle + 1, NULL, &wset, NULL, &timeout);
if (retCode==-1)
{
int lastError=errno;
printf("warning: select() failed: index: %d, error: %dn",i+1,lastError);
}
else if (retCode==0)
{
unwritableNumber++;
}
else
{
writableNumber++;
}
}
printf("writable: %d, unwritable: %d, skipped: %d, total: %dn",writableNumber,unwritableNumber,writingSkippedNumber,socketsNum);
printf("closing ...n");
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
continue;
}
close(socketHandle);
}
free(socketHandles);
printf("completed!n");
return 0;
}
编译:
g++ TestSockets.cpp -ldl -g -ggdb -o TestSockets
配置:
ulimit -n 32768
一些典型结果:
./TestSockets 1500
的良好结果:creating 1500 sockets ... created 1500 sockets. testing reading ... readable: 0, unreadable: 1500, skipped: 0, total: 1500 testing writing ... writable: 1372, unwritable: 128, skipped: 0, total: 1500 closing ... completed!
./TestSockets 1900
的不良结果:creating 1900 sockets ... created 1900 sockets. testing reading ... warning: select() failed: index: 1797, error: 9 ...(more lines trimmed) warning: select() failed: index: 1820, error: 9 warning: select() failed: index: 1821, error: 22 readable: 0, unreadable: 1878, skipped: 0, total: 1900 testing writing ... warning: select() failed: index: 1641, error: 9 ...(more lines trimmed) warning: select() failed: index: 1660, error: 9 warning: select() failed: index: 1661, error: 22 writable: 1751, unwritable: 128, skipped: 0, total: 1900 closing ... completed!
评论:因为1900>1751+128,似乎堆栈损坏了。
./TestSockets 2000
的不良结果:creating 2000 sockets ... created 2000 sockets. testing reading ... Segmentation fault
更多调查:
根据 gdb 信息。似乎堆栈内存在运行过程中损坏:
creating 2000 sockets ...
created 2000 sockets.
testing reading ...
Program received signal SIGSEGV, Segmentation fault.
0x08048b79 in main (argc=2, argv=0xffffd3b4) at TestSockets.cpp:78
78 int socketHandle=socketHandles[i];
(gdb) print socketHandles
$1 = (int *) 0x0
(gdb) info local
socketHandle = 0
rset = {fds_bits = {0 <repeats 32 times>}}
timeout = {tv_sec = 0, tv_usec = 0}
retCode = 0
i = 1601
socketsNum = 2000
unreadableNumber = 1601
unwritableNumber = 134514249
socketHandles = 0x0
createdSocketsNum = 2000
readableNumber = 0
readingSkippedNumber = 0
writableNumber = -136436764
writingSkippedNumber = 0
(gdb) info stack
#0 0x08048b79 in main (argc=2, argv=0xffffd3b4) at TestSockets.cpp:78
fd_set
受文件描述符的最大值(而不是同时设置的文件描述符数量)的限制。通常是 1024。
因此,如果您的套接字值大于 1023,则根本无法对其使用 select
。
我知道的操作系统不支持重新定义FD_SETSIZE
。您也许能够在程序中成功重新定义fd_set
,但select
最多只能FD_SETSIZE
.
我已经解决了这个令人头疼的问题。Windows和Linux上的fd_set完全不同。在 Linux 上,如果套接字句柄 VALUE 大于 FD_SETSIZE,则在 Linux 版本FD_SET宏上会出现溢出问题。我做了一个解决方法来分配足够的缓冲区用于在 Linux 上fd_set。如
char rsetBuffer[10240];
memset(rsetBuffer,0,10240);
fd_set& rset=(fd_set&)rsetBuffer;
FD_ZERO(&rset);
FD_SET(socketHandle, &rset);
p.s. Windows和Linux上fd_set结构和FD_SET宏的定义:
在窗户上:
typedef struct fd_set {
u_int fd_count; /* how many are SET? */
SOCKET fd_array[FD_SETSIZE]; /* an array of SOCKETs */
} fd_set;
#define FD_SET(fd, set) do {
u_int __i;
for (__i = 0; __i < ((fd_set FAR *)(set))->fd_count; __i++) {
if (((fd_set FAR *)(set))->fd_array[__i] == (fd)) {
break;
}
}
if (__i == ((fd_set FAR *)(set))->fd_count) {
if (((fd_set FAR *)(set))->fd_count < FD_SETSIZE) {
((fd_set FAR *)(set))->fd_array[__i] = (fd);
((fd_set FAR *)(set))->fd_count++;
}
}
} while(0)
在 Linux 上:
/* fd_set for select and pselect. */
typedef struct
{
/* XPG4.2 requires this member name. Otherwise avoid the name
from the global namespace. */
#ifdef __USE_XOPEN
__fd_mask fds_bits[__FD_SETSIZE / __NFDBITS];
# define __FDS_BITS(set) ((set)->fds_bits)
#else
__fd_mask __fds_bits[__FD_SETSIZE / __NFDBITS];
# define __FDS_BITS(set) ((set)->__fds_bits)
#endif
} fd_set;
#define __FD_SET(d, set)
((void) (__FDS_BITS (set)[__FD_ELT (d)] |= __FD_MASK (d)))
#define __FD_CLR(d, set)
((void) (__FDS_BITS (set)[__FD_ELT (d)] &= ~__FD_MASK (d)))
#define __FD_ISSET(d, set)
((__FDS_BITS (set)[__FD_ELT (d)] & __FD_MASK (d)) != 0)
相关文章:
- 分段故障(堆芯转储)矢量
- 数组的指针从不分段故障
- Windows 10-使用gtkmm-3.0库和g++[包括再现]的分段故障
- 分段故障 运行C++代码时出现 SIGSEGV
- 分段故障背包问题
- 分段故障 11,从类函数显示动态 C 字符串
- 面临分段故障 使用 ffmpeg 读取视频时,因为"pFormatCtx-> streams [i]-> codecpar"的地址0x00
- 在C++中,当指向删除和指向不同对象时,分段故障指针
- 分段故障说明
- 分段故障(核心转储)-不知道为什么
- 分段故障线程
- hiredis SET遇到分段故障
- 分段故障,合并排序算法
- 多线程程序中的分段故障和gdb回溯上的不完整信息
- 到达主C++之前分段故障
- 分段故障核心使用 IF流转储
- 使用向量的移动键盘排列(分段故障)
- 在二进制树插入和遍历期间,我得到了分段故障
- 分段故障在类之间返回整数
- C++分段故障BST