把lpCompletionKey 绑定到一个句柄上后。如果大量的socket超时断开连接(用WSASend投递一个写操作,大小为0,以标识是关闭操作),会有GetQueuedCompletionStatus 的lpCompletionKey不是所投递的数据。(通过地址比较)。大家有没有遇到过这样的问题呢?谢谢。只要能解决问题,分好说。200分。由于这样的贴子,没有人回答,我这里先给一个小分数,如果解决了这个问题,会有200分的报酬。
调试欢乐多
当大量的socket超时断开连接时,为每个Socket投递一个WSASend操作,返回的应该一样。
不妨把代码贴出来看看!
、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、、CIOCPServer::BindSktToIOCP( &pClt->skt.m_oIOCPClnt);
int CIOCPServer::BindSktToIOCP( IOCPCLNT *pCltObj)
{
//associate the client socket with completion-port
if (CreateIoCompletionPort((HANDLE)pCltObj->hSocket,m_hCompletionPort, (DWORD)pCltObj, 0) == NULL)
{
AfxTrace( "CreateIoCompletionPort failed with error %d\n", GetLastError());
return -1;
}
return 0;
}完成端口返回
DWORD WINAPI CIOCPServer::ServerWorkerThread(LPVOID CompletionPortID)
{
HANDLE CompletionPort = (HANDLE) CompletionPortID;
DWORD BytesTransferred;
PIOCPCLNT pIOCPClnt;
OVERLAPPED ol;
DWORD SendBytes, RecvBytes;
DWORD Flags;
int rc;
BOOL bRun = TRUE;
while( bRun )
{
if (GetQueuedCompletionStatus(CompletionPort, &BytesTransferred,
(LPDWORD)&pIOCPClnt,(LPOVERLAPPED*)&pIOCPClnt, INFINITE) == 0)
{
rc = GetLastError();
if(rc == ERROR_NETNAME_DELETED || rc == ERROR_CONNECTION_ABORTED|| rc == ERROR_OPERATION_ABORTED)
{
AfxTrace( "GetQueuedCompletionStatus failed with error %d\n", rc);
continue;
}
return -1;
}
if(BytesTransferred == 0 )
{
//TRACE("************* client error ip %s, optype %d\n", pIOCPClnt->szIP, pIOCPClnt->opType);
CallBackClntClose(pIOCPClnt);
continue;
} switch(pIOCPClnt->opType)
{
case OP_IORead:
pIOCPClnt->nRecvF = BytesTransferred;
SetEvent( pIOCPClnt->hEventRead);
CallBackRcvCltData( pIOCPClnt);
break;
case OP_IOWrite:
pIOCPClnt->nSendF = BytesTransferred;
CallBackWriteData( pIOCPClnt);
SetEvent( pIOCPClnt->hEventWrite);
break;
case OP_IOCLNTCLOSE:
CallBackClntClose( pIOCPClnt);
break;
case OP_IOCLOSE:
AfxTrace("case OP_IOCLOSE:");
bRun = FALSE;
break;
default:
{
AfxTrace( "xxxxxxxxxxxxxx err ");
//经常有莫名其妙的操作类型出现
}
break;
}
}
return 0;
}
处理返回的数据DWORD WINAPI CallBackRcvCltData(void* dwData)
{
PIOCPCLNT pIOCPClnt = (PIOCPCLNT)dwData;
SCLIENT * pSClnt; DWORD nAllLen = 0;
DWORD nPerLen = 0;
DWORD nOff = 0;
if( CToolkit::FindSCLIENTSkt(pIOCPClnt->hSocket, CManagerView::m_pSrvSock->m_pHead, &pSClnt) == FALSE)
{
return 2;
} nAllLen = pIOCPClnt->nRecvF;
while ( (nOff < nAllLen))
{
memcpy(&nPerLen, pSClnt->skt.m_chRecvData+nOff, 4);
if ( nPerLen == 0 || ( nAllLen - nOff < nPerLen ) )
{
break;
}
pSClnt->skt.m_nTickCount = ::GetTickCount();
(*(pSClnt->skt.m_pFunOnRcvNetData))( pSClnt->skt.m_chRecvData+ nOff + 4, nPerLen, &pSClnt->skt, pSClnt->skt.m_dwIP, pSClnt->skt.m_nPort, TCP );
nOff += nPerLen;
} int nRtn = CIOCPServer::WSARecvFunc( &pSClnt->skt.m_oIOCPClnt); return 0;
}超时处理DWORD WINAPI ThreadClntConnectStatus( void *pData)
{
CListenSocket* pLsn = (CListenSocket*)pData;
DWORD nTickCount = 0;
SCLIENT *pClntCheck;
while (pLsn->m_bActive)
{
CListenSocket::m_hCriticalList.Lock();
nTickCount = ::GetTickCount();
pClntCheck = pLsn->m_pHead;
while ( pClntCheck)
{
if ( (nTickCount - pClntCheck->skt.m_nTickCount) > 3000 && pClntCheck->skt.m_bActive)
{
pClntCheck->skt.m_bActive = FALSE;
TRACE("************* TIME OUT IP %d\n", pClntCheck->skt.m_dwIP >> 24);
pClntCheck->skt.m_oIOCPClnt.nSendR = 0;
pClntCheck->skt.m_oIOCPClnt.sData.buf[0] = '\0';
ZeroMemory( &(pClntCheck->skt.m_oIOCPClnt.ol), sizeof(OVERLAPPED));
CIOCPServer::WSASendFunc( &pClntCheck->skt.m_oIOCPClnt);
}
pClntCheck = pClntCheck->pNext;
}
CListenSocket::m_hCriticalList.Unlock();
Sleep( 500);
}
return 0;
}
DWORD WINAPI CallBackClntClose(void* pData)
{
CListenSocket::m_hCriticalList.Lock();
PIOCPCLNT pIOCPClnt = (PIOCPCLNT)pData;
CClientSocket *pClntSkt = NULL;
SCLIENT * &pHead = CListenSocket::m_pHead;
SCLIENT * pClt = pHead;
while( pClt )
{
if ( (pClt->skt.m_hSocket) == (pIOCPClnt->hSocket))// && pClt->skt.m_nPort == nPort) /* edit by ly, 05-11-19 */
{
pClntSkt = &pClt->skt;
(*pClntSkt->m_pFunOnRcvNetData)( I_NULL, 0, pClntSkt, pClntSkt->m_dwIP, pClntSkt->m_nPort, TCP );
if( pClt->pPrev )
{ // Not of the head
pClt->pPrev->pNext = pClt->pNext;
if( pClt->pNext )
pClt->pNext->pPrev = pClt->pPrev; // Not of the tail!
}
else
{
pHead = pClt->pNext; // Head of the list
if( pHead )
(pHead)->pPrev = I_NULL; // Not of the tail!
}
AfxTrace( "close socket no %d %d\n", pClt->skt.m_hSocket, pClt->skt.m_nRegion );
pClntSkt->m_bActive = FALSE;
delete pClt;
pClt = NULL;
break;
}
pClt = pClt->pNext;
}
CListenSocket::m_hCriticalList.Unlock();
return 0;
}
GetQueuedCompletionStatus(CompletionPort, &BytesTransferred,
(LPDWORD)&pIOCPClnt,(LPOVERLAPPED*)&pIOCPClnt, INFINITE)
BOOL bSuccess = FALSE;
int nRet = 0;
LPWSAOVERLAPPED lpOverlapped = NULL;
PPER_SOCKET_DATA lpPerSocketContext = NULL;
PPER_IO_DATA lpIOContext = NULL;
WSABUF buffSend;
DWORD dwRecvNumBytes = 0;
DWORD dwSendNumBytes = 0;
DWORD dwFlags = 0;
DWORD dwIoSize = 0;
while( TRUE ) { bSuccess = GetQueuedCompletionStatus(g_hIOCP, &dwIoSize,
(PDWORD_PTR)&lpPerSocketContext,
(LPOVERLAPPED *)&lpOverlapped,
INFINITE); lpIOContext = (PPER_IO_DATA)lpOverlapped; int nRet = setsockopt(
lpIOContext->activeSocket,
SOL_SOCKET,
SO_UPDATE_ACCEPT_CONTEXT,
(char *)&lpPerSocketContext->listenSocket,
sizeof(lpPerSocketContext->listenSocket)
);
你那句代码,我没看明白啊。
那两个结构体,能贴出来吗? PPER_SOCKET_DATA lpPerSocketContext = NULL; PPER_IO_DATA lpIOContext = NULL;
我那样写,问题出在哪里了,能说明更好一些了。谢谢了。
lz的做法是将两个结构体定义为一个结构体来做,
udknight 的意思是定义两个结构体来做,
其实我觉得也可以用一个结构体来做,这样做思路反而清楚。
lz的问题好像出在这里的第四个参数:
if (GetQueuedCompletionStatus(CompletionPort, &BytesTransferred,
(LPDWORD)&pIOCPClnt,(LPOVERLAPPED*)&pIOCPClnt, INFINITE) == 0)msdn定义:
BOOL WINAPI GetQueuedCompletionStatus(
__in HANDLE CompletionPort,
__out LPDWORD lpNumberOfBytes,
__out PULONG_PTR lpCompletionKey,
__out LPOVERLAPPED *lpOverlapped, // 注意这个参数
__in DWORD dwMilliseconds
);所以我建议定义一个OVERLAPPED试试看
OVERLAPPED *pOverlapped = NULL;
if (GetQueuedCompletionStatus(CompletionPort, &BytesTransferred,
(LPDWORD)&pIOCPClnt,(LPOVERLAPPED*)&pOverlapped, INFINITE) == 0)
非常感谢
Windows网络编程 或者是 Windows网络与通信程序设计,这两本书,能详细一些吗,出版社,作者?这方面的书比较多。
一个需要事件,另一个需要socket信息
这里的一个、另一个应该指的是 PPER_SOCKET_DATA 和PPER_IO_DATA 吧。你的意思是客户端句柄和客户端数据分开。
但在下面这段代码里,出现了两个 socket
int nRet = setsockopt(
lpIOContext->activeSocket,
SOL_SOCKET,
SO_UPDATE_ACCEPT_CONTEXT,
(char *)&lpPerSocketContext->listenSocket,
sizeof(lpPerSocketContext->listenSocket)
);lpIOContext->activeSocket, 应该是指的客户端建立的连接的句柄。lpPerSocketContext->listenSocket,应该指的是监听句柄。lpIOContext 中,同时有数据和客户端句柄吧?
windows网络编程 (第二版) 杨何庆 译
Windows网络与通信程序设计 (第二版)王艳平
2本都是学习网络编程的好书,其中第一版已经绝版了,你可以下电子书或者在淘宝上面买打印版。
1 GetQueuedCompletionStatus(
IN HANDLE CompletionPort,
OUT LPDWORD lpNumberOfBytesTransferred,
OUT PULONG_PTR lpCompletionKey,
OUT LPOVERLAPPED *lpOverlapped,
IN DWORD dwMilliseconds
);
lpCompletionKey 就是完成键,由它传递的数据称为单句柄数据。对应PPER_SOCKET_DATA , lpOverlapped称为重叠结构体,由它传递的数据称为单IO数据。对应PPER_IO_DATA。 2 把listen套结字一些属性(包括socket内部接受/发送缓存大小等等)拷贝到新建立的套结字,可以使客户端能够优雅关闭。
lpIOContext->activeSocket, 应该是指的客户端建立的连接的句柄。lpPerSocketContext->listenSocket,应该指的是监听句柄。 你的理解是对的
lpIOContext 中,同时有数据和客户端句柄吧? 一般会包含以下信息, WSAOVERLAPPED ol; SOCKET sClient; // AcceptEx接收的客户方套节字 char *buff; // I/O操作使用的缓冲区
int nLen; // buff缓冲区(使用的)大小 ULONG nSequenceNumber; // 此I/O的序列号 int nOperation; // 操作类型
#define OP_ACCEPT 1
#define OP_WRITE 2
#define OP_READ 3我说的事件指的是 WSAOVERLAPPED结构体中的 hEvent
typedef struct _WSAOVERLAPPED {
DWORD Internal;
DWORD InternalHigh;
DWORD Offset;
DWORD OffsetHigh;
WSAEVENT hEvent;
} WSAOVERLAPPED, FAR * LPWSAOVERLAPPED;
while( bRun )
{
if (GetQueuedCompletionStatus( hCompletionPort, &BytesTransferred,
(LPDWORD)&lpPerHandleData,
(LPOVERLAPPED *)&lpPerIOData,
INFINITE) == 0)
{
rc = GetLastError();
if(rc == ERROR_NETNAME_DELETED || rc == ERROR_CONNECTION_ABORTED|| rc == ERROR_OPERATION_ABORTED)
{
AfxTrace( "GetQueuedCompletionStatus failed with error %d\n", rc);
continue;
}
return -1;
}
if(BytesTransferred == 0 )
{
//TRACE("************* client error ip %s, optype %d\n", pIOCPClnt->szIP, pIOCPClnt->opType);
CallBackClntClose( lpPerHandleData, lpPerIOData);
continue;
} switch(lpPerIOData->opType)
{
case OP_IORead:
lpPerIOData->nRecvF = BytesTransferred;
SetEvent( lpPerIOData->hEventRead);
CallBackRcvCltData( lpPerHandleData, lpPerIOData);
break;
case OP_IOWrite:
lpPerIOData->nSendF = BytesTransferred;
CallBackWriteData( lpPerIOData);
break;
case OP_IOCLNTCLOSE:
CallBackClntClose( lpPerHandleData, lpPerIOData);
break;
case OP_IOCLOSE:
AfxTrace("case OP_IOCLOSE:");
bRun = FALSE;
break;[code=C/C++]
default:
{
AfxTrace( "xxxxxxxxxxxxxx err ");
}
break;
}
}
return 0;
[/code]关闭void CClientSocket::Close()
{
m_bActive = FALSE;
if (m_hSocket != INVALID_SOCKET)
{
//shutdown( m_hSocket, SD_BOTH );
}
if (m_hThread)
{
AfxTrace( "WaitForSingleObject %s\n","CClientSocket::Close()");
if ( WAIT_TIMEOUT == WaitForSingleObject( m_hThread, 1000) )
{
DWORD dwStatus;
::GetExitCodeThread( m_hThread, &dwStatus) ;
if ( dwStatus == STILL_ACTIVE )
{
TerminateThread( m_hThread,0);
}
}
CloseHandle( m_hThread );
}
m_hThread = NULL;
(*m_pFunOnRcvNetData)( I_NULL, 0, this, m_dwIP, m_nPort, TCP );
if (closesocket( m_hSocket ) == SOCKET_ERROR)
{
TRACE("************* closesocket() failed with error %d\n", WSAGetLastError());
}
CancelIo((HANDLE)m_hSocket);
}
2 建议你把shutdown( m_hSocket, SD_BOTH );这句打开,让连接优雅断开。这样就不会出现数据还没有发送完,连接就关闭等等怪异现象。
2 检测到恶意连接直接关闭就可以了。为什么要发送写操作过去再关闭?
3 你先关闭了。然后对一个已关闭的soeket进行操作肯定会有问题。 一般操作的时候会先检测socket是否有效和连接是否断开。
过程是这样的。
有两个线程。一个线程来决断是否超时(超时线程)。完成端口工作线程,也是开了一个(工作线程)。当完成一个接收时,会继续投递一个接收。在投递时会判断这个连接是否被关闭。关闭的情况下,不再投递。投递和关闭,用到了互斥区。
当超时线程判断一个连接超时,无论这个连接是否有效,都要强制关闭并且释放资源。现在异常是在单句柄处。猜测可能是这样:
投递读-》关闭-》完成端口返回。也就是如何保证在关闭前,清空所有投递的操作,以保证关闭后完成端口不再返回该连接上单句柄数据。while( bRun )
{
if (GetQueuedCompletionStatus( hCompletionPort, &BytesTransferred,
(LPDWORD)&lpPerHandleData,
(LPOVERLAPPED *)&lpPerIOData,
INFINITE) == 0)
{
rc = GetLastError();
if(rc == ERROR_NETNAME_DELETED || rc == ERROR_CONNECTION_ABORTED|| rc == ERROR_OPERATION_ABORTED)
{
AfxTrace( "GetQueuedCompletionStatus failed with error %d\n", rc);
continue;
}
return -1;
}
if(BytesTransferred == 0 )
{
//TRACE("************* client error ip %s, optype %d\n", pIOCPClnt->szIP, pIOCPClnt->opType);
CallBackClntClose( lpPerHandleData, lpPerIOData);
continue;
} switch(lpPerIOData->opType)//总是在这里出错
{
case OP_IORead:
lpPerIOData->nRecvF = BytesTransferred;
CallBackRcvCltData( lpPerHandleData, lpPerIOData);
break;
case OP_IOWrite:
lpPerIOData->nSendF = BytesTransferred;
CallBackWriteData( lpPerIOData);
break;
case OP_IOCLNTCLOSE:
CallBackClntClose( lpPerHandleData, lpPerIOData);
break;
case OP_IOCLOSE:
AfxTrace("case OP_IOCLOSE:");
bRun = FALSE;
break;
default:
{
AfxTrace( "xxxxxxxxxxxxxx err ");
}
break;
}
现在关闭改成这样了。bRst = GetQueuedCompletionStatus( hCompletionPort, &BytesTransferred,
(LPDWORD)&lpPerHandleData, (LPOVERLAPPED *)&lpPerIOData,
INFINITE);
if ( bRst == FALSE )
{
if ( lpPerIOData == NULL)
{
AfxTrace( "GetQueuedCompletionStatus failed with unknown error ????????? \n");
return -1;
}
rc = GetLastError();
AfxTrace( "GetQueuedCompletionStatus failed with error %d %s\n", rc, lpPerHandleData->szIP);
/////释放资源
CallBackClntClose( lpPerHandleData);
}