我用科来分析系统抓下了http数据包(十六进制的),能够分析出前面部分是网络连接层、网络层、传输层的信息,剥开这些信息后得到应用层——http的数据包,再剥去http的响应报头后得到具体的数据段。我的问题是如何对这些数据进行还原。我的理解是先按照压缩的算法进行解压(gzip、deflate、chunked等),然后按照编码算法进行解码(ASCII、gb2312、unicode等),这个思路有问题吗?盼高手赐教,如果能具体拿数据分析下怎么解压和解码,感激不尽!
调试欢乐多
这句话不太理解。不知道你想解码成什么。
gzip解码后http协议已经解析完成了,mime类型和应用有关。
zlib里的解压算法需要改成状态机才能在传输未完成时解压。
注:以下代码需要依赖zlib开源库,可以到网上搜索下载。/* HTTP gzip decompress */
int CNNHttp::httpgzdecompress(const PVOID zdata, DWORD nzdata,
PVOID data, DWORD *ndata)
{
int err = 0;
z_stream d_stream = {0}; /* decompression stream */
static char dummy_head[2] =
{
0x8 + 0x7 * 0x10,
(((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF,
};
d_stream.zalloc = (alloc_func)0;
d_stream.zfree = (free_func)0;
d_stream.opaque = (voidpf)0;
d_stream.next_in = (Bytef *)zdata;
d_stream.avail_in = 0;
d_stream.next_out = (Bytef *)data;
if(inflateInit2(&d_stream, 47) != Z_OK) return -1;
while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) {
d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */
if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break;
if(err != Z_OK )
{
if(err == Z_DATA_ERROR)
{
d_stream.next_in = (Bytef*) dummy_head;
d_stream.avail_in = sizeof(dummy_head);
if((err = inflate(&d_stream, Z_NO_FLUSH)) != Z_OK)
{
return -1;
}
}
else return -1;
}
}
if(inflateEnd(&d_stream) != Z_OK) return -1;
*ndata = d_stream.total_out;
return 0;
}----------------------------------------------------------------------------------
{
std::vector<BYTE> vecBuf;
public:
void Reset(DWORD dwNewSize = 0)
{
vecBuf.resize(dwNewSize);
}
void Reset(PVOID pData, DWORD nLen)
{
vecBuf.clear();
Append(pData, nLen);
}
bool IsEmpty() const
{
return vecBuf.empty();
}
void Append(PVOID pData, DWORD nLen)
{
vecBuf.insert(vecBuf.end(), (PBYTE)pData, (PBYTE)pData + nLen);
}
PBYTE GetData()
{
if (vecBuf.empty())
{
return NULL;
}
return &vecBuf[0];
}
DWORD GetDataLength()
{
return vecBuf.size();
}
BYTE& operator[](DWORD _Pos)
{
return vecBuf[_Pos];
}
};//返回true表示此次回应的所有ChunkData数据接收结束
/** Value of one hexadecimal digit, or -1 when ch is not a hex digit. */
static int HexDigitValue(char ch)
{
    if (ch >= '0' && ch <= '9') return ch - '0';
    if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
    return -1;
}

/**
 * Feed one piece of a response body into the reassembly buffers.
 *
 * Non-chunked bodies are appended to cBuf unchanged. For chunked bodies the
 * new bytes are appended to cBufLeft, every chunk that is completely present
 * ("<hex size>[;ext]\r\n<data>\r\n") has its data appended to cBuf, and any
 * incomplete tail is kept in cBufLeft for the next call.
 *
 * All parsing is bounded by the number of bytes actually held in cBufLeft;
 * the buffer is not NUL-terminated, so C string functions must not be used
 * on it. A size field that does not begin with a hex digit is treated as 0.
 *
 * @param cBuf        receives the de-chunked (still compressed) body bytes
 * @param cBufLeft    carry-over of undecoded chunked bytes between calls
 * @param cBufTmp     scratch buffer; its previous contents are overwritten
 * @param bIsChunked  true when the body uses chunked transfer coding
 * @param szGzipData  newly received body bytes
 * @param nLen        number of bytes at szGzipData
 * @return true when a chunk of size 0 was seen, i.e. the chunked body is
 *         complete; false otherwise (always false for non-chunked bodies).
 */
bool OnRecvGzipData(CBuffer& cBuf, CBuffer& cBufLeft, CBuffer& cBufTmp, bool bIsChunked, char *szGzipData, int nLen)
{
    if (!bIsChunked)
    {
        cBuf.Append((PBYTE)szGzipData, nLen);
        return false;
    }

    cBufLeft.Append(szGzipData, nLen);
    char* pCur = (char*)cBufLeft.GetData();
    size_t nRemain = cBufLeft.GetDataLength();
    const size_t nCrlfLen = 2;
    const size_t nSizeMax = (size_t)-1;

    while (nRemain)
    {
        // Parse the hexadecimal chunk size without reading past the buffer.
        size_t nChunkSize = 0;
        size_t nDigits = 0;
        for (; nDigits < nRemain; ++nDigits)
        {
            const int nVal = HexDigitValue(pCur[nDigits]);
            if (nVal < 0)
            {
                break;
            }
            if (nChunkSize > (nSizeMax >> 4))
            {
                nChunkSize = nSizeMax; // saturate instead of wrapping around
            }
            else
            {
                nChunkSize = (nChunkSize << 4) | (size_t)nVal;
            }
        }
        if (0 == nChunkSize)
        {
            return true; // last-chunk marker
        }

        // Locate the CRLF that ends the size line (skipping any extension).
        size_t nHeaderLen = 0;
        for (size_t i = nDigits; i + 1 < nRemain; ++i)
        {
            if (('\r' == pCur[i]) && ('\n' == pCur[i + 1]))
            {
                nHeaderLen = i + nCrlfLen;
                break;
            }
        }
        if (0 == nHeaderLen)
        {
            break; // size line not complete yet
        }

        // The chunk data and its trailing CRLF must both be present.
        const size_t nAfterHeader = nRemain - nHeaderLen;
        if ((nAfterHeader < nCrlfLen) || (nChunkSize > nAfterHeader - nCrlfLen))
        {
            break;
        }

        cBuf.Append(pCur + nHeaderLen, (DWORD)nChunkSize);
        const size_t nConsumed = nHeaderLen + nChunkSize + nCrlfLen;
        pCur += nConsumed;
        nRemain -= nConsumed;
    }

    if (0 == nRemain)
    {
        cBufLeft.Reset();
        return false;
    }

    // Keep the incomplete tail. It is copied through cBufTmp because pCur
    // points into cBufLeft's own storage.
    cBufTmp.Reset(pCur, (DWORD)nRemain);
    cBufLeft.Reset(cBufTmp.GetData(), cBufTmp.GetDataLength());
    return false;
}
void OnRecvData(HANDLE hand, int iRet, char* buf)
{
    // Inspects one received buffer for handle `hand`. When it starts an
    // "HTTP/1.1 200 OK" text/html response with Content-Encoding: gzip, or
    // continues one already being tracked for this handle, the body bytes
    // are accumulated, inflated once enough data is present, and logged.
    // State is kept per handle in a function-local static map.
    //
    // NOTE(review): buf is searched with C string functions and logged with
    // %S, so it is assumed to be NUL-terminated within readable memory;
    // confirm this holds for every caller.
    // NOTE(review): the map entry pointer is used after the scoped lock that
    // protected the lookup has been released; confirm no other thread can
    // erase the entry in between.
    // NOTE(review): both NN_WIN_SCOPED_LOCK and WIN_SCOPED_LOCK are used
    // below; confirm they are meant to be the same macro.
    //NNLOG_TRACE_FUN();
    class CGzipDataPackBuf
    {
    public:
        CBuffer vecByteGzipDataBuf;       // compressed body bytes collected so far
        CBuffer vecByteGzipDataDecodeBuf; // inflate output; also lent to OnRecvGzipData as scratch
        CBuffer vecByteBufLeft;           // undecoded remainder of a chunked body
        bool begin_gzip_text_html;        // set once the response header has been accepted
        bool bIsChunked;
        bool bIsUtf8;
        DWORD dwGetTickCount;             // tick count taken in Reset(); used for expiry

        CGzipDataPackBuf()
        {
            Reset();
        }
        void Reset()
        {
            dwGetTickCount = ::GetTickCount();
            begin_gzip_text_html = false;
            bIsChunked = false;
            bIsUtf8 = false;
        }
    };
    typedef std::map<HANDLE, CGzipDataPackBuf> MapCGzipDataPackBuf_T;
    static MapCGzipDataPackBuf_T s_MapCGzipDataPackBuf;
    static CCriticalSection s_csMapCGzipDataPackBuf;

    if (!buf || (0 >= iRet))
    {
        return;
    }
    NNLOG_DEBUG(_T("len:%u, data:%S"), iRet, buf);
    //CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), buf, (DWORD)iRet);

    // This declaration must sit on its own line; when it shares a line with
    // the commented-out call above it becomes part of that comment.
    const DWORD MAX_GzipDataBuf = 1024*1024;
    /*
    Header lines this code looks for, e.g.:
    Content-Type: text/html; charset=utf-8
    Content-Language: zh-CN
    content="text/html;charset=gb2312"
    Content-Type: text/html;charset=gbk
    */
    bool bRecvChunkGzipDataComplete = false;
    CGzipDataPackBuf* pCGzipDataPackBuf = NULL;
    if ((15 <= iRet)
        && (0 == StrCmpNIA(buf, "HTTP/1.1 200 OK", 15))
        && StrStrIA(buf, "Content-Type: text/html")
        && StrStrIA(buf, "Content-Encoding: gzip")
        && strstr(buf, "\r\n\r\n") // a header split across several receives is not handled here
        )
    {
        //NNLOG_TRACE_ACTION_SCOPE(HTTP_200_OK);
        NN_WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
        pCGzipDataPackBuf = &s_MapCGzipDataPackBuf[hand];
    }
    else
    {
        //NNLOG_TRACE_ACTION_SCOPE(Find pCGzipDataPackBuf);
        NN_WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
        MapCGzipDataPackBuf_T::iterator it = s_MapCGzipDataPackBuf.find(hand);
        if (s_MapCGzipDataPackBuf.end() == it)
        {
            return;
        }
        pCGzipDataPackBuf = &it->second;
    }

    char* pos = NULL;
    if (!pCGzipDataPackBuf->begin_gzip_text_html
        && (pos = strstr(buf, "\r\n\r\n"))
        )
    {
        //NNLOG_TRACE_ACTION_SCOPE(check http data);
        // Temporarily cut the buffer at the end of the header so that the
        // searches below cannot match inside the body.
        pos[0] = 0;
        if (!StrStrIA(buf, "Content-Type: text/html")
            || !StrStrIA(buf, "Content-Encoding: gzip"))
        {
            // A header split across several receives is not handled here.
            NNLOG_ASSERT(0); // already pre-checked above, so this should rarely happen
            // Undo the temporary cut: buf belongs to the caller and must not
            // be handed back with a '\0' in place of '\r'.
            pos[0] = '\r';
            WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
            s_MapCGzipDataPackBuf.erase(hand);
            return;
        }

        pCGzipDataPackBuf->begin_gzip_text_html = true;
        pCGzipDataPackBuf->bIsUtf8 = NULL != StrStrIA(buf, "charset=utf-8");
        pCGzipDataPackBuf->bIsChunked = NULL != StrStrIA(buf, "Transfer-Encoding: chunked");

        pos[0] = '\r'; // restore the byte that was overwritten above
        pos += strlen("\r\n\r\n");
        iRet -= (pos - buf);
        buf = pos;
        bRecvChunkGzipDataComplete = OnRecvGzipData(pCGzipDataPackBuf->vecByteGzipDataBuf, pCGzipDataPackBuf->vecByteBufLeft, pCGzipDataPackBuf->vecByteGzipDataDecodeBuf, pCGzipDataPackBuf->bIsChunked, buf, iRet);
        //if (pCGzipDataPackBuf->bIsChunked)
        //{
        // CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), (PVOID)buf, (DWORD)iRet);
        //}
    }
    else if (pCGzipDataPackBuf->begin_gzip_text_html)
    {
        //NNLOG_TRACE_ACTION_SCOPE(append gzip data);
        bRecvChunkGzipDataComplete = OnRecvGzipData(pCGzipDataPackBuf->vecByteGzipDataBuf, pCGzipDataPackBuf->vecByteBufLeft, pCGzipDataPackBuf->vecByteGzipDataDecodeBuf, pCGzipDataPackBuf->bIsChunked, buf, iRet);
        //if (pCGzipDataPackBuf->bIsChunked)
        //{
        // CWinFile::Write(CWinModule::WinGetModuleFileName() + _T(".") _T(__FUNCTION__), (PVOID)buf, (DWORD)iRet);
        //}
    }

    // Try to decode when there is data and the body is either non-chunked or
    // completely received, or when the collected data exceeds the size cap.
    if ((!pCGzipDataPackBuf->vecByteGzipDataBuf.IsEmpty()
            && (!pCGzipDataPackBuf->bIsChunked || bRecvChunkGzipDataComplete))
        || (MAX_GzipDataBuf < pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength()))
    {
        //NNLOG_TRACE_ACTION_SCOPE(try parse gzip);
        DWORD Length = MAX_GzipDataBuf*2;
        pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.Reset(Length);
        --Length; // leave one byte for the terminating '\0' written below
        int iRetDec = CNNHttp::httpgzdecompress(pCGzipDataPackBuf->vecByteGzipDataBuf.GetData(), pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength(), pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), &Length);
        if (0 == iRetDec)
        {
            //<input type=hidden name=tn value="77071064_1_pg">
            pCGzipDataPackBuf->vecByteGzipDataDecodeBuf[Length] = '\0';
            CString gzipData;
            if (pCGzipDataPackBuf->bIsUtf8)
            {
                gzipData = CA2CT((const char*)pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), CP_UTF8);
            }
            else
            {
                gzipData = CA2CT((const char*)pCGzipDataPackBuf->vecByteGzipDataDecodeBuf.GetData(), CP_ACP);
                //NNLOG_DEBUG(_T("gzip len:%u, data:%S"), Length, vecByteGzipDataDecodeBuf.GetData());
            }
            if (!gzipData.IsEmpty())
            {
                // Over-long text is shortened to its head and tail for logging.
                if (mc.GetdwHttpGzipPackMaxShowLen() < (DWORD)gzipData.GetLength())
                {
                    gzipData = gzipData.Left(mc.GetdwHttpGzipPackMaxShowLen() / 2) + _T("...") + gzipData.Right(mc.GetdwHttpGzipPackMaxShowLen() / 2);
                }
                NNLOG_DEBUG(_T("gzip len:%u, data:%s"), gzipData.GetLength(), gzipData.GetString());
            }
            //pCGzipDataPackBuf->vecByteGzipDataBuf.Reset();
        }
        // Drop the entry once decoding succeeded (non-chunked), the chunked
        // body is complete, or the size cap was exceeded.
        if (((0 == iRetDec) && !pCGzipDataPackBuf->bIsChunked) || bRecvChunkGzipDataComplete || (MAX_GzipDataBuf < pCGzipDataPackBuf->vecByteGzipDataBuf.GetDataLength()))
        {
            WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
            s_MapCGzipDataPackBuf.erase(hand);
        }
    }

    // Expire at most one entry older than 60 seconds per call.
    {
        DWORD dwGetTickCount = ::GetTickCount();
        WIN_SCOPED_LOCK(s_csMapCGzipDataPackBuf);
        NNLOG_DEBUG(_T("s_MapCGzipDataPackBuf.size():%u"), s_MapCGzipDataPackBuf.size());
        BOOST_FOREACH(MapCGzipDataPackBuf_T::value_type& v, s_MapCGzipDataPackBuf)
        {
            CGzipDataPackBuf& gdpb(v.second);
            if (dwGetTickCount - gdpb.dwGetTickCount > 1000 * 60)
            {
                s_MapCGzipDataPackBuf.erase(v.first);
                break; // the remaining entries are handled on later calls
            }
        }
    }
}
// The hook/monitoring code at each layer calls the function above:
/**
 * Hook for recv(): forwards to the original function (recv_) and then hands
 * the received bytes to OnRecvData, keyed by the socket handle.
 *
 * Reads made with MSG_PEEK are not forwarded: peeked bytes stay queued on
 * the socket and are returned again by the next normal read, so forwarding
 * them would feed the same data to OnRecvData twice.
 *
 * @return the value returned by the original recv, unchanged.
 */
DEFINE_MY_WINAPI_RET(int, recv)(
    IN SOCKET s,
    __out_bcount_part(len, return) __out_data_source(NETWORK) char FAR * buf,
    IN int len,
    IN int flags
    )
{
    LOG_TRACE_FUN();
    int iRet = recv_(s, buf, len, flags);
    if (0 == (flags & MSG_PEEK))
    {
        // OnRecvData itself ignores a NULL buffer and results <= 0.
        OnRecvData((HANDLE)s, iRet, buf);
    }
    return iRet;
}
DEFINE_MY_WINAPI_RET(int, WSARecv)(
IN SOCKET s,
__in_ecount(dwBufferCount) __out_data_source(NETWORK) LPWSABUF lpBuffers,
IN DWORD dwBufferCount,
__out_opt LPDWORD lpNumberOfBytesRecvd,
IN OUT LPDWORD lpFlags,
__in_opt LPWSAOVERLAPPED lpOverlapped,
__in_opt LPWSAOVERLAPPED_COMPLETION_ROUTINE lpCompletionRoutine
)
{
    // Hook for WSARecv(): forwards to the original function (WSARecv_) and,
    // when the call completed immediately (return value 0), hands the
    // received bytes to OnRecvData, keyed by the socket handle.

    // lpFlags is an in/out parameter, so the peek request has to be read
    // before the call. Peeked bytes are delivered again by the next normal
    // read and must not be forwarded twice.
    const bool bPeek = lpFlags && (0 != (*lpFlags & MSG_PEEK));

    int iRet = WSARecv_(s, lpBuffers, dwBufferCount, lpNumberOfBytesRecvd, lpFlags, lpOverlapped, lpCompletionRoutine);
    if ((0 == iRet) && !bPeek && lpBuffers && !(lpNumberOfBytesRecvd && (0 == *lpNumberOfBytesRecvd))/* || (WSA_IO_PENDING == ::GetLastError())*/)
    {
        LOG_TRACE_FUN();
        if (lpNumberOfBytesRecvd)
        {
            // Walk the buffers in order and forward only as many bytes as
            // were actually received, not each buffer's full capacity.
            DWORD dwRemain = *lpNumberOfBytesRecvd;
            for (DWORD i = 0; (i < dwBufferCount) && dwRemain; ++i)
            {
                const DWORD dwPart = (lpBuffers[i].len < dwRemain) ? lpBuffers[i].len : dwRemain;
                OnRecvData((HANDLE)s, (int)dwPart, lpBuffers[i].buf);
                dwRemain -= dwPart;
            }
        }
        else
        {
            // NOTE(review): without lpNumberOfBytesRecvd the received byte
            // count is not known here, so each buffer's capacity is passed,
            // as before. Confirm whether the count should instead be taken
            // from the overlapped result.
            for (DWORD i = 0; i < dwBufferCount; ++i)
            {
                OnRecvData((HANDLE)s, (int)lpBuffers[i].len, lpBuffers[i].buf);
            }
        }
    }
    return iRet;
}
NTSTATUS
MYNTAPI(NtDeviceIoControlFile)(HANDLE FileHandle,
    HANDLE Event,
    PIO_APC_ROUTINE ApcRoutine,
    PVOID ApcContext,
    PIO_STATUS_BLOCK IoStatusBlock,
    ULONG IoControlCode,
    PVOID InputBuffer,
    ULONG InputBufferLength,
    PVOID OutputBuffer,
    ULONG OutputBufferLength
    )
{
    // Hook for NtDeviceIoControlFile(): forwards to the original function
    // (NtDeviceIoControlFile_) and, for a completed AFD_RECV request, hands
    // the first receive buffer to OnRecvData, keyed by the file handle.
    // NOTE(review): only the first element of BufferArray is inspected;
    // confirm that multi-buffer receives do not need to be covered.

    // The buffer array pointer is captured before the call is forwarded.
    PAFD_WSABUF lpBuffers = NULL;
    PAFD_INFO AfdInfo = (PAFD_INFO)InputBuffer;
    if (((AFD_RECV == IoControlCode) || (IoControlCode == AFD_SEND)) && AfdInfo && AfdInfo->BufferArray)
    {
        lpBuffers = AfdInfo->BufferArray;
    }

    NTSTATUS st = NtDeviceIoControlFile_(FileHandle,
        Event,
        ApcRoutine,
        ApcContext,
        IoStatusBlock,
        IoControlCode,
        InputBuffer,
        InputBufferLength,
        OutputBuffer,
        OutputBufferLength);

    if (AFD_RECV == IoControlCode)
    {
        // STATUS_PENDING (0x00000103) is a non-negative status and therefore
        // passes NT_SUCCESS, but the request has not completed yet, so
        // neither the byte count nor the buffer contents are valid.
        const NTSTATUS stPending = (NTSTATUS)0x00000103L;
        if (NT_SUCCESS(st) && (stPending != st) && IoStatusBlock && lpBuffers && lpBuffers->buf)
        {
            LOG_TRACE_FUN();
            OnRecvData(FileHandle, (int)IoStatusBlock->Information, lpBuffers->buf);
        }
    }
    return st;
}