我想在不使用IE控件的情况下获取某个网页的内容并使用HTML DOM进行解析.
我现在的解决方案是使用IXMLHTTPRequestPtr获取到网页内容,然后用以下方法:
// Create a stand-alone MSHTML document object through its CLSID.
// NOTE(review): the HRESULT returned by CreateInstance is not checked here.
MSHTML::IHTMLDocument2Ptr pDoc;
pDoc.CreateInstance(MSHTML::CLSID_HTMLDocument);
// Hand the document and the HTML text to the loader defined below.
// strText is declared elsewhere; presumably it holds the fetched page.
CreateDocFromString(pDoc,strText);
// Loads the HTML text in `str` into the MSHTML document `pDoc` through the
// document's IPersistStreamInit interface.
//
// pDoc - document object that receives the content.
// str  - HTML text; it is only read, never modified.
//
// The function returns nothing; on any failure it simply stops, leaving the
// document in whatever state the last successful step produced.
//
// NOTE(review): MSHTML is documented to parse a loaded stream asynchronously.
// The caller presumably has to keep pumping messages and wait until
// pDoc->readyState reports "complete" before walking the DOM - confirm.
void CreateDocFromString(MSHTML::IHTMLDocument2Ptr pDoc,CString &str)
{
    IPersistStreamInitPtr spPersistStream(pDoc);
    if (spPersistStream == NULL)
        return;

    // Let COM allocate (and, on final release, free) the backing memory.
    // This avoids handing a fixed GPTR block to CreateStreamOnHGlobal and
    // removes the manual size arithmetic and the unchecked lstrcpy.
    IStreamPtr spStream;
    if (FAILED(::CreateStreamOnHGlobal(NULL, TRUE, &spStream)) || spStream == NULL)
        return;

#ifdef _UNICODE
    // A byte-order mark lets the parser recognise the stream as UTF-16
    // instead of guessing an 8-bit code page.
    const WCHAR bom = 0xFEFF;
    if (FAILED(spStream->Write(&bom, sizeof(bom), NULL)))
        return;
#endif

    // Write exactly the characters of the string: no terminator, no padding.
    const ULONG cbText = static_cast<ULONG>(str.GetLength() * sizeof(TCHAR));
    if (cbText != 0 &&
        FAILED(spStream->Write(static_cast<LPCTSTR>(str), cbText, NULL)))
        return;

    // Rewind so that Load() reads from the first byte.
    LARGE_INTEGER liStart;
    liStart.QuadPart = 0;
    if (FAILED(spStream->Seek(liStart, STREAM_SEEK_SET, NULL)))
        return;

    // Initialise the document, then load the stream into it.
    if (SUCCEEDED(spPersistStream->InitNew()))
        spPersistStream->Load(spStream);
}
现在的问题好像是无法写入这个htmldoc里面
请问一下,怎么样可以将文本内容(html格式)写入IHTMLDocument中
又或者说,有没有办法不需要使用IXMLHTTPRequestPtr,直接使用html dom 就可以获取某网站内容并生成IHTMLDocument?
我现在的解决方案是使用IXMLHTTPRequestPtr获取到网页内容,然后用以下方法:
// Create a stand-alone MSHTML document object through its CLSID.
// NOTE(review): the HRESULT returned by CreateInstance is not checked here.
MSHTML::IHTMLDocument2Ptr pDoc;
pDoc.CreateInstance(MSHTML::CLSID_HTMLDocument);
// Hand the document and the HTML text to the loader defined below.
// strText is declared elsewhere; presumably it holds the fetched page.
CreateDocFromString(pDoc,strText);
// Loads the HTML text in `str` into the MSHTML document `pDoc` through the
// document's IPersistStreamInit interface.
//
// pDoc - document object that receives the content.
// str  - HTML text; it is only read, never modified.
//
// The function returns nothing; on any failure it simply stops, leaving the
// document in whatever state the last successful step produced.
//
// NOTE(review): MSHTML is documented to parse a loaded stream asynchronously.
// The caller presumably has to keep pumping messages and wait until
// pDoc->readyState reports "complete" before walking the DOM - confirm.
void CreateDocFromString(MSHTML::IHTMLDocument2Ptr pDoc,CString &str)
{
    IPersistStreamInitPtr spPersistStream(pDoc);
    if (spPersistStream == NULL)
        return;

    // Let COM allocate (and, on final release, free) the backing memory.
    // This avoids handing a fixed GPTR block to CreateStreamOnHGlobal and
    // removes the manual size arithmetic and the unchecked lstrcpy.
    IStreamPtr spStream;
    if (FAILED(::CreateStreamOnHGlobal(NULL, TRUE, &spStream)) || spStream == NULL)
        return;

#ifdef _UNICODE
    // A byte-order mark lets the parser recognise the stream as UTF-16
    // instead of guessing an 8-bit code page.
    const WCHAR bom = 0xFEFF;
    if (FAILED(spStream->Write(&bom, sizeof(bom), NULL)))
        return;
#endif

    // Write exactly the characters of the string: no terminator, no padding.
    const ULONG cbText = static_cast<ULONG>(str.GetLength() * sizeof(TCHAR));
    if (cbText != 0 &&
        FAILED(spStream->Write(static_cast<LPCTSTR>(str), cbText, NULL)))
        return;

    // Rewind so that Load() reads from the first byte.
    LARGE_INTEGER liStart;
    liStart.QuadPart = 0;
    if (FAILED(spStream->Seek(liStart, STREAM_SEEK_SET, NULL)))
        return;

    // Initialise the document, then load the stream into it.
    if (SUCCEEDED(spPersistStream->InitNew()))
        spPersistStream->Load(spStream);
}
现在的问题好像是无法写入这个htmldoc里面
请问一下,怎么样可以将文本内容(html格式)写入IHTMLDocument中
又或者说,有没有办法不需要使用IXMLHTTPRequestPtr,直接使用html dom 就可以获取某网站内容并生成IHTMLDocument?
(2)获取根IHTMLDocument2接口,再通过QueryInterface(QI)获取IHTMLDocument3接口
(3)IHTMLDocument3::documentElement ,获取根节点
(4) IHTMLElement::get_children进行递归每一层的节点
CString strBody=_T("");
CHttpFile* httpfile=NULL;
try
{
httpfile=(CHttpFile*)session.OpenURL(_T("http://gongyi.qq.com/a/20101208/000027.htm"),1,INTERNET_FLAG_TRANSFER_BINARY|INTERNET_FLAG_RELOAD,NULL,0);
}
catch(CInternetException* m_pException)
{
httpfile=NULL;
m_pException->Delete();
return FALSE;
} if(httpfile)
{
int nBuf = 0;
char cBuf[1024]={0};
while (nBuf = httpfile->Read(cBuf,1023))
{
cBuf[nBuf]=0;
strBody+=cBuf;
}
httpfile->Close();
delete httpfile;
session.Close();
} session.Close();
CoInitialize(NULL); MSHTML::IHTMLDocument2Ptr pDoc;
HRESULT hr = CoCreateInstance(CLSID_HTMLDocument, NULL,
CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, (void**)&pDoc); SAFEARRAY* psa = SafeArrayCreateVector(VT_VARIANT, 0, 1);
VARIANT *param;
bstr_t bsData = (LPCTSTR)strBody;
hr = SafeArrayAccessData(psa, (LPVOID*)¶m);
param->vt = VT_BSTR;
param->bstrVal = (BSTR)bsData;
hr = pDoc->write(psa);
hr = pDoc->close();
SafeArrayDestroy(psa);