我现在已经可以根据IHTMLDocument2获得任意一段HTML代码,但怎么样才能操作单个标记的所有属性却始终没有解决.
我是先读入一个HTML文件的全部内容,然后创建IHTMLDocument2接口,再使用IPersistStreamInit,IMarkupServices,IMarkupContainer,IMarkupPointer等接口解析HTML.
也就是使用IMarkupServices->ParseString这个函数.
具体代码如下:
int _tmain(int argc, _TCHAR* argv[])
{
if(argc==1)return 0;
HANDLE hFile=CreateFile(argv[1],GENERIC_READ,FILE_SHARE_READ|FILE_SHARE_WRITE,\
NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL);
if(hFile==INVALID_HANDLE_VALUE)
{
cout<<"cont open file"<<endl;
return 0;
}
DWORD len=GetFileSize(hFile,NULL);
DWORD dwRead=0;
if(len==0xFFFFFFFF)return 0;
LPSTR pszHtml=new char[len];
if(ReadFile(hFile,pszHtml,len,&dwRead,NULL)!=0)
{
CComBSTR bstrHtmlText=pszHtml;
USES_CONVERSION;
cout<<W2A(bstrHtmlText)<<endl;
cout<<endl;
CoInitialize(NULL);
IHTMLDocument2 *pDoc=NULL;
ComCheck(CoCreateInstance(CLSID_HTMLDocument,NULL,CLSCTX_INPROC_SERVER,IID_IHTMLDocument2,(void**)&pDoc));
//ComCheck是一个验证是否成功的函数
IPersistStreamInit * pPersist=NULL;
ComCheck(pDoc->QueryInterface(IID_IPersistStreamInit,(void**)&pPersist));
IMarkupServices *pMS=NULL;
ComCheck(pPersist->InitNew());
pPersist->Release();
ComCheck(pDoc->QueryInterface(IID_IMarkupServices,(void**)&pMS));
IMarkupContainer *pMC=NULL;
IMarkupPointer *pMPStart=NULL;
IMarkupPointer *pMPFinish=NULL;
ComCheck(pMS->CreateMarkupPointer(&pMPStart));
ComCheck(pMS->CreateMarkupPointer(&pMPFinish));
ComCheck(pMPFinish->SetGravity(POINTER_GRAVITY_Right));
ComCheck(pMS->ParseString(bstrHtmlText,0,&pMC,pMPStart,pMPFinish));
IHTMLDocument2 *pNewDoc=NULL;
ComCheck(pMC->QueryInterface(IID_IHTMLDocument2,(LPVOID*)&pNewDoc));
IHTMLElement *pBody=NULL;
ComCheck(pNewDoc->get_body(&pBody));
到了这一步,后面的就不贴了.请问现在怎么得到BODY标签的所有属性?
我是先读入一个HTML文件的全部内容,然后创建IHTMLDocument2接口,再使用IPersistStreamInit,IMarkupServices,IMarkupContainer,IMarkupPointer等接口解析HTML.
也就是使用IMarkupServices->ParseString这个函数.
具体代码如下:
int _tmain(int argc, _TCHAR* argv[])
{
if(argc==1)return 0;
HANDLE hFile=CreateFile(argv[1],GENERIC_READ,FILE_SHARE_READ|FILE_SHARE_WRITE,\
NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL);
if(hFile==INVALID_HANDLE_VALUE)
{
cout<<"cont open file"<<endl;
return 0;
}
DWORD len=GetFileSize(hFile,NULL);
DWORD dwRead=0;
if(len==0xFFFFFFFF)return 0;
LPSTR pszHtml=new char[len];
if(ReadFile(hFile,pszHtml,len,&dwRead,NULL)!=0)
{
CComBSTR bstrHtmlText=pszHtml;
USES_CONVERSION;
cout<<W2A(bstrHtmlText)<<endl;
cout<<endl;
CoInitialize(NULL);
IHTMLDocument2 *pDoc=NULL;
ComCheck(CoCreateInstance(CLSID_HTMLDocument,NULL,CLSCTX_INPROC_SERVER,IID_IHTMLDocument2,(void**)&pDoc));
//ComCheck是一个验证是否成功的函数
IPersistStreamInit * pPersist=NULL;
ComCheck(pDoc->QueryInterface(IID_IPersistStreamInit,(void**)&pPersist));
IMarkupServices *pMS=NULL;
ComCheck(pPersist->InitNew());
pPersist->Release();
ComCheck(pDoc->QueryInterface(IID_IMarkupServices,(void**)&pMS));
IMarkupContainer *pMC=NULL;
IMarkupPointer *pMPStart=NULL;
IMarkupPointer *pMPFinish=NULL;
ComCheck(pMS->CreateMarkupPointer(&pMPStart));
ComCheck(pMS->CreateMarkupPointer(&pMPFinish));
ComCheck(pMPFinish->SetGravity(POINTER_GRAVITY_Right));
ComCheck(pMS->ParseString(bstrHtmlText,0,&pMC,pMPStart,pMPFinish));
IHTMLDocument2 *pNewDoc=NULL;
ComCheck(pMC->QueryInterface(IID_IHTMLDocument2,(LPVOID*)&pNewDoc));
IHTMLElement *pBody=NULL;
ComCheck(pNewDoc->get_body(&pBody));
到了这一步,后面的就不贴了.请问现在怎么得到BODY标签的所有属性?
我用下面的代码试了试,好像不行:我在BODY标记里明明指定了属性,但它得到的属性都是未指定的.
IDispatch* pACDisp;
IHTMLAttributeCollection* pAttrColl;
IDispatch* pItemDisp;
IHTMLDOMAttribute* pItem;LONG lACLength;
VARIANT vACIndex;
BSTR bstrName;
VARIANT vValue;
VARIANT_BOOL vbSpecified;pBody->QueryInterface(IID_IHTMLDOMNode, (void**)&pElemDN);
pElemDN->get_attributes(&pACDisp);
pACDisp->QueryInterface(IID_IHTMLAttributeCollection, (void**)&pAttrColl);
pAttrColl->get_length(&lACLength);
cout<<lACLength<<endl;
vACIndex.vt = VT_I4;
for (int i = 0; i < lACLength; i++)
{
vACIndex.lVal = i;
pAttrColl->item(&vACIndex, &pItemDisp);
pItemDisp->QueryInterface(IID_IHTMLDOMAttribute, (void**)&pItem);
pItem->get_specified(&vbSpecified);
pItem->get_nodeName(&bstrName);
cout<<W2A(bstrName)<<": ";
pItem->get_nodeValue(&vValue);
if(vbSpecified==VARIANT_TRUE)
{
cout<<i;
}cout<<endl;
pItemDisp->Release();
pItem->Release();
}pElemDN->Release();
pACDisp->Release();
pAttrColl->Release();
我就是不知道为什么才提出这个问题,望高手帮帮忙..
bgcolor 和text这两个属性是不是标准的?