如何提取html中Table 中的元素(急) 我在提取html中的表格时,想了好多方法都不能提取表格中的元素,有哪位高手可以指点我一二,不胜感激,分数不够再加!多谢! 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 以文本方式扫描html文本,按html tag定位到表格呀! int CSurfSafeParser::Parse(IHTMLElementCollection *p_imgColl){ long cElems=0; // retrieve the count of elements in the collection HRESULT hr=S_OK; if (!SUCCEEDED(hr = p_imgColl->get_length( &cElems ))) return __LINE__; for ( int i=0; i<cElems; i++ ) { _variant_t vIndex((long)i,VT_I4); _variant_t var2((long)0,VT_I4);// VARIANT var2 = { 0 }; LPDISPATCH pDisp; if (SUCCEEDED(hr = p_imgColl->item( vIndex, var2, &pDisp ))) { IHTMLTable* pElement = NULL; if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLTable, (LPVOID*)&pElement ))) { CComPtr<IHTMLTable> ptable(pElement); DoTable(ptable); } pDisp->Release(); } // item } // for return 0;}int CSurfSafeParser::DoTable(IHTMLTable *pElement){ HRESULT hr=S_OK; IHTMLElementCollection* prowCol=NULL; pElement->get_rows(&prowCol); if(prowCol) { CComPtr<IHTMLElementCollection> rowcolptr(prowCol); long rowcount=0; rowcolptr->get_length(&rowcount); if(rowcount>20) { for(int rowi=1;rowi<rowcount;rowi++) { VARIANT vIndexRow; vIndexRow.vt = VT_UINT; vIndexRow.lVal = rowi; VARIANT var0 = { 0 }; LPDISPATCH pDisp; if (SUCCEEDED(hr = rowcolptr->item( vIndexRow, var0, &pDisp ))) { IHTMLElement* pverboselement=NULL; if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLElement, (LPVOID*)&pverboselement ))) { BSTR bstag; CComPtr<IHTMLElement> prowptr(pverboselement); pverboselement->get_tagName(&bstag); _bstr_t bstrtag(bstag); } IHTMLTableRow* pElement = NULL; if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLTableRow, (LPVOID*)&pElement ))) { CComPtr<IHTMLTableRow> prowptr(pElement); DoRow(prowptr); } } } }/* else { CString msg; msg.Format("table row count:%d",rowcount); OutputDebugString(msg); }*/ } return 0;}int CSurfSafeParser::DoRow(IHTMLTableRow *prow){ if(!prow) return __LINE__; IHTMLElementCollection* pcelcol=NULL; HRESULT hr=S_OK; prow->get_cells(&pcelcol); if(pcelcol) { 
long count =0; pcelcol->get_length(&count); if(count!=2) return __LINE__; VARIANT var0 = { 0 }; VARIANT vIndexRow; std::string proxyaddr; vIndexRow.vt = VT_UINT; { vIndexRow.lVal = 0; LPDISPATCH pDisp; if (SUCCEEDED(hr = pcelcol->item( vIndexRow, var0, &pDisp ))) { IHTMLElement* pElement = NULL; if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLElement, (LPVOID*)&pElement ))) { CComPtr<IHTMLElement> prowptr(pElement); std::string strip = GetCellText(prowptr); if(strip.length()<4) return __LINE__; proxyaddr = strip+":"; } pDisp->Release(); } } { vIndexRow.lVal = 1; LPDISPATCH pDisp; if (SUCCEEDED(hr = pcelcol->item( vIndexRow, var0, &pDisp ))) { IHTMLElement* pElement = NULL; if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLElement, (LPVOID*)&pElement ))) { CComPtr<IHTMLElement> prowptr(pElement); std::string strip = GetCellText(prowptr); if(strip.length()<4) return __LINE__; proxyaddr += strip; } pDisp->Release(); } } m_nProxyCount++; g_ProxyList.AddProxy(proxyaddr.c_str()); } return 0;}std::string CSurfSafeParser::GetCellText(IHTMLElement *pelement){ if(!pelement) return ""; BSTR bstext; pelement->get_innerText(&bstext); _bstr_t bstrtext(bstext); std::string celltext=(LPCTSTR)bstrtext; CString msg; msg.Format("%s:%d cell text:%s",__FILE__,__LINE__,celltext.c_str()); return celltext; } 一个客户端连接多个服务器的问题请教 PRJ0003 为何我的资源视图里面没有icon,menu,string table等编辑器啊?如何能添加进去,是不是正常都默认自带的啊 link 出错? 关于jsp页面嵌入activex的问题 为什么在栈上创建的对象可以被自动清除,而在堆上创建的不可以啊? 我想开发一个iis5.0远程管理程序 请问? 动态按钮,滑动条 很简单的一个问题,进来看看(在线等待.....) 请问“ |= ”这个符号是什么意思? 请问COM调用提示没有注册接口,怎么回事怎样实现? winmain 与 控制台
{
long cElems=0;
// retrieve the count of elements in the collection
HRESULT hr=S_OK;
if (!SUCCEEDED(hr = p_imgColl->get_length( &cElems )))
return __LINE__;
for ( int i=0; i<cElems; i++ )
{
_variant_t vIndex((long)i,VT_I4);
_variant_t var2((long)0,VT_I4);
// VARIANT var2 = { 0 };
LPDISPATCH pDisp;
if (SUCCEEDED(hr = p_imgColl->item( vIndex, var2, &pDisp )))
{
IHTMLTable* pElement = NULL;
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLTable, (LPVOID*)&pElement )))
{
CComPtr<IHTMLTable> ptable(pElement);
DoTable(ptable);
}
pDisp->Release();
} // item
} // for
return 0;
}int CSurfSafeParser::DoTable(IHTMLTable *pElement)
{
HRESULT hr=S_OK;
IHTMLElementCollection* prowCol=NULL;
pElement->get_rows(&prowCol);
if(prowCol)
{
CComPtr<IHTMLElementCollection> rowcolptr(prowCol);
long rowcount=0;
rowcolptr->get_length(&rowcount);
if(rowcount>20)
{
for(int rowi=1;rowi<rowcount;rowi++)
{
VARIANT vIndexRow;
vIndexRow.vt = VT_UINT;
vIndexRow.lVal = rowi;
VARIANT var0 = { 0 };
LPDISPATCH pDisp;
if (SUCCEEDED(hr = rowcolptr->item( vIndexRow, var0, &pDisp )))
{
IHTMLElement* pverboselement=NULL;
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLElement, (LPVOID*)&pverboselement )))
{
BSTR bstag;
CComPtr<IHTMLElement> prowptr(pverboselement);
pverboselement->get_tagName(&bstag);
_bstr_t bstrtag(bstag); }
IHTMLTableRow* pElement = NULL;
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLTableRow, (LPVOID*)&pElement )))
{
CComPtr<IHTMLTableRow> prowptr(pElement);
DoRow(prowptr);
}
}
}
}
/*
else
{
CString msg;
msg.Format("table row count:%d",rowcount);
OutputDebugString(msg);
}
*/
}
return 0;
}int CSurfSafeParser::DoRow(IHTMLTableRow *prow)
{
if(!prow)
return __LINE__;
IHTMLElementCollection* pcelcol=NULL;
HRESULT hr=S_OK;
prow->get_cells(&pcelcol);
if(pcelcol)
{
long count =0;
pcelcol->get_length(&count);
if(count!=2)
return __LINE__; VARIANT var0 = { 0 };
VARIANT vIndexRow;
std::string proxyaddr;
vIndexRow.vt = VT_UINT;
{
vIndexRow.lVal = 0;
LPDISPATCH pDisp;
if (SUCCEEDED(hr = pcelcol->item( vIndexRow, var0, &pDisp )))
{
IHTMLElement* pElement = NULL;
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLElement, (LPVOID*)&pElement )))
{
CComPtr<IHTMLElement> prowptr(pElement);
std::string strip = GetCellText(prowptr);
if(strip.length()<4)
return __LINE__;
proxyaddr = strip+":";
}
pDisp->Release();
}
}
{
vIndexRow.lVal = 1;
LPDISPATCH pDisp;
if (SUCCEEDED(hr = pcelcol->item( vIndexRow, var0, &pDisp )))
{
IHTMLElement* pElement = NULL;
if (SUCCEEDED(hr = pDisp->QueryInterface( IID_IHTMLElement, (LPVOID*)&pElement )))
{
CComPtr<IHTMLElement> prowptr(pElement);
std::string strip = GetCellText(prowptr);
if(strip.length()<4)
return __LINE__;
proxyaddr += strip;
}
pDisp->Release();
}
}
m_nProxyCount++;
g_ProxyList.AddProxy(proxyaddr.c_str());
}
return 0;
}std::string CSurfSafeParser::GetCellText(IHTMLElement *pelement)
{
if(!pelement)
return "";
BSTR bstext;
pelement->get_innerText(&bstext);
_bstr_t bstrtext(bstext);
std::string celltext=(LPCTSTR)bstrtext;
CString msg;
msg.Format("%s:%d cell text:%s",__FILE__,__LINE__,celltext.c_str());
return celltext;
}