System.Timers.Timer timer1 = new System.Timers.Timer();
string full = string.Empty;
bool flag = false;
/// <summary>
/// 获取全文
/// </summary>
/// <param name="linkUrl">全文链接地址</param>
/// <returns>返回全文</returns>
public string GetBodyHtml(string linkUrl, string webName, int time)
{
string strFullText = "";
try
{
wb.Navigate(linkUrl);
wb.ScriptErrorsSuppressed = true;
DateTime dtBase = DateTime.Now; //基时间
while (wb.ReadyState != WebBrowserReadyState.Complete)
{
System.Windows.Forms.Application.DoEvents();
}
timer1.Enabled = true;
timer1.Interval = time;
timer1.Elapsed += new System.Timers.ElapsedEventHandler(timer1_Elapsed);
strFullText = full;
}
catch (Exception ex)
{
SpiderMethod.WriteFailLog(linkUrl, ex.Message, webName); //记录错误日志
return "";
}
return strFullText;
}
void timer1_Elapsed(object sender, System.Timers.ElapsedEventArgs e)
{
timer1.Stop();
string s = string.Empty; HtmlElement html = wb.Document.GetElementById("data_tbody");
s = html.InnerHtml;
while (!string.IsNullOrEmpty(s))
{
flag = true;
timer1.Stop();
full = s;
break;
}
timer1.Start();
}
我的需求是这样的,控制台应用程序,通过webbrowser抓取指定网页中的内容,这部分内容是js生成的,用以上方法总是报错,运行到wb.Document.GetElementById("data_tbody");的时候总是提示“指定的转换无效。”求大神指导。。
string full = string.Empty;
bool flag = false;
/// <summary>
/// 获取全文
/// </summary>
/// <param name="linkUrl">全文链接地址</param>
/// <returns>返回全文</returns>
public string GetBodyHtml(string linkUrl, string webName, int time)
{
string strFullText = "";
try
{
wb.Navigate(linkUrl);
wb.ScriptErrorsSuppressed = true;
DateTime dtBase = DateTime.Now; //基时间
while (wb.ReadyState != WebBrowserReadyState.Complete)
{
System.Windows.Forms.Application.DoEvents();
}
timer1.Enabled = true;
timer1.Interval = time;
timer1.Elapsed += new System.Timers.ElapsedEventHandler(timer1_Elapsed);
strFullText = full;
}
catch (Exception ex)
{
SpiderMethod.WriteFailLog(linkUrl, ex.Message, webName); //记录错误日志
return "";
}
return strFullText;
}
void timer1_Elapsed(object sender, System.Timers.ElapsedEventArgs e)
{
timer1.Stop();
string s = string.Empty; HtmlElement html = wb.Document.GetElementById("data_tbody");
s = html.InnerHtml;
while (!string.IsNullOrEmpty(s))
{
flag = true;
timer1.Stop();
full = s;
break;
}
timer1.Start();
}
我的需求是这样的,控制台应用程序,通过webbrowser抓取指定网页中的内容,这部分内容是js生成的,用以上方法总是报错,运行到wb.Document.GetElementById("data_tbody");的时候总是提示“指定的转换无效。”求大神指导。。
{
// Create a WebBrowser instance.
WebBrowser webBrowserForPrinting = new WebBrowser(); // Add an event handler that prints the document after it loads.
webBrowserForPrinting.DocumentCompleted +=
new WebBrowserDocumentCompletedEventHandler(PrintDocument); // Set the Url property to load the document.
webBrowserForPrinting.Url = new Uri(@"\\myshare\help.html");
}private void PrintDocument(object sender,
WebBrowserDocumentCompletedEventArgs e)
{
// Print the document now that it is fully loaded.
((WebBrowser)sender).Print(); // Dispose the WebBrowser now that the task is complete.
((WebBrowser)sender).Dispose();
}
webbrowser的ReadyState 是Complete的时候不就可以了么。和完成DocumentCompleted事件一样的吧?