网页是用__doPostBack翻页,只能取到第一页,不知道怎么回事? string responseData = RequestHelper.RequestHTML("http://www.xxxxxxx.cc/", "gbk"); string exp = "id=\"__VIEWSTATE\" value=\"([\\s\\S]*?)\" />";
Match match = Regex.Match(responseData, exp, RegexOptions.IgnoreCase); string num = match.Groups[1].Value.ToString();
for (int i = 0; i < 8; i++)
{
System.Net.WebClient WebClientObj = new System.Net.WebClient();
System.Collections.Specialized.NameValueCollection PostVars = new System.Collections.Specialized.NameValueCollection();
PostVars.Add("document.forms['form1'].__VIEWSTATE", num);
PostVars.Add("document.forms['form1'].__EVENTTARGET", "AspNetPager1");
PostVars.Add("document.forms['form1'].__EVENTARGUMENT", i.ToString());
WebClientObj.Headers.Add("ContentType", "text/html; charset=utf-8");
byte[] byte1 = WebClientObj.UploadValues("http://www.xxxxxx.cc/","POST", PostVars);
string ResponseStr = Encoding.UTF8.GetString(byte1); //得到当前页面对应的html 文本字符串
}
Match match = Regex.Match(responseData, exp, RegexOptions.IgnoreCase); string num = match.Groups[1].Value.ToString();
for (int i = 0; i < 8; i++)
{
System.Net.WebClient WebClientObj = new System.Net.WebClient();
System.Collections.Specialized.NameValueCollection PostVars = new System.Collections.Specialized.NameValueCollection();
PostVars.Add("document.forms['form1'].__VIEWSTATE", num);
PostVars.Add("document.forms['form1'].__EVENTTARGET", "AspNetPager1");
PostVars.Add("document.forms['form1'].__EVENTARGUMENT", i.ToString());
WebClientObj.Headers.Add("ContentType", "text/html; charset=utf-8");
byte[] byte1 = WebClientObj.UploadValues("http://www.xxxxxx.cc/","POST", PostVars);
string ResponseStr = Encoding.UTF8.GetString(byte1); //得到当前页面对应的html 文本字符串
}
if (!theForm.onsubmit || (theForm.onsubmit() != false)) {
theForm.__EVENTTARGET.value = eventTarget;
theForm.__EVENTARGUMENT.value = eventArgument;
theForm.submit();
}
}
至于怎么解析JS,楼主可以去研究一下javascript引擎。
我以前写了一个抓取新浪用户信息的,用的是webbrowser控件
PostVars.Add("__EVENTTARGET", "ctl00$MainContent$AspNetPager1");
PostVars.Add("__EVENTARGUMENT", i.ToString());还有获取网页内容改变下VIEWSTATE的值
string ResponseStr = Encoding.UTF8.GetString(byte1); //得到当前页面对应的html 文本字符串
match = Regex.Match(ResponseStr, exp, RegexOptions.IgnoreCase);
num = match.Groups[1].Value.ToString();再试试看