asp.net自动采集 求思路,在b/s架构下实现自动采集新闻,注意自动两个字。问题在于,当服务器没访问的时候,这个时候怎么实现采集。例如ie关闭了。 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 写个winform程序或windows服务放在后台. 在Application_start中加一个timer循环去做. 在Application_start中加一个timer循环去做. //PUSH登录的用户名和密码 MSXML2.XMLHTTP req = new MSXML2.XMLHTTP(); string query = "callCount=1" + (char)13 + (char)10 + "c0-scriptName=SecurityAccess" + (char)13 + (char)10 + "c0-methodName=validate" + (char)13 + (char)10 + "c0-id=990_1180409315890" + (char)13 + (char)10 + "c0-param0=string:cx_330300" + (char)13 + (char)10 + "c0-param1=string:123456" + (char)13 + (char)10 + "c0-param2=null:null" + (char)13 + (char)10 + "xml=true"; req.open("POST", " http://10.1.0.3/JDApp/dwr/exec/SecurityAccess.validate.dwr ", false, "", ""); req.setRequestHeader("Content-Type", "text/plain"); req.send(query); MSXML2.XMLHTTP Zpages = new MSXML2.XMLHTTP(); Zpages.open("GET", "http://10.1.0.3/JDApp/enterprise/enterpriseListAction.do?searchChilds=true&method=manageList&areaId=20050124181116206783432593202462", false, "", ""); Zpages.send(""); //获取总页数 Byte[] cons = (Byte[])Zpages.responseBody; string HtmlCodes = System.Text.ASCIIEncoding.GetEncoding("GB2312").GetString(cons, 0, cons.Length); Match z = Regex.Match(HtmlCodes, @"页次:<b>(?<pageid>[^<]*)</b>页/<b>(?<pages>[^<]*)", RegexOptions.IgnoreCase); int pagesz=Convert.ToInt32( z.Groups["pages"].Value); //翻页 for (int pageid = 1; pageid <= 2; pageid++) { MSXML2.XMLHTTP oBao = new MSXML2.XMLHTTP(); oBao.open("GET", "http://wwww.aaa.com/JDApp/enterprise/enterpriseListAction.do?method=manageList&areaId=20050124181116206783432593202462&pageId=" + (int)pageid + "", false, "", ""); oBao.send(""); Byte[] b1 = (Byte[])oBao.responseBody; string HtmlCode1 = System.Text.ASCIIEncoding.GetEncoding("GB2312").GetString(b1, 0, b1.Length); string yourStr = HtmlCode1; yourStr = yourStr.Replace(" ", ""); Match H0 = Regex.Match(yourStr, @"PK0([""']?)\svalue=([""']?)(?<PK0>[^""']*)", RegexOptions.IgnoreCase); if (H0.Groups["PK0"].Value != "") 
DetailData("" + H0.Groups["PK0"].Value + ""); oBao.abort(); } //上传文件后记录更新时间"DM"模块名称 WebService webService = new WebService(); webService.DataUpDate("CJ"); Response.Write("采集成功|"); } protected void DetailData(string pageid) { MSXML2.XMLHTTP Bao = new MSXML2.XMLHTTP(); Bao.open("GET", "http://10.1.0.3/JDApp/enterprise/view.jsp?id=" + pageid + "", false, "", ""); Bao.send(""); Byte[] b = (Byte[])Bao.responseBody; string HtmlCode = System.Text.ASCIIEncoding.GetEncoding("GB2312").GetString(b, 0, b.Length); string yourStr = HtmlCode; yourStr = yourStr.Replace(" ", ""); Match H2 = Regex.Match(yourStr, @"通讯地址</td>\s*<td[^>]*>(?<txdz>[^<]*)</td>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*>(?<yb>[^<]*)</td>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*>(?<qywz>[^<]*)</td>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*>(?<dzyx>[^<]*)</td>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*>(?<lzzfzr>[^<]*)", RegexOptions.IgnoreCase); string EntID = pageid; string jgdm = (string)H1.Groups["jgdm"].Value; String sqlStr = ConfigurationManager.ConnectionStrings["SOUcon"].ToString(); SqlConnection sqlConn = new SqlConnection(sqlStr); //Insert采集到的记录 SqlCommand cmd = new SqlCommand("EntQualityInsert", sqlConn); cmd.CommandType = CommandType.StoredProcedure; cmd.Parameters.Add(new SqlParameter("@EntID", SqlDbType.VarChar, 50)); sqlConn.Open(); cmd.ExecuteNonQuery(); sqlConn.Close(); Bao.abort(); }我在用的可以自动通过用户名cx密码123456登录一个WEB系统,再分析总页数,自动分页采集每行记录. 貌似服务器端的运行与ie开不开都没关系,所以说application可行,或者说在服务器端写一个自动运行的程序也可以啊如果是要客户端做这个过程就比较复杂了,你只好先学学黑客技术了~~ 当服务器没访问的时候,这个时候怎么实现采集。例如ie关闭了关了还行?不太可能吧不如你写个在后台运行的程序啊,用asp.net干这事不太合适 Application_start 跟 IE开不开有关系? 【求助】通过request.form["chk"]获得checkbox选中的值 asp.net如何处理消息头 使用Session 验证用户登陆后的问题? .NET 调试时自动关闭 html如何能和aspx页面完全一样? 如何取得 在gridview里asp:textbox控件的回车事件 哪位大哥用c++给我写一个建注册表的项和值的代码 往数据库中添加数据时异常显示"将截断字符串或二进制数据"是什么意思 关于UpdatePanel的一个问题! 请问各位高手如何设置用户的菜单权限,能够提供一段完整的代码吗? 给LISTVIEW添加按钮 在线等.在panel中如何加入动态生成的table
MSXML2.XMLHTTP req = new MSXML2.XMLHTTP();
string query = "callCount=1" + (char)13 + (char)10 + "c0-scriptName=SecurityAccess" + (char)13 + (char)10 + "c0-methodName=validate" + (char)13 + (char)10 + "c0-id=990_1180409315890" + (char)13 + (char)10 + "c0-param0=string:cx_330300" + (char)13 + (char)10 + "c0-param1=string:123456" + (char)13 + (char)10 + "c0-param2=null:null" + (char)13 + (char)10 + "xml=true";
req.open("POST", " http://10.1.0.3/JDApp/dwr/exec/SecurityAccess.validate.dwr ", false, "", "");
req.setRequestHeader("Content-Type", "text/plain");
req.send(query); MSXML2.XMLHTTP Zpages = new MSXML2.XMLHTTP();
Zpages.open("GET", "http://10.1.0.3/JDApp/enterprise/enterpriseListAction.do?searchChilds=true&method=manageList&areaId=20050124181116206783432593202462", false, "", "");
Zpages.send(""); //获取总页数
Byte[] cons = (Byte[])Zpages.responseBody;
string HtmlCodes = System.Text.ASCIIEncoding.GetEncoding("GB2312").GetString(cons, 0, cons.Length);
Match z = Regex.Match(HtmlCodes, @"页次:<b>(?<pageid>[^<]*)</b>页/<b>(?<pages>[^<]*)", RegexOptions.IgnoreCase);
int pagesz=Convert.ToInt32( z.Groups["pages"].Value); //翻页
for (int pageid = 1; pageid <= 2; pageid++)
{ MSXML2.XMLHTTP oBao = new MSXML2.XMLHTTP();
oBao.open("GET", "http://wwww.aaa.com/JDApp/enterprise/enterpriseListAction.do?method=manageList&areaId=20050124181116206783432593202462&pageId=" + (int)pageid + "", false, "", "");
oBao.send(""); Byte[] b1 = (Byte[])oBao.responseBody;
string HtmlCode1 = System.Text.ASCIIEncoding.GetEncoding("GB2312").GetString(b1, 0, b1.Length); string yourStr = HtmlCode1;
yourStr = yourStr.Replace(" ", ""); Match H0 = Regex.Match(yourStr, @"PK0([""']?)\svalue=([""']?)(?<PK0>[^""']*)", RegexOptions.IgnoreCase); if (H0.Groups["PK0"].Value != "") DetailData("" + H0.Groups["PK0"].Value + "");
oBao.abort();
}
//上传文件后记录更新时间"DM"模块名称
WebService webService = new WebService();
webService.DataUpDate("CJ"); Response.Write("采集成功|");
}
/// <summary>
/// Downloads the detail page of a single enterprise record and registers the
/// record through the <c>EntQualityInsert</c> stored procedure.
/// </summary>
/// <param name="pageid">
/// Record id. It is appended to the <c>view.jsp?id=</c> URL and sent to the
/// stored procedure as <c>@EntID</c>.
/// </param>
protected void DetailData(string pageid)
{
    MSXML2.XMLHTTP Bao = new MSXML2.XMLHTTP();
    try
    {
        // Synchronous GET (async flag is false), so responseBody is read right after send().
        Bao.open("GET", "http://10.1.0.3/JDApp/enterprise/view.jsp?id=" + pageid, false, "", "");
        Bao.send("");

        // The response bytes are decoded as GB2312 rather than trusting any declared charset.
        Byte[] b = (Byte[])Bao.responseBody;
        string yourStr = System.Text.Encoding.GetEncoding("GB2312").GetString(b, 0, b.Length);

        // Strip space characters before matching; the pattern still tolerates other
        // whitespace (e.g. line breaks) between cells through \s*.
        yourStr = yourStr.Replace(" ", "");

        // NOTE(review): the captured groups (txdz, yb, qywz, dzyx, lzzfzr) are not passed
        // to the stored procedure anywhere in this block - confirm whether
        // EntQualityInsert is supposed to receive them.
        Match H2 = Regex.Match(yourStr, @"通讯地址</td>\s*<td[^>]*>(?<txdz>[^<]*)</td>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*>(?<yb>[^<]*)</td>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*>(?<qywz>[^<]*)</td>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*>(?<dzyx>[^<]*)</td>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*>(?<lzzfzr>[^<]*)", RegexOptions.IgnoreCase);

        string sqlStr = ConfigurationManager.ConnectionStrings["SOUcon"].ToString();

        // using blocks guarantee the connection and command are released even when
        // Open() or ExecuteNonQuery() throws.
        using (SqlConnection sqlConn = new SqlConnection(sqlStr))
        using (SqlCommand cmd = new SqlCommand("EntQualityInsert", sqlConn))
        {
            cmd.CommandType = CommandType.StoredProcedure;

            // The parameter previously had no value assigned, so the record id never
            // reached the stored procedure.
            cmd.Parameters.Add(new SqlParameter("@EntID", SqlDbType.VarChar, 50)).Value = pageid;

            sqlConn.Open();
            cmd.ExecuteNonQuery();
        }
    }
    finally
    {
        // Always release the HTTP request object, including on failure paths.
        Bao.abort();
    }
}
我正在用的这段代码可以自动用用户名 cx、密码 123456 登录一个 Web 系统,然后解析总页数,再自动逐页采集每行记录。
如果是要客户端做这个过程就比较复杂了,你只好先学学黑客技术了~~
不如你写个在后台运行的程序啊,用asp.net干这事不太合适