如何实现用软件读取网页上的信息 做一个软件,用于读取网页上的信息比如,在页面上读取一个人的姓名、年龄、性别等等,然后用Excel导出,用什么原理或者方法。最好有模板。 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 楼主找找 小偷程序就是 文章采集!通常是用 httpRequest + 正则 #region catch from web int imgW = 0; int imgH = 0; string[] strUrls = null; string[] strCities = null; private bool CatchWeb() { bool ret = false; string[] strList = ConfigurationManager.AppSettings["CityList"].Split(new string[] { ";" }, StringSplitOptions.RemoveEmptyEntries); if (strList.Length == 0) return ret; strCities = new string[strList.Length]; strUrls = new string[strList.Length]; for (int i = 0; i < strList.Length; i++) { string[] strT = strList[i].Split(new string[] { "-" }, StringSplitOptions.RemoveEmptyEntries); if (strT.Length == 2) { strUrls[i] = strT[0]; strCities[i] = strT[1]; } } for (int i = 0; i < strUrls.Length; i++) { string strTmp = string.Format(ConfigurationManager.AppSettings["WebModel"], strUrls[i]); WebRequest myWebRequest = WebRequest.Create(strTmp); WebResponse myWebResponse = null; try { myWebResponse = myWebRequest.GetResponse(); } catch { AddMsg(DateTime.Now + ":连接失败。"); return ret; } Stream myStream = myWebResponse.GetResponseStream(); Encoding encode = System.Text.Encoding.GetEncoding(ConfigurationManager.AppSettings["Encoding"]); StreamReader myStreamReader = new StreamReader(myStream, encode); string strhtml = myStreamReader.ReadToEnd(); myWebResponse.Close(); string strCatch = HtmlCatchHelper.FindHtmlTagContent(strhtml, ConfigurationManager.AppSettings["Pattern"]); string strFile = System.IO.Path.Combine(Application.StartupPath, "res/weather.html"); FileInfo file = new FileInfo(strFile); if (file.Exists) { string strTmpFile = System.IO.Path.Combine(Application.StartupPath, "weather.html"); file.CopyTo(strTmpFile, true); if (HtmlCatchHelper.AppendToHtmlFile(strTmpFile, strCatch)) { if (ConfigurationManager.AppSettings["Width"] != null) int.TryParse(ConfigurationManager.AppSettings["Width"], out imgW); if (ConfigurationManager.AppSettings["Height"] != null) 
int.TryParse(ConfigurationManager.AppSettings["Height"], out imgH); if (imgH == 0) imgH = 300; if (imgW == 0) imgW = 650; string strPic; using (Bitmap thumbnail = GenerateScreenshot(strTmpFile, imgW, imgH)) { strPic = ConfigurationManager.AppSettings["SavePath"]; if (string.IsNullOrEmpty(strPic)) strPic = System.IO.Path.Combine(Application.StartupPath, strCities[i]); else { Regex re = new Regex(@"^(([a-zA-Z]:\\)|(\\{2}\w+)\$?)((([^/\\\?\*])(\\?))*)$"); if (!re.IsMatch(strPic)) { AddMsg(string.Format("{0}:保存路径非法", DateTime.Now)); return false; } DirectoryInfo dir = new DirectoryInfo(strPic); if (!dir.Exists) dir.Create(); strPic = System.IO.Path.Combine(strPic, string.Format("{0}.jpg", strCities[i])); } thumbnail.Save(strPic); } ret = true; //File.Delete(strTmpFile); AddMsg(string.Format("{0}:生成图片成功,保存路径({1})", DateTime.Now, strPic)); } } } if (ret) { ret = SendMail(); } return ret; } #endregion 呵呵,用HttpRequest比较方便这个就是用HttpRequest实现CSDN发帖回帖的。有兴趣请看~~[align=center]********************************************************本内容用 CSDN小秘书 回复每天回帖即可获得10分可用分!********************************************************[/align] ********************************************************本内容用 CSDN小秘书 回复每天回帖即可获得10分可用分!********************************************************有创意 下一个程序 fidder 可以监控你的HTTP请求和回发 然后模拟请求使用HttpRequest请求并设置请求的COOKIE,httpHeader来获得到你需要的页面上的HTML 再根据具体的页面写相应的正则 匹配出你需要的数据 至于为什么要模拟请求 就是因为有些网站的数据是加密了 需要对应的解密才行 或者是需要登录验证的等! 楼上GOOD,像网络蜘蛛、爬虫什么的都行 关于C# 未知数据类型的一个问题 用正则表达式解析字符串??求助! 水晶报表 数据源中有多个独立的表 Farpoint的每页的行数汇总。有朋友知道吗? c#高级编程中说堆栈中的内存高内存区是已用部分,低内存区是未用部分,还说得头头是道,可是小弟一测,为什么就截然相反啊,求教求教 怎么从Xml中读取数据并在DataSet中显示 怎么调用该方法?????????????????????? linq多表更新 准备学c#,大家给点建议吧! about bin folder InstallShield 2010如何打包.net Framework 3.5程序? ??VS2010无法启动调试
#region catch from web

// Width handed to GenerateScreenshot; parsed from AppSettings["Width"], 650 when unset or not positive.
int imgW = 0;
// Height handed to GenerateScreenshot; parsed from AppSettings["Height"], 300 when unset or not positive.
int imgH = 0;
// URL fragment of each CityList entry (text left of '-'); a slot stays null for a malformed entry.
string[] strUrls = null;
// City name of each CityList entry (text right of '-'), parallel to strUrls; null for a malformed entry.
string[] strCities = null;

// Accepts a drive-rooted ("C:\...") or UNC ("\\server...") directory path.
// Built once here instead of once per city inside the loop.
static readonly Regex SavePathPattern = new Regex(@"^(([a-zA-Z]:\\)|(\\{2}\w+)\$?)((([^/\\\?\*])(\\?))*)$");

/// <summary>
/// For every "url-city" entry in AppSettings["CityList"] (entries separated by ';'):
/// downloads the page at AppSettings["WebModel"] formatted with the url part, extracts
/// content with HtmlCatchHelper.FindHtmlTagContent using AppSettings["Pattern"], appends
/// it to a fresh copy of res/weather.html, passes that copy to GenerateScreenshot and
/// saves the returned bitmap under the city name. Calls SendMail afterwards if at least
/// one image was saved.
/// </summary>
/// <returns>
/// The result of SendMail when at least one image was saved and the loop ran to the end;
/// false when nothing was saved or the configured save path is rejected. On a connection
/// failure the current success flag is returned as-is (see the NOTE in the catch block).
/// </returns>
private bool CatchWeb() {
    bool ret = false;

    // A missing CityList key used to throw NullReferenceException; treat it like an empty list.
    string cityList = ConfigurationManager.AppSettings["CityList"];
    if (string.IsNullOrEmpty(cityList)) {
        return ret;
    }

    string[] strList = cityList.Split(new string[] { ";" }, StringSplitOptions.RemoveEmptyEntries);
    if (strList.Length == 0) {
        return ret;
    }

    strCities = new string[strList.Length];
    strUrls = new string[strList.Length];
    for (int i = 0; i < strList.Length; i++) {
        string[] strT = strList[i].Split(new string[] { "-" }, StringSplitOptions.RemoveEmptyEntries);
        // Only exact "url-city" pairs are accepted; anything else leaves both slots null.
        if (strT.Length == 2) {
            strUrls[i] = strT[0];
            strCities[i] = strT[1];
        }
    }

    for (int i = 0; i < strUrls.Length; i++) {
        // Skip malformed entries: a null url would produce a bogus request and a null
        // city would make Path.Combine throw ArgumentNullException further down.
        if (strUrls[i] == null || strCities[i] == null) {
            continue;
        }

        string strTmp = string.Format(ConfigurationManager.AppSettings["WebModel"], strUrls[i]);
        WebRequest myWebRequest = WebRequest.Create(strTmp);
        WebResponse myWebResponse = null;
        try {
            myWebResponse = myWebRequest.GetResponse();
        }
        catch {
            AddMsg(DateTime.Now + ":连接失败。");
            // NOTE(review): ret may already be true from an earlier city, in which case this
            // returns true without calling SendMail. Behaviour kept as-is; confirm intent.
            return ret;
        }

        // Dispose response, stream and reader even if decoding or reading throws.
        string strhtml;
        using (myWebResponse) {
            Encoding encode = System.Text.Encoding.GetEncoding(ConfigurationManager.AppSettings["Encoding"]);
            using (Stream myStream = myWebResponse.GetResponseStream())
            using (StreamReader myStreamReader = new StreamReader(myStream, encode)) {
                strhtml = myStreamReader.ReadToEnd();
            }
        }

        string strCatch = HtmlCatchHelper.FindHtmlTagContent(strhtml, ConfigurationManager.AppSettings["Pattern"]);

        // The template is re-checked for every city; without it this city is skipped.
        string strFile = System.IO.Path.Combine(Application.StartupPath, "res/weather.html");
        FileInfo file = new FileInfo(strFile);
        if (!file.Exists) {
            continue;
        }

        // Work on a copy so the template itself is never modified.
        string strTmpFile = System.IO.Path.Combine(Application.StartupPath, "weather.html");
        file.CopyTo(strTmpFile, true);
        if (!HtmlCatchHelper.AppendToHtmlFile(strTmpFile, strCatch)) {
            continue;
        }

        // Null checks are kept on purpose: a missing key leaves the field's previous value.
        if (ConfigurationManager.AppSettings["Width"] != null) {
            int.TryParse(ConfigurationManager.AppSettings["Width"], out imgW);
        }
        if (ConfigurationManager.AppSettings["Height"] != null) {
            int.TryParse(ConfigurationManager.AppSettings["Height"], out imgH);
        }
        // Unparsable (0) and negative sizes both fall back to the defaults.
        if (imgH <= 0) {
            imgH = 300;
        }
        if (imgW <= 0) {
            imgW = 650;
        }

        string strPic;
        using (Bitmap thumbnail = GenerateScreenshot(strTmpFile, imgW, imgH)) {
            strPic = ConfigurationManager.AppSettings["SavePath"];
            if (string.IsNullOrEmpty(strPic)) {
                // NOTE(review): unlike the configured-path branch, no ".jpg" extension is
                // appended here. Kept as-is because SendMail may rely on this file name.
                strPic = System.IO.Path.Combine(Application.StartupPath, strCities[i]);
            }
            else {
                if (!SavePathPattern.IsMatch(strPic)) {
                    AddMsg(string.Format("{0}:保存路径非法", DateTime.Now));
                    return false;
                }
                DirectoryInfo dir = new DirectoryInfo(strPic);
                if (!dir.Exists) {
                    dir.Create();
                }
                strPic = System.IO.Path.Combine(strPic, string.Format("{0}.jpg", strCities[i]));
            }
            thumbnail.Save(strPic);
        }

        ret = true;
        // The temporary weather.html copy is left in place (its deletion was disabled in the original).
        AddMsg(string.Format("{0}:生成图片成功,保存路径({1})", DateTime.Now, strPic));
    }

    // Mail is sent only when at least one image was produced.
    if (ret) {
        ret = SendMail();
    }
    return ret;
}

#endregion
这个就是用 HttpRequest 实现 CSDN 发帖回帖的,有兴趣请看~~
[align=center]********************************************************
本内容用 CSDN小秘书 回复
每天回帖即可获得10分可用分!
********************************************************
[/align]
本内容用 CSDN小秘书 回复
每天回帖即可获得10分可用分!
********************************************************
有创意
像网络蜘蛛、爬虫什么的都行