求C#提取网页正文内容代码 哪位大虾有C#提取网页正文内容的代码,可不可以发上来我参考参考。谢谢啦!! 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 /// <summary> /// 重载GetData函数,多线程调用该函数 /// </summary> /// <param name="en"></param> public void GetDataOnline() { #region 在线程中用到的查询函数 DCurrentState oDCurrentState = new DCurrentState(CurrentState);//声明代理 this.Invoke(oDCurrentState, "Doing", null); Thread.Sleep(100); string en = this.textBox1.Text.ToString(); WebRequest oRequest = WebRequest.Create("http://dict.cn/search/?q=" + en); //oRequest.Timeout = 800;超时时间 WebResponse oResponse = oRequest.GetResponse(); Stream oStream = oResponse.GetResponseStream(); StreamReader oReader = new StreamReader(oStream, Encoding.Default); string oGetData = oReader.ReadToEnd(); string oRegexPat = @"<big><font\040size=\""2\""\040face=\""Trebuchet\040MS\"">([\w\W]*?)</big>"; Regex oRegex = new Regex(oRegexPat, RegexOptions.IgnoreCase); Match oMatch = oRegex.Match(oGetData); #endregion #region 如果匹配成功 if (oMatch.Success) { string[] CurrentData = oRegex.Split(oGetData); if (CurrentData[1].IndexOf("对不起") >= 0 || CurrentData[1].IndexOf("单词没找到") >= 0) { //没有查询到该单词 this.Invoke(oDCurrentState, "Failure", " "); return; } else { //查询到有数据,如果自动更新开关为on, if (this.autoUpdate.Checked) { #region 更新本地数据 this.Invoke(oDCurrentState, "Updated", CurrentData[1].Replace("<br>", "\r\n")); Dict d = new Dict(en); d._En = en; d._Cn = CurrentData[1]; d.Update(d); #endregion } else { #region 保存结果 this.Invoke(oDCurrentState, "Finish", CurrentData[1].Replace("<br>", "\r\n")); Dict d = new Dict(en); d._En = en; d._Cn = CurrentData[1]; try { d.Insert(d); } catch (Exception exception) { throw exception; } finally { d = null; } #endregion } return; } } #endregion return; } HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url); req.Method = "GET"; or "POST" req.ContentType = "application/x-www-form-urlencoded";Stream ReceiveStream = res.GetResponseStream(); public static int saveHtmlFile(string url,string filename){ int status = -1; string respHTML = string.Empty; 
StreamWriter sw = null; try { if(ReadHttp(url,ref respHTML)=="OK") { if(File.Exists(filename)) { File.Copy(filename,filename+".bak",true); } sw = new StreamWriter(filename,false,Encoding.GetEncoding("GB2312")); sw.WriteLine(respHTML); sw.Close(); status = 0; } else { System.Web.HttpContext.Current.Response.Write("找不到该页或服务器错误"); } } catch(Exception err) { System.Web.HttpContext.Current.Response.Write(err.Message); status = -1; } finally { if (sw != null) { sw.Close(); } } return(status);}public static string ReadHttp(string url,ref string content) { string status="ERROR"; HttpWebRequest Webreq = (HttpWebRequest) WebRequest.Create(url); HttpWebResponse Webresp=null; StreamReader strm = null; try { Webresp = (HttpWebResponse) Webreq.GetResponse(); status = Webresp.StatusCode.ToString(); strm = new StreamReader(Webresp.GetResponseStream(),Encoding.GetEncoding("GB2312")); content = strm.ReadToEnd(); } catch { } finally { if(Webresp != null) Webresp.Close(); if(strm != null) strm.Close(); } return(status);} 1.用正则。2.string.substring(),string.indexof(),etc. (?#Copyright 2005, by Laser Lu.)(?<Style_Block>(?<begin>\<(?<tag>style)(?:\s+(?<attribute>[\w-:]+)(?:=(?<value>[^\s\>\<]*|\"[\s\S]*?\"|\'[\s\S]*?\'))?)*\s*(?:/)?\>)(?<body>[\s\S]*?)(?<end>\</\k<tag>\>))|(?<Script_Block>(?<begin>\<(?<tag>script)(?:\s+(?<attribute>[\w-:]+)(?:=(?<value>[^\s\>\<]*|\"[\s\S]*?\"|\'[\s\S]*?\'))?)*\s*(?:/)?\>)(?<body>[\s\S]*?)(?<end>\</\k<tag>\>))|(?<Xml_Directive>\<!(?<name>[\w-:]+)(?:\s+(?<argument>[\w-:]+|\"[\s\S]*?\"|\'[\s\S]*?\'))*\s*\>)|(?<Xml_Comment>\<!--[\s\S]*?--\>)|(?<Beginning_Tag>\<(?<tag>[\w-:]+)(?:\s+(?<attribute>[\w-:]+)(?:=(?<value>[^\s\>\<]*|\"[\s\S]*?\"|\'[\s\S]*?\'))?)*\s*(?:/)?\>)|(?<Ending_Tag>\</(?<tag>[\w-:]+)\>)|(?<Xml_CDATA>\<!\[CDATA\[(?<data>[\s\S]*?)\]\]\>)|(?<Xml_Literal>(?:(?<blank>[ ]+)|[^ \<\>])+) WinForm窗体上的打X按钮 求助一个创建实例的问题 按datetimepicker时间访问数据库季度表的问题 问个水晶的小问题....... 客户端上传记事本到服务器端的指定的文件夹如何实现? 帮我看一下这个查询语句吧 找Thread+Socket的事例!! VS2005,有什么最新发现都来说说哦 我是初学者,高手来帮帮忙啊?? 
System名称空间的烦恼 急求!!我的工具箱中没有SplitContainer怎么办啊?? 将ip地址转换成整数,他的算法是什么?
/// <summary>
/// Looks up the word currently entered in <c>textBox1</c> on dict.cn, reports the
/// outcome through the <c>CurrentState</c> callback and stores the translation
/// with <c>Dict.Update</c> or <c>Dict.Insert</c>.
/// </summary>
/// <remarks>
/// The original comment says this method is called from multiple threads; it blocks on
/// network I/O. The states passed to <c>CurrentState</c> are "Doing" (started),
/// "Failure" (word not found), "Updated" (auto-update is on) and "Finish" (new entry saved).
/// Exceptions from the request or from <c>Dict</c> propagate to the caller unchanged.
/// </remarks>
public void GetDataOnline()
{
    // Delegate used to marshal state changes through Control.Invoke.
    DCurrentState oDCurrentState = new DCurrentState(CurrentState);
    this.Invoke(oDCurrentState, "Doing", null);
    Thread.Sleep(100);

    // NOTE(review): textBox1 and autoUpdate are read directly here while state updates
    // go through Invoke - confirm this is safe for the thread this method runs on.
    string en = this.textBox1.Text;

    // Escape the word so spaces, '&', '#' and non-ASCII characters cannot corrupt the query string.
    WebRequest oRequest = WebRequest.Create("http://dict.cn/search/?q=" + Uri.EscapeDataString(en));

    // No explicit timeout is set on the request.
    string oGetData;
    using (WebResponse oResponse = oRequest.GetResponse())
    using (Stream oStream = oResponse.GetResponseStream())
    using (StreamReader oReader = new StreamReader(oStream, Encoding.Default))
    {
        oGetData = oReader.ReadToEnd();
    }

    string oRegexPat = @"<big><font\040size=\""2\""\040face=\""Trebuchet\040MS\"">([\w\W]*?)</big>";
    Match oMatch = Regex.Match(oGetData, oRegexPat, RegexOptions.IgnoreCase);
    if (!oMatch.Success)
    {
        // NOTE(review): no state is reported when the page layout does not match,
        // exactly as before - confirm whether the UI should leave "Doing" here.
        return;
    }

    // First capture of the first match; this is the value Regex.Split(...)[1] produced before.
    string translation = oMatch.Groups[1].Value;

    bool notFound = translation.IndexOf("对不起", StringComparison.Ordinal) >= 0
        || translation.IndexOf("单词没找到", StringComparison.Ordinal) >= 0;
    if (notFound)
    {
        // The site answered with its "word not found" message.
        this.Invoke(oDCurrentState, "Failure", " ");
        return;
    }

    // The UI receives the text with HTML line breaks converted; the raw markup is stored.
    string displayText = translation.Replace("<br>", "\r\n");
    bool updateExisting = this.autoUpdate.Checked;
    this.Invoke(oDCurrentState, updateExisting ? "Updated" : "Finish", displayText);

    Dict d = new Dict(en);
    d._En = en;
    d._Cn = translation;
    if (updateExisting)
    {
        // Auto-update switch is on: refresh the local entry.
        d.Update(d);
    }
    else
    {
        // Save as a new entry; failures propagate with their original stack trace.
        d.Insert(d);
    }
}
// Fragment of request setup: choose the HTTP verb for the request.
req.Method = "GET"; // or "POST"
req.ContentType = "application/x-www-form-urlencoded";
// NOTE(review): `res` is not declared in this fragment - presumably the response
// obtained from req.GetResponse(); confirm against the full snippet.
Stream ReceiveStream = res.GetResponseStream();
/// <summary>
/// Downloads the page at <paramref name="url"/> and writes it to
/// <paramref name="filename"/> as GB2312 text. An existing file is first copied to
/// <c>filename + ".bak"</c> (overwriting any earlier backup).
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="filename">Path of the file to create or overwrite.</param>
/// <returns>0 when the page was saved; -1 when the download or the write failed.</returns>
/// <remarks>
/// Failures are not thrown: a message is written to the current HTTP response instead.
/// </remarks>
public static int saveHtmlFile(string url, string filename)
{
    int status = -1;
    try
    {
        string respHTML = string.Empty;
        if (ReadHttp(url, ref respHTML) == "OK")
        {
            if (File.Exists(filename))
            {
                File.Copy(filename, filename + ".bak", true);
            }

            // using guarantees the writer is flushed and closed exactly once, even on error.
            using (StreamWriter sw = new StreamWriter(filename, false, Encoding.GetEncoding("GB2312")))
            {
                sw.WriteLine(respHTML);
            }

            status = 0;
        }
        else
        {
            System.Web.HttpContext.Current.Response.Write("找不到该页或服务器错误");
        }
    }
    catch (Exception err)
    {
        System.Web.HttpContext.Current.Response.Write(err.Message);
        status = -1;
    }

    return status;
}

/// <summary>
/// Fetches <paramref name="url"/> and decodes the response body as GB2312.
/// </summary>
/// <param name="url">Address to request; must resolve to an HTTP(S) request.</param>
/// <param name="content">
/// Receives the response body when it was read completely; left unchanged otherwise.
/// </param>
/// <returns>
/// The response's <c>StatusCode</c> as text (for example "OK") once the body has been
/// read, or "ERROR" when the request or the read failed.
/// </returns>
public static string ReadHttp(string url, ref string content)
{
    string status = "ERROR";
    HttpWebRequest Webreq = (HttpWebRequest)WebRequest.Create(url);
    try
    {
        using (HttpWebResponse Webresp = (HttpWebResponse)Webreq.GetResponse())
        using (StreamReader strm = new StreamReader(Webresp.GetResponseStream(), Encoding.GetEncoding("GB2312")))
        {
            content = strm.ReadToEnd();

            // Report the HTTP status only after the body was read; otherwise a failed
            // read would still return "OK" with no content.
            status = Webresp.StatusCode.ToString();
        }
    }
    catch (Exception)
    {
        // Best-effort contract kept from the original: failures inside the request are
        // not thrown to the caller but reported through the "ERROR" status.
        status = "ERROR";
    }

    return status;
}
2. string.Substring(), string.IndexOf(), etc.