http://dotnet.aspx.cc/ShowDetail.aspx?id=9D49B3EF-0F91-421B-841F-5D9A000BDA04
解决方案 »
- 进程守护和进程树的问题
- 如何将一个泛型List的数据移植到另一个泛型List里面
- c# winform 画图程序,只要一个类似windows画图中画刷功能的东西,画完能保存图片就行
- 在路由器上是否可以同时对内网的2个sqlserver服务器做映射?如果可以,怎么在连接字符串上区分它们?
- 关于水晶报表和sql server的问题
- 有在C#中调用RAPI里的CeRapiInvoke()函数的吗?它参数里的函数是怎么定义的呢?以及怎么获取返回值呢?哪位达人知道,请告诉我或贴个C#例子,非常感谢!
- 大家在调整窗体内的控件大小时是怎么做的(改变窗体大小),有什么控件可以吗?
- C#引用对象赋值问题
- 关于 Crystal Reports,以用户需求的名义……
- [算法求助] 如何实现选取若干不同长度的物体连接成固定长度?
- 请大家帮忙,把 string 分割成 string[] 的问题。
- 一句话搞定的问题,高手们帮帮忙!!!
现在我已经有一个 string html 了。怎么使其成为 DOM 树结构。。或是不用 mshtml 也可以。
http://www.talkaboutsoftware.com/group/microsoft.public.inetsdk.programming.html_objmodel/messages/5247.html
using System.Drawing;
using System.Collections;
using System.ComponentModel;
using System.Windows.Forms;
using System.Data;
using System.Text.RegularExpressions;
using System.IO;namespace CSharpHTMLExtractor
{
/// <summary>
/// Windows Forms window that loads or accepts HTML text and displays the
/// tags found by <see cref="extractHTMLRegex"/> as a nested tree.
/// </summary>
public class frmHTMLExtractor : System.Windows.Forms.Form
{
    /// <summary>
    /// Running count of regex matches for the current "Display HTML Tree"
    /// request; reset at the start of every click and incremented once per
    /// match by <see cref="populateTagNode"/>.
    /// </summary>
    private int intMatchesMade = 0;

    /// <summary>
    /// Pattern matching an opening tag ("outertag"), its raw attribute text
    /// ("attributes") and, when a matching closing tag follows, the markup
    /// in between ("innerhtml").
    /// </summary>
    /// <remarks>
    /// The attribute group is optional ("?") rather than repeated ("*").
    /// A single greedy " [^>]*" already consumes every attribute, so the
    /// matched text and capture are the same, but the repeated form
    /// backtracks exponentially on an unterminated tag containing many spaces.
    /// </remarks>
    private static readonly Regex extractHTMLRegex =
        new Regex("<(?<outertag>[a-z]+[\\d]?)(?<attributes> [^>]*)?>" +
                  "(?<innerhtml>(<(?<innertag>[a-z]+[\\d]?)[^>]*>.*?</\\k<innertag>>|" +
                  "<[a-z]+[\\d]?[^>]*>|(?>[^<]*))*(?=</\\k<outertag>>))?",
                  RegexOptions.IgnoreCase |
                  RegexOptions.Compiled |
                  RegexOptions.ExplicitCapture |
                  RegexOptions.Singleline);

    internal System.Windows.Forms.OpenFileDialog OpenFileDialog1;
    internal System.Windows.Forms.TextBox txtInputText;
    internal System.Windows.Forms.Button cmdDisplayHTML;
    internal System.Windows.Forms.TreeView HTMLTreeView;
    internal System.Windows.Forms.Button cmdOpenHTML;

    /// <summary>
    /// Required designer variable.
    /// </summary>
    private System.ComponentModel.Container components = null;

    /// <summary>
    /// Creates the form and builds its controls.
    /// </summary>
    public frmHTMLExtractor()
    {
        // Required for Windows Form Designer support.
        InitializeComponent();
    }

    /// <summary>
    /// Clean up any resources being used.
    /// </summary>
    /// <param name="disposing">
    /// True when called from Dispose(); the designer component container is
    /// only disposed in that case.
    /// </param>
    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            if (components != null)
            {
                components.Dispose();
            }
        }
        base.Dispose(disposing);
    }

    #region Windows Form Designer generated code
    /// <summary>
    /// Required method for Designer support - do not modify
    /// the contents of this method with the code editor.
    /// </summary>
    private void InitializeComponent()
    {
        this.OpenFileDialog1 = new System.Windows.Forms.OpenFileDialog();
        this.txtInputText = new System.Windows.Forms.TextBox();
        this.cmdDisplayHTML = new System.Windows.Forms.Button();
        this.HTMLTreeView = new System.Windows.Forms.TreeView();
        this.cmdOpenHTML = new System.Windows.Forms.Button();
        this.SuspendLayout();
        //
        // OpenFileDialog1
        //
        this.OpenFileDialog1.Filter = "Web Pages|*.htm;*.html;*.asp;";
        this.OpenFileDialog1.FileOk += new System.ComponentModel.CancelEventHandler(this.OpenFileDialog1_FileOk);
        //
        // txtInputText
        //
        this.txtInputText.Font = new System.Drawing.Font("Verdana", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((System.Byte)(0)));
        this.txtInputText.Location = new System.Drawing.Point(32, 69);
        this.txtInputText.Multiline = true;
        this.txtInputText.Name = "txtInputText";
        this.txtInputText.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
        this.txtInputText.Size = new System.Drawing.Size(704, 243);
        this.txtInputText.TabIndex = 31;
        this.txtInputText.Text = "";
        //
        // cmdDisplayHTML
        //
        this.cmdDisplayHTML.Font = new System.Drawing.Font("Verdana", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((System.Byte)(0)));
        this.cmdDisplayHTML.Location = new System.Drawing.Point(205, 20);
        this.cmdDisplayHTML.Name = "cmdDisplayHTML";
        this.cmdDisplayHTML.Size = new System.Drawing.Size(161, 39);
        this.cmdDisplayHTML.TabIndex = 30;
        this.cmdDisplayHTML.Text = "Display HTML Tree";
        this.cmdDisplayHTML.Click += new System.EventHandler(this.cmdDisplayHTML_Click);
        //
        // HTMLTreeView
        //
        this.HTMLTreeView.CausesValidation = false;
        this.HTMLTreeView.Font = new System.Drawing.Font("Microsoft Sans Serif", 9F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((System.Byte)(0)));
        this.HTMLTreeView.ImageIndex = -1;
        this.HTMLTreeView.Location = new System.Drawing.Point(32, 336);
        this.HTMLTreeView.Name = "HTMLTreeView";
        this.HTMLTreeView.Nodes.AddRange(new System.Windows.Forms.TreeNode[] {
            new System.Windows.Forms.TreeNode("HTML Document")});
        this.HTMLTreeView.SelectedImageIndex = -1;
        this.HTMLTreeView.Size = new System.Drawing.Size(704, 320);
        this.HTMLTreeView.TabIndex = 29;
        //
        // cmdOpenHTML
        //
        this.cmdOpenHTML.Font = new System.Drawing.Font("Verdana", 9.75F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((System.Byte)(0)));
        this.cmdOpenHTML.Location = new System.Drawing.Point(31, 20);
        this.cmdOpenHTML.Name = "cmdOpenHTML";
        this.cmdOpenHTML.Size = new System.Drawing.Size(161, 39);
        this.cmdOpenHTML.TabIndex = 28;
        this.cmdOpenHTML.Text = "Open HTML File";
        this.cmdOpenHTML.Click += new System.EventHandler(this.cmdOpenHTML_Click);
        //
        // frmHTMLExtractor
        //
        this.AutoScaleBaseSize = new System.Drawing.Size(6, 15);
        this.ClientSize = new System.Drawing.Size(776, 687);
        this.Controls.AddRange(new System.Windows.Forms.Control[] {
            this.cmdDisplayHTML,
            this.HTMLTreeView,
            this.cmdOpenHTML,
            this.txtInputText});
        this.Name = "frmHTMLExtractor";
        this.Text = "HTML Extractor";
        this.ResumeLayout(false);
    }
    #endregion

    /// <summary>
    /// The main entry point for the application.
    /// </summary>
    [System.STAThread]
    static void Main()
    {
        Application.Run(new frmHTMLExtractor());
    }

    /// <summary>
    /// Shows the file-open dialog; the chosen file is loaded by
    /// <see cref="OpenFileDialog1_FileOk"/>.
    /// </summary>
    private void cmdOpenHTML_Click(object sender, System.EventArgs e)
    {
        OpenFileDialog1.ShowDialog();
    }

    /// <summary>
    /// Rebuilds the tree view from the text box contents and reports how
    /// many regex matches were made in total.
    /// </summary>
    private void cmdDisplayHTML_Click(object sender, System.EventArgs e)
    {
        intMatchesMade = 0;
        HTMLTreeView.Nodes.Clear();
        HTMLTreeView.Nodes.Add(populateTagNode(this.txtInputText.Text, "HTML Document"));
        HTMLTreeView.ExpandAll();
        MessageBox.Show("Completed " + intMatchesMade + " matches");
    }

    /// <summary>
    /// Reads the file selected in the dialog into the input text box.
    /// </summary>
    /// <remarks>
    /// The reader is wrapped in a using statement so the file handle is
    /// released even if reading fails. I/O and permission failures are
    /// reported in a message box instead of escaping the event handler.
    /// </remarks>
    private void OpenFileDialog1_FileOk(object sender, System.ComponentModel.CancelEventArgs e)
    {
        string filePath = OpenFileDialog1.FileName;
        try
        {
            using (StreamReader reader = File.OpenText(filePath))
            {
                this.txtInputText.Text = reader.ReadToEnd();
            }
        }
        catch (IOException ex)
        {
            MessageBox.Show("The following error occurred "
                + "\r\n" + ex.Message);
        }
        catch (System.UnauthorizedAccessException ex)
        {
            MessageBox.Show("The following error occurred "
                + "\r\n" + ex.Message);
        }
    }

    /// <summary>
    /// Builds a tree node titled <paramref name="sTitleText"/> with one
    /// child per tag matched in <paramref name="sInputString"/>, recursing
    /// into each match's inner markup.
    /// </summary>
    /// <param name="sInputString">Markup to scan for tags.</param>
    /// <param name="sTitleText">Caption for the returned node.</param>
    /// <returns>
    /// The populated node. If an <see cref="System.ArgumentException"/> is
    /// raised while matching, the error is shown in a message box and the
    /// node is returned with whatever was added up to that point.
    /// </returns>
    private TreeNode populateTagNode(string sInputString,
                                     string sTitleText)
    {
        TreeNode htmlTagNode = new TreeNode();

        try
        {
            MatchCollection matchesFound = extractHTMLRegex.Matches(sInputString);
            htmlTagNode.Text = sTitleText;

            foreach (Match matchMade in matchesFound)
            {
                intMatchesMade = intMatchesMade + 1;

                // Groups are read by name so the lookup stays correct if
                // the pattern's group order ever changes.
                string sTag = "<" + matchMade.Groups["outertag"].Value +
                              matchMade.Groups["attributes"].Value + ">";

                TreeNode htmlSubTagNode =
                    populateTagNode(matchMade.Groups["innerhtml"].Value, sTag);
                htmlTagNode.Nodes.Add(htmlSubTagNode);
            }
        }
        catch (System.ArgumentException ex)
        {
            MessageBox.Show("The following error occurred "
                + "\r\n" + ex.Message);
        }

        return htmlTagNode;
    }
}
}
所以说买书并不是坏事!比如就可以加分的说
当然,没有耐心读到后面的章节是不会知道这个问题的解决办法di~~~
所以大家不但要买书,而且还要读书!!像我这样,读了25本及以上的.NET和软件工程以及数学方面的书,忘的不少,但是每及一个问题出现,头脑中便会映出问题的解决方法,最差也会记起在xx书xx页,这不也是学到了东西了么?
XmlDocument doc = new XmlDocument();
doc.LoadXml(html);//前提是你这个html必须符合xml规范。举个例子,如果获得的html源码里有<br>,你应该把它替换成<br/>
>-),买书好像不可以加分哦,哈哈挺佩服你的,向你学习!
----
摘自C#字符串和正则表达式参考手册(Wrox)
所以说买书并不是坏事!比如就可以加分的说
当然,没有耐心读到后面的章节是不会知道这个问题的解决办法di~~~
所以大家不但要买书,而且还要读书!!像我这样,读了25本及以上的.NET和软件工程以及数学方面的书,忘的不少,但是每及一个问题出现,头脑中便会映出问题的解决方法,最差也会记起在xx书xx页,这不也是学到了东西了么?