【【DataTable每次增加为什么会覆盖之前的数据？求助！！！】】

PS：每次单击按钮获取的XML都是不同的

还有一个小问题：如何才能实现 HTML 的解析？我从网上查到可以设置 HtmlEncode="false"，可是我是动态创建的，该如何添加这个设置？

// Guard whose body runs only when isPostBack is false.
// NOTE(review): presumably this refers to ASP.NET's Page.IsPostBack (capital "I") — confirm;
// as written, isPostBack must be a variable declared elsewhere.
if(!isPostBack)
{
    // Intentionally empty in the post. Presumably the one-time setup (e.g. creating the
    // DataTable) belongs here so that it is not rebuilt on every postback — TODO confirm.
}
可以使用 mshtml 配合正则表达式来解析 HTML，
或者使用 XPath。
// Regex building blocks for recognising markup. Each fragment is wrapped in a
// non-capturing group so it can be concatenated into larger patterns.

// A tag name: one or more word characters, hyphens or colons.
string tag = @"(?:[\w-:]+)";
// An attribute: a name, optionally followed by '=' and a value that is either
// unquoted (no whitespace, '<' or '>'), double-quoted, or single-quoted.
string attribute = @"(?:[\w-:]+)(?:=(?:[^\s\>\<]*|\""[\s\S]*?\""|\'[\s\S]*?\'))?";
// A generic name; same character set as `tag`.
string name = @"(?:[\w-:]+)";
// A "<!name arg arg ...>" directive: the name followed by zero or more
// whitespace-separated arguments and optional trailing whitespace.
// NOTE(review): `argument` is not declared anywhere in this fragment — it must be
// defined in the part of the original listing that was not pasted; confirm.
string xmlDirective = @"(?:\<!" +name + @"(?:\s+" +argument + @")*\s*\>)";
// A CDATA section: "<![CDATA[", any characters (lazily), then "]]>".
string xmlCData = @"(?:\<!\[CDATA\[(?:[\s\S]*?)\]\]\>)";
参考

using System;
  2using System.Collections.Generic;
  3using System.Text;
  4using System.Text.RegularExpressions;
  5using MIL.Html;
  6
  7namespace Yuanso.Sitework.Crawler
  8{
  9     public class HtmlUtil
10     {
11         /**//// <summary>
12         /// Written:     [CHINA] Zhang Liu
13         /// Date:        1,Jun,2006
14         /// Version:     1.0
15         /// Support:     MYBASK <see cref="http://www.mybask.net"/>
16         /// Looking for latest version or similar implementation of this function, please visit: <seealso cref="http://www.mybask.net"/>
17         /// Summary:
18         /// Picking up text content from a html document. This function will remove:
19         /// 1. <%=%>
20         /// 2. script
21         /// 3. style
22         /// 4. html tags
23         /// 6.   and others
24         /// 7. html comments
25         /// After all above removed, \r\n will be replaced by an empty character.
26         /// </summary>
27         /// <param name="strHtml">string:Waiting for striping html,javascript, style elements</param>
28         /// <returns>string: Stripped text</returns>
29         public static string ExtractContent(string strHtml)
30         {
31             //All the regular expression for matching html, javascript, style elements and others.
32             string[] aryRegex ={@"<%=[\w\W]*?%>",    @"<script[\w\W]*?</script>",     @"<style[\w\W]*?</style>",   @"<[/]?[\w\W]*?>",   @"([\r\n])[\s]+",
33                                 @"&(nbsp|#160);",    @"&(iexcl|#161);",               @"&(cent|#162);",            @"&(pound|#163);",   @"&(copy|#169);",
34                                 @"&#(\d+);",         @"-->",                          @"<!--.*\n"};
35             //Corresponding replacment to the regular expressions.
36             //string[] aryReplacment = { "", "", "", "", "", " ", "\xa1", "\xa2", "\xa3", "\xa9", "", "\r\n", "" };
37             string[] aryReplacment = { "", "", "", "", "", " ", "", "", "", "", "", "", "" };
38             string strStripped = strHtml;
39             //Loop to replacing.
40             for (int i = 0; i < aryRegex.Length; i++)
41             {
42                 Regex regex = new Regex(aryRegex[i], RegexOptions.IgnoreCase);
43                 strStripped = regex.Replace(strStripped, aryReplacment[i]);
44             }
45             //Replace "\r\n" to an empty character.
46             strStripped.Replace("\r\n", "");
47             strStripped.Replace("\t", "");
48             //Return stripped string.
49             return strStripped;
50         }
51         public static string ExtractTitle(string strHtml)
52         {
53
54             string title;
55             //string titleResult;
56             Match m;
57             string titlePatern = @"<title[^>]*?>.*?</title>";
58             Regex regex = new Regex(titlePatern, RegexOptions.IgnoreCase);
59             m = regex.Match(strHtml);
60             if (m.Success)
61             {
62                 title = m.Value.ToString();
63                 title = title.Replace("<title>", "");
64                 title = title.Replace("</title>", "");
65             }
66             else title = "无标题";
67
68             return title;
69         }
70         /**//// <summary>
71         /// 此私有方法从一段HTML文本中提取出一定字数的纯文本
72         /// </summary>
73         /// <param name="instr">HTML代码</param>
74         /// <param name="firstN">提取从头数多少个字</param>
75         /// <param name="withLink">是否要链接里面的字</param>
76         /// <returns>纯文本</returns>
77         public static string getFirstNchar(string instr, int firstN, bool withLink)
78         {
79             string strStripped;
80             strStripped = instr.Clone() as string;
81             strStripped = new Regex(@"(?m)<script[^>]*>(\w|\W)*?</script[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase).Replace(strStripped, "");
82             strStripped = new Regex(@"(?m)<style[^>]*>(\w|\W)*?</style[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase).Replace(strStripped, "");
83             strStripped = new Regex(@"(?m)<select[^>]*>(\w|\W)*?</select[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase).Replace(strStripped, "");
84             if (!withLink) strStripped = new Regex(@"(?m)<a[^>]*>(\w|\W)*?</a[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase).Replace(strStripped, "");
85             Regex objReg = new System.Text.RegularExpressions.Regex("(<[^>]+?>)| ", RegexOptions.Multiline | RegexOptions.IgnoreCase);
86             strStripped = objReg.Replace(strStripped, "");
87             Regex objReg2 = new System.Text.RegularExpressions.Regex("(\\s)+", RegexOptions.Multiline | RegexOptions.IgnoreCase);
88             strStripped = objReg2.Replace(strStripped, " ");
89             //return strStripped.Length > firstN ? strStripped.Substring(0, firstN) : strStripped;
90             return strStripped;
91         }
92
93         public static string getTitle(string strHtml)
94         {
95             string title="";
96             Regex reg = new Regex(@"(?m)<title[^>]*>(?<title>(?:\w|\W)*?)</title[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase);
97             Match mc = reg.Match(strHtml);
98             if (mc.Success)
99                 title = mc.Groups["title"].Value.Trim();
100
101             return title;
102         }
103     }
104     public class Htmlpage
105     {
106         public static string GetTitle(string strHtml)
107         {
108             MIL.Html.HtmlDocument documnet;
109             HtmlParser parser = new HtmlDomainTreeParser();
110             documnet = parser.Parse(strHtml);
111             StringBuilder text = new StringBuilder("");
112             foreach (HtmlNode node in documnet.Nodes.FindAllText(true))
113             {
114
115                 HtmlText textNode;
116                 textNode = (HtmlText)node;
117                 if (!textNode.Text.Contains("\r") && !textNode.Text.Contains("\n"))
118                 {
119                     text.Append(textNode.Text);
120                     break;
121                 }
122
123             }
124             return text.ToString();
125
126         }
127         public static string GetContent(string strHtml)
128         {
129             MIL.Html.HtmlDocument documnet;
130             HtmlParser parser = new HtmlDomainTreeParser();
131             documnet = parser.Parse(strHtml);
132             StringBuilder text = new StringBuilder();
133             foreach (HtmlNode node in documnet.Nodes.FindAllText(true))
134             {
135
136                 HtmlText textNode;
137                 textNode = (HtmlText)node;
138                 if (textNode.Text.Contains("\r") || textNode.Text.Contains("\n"))
139                     continue;
140                 else text.Append(textNode.Text);
141
142             }
143             return text.ToString();
144
145         }
146     }
147
148}
149

调试易

【【DataTable每次增加为什么会覆盖之前的数据？求助！！！】】

解决方案 »