如题
如题
谢谢

解决方案 »

  1.   

    先抓取网页内容，然后用正则表达式提取需要的部分。抓取网页的代码如下：
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Text;
    using System.Net;
    using System.IO;
    using System.IO.Compression;
    using System.Text.RegularExpressions;namespace WikiPageCreater.Common
    {
        public class PageHelper
        {
            /// <summary>
            /// 根据 url 获取网页编码
            /// </summary>
            /// <param name="url"></param>
            /// <returns></returns>
            public static string GetEncoding(string url)
            {
                HttpWebRequest request = null;
                HttpWebResponse response = null;
                StreamReader reader = null;
                try
                {
                    request = (HttpWebRequest)WebRequest.Create(url);
                    request.Timeout = 20000;
                    request.AllowAutoRedirect = false;                response = (HttpWebResponse)request.GetResponse();
                    if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
                    {
                        if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
                            reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress));
                        else
                            reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII);                    string html = reader.ReadToEnd();                    Regex reg_charset = new Regex(@"charset\b\s*=\s*(?<charset>[^""]*)");
                        if (reg_charset.IsMatch(html))
                        {
                            return reg_charset.Match(html).Groups["charset"].Value;
                        }
                        else if (response.CharacterSet != string.Empty)
                        {
                            return response.CharacterSet;
                        }
                        else
                            return Encoding.Default.BodyName;
                    }
                }
                catch
                {
                }
                finally
                {                if (response != null)
                    {
                        response.Close();
                        response = null;
                    }
                    if (reader != null)
                        reader.Close();                if (request != null)
                        request = null;            }            return Encoding.Default.BodyName;
            }        /// <summary>
            /// 根据 url 和 encoding 获取当前url页面的 html 源代码        
           /// </summary>
            /// <param name="url"></param>
            /// <param name="encoding"></param>
            /// <returns></returns>
            public static string GetHtml(string url, Encoding encoding)
            {
                HttpWebRequest request = null;
                HttpWebResponse response = null;
                StreamReader reader = null;
                try
                {
                    request = (HttpWebRequest)WebRequest.Create(url);
                    request.Timeout = 20000;
                    request.AllowAutoRedirect = false;                response = (HttpWebResponse)request.GetResponse();
                    if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
                    {
                        if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase))
                            reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), encoding);
                        else
                            reader = new StreamReader(response.GetResponseStream(), encoding);
                        string html = reader.ReadToEnd();                    return html;
                    }
                }
                catch
                {
                }
                finally
                {                if (response != null)
                    {
                        response.Close();
                        response = null;
                    }
                    if (reader != null)
                        reader.Close();                if (request != null)
                        request = null;            }            return string.Empty;
            }
        }
    }
      

  2.   

            /// <summary>
            /// 从地址栏获取值
            /// </summary>
            /// <param name="page">页面</param>
            /// <param name="para">参数</param>
            /// <returns></returns>
            public static string GetQueryString(string para)
            {
                string queryString = "";
                if (System.Web.HttpContext.Current.Request.QueryString[para] != null)
                {
                    queryString = System.Web.HttpContext.Current.Request.QueryString[para].ToString();
                }
                else
                {
                    queryString = "";
                }
                return StringHelper.InputTexts(queryString.Trim());
            }        /// <summary>
            /// 从地址栏获取相关int值
            /// </summary>
            /// <param name="page">页面</param>
            /// <param name="para">参数</param>
            /// <returns></returns>
            public static int GetQueryInt(string para)
            {
                int queryInt = -1;
                string tempQueryString = GetQueryString(para);
                if (ValidateHelper.IsNumber(tempQueryString))
                {
                    queryInt = int.Parse(tempQueryString);
                }
                return queryInt;
            }
            /// <summary>
            /// 清除所有脚本
            /// </summary>
            /// <param name="text"></param>
            /// <returns></returns>
            public static string InputTexts(string text)
            {
                if (string.IsNullOrEmpty(text))
                    return string.Empty;
                text = Regex.Replace(text, "[\\s]{2,}", " "); //two or more spaces
                text = Regex.Replace(text, "(<[b|B][r|R]/*>)+|(<[p|P](.|\\n)*?>)", "\n"); //<br>
                text = Regex.Replace(text, "(\\s*&[n|N][b|B][s|S][p|P];\\s*)+", " "); //&nbsp;
                text = Regex.Replace(text, "<(.|\\n)*?>", string.Empty); //any other tags
                text = text.Replace("'", "''");
                return text;
            }
            /// <summary>
            /// 是否数字字符串
            /// </summary>
            /// <param name="inputData">输入字符串</param>
            /// <returns></returns>
            public static bool IsNumber(string inputData)
            {
                if (!string.IsNullOrEmpty(inputData))
                {
                    Match m = RegNumber.Match(inputData);
                    return m.Success;
                }
                else
                {
                    return false;
                }
            }
            private static Regex RegNumber = new Regex("^[0-9]+$");
      

  3.   

    可以通过 GET 传值的方式，把 HTML 中需要在后台使用的元素作为参数传给处理页面。
    在处理页面的后台，可以通过 Request.QueryString["参数"] 的方式获取这些参数。