造成服务器资源100%,影响服务器运行。
麻烦各位前辈 帮忙优化下代码。
如何用最简洁的方法获取快照内容?请尽量不要占用资源。
只要能获取快照的内容即可。其他的没特别限制。
        private List<string> GetCacheHtml(string strTitle)
        {
            WebOperation W = new WebOperation();            string strSearchHtml = W.Get(string.Format("http://www.baidu.com/s?wd={0}", System.Web.HttpUtility.UrlEncode(strTitle)));            MatchCollection CacheUrlList = RexGet("http://cache\\.baidu\\.com.*?(?=\\\")", strSearchHtml);            List<string> strHtmlList = new List<string>();            int i = 0;
            foreach (Match CacheUrl in CacheUrlList)
            {
                //下面的2代表只获取前两条快照了,如果需要的话修改一下就可以了
                i++;
                if (i > 1)
                {
                    break;
                }                //获取快照数据
                string strCacheHtml = W.Get(CacheUrl.Value);                //去除Baidu信息
                strCacheHtml = Regex.Replace(strCacheHtml, "<.*?(?=<html)", "",
                    RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture | RegexOptions.Singleline);                //过滤超链接
                strCacheHtml = Regex.Replace(strCacheHtml, "<a.*?>", "",
                RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture | RegexOptions.Singleline);
                strCacheHtml = strCacheHtml.Replace("</a>", "");                //过滤CSS样式
                strCacheHtml = Regex.Replace(strCacheHtml, "<style.*?</style>", "",
                RegexOptions.Singleline | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture | RegexOptions.Singleline);                //过滤所有HTML标记
                strCacheHtml = NoHTML(strCacheHtml);                //添加到结果集
                strHtmlList.Add(strCacheHtml);
            }            return strHtmlList;
        }        /**/
        ///   <summary>
        ///   去除HTML标记
        ///   </summary>
        ///   <param   name="NoHTML">包括HTML的源码   </param>
        ///   <returns>已经去除后的文字</returns>
        public string NoHTML(string Htmlstring)
        {
            //删除脚本
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "",
              RegexOptions.IgnoreCase);
            //删除HTML
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "   ",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9",
              RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "",
              RegexOptions.IgnoreCase);            Htmlstring.Replace("<", "");
            Htmlstring.Replace(">", "");
            Htmlstring.Replace("\r\n", "");
            Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();            return Htmlstring;
        }        /// <summary>
        /// 使用正则表达式
        /// </summary>
        /// <param name="strPattern">表达式</param>
        /// <param name="strText">查询文本</param>
        /// <returns></returns>
        private MatchCollection RexGet(string strPattern, string strText)
        {
            //定义一个模式字符串,不仅仅是纯文本,还可以是正则表达式  
            MatchCollection Matches = Regex.Matches(
                strText,
                strPattern,
                RegexOptions.IgnoreCase |         //忽略大小写  
                RegexOptions.ExplicitCapture |    //提高检索效率  
                RegexOptions.Singleline
                );            return Matches;
        }        [Serializable]
        public class WebOperation
        {
            public CookieContainer Cookies { get; set; }
            public WebProxy Proxy { get; set; }
            public int Timeout { get; set; }
            public Encoding En { get; set; }            /// <summary>
            /// 构造函数
            /// </summary>
            public WebOperation()
            {
                Cookies = new CookieContainer();                //设置模式
                System.Net.ServicePointManager.Expect100Continue = false;                //默认时间为30秒
                Timeout = 5000;                //设置默认编码
                En = Encoding.Default;
            }            /// <summary>
            /// 打开页面
            /// </summary>
            /// <param name="URI"></param>
            /// <returns></returns>
            public string Get(string URI)
            {
                // 设置打开页面的参数
                HttpWebRequest request = WebRequest.Create(URI) as HttpWebRequest;
                request.Method = "GET";
                request.KeepAlive = false;
                request.CookieContainer = Cookies;
                //request.UseDefaultCredentials = true;
                request.Proxy = Proxy;
                request.Timeout = Timeout;                // 接收返回的页面
                HttpWebResponse response = request.GetResponse() as HttpWebResponse;
                System.IO.Stream responseStream = response.GetResponseStream();
                System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, En);
                string srcString = reader.ReadToEnd();                response.Close();
                responseStream.Close();
                reader.Close();                //返回源文件
                return srcString;
            }            /// <summary>
            /// 获取页面的 VeiwState 
            /// </summary>
            /// <returns></returns>
            public string GetViewState(string srcString)
            {
                string viewStateFlag = "id=\"__VIEWSTATE\" value=\"";
                int i = srcString.IndexOf(viewStateFlag) + viewStateFlag.Length;
                int j = srcString.IndexOf("\"", i);
                string viewState = srcString.Substring(i, j - i);
                return viewState;
            }            /// <summary>
            /// 获取页面的 EventValidation   
            /// </summary>
            /// <returns></returns>
            public string GetEventValidation(string srcString)
            {
                string eventValidationFlag = "id=\"__EVENTVALIDATION\" value=\"";
                int i = srcString.IndexOf(eventValidationFlag) + eventValidationFlag.Length;
                int j = srcString.IndexOf("\"", i);
                string eventValidation = srcString.Substring(i, j - i);
                return eventValidation;
            }        }

解决方案 »

  1.   


                /// <summary>
                /// 以Post方式打开页面
                /// </summary>
                /// <param name="URI"></param>
                /// <param name="postData"></param>
                /// <returns></returns>
                public string Post(string URI, byte[] postData)
                {
                    // 设置提交的相关参数
                    HttpWebRequest request = WebRequest.Create(URI) as HttpWebRequest;
                    request.Method = "POST";
                    request.KeepAlive = false;
                    request.ContentType = "application/x-www-form-urlencoded";
                    request.CookieContainer = Cookies;
                    request.ContentLength = postData.Length;
                    request.Timeout = Timeout;
                    //request.UseDefaultCredentials = true;
                    request.Proxy = Proxy;                // 提交请求数据
                    System.IO.Stream outputStream = request.GetRequestStream();
                    outputStream.Write(postData, 0, postData.Length);
                    outputStream.Close();                // 接收返回的页面
                    HttpWebResponse response = request.GetResponse() as HttpWebResponse;
                    Stream responseStream = response.GetResponseStream();
                    StreamReader reader = new System.IO.StreamReader(responseStream, En);
                    string srcString = reader.ReadToEnd();
                    foreach (Cookie cookie in response.Cookies)
                    {
                        Cookies.Add(cookie);
                    }                responseStream.Close();
                    reader.Close();
                    response.Close();                return srcString;
                }            /// <summary>
                /// 构造参数
                /// </summary>
                /// <param name="strName"></param>
                /// <param name="strValue"></param>
                /// <returns></returns>
                public byte[] CreateParm(Hashtable Hash)
                {
                    StringBuilder strParm = new StringBuilder();                foreach (DictionaryEntry item in Hash)
                    {
                        strParm.AppendFormat("{0}={1}&", item.Key, System.Web.HttpUtility.UrlEncode(item.Value.ToString()));
                    }                if (strParm.Length > 0)
                    {
                        strParm.Remove(strParm.Length - 1, 1);
                    }                // 将提交的字符串数据转换成字节数组
                    byte[] postData = Encoding.Default.GetBytes(strParm.ToString());                return postData;
                }
                /// <summary>
                /// 下载图片
                /// </summary>
                /// <param name="URI"></param>
                /// <returns></returns>
                public System.Drawing.Image GetImg(string URI)
                {
                    // 设置打开页面的参数
                    HttpWebRequest request = WebRequest.Create(URI) as HttpWebRequest;
                    request.Method = "GET";
                    request.Timeout = Timeout;
                    request.KeepAlive = false;
                    request.CookieContainer = Cookies;
                    request.Proxy = Proxy;
                    // 接收返回的页面
                    HttpWebResponse response = request.GetResponse() as HttpWebResponse;
                    System.Drawing.Image img = new System.Drawing.Bitmap(response.GetResponseStream());                response.Close();                //返回图片
                    return img;
                }接上
      

  2.   

    请求数据时尽量用get,与数据库交互的动作完成时记得关闭连接,释放资源。
      

  3.   


    // Quoted fragment of the entity-decoding statements: each call replaces one HTML
    // entity, matched case-insensitively in its named or decimal numeric form, with
    // the replacement text given as the third argument.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"",
                  RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&",
                  RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<",
                  RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">",
                  RegexOptions.IgnoreCase);
                // A non-breaking space entity is replaced by three ordinary spaces.
                Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "   ",
                  RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1",
                  RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2",
                  RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3",
                  RegexOptions.IgnoreCase);
                Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9",
                  RegexOptions.IgnoreCase);
                // Catch-all: any remaining decimal numeric entity is deleted.
                Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "",
                  RegexOptions.IgnoreCase);这些替换用普通的String.Replace也可以实现,所以这里别用正则