求C# 怎么从html中获取想要的数据 如题如题谢谢 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 先抓取内容 然后用正则来取。。抓取网页using System;using System.Collections.Generic;using System.Linq;using System.Text;using System.Net;using System.IO;using System.IO.Compression;using System.Text.RegularExpressions;namespace WikiPageCreater.Common{ public class PageHelper { /// <summary> /// 根据 url 获取网页编码 /// </summary> /// <param name="url"></param> /// <returns></returns> public static string GetEncoding(string url) { HttpWebRequest request = null; HttpWebResponse response = null; StreamReader reader = null; try { request = (HttpWebRequest)WebRequest.Create(url); request.Timeout = 20000; request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse(); if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024) { if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase)) reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress)); else reader = new StreamReader(response.GetResponseStream(), Encoding.ASCII); string html = reader.ReadToEnd(); Regex reg_charset = new Regex(@"charset\b\s*=\s*(?<charset>[^""]*)"); if (reg_charset.IsMatch(html)) { return reg_charset.Match(html).Groups["charset"].Value; } else if (response.CharacterSet != string.Empty) { return response.CharacterSet; } else return Encoding.Default.BodyName; } } catch { } finally { if (response != null) { response.Close(); response = null; } if (reader != null) reader.Close(); if (request != null) request = null; } return Encoding.Default.BodyName; } /// <summary> /// 根据 url 和 encoding 获取当前url页面的 html 源代码 /// </summary> /// <param name="url"></param> /// <param name="encoding"></param> /// <returns></returns> public static string GetHtml(string url, Encoding encoding) { HttpWebRequest request = null; HttpWebResponse response = null; StreamReader reader = null; try { request = 
(HttpWebRequest)WebRequest.Create(url); request.Timeout = 20000; request.AllowAutoRedirect = false; response = (HttpWebResponse)request.GetResponse(); if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024) { if (response.ContentEncoding != null && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase)) reader = new StreamReader(new GZipStream(response.GetResponseStream(), CompressionMode.Decompress), encoding); else reader = new StreamReader(response.GetResponseStream(), encoding); string html = reader.ReadToEnd(); return html; } } catch { } finally { if (response != null) { response.Close(); response = null; } if (reader != null) reader.Close(); if (request != null) request = null; } return string.Empty; } }} /// <summary> /// 从地址栏获取值 /// </summary> /// <param name="page">页面</param> /// <param name="para">参数</param> /// <returns></returns> public static string GetQueryString(string para) { string queryString = ""; if (System.Web.HttpContext.Current.Request.QueryString[para] != null) { queryString = System.Web.HttpContext.Current.Request.QueryString[para].ToString(); } else { queryString = ""; } return StringHelper.InputTexts(queryString.Trim()); } /// <summary> /// 从地址栏获取相关int值 /// </summary> /// <param name="page">页面</param> /// <param name="para">参数</param> /// <returns></returns> public static int GetQueryInt(string para) { int queryInt = -1; string tempQueryString = GetQueryString(para); if (ValidateHelper.IsNumber(tempQueryString)) { queryInt = int.Parse(tempQueryString); } return queryInt; } /// <summary> /// 清除所有脚本 /// </summary> /// <param name="text"></param> /// <returns></returns> public static string InputTexts(string text) { if (string.IsNullOrEmpty(text)) return string.Empty; text = Regex.Replace(text, "[\\s]{2,}", " "); //two or more spaces text = Regex.Replace(text, "(<[b|B][r|R]/*>)+|(<[p|P](.|\\n)*?>)", "\n"); //<br> text = Regex.Replace(text, "(\\s*&[n|N][b|B][s|S][p|P];\\s*)+", " "); 
// text = Regex.Replace(text, "<(.|\\n)*?>", string.Empty); //any other tags text = text.Replace("'", "''"); return text; } /// <summary> /// 是否数字字符串 /// </summary> /// <param name="inputData">输入字符串</param> /// <returns></returns> public static bool IsNumber(string inputData) { if (!string.IsNullOrEmpty(inputData)) { Match m = RegNumber.Match(inputData); return m.Success; } else { return false; } } private static Regex RegNumber = new Regex("^[0-9]+$"); 可以通过GET传值的方式把html中需要在后台使用的元素通过参数的方式传给处理页面在处理页面后台可以后去参数Request.QueryString["参数"]的方式获取 将GridView里面选中的主键作为参数传递另外一个页面的办法? 一个数据源怎么用两种排序来显示??? asp.net 如何提示设为首页 请教各位个问题,asp.net做完网站之后怎么像VB一样打成可以运行的文件,怎么样才可以去测试呢? SQL ERROR CODE:260 wap编程 Session传值的问题?? 初来ASP区,请交XP HomeEdition版本下如何打开ASP程序? MVC自定义SimpleMembershipProvider 帮忙看看这段代码 如何使用Cookies? asp.net ajax 问题请教
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.IO.Compression;
using System.Text.RegularExpressions;namespace WikiPageCreater.Common
{
/// <summary>
/// Helpers for downloading a web page over HTTP and for working out which
/// character set the page declares.
/// </summary>
public class PageHelper
{
    /// <summary>Timeout applied to every request, in milliseconds.</summary>
    private const int RequestTimeoutMilliseconds = 20000;

    /// <summary>
    /// Responses whose declared Content-Length reaches this many bytes are not read.
    /// </summary>
    private const long MaxContentLength = 1024 * 1024;

    /// <summary>
    /// Finds a <c>charset=...</c> declaration in markup. The value may be bare
    /// (<c>content="text/html; charset=utf-8"</c>) or quoted (<c>&lt;meta charset="utf-8"&gt;</c>);
    /// only the characters that can appear in a charset name are captured.
    /// </summary>
    private static readonly Regex CharsetRegex = new Regex(
        @"charset\s*=\s*[""']?\s*(?<charset>[A-Za-z0-9][A-Za-z0-9_.:\-]*)",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// Determines the character set name of the page at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Absolute HTTP(S) address of the page.</param>
    /// <returns>
    /// The charset declared inside the page markup if there is one; otherwise the
    /// charset reported by the response's Content-Type header; otherwise
    /// <c>Encoding.Default.BodyName</c>. The last fallback is also used when the
    /// page cannot be downloaded.
    /// </returns>
    public static string GetEncoding(string url)
    {
        string headerCharset;

        // Charset names are plain ASCII, so decoding the page as ASCII is enough to find them.
        string html = Download(url, Encoding.ASCII, out headerCharset);
        if (html != null)
        {
            Match declared = CharsetRegex.Match(html);
            if (declared.Success)
            {
                return declared.Groups["charset"].Value;
            }

            // CharacterSet may be null as well as empty, so test for both.
            if (!string.IsNullOrEmpty(headerCharset))
            {
                return headerCharset;
            }
        }

        return Encoding.Default.BodyName;
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and decodes it with
    /// <paramref name="encoding"/>.
    /// </summary>
    /// <param name="url">Absolute HTTP(S) address of the page.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <returns>
    /// The page source, or an empty string when the request fails, the status is
    /// not 200 OK, or the declared content length is 1 MiB or more.
    /// </returns>
    public static string GetHtml(string url, Encoding encoding)
    {
        string ignoredCharset;
        string html = Download(url, encoding, out ignoredCharset);
        return html ?? string.Empty;
    }

    /// <summary>
    /// Performs the request shared by <see cref="GetEncoding"/> and <see cref="GetHtml"/>.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <param name="encoding">Encoding used to decode the body.</param>
    /// <param name="headerCharset">
    /// Receives the response's <c>CharacterSet</c> value when a body was read; otherwise null.
    /// </param>
    /// <returns>The decoded body, or null when nothing usable was downloaded.</returns>
    private static string Download(string url, Encoding encoding, out string headerCharset)
    {
        headerCharset = null;
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Timeout = RequestTimeoutMilliseconds;
            // Redirects are not followed: a 3xx answer is treated as "no page".
            request.AllowAutoRedirect = false;

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                // A response without a Content-Length header reports -1 and is therefore still read.
                if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= MaxContentLength)
                {
                    return null;
                }

                headerCharset = response.CharacterSet;

                Stream body = response.GetResponseStream();
                bool isGzip = response.ContentEncoding != null
                    && response.ContentEncoding.Equals("gzip", StringComparison.OrdinalIgnoreCase);
                if (isGzip)
                {
                    body = new GZipStream(body, CompressionMode.Decompress);
                }

                // Disposing the reader also disposes the (possibly wrapped) response stream.
                using (StreamReader reader = new StreamReader(body, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
        }
        catch (Exception ex)
        {
            // Deliberately best-effort: callers receive a fallback value instead of an
            // exception, but the failure is at least visible in debug builds.
            System.Diagnostics.Debug.WriteLine("PageHelper: request to '" + url + "' failed: " + ex.Message);
            return null;
        }
    }
}
}
/// <summary>
/// Reads a value from the current request's query string.
/// </summary>
/// <param name="para">Name of the query-string parameter.</param>
/// <returns>
/// The trimmed parameter value (an empty string when the parameter is absent),
/// passed through <c>StringHelper.InputTexts</c>.
/// </returns>
public static string GetQueryString(string para)
{
    string raw = System.Web.HttpContext.Current.Request.QueryString[para];
    string value = raw ?? "";
    return StringHelper.InputTexts(value.Trim());
}
/// <summary>
/// Reads an integer value from the current request's query string.
/// </summary>
/// <param name="para">Name of the query-string parameter.</param>
/// <returns>
/// The parsed value, or -1 when the text returned by <c>GetQueryString</c> is
/// rejected by <c>ValidateHelper.IsNumber</c> or does not fit in an <see cref="int"/>.
/// </returns>
public static int GetQueryInt(string para)
{
    const int notAvailable = -1;

    string text = GetQueryString(para);
    if (!ValidateHelper.IsNumber(text))
    {
        return notAvailable;
    }

    // TryParse instead of Parse: a long run of digits (e.g. "99999999999") passes the
    // digit check but overflows int, which used to surface as an OverflowException.
    int parsed;
    bool ok = int.TryParse(
        text,
        System.Globalization.NumberStyles.None,
        System.Globalization.CultureInfo.InvariantCulture,
        out parsed);
    return ok ? parsed : notAvailable;
}
/// <summary>
/// Reduces user-supplied text to plain text: collapses runs of whitespace, turns
/// &lt;br&gt; and &lt;p&gt; tags into line breaks, replaces &amp;nbsp; entities with a
/// single space, removes every remaining tag and doubles single quotes.
/// </summary>
/// <remarks>
/// Only the tags themselves are removed; text between tags (for example the body of a
/// &lt;script&gt; element) is kept. Doubling single quotes is NOT a substitute for
/// parameterized SQL - do not rely on this method to make text safe for a query.
/// </remarks>
/// <param name="text">Text to clean; may be null.</param>
/// <returns>The cleaned text, or an empty string when <paramref name="text"/> is null or empty.</returns>
public static string InputTexts(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return string.Empty;
    }

    // Case-insensitive matching replaces the former "[b|B]"-style classes, which also
    // accepted a literal '|' (so "<|r>" and "&|bsp;" were treated as markup).
    const RegexOptions ignoreCase = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    // Two or more whitespace characters become one space.
    text = Regex.Replace(text, "\\s{2,}", " ");

    // <br>, <br/>, <br /> (and runs of them) and opening <p ...> tags become a newline.
    // "\\b" keeps tags such as <pre> or <param> from being mistaken for <p>.
    text = Regex.Replace(text, "(<br\\s*/*>)+|<p\\b[^>]*>", "\n", ignoreCase);

    // Runs of &nbsp; (with surrounding whitespace) become one space.
    text = Regex.Replace(text, "(\\s*&nbsp;\\s*)+", " ", ignoreCase);

    // Any other tag is dropped; "[^>]*" also spans line breaks inside a tag.
    text = Regex.Replace(text, "<[^>]*>", string.Empty);

    text = text.Replace("'", "''");
    return text;
}
/// <summary>
/// Tells whether a string consists solely of the decimal digits 0-9.
/// </summary>
/// <param name="inputData">String to test; may be null.</param>
/// <returns>
/// true when <paramref name="inputData"/> is a non-empty run of ASCII digits;
/// false for null, empty, or anything containing another character
/// (including a sign, whitespace or a trailing line break).
/// </returns>
public static bool IsNumber(string inputData)
{
    return !string.IsNullOrEmpty(inputData) && RegNumber.IsMatch(inputData);
}

/// <summary>
/// One or more ASCII digits spanning the whole input. The pattern ends with \z rather
/// than $ because $ also matches just before a final newline, which let "123\n" through.
/// </summary>
private static readonly Regex RegNumber = new Regex("^[0-9]+\\z");
在处理页面的后台，可以通过 Request.QueryString["参数"] 的方式获取参数。