将html文本转换为普通文本 我想做一个日志列表,但不是只列出题目,还要列出一少部分的内容,就像人人网的日志列表一样,大家可以去看一下自己的人人,但是日志内容是html文本,取出来之后有换行、还有格式,比如说我想取出正文的钱140个字,只要字,不加任何格式和换行,应该怎么处理呢?简单地说就是将一串有格式、有换行的html文本转换成没有格式、没有换行的纯文字文本,怎么做呢?求贴出aspx代码或者是c#代码! 解决方案 » 免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货 string str = "<span style="color:red;">文字</span>public string NoHTML(string Htmlstring) { //删除脚本 Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase); //删除HTML Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase); Htmlstring = Regex.Replace(Htmlstring, @"<img[^>]*>;", "", RegexOptions.IgnoreCase); Htmlstring.Replace("<", ""); Htmlstring.Replace(">", ""); Htmlstring.Replace("\r\n", ""); Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim(); return Htmlstring; } str = NoHTML(str.ToString()); 这里发言,表示您接受了CSDN社区的用户行为准则。 using System.Text.RegularExpressions; //需要引用 public string HtmlFilter(string strHtml) //从字符串里删除HTML { Regex regex = new Regex("<[^>]*>", RegexOptions.IgnoreCase); string strOutput = regex.Replace(strHtml, ""); return strOutput; } 调用自定义控键方法问题 卡了一下午的问题,上传图片,报错:对路径“D:\FtpUpFile\Web\Photo\40”的访问被拒绝。 为什么看不见我画的图?? 急啊,做不出來要被開除了,各位大哥們幫幫忙!再三扣首 纳闷,为什么web.config改为gb2312,整个项目的汉字都横排? datagrid分页的菜鸟问题 web.config的连接字符串 基础问题,ASP.NET网站怎么继承 App_Code中的某个类? ASP.net运行错误!!!请大家帮帮忙进来看看!!!在线等待!!! 这是用来做什么的? 数组之间的转换问题 急问!RadioButtonList能否分布在不同的单元格中?
{
//删除脚本
Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
//删除HTML
Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
Htmlstring = Regex.Replace(Htmlstring, @"<img[^>]*>;", "", RegexOptions.IgnoreCase);
Htmlstring.Replace("<", "");
Htmlstring.Replace(">", "");
Htmlstring.Replace("\r\n", "");
Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
return Htmlstring; } str = NoHTML(str.ToString());
public string HtmlFilter(string strHtml) //从字符串里删除HTML
{
Regex regex = new Regex("<[^>]*>", RegexOptions.IgnoreCase);
string strOutput = regex.Replace(strHtml, "");
return strOutput;
}