C# 求 C# 源码过滤 Html 标记，我只要Html 中的文本。。【谢谢】

Html 源码文件地址：
http://community.csdn.net/
谢谢。。

http://download.csdn.net/source/1882970 上面这个地址。http://download.csdn.net/source/1882970

/// <summary>
/// Returns the visible text of an HTML document by loading the markup into an
/// off-screen <see cref="System.Windows.Forms.WebBrowser"/> and reading the
/// body's InnerText.
/// </summary>
/// <param name="HtmlStr">The HTML markup to convert.</param>
/// <returns>
/// The text of the document body, or an empty string when the input is null or
/// empty, or when the document has no body or no text.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// The browser control did not expose a document after navigating to about:blank.
/// </exception>
public string HtmlText(string HtmlStr)
{
    if (string.IsNullOrEmpty(HtmlStr))
    {
        return string.Empty;
    }

    // WebBrowser wraps a native control, so it must be disposed deterministically.
    using (System.Windows.Forms.WebBrowser wb = new System.Windows.Forms.WebBrowser())
    {
        // Navigate to a blank page so there is a document to write into.
        wb.Navigate("about:blank");

        // NOTE(review): this relies on the about:blank document being available
        // immediately after Navigate returns - confirm on the target runtime.
        if (wb.Document == null)
        {
            throw new System.InvalidOperationException(
                "The WebBrowser control has no document to write to.");
        }

        wb.Document.Write(HtmlStr);

        System.Windows.Forms.HtmlElement body = wb.Document.Body;
        if (body == null)
        {
            return string.Empty;
        }

        return body.InnerText ?? string.Empty;
    }
}

正则表达式
// Strip HTML tags: returns htmlString with every "<...>" span removed.
function filterHtmlTag(htmlString) {
    return htmlString.replace(/<[^>]*>|<\/[^>]*>/gm, "");
}

// Replaces every "<...>" span in HtmlString with an empty string; "(.|\n)*?" lazily
// matches any characters, line breaks included, up to the next ">".
// NOTE(review): presumably a regex replace (e.g. Regex.Replace) - the receiver is not shown; confirm.
Replace(HtmlString, "<(.|\n)*?>", "")

我想要的是Html中的文本内容
我不用脚本实现。。要是C#源码。。
写带Using System.IO;
***
**
写个整文件可以不。。
这种事情我第一次弄。。
很不明白的地方。。谢谢。。

我的正则。。没能得到正确Html内容
无误。。
/// <summary>
/// Reduces an HTML fragment to plain text: removes script blocks, tags and
/// comment remnants, strips leftover angle brackets, and decodes a small set
/// of character entities.
/// </summary>
/// <param name="Htmlstring">The HTML markup to clean.</param>
/// <returns>
/// The remaining text, or an empty string when the input is null or empty.
/// </returns>
public string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Normalise a few typographic characters.
    Htmlstring = Htmlstring.Replace("§", "");
    Htmlstring = Htmlstring.Replace("—", "-");
    Htmlstring = Htmlstring.Replace("·", ".");

    // Remove scripts. Singleline lets '.' cross line breaks, so scripts that
    // span several lines are removed together with their content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove HTML tags, line breaks followed by whitespace, and comment remnants.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Strip angle brackets left over from broken markup. string.Replace returns
    // a new string, so the result must be assigned. This runs before entity
    // decoding so that text written as &lt; / &gt; survives as '<' / '>'.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // Decode the supported character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);

    // Drop any numeric entity that was not decoded above.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    return Htmlstring;
}

// Quick demonstration: remove every "<...>" tag from a small HTML fragment
// and show the remaining text in a message box.
System.Text.RegularExpressions.Regex tagPattern = new System.Text.RegularExpressions.Regex("<[^>]+>");
string textOnly = tagPattern.Replace("<body>dddd</body>", string.Empty);
MessageBox.Show(textOnly);

/// <summary>
/// Returns the visible text of an HTML page by loading its markup into an
/// off-screen <see cref="System.Windows.Forms.WebBrowser"/> and reading the
/// body's InnerText.
/// </summary>
/// <param name="HtmlStr">The complete source of the page (the whole HTML file).</param>
/// <returns>
/// The text of the document body, or an empty string when the input is null or
/// empty, or when the document has no body or no text.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// The browser control did not expose a document after navigating to about:blank.
/// </exception>
public string HtmlText(string HtmlStr)
{
    if (string.IsNullOrEmpty(HtmlStr))
    {
        return string.Empty;
    }

    // WebBrowser wraps a native control, so it must be disposed deterministically.
    using (System.Windows.Forms.WebBrowser wb = new System.Windows.Forms.WebBrowser())
    {
        // Navigate to a blank page so there is a document to write into.
        wb.Navigate("about:blank");

        // NOTE(review): this relies on the about:blank document being available
        // immediately after Navigate returns - confirm on the target runtime.
        if (wb.Document == null)
        {
            throw new System.InvalidOperationException(
                "The WebBrowser control has no document to write to.");
        }

        wb.Document.Write(HtmlStr);

        System.Windows.Forms.HtmlElement body = wb.Document.Body;
        if (body == null)
        {
            return string.Empty;
        }

        return body.InnerText ?? string.Empty;
    }
}

效率很高的
// Removes every "<...>" span from Htmlstring. The atomic group (?>...) stops the
// regex engine from backtracking into the tag body once [^>]* has matched.
Htmlstring = Regex.Replace(Htmlstring, @"<(?>[^>]*)>", string.Empty, RegexOptions.IgnoreCase);

如何获取 <body>中间的内容？？？</body>
谢谢

你好。。
请问一个String可以放1个文件的源码？？
能放。。该怎么放。。
谢谢

找个HTML 编辑控件，一般的都带有返回不带Html标签的纯文本的属性或方法

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.Drawing.Printing;
using System.IO;
using System.Text.RegularExpressions;
using System.Web;

namespace HtmlPrint
{
    /// <summary>
    /// Form that exports the text of an HTML file with the markup removed and
    /// prints a file line by line.
    /// </summary>
    public partial class Form1 : Form
    {
        // Font used by pd_PrintPage; only valid while Printing() is running.
        private Font printFont;

        // Reader over the file being printed; only valid while Printing() is running.
        private StreamReader streamToPrint;

        // Path of the file sent to the printer by Printing().
        static string filePath;

        /// <summary>
        /// Builds the form and sets the path of the file to print.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            filePath = "e:\\aa.html";
        }

        /// <summary>
        /// Character filter: removes the head section and all tags from
        /// <paramref name="Htmlstring"/> and converts the "%66" line markers
        /// (appended after every input line by button2_Click) back into line breaks.
        /// </summary>
        /// <param name="Htmlstring">
        /// The HTML markup, joined into a single line with "%66" after each
        /// original line.
        /// </param>
        /// <returns>
        /// The filtered text, or an empty string when the input is null or empty.
        /// </returns>
        public string NoHTML(string Htmlstring)
        {
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            // Normalise a few typographic characters.
            Htmlstring = Htmlstring.Replace("§", "");
            Htmlstring = Htmlstring.Replace("—", "-");
            Htmlstring = Htmlstring.Replace("·", ".");

            // Drop the head section, then every remaining tag.
            Htmlstring = Regex.Replace(Htmlstring, @"<head>.*</head>", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<[^>]+>", "", RegexOptions.IgnoreCase);

            // Remove a space together with the line marker that follows it, then
            // every remaining space.
            // NOTE(review): the space in these two patterns may originally have
            // been &nbsp; - confirm against the author's source.
            Htmlstring = Regex.Replace(Htmlstring, @" %66", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @" ", "", RegexOptions.IgnoreCase);

            // Collapse runs of line markers: three passes, each turning three
            // consecutive markers into one.
            for (int pass = 0; pass < 3; pass++)
            {
                Htmlstring = Regex.Replace(Htmlstring, @"%66%66%66", "%66", RegexOptions.IgnoreCase);
            }

            // Turn the remaining markers back into real line breaks.
            Htmlstring = Regex.Replace(Htmlstring, @"%66", "\r\n", RegexOptions.IgnoreCase);

            return Htmlstring;
        }

        // Prints the file at filePath.
        private void button1_Click(object sender, EventArgs e)
        {
            this.Printing();
        }

        /// <summary>
        /// Prints the file at filePath line by line in 10pt Arial. Any failure is
        /// reported to the user in a message box.
        /// </summary>
        public void Printing()
        {
            try
            {
                // The reader, font and print document are all disposable; the
                // using statements release them even if printing fails.
                using (StreamReader reader = new StreamReader(filePath))
                using (Font font = new Font("Arial", 10))
                using (PrintDocument pd = new PrintDocument())
                {
                    streamToPrint = reader;
                    printFont = font;
                    pd.PrintPage += new PrintPageEventHandler(pd_PrintPage);

                    // Print the document; pd_PrintPage is raised once per page.
                    pd.Print();
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            finally
            {
                // Do not keep references to disposed objects.
                streamToPrint = null;
                printFont = null;
            }
        }

        // The PrintPage event is raised for each page to be printed.
        private void pd_PrintPage(object sender, PrintPageEventArgs ev)
        {
            float lineHeight = printFont.GetHeight(ev.Graphics);
            float leftMargin = ev.MarginBounds.Left;
            float topMargin = ev.MarginBounds.Top;

            // Calculate the number of lines per page.
            float linesPerPage = ev.MarginBounds.Height / lineHeight;

            int count = 0;
            string line;

            // Iterate over the file, printing each line. The count check comes
            // first so no line is read (and lost) once the page is full.
            using (StringFormat format = new StringFormat())
            {
                while (count < linesPerPage &&
                   ((line = streamToPrint.ReadLine()) != null))
                {
                    float yPos = topMargin + (count * lineHeight);
                    ev.Graphics.DrawString(line, printFont, Brushes.Black, leftMargin, yPos, format);
                    count++;
                }
            }

            // If more lines exist, print another page. Asking the reader directly
            // avoids an empty trailing page when the file ends exactly at the
            // bottom of a page.
            ev.HasMorePages = !streamToPrint.EndOfStream;
        }

        // Reads the HTML file, filters it through NoHTML and writes the text to e:\ss.txt.
        private void button2_Click(object sender, EventArgs e)
        {
            box.Text = "文件正在导出！！！";

            try
            {
                // Join all input lines into one string, marking each line end
                // with "%66" so NoHTML can restore the line breaks afterwards.
                StringBuilder buff = new StringBuilder();
                using (StreamReader sr = new StreamReader("e:\\co_a001aft211.html"))
                {
                    string str_r;
                    while ((str_r = sr.ReadLine()) != null)
                    {
                        buff.Append(str_r);
                        buff.Append("%66");
                    }
                }

                string raw = buff.ToString();

                // The output file is only created once the input was read
                // successfully, so a read failure does not truncate it.
                using (StreamWriter sw = new StreamWriter("e:\\ss.txt", false))
                {
                    sw.Write(this.NoHTML(raw));
                }

                // NOTE(review): looks like a debugging aid (shows the unfiltered
                // buffer) - confirm whether it should stay.
                MessageBox.Show(raw);

                box.Text = "文件导出成功！！！";
            }
            catch (Exception str_e)
            {
                MessageBox.Show(str_e.Message);

                // Do not report success when the export failed.
                box.Text = "文件导出失败！！！";
            }
        }

        // Shows a file-open dialog.
        // NOTE(review): the selected file is not used anywhere - confirm intent.
        private void button3_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog op = new OpenFileDialog())
            {
                op.ShowDialog();
            }
        }
    }
}

看好 html 源码自己编写吧。。
他们都不知道具体结构没办法帮你。。

调试易

C# 求 C# 源码过滤 Html 标记，我只要Html 中的文本。。【谢谢】

解决方案 »

C# 求 C# 源码 过滤 Html 标记，我只要Html 中的文本。。【谢谢】

解决方案 »

C# 求 C# 源码过滤 Html 标记，我只要Html 中的文本。。【谢谢】