难度问题：***包含html代码的数据导入到word中的问题***

难度问题：包含html代码的数据导入到word中的问题

表TB中有一个字段m，m中的数据是通过FCKeditor录入的，因此包含很多html代码和图像的URL。问题：现在要在服务端将m中的数据导入到word中，不能直接显示html代码，而要呈现出和网页上一样的效果，即有图片的地方显示图片，有表格的地方显示表格，其它html代码过滤掉不显示。请教要如何做？

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

可以独立创建一个页面用来导出word：        System.Web.HttpContext HC = System.Web.HttpContext.Current;
        // Stream the rendered GV_result control to the browser as a Word attachment.
        // Word opens the HTML markup and lays it out as a document.
        HC.Response.Clear();
        HC.Response.Buffer = true;
        // The body encoding must agree with the declared charset; the original used
        // UTF-7 for the body while announcing GB2312, which garbles non-ASCII text.
        HC.Response.Charset = "GB2312";
        HC.Response.ContentEncoding = System.Text.Encoding.GetEncoding("GB2312");
        HC.Response.AddHeader("Content-Disposition", "attachment;filename=" + HttpUtility.UrlEncode("查询结果打印", System.Text.Encoding.UTF8) + ".doc");
        // To export in Excel format instead, switch this to "application/excel".
        HC.Response.ContentType = "application/ms-word";
        // View state is not needed in the exported markup.
        this.EnableViewState = false;
        using (System.IO.StringWriter sw = new System.IO.StringWriter())
        using (System.Web.UI.HtmlTextWriter htw = new System.Web.UI.HtmlTextWriter(sw))
        {
            // Render only the result control (not the whole page) into the buffer.
            this.GV_result.RenderControl(htw);
            HC.Response.Write(sw.ToString());
        }
        // Stop page processing so nothing else is appended to the download.
        HC.Response.End();
public partial class ImportWord : System.Web.UI.Page
{
    // Matches the content between <body ...> and </body>. The attribute part is
    // optional so a bare <body> matches too, and Singleline lets the body span
    // line breaks.
    private static readonly Regex BodyRegex = new Regex(
        @"<body(?:\s[^>]*)?>(?<text>.*?)</body>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // How often, and how long, to wait for Word to release the converted file.
    private const int ReadAttempts = 5;
    private const int ReadRetryDelayMs = 500;

    // HTML file name (not used by the code in this part of the class).
    private string _htmlFileName;

    // Full path of the uploaded Word document waiting to be converted.
    string wordfile = "";

    // Physical folder that receives uploads, converted HTML and extracted images.
    public string uploadpath = "";

    /// <summary>
    /// Resolves the working folder under the site root and makes sure it exists.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        uploadpath = Server.MapPath("~/") + "Portals\\0\\WordImport\\";
        // CreateDirectory is a no-op when the folder is already there.
        Directory.CreateDirectory(uploadpath);
    }

    /// <summary>
    /// Converts the uploaded Word document to HTML and puts the body markup into
    /// the editor (TextArea1) and the preview element (resultcontent).
    /// Images that Word extracts are moved into <see cref="uploadpath"/> under a
    /// unique name and the markup is rewritten to point at them.
    /// </summary>
    /// <exception cref="InvalidOperationException">No document has been uploaded yet.</exception>
    protected void wordToHtml()
    {
        if (string.IsNullOrEmpty(wordfile))
        {
            throw new InvalidOperationException("No uploaded Word document to convert.");
        }

        // Fixed-width timestamp: unambiguous and unique down to the millisecond.
        string filename = DateTime.Now.ToString("yyyyMMddHHmmssfff", System.Globalization.CultureInfo.InvariantCulture);
        // Location of the converted HTML document.
        string ConfigPath = uploadpath + filename + ".html";
        // Folder Word creates next to the HTML file for embedded images.
        string imageFolder = uploadpath + filename + ".files/";

        try
        {
            ConvertToFilteredHtml(wordfile, ConfigPath);

            string content = getBody(ReadJoinedLines(ConfigPath))
                .Replace("src=", " src=")
                .Replace(filename + ".files/", "\\Portals\\0\\WordImport\\");

            if (Directory.Exists(imageFolder))
            {
                foreach (FileInfo fi in new DirectoryInfo(imageFolder).GetFiles())
                {
                    // Prefix with the timestamp so images of different imports never
                    // collide. GetFileNameWithoutExtension also copes with files that
                    // have no extension (string.Replace("", ...) would throw).
                    string photoname = filename
                        + Path.GetFileNameWithoutExtension(fi.Name)
                        + fi.Extension.ToLowerInvariant();
                    content = content.Replace(fi.Name, photoname);
                    fi.MoveTo(uploadpath + photoname);
                }
                Directory.Delete(imageFolder, true);
            }

            this.TextArea1.Value = content;
            this.resultcontent.InnerHtml = content;
        }
        finally
        {
            // Temporary files are removed whether or not the conversion succeeded.
            DeleteTempFile(wordfile);
            DeleteTempFile(ConfigPath);
        }
    }

    /// <summary>
    /// Opens <paramref name="sourcePath"/> in Word and saves it as filtered HTML to
    /// <paramref name="targetPath"/>. Word is always shut down, even on failure.
    /// </summary>
    private static void ConvertToFilteredHtml(string sourcePath, string targetPath)
    {
        Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
        try
        {
            Microsoft.Office.Interop.Word.Documents docs = word.Documents;
            // Late-bound calls let us pass only the leading arguments:
            // Open(FileName, ConfirmConversions, ReadOnly).
            Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
                "Open",
                System.Reflection.BindingFlags.InvokeMethod,
                null,
                docs,
                new object[] { sourcePath, true, true });

            // SaveAs(FileName, FileFormat)
            doc.GetType().InvokeMember(
                "SaveAs",
                System.Reflection.BindingFlags.InvokeMethod,
                null,
                doc,
                new object[] { targetPath, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
        }
        finally
        {
            // Quit Word; otherwise a failed conversion leaves a WINWORD process behind.
            word.GetType().InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
        }
    }

    /// <summary>
    /// Reads the file and returns its lines concatenated without separators (the
    /// same shape the markup had before). Retries a few times because Word may
    /// still hold the file briefly after quitting. Each attempt starts from an
    /// empty buffer so a failed partial read never duplicates content.
    /// </summary>
    private static string ReadJoinedLines(string path)
    {
        for (int attempt = 1; ; attempt++)
        {
            try
            {
                StringBuilder html = new StringBuilder();
                using (StreamReader reader = new StreamReader(path, System.Text.Encoding.Default))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        html.Append(line);
                    }
                }
                return html.ToString();
            }
            catch (IOException)
            {
                if (attempt >= ReadAttempts)
                {
                    throw;
                }
                System.Threading.Thread.Sleep(ReadRetryDelayMs);
            }
        }
    }

    /// <summary>
    /// Best-effort removal of a temporary file; a failure is traced rather than
    /// allowed to hide the exception that is already propagating.
    /// </summary>
    private static void DeleteTempFile(string path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return;
        }
        try
        {
            File.Delete(path);
        }
        catch (IOException ex)
        {
            System.Diagnostics.Trace.TraceWarning("Could not delete temporary file '" + path + "': " + ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            System.Diagnostics.Trace.TraceWarning("Could not delete temporary file '" + path + "': " + ex.Message);
        }
    }

    /// <summary>
    /// Saves the posted Word file into <see cref="uploadpath"/>.
    /// </summary>
    /// <returns>
    /// true when a non-empty .doc file was stored (its path is then available to
    /// <see cref="wordToHtml"/>); false when nothing was posted, the extension is
    /// not .doc, or saving failed.
    /// </returns>
    public bool uploadWord()
    {
        if (fileWord.PostedFile == null || fileWord.PostedFile.ContentLength == 0)
        {
            return false;
        }

        try
        {
            string extendName = Path.GetExtension(fileWord.PostedFile.FileName);
            if (!string.Equals(extendName, ".doc", StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            // A GUID-based name is always fresh, so the upload is always written and
            // a stale document with a coincidentally equal name is never converted.
            string target = uploadpath + Guid.NewGuid().ToString("N") + ".doc";
            fileWord.PostedFile.SaveAs(target);
            wordfile = target;
            return true;
        }
        catch (Exception ex)
        {
            // The caller only needs success/failure; keep the cause in the trace.
            System.Diagnostics.Trace.TraceError("Word upload failed: " + ex);
            return false;
        }
    }

    /// <summary>
    /// Strips the page header and footer: returns only what lies between the
    /// opening and closing body tags (concatenated if there are several).
    /// </summary>
    /// <param name="html">Complete HTML document; null or empty yields "".</param>
    /// <returns>The inner body markup, or "" when no body element is found.</returns>
    public string getBody(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return "";
        }

        StringBuilder body = new StringBuilder();
        foreach (Match m in BodyRegex.Matches(html))
        {
            body.Append(m.Groups["text"].Value);
        }
        return body.ToString();
    }

    /// <summary>
    /// Upload button handler: stores the posted file, then converts it.
    /// </summary>
    protected void btnUpload_Click(object sender, EventArgs e)
    {
        if (uploadpath == "")
        {
            return;
        }
        if (uploadWord())
        {
            wordToHtml();
        }
    }
}
这个是word导入html，反过来的话，你可以研究下。
应该可以的，数据导出到word已实现，插入图片也差不多了，现在主要卡在表格上了
论坛又维护了...html字段的截取，我刚刚试了下，有点麻烦，但是绝对可以做。string tmp = "<table border=\"1\" cellspacing=\"1\" cellpadding=\"1\" width=\"200\">    <tbody>        <tr>            <td><a target=\"_blank\" href=\"http://c.csdn.net/bbs/t/5/i/pic_logo.gif\"><img alt=\"\" src=\"http://c.csdn.net/bbs/t/5/i/pic_logo.gif\" /></a></td>            <td>b</td>            <td>c</td>        </tr>        <tr>            <td><strike>AA</strike></td>            <td>BB</td>            <td><strong>CC</strong></td>        </tr>        <tr>            <td>AAA</td>            <td><u>BBB</u></td>            <td><a href=\"http://CCC\">CCC</a></td>        </tr>    </tbody></table>";
<table....   </table>
<thead....   </thead>
<tbody....   </tbody>
<tr....   </tr>
<td....   </td>
<img....   />
这些保留
其他的全都成对删除
indexof、substring轮换着用。思路给你了，剩下的要你自己动手才行。
保留后如何在word中显示成表格呢？
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default12.aspx.cs" Inherits="Default12" %><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml" >
<head runat="server">
    <title></title>
</head>
<body>
    <form id="form1" runat="server">
    <div>
    <!-- Sample table to export. It is a server control (runat="server", id "tb") so the code-behind can pass it to the Word export. -->
    <table id="tb" runat="server" border="1" cellspacing="1" cellpadding="1" width="200">    <tbody>        <tr>            <td><a target="_blank" href="http://c.csdn.net/bbs/t/5/i/pic_logo.gif"><img alt="" src="http://c.csdn.net/bbs/t/5/i/pic_logo.gif" /></a></td>            <td>b</td>            <td>c</td>        </tr>        <tr>            <td><strike>AA</strike></td>            <td>BB</td>            <td><strong>CC</strong></td>        </tr>        <tr>            <td>AAA</td>            <td><u>BBB</u></td>            <td><a href="http://CCC">CCC</a></td>        </tr>    </tbody></table>
        <!-- Clicking this button invokes the btnToWord_Click handler in the code-behind. -->
        <asp:Button ID="btnToWord" runat="server" Text="Button" OnClick="btnToWord_Click" /></div>
    </form>
</body>
</html>
`
`
`
using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
//add
using System.IO;

/// <summary>
/// Code-behind for Default12.aspx: exports the server-side table "tb" as a
/// Word document when the button is clicked.
/// </summary>
public partial class Default12 : System.Web.UI.Page
{
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Button handler: sends the table to the browser as "fileName.doc".
    /// </summary>
    protected void btnToWord_Click(object sender, EventArgs e)
    {
        string fileName = "fileName.doc";
        HtmlDataToDoc hdtd = new HtmlDataToDoc(this.Page);
        hdtd.TableDataToWord(tb, fileName);
    }
}

/// <summary>
/// Sends the rendered markup of an HTML table control to the client as a
/// downloadable document, using the response of the page it was created for.
/// </summary>
public class HtmlDataToDoc
{
    // Page whose response receives the exported document.
    private readonly Page _InvokePage;

    /// <param name="invoke_page">Page whose response is used for the download.</param>
    public HtmlDataToDoc(Page invoke_page)
    {
        _InvokePage = invoke_page;
    }

    /// <summary>
    /// Exports <paramref name="tab"/> as a Word download.
    /// </summary>
    /// <param name="tab">Table control to render.</param>
    /// <param name="fileName">
    /// Download file name; ".doc" is appended unless the name already ends with it
    /// (case-insensitive).
    /// </param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    public void TableDataToWord(System.Web.UI.HtmlControls.HtmlTable tab, string fileName)
    {
        if (tab == null)
        {
            throw new ArgumentNullException("tab");
        }
        if (fileName == null)
        {
            throw new ArgumentNullException("fileName");
        }

        // Check the suffix, not "contains": a name such as "my.docs.txt" must still
        // receive the .doc extension.
        if (!fileName.EndsWith(".doc", StringComparison.OrdinalIgnoreCase))
        {
            fileName = fileName + ".doc";
        }
        TableExport(tab, fileName, "application/ms-word");
    }

    /// <summary>
    /// Renders the table into the response and ends the request.
    /// </summary>
    /// <param name="tab">Table control to render.</param>
    /// <param name="fileName">File name offered to the browser.</param>
    /// <param name="typeName">Value for the Content-Type header.</param>
    private void TableExport(System.Web.UI.HtmlControls.HtmlTable tab, string fileName, string typeName)
    {
        System.Web.HttpResponse httpResponse = _InvokePage.Response;
        httpResponse.Clear();
        httpResponse.Buffer = true;

        // Configure encoding and headers before any body content is written so the
        // whole body is produced with the encoding the charset announces.
        httpResponse.Charset = "GB2312";
        httpResponse.ContentEncoding = System.Text.Encoding.GetEncoding("GB2312");
        httpResponse.ContentType = typeName;
        httpResponse.AppendHeader("Content-Disposition", "attachment;filename=" + HttpUtility.UrlEncode(fileName, System.Text.Encoding.UTF8));

        // View state is not needed in the exported markup.
        _InvokePage.EnableViewState = false;

        // The meta tag tells Word which charset to use when it opens the saved file.
        httpResponse.Write("<meta http-equiv=Content-Type content=text/html;charset=GB2312>");
        using (StringWriter tw = new StringWriter())
        using (System.Web.UI.HtmlTextWriter hw = new System.Web.UI.HtmlTextWriter(tw))
        {
            tab.RenderControl(hw);
            httpResponse.Write(tw.ToString());
        }

        // Stop page processing so the normal page markup is not appended.
        httpResponse.End();
    }
}
借鉴的是这个牛人的方法，然后只保留了doc部分：《[ASP.NET]完美实现导出Table为Word或Excel》
<table> 放到word中就乱了格式啊……