以前在网上找了段将word转换成html文件的代码,如下:
/// <summary>
/// Converts a Word document to a filtered-HTML file by automating Word through COM.
/// </summary>
/// <param name="qq">Path of the Word document to open.</param>
/// <param name="location">Virtual directory (resolved with Server.MapPath) that receives the HTML file.</param>
/// <param name="newName">Name of the HTML file to create, without the ".html" extension.</param>
/// <returns>"true" on success; otherwise the text of the exception that was caught.</returns>
/// <remarks>
/// This method does not set the encoding of the generated HTML, so Word's default is used.
/// NOTE(review): Word's object model exposes Document.WebOptions.Encoding
/// (e.g. MsoEncoding.msoEncodingUTF8), which can be set before SaveAs to force UTF-8 - confirm
/// against the Word object model reference before relying on it.
/// </remarks>
public static string WordToHtml(string qq, string location, string newName)
{
    string flag = null; // result marker: "true" or the exception text
    Microsoft.Office.Interop.Word.ApplicationClass word = null;
    try
    {
        // Resolve the output location before starting Word, so a bad path fails cheaply.
        string configPath = HttpContext.Current.Server.MapPath(location + newName + ".html");
        object saveFileName = configPath;
        object oFileName = qq;

        word = new Microsoft.Office.Interop.Word.ApplicationClass();
        Microsoft.Office.Interop.Word.Documents docs = word.Documents;

        // Late-bound call so the remaining optional COM arguments can be omitted.
        // The two booleans are the 2nd and 3rd arguments of Documents.Open
        // (ConfirmConversions and ReadOnly in the Word object model).
        Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
            "Open",
            System.Reflection.BindingFlags.InvokeMethod,
            null,
            docs,
            new object[] { oFileName, true, true });

        // Convert: save the opened document as filtered HTML at the resolved path.
        doc.GetType().InvokeMember(
            "SaveAs",
            System.Reflection.BindingFlags.InvokeMethod,
            null,
            doc,
            new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });

        flag = "true";
    }
    catch (Exception ex)
    {
        flag = ex.ToString();
    }
    finally
    {
        // Always shut Word down, even when Open/SaveAs failed; otherwise every failed
        // conversion leaves a Word process running on the server.
        if (word != null)
        {
            try
            {
                word.GetType().InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
            }
            catch (Exception quitEx)
            {
                // Keep the first failure; only report the Quit error if nothing else went wrong.
                if (flag == "true")
                {
                    flag = quitEx.ToString();
                }
            }
        }
    }

    return flag;
}
但是它转换的是ANSI的,我用 StreamReader wt = File.OpenText(Server.MapPath("template.htm"));
string moban = wt.ReadToEnd();
wt.Close();
读取之后发现是乱码,我想应该是格式的问题,请问有什么办法控制它转换的格式为utf-8吗?
/// <summary>
/// Converts a Word document to a filtered-HTML file by automating Word through COM.
/// </summary>
/// <param name="qq">Path of the Word document to open.</param>
/// <param name="location">Virtual directory (resolved with Server.MapPath) that receives the HTML file.</param>
/// <param name="newName">Name of the HTML file to create, without the ".html" extension.</param>
/// <returns>"true" on success; otherwise the text of the exception that was caught.</returns>
/// <remarks>
/// This method does not set the encoding of the generated HTML, so Word's default is used.
/// NOTE(review): Word's object model exposes Document.WebOptions.Encoding
/// (e.g. MsoEncoding.msoEncodingUTF8), which can be set before SaveAs to force UTF-8 - confirm
/// against the Word object model reference before relying on it.
/// </remarks>
public static string WordToHtml(string qq, string location, string newName)
{
    string flag = null; // result marker: "true" or the exception text
    Microsoft.Office.Interop.Word.ApplicationClass word = null;
    try
    {
        // Resolve the output location before starting Word, so a bad path fails cheaply.
        string configPath = HttpContext.Current.Server.MapPath(location + newName + ".html");
        object saveFileName = configPath;
        object oFileName = qq;

        word = new Microsoft.Office.Interop.Word.ApplicationClass();
        Microsoft.Office.Interop.Word.Documents docs = word.Documents;

        // Late-bound call so the remaining optional COM arguments can be omitted.
        // The two booleans are the 2nd and 3rd arguments of Documents.Open
        // (ConfirmConversions and ReadOnly in the Word object model).
        Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docs.GetType().InvokeMember(
            "Open",
            System.Reflection.BindingFlags.InvokeMethod,
            null,
            docs,
            new object[] { oFileName, true, true });

        // Convert: save the opened document as filtered HTML at the resolved path.
        doc.GetType().InvokeMember(
            "SaveAs",
            System.Reflection.BindingFlags.InvokeMethod,
            null,
            doc,
            new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });

        flag = "true";
    }
    catch (Exception ex)
    {
        flag = ex.ToString();
    }
    finally
    {
        // Always shut Word down, even when Open/SaveAs failed; otherwise every failed
        // conversion leaves a Word process running on the server.
        if (word != null)
        {
            try
            {
                word.GetType().InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
            }
            catch (Exception quitEx)
            {
                // Keep the first failure; only report the Quit error if nothing else went wrong.
                if (flag == "true")
                {
                    flag = quitEx.ToString();
                }
            }
        }
    }

    return flag;
}
但是它转换的是ANSI的,我用 StreamReader wt = File.OpenText(Server.MapPath("template.htm"));
string moban = wt.ReadToEnd();
wt.Close();
读取之后发现是乱码,我想应该是格式的问题,请问有什么办法控制它转换的格式为utf-8吗?
解决方案 »
- 求救!通过javascript调用web service出错
- 急救!!!!如何将多表查询的结果装到一个list泛型集合里?
- 同一母版页不同控件如何引起共鸣事件?
- 手动绑定listview如何排序?
- treeView动态生成问题
- GridView的分页显示
- 在div分层中加入一个input按钮,点击input按钮,想在div层中显示“Google在办公工具软件领域正式向微软发起了挑战”,怎样实现呢?下面我
- 用asp.net读取数据速度为什么没asp快?
- 有一段xml字符串,如果转换成 DataView ,此Xml字符串是规则的,不要使用读取每个节点的方法,
- xml能给我讲讲吗
- GridView中HyperLinkField 如何动态改变
- DropDownListt要title时IE6没效果怎么办?
this.Page.ContentType="text/html";
this.Page.里面有个设置编码的方法,不好意思忘了(应该是 this.Page.Response.ContentEncoding = System.Text.Encoding.UTF8;)
// Requires at the top of the file: using System.IO; using System.Threading;
//
// Converts a Word document to HTML through COM automation, then copies the generated
// file line by line (up to and including the closing </html> line) into a temporary
// file that finally replaces the original. The copy step is the hook for injecting
// extra markup, such as a download link for the source document.

object filename = @"E:\AA.doc";        // path of the Word file to convert
object savefilename = @"E:\bb.html";   // path and name of the generated HTML

string htmlPath = savefilename.ToString();
// Build "<name>$.html" next to the output. Path helpers are used instead of Split('.')
// so that a dot in a directory name does not truncate the path.
string tempPath = Path.Combine(
    Path.GetDirectoryName(htmlPath),
    Path.GetFileNameWithoutExtension(htmlPath) + "$.html");

Microsoft.Office.Interop.Word.ApplicationClass appclass = new Microsoft.Office.Interop.Word.ApplicationClass(); // start Word
Type wordtype = appclass.GetType();
try
{
    Microsoft.Office.Interop.Word.Documents docs = appclass.Documents;
    Type docstype = docs.GetType();

    // Open the document; the two booleans are the 2nd and 3rd arguments of Documents.Open.
    Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docstype.InvokeMember(
        "Open",
        System.Reflection.BindingFlags.InvokeMethod,
        null,
        docs,
        new object[] { filename, true, true });

    // Save it as HTML.
    Type doctype = doc.GetType();
    doctype.InvokeMember(
        "SaveAs",
        System.Reflection.BindingFlags.InvokeMethod,
        null,
        doc,
        new object[] { savefilename, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatHTML });
}
finally
{
    // Quit Word even when Open/SaveAs failed, so no Word process is left behind.
    wordtype.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, appclass, null);
}

Thread.Sleep(3000); // block for 3 seconds to let Word finish exiting before the file is touched

System.Text.Encoding gb2312 = System.Text.Encoding.GetEncoding("GB2312");
using (StreamReader objreader = new StreamReader(htmlPath, gb2312))
using (StreamWriter streamHtmlHelp = new StreamWriter(tempPath, false, gb2312))
{
    // Optional: uncomment to prepend a download link for the source Word file.
    // streamHtmlHelp.WriteLine("<a href='E:\\AA.html'>源文件下载</a><br>");

    string str;
    // Stop at end of file as well as at "</html>"; ReadLine returns null at EOF, which
    // would otherwise never equal "</html>" and keep the loop writing forever.
    while ((str = objreader.ReadLine()) != null)
    {
        streamHtmlHelp.WriteLine(str);
        if (str == "</html>")
        {
            break;
        }
    }
}

// Replace the original HTML with the rewritten copy.
File.Delete(htmlPath);
File.Move(tempPath, htmlPath);
注意编码
// The second constructor argument selects the encoding used to decode the file.
// File.OpenText always decodes as UTF-8; the question states the file was saved as ANSI,
// so decode it with the system ANSI code page instead.
// NOTE(review): Encoding.Default is the ANSI code page on .NET Framework; if the server's
// code page differs from the one Word used, pass Encoding.GetEncoding("GB2312") explicitly.
StreamReader wt = new StreamReader(Server.MapPath("template.htm"), System.Text.Encoding.Default);