公司要求开发一个word转xml的软件,其他都实现了,就这一点难住我了,请问大家如何把word内的图片提取出来呢?
我已经写到这里了:
#region 抓取图片
// Select the single character at index iChar of paragraph iPara; the check
// below inspects word.Selection, so the selection has to be moved first.
doc.Paragraphs.Item(iPara).Range.Characters.Item(iChar).Select();

bool selectionIsInlineShape =
    WdSelectionType.wdSelectionInlineShape == word.Selection.Type;
if (selectionIsInlineShape)
{
    // Report the picture on the console, flag it, and append the placeholder
    // text to the paragraph string being built.
    Console.Write("(抓取图片)");
    isPicture = true;
    paraStr = paraStr + "【这里是图片显示部分】";
}
#endregion
我已经写到这里了:
#region 抓取图片
// Select the single character at index iChar of paragraph iPara; the check
// below inspects word.Selection, so the selection has to be moved first.
doc.Paragraphs.Item(iPara).Range.Characters.Item(iChar).Select();

bool selectionIsInlineShape =
    WdSelectionType.wdSelectionInlineShape == word.Selection.Type;
if (selectionIsInlineShape)
{
    // Report the picture on the console, flag it, and append the placeholder
    // text to the paragraph string being built.
    Console.Write("(抓取图片)");
    isPicture = true;
    paraStr = paraStr + "【这里是图片显示部分】";
}
#endregion
你可以先 doc.SaveAs(..) 保存为 html 格式
doc会生成一个网页和一个同名文件夹,里面包含了网页的各种资源
当然也有图片了,你可以到那个文件夹里查找 *.jpg 等图片格式的文件
之后你想干什么都行啦, 为了隐蔽点你可以SaveAs到临时文件夹,
操作完就删了吧,不然就产生垃圾了
而 Shape 我查了一下 MSDN,有如下解释:
Represents an object in the drawing layer, such as an AutoShape, freeform, OLE object, ActiveX control, or picture(图片啊)。
没找到更多资料了,觉得从这个应该也能下手,需要解决:
1.判断哪个Shape 是图片
2.如何转换 Shape 到Image
if (word.Selection.Type == WdSelectionType.wdSelectionInlineShape)
{
isPicture = true;
...
我已经取到这个图片了,但是不知道如何把它保存下来,拿它没办法,郁闷啊
“Represents an object in the drawing layer, such as an AutoShape, freeform, OLE object, ActiveX control, or picture”——它不单单表示图片。
假如你获得了一个 InlineShape,则可以这样判断:
if(yourInlineShape.Type == WdInlineShapeType.wdInlineShapePicture)
{
}
// Saves every inline picture of the document to c:\pic<N>.jpg by selecting it,
// copying it to the clipboard and reading the clipboard image back.
// NOTE: the Clipboard class requires the calling thread to be STA (mark Main
// with [STAThread]).
Word.Document doc = newApp.Documents.Open(...);
int count = doc.InlineShapes.Count;

// Word collections are 1-based: valid indexes run from 1 to Count inclusive.
for (int i = 1; i <= count; i++)
{
    Word.InlineShape shape = doc.InlineShapes[i];
    if (shape.Type != Word.WdInlineShapeType.wdInlineShapePicture)
    {
        continue;
    }

    shape.Select();
    newApp.Selection.Copy();

    // GetImage returns null when the clipboard holds no image data; passing
    // null to the Bitmap constructor would throw, so skip this shape instead.
    Image image = Clipboard.GetImage();
    if (image == null)
    {
        continue;
    }

    using (image)
    using (Bitmap bitmap = new Bitmap(image))
    {
        // Save(path) alone would write PNG data for an in-memory bitmap, so the
        // JPEG encoder is requested explicitly to match the .jpg extension.
        // (i - 1) keeps the zero-based file numbering of the earlier version.
        bitmap.Save("c:\\pic" + (i - 1) + ".jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
    }
}
有一张图的时候成功,但有两张图的时候发现保存的是同样的图片,不知道为什么
是不是Select(); 可以多选的?
把之前的 for 改为 foreach 来遍历。为什么用 for 不行而要用 foreach 呢?
不明白,有人说这是一个 bug。(注意:Word 集合的下标从 1 开始,用 for 遍历时下标应从 1 取到 Count。)
int i = 0;
// Saves every inline picture of the document to c:\pic<N>.jpg, where N is the
// running counter i declared just before this loop. Each picture is selected,
// copied to the clipboard and read back as an image.
// NOTE: the Clipboard class requires the calling thread to be STA (mark Main
// with [STAThread]).
foreach (Word.InlineShape ish in doc.InlineShapes)
{
    if (ish.Type != Word.WdInlineShapeType.wdInlineShapePicture)
    {
        continue;
    }

    ish.Select();
    newApp.Selection.Copy();

    // GetImage returns null when the clipboard holds no image data; passing
    // null to the Bitmap constructor would throw, so skip this shape instead.
    Image image = Clipboard.GetImage();
    if (image == null)
    {
        continue;
    }

    using (image)
    using (Bitmap bitmap = new Bitmap(image))
    {
        // Save(path) alone would write PNG data for an in-memory bitmap, so the
        // JPEG encoder is requested explicitly to match the .jpg extension.
        bitmap.Save("c:\\pic" + i.ToString() + ".jpg", System.Drawing.Imaging.ImageFormat.Jpeg);
    }

    // Only count pictures that were actually written, so file numbers stay consecutive.
    i++;
}
这里的 Image 我怎么不能用呢?需要引入什么库呢?
System.Drawing;
System.Windows.Forms;
再添加两个 using:
using System.Drawing;
using System.Windows.Forms;
遇到问题多查查 MSDN
// Select the character at index iChar of paragraph iPara; if the resulting
// selection is an inline shape, copy it through the clipboard and save it
// next to the source document.
// NOTE: the Clipboard class requires the calling thread to be STA (mark Main
// with [STAThread]).
doc.Paragraphs.Item(iPara).Range.Characters.Item(iChar).Select();
if (word.Selection.Type == WdSelectionType.wdSelectionInlineShape)
{
    isPicture = true;
    Console.Write("(抓取图片)");
    word.Selection.Copy();

    // GetImage returns null when the clipboard holds no image data; passing
    // null to the Bitmap constructor would throw, so only save a real image.
    Image image = Clipboard.GetImage();
    if (image != null)
    {
        using (image)
        using (Bitmap bitmap = new Bitmap(image))
        {
            // A fixed file name made every picture overwrite the previous one;
            // the paragraph and character indexes make the name unique.
            // Save(path) alone would write PNG data for an in-memory bitmap, so
            // the JPEG encoder is requested explicitly to match the extension.
            string picturePath = forderPath + "_p" + iPara + "_c" + iChar + ".jpg";
            bitmap.Save(picturePath, System.Drawing.Imaging.ImageFormat.Jpeg);
        }
    }

    paraStr += "【这里是图片显示部分】";
}
#endregion
这是我的这一段,该怎么去改呢?
using System.Data;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Threading;
using System.Drawing;
using System.Windows.Forms;
using System.Text.RegularExpressions;
using Word;namespace Word2Xml
{
class Program
{
/// <summary>
/// Console entry point: asks for a directory, hands its file-system entries
/// to listFile, then waits for Enter before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Kill Word processes first (killWord is defined outside this excerpt).
    killWord(1);

    Console.Write("请输入您要转换的路径:");
    string inputDir = Console.ReadLine();

    // Walk the directory's entries (the DOC file list) and run the conversion on them.
    string[] entries = System.IO.Directory.GetFileSystemEntries(inputDir);
    listFile(entries);

    Console.WriteLine("文件转换完毕,按回车键退出");
    Console.ReadLine();
}

static void BuildXml(string wordPath)
{
string forderPath;
string xmlPath;
forderPath = wordPath.Substring(0, wordPath.Length - 4);
xmlPath = forderPath + ".xml";
Word.ApplicationClass word = new ApplicationClass();
Type wordType = word.GetType();
Word.Documents docs = word.Documents;
word.Visible = false; //是否显示word // Open word document.
Type docsType = docs.GetType();
Word.Document doc = (Word.Document)docsType.InvokeMember("Open", System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { wordPath, true, true }); int lPara = doc.Paragraphs.Count; //读取word文档的行数
//Console.WriteLine("\n当前word文档的总段落数:{0}", lPara);
Console.Write("文件生成中,请稍后");
//建立StreamWrite,准备写入xml文件
StreamWriter xmlfile = new StreamWriter(xmlPath, false, Encoding.GetEncoding("GB2312"));
xmlfile.WriteLine("<?xml version='1.0' encoding='gb2312' ?>");
xmlfile.WriteLine("<root>"); int iPara;
int iChar;
string paraStr;
bool isUnderline = false; //下划线
bool isBold = false; //加粗
bool isItalic = false; //斜体
bool isSuperscript = false; //上标
bool isSubscript = false; //下标
bool isStrikethrough = false; //删除线
bool isColor = false; //字体颜色
bool isPicture; //图片
for (iPara = 1; iPara <= lPara; iPara++)
{
paraStr = "";
//paraStr = doc.Paragraphs.Item(iPara).Range.Text; //读取本段内容
for (iChar = 1; iChar <= doc.Paragraphs.Item(iPara).Range.Characters.Count; iChar++)
{
isPicture = false;
#region 加下划线
if (doc.Paragraphs.Item(iPara).Range.Characters.Item(iChar).Font.Underline == WdUnderline.wdUnderlineSingle)
{
if (isUnderline != true)
{
isUnderline = true;
Console.Write("(下划线)");
paraStr += "<u>";
}
}
else
{
if (isUnderline == true)
{
isUnderline = false;
paraStr += "</u>";
}
}
#endregion #region 加粗
if (doc.Paragraphs.Item(iPara).Range.Characters.Item(iChar).Bold == -1)
{
if (isBold != true)
{
isBold = true;
Console.Write("(加粗)");
paraStr += "<b>";
}
}
else
{
if (isBold == true)
{
isBold = false;
paraStr += "</b>";
}
}
#endregion #region 斜体
if (doc.Paragraphs.Item(iPara).Range.Characters.Item(iChar).Italic == -1)
{
if (isItalic != true)
{
isItalic = true;
Console.Write("(斜体)");
paraStr += "<i>";
}
}
else
{
if (isItalic == true)
{
isItalic = false;
paraStr += "</i>";
}
}
#endregion