string a=@"{\rtf1\ansi\ansicpg936\deff0\deflang1033\deflangfe2052{\fonttbl{\f0\fmodern\fprq6\fcharset134 \'cb\'ce\'cc\'e5;}{\f1\fnil ;}{\f2\fnil\fcharset134 \'b7\'bd\'d5\'fd\'d2\'a6\'cc\'e5;}{\f3\fnil\fcharset134 \'b7\'bd\'d5\'fd\'ca\'e6\'cc\'e5;}{\f4\fnil\fcharset134 \'bf\'ac\'cc\'e5_GB2312;}{\f5\fnil\fcharset134 \'c1\'a5\'ca\'e9;}{\f6\fnil\fcharset134 \'ba\'da\'cc\'e5;}{\f7\fswiss\fcharset0 Arial;}}
{\colortbl ;\red0\green0\blue0;\red0\green255\blue255;\red0\green0\blue255;\red138\green43\blue226;\red165\green42\blue42;\red169\green169\blue169;\red255\green0\blue0;\red0\green255\blue0;\red255\green255\blue0;}
\viewkind4\uc1\pard
\cf1\lang2052\f0\fs30 aaaaaaaaaa \red0\green0\blue255
\cf2\f2\fs40 bbbbbbbbbbbbb \par
\cf3\f3\fs50 cccccccccc
\cf4\f4\fs60 ddddddddddd \par
\cf5\f5\fs70 eeeeeeeeee \par
\cf6\f6\fs80 ffffffffffffffffffff \par
\cf6\f6 \par
}"
取出{\colortbl ;\red0\green0\blue0;\red0\green255\blue255;\red0\green0\blue255;\red138\green43\blue226;\red165\green42\blue42;\red169\green169\blue169;\red255\green0\blue0;\red0\green255\blue0;\red255\green255\blue0;}
之间的 “\red0\green0\blue0” “\red0\green255\blue255” “\red0\green0\blue255” ....
还有“ aaaaaaaaaa \red0\green0\blue255 ” “ bbbbbbbbbbbbb ” “ cccccccccc ” ...
需要两条正则。a、b、c 前面的那些空格也要保留。
{\colortbl ;\red0\green0\blue0;\red0\green255\blue255;\red0\green0\blue255;\red138\green43\blue226;\red165\green42\blue42;\red169\green169\blue169;\red255\green0\blue0;\red0\green255\blue0;\red255\green255\blue0;}
\viewkind4\uc1\pard
\cf1\lang2052\f0\fs30 aaaaaaaaaa \red0\green0\blue255
\cf2\f2\fs40 bbbbbbbbbbbbb \par
\cf3\f3\fs50 cccccccccc
\cf4\f4\fs60 ddddddddddd \par
\cf5\f5\fs70 eeeeeeeeee \par
\cf6\f6\fs80 ffffffffffffffffffff \par
\cf6\f6 \par
}"
取出{\colortbl ;\red0\green0\blue0;\red0\green255\blue255;\red0\green0\blue255;\red138\green43\blue226;\red165\green42\blue42;\red169\green169\blue169;\red255\green0\blue0;\red0\green255\blue0;\red255\green255\blue0;}
之间的 “\red0\green0\blue0” “\red0\green255\blue255” “\red0\green0\blue255” ....
还有“ aaaaaaaaaa \red0\green0\blue255 ” “ bbbbbbbbbbbbb ” “ cccccccccc ” ...
需要两条正则。a、b、c 前面的那些空格也要保留。
{\\colortbl\s*;(?<color>\\red[^;]+;)+}
2. 取文本内容(前面的空格也要保留):
\\cf[^ ]+(.+?)(?=\\par|})
/// <summary>
/// Demo: runs two regular expressions over an embedded RTF sample and prints
/// what they extract to the console.
/// </summary>
/// <remarks>
/// The first pattern matches the <c>{\colortbl ...}</c> group and records every
/// <c>\red...;</c> entry as a capture of the named group <c>color</c>.
/// The second pattern matches a <c>\cf</c> control-word run up to the first
/// space, then lazily captures the text that follows (starting with that space)
/// up to, but not including, the next <c>\par</c> or <c>}</c>.
/// </remarks>
private static void TestRegex22()
{
    // Verbatim literal: the continuation lines are part of the string, so they
    // must stay at column 0 exactly as written.
    string rtf = @"{\rtf1\ansi\ansicpg936\deff0\deflang1033\deflangfe2052{\fonttbl{\f0\fmodern\fprq6\fcharset134 \'cb\'ce\'cc\'e5;}{\f1\fnil ;}{\f2\fnil\fcharset134 \'b7\'bd\'d5\'fd\'d2\'a6\'cc\'e5;}{\f3\fnil\fcharset134 \'b7\'bd\'d5\'fd\'ca\'e6\'cc\'e5;}{\f4\fnil\fcharset134 \'bf\'ac\'cc\'e5_GB2312;}{\f5\fnil\fcharset134 \'c1\'a5\'ca\'e9;}{\f6\fnil\fcharset134 \'ba\'da\'cc\'e5;}{\f7\fswiss\fcharset0 Arial;}}
{\colortbl ;\red0\green0\blue0;\red0\green255\blue255;\red0\green0\blue255;\red138\green43\blue226;\red165\green42\blue42;\red169\green169\blue169;\red255\green0\blue0;\red0\green255\blue0;\red255\green255\blue0;}
\viewkind4\uc1\pard
\cf1\lang2052\f0\fs30 aaaaaaaaaa \par
\cf2\f2\fs40 bbbbbbbbbbbbb \par
\cf3\f3\fs50 cccccccccc \par
\cf4\f4\fs60 ddddddddddd \par
\cf5\f5\fs70 eeeeeeeeee \par
\cf6\f6\fs80 ffffffffffffffffffff \par
\cf6\f6 \par
}";

    var colorTablePattern = new Regex(@"{\\colortbl\s*;(?<color>\\red[^;]+;)+}", RegexOptions.Compiled);
    var textRunPattern = new Regex(@"\\cf[^ ]+(.+?)(?=\\par|})", RegexOptions.Compiled);

    // A repeated group keeps every iteration in Captures, one per color entry.
    CaptureCollection colorEntries = colorTablePattern.Match(rtf).Groups["color"].Captures;
    for (int i = 0; i < colorEntries.Count; i++)
    {
        Console.WriteLine(colorEntries[i].Value);
    }

    Console.WriteLine("--------------神奇的分割线--------------");

    // Group 1 is the text after the control words, leading space included.
    MatchCollection textRuns = textRunPattern.Matches(rtf);
    for (int i = 0; i < textRuns.Count; i++)
    {
        Console.WriteLine(textRuns[i].Groups[1].Value);
    }
}
输出\red0\green0\blue0;
\red0\green255\blue255;
\red0\green0\blue255;
\red138\green43\blue226;
\red165\green42\blue42;
\red169\green169\blue169;
\red255\green0\blue0;
\red0\green255\blue0;
\red255\green255\blue0;
--------------神奇的分割线--------------
aaaaaaaaaa
bbbbbbbbbbbbb
cccccccccc
ddddddddddd
eeeeeeeeee
ffffffffffffffffffff
我要取的是 {\colortbl ;\red0\green0\blue0;\red0\green255\blue255;\red0\green0\blue255;\red138\green43\blue226;\red165\green42\blue42;\red169\green169\blue169;\red255\green0\blue0;\red0\green255\blue0;\red255\green255\blue0;} 中的 “\red0\green0\blue0”、“\red0\green255\blue255” ……我是怕别的地方还会有类似这样的内容,那样取出来就坏了。
内容中有\没关系,不要有\par就成。