请教一个正则,可以匹配出正则表达式中的分组,最内层的分组,例如这样一个正则
(?:(?:^)|(?<!(^\\))|(?:(?!\\)\[[^\]]*))\((?!(?:\?<?[:=!']))((?:\\\()|(?:\\\))|[^()])*(?:(?:(?<=[^\\]))|(?:(?!\\)\[[^\]]*))\)
能得到:
(^\\)
((?:\\\()|(?:\\\))|[^()])
这两个分组就正确了
(?:(?:^)|(?<!(^\\))|(?:(?!\\)\[[^\]]*))\((?!(?:\?<?[:=!']))((?:\\\()|(?:\\\))|[^()])*(?:(?:(?<=[^\\]))|(?:(?!\\)\[[^\]]*))\)
能得到:
(^\\)
((?:\\\()|(?:\\\))|[^()])
这两个分组就正确了
// The regex under inspection, written as an escaped C# string literal.
string test = "(?:(?:^)|(?<!(^\\\\))|(?:(?!\\\\)\\[[^\\]]*))\\((?!(?:\\?<?[:=!']))((?:\\\\\\()|(?:\\\\\\))|[^()])*(?:(?:(?<=[^\\\\]))|(?:(?!\\\\)\\[[^\\]]*))\\)";

// Step 1: run every [...] span through regReplace so the parentheses inside it are masked.
test = Regex.Replace(test, @"(?<=((\\\\)+|[^\\]))\[.*?(?<=((\\\\)+|[^\\]))\]", regReplace);

// Step 2: mask the square brackets themselves with placeholder characters.
test = test.Replace("[", "∵").Replace("]", "∴");

// Step 3: mask escaped parentheses (\( and \)) while keeping the leading backslash.
test = Regex.Replace(test, @"(?<=((\\\\)+|[^\\]))\\\(", @"\∪");
test = Regex.Replace(test, @"(?<=((\\\\)+|[^\\]))\\\)", @"\∩");

string result;

// Step 4: find "(...)" groups that do not start with "(?"; the balancing group <o>
// pairs each nested "(?" with its closing ")".
MatchCollection mc = Regex.Matches(test, @"\((?!\?)([^()]*(?<o>\(\?)[^()]*(?<-o>\))[^()]*)*\)(?(o)(?!))|\((?!\?)[^()]*\)");

foreach (Match groupMatch in mc)
{
    // Undo the placeholder substitutions before displaying the group.
    result = groupMatch.Value
        .Replace("∵", "[")
        .Replace("∴", "]")
        .Replace("∪", "(")
        .Replace("∩", ")");
    richTextBox2.Text += result + "\n";
}
// Holds the intermediate text (after the '(' substitution) produced by regReplace.
string tempStr;

/// <summary>
/// Match evaluator that masks parentheses in the matched text:
/// every '(' becomes '∪' and every ')' becomes '∩'.
/// </summary>
/// <param name="m">The regex match whose value is rewritten.</param>
/// <returns>The matched text with both kinds of parenthesis replaced.</returns>
private string regReplace(Match m)
{
    string matchedText = m.Value;
    tempStr = matchedText.Replace("(", "∪");
    string masked = tempStr.Replace(")", "∩");
    return masked;
}
输出:
(^\\)
((?:\\\()|(?:\\\))|[^()])
不过这里还是有问题:最后用到的平衡组没有考虑非捕获组嵌套的情况。暂时只是得到了楼主要的结果,但绝对不是最佳方法,效率不是一般的差...
// The regex under inspection, written as an escaped C# string literal.
string test = "(?:(?:^)|(?<!(^\\\\))|(?:(?!\\\\)\\[[^\\]]*))\\((?!(?:\\?<?[:=!']))((?:\\\\\\()|(?:\\\\\\))|[^()])*(?:(?:(?<=[^\\\\]))|(?:(?!\\\\)\\[[^\\]]*))\\)";

// First, replace the ( and ) found between [ and ], because there they denote literal characters.
test = Regex.Replace(test, @"(?<=((\\\\)+|[^\\]))\[.*?(?<=((\\\\)+|[^\\]))\]", regReplace);

// Then replace the [ and ] themselves: the parentheses inside them are already masked,
// so the brackets no longer matter for grouping.
test = test.Replace("[", "∵").Replace("]", "∴");

// Replace the escaped forms \( and \) so they do not interfere with the group matching below.
test = Regex.Replace(test, @"(?<=((\\\\)+|[^\\]))\\\(", @"\∪");
test = Regex.Replace(test, @"(?<=((\\\\)+|[^\\]))\\\)", @"\∩");

string result;

// Require a "(" that is not followed by "?", then use a balancing group to consume the
// nested "(?...)" groups so the enclosing group is matched; the alternative is a plain "(....)".
MatchCollection mc = Regex.Matches(test, @"\((?!\?)([^()]*(?<o>\(\?)[^()]*(?<-o>\))[^()]*)*\)(?(o)(?!))|\((?!\?)[^()]*\)");

foreach (Match groupMatch in mc)
{
    // Restore the original characters from their placeholders before output.
    result = groupMatch.Value
        .Replace("∵", "[")
        .Replace("∴", "]")
        .Replace("∪", "(")
        .Replace("∩", ")");
    richTextBox2.Text += result + "\n";
}
// Intermediate result of regReplace: the matched text after '(' has been replaced.
string tempStr;

/// <summary>
/// Rewrites the matched text so that '(' is replaced by '∪' and ')' by '∩'.
/// </summary>
/// <param name="m">The match supplied by Regex.Replace.</param>
/// <returns>The matched text with its parentheses replaced by placeholders.</returns>
private string regReplace(Match m)
{
    string original = m.Value;
    tempStr = original.Replace("(", "∪");
    string withoutParens = tempStr.Replace(")", "∩");
    return withoutParens;
}
// Mask escaped parentheses (\( and \)) while keeping the leading backslash.
test = Regex.Replace(test, @"(?<=((\\\\)+|[^\\]))\\\(", @"\∪");
test = Regex.Replace(test, @"(?<=((\\\\)+|[^\\]))\\\)", @"\∩");

string result;

// Match "(...)" groups unless they open with "(?:", "(?=", "(?!", "(?<=" or "(?<!";
// the balancing group <o> pairs each nested group of those kinds with its closing ")".
MatchCollection mc = Regex.Matches(test, @"\((?!\?(<[=!]|[!=]|:))([^()]*(?<o>\(\?(<[=!]|[!=]|:))[^()]*(?<-o>\))[^()]*)*\)(?(o)(?!))|\((?!\?(<[=!]|[!=]|:))[^()]*\)");

foreach (Match groupMatch in mc)
{
    // Turn the parenthesis placeholders back into real parentheses before output.
    result = groupMatch.Value.Replace("∪", "(").Replace("∩", ")");
    richTextBox2.Text += result + "\n";
}
// Holds the intermediate text produced by regReplace after the '(' substitution.
string tempStr;
/// <summary>
/// Replaces every '(' in the matched text with '∪' and every ')' with '∩'.
/// </summary>
/// <param name="m">The regex match whose value is rewritten.</param>
/// <returns>The matched text with both kinds of parenthesis replaced.</returns>
private string regReplace(Match m)
{
// First pass: swap '(' for its placeholder and keep the result in the field.
tempStr = m.Value.Replace("(", "∪");
// Second pass: swap ')' for its placeholder and return the final text.
return tempStr.Replace(")", "∩");
}输出:
(?<name>[^()]+)
(^\\)
((?:\\\()|(?:\\\))|[^()])
不过还是那句话,呵呵,这不是用正则来做的事,只是想研究下写写而已
---->
MatchCollection mc = Regex.Matches(test, @"\((?!\?(<[=!]|[!=]|:))([^()]*(?<o>\(\?(<[=!]|[!=]|:))[^()]*(?<-o>\))[^()]*)*\)(?(o)(?!))(?(o)(?!))|\((?!\?(<[=!]|[!=]|:))[^()]*\)");
为了结构清晰,这个正则里没有用非捕获组来屏蔽那些没用的捕获组 ^o^
看看IV的状态机是怎么写的,改天不写正则了,开始学状态机。