我想用正则表达式提取收件箱中的发件人地址,但其中有重复的发件地址,每个地址我只需要保留一个,并把去重后的地址用分号隔开存到临时字符串中。请问如何去掉匹配结果中的重复项?谢谢!
// Matching rule: the shared e-mail regex, wrapped in a Lazy<Regex> (isThreadSafe = true).
private static readonly Lazy<Regex> s_EmailRegex = new Lazy<Regex>(
    () => new Regex(
        "([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1" +
        ",3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})",
        Options),
    true);

string text = "[email protected], [email protected] ,[email protected],[email protected],[email protected]";

// Find every match in the input string and append each one, followed by ';',
// to _emailcontainer. Repeated addresses are appended as often as they occur.
MatchCollection matches = s_EmailRegex.Value.Matches(text);
foreach (Match match in matches)
{
    _emailcontainer += match.Value + ";";
}
1. 希望最终 _emailcontainer 存的是 [email protected];[email protected];[email protected]; 请问如何高效去掉重复的邮箱地址?2. 在扫描源文本的过程中,用字符串存储最终的匹配结果会不会存在溢出的情况?请教还有哪些临时存储方案?用数据库的话,担心读写过于频繁。
// Matching rule: the shared e-mail regex, wrapped in a Lazy<Regex> (isThreadSafe = true).
private static readonly Lazy<Regex> s_EmailRegex = new Lazy<Regex>(
    () => new Regex(
        "([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1" +
        ",3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})",
        Options),
    true);

string text = "[email protected], [email protected] ,[email protected],[email protected],[email protected]";

// Find every match in the input string and append each one, followed by ';',
// to _emailcontainer. Repeated addresses are appended as often as they occur.
MatchCollection matches = s_EmailRegex.Value.Matches(text);
foreach (Match match in matches)
{
    _emailcontainer += match.Value + ";";
}
1. 希望最终 _emailcontainer 存的是 [email protected];[email protected];[email protected]; 请问如何高效去掉重复的邮箱地址?2. 在扫描源文本的过程中,用字符串存储最终的匹配结果会不会存在溢出的情况?请教还有哪些临时存储方案?用数据库的话,担心读写过于频繁。
DEMO
// DEMO: write every distinct address once, each followed by ';'.
string text = "[email protected],[email protected],[email protected],[email protected],[email protected],";

// RemoveEmptyEntries drops the empty item produced by the trailing comma;
// Distinct() then filters out repeated addresses.
string[] addresses = text.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
foreach (string address in addresses.Distinct())
{
    // The ';' terminator keeps consecutive addresses from running together in the output.
    Response.Write(address + ";");
}
先用正则提取出 HTML 里的邮箱,放进 List 中,再做去重处理。
// Walk every match found in the input string. An address that listMails
// already contains is skipped; otherwise it is added to listMails and
// appended, followed by ';', to _emailcontainer.
foreach (Match match in matches)
{
    string address = match.Value;
    if (listMails.Contains(address))
    {
        continue;
    }

    listMails.Add(address);
    _emailcontainer += address + ";";
}
// Removes repeated addresses from the comma-separated list held in str by
// means of a back-reference, then prints the result.
//  - (?<=(?:^|,)\s*) and (?=\s*(?:,|$)) pin both the first occurrence (group 1)
//    and its repeat (\1) to whole list entries, so an address that is only the
//    tail of a longer one (e.g. "a@b.com" inside "xa@b.com") is not treated
//    as a duplicate.
//  - The repeat is matched whether or not a comma follows it, so a duplicate
//    in the last position of the list is removed as well.
//  - Group 2 captures the entries between the first occurrence and the repeat
//    so that the replacement "$1$2" keeps them.
Regex reg = new Regex(@"(?<=(?:^|,)\s*)([^,\s]+@[^,\s]+)((?:\s*,[^,]*)*?)\s*,\s*\1(?=\s*(?:,|$))");

// A single Replace pass removes only one repeat per matched span, so keep
// replacing until the text stops changing.
string previous;
do
{
    previous = str;
    str = reg.Replace(str, "$1$2");
}
while (previous != str);

// Strip any leading/trailing separator before printing.
Console.WriteLine(str.Trim(','));
Console.ReadLine();
// [email protected],[email protected],[email protected]
// (presumably the console output of the preceding regex-based snippet)

// No regex needed: split on ',', drop repeated entries, print one address per line.
string str = "[email protected],[email protected],[email protected],[email protected],[email protected]";
foreach (string address in str.Split(',').Distinct())
{
    Console.WriteLine(address);
}
/*
Output:
[email protected]
[email protected]
[email protected]
*/