一个List<string>集合,比如list1:{"关闭0","2地方","5法0规","fg","地方","9"}
又是一个List<string>集合,比如list2:{"0","5"}
一个List<int>集合,比如list3:{1,2}
目标:在list1中寻找满足下面条件的元素:对于list1中的某个元素而言,其包含list2中的元素,并且包含的个数是属于list3中规定的个数,那么,list1中的此元素就算满足条件(上例中有"关闭0"、"5法0规"满足条件)
求出list1中所有满足条件的元素,下面是我的方法: public List<string> get_1(List<string> list1, List<string> list2, List<int> list3)
{
List<string> list_中介 = new List<string>(list1);
List<string> list_返回值 = new List<string>();
foreach (string s in list_中介)
{
bool b = true;
foreach (int i in list3)
{
int i_出 = list2.Where(x => s.Contains(x)).Count();
if (i == i_出)
{
b = false;
break;
}
}
if (b)
{
list1.Remove(s);
}
}
return list1;
}
可是,很慢哦,如果list1有1百万个字符串,list2只有2个元素,list3也只有2个数字,计算结果就是死机...........哪位朋友有更好、更快的算法吗?
又是一个List<string>集合,比如list2:{"0","5"}
一个List<int>集合,比如list3:{1,2}
目标:在list1中寻找满足下面条件的元素:对于list1中的某个元素而言,其包含list2中的元素,并且包含的个数是属于list3中规定的个数,那么,list1中的此元素就算满足条件(上例中有"关闭0"、"5法0规"满足条件)
求出list1中所有满足条件的元素,下面是我的方法: public List<string> get_1(List<string> list1, List<string> list2, List<int> list3)
{
List<string> list_中介 = new List<string>(list1);
List<string> list_返回值 = new List<string>();
foreach (string s in list_中介)
{
bool b = true;
foreach (int i in list3)
{
int i_出 = list2.Where(x => s.Contains(x)).Count();
if (i == i_出)
{
b = false;
break;
}
}
if (b)
{
list1.Remove(s);
}
}
return list1;
}
可是,很慢哦,如果list1有1百万个字符串,list2只有2个元素,list3也只有2个数字,计算结果就是死机...........哪位朋友有更好、更快的算法吗?
这句是不是应该移到foreach (int i in list3)外面?
{
List<string> list_中介 = new List<string>(list1);
List<string> list_返回值 = new List<string>();
bool b = true;
foreach (string s in list_中介)
{
b = list3.Contains(list2.Where(x => s.Contains(x)).Count());
if (!b)
{
list1.Remove(s);
}
}
return list1;
}
1. list_中介 感觉没有必要!如果要返回符合条件的元素就直接保存那些元素;若要剔除符合条件的元素则不需要 list_中介,只是用for循环而不用foreach。 这个 list_中介 量太大,耗内存也耗时间!2. list1.Remove(s);感觉效果不好,list1需要在一百多万条记录中从头到尾地寻找第一个匹配值,并删除。个人觉得如果需要剔除某个元素,最好使用索引,这样可能快点(纯属臆测,你可以试试)
{
List<string> result = new List<string>(); list1.ForEach((s) =>
{
if (list3.Contains(list2.Count(x => s.Contains(x))))
{
result.Add(s);
}
}); return result;
}
这不是最优算法,这只是按照你的算法精简之后的写法。
{
class NewCls
{
public string a;
public int b;
}
static void Main(string[] args)
{
var list1 = new List<string>() { "关闭0", "2地方", "5法0规", "fg", "地方", "9" };
List<NewCls> list2 = new List<NewCls>() { new NewCls() { a = "0", b = 1 }, new NewCls() { a = "5", b = 2 } };
list1 = list1.Where(t => list2.Count(tt => t.Contains(tt.a.Trim()) && t.Length - t.Replace(tt.a, "").Length == tt.b) > 0).ToList();
foreach (var t in list1)
{
Console.WriteLine(t);
}
Console.ReadLine();
}
}
为啥非要用 =>
直接写都比那个快1/3 public static void get_2(List<string> list1, List<string> list2, List<int> list3)
{
List<string> result = new List<string>();
foreach (string s1 in list1)
{
int count = 0;
foreach (string s2 in list2)
{
if (s1.Contains(s2))
count++;
}
if (count > 0 && list3.Contains(count))
result.Add(s1);
}
{
return list1.Where(x => list3.Any(z => list2.Where(y => x.Contains(y)).Count() == z)).ToList();
}
难道是Add比Remove快?
{
List<string> result = new List<string>();
string reg = string.Join("|", list2.ToArray());
int min = list3.Min();
int max = list3.Max();
foreach (string str in list1)
{
var mc = Regex.Matches(str, reg);
if(list3.IndexOf(mc.Count)>-1)
{
result.Add(str);
break;
}
}
return result;
}
{
List<string> result = new List<string>();
string reg = string.Join("|", list2.ToArray());
//int min = list3.Min();
//int max = list3.Max();
foreach (string str in list1)
{
var mc = Regex.Matches(str, reg);
if(list3.IndexOf(mc.Count)>-1)
{
result.Add(str);
break;
}
}
return result;
}
正则麻烦多,还要进行转义,你的结果条数错了 static void Main()
{
string[] l1 = { "关闭0", "2地方", "5法0规", "fg", "地方", "9" };
string[] l2 = { "0", "5" };
int[] l3 = { 1, 2 };
List<string> list1 = new List<string>(l1);
for (int i = 0; i < 1000000; i++)
list1.Add(GetMix(20));
List<string> list2 = new List<string>(l2);
for (int i = 0; i < 10; i++)
list2.Add(GetMix(100));
List<int> list3 = new List<int>(l3);
for (int i = 3; i < 100; i++)
list3.Add(i);
//get_1
DateTime s = DateTime.Now;
int cou= get_1(list1, list2, list3).Count;
Console.WriteLine(cou);
Console.WriteLine(DateTime.Now.Subtract(s).ToString());
//get_2
s = DateTime.Now;
cou = get_2(list1, list2, list3).Count;
Console.WriteLine(cou);
Console.WriteLine(DateTime.Now.Subtract(s).ToString());
//get_3
s = DateTime.Now;
cou = get_3(list1, list2, list3).Count;
Console.WriteLine(cou);
Console.WriteLine(DateTime.Now.Subtract(s).ToString());
Console.Read();
} public static List<string> get_1(List<string> list1, List<string> list2, List<int> list3)
{
List<string> result = new List<string>();
list1.ForEach((s) =>
{
if (list3.Contains(list2.Count(x => s.Contains(x)))) { result.Add(s); }
});
return result;
}
public static List<string> get_2(List<string> list1, List<string> list2, List<int> list3)
{
List<string> result = new List<string>();
foreach (string s1 in list1)
{
int count = 0;
foreach (string s2 in list2)
{
if (s1.Contains(s2))
count++;
}
if (count > 0 && list3.Contains(count))
result.Add(s1);
}
return result;
}
public static List<string> get_3(List<string> list1, List<string> list2, List<int> list3)
{
List<string> result = new List<string>();
string reg = string.Join("|", list2.ToArray());
foreach (string str in list1)
{
var mc = Regex.Matches(str, reg);
if (list3.IndexOf(mc.Count) > -1)
{
result.Add(str);
}
}
return result;
}
#region 得到随机string
static string str = @"0123456789abcdefghigklmnopqrstuvwxyzABCDEFGHIGKLMNOPQRSTUVWXYZ";
static Random rd = new Random();
public static string GetMix(int len)
{
StringBuilder sb = new StringBuilder();
while (len > 0)
{
sb.Append(str[rd.Next(str.Length)]);
len--;
}
return sb.ToString();
}
#endregion 得到随机string事实说明,用啥也不能用正则
//list1 100W条
//list2 100条
//list3 1-100
//480483
//00:00:16.0229165
//480483
//00:00:14.0078012
//480483
//00:01:23.7747916//list1 100W条
//list2 1000条
//list3 1-100
//480458
//00:02:39.3061118
//480458
//00:02:19.9510047
//480458
//00:15:41.6178575 //15分钟才完
看起来其实更像分词的应用。