我要随机生成至少500万条数据,其中每条数据都只包含7个数字,前五个数字是1到35之间,随机产生,每一位都是随机,但是产生完之后要从小到大排列。后两位在1到12之间随机产生,同样的产生后排列。数据生成完之后,要统计这500万条数据中重复数据的条数,每一条都要统计。我的算法已经实现,但是,耗时有点长,如果达到千万级别就要好几十分钟。因此,在这里讨论一下更好的算法。
this.txtResult.Text = "";
Stopwatch stw = new Stopwatch();
Hashtable objHashtable = new Hashtable();
Random rand = new Random();
int[] Count = new int[100000];//新建一个数组,数组下标用作Hashtable中的key,数组元素用来存储key记录重复的次数
int flag = 0; //设立一个flag,用来标识Hashtable是否遍历完
int j = 0; //j代表数组的下标
stw.Start(); //开始计时 for (Int64 i = 0; i < 100000 && j < 100000; i++)
{
string strRtn = "";
int n = 0;
strRtn = BuildRandom(ref rand); //获取随机数
if (objHashtable.Keys.Count == 0) //如果Hashtable中没有记录,则先插入第一条随机数
{
objHashtable.Add(j, strRtn);
j++;
}
foreach (int key in objHashtable.Keys) //遍历Hashtable
{
if (strRtn == ((string)objHashtable[key])) //如果存在,相同的条数+1
{
Count[key]++;
break;
}
else
n++;
if (n == objHashtable.Keys.Count) //不存在,插入
{
objHashtable.Add(j, strRtn);
j++;
break;
}
}
}
for (int k = 0; k < Count.Length; k++) //遍历int数组,选取重复次数最少的那条随机数
{
for (int m = k + 1; m < Count.Length; m++)
{
if (Count[k] < Count[m])
{
flag = m;
Count[m] = Count[k];
}
}
}
this.txtResult.Text = Count[flag] + ":" + ((string)objHashtable[flag]);
stw.Stop(); //计时结束
this.txtTimeConsuming.Text = stw.Elapsed.TotalMilliseconds.ToString() + "毫秒";
private string BuildRandom(ref Random rand)
{ string[] str = new string[7];
string strRtn = "";
for (int i = 0; i < 5; i++) //随即生成5个数,存储在数字中
{
str[i] = Convert.ToString(rand.Next(1, 35));
for (int j = i - 1; j > 0; j--) //判断是否重复
{
if (str[i] == str[j])
str[i] = Convert.ToString(rand.Next(1, 35));
}
}
for (int i = 5; i < str.Length; i++) //随即生成后两位
{
str[i] = Convert.ToString(rand.Next(1, 12));
for (int j = i - 1; j > 4; j--)
{
if (str[i] == str[j])
str[i] = Convert.ToString(rand.Next(1, 12));
}
}
for (int i = 0; i < 5; i++) //冒泡排序,让随机数由小到大排列
{
for (int j = i + 1; j < 5; j++)
{
string strChange = "";
if (int.Parse(str[i]) > int.Parse(str[j]))
{
strChange = str[i];
str[i] = str[j];
str[j] = strChange;
}
}
}
if (int.Parse(str[5]) > int.Parse(str[6]))
{
string strChange = "";
strChange = str[5];
str[5] = str[6];
str[6] = strChange;
} for (int i = 0; i < str.Length; i++)
{
strRtn += str[i] + " ";
}
return strRtn;
}
this.txtResult.Text = "";
Stopwatch stw = new Stopwatch();
Hashtable objHashtable = new Hashtable();
Random rand = new Random();
int[] Count = new int[100000];//新建一个数组,数组下标用作Hashtable中的key,数组元素用来存储key记录重复的次数
int flag = 0; //设立一个flag,用来标识Hashtable是否遍历完
int j = 0; //j代表数组的下标
stw.Start(); //开始计时 for (Int64 i = 0; i < 100000 && j < 100000; i++)
{
string strRtn = "";
int n = 0;
strRtn = BuildRandom(ref rand); //获取随机数
if (objHashtable.Keys.Count == 0) //如果Hashtable中没有记录,则先插入第一条随机数
{
objHashtable.Add(j, strRtn);
j++;
}
foreach (int key in objHashtable.Keys) //遍历Hashtable
{
if (strRtn == ((string)objHashtable[key])) //如果存在,相同的条数+1
{
Count[key]++;
break;
}
else
n++;
if (n == objHashtable.Keys.Count) //不存在,插入
{
objHashtable.Add(j, strRtn);
j++;
break;
}
}
}
for (int k = 0; k < Count.Length; k++) //遍历int数组,选取重复次数最少的那条随机数
{
for (int m = k + 1; m < Count.Length; m++)
{
if (Count[k] < Count[m])
{
flag = m;
Count[m] = Count[k];
}
}
}
this.txtResult.Text = Count[flag] + ":" + ((string)objHashtable[flag]);
stw.Stop(); //计时结束
this.txtTimeConsuming.Text = stw.Elapsed.TotalMilliseconds.ToString() + "毫秒";
private string BuildRandom(ref Random rand)
{ string[] str = new string[7];
string strRtn = "";
for (int i = 0; i < 5; i++) //随即生成5个数,存储在数字中
{
str[i] = Convert.ToString(rand.Next(1, 35));
for (int j = i - 1; j > 0; j--) //判断是否重复
{
if (str[i] == str[j])
str[i] = Convert.ToString(rand.Next(1, 35));
}
}
for (int i = 5; i < str.Length; i++) //随即生成后两位
{
str[i] = Convert.ToString(rand.Next(1, 12));
for (int j = i - 1; j > 4; j--)
{
if (str[i] == str[j])
str[i] = Convert.ToString(rand.Next(1, 12));
}
}
for (int i = 0; i < 5; i++) //冒泡排序,让随机数由小到大排列
{
for (int j = i + 1; j < 5; j++)
{
string strChange = "";
if (int.Parse(str[i]) > int.Parse(str[j]))
{
strChange = str[i];
str[i] = str[j];
str[j] = strChange;
}
}
}
if (int.Parse(str[5]) > int.Parse(str[6]))
{
string strChange = "";
strChange = str[5];
str[5] = str[6];
str[6] = strChange;
} for (int i = 0; i < str.Length; i++)
{
strRtn += str[i] + " ";
}
return strRtn;
}
解决方案 »
免费领取超大流量手机卡,每月29元包185G流量+100分钟通话, 中国电信官方发货