你如果一定要用数组做的话,可以试试用join+instr,比如:dim a1(50000) as string,a2(10000) as string dim tmp as string dim i as longtmp="," & join(a1,",") & "," ' 先将第一数组join成用逗号分隔的字串 '用instr搜索 for i=0 to 10000 if instr(tmp,","& a2(i) & ",") then '...... end if next join和instr是VB中很快的函数了,试试看...
以下是简单的测试,你新建一个工程试试: Dim arr(50000) As String, a(10000) As String Dim i As Long Dim tmp As String, t As String Dim dt
'填充测试数组 Randomize For i = 0 To 50000 tmp = Space(25) tmp = Replace(tmp, " ", Chr(Int(Rnd * 74 + 33))) arr(i) = tmp Next
For i = 0 To 10000 tmp = Space(25) tmp = Replace(tmp, " ", Chr(Int(Rnd * 74 + 33))) a(i) = tmp Next
'匹配数据 dt = Timer '开始计时 tmp = vbNullString tmp = Join(arr, ",") For i = 0 To 10000 If InStr(tmp, a(i)) Then 'Debug.Print a(i) t = a(i) End If Next MsgBox Timer - dt
试试用字典,以下代码引用Microsoft Scripting RuntimeOption ExplicitPrivate Const STRING_LEN As Long = 25 '字符串长度 Private Const ARR_LENGTH = 50000 '大数组下标 Private Const A_LENGTH = 10000 '小数组下标Private arr(ARR_LENGTH) As String Private a(A_LENGTH) As StringPrivate Sub Command1_Click()
Dim dic As Dictionary Dim i As Long Dim dt
dt = Timer '把arr中的数据做为dic的key添加到字典中,同时过虑了重复值 Set dic = New Dictionary For i = 1 To ARR_LENGTH If Not dic.Exists(arr(i)) Then dic.Add arr(i), i End If Next '匹配数据 For i = 1 To A_LENGTH If dic.Exists(a(i)) Then Debug.Print a(i) Next MsgBox dic.Count & " =============== " & Timer - dt Set dic = NothingEnd Sub Private Sub Form_Load()
Dim i As Long Dim tmp As String
'添加测试数据 For i = 1 To ARR_LENGTH arr(i) = GetRndString(STRING_LEN) If i <= A_LENGTH Then a(i) = GetRndString(STRING_LEN) Next
End Sub'生成字符串 Function GetRndString(n As Long) As String
Dim i As Long Dim s As String Randomize s = Space(n) For i = 1 To n Mid(s, i, 1) = Int(Rnd * 10) '由0-9的数字组成n位字符串 Next GetRndString = s
End Function
Option ExplicitPrivate Const STRING_LEN As Long = 25 '字符串长度 Private Const ARR_LENGTH = 50000 '大数组下标 Private Const A_LENGTH = 10000 '小数组下标Private arr(ARR_LENGTH) As String Private a(A_LENGTH) As String'搜索数据 Private Sub Command1_Click()
Dim dic As Dictionary Dim i As Long Dim dt
dt = Timer '把arr中的数据做为dic的key添加到字典中,同时过虑了重复值 Set dic = New Dictionary For i = 0 To ARR_LENGTH If Not dic.Exists(arr(i)) Then dic.Add arr(i), i End If Next '匹配数据 For i = 0 To A_LENGTH If dic.Exists(a(i)) Then Debug.Print a(i) Next MsgBox "用时: " & Timer - dt Set dic = Nothing
End Sub '窗体载入时添加测试数据 Private Sub Form_Load()
Dim i As Long Dim tmp As String
For i = 0 To ARR_LENGTH arr(i) = GetRndString(STRING_LEN) If i <= A_LENGTH Then a(i) = GetRndString(STRING_LEN) Next
End Sub'组成N位字符串 Function GetRndString(n As Long) As String
Dim i As Long Dim s As String Randomize s = Space(n) For i = 1 To n Mid(s, i, 1) = Int(Rnd * 10) '由0-9的数字组成n位字符串 Next GetRndString = s
dim tmp as string
dim i as longtmp="," & join(a1,",") & "," ' 先将第一数组join成用逗号分隔的字串
'用instr搜索
for i=0 to 10000
if instr(tmp,","& a2(i) & ",") then
'......
end if
next join和instr是VB中很快的函数了,试试看...
每个25位长度的话,50000*(25+1)=1300000,会不会超出限制啊?
再进行 二分法比较 50000*LOG(10000)
以下是简单的测试,你新建一个工程试试: Dim arr(50000) As String, a(10000) As String
Dim i As Long
Dim tmp As String, t As String
Dim dt
'填充测试数组
Randomize
For i = 0 To 50000
tmp = Space(25)
tmp = Replace(tmp, " ", Chr(Int(Rnd * 74 + 33)))
arr(i) = tmp
Next
For i = 0 To 10000
tmp = Space(25)
tmp = Replace(tmp, " ", Chr(Int(Rnd * 74 + 33)))
a(i) = tmp
Next
'匹配数据
dt = Timer '开始计时
tmp = vbNullString
tmp = Join(arr, ",")
For i = 0 To 10000
If InStr(tmp, a(i)) Then 'Debug.Print a(i)
t = a(i)
End If
Next
MsgBox Timer - dt
String 数据类型字符串有两种:变长与定长的字符串。
变长字符串最多可包含大约 20 亿 ( 2^31)个字符。
定长字符串可包含 1 到大约 64K ( 2^16 ) 个字符。
我觉得 4F 的方法可能比最原始的方法(二重循环直接比较)还慢得多。
主要是其中产生的字符串操作太多了(而且大大增加了冗余的字符串比较)。
楼主可以把你的数据提取一部分来试一下。
比如:第一列取5000个,第二列取2000个。分别检查一下两种方法的耗时。
我对这种情况的处理方法是:
建两个 Long类型 的索引表:mlCodeA(50000), mlCodeB(10000)。
把每一个字符串简单转换一下变为一个 Long数值(类似计算哈希值,但不用这么复杂,可以用另的简单算法),对应保存到这两个表中。
再用二重循环比较 mlCodeA() 中的数据是否在 mlCodeB() 中出现。
如果出现,再比较对应的字符串是否相同。
这样的速度将快几倍。 如果对 mlCodeA()、mlCodeB() 分别排序,可以进一步缩短比较时间。
Private Const ARR_LENGTH = 50000 '大数组下标
Private Const A_LENGTH = 10000 '小数组下标Private arr(ARR_LENGTH) As String
Private a(A_LENGTH) As StringPrivate Sub Command1_Click()
Dim dic As Dictionary
Dim i As Long
Dim dt
dt = Timer
'把arr中的数据做为dic的key添加到字典中,同时过虑了重复值
Set dic = New Dictionary
For i = 1 To ARR_LENGTH
If Not dic.Exists(arr(i)) Then
dic.Add arr(i), i
End If
Next
'匹配数据
For i = 1 To A_LENGTH
If dic.Exists(a(i)) Then Debug.Print a(i)
Next
MsgBox dic.Count & " =============== " & Timer - dt
Set dic = NothingEnd Sub
Private Sub Form_Load()
Dim i As Long
Dim tmp As String
'添加测试数据
For i = 1 To ARR_LENGTH
arr(i) = GetRndString(STRING_LEN)
If i <= A_LENGTH Then a(i) = GetRndString(STRING_LEN)
Next
End Sub'生成字符串
Function GetRndString(n As Long) As String
Dim i As Long
Dim s As String
Randomize
s = Space(n)
For i = 1 To n
Mid(s, i, 1) = Int(Rnd * 10) '由0-9的数字组成n位字符串
Next
GetRndString = s
End Function
Option ExplicitPrivate Const STRING_LEN As Long = 25 '字符串长度
Private Const ARR_LENGTH = 50000 '大数组下标
Private Const A_LENGTH = 10000 '小数组下标Private arr(ARR_LENGTH) As String
Private a(A_LENGTH) As String'搜索数据
Private Sub Command1_Click()
Dim dic As Dictionary
Dim i As Long
Dim dt
dt = Timer
'把arr中的数据做为dic的key添加到字典中,同时过虑了重复值
Set dic = New Dictionary
For i = 0 To ARR_LENGTH
If Not dic.Exists(arr(i)) Then
dic.Add arr(i), i
End If
Next
'匹配数据
For i = 0 To A_LENGTH
If dic.Exists(a(i)) Then Debug.Print a(i)
Next
MsgBox "用时: " & Timer - dt
Set dic = Nothing
End Sub
'窗体载入时添加测试数据
Private Sub Form_Load()
Dim i As Long
Dim tmp As String
For i = 0 To ARR_LENGTH
arr(i) = GetRndString(STRING_LEN)
If i <= A_LENGTH Then a(i) = GetRndString(STRING_LEN)
Next
End Sub'组成N位字符串
Function GetRndString(n As Long) As String
Dim i As Long
Dim s As String
Randomize
s = Space(n)
For i = 1 To n
Mid(s, i, 1) = Int(Rnd * 10) '由0-9的数字组成n位字符串
Next
GetRndString = s
End Function
引用Microsoft Scripting Runtime