程序主要是为了实现将Output文件夹下面的所有TXT文件中,包含TRIGSTR_XXXX(X为1~4位数)字符串,分别对应1.XML(TRIGSTR_XXXX),2.XML(TRIGSTR_XXX),3.XML(TRIGSTR_XX),4.XML(TRIGSTR_X)。通过正则表达式,将TXT文件中的TRIGSTR_XXXX替换成XML文件中的内容(XML的格式为<item><old>TRIGSTR_XXXX</old><new>字符串</new></item>)。程序写出来了,没有什么逻辑错误,但是执行的速度很慢,整个OUTPUT文件夹大概500K的TXT文件,用另外一个软件(别人编写的)大概90秒就可以出结果。但是我自己的程序,就算是31K的文件,也要65秒,请问这个程序的速度瓶颈在哪里?
个人怀疑是文件操作的效率太低或者正则表达式的效率太低,不知道那位大侠知道。源程序Option Explicit
Private Sub Command1_Click()
Dim ofso As FileSystemObject
Dim fo As Folder
Dim f As File
Dim str1 As String
Dim str2 As String
Dim str3 As String
Dim Count As Integer
Dim regex1, regex2
Dim objMatch1, objMatch2 As Match
Dim colMatches1, colMatches2 As MatchCollection
Dim CountFind, CountReplace As Integer
Dim zerostart As BooleanIf Command1.Caption <> "转换" Then
MsgBox "已经完成转换,如果需要再次转换,请删除转换后产生的新文件,并重启本程序"
Exit Sub
End If
Command1.Caption = "正在转换……"
Set ofso = New FileSystemObject
Set fo = ofso.GetFolder("Output")
'Set fo = ofso.GetFolder("E:\Output")
Set regex1 = New RegExp
Set regex2 = New RegExp
For Each f In fo.Files
Open "Output\" + f.Name For Input As #1
'Open "E:\Output\"+ f.Name For Input As #1
Open "Output\New_" + f.Name For Output As #3
'Open "E:\Output\New_" + f.Name For Output As #3
'Open "E:\Output\Debug.txt" For Output As #4
Do While Not EOF(1)
Line Input #1, str1
'MsgBox "读入TXT字符串:" & str1
regex1.Pattern = "TRIGSTR_([0-9]+)"
Set colMatches1 = regex1.Execute(str1)
regex1.Global = True
For Each objMatch1 In colMatches1
CountFind = CountFind + 1
Count = 4 - Len(objMatch1.SubMatches(0)) + 1
Open Count & ".xml" For Input As #2
'Open "E:\" & Count & ".xml" For Input As #2
Do While Not EOF(2)
Line Input #2, str2
'If objMatch1 = "TRIGSTR_196" Then MsgBox str2
'开始正则表达式比较
regex2.Pattern = objMatch1 + "</old><new>(.+?)</new>"
If regex2.Test(str2) Then
Set colMatches2 = regex2.Execute(str2)
Set objMatch2 = colMatches2(0)
str3 = objMatch2.SubMatches(0)
'Print #4, "原字符串:" & str1
'Print #4, "查得字符串:" & str2
'Print #4, "替换字符串:" & str3
str1 = regex1.Replace(str1, str3)
CountReplace = CountReplace + 1
'Print #4, "替换结果:" & str1 & vbCrLf
End If
Loop
Close #2
Next
Print #3, str1
Loop
'Close #4
Close #3
Close #1
Next
If CountFind > CountReplace Then
MsgBox "总共查找到符合条件字符串" & CountFind & "个,替换" & CountReplace & "个" & vbCrLf & "请检查所提供文件是否正确!"
Else
MsgBox "转换成功!"
End If
Command1.Caption = "转换完成"
End Sub
个人怀疑是文件操作的效率太低或者正则表达式的效率太低,不知道那位大侠知道。源程序Option Explicit
Private Sub Command1_Click()
Dim ofso As FileSystemObject
Dim fo As Folder
Dim f As File
Dim str1 As String
Dim str2 As String
Dim str3 As String
Dim Count As Integer
Dim regex1, regex2
Dim objMatch1, objMatch2 As Match
Dim colMatches1, colMatches2 As MatchCollection
Dim CountFind, CountReplace As Integer
Dim zerostart As BooleanIf Command1.Caption <> "转换" Then
MsgBox "已经完成转换,如果需要再次转换,请删除转换后产生的新文件,并重启本程序"
Exit Sub
End If
Command1.Caption = "正在转换……"
Set ofso = New FileSystemObject
Set fo = ofso.GetFolder("Output")
'Set fo = ofso.GetFolder("E:\Output")
Set regex1 = New RegExp
Set regex2 = New RegExp
For Each f In fo.Files
Open "Output\" + f.Name For Input As #1
'Open "E:\Output\"+ f.Name For Input As #1
Open "Output\New_" + f.Name For Output As #3
'Open "E:\Output\New_" + f.Name For Output As #3
'Open "E:\Output\Debug.txt" For Output As #4
Do While Not EOF(1)
Line Input #1, str1
'MsgBox "读入TXT字符串:" & str1
regex1.Pattern = "TRIGSTR_([0-9]+)"
Set colMatches1 = regex1.Execute(str1)
regex1.Global = True
For Each objMatch1 In colMatches1
CountFind = CountFind + 1
Count = 4 - Len(objMatch1.SubMatches(0)) + 1
Open Count & ".xml" For Input As #2
'Open "E:\" & Count & ".xml" For Input As #2
Do While Not EOF(2)
Line Input #2, str2
'If objMatch1 = "TRIGSTR_196" Then MsgBox str2
'开始正则表达式比较
regex2.Pattern = objMatch1 + "</old><new>(.+?)</new>"
If regex2.Test(str2) Then
Set colMatches2 = regex2.Execute(str2)
Set objMatch2 = colMatches2(0)
str3 = objMatch2.SubMatches(0)
'Print #4, "原字符串:" & str1
'Print #4, "查得字符串:" & str2
'Print #4, "替换字符串:" & str3
str1 = regex1.Replace(str1, str3)
CountReplace = CountReplace + 1
'Print #4, "替换结果:" & str1 & vbCrLf
End If
Loop
Close #2
Next
Print #3, str1
Loop
'Close #4
Close #3
Close #1
Next
If CountFind > CountReplace Then
MsgBox "总共查找到符合条件字符串" & CountFind & "个,替换" & CountReplace & "个" & vbCrLf & "请检查所提供文件是否正确!"
Else
MsgBox "转换成功!"
End If
Command1.Caption = "转换完成"
End Sub
═══════════════════
http://www.egooglet.com 资料、源码下载http://bbs.j2soft.cn 论坛交流
═══════════════════
myjian(嗷嗷叫的老马--无业,正在到处游的人.......) 兄说的方法,我也有查到过,但是对于我这个程序实现起来有点困难,你说全部读入字符数组,但是正则表达式可以直接操作字符数组吗?麻烦给一个短小的例子……
如果这样,问题完全解答之后,再分别给各位分数,不好意思
Option Explicit
Private Sub Command1_Click()
Dim ofso As FileSystemObject
Dim fo As Folder
Dim f As File
Dim str1 As String
Dim str2 As String
Dim str3 As String
Dim Count As Integer
Dim regex1, regex2
Dim objMatch1, objMatch2 As Match
Dim colMatches1, colMatches2 As MatchCollection
Dim CountFind, CountReplace As Integer
Dim IsFind As Boolean
Dim output As StringIf Command1.Caption <> "转换" Then
MsgBox "已经完成转换,如果需要再次转换,请删除转换后产生的新文件,并重启本程序"
Exit Sub
End If
Command1.Caption = "正在转换……"
IsFind = False
Set ofso = New FileSystemObject
'Set fo = ofso.GetFolder("Output")
Set fo = ofso.GetFolder("E:\Output")
Set regex1 = New RegExp
Set regex2 = New RegExp
Open "E:\1.xml" For Input As #21
Open "E:\2.xml" For Input As #22
Open "E:\3.xml" For Input As #23
Open "E:\4.xml" For Input As #24
'Open "E:\Output\Debug.txt" For Output As #4
For Each f In fo.Files
'Open "Output\" + f.Name For Input As #1
Open "E:\Output\" + f.Name For Input As #1
'Open "Output\New_" + f.Name For Output As #3
Open "E:\Output\New_" + f.Name For Output As #3
Do While Not EOF(1)
Line Input #1, str1
'MsgBox "读入TXT字符串:" & str1
regex1.Pattern = "TRIGSTR_([0-9]+)"
Set colMatches1 = regex1.Execute(str1)
regex1.Global = True
For Each objMatch1 In colMatches1
CountFind = CountFind + 1
Count = 4 - Len(objMatch1.SubMatches(0)) + 1
'MsgBox "查找文件" & Count & ".xml"
Do While Not EOF(20 + Count) And IsFind = False
Line Input #(20 + Count), str2
'MsgBox "读入XML字符串:" & str2
'开始正则表达式比较
regex2.Pattern = objMatch1 + "</old><new>(.+?)</new>"
If regex2.Test(str2) Then
IsFind = True
Set colMatches2 = regex2.Execute(str2)
Set objMatch2 = colMatches2(0)
str3 = objMatch2.SubMatches(0)
'Print #4, "原字符串:" & str1
'Print #4, "查得字符串:" & str2
'Print #4, "替换字符串:" & str3
str1 = regex1.Replace(str1, str3)
CountReplace = CountReplace + 1
'Print #4, "替换结果:" & str1 & vbCrLf
End If
Loop
IsFind = False
Seek #(20 + Count), 1
Next
output = output & str1 & vbCrLf
Loop
Print #3, output
Close #3
Close #1
Next
'Close #4
Close #21
Close #22
Close #23
Close #24
If CountFind > CountReplace Then
MsgBox "总共查找到符合条件字符串" & CountFind & "个,替换" & CountReplace & "个" & vbCrLf & "请检查所提供文件是否正确!"
Else
MsgBox "转换成功!"
End If
Command1.Caption = "转换完成"
End Sub
StrArray() As String
End TypePrivate Sub Command1_Click()
Dim ofso As FileSystemObject
Dim fo As Folder
Dim f As File
Dim str1 As String
Dim str2 As String
Dim str3 As String
Dim Count As Integer
Dim regex1, regex2
Dim objMatch1, objMatch2 As Match
Dim colMatches1, colMatches2 As MatchCollection
Dim CountFind, CountReplace As Integer
Dim IsFind As Boolean
Dim output As String Dim strFile1() As String, strFile3() As String
Dim strFileXml() As MyStr
Dim strTmp() As String, I As Long
If Command1.Caption <> "转换" Then
MsgBox "已经完成转换,如果需要再次转换,请删除转换后产生的新文件,并重启本程序"
Exit Sub
End If
Command1.Caption = "正在转换……"
IsFind = False
Set ofso = New FileSystemObject
'Set fo = ofso.GetFolder("Output")
Set fo = ofso.GetFolder("E:\Output")
Set regex1 = New RegExp
Set regex2 = New RegExp
ReDim strFileXml(4)
strFileXml(1).StrArray = GetBinaryFile("E:\1.xml")
strFileXml(2).StrArray = GetBinaryFile("E:\2.xml")
strFileXml(3).StrArray = GetBinaryFile("E:\3.xml")
strFileXml(4).StrArray = GetBinaryFile("E:\4.xml")
For Each f In fo.Files
strFile1 = GetBinaryFile("E:\Output\" + f.Name)
strFile3 = GetBinaryFile("E:\Output\New_" + f.Name)
'Open "Output\" + f.Name For Input As #1
' Open "E:\Output\" + f.Name For Input As #1
'Open "Output\New_" + f.Name For Output As #3
' Open "E:\Output\New_" + f.Name For Output As #3 I = 0
Do
If I > UBound(strFile(1)) Then Exit Do
str1 = strFile1(I)
regex1.Pattern = "TRIGSTR_([0-9]+)"
Set colMatches1 = regex1.Execute(str1)
regex1.Global = True
For Each objMatch1 In colMatches1
CountFind = CountFind + 1
Count = 4 - Len(objMatch1.SubMatches(0)) + 1
'MsgBox "查找文件" & Count & ".xml"
Do While Not EOF(20 + Count) And IsFind = False
str2 = strFileXml(Count).StrArray(0)
'这里我不知道你是要第几行...我就直接取第一行了
' Line Input #(20 + Count), str2
'MsgBox "读入XML字符串:" & str2
'开始正则表达式比较
regex2.Pattern = objMatch1 + "</old><new>(.+?)</new>"
If regex2.Test(str2) Then
IsFind = True
Set colMatches2 = regex2.Execute(str2)
Set objMatch2 = colMatches2(0)
str3 = objMatch2.SubMatches(0)
'Print #4, "原字符串:" & str1
'Print #4, "查得字符串:" & str2
'Print #4, "替换字符串:" & str3
str1 = regex1.Replace(str1, str3)
CountReplace = CountReplace + 1
'Print #4, "替换结果:" & str1 & vbCrLf
End If
Loop
IsFind = False
Seek #(20 + Count), 1
Next
output = output & str1 & vbCrLf
Loop
Print #3, output
Close #3
Close #1
Next
'Close #4
Close #21
Close #22
Close #23
Close #24
If CountFind > CountReplace Then
MsgBox "总共查找到符合条件字符串" & CountFind & "个,替换" & CountReplace & "个" & vbCrLf & "请检查所提供文件是否正确!"
Else
MsgBox "转换成功!"
End If
Command1.Caption = "转换完成"
End SubPrivate Function GetBinaryFile(ByVal FileName As String) As String()
'以二进制方式读入文件
'FileName - 要读入的文件名
'返回值:
' 对读入的文件按行保存在返回的数组内
Dim strTmp As String, strOut() As String
Open FileName For Binary As #1
strTmp = Space(LOF(1))
Get 1, , strTmp
Close #1
strOut = Split(strTmp, vbCrLf)
GetBinaryFile = strOut
Erase strOut
End Function