Public Function html2txt(sourcehtml As String) As String '包含bug Dim pos1 As Long, pos2 As Long, intcount As Long, textout As String, texttem As String pos2 = InStr(1, sourcehtml, "<")’ 粗略检测是否是包含html代码,是否包含”<” If pos2 > 0 Then ‘如果是的话就做下面的动作 Do While pos2 <> 0 pos1 = InStr(pos2, sourcehtml, ">") pos2 = InStr(pos1, sourcehtml, "<") If pos2 = 0 Then Exit Do End If texttem = Mid(sourcehtml, pos1 + 1, pos2 - pos1 - 1) If texttem = Chr(10) Or texttem = " " Or texttem = " " Or texttem = " " Then texttem = "" End If textout = textout + texttem Loop html2txt = Replace(Replace(Replace(textout, " ", "", 1), Chr(10), "", 1), " ", "", 1) Else ?如果不是的话就做下面的动作,直接替换空格,和回车 html2txt = Replace(Replace(Replace(sourcehtml, " ", "", 1), Chr(10), "", 1), " ", "", 1) End If End Function
类似词法分析程序(可以参见csdn中的词法程序源代码)
Dim pos1 As Long, pos2 As Long, intcount As Long, textout As String, texttem As String
pos2 = InStr(1, sourcehtml, "<")’ 粗略检测是否是包含html代码,是否包含”<”
If pos2 > 0 Then ‘如果是的话就做下面的动作
Do While pos2 <> 0
pos1 = InStr(pos2, sourcehtml, ">")
pos2 = InStr(pos1, sourcehtml, "<")
If pos2 = 0 Then
Exit Do
End If
texttem = Mid(sourcehtml, pos1 + 1, pos2 - pos1 - 1)
If texttem = Chr(10) Or texttem = " " Or texttem = " " Or texttem = " " Then
texttem = ""
End If
textout = textout + texttem
Loop
html2txt = Replace(Replace(Replace(textout, " ", "", 1), Chr(10), "", 1), " ", "", 1)
Else ?如果不是的话就做下面的动作,直接替换空格,和回车
html2txt = Replace(Replace(Replace(sourcehtml, " ", "", 1), Chr(10), "", 1), " ", "", 1)
End If
End Function
但由于效率不是很好,所以……