uses Winapi.msxml, Winapi.ActiveX, Winapi.WinSock;{获取网页源码}
function Getwebcode(url:string; var code:string):Boolean; //获取网页源码
var
http: IServerXMLHTTPRequest2;
vHtml: Variant;
begin
result:=false;
CoInitialize(nil);
code:='';
if Pos('http://',LowerCase(url))<=0 then url:='http://'+url;
try
http := CoServerXMLHTTP60.Create;
try
http.open('GET', Trim(url), false, null, null);
//http.setRequestHeader('Content-Type', 'text/xml;charset=gbk'); 无论加不加都无效
http.setTimeouts(10 * 1000, 10 * 1000, 10 * 1000, 10 * 1000);//
http.send(null); if (http.readyState=4) and (http.status=200) then
begin
vHtml := Http.responsetext;
code := vHtml;
end;
if (code<>'null') and (Length(code)>10) then result:=true else result:=false;
finally
http := nil;
end;
except
on e:exception do
begin
code:='';
result:=false;
end;
end;
end;procedure TForm1.Button1Click(Sender: TObject);
var
code:string;
begin
//Getwebcode('http://www.baidu.com/',code); //UTF8 正常
//memo1.Lines.Add(code);
Getwebcode('http://www.dedecms.com/',code); //GB2312或GBK 全乱码
memo1.Lines.Add(code);
end;
VB方法
Option Explicit
Private Sub Form_Load()
'测试
Text1.Text = GetHtml("http://www.NewXing.com")
End SubPublic Function GetHtml(url As String)
Dim xmlHttp As Object
Set xmlHttp = CreateObject("Microsoft.XMLHTTP")
xmlHttp.open "GET", url, True
xmlHttp.send (Null)
While xmlHttp.ReadyState <> 4
DoEvents
Wend
GetHtml = BytesToBstr(xmlHttp.responseBody)
End FunctionPrivate Function BytesToBstr(Bytes)
Dim Unicode As String
If IsUTF8(Bytes) Then '如果不是UTF-8编码则按照GB2312来处理
Unicode = "UTF-8"
Else
Unicode = "GB2312"
End If Dim objstream As Object
Set objstream = CreateObject("ADODB.Stream")
With objstream
.Type = 1
.Mode = 3
.open
.Write Bytes
.Position = 0
.Type = 2
.Charset = Unicode
BytesToBstr = .ReadText
.Close
End With
End Function '判断网页编码函数
Private Function IsUTF8(Bytes) As Boolean
Dim i As Long, AscN As Long, Length As Long
Length = UBound(Bytes) + 1
If Length < 3 Then
IsUTF8 = False
Exit Function
ElseIf Bytes(0) = &HEF And Bytes(1) = &HBB And Bytes(2) = &HBF Then
IsUTF8 = True
Exit Function
End If Do While i <= Length - 1
If Bytes(i) < 128 Then
i = i + 1
AscN = AscN + 1
ElseIf (Bytes(i) And &HE0) = &HC0 And (Bytes(i + 1) And &HC0) = &H80 Then
i = i + 2 ElseIf i + 2 < Length Then
If (Bytes(i) And &HF0) = &HE0 And (Bytes(i + 1) And &HC0) = &H80 And (Bytes(i + 2) And &HC0) = &H80 Then
i = i + 3
Else
IsUTF8 = False
Exit Function
End If
Else
IsUTF8 = False
Exit Function
End If
Loop
If AscN = Length Then
IsUTF8 = False
Else
IsUTF8 = True
End IfEnd Function
查了几个通宵资料,国外网站也都翻遍了。原因:responsetext默认已经转换成UTF8,导致其它编码均乱码。看来只能从 Responsebody入手老外方法
vHtml := Http.responseBody;
for i:=0 to length(vhtml) do code:=code+chr(strtoint(inttostr(vHtml[i])));这个方法,我之前试过成功了,GBK和GB2312都没问题,但UTF-8乱码。(不知是否和我新装XE2关系)网上查到VB的方法(已附)很多,但小弟不懂,望洋兴叹。求翻译!!!解决后,保证当天结贴。同叟无欺。
function Getwebcode(url:string; var code:string):Boolean; //获取网页源码
var
http: IServerXMLHTTPRequest2;
vHtml: Variant;
begin
result:=false;
CoInitialize(nil);
code:='';
if Pos('http://',LowerCase(url))<=0 then url:='http://'+url;
try
http := CoServerXMLHTTP60.Create;
try
http.open('GET', Trim(url), false, null, null);
//http.setRequestHeader('Content-Type', 'text/xml;charset=gbk'); 无论加不加都无效
http.setTimeouts(10 * 1000, 10 * 1000, 10 * 1000, 10 * 1000);//
http.send(null); if (http.readyState=4) and (http.status=200) then
begin
vHtml := Http.responsetext;
code := vHtml;
end;
if (code<>'null') and (Length(code)>10) then result:=true else result:=false;
finally
http := nil;
end;
except
on e:exception do
begin
code:='';
result:=false;
end;
end;
end;procedure TForm1.Button1Click(Sender: TObject);
var
code:string;
begin
//Getwebcode('http://www.baidu.com/',code); //UTF8 正常
//memo1.Lines.Add(code);
Getwebcode('http://www.dedecms.com/',code); //GB2312或GBK 全乱码
memo1.Lines.Add(code);
end;
VB方法
Option Explicit
Private Sub Form_Load()
'测试
Text1.Text = GetHtml("http://www.NewXing.com")
End SubPublic Function GetHtml(url As String)
Dim xmlHttp As Object
Set xmlHttp = CreateObject("Microsoft.XMLHTTP")
xmlHttp.open "GET", url, True
xmlHttp.send (Null)
While xmlHttp.ReadyState <> 4
DoEvents
Wend
GetHtml = BytesToBstr(xmlHttp.responseBody)
End FunctionPrivate Function BytesToBstr(Bytes)
Dim Unicode As String
If IsUTF8(Bytes) Then '如果不是UTF-8编码则按照GB2312来处理
Unicode = "UTF-8"
Else
Unicode = "GB2312"
End If Dim objstream As Object
Set objstream = CreateObject("ADODB.Stream")
With objstream
.Type = 1
.Mode = 3
.open
.Write Bytes
.Position = 0
.Type = 2
.Charset = Unicode
BytesToBstr = .ReadText
.Close
End With
End Function '判断网页编码函数
Private Function IsUTF8(Bytes) As Boolean
Dim i As Long, AscN As Long, Length As Long
Length = UBound(Bytes) + 1
If Length < 3 Then
IsUTF8 = False
Exit Function
ElseIf Bytes(0) = &HEF And Bytes(1) = &HBB And Bytes(2) = &HBF Then
IsUTF8 = True
Exit Function
End If Do While i <= Length - 1
If Bytes(i) < 128 Then
i = i + 1
AscN = AscN + 1
ElseIf (Bytes(i) And &HE0) = &HC0 And (Bytes(i + 1) And &HC0) = &H80 Then
i = i + 2 ElseIf i + 2 < Length Then
If (Bytes(i) And &HF0) = &HE0 And (Bytes(i + 1) And &HC0) = &H80 And (Bytes(i + 2) And &HC0) = &H80 Then
i = i + 3
Else
IsUTF8 = False
Exit Function
End If
Else
IsUTF8 = False
Exit Function
End If
Loop
If AscN = Length Then
IsUTF8 = False
Else
IsUTF8 = True
End IfEnd Function
查了几个通宵资料,国外网站也都翻遍了。原因:responsetext默认已经转换成UTF8,导致其它编码均乱码。看来只能从 Responsebody入手老外方法
vHtml := Http.responseBody;
for i:=0 to length(vhtml) do code:=code+chr(strtoint(inttostr(vHtml[i])));这个方法,我之前试过成功了,GBK和GB2312都没问题,但UTF-8乱码。(不知是否和我新装XE2关系)网上查到VB的方法(已附)很多,但小弟不懂,望洋兴叹。求翻译!!!解决后,保证当天结贴。同叟无欺。
GetCode = xmlHTTP1.responsetext
Else
GetCode = BytesToBstr(xmlHTTP1.responseBody, "gb18030")
End If
Dim objstream
Set objstream = CreateObject("adodb.stream")
objstream.Type = 1
objstream.Mode = 3
objstream.Open
objstream.Write body
objstream.position = 0
objstream.Type = 2
objstream.Charset = Cset
BytesToBstr = objstream.ReadText
objstream.Close
Set objstream = Nothing
End Function