我用 Delphi 7 + Indy 10.0 控件(IdHTTP)取网页源文件。对于日文(Shift-JIS)编码,可以正常通过 IdHTTP 取到正确的源文件;但对于日文 EUC-JP 编码,用以下程序却不能得到正确结果,不解。贴出我的代码:
var
IframeUrl: string;
ret:WideString;//TStringStream;
SourceHtmlCode: WideString;
begin
IframeUrl := 'http://ctplp.blog15.fc2.com/blog-entry-33.html';//这个网站是EUC-JP编码
//IframeUrl := 'http://www.nhk.or.jp/furusato/koremade/koremade_ibaraki.html';//这个网站是Shift-JIS编码
IdHTTP.HandleRedirects:=true;
IdHTTP.Request.ContentType:= 'application/x-www-form-urlencoded';
IdHTTP.Request.UserAgent:= 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)';
IdHTTP.Request.SetHeaders;
try
SourceHtmlCode := StringToWideString(IdHTTP.Get(IframeUrl),51932);//日文EUC編碼是51932 日文(Shift-JIS)是932
Memo1.Lines.Text := SourceHtmlCode;
except
ShowMessage('未找到HTTP服务器');
end;
end;

{ Converts the multi-byte string S, encoded in the Windows code page
  CodePage, to a UTF-16 WideString.

  Parameters:
    S        - raw bytes to decode (AnsiString in Delphi 7).
    CodePage - Windows code page identifier, e.g. 932 for Shift-JIS.

  Returns the decoded text, or an empty string when S is empty or the
  conversion fails.

  MultiByteToWideChar rejects code page 51932 (EUC-JP): that identifier is
  only implemented by MLang. When the request for 51932 fails, the call is
  retried with 20932, the NLS code page that decodes the JIS X 0208 part of
  EUC-JP.
  NOTE(review): 20932 encodes JIS X 0212 characters differently from real
  EUC-JP (3-byte $8F sequences), so such rare characters may not round-trip;
  use MLang's ConvertINetMultiByteToUnicode if they matter. }
function StringToWideString(const S: string; CodePage: Word): WideString;
const
  CP_EUC_JP_MLANG = 51932; // EUC-JP identifier understood by MLang only
  CP_EUC_JP_NLS = 20932;   // EUC-JP variant understood by MultiByteToWideChar
var
  InputLength, OutputLength: Integer;
  ActualCodePage: Cardinal;
begin
  Result := '';
  InputLength := Length(S);
  // MultiByteToWideChar treats a zero input length as an error.
  if InputLength = 0 then
    Exit;

  ActualCodePage := CodePage;
  OutputLength := MultiByteToWideChar(ActualCodePage, 0, PChar(S), InputLength, nil, 0);
  if (OutputLength = 0) and (CodePage = CP_EUC_JP_MLANG) then
  begin
    // The system refused 51932; fall back to the code page it does support.
    ActualCodePage := CP_EUC_JP_NLS;
    OutputLength := MultiByteToWideChar(ActualCodePage, 0, PChar(S), InputLength, nil, 0);
  end;
  if OutputLength <= 0 then
    Exit;

  SetLength(Result, OutputLength);
  // Do not hand back an uninitialised buffer if the real conversion fails.
  if MultiByteToWideChar(ActualCodePage, 0, PChar(S), InputLength,
    PWideChar(Result), OutputLength) = 0 then
    Result := '';
end;
请帮忙分析原因,或提供其他的思路。注:我用WebBrowser取网页源代码正常,但不想用它。
原问题出自:http://swifthorse.javaeye.com/blog/174467
IframeUrl: string;
ret:WideString;//TStringStream;
SourceHtmlCode: WideString;
begin
IframeUrl := 'http://ctplp.blog15.fc2.com/blog-entry-33.html';//这个网站是EUC-JP编码
//IframeUrl := 'http://www.nhk.or.jp/furusato/koremade/koremade_ibaraki.html';//这个网站是Shift-JIS编码
IdHTTP.HandleRedirects:=true;
IdHTTP.Request.ContentType:= 'application/x-www-form-urlencoded';
IdHTTP.Request.UserAgent:= 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)';
IdHTTP.Request.SetHeaders;
try
SourceHtmlCode := StringToWideString(IdHTTP.Get(IframeUrl),51932);//日文EUC編碼是51932 日文(Shift-JIS)是932
Memo1.Lines.Text := SourceHtmlCode;
except
ShowMessage('未找到HTTP服务器');
end;
end;

{ Converts the multi-byte string S, encoded in the Windows code page
  CodePage, to a UTF-16 WideString.

  Parameters:
    S        - raw bytes to decode (AnsiString in Delphi 7).
    CodePage - Windows code page identifier, e.g. 932 for Shift-JIS.

  Returns the decoded text, or an empty string when S is empty or the
  conversion fails.

  MultiByteToWideChar rejects code page 51932 (EUC-JP): that identifier is
  only implemented by MLang. When the request for 51932 fails, the call is
  retried with 20932, the NLS code page that decodes the JIS X 0208 part of
  EUC-JP.
  NOTE(review): 20932 encodes JIS X 0212 characters differently from real
  EUC-JP (3-byte $8F sequences), so such rare characters may not round-trip;
  use MLang's ConvertINetMultiByteToUnicode if they matter. }
function StringToWideString(const S: string; CodePage: Word): WideString;
const
  CP_EUC_JP_MLANG = 51932; // EUC-JP identifier understood by MLang only
  CP_EUC_JP_NLS = 20932;   // EUC-JP variant understood by MultiByteToWideChar
var
  InputLength, OutputLength: Integer;
  ActualCodePage: Cardinal;
begin
  Result := '';
  InputLength := Length(S);
  // MultiByteToWideChar treats a zero input length as an error.
  if InputLength = 0 then
    Exit;

  ActualCodePage := CodePage;
  OutputLength := MultiByteToWideChar(ActualCodePage, 0, PChar(S), InputLength, nil, 0);
  if (OutputLength = 0) and (CodePage = CP_EUC_JP_MLANG) then
  begin
    // The system refused 51932; fall back to the code page it does support.
    ActualCodePage := CP_EUC_JP_NLS;
    OutputLength := MultiByteToWideChar(ActualCodePage, 0, PChar(S), InputLength, nil, 0);
  end;
  if OutputLength <= 0 then
    Exit;

  SetLength(Result, OutputLength);
  // Do not hand back an uninitialised buffer if the real conversion fails.
  if MultiByteToWideChar(ActualCodePage, 0, PChar(S), InputLength,
    PWideChar(Result), OutputLength) = 0 then
    Result := '';
end;
请帮忙分析原因,或提供其他的思路。注:我用WebBrowser取网页源代码正常,但不想用它。
原问题出自:http://swifthorse.javaeye.com/blog/174467
codepage还是932,我已经取出,D7下通过
IframeUrl := 'http://ctplp.blog15.fc2.com/blog-entry-33.html';//这个网站是EUC-JP编码
// IframeUrl := 'http://www.nhk.or.jp/furusato/koremade/koremade_ibaraki.html';//这个网站是Shift-JIS编码
IdHTTP.HandleRedirects:=true;
IdHTTP.Request.ContentType:= 'application/x-www-form-urlencoded';
IdHTTP.Request.UserAgent:= 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)';
IdHTTP.Request.SetHeaders;
try
SourceHtmlCode := StringToWideString(IdHTTP.Get(IframeUrl),932);//日文EUC編碼是51932 日文(Shift-JIS)是932
取出来的网页内容:
<html lang="ja">
<head>
<meta http-equiv="content-type" content="text/html; charset=euc-jp">
<meta http-equiv="content-style-type" content="style/css">
<meta http-equiv="content-script-type" content="text/javascript">
<link href="http://ctplp.blog15.fc2.com/template/diary04/style.css" type="text/css" rel="stylesheet">
<link rel="alternate" type="application/rss+xml" title="RSS" href="http://ctplp.blog15.fc2.com/?xml">
<title>Rule of 5</title><!--?????????????熙?????-->
<script type="text/javascript">
<!--
function showMore(varA1, varB1){
var123 = ('varXYZ' + (varA1));
varABC = ('varP' + (varA1));
if( document.getElementById ) {
if( document.getElementById(var123).style.display ) {
if( varB1 != 0 ) {
document.getElementById(var123).style.display = "block";
document.getElementById(varABC).style.display = "none";
} else { document.getElementById(var123).style.display = "none";
document.getElementById(varABC).style.display = "block"; }
} else { location.href = varB1;
return true; }
} else { location.href = varB1;
return true; }
}
//-->
</script>
<!--/??????????-->
..........
其余略!
你取出来的源文件是否也有以下乱码?嗯??????????????<br />????說?赧???你???醫????????????? ?暿?ˊ??#715;????<br />??#21834;?????????????????貎????釛?癸????眞?說??????狀???????眩?????????步????????霽????????????<br />嗯嗯???????跪?#22905;????#21966;??ˊvˋ<br />???鞋?????????????笋?咦????这正是我没有解决的问题。请继续帮忙找找解答。
object Form1: TForm1
Left = 276
Top = 159
Width = 479
Height = 412
Caption = 'Form1'
Color = clBtnFace
Font.Charset = DEFAULT_CHARSET
Font.Color = clWindowText
Font.Height = -11
Font.Name = 'MS Sans Serif'
Font.Style = []
OldCreateOrder = False
Position = poDesktopCenter
DesignSize = (
471
385)
PixelsPerInch = 96
TextHeight = 13
object Edit1: TEdit
Left = 0
Top = 8
Width = 385
Height = 21
Anchors = [akLeft, akTop, akRight]
TabOrder = 0
Text = 'Edit1'
end
object Button1: TButton
Left = 392
Top = 5
Width = 71
Height = 25
Anchors = [akTop, akRight]
Caption = 'Button1'
TabOrder = 1
OnClick = Button1Click
end
object Memo1: TMemo
Left = 0
Top = 40
Width = 465
Height = 337
Anchors = [akLeft, akTop, akRight, akBottom]
Lines.Strings = (
'Memo1')
ScrollBars = ssBoth
TabOrder = 2
end
end//=========代码
unit Unit1;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, WinSock;

type
  { Main form: a URL edit box, a button that starts the download and a memo
    that shows the downloaded text. }
  TForm1 = class(TForm)
    Edit1: TEdit;
    Button1: TButton;
    Memo1: TMemo;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form1: TForm1;

implementation

{$R *.dfm}
{ Downloads Url over a plain Winsock TCP connection and returns everything
  the server sent, with line endings normalised to CR/LF.

  Parameters:
    Url - address to fetch, optionally prefixed with 'http://' and optionally
          carrying an explicit ':port' after the host name.

  Returns the raw reply (the bytes are not converted to any character set),
  or an empty string when Winsock cannot be started, the host cannot be
  resolved, or the connection or request fails.

  Raises EConvertError when the port part of Url is not a number.

  NOTE(review): nothing here strips the HTTP status line and headers, so they
  are presumably part of the returned text. The request says HTTP/1.1, so a
  server may answer with chunked transfer encoding, which is not decoded. }
function GetHttpData(Url:String):String;

  { Splits URL into host name, path (without the leading '/') and port.
    The port defaults to 80 when the URL does not name one. }
  procedure TranslateURL(URL: String; var Server,Link:String; var Port:Integer);
  var
    i: Integer;
  begin
    // Strip the scheme only when it is really a prefix; a later occurrence
    // (for example inside a query string) must not eat the first 7 chars.
    if Pos('http://', LowerCase(URL)) = 1 then
      System.Delete(URL, 1, 7);
    i := Pos('/', URL);
    if i > 0 then
    begin
      Server := Copy(URL, 1, i-1);
      Link := Copy(URL, i+1, MaxInt);
    end
    else
    begin
      Server := URL;
      Link := '';
    end;
    i := Pos(':', Server);
    if i > 0 then
    begin
      Port := StrToInt(Copy(Server, i+1, MaxInt));
      Server := Copy(Server, 1, i-1);
    end
    else
      Port := 80;
  end;

var
  len: Integer;
  s: TSocket;
  name: sockaddr_in;
  he: PHostEnt;
  buf: array[0..1023] of char;
  str, data, HostPort: string;
  wsd: WSADATA;
  Server, Link: String;
  Port: Integer;
begin
  // Every early Exit below must hand back a defined value.
  Result := '';
  data := '';
  TranslateURL(Url, Server, Link, Port);

  if WSAStartup($101, wsd) <> 0 then
    Exit;
  try
    he := gethostbyname(PChar(Server));
    if he = nil then
      Exit;

    s := socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if s = INVALID_SOCKET then
      Exit;
    try
      FillChar(name, sizeof(name), 0);
      name.sin_family := AF_INET;
      name.sin_port := htons(Port);
      // First entry of the resolved address list.
      name.sin_addr.S_addr := PDWORD(PDWORD(he.h_addr)^)^;
      if connect(s, name, sizeof(name)) = SOCKET_ERROR then
        Exit;

      // The port is only spelled out when it is not the default one.
      HostPort := Server;
      if Port <> 80 then
        HostPort := HostPort + ':' + IntToStr(Port);

      str := 'GET /' + Link + ' HTTP/1.1'#13#10;
      str := str + 'Referer: view-source:http://' + HostPort + '/' + Link + #13#10;
      str := str + 'User-Agent: Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0; MyIE 3.01)'#13#10;
      str := str + 'Host: ' + HostPort + #13#10;
      str := str + 'Connection: Close'#13#10;
      str := str + 'Cache-Control: no-cache'#13#10;
      str := str + #13#10;
      if send(s, PChar(str)^, Length(str), 0) = SOCKET_ERROR then
        Exit;

      // 'Connection: Close' was requested, so read until recv reports
      // end of stream (0) or an error (negative).
      while True do
      begin
        len := recv(s, buf, sizeof(buf), 0);
        if len < 1 then
          Break;
        SetString(str, buf, len);
        data := data + str;
      end;
    finally
      closesocket(s);
    end;
  finally
    // Balances the successful WSAStartup on every path.
    WSACleanup;
  end;

  // Normalise CR/LF, lone CR and lone LF to CR/LF. Folding to LF first keeps
  // consecutive bare line feeds (blank lines) from being merged.
  Result := StringReplace(data, #13#10, #10, [rfReplaceAll]);
  Result := StringReplace(Result, #13, #10, [rfReplaceAll]);
  Result := StringReplace(Result, #10, #13#10, [rfReplaceAll]);
end;

{ Button handler: fetches the URL typed into Edit1 and shows the reply in
  Memo1. }
procedure TForm1.Button1Click(Sender: TObject);
begin
  Memo1.Text := GetHttpData(Edit1.Text);
end;

end.
可以完整取得网码内容
可参阅 http://ikaruga.name/Technology/JpCode/index.html 和 http://ikaruga.name/Technology/JpCode/src/common.h.html 。这两个地址里的代码我已经实验过了,可行,不乱码了。