http://taizhou.19lou.com/forum-804-thread-147601348048508030-1-1.html 上面这个网页用IdHTTP打开居然只显示几行JavaScript代码,请问是什么原因?是由JS输出的吗?以下是IdHTTP获得的内容,与实际网页相差很大,为什么?
<script language="javascript">var url=document.location.href;
// s is the index of the first "?" in the current URL (-1 when there is none).
var s = url.indexOf("?"); var redirectUrl='';
// Strip the first host label: keep everything after the first "." of document.domain.
var domain=document.domain.substr(document.domain.indexOf(".")+1);if(s>0)
{
// Everything after the "?" is treated as the address to go back to.
redirectUrl=url.substr(s+1);
// Store the token cookie (365 days, path "/", scoped to the stripped domain), then navigate to the target.
SetCookie("_Z3nY0d4C_","37XgPK9h",365,"/",domain);
document.location.href=redirectUrl;}
else
// No "?" found at an index > 0: go to the "www" host of the stripped domain; no cookie is set on this branch.
{ document.location.href="http://www."+domain;
}function SetCookie (name, value) {
// Optional positional arguments after (name, value) are read from `arguments`:
// [2] expires (in days), [3] path, [4] domain, [5] secure flag.
var expdate = new Date();
var argv = SetCookie.arguments;
var argc = SetCookie.arguments.length;
var expires = (argc > 2) ? argv[2] : null;
var path = (argc > 3) ? argv[3] : null;
var domain = (argc > 4) ? argv[4] : null;
// expires >= 0 becomes "now + expires days"; expires == -1 emits a literal "; expires=-1";
// null or any other negative value omits the expires attribute altogether.
var secure = (argc > 5) ? argv[5] : false;if(expires!=null && expires>=0) expdate.setTime(expdate.getTime() + ( expires * 24*60*60*1000 ));document.cookie = name + "=" + escape (value) +((expires == null || expires < 0) ? ((expires==-1)?"; expires=-1":"") : ("; expires="+ expdate.toGMTString()))
+((path == null) ? "" : ("; path=" + path)) +((domain == null) ? "" : ("; domain=" + domain))
+((secure == true) ? "; secure" : "");
}</script>
<script language="javascript">var url=document.location.href;
// s is the index of the first "?" in the current URL (-1 when there is none).
var s = url.indexOf("?"); var redirectUrl='';
// Strip the first host label: keep everything after the first "." of document.domain.
var domain=document.domain.substr(document.domain.indexOf(".")+1);if(s>0)
{
// Everything after the "?" is treated as the address to go back to.
redirectUrl=url.substr(s+1);
// Store the token cookie (365 days, path "/", scoped to the stripped domain), then navigate to the target.
SetCookie("_Z3nY0d4C_","37XgPK9h",365,"/",domain);
document.location.href=redirectUrl;}
else
// No "?" found at an index > 0: go to the "www" host of the stripped domain; no cookie is set on this branch.
{ document.location.href="http://www."+domain;
}function SetCookie (name, value) {
// Optional positional arguments after (name, value) are read from `arguments`:
// [2] expires (in days), [3] path, [4] domain, [5] secure flag.
var expdate = new Date();
var argv = SetCookie.arguments;
var argc = SetCookie.arguments.length;
var expires = (argc > 2) ? argv[2] : null;
var path = (argc > 3) ? argv[3] : null;
var domain = (argc > 4) ? argv[4] : null;
// expires >= 0 becomes "now + expires days"; expires == -1 emits a literal "; expires=-1";
// null or any other negative value omits the expires attribute altogether.
var secure = (argc > 5) ? argv[5] : false;if(expires!=null && expires>=0) expdate.setTime(expdate.getTime() + ( expires * 24*60*60*1000 ));document.cookie = name + "=" + escape (value) +((expires == null || expires < 0) ? ((expires==-1)?"; expires=-1":"") : ("; expires="+ expdate.toGMTString()))
+((path == null) ? "" : ("; path=" + path)) +((domain == null) ? "" : ("; domain=" + domain))
+((secure == true) ? "; secure" : "");
}</script>
document.location.href="http://www."+domain;
显然是重定向了
你只能分析这段js代码,把它要做的事在自己的代码中写上。比如为idhttp设置cookie、得到跳转地址并用idhttp去访问。
document.location.href="http://www."+domain;
这一行决定了最终访问的是哪个网站。
不过,那个网站可能还要根据 Cookie 和 Referer 才会显示真正的网页。
关键就在cookie
js中这一行SetCookie("_Z3nY0d4C_","37XgPK9h",365,"/",domain);
第一次访问这页面,会判断上面这个cookie,如果错误就调用js设置这个cookie(但在idhttp中js是不会运行的),如果正确就给出正确页面内容。帮你写了段代码,可以正常得到页面内容。

{ Returns the text between the first occurrence of sStart and the next
  occurrence of sEnd that follows it in InputStr.
  Returns '' when either delimiter cannot be found. Requires StrUtils (PosEx). }
function GetStr(sStart,sEnd,InputStr:string):string;
var
  StartPos, BodyPos, EndPos: Integer;
begin
  Result := '';
  StartPos := PosEx(sStart, InputStr, 1);
  // Stop early: searching for sEnd from a made-up offset is pointless and,
  // for an empty sStart, would start the scan at index 0.
  if StartPos = 0 then
    Exit;
  BodyPos := StartPos + Length(sStart);
  EndPos := PosEx(sEnd, InputStr, BodyPos);
  // Copy uses the same character indexes as PosEx; the byte-oriented MidBStr
  // only agrees with them when the string type is single-byte.
  if EndPos > 0 then
    Result := Copy(InputStr, BodyPos, EndPos - BodyPos);
end;

{ Downloads AUrl with browser-like request headers and returns the body.
  When ACookie is not empty it is sent as a raw "Cookie:" request header.
  Exceptions raised by TIdHTTP propagate to the caller; the component is
  always freed. }
function FetchPage(const AUrl, ACookie: string): string;
var
  Http: TIdHTTP;
begin
  Http := TIdHTTP.Create(nil);
  try
    Http.Request.UserAgent := 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)';
    Http.Request.Connection := 'Keep-Alive';
    Http.Request.Referer := 'http://taizhou.19lou.com/';
    Http.Request.Accept := 'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*';
    Http.Request.AcceptLanguage := 'zh-cn';
    Http.ReadTimeout := 10000;
    Http.HandleRedirects := True;
    if ACookie <> '' then
    begin
      // NOTE(review): kept from the version tested on D7 + Indy 9; this explicit
      // call is probably redundant because Get rebuilds the headers - confirm
      // before removing (and before porting to Indy 10).
      Http.Request.SetHeaders;
      Http.Request.CustomHeaders.Add('Cookie: ' + ACookie);
    end;
    Result := Http.Get(AUrl);
  finally
    Http.Free;
  end;
end;

{ Extracts the cookie that the anti-bot page would set through its
  SetCookie("name","value",...) JavaScript call and returns it as
  'name=value;'. Returns '' when AHtml contains no such call. }
function ExtractJsCookie(const AHtml: string): string;
const
  CallPrefix = 'SetCookie("';
var
  CookieName, CookieValue: string;
begin
  Result := '';
  CookieName := GetStr(CallPrefix, '"', AHtml);
  if CookieName = '' then
    Exit;
  // Anchor the value lookup on the very same call instead of on the first
  // '","' that happens to appear anywhere in the page.
  CookieValue := GetStr(CallPrefix + CookieName + '","', '"', AHtml);
  Result := CookieName + '=' + CookieValue + ';';
end;

{ Fetches the thread page. If the server answers with the JavaScript stub
  that sets a cookie, the cookie is extracted from the script and the page is
  requested again with it. The resulting HTML (or an error message when a
  request fails) is appended to Memo1. }
procedure TForm1.Button1Click(Sender: TObject);
const
  PageUrl = 'http://taizhou.19lou.com/forum-804-thread-147601348048508030-1-1.html';
var
  sHtml, sCookie: string;
begin
  try
    // First request: without the cookie the server returns the JS stub.
    sHtml := FetchPage(PageUrl, '');
    // Take the cookie the script would have set in a real browser.
    sCookie := ExtractJsCookie(sHtml);
    // Only retry when a cookie was actually found; otherwise the first
    // response is already the best content available.
    if sCookie <> '' then
      sHtml := FetchPage(PageUrl, sCookie);
  except
    on E: Exception do
    begin
      // Report the failure instead of silently showing empty or stale content.
      Memo1.Lines.Add('HTTP request failed: ' + E.Message);
      Exit;
    end;
  end;
  Memo1.Lines.Add(sHtml);
end;
Memo1中能正确得到页面内容。用了一个自己写的GetStr函数,需要 uses StrUtils。
D7+indy9通过测试