在线等！日本Yahoo搜索结果页面，程序抓到的页面格式和从浏览器里看到的不一样

      先贴下代码
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using System.IO;
using System.Net;
using System.Data.Common;
using System.Data.SqlClient;
using System.Text.RegularExpressions;
using System.Collections;
using System.Configuration;
using System.Web;namespace yahooCache
{
    /// <summary>
    /// Main form. When it loads, it downloads a fixed Yahoo! Japan search-result page
    /// and saves the page text to a local HTML file.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Detects the charset declared by the page, downloads the page text using that
        /// charset, and writes the text to disk in the same encoding.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            string url = "http://search.yahoo.co.jp/search?p=%E3%83%AF%E3%83%BC%E3%82%AD%E3%83%B3%E3%82%B0%E3%83%9B%E3%83%AA%E3%83%87%E3%83%BC&search.x=1&fr=top_ga1_sa&tid=top_ga1_sa&ei=UTF-8&aq=&oq=";
            string encod = GetEncoding(url);
            if (string.IsNullOrEmpty(encod))
            {
                // GetEncoding returns "" when the page declares no charset, and an empty
                // name is not accepted by Encoding.GetEncoding; fall back to UTF-8.
                encod = "utf-8";
            }

            string a = GetStringByUrl(url, encod);

            // FileMode.Create truncates an existing file. The previous OpenOrCreate left
            // stale bytes behind whenever the old file was longer than the new content.
            // Writing with the page's own encoding keeps CJK text readable when the file
            // is opened in a plain text editor such as Notepad.
            using (FileStream fs = new FileStream(@"e:\yahoo.co.jp_14.html", FileMode.Create, FileAccess.Write))
            using (StreamWriter sw = new StreamWriter(fs, Encoding.GetEncoding(encod)))
            {
                sw.WriteLine(a);
            }
        }

        /// <summary>
        /// Downloads the resource at <paramref name="strUrl"/> and decodes it as text.
        /// </summary>
        /// <param name="strUrl">URL of the page to download.</param>
        /// <param name="encod">Name of the encoding used to decode the response body.</param>
        /// <returns>The complete response body as a string.</returns>
        private string GetStringByUrl(string strUrl, string encod)
        {
            WebRequest request = WebRequest.Create(strUrl);

            // Dispose the response, its stream and the reader even when reading throws,
            // so the underlying connection is always released.
            using (WebResponse response = request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding(encod)))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Downloads the page at <paramref name="url"/> and extracts the charset name
        /// declared in its first &lt;meta&gt; tag that carries a charset.
        /// </summary>
        /// <param name="url">URL of the page to inspect.</param>
        /// <returns>
        /// The declared charset name, or an empty string when no meta charset is found.
        /// </returns>
        public string GetEncoding(string url)
        {
            byte[] myDataBuffer;
            using (WebClient myWebClient = new WebClient())
            {
                myWebClient.Credentials = CredentialCache.DefaultCredentials;
                myDataBuffer = myWebClient.DownloadData(url);
            }

            // Only the meta tag is of interest here, so a provisional decoding is enough
            // to locate the charset declaration.
            string strWebData = Encoding.Default.GetString(myDataBuffer);

            // Capture only the charset token. This handles both
            //   <meta http-equiv="Content-Type" content="text/html; charset=EUC-JP">
            // and <meta charset="utf-8">, and stops at the closing quote instead of
            // running on to the last quote before the next tag.
            Match charSetMatch = Regex.Match(
                strWebData,
                "<meta[^>]*?charset\\s*=\\s*[\"']?([A-Za-z0-9_.:-]+)",
                RegexOptions.IgnoreCase);

            // Groups[1].Value is "" when the match failed.
            return charSetMatch.Groups[1].Value;
        }
    }
}
这是我写的下载页面的程序，但是下载下来的页面却和从浏览器里直接搜索出来的结果不一样，有哪位高手可以帮我分析下，是我代码里缺少什么还是其他别的原因，旨在“程序下载的页面和浏览器里看到的一致”。

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

补充一点：这是程序抓取到的页面。
如果在www.yahoo.co.jp里搜索相同的关键字得到的页面是：
两个页面的格式是完全不一样的
刚才发错了，我要发的是url。这是程序抓到的页面： http://clockoo.com/photo/20090618/yahoo.co.jp_14.html 。如果在www.yahoo.co.jp里搜索相同的关键字，得到的页面是：
http://search.yahoo.co.jp/search?p=%E3%83%AF%E3%83%BC%E3%82%AD%E3%83%B3%E3%82%B0%E3%83%9B%E3%83%AA%E3%83%87%E3%83%BC&search.x=1&fr=top_ga1_sa&tid=top_ga1_sa&ei=UTF-8&aq=&oq=
两个页面的格式是完全不一样的
       //得到页面
        /// <summary>
        /// Downloads the resource at <paramref name="strUrl"/> and decodes it as text.
        /// </summary>
        /// <param name="strUrl">URL of the page to download.</param>
        /// <param name="encod">Name of the encoding used to decode the response body.</param>
        /// <returns>The complete response body as a string.</returns>
        private string GetStringByUrl(string strUrl, string encod)
        {
            WebRequest wrt = WebRequest.Create(strUrl);

            // Dispose the response, its stream and the reader even when reading throws,
            // so the underlying connection is always released.
            using (WebResponse wrse = wrt.GetResponse())
            using (Stream strM = wrse.GetResponseStream())
            using (StreamReader SR = new StreamReader(strM, Encoding.GetEncoding(encod)))
            {
                return SR.ReadToEnd();
            }
        } try       // Fetch the page
        /// <summary>
        /// Downloads the resource at <paramref name="strUrl"/> with browser-like request
        /// headers and decodes it as text.
        /// </summary>
        /// <param name="strUrl">URL of the page to download.</param>
        /// <param name="encod">Name of the encoding used to decode the response body.</param>
        /// <returns>The complete response body as a string.</returns>
        private string GetStringByUrl(string strUrl, string encod)
        {
            WebRequest wrt = WebRequest.Create(strUrl);

            // The HTTP header is named "User-Agent" and, on HttpWebRequest, has to be set
            // through the UserAgent property. Headers.Add("UserAgent", ...) only sends an
            // unrelated header literally called "UserAgent".
            HttpWebRequest httpRequest = wrt as HttpWebRequest;
            if (httpRequest != null)
            {
                httpRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
            }

            wrt.ContentType = "text/html; charset=utf-8";  // Can be omitted if UTF-8 is not needed
            wrt.Headers.Add("Accept-Charset", "UTF-8");    // Can be omitted if UTF-8 is not needed
            // Original author's note: for Yahoo a proxy is usually configured here as well.

            // Dispose the response, its stream and the reader even when reading throws,
            // so the underlying connection is always released.
            using (WebResponse wrse = wrt.GetResponse())
            using (Stream strM = wrse.GetResponseStream())
            using (StreamReader SR = new StreamReader(strM, Encoding.GetEncoding(encod)))
            {
                return SR.ReadToEnd();
            }
        }
试过了你的方法，还是得不到跟浏览器一样的效果，“一般做yahoo的话我还要加 proxy 的”这句是什么意思
很明显  编码不一致
http://search.yahoo.co.jp/search?p=%E3%83%AF%E3%83%BC%E3%82%AD%E3%83%B3%E3%82%B0%E3%83%9B%E3%83%AA%E3%83%87%E3%83%BC&search.x=1&fr=top_ga1_sa&tid=top_ga1_sa&ei=UTF-8&aq=&oq=
是UTF-8
http://clockoo.com/photo/20090618/yahoo.co.jp_14.html
这个是EUC-JP
这个我也发现了，但是程序下载下来的的确是EUC-JP编码的页面，我想知道的就是为什么程序下载的页面会和浏览器搜索到的不一样。
确实是少了赞助商广告链接，楼主有详细去了解这个日本的雅虎吗？或者看看它的API 。据我所了解不同区域的yahoo，变化是很大的。
大家有新闻抓取程序吗，发个给我吧，[email protected]
1. 以Js动态写入页面的内容你是抓不到的（Ajax）。
2. Yahoo可能会根据不同的浏览器显示不同的结果页，所以要加上Headers，类似：
request.Headers.Add ( "UserAgent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)" );
3. Yahoo可能也会判断你的客户端Cookie支持及是否有特定Cookie等，所以还要加上一定的Cookie信息，具体可以用一些浏览器插件进行查看。