我想采集别人网站的所有图片,不知道要如何做！！请高手赐教！！

比如说
我要采集百度所有美女图片，要如何采集？还有，采集图片的正则表达式要如何写！！请赐教！！

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

好吧，告诉你。
    /// <summary>
    /// Downloads the page at <paramref name="url"/> through getHtml and returns the raw
    /// text of every &lt;img ...&gt; tag found in it.
    /// </summary>
    /// <param name="url">Address of the page to scan.</param>
    /// <returns>
    /// The matched tags in document order; an empty list when the page contains no
    /// matching tag or getHtml returned an empty string.
    /// </returns>
    public static List<string> getImg(string url)
    {
        List<string> list = new List<string>();
        string htmlStr = getHtml(url);
        // HTML tag names are case-insensitive, so match <img>, <IMG>, <Img>, ... alike.
        MatchCollection matchs = Regex.Matches(htmlStr, @"<img[^>]+>", RegexOptions.IgnoreCase);
        foreach (Match m in matchs)
        {
            list.Add(m.Value);
        }
        return list;
    }
    // All image paths (a relative path is automatically turned into an absolute one)
    /// <summary>
    /// Downloads the page at <paramref name="url"/> through getHtml and returns the src
    /// value of every img tag whose path ends in a known image extension.
    /// </summary>
    /// <param name="url">Address of the page to scan.</param>
    /// <returns>
    /// Image addresses in document order. Values that do not start with http:// or
    /// https:// are prefixed with the result of getUrl(url).
    /// </returns>
    public static List<string> getImgPath(string url)
    {
        List<string> list = new List<string>();
        string htmlStr = getHtml(url);
        string pat = @"<img\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>";
        MatchCollection matches = Regex.Matches(htmlStr, pat, RegexOptions.IgnoreCase | RegexOptions.Multiline);
        foreach (Match m in matches)
        {
            string imgPath = m.Groups["imgUrl"].Value.Trim();

            // Second pass: keep only real image files and drop src values that point at pages.
            // Case-insensitive so that ".JPG" / ".PNG" are accepted; ".jpeg" is accepted too.
            if (!Regex.IsMatch(imgPath, @"\w+\.(gif|jpe?g|bmp|png)$", RegexOptions.IgnoreCase))
            {
                continue;
            }

            // Only a leading scheme makes the address absolute. A plain substring test would
            // wrongly treat a relative path such as "/img/http-logo.png" as downloadable as-is.
            bool isAbsolute = imgPath.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || imgPath.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
            if (!isAbsolute)
            {
                // NOTE(review): getUrl is not visible here; presumably it yields the base
                // address that relative paths are resolved against - confirm.
                imgPath = getUrl(url) + imgPath;
            }
            list.Add(imgPath);
        }
        return list;
    }
    // Download an image
    /// <summary>
    /// Downloads the file at <paramref name="fileurl"/> into the directory returned by
    /// Server.MapPath("") for the current HTTP context, under a timestamp-based file name.
    /// </summary>
    /// <param name="fileurl">
    /// Absolute address of the image, e.g. http://xxx.com/img/logo.jpg. Null, empty, or
    /// dot-less values are ignored.
    /// </param>
    public void DownloadImg(string fileurl)
    {
        // Same guard as before (no '.' means nothing to download), extended to null/empty.
        if (string.IsNullOrEmpty(fileurl) || !fileurl.Contains('.'))
        {
            return;
        }

        // Take the extension from the URL's path component only, so a query string or
        // fragment ("logo.jpg?v=1") cannot leak into the local file name. When the value is
        // not an absolute URI, fall back to the text after the last '.'.
        Uri parsed;
        string extension = Uri.TryCreate(fileurl, UriKind.Absolute, out parsed)
            ? System.IO.Path.GetExtension(parsed.AbsolutePath)
            : fileurl.Substring(fileurl.LastIndexOf('.'));

        // Timestamp-based local file name.
        string imgName = DateTime.Now.ToString("yyyyMMddHHmmssffff") + extension;
        string filepath = System.IO.Path.Combine(System.Web.HttpContext.Current.Server.MapPath(""), imgName);

        // WebClient is IDisposable; release it even when the download throws.
        using (WebClient mywebclient = new WebClient())
        {
            mywebclient.DownloadFile(fileurl, filepath);
        }
    }
直接用WebZip软件把网站拷下来，然后图片都在里面了
//少复制了一个方法
    /// <summary>
    /// Downloads the document at <paramref name="url"/> and returns its text, decoded with
    /// the encoding identified by getEncoding(url).
    /// </summary>
    /// <param name="url">Address of the page to fetch.</param>
    /// <returns>
    /// The response body, or an empty string when the status is not 200 OK, the reported
    /// Content-Length is 1 MiB or more, or any exception occurs.
    /// </returns>
    public static string getHtml(string url)
    {
        try
        {
            Encoding en = Encoding.GetEncoding(getEncoding(url));
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Headers.Set("Pragma", "no-cache");
            request.Timeout = 30000;

            // Dispose the response on every path; previously it was never closed when the
            // status/size check failed, which leaks the underlying connection.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= 1024 * 1024)
                {
                    return string.Empty;
                }

                using (Stream strM = response.GetResponseStream())
                using (StreamReader sr = new StreamReader(strM, en))
                {
                    return sr.ReadToEnd();
                }
            }
        }
        catch
        {
            // Deliberate best-effort: callers treat an empty string as "nothing to parse".
            return String.Empty;
        }
    }
哥，是不是还少一个getEncoding的方法！！
getHtml只有这个方法调用多次其他方法都没有调用啊~~~~~~~
CMS的有的是啊，采集规则我会写，什么站的？