我想做一个数据采集器,把中国体育彩票的开奖信息抓取出来。
不知道该从哪里入手,请详细指点一下。(标签:网站、数据采集、彩票)

解决方案 »

  1.   

    可以通过 HTTP 协议获取中国体育彩票网站的页面,然后对获取到的 HTML 用正则表达式匹配出开奖信息。
      

  2.   

        protected void Button1_Click(object sender, EventArgs e)
        {
            WebRequest wc = HttpWebRequest.Create("http://www.cznd.gov.cn/node/jrgxq_qnyw/2013-7-5/137512575342148320.html");
            wc.ContentType = "application/x-www-form-urlencoded;charset=gb2312";
            using (WebResponse wq = wc.GetResponse())
            {
                using (Stream s = wq.GetResponseStream())
                {
                    using (StreamReader sr = new StreamReader(s, Encoding.GetEncoding("gb2312")))
                    {
                        string html = sr.ReadToEnd();
                        Match m = Regex.Match(html, @"(?i)<td[^>]*?class=(['""]?)NewsContent\1[^>]*?>\s*?<p[^>]*?>\s*?([\s\S]*?)</p>");
                        string result = m.Groups[2].Value;
                        Console.Write(result);
                        Console.ReadLine();
                    }
                }
            }
        }
    以上是前几天看到的一个案例,可以参考。
      

  3.   

    </div><TABLE width="366" align="center" cellpadding="0" cellspacing="0" style="color: #4a4a48;"><TR bgcolor="#ececec" align="center"><TD width="54" height="24">玩法</TD><TD width="50">期号</TD><TD width="166">开奖号</TD><TD width="32"><FONT style="font-size: 13px;">详情</FONT></TD><TD width="32"><FONT style="font-size: 13px;">历史</FONT></TD><TD width="32"><FONT style="font-size: 13px;">图表</FONT></TD></TR><TR align="center"><TD height="40"><FONT>大乐透</FONT></TD><TD>13082 </TD><TD align="left"><TABLE width='159' height='21' align='left' cellpadding='0' cellspacing='0' style='color: #ffffff;font-weight:bold;font-family: 宋体;'><TR align='center'><TD width='21' background='/images/20055.gif' style='color: #ffffff'>03</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>09</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>25</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>26</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>33</TD><TD width='2'></TD><TD width='21' background='/images/20056.gif' style='color: #ffffff'>03</TD><TD width='2'></TD><TD width='21' background='/images/20056.gif' style='color: #ffffff'>12</TD></TR></TABLE></TD><TD><A href='/news/11010219.shtml' target='_blank'><IMG src='/images/20014.gif' border='0' /></A></TD><TD><A href='/lottery/dlt/History.aspx' target="_blank"><IMG src="/images/20016.gif" border="0" /></A></TD><TD><A href='http://data.lottery.gov.cn/chart_tc2/chart.shtml?LotID=23529&ChartID=20001&StatType=0&MinIssue=2012026&MaxIssue=2012125&IssueTop=100&tab=0' target="_blank"><IMG src="/images/20017.gif" border="0" /></A></TD></TR><TR><TD colspan='7' height='1' background='/images/20022.gif'></TD></TR><TR align="center"><TD height="40"><FONT>排列3</FONT></TD><TD>13191 </TD><TD align="left"><TABLE width='67' height='21' align='left' cellpadding='0' cellspacing='0' style='color: 
#000000;font-weight:bold;font-family: 宋体;'><TR align='center'><TD width='21' background='/images/20057.gif' style='color: #ffffff'>4</TD><TD width='2'></TD><TD width='21' background='/images/20057.gif' style='color: #ffffff'>7</TD><TD width='2'></TD><TD width='21' background='/images/20057.gif' style='color: #ffffff'>3</TD></TR></TABLE></TD><TD><A href='/news/11010220.shtml' target='_blank'><IMG src='/images/20014.gif' border='0' /></A></TD><TD><A href='/lottery/pls/History.aspx' target="_blank"><IMG src="/images/20016.gif" border="0" /></A></TD><TD><A href='http://data.lottery.gov.cn/chart_tc2/chart.shtml?LotID=33&ChartID=20001&StatType=0&MinIssue=2012263&MaxIssue=2012292&IssueTop=30&tab=0' target="_blank"><IMG src="/images/20017.gif" border="0" /></A></TD></TR><TR><TD colspan='7' height='1' background='/images/20022.gif'></TD></TR><TR align="center"><TD height="40"><FONT>排列5</FONT></TD><TD>13191 </TD><TD align="left"><TABLE width='113' height='21' align='left' cellpadding='0' cellspacing='0' style='color: #000000;font-weight:bold;font-family: 宋体;'><TR align='center'><TD width='21' background='/images/20057.gif' style='color: #ffffff'>4</TD><TD width='2'></TD><TD width='21' background='/images/20057.gif' style='color: #ffffff'>7</TD><TD width='2'></TD><TD width='21' background='/images/20057.gif' style='color: #ffffff'>3</TD><TD width='2'></TD><TD width='21' background='/images/20057.gif' style='color: #ffffff'>4</TD><TD width='2'></TD><TD width='21' background='/images/20057.gif' style='color: #ffffff'>4</TD></TR></TABLE></TD><TD><A href='/news/11010221.shtml' target='_blank'><IMG src='/images/20014.gif' border='0' /></A></TD><TD><A href='/lottery/plw/History.aspx' target="_blank"><IMG src="/images/20016.gif" border="0" /></A></TD><TD><A href='http://data.lottery.gov.cn/chart_tc2/chart.shtml?LotID=35&ChartID=20001&StatType=0&MinIssue=&MaxIssue=&IssueTop=30' target="_blank"><IMG src="/images/20017.gif" border="0" /></A></TD></TR><TR><TD colspan='7' height='1' 
background='/images/20022.gif'></TD></TR><TR align="center"><TD height="40"><FONT>22选5</FONT></TD><TD>13172 </TD><TD align="left"><TABLE width='113' height='21' align='left' cellpadding='0' cellspacing='0' style='color: #ffffff;font-weight:bold;font-family: 宋体;'><TR align='center'><TD width='21' background='/images/20055.gif' style='color: #ffffff'>08</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>09</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>14</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>15</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>19</TD></TR></TABLE></TD><TD><A href='/news/11009537.shtml' target='_blank'><IMG src='/images/20014.gif' border='0' /></A></TD><TD><A href='/lottery/eexw/History.aspx' target="_blank"><IMG src="/images/20016.gif" border="0" /></A></TD><TD><A href='http://data.lottery.gov.cn/chart_tc2/chart.shtml?LotID=23525&ChartID=20001&StatType=0&MinIssue=&MaxIssue=&IssueTop=30' target="_blank"><IMG src="/images/20017.gif" border="0" /></A></TD></TR><TR><TD colspan='7' height='1' background='/images/20022.gif'></TD></TR><TR align="center"><TD height="40"><FONT>7星彩</FONT></TD><TD>13082 </TD><TD align="left"><TABLE width='159' height='21' align='left' cellpadding='0' cellspacing='0' style='color: #ffffff;font-weight:bold;font-family: 宋体;'><TR align='center'><TD width='21' background='/images/20055.gif' style='color: #ffffff'>6</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>0</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>8</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>0</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>1</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' 
style='color: #ffffff'>4</TD><TD width='2'></TD><TD width='21' background='/images/20055.gif' style='color: #ffffff'>8</TD></TR></TABLE></TD><TD><A href='/news/11010180.shtml' target='_blank'><IMG src='/images/20014.gif' border='0' /></A></TD><TD><A href='/lottery/qxc/History.aspx' target="_blank"><IMG src="/images/20016.gif" border="0" /></A></TD><TD><A href='http://data.lottery.gov.cn/chart_tc2/chart.shtml?LotID=10022&ChartID=20001&StatType=0&MinIssue=&MaxIssue=&IssueTop=30' target="_blank"><IMG src="/images/20017.gif" border="0" /></A></TD></TR></tr><tr><TR><TD colspan="6" height="31" background="/images/20078.gif"><TABLE width="360" align="center" cellpadding="0" cellspacing="0" border="0"><TR><TD colspan="2" height="2"></TD></TR><TR><TD width="65"></TD><TD width="295">超级大乐透 <span id="LabelDLT" class="FontPool">1.71 亿元</span>&nbsp;&nbsp; 派奖 <span id="LabelQXC" class="FontPool">500 万元</span></TD></TR></TABLE></TD></TR></TABLE><SCRIPT type="text/javascript">var _bdhmProtocol = (("https:" == document.location.protocol) ? " https://" : " http://"); document.write(unescape("%3Cscript src='" + _bdhmProtocol + "hm.baidu.com/h.js%3F8929ffae85e1c07a7ded061329fbf441' type='text/javascript'%3E%3C/script%3E")); </SCRIPT></form></BODY></HTML>怎么写正则取出如下数据
      

  4.   

    其实主页面采用了框架 iframe,指向的地址为 http://www.lottery.gov.cn/lottery/draws/Global.aspx ,因此只要获取该地址的内容就可以了。示例代码如下:
                WebRequest wc = HttpWebRequest.Create("http://www.lottery.gov.cn/lottery/draws/Global.aspx");
                wc.ContentType = "application/x-www-form-urlencoded;charset=gb2312";
                using (WebResponse wq = wc.GetResponse())
                {
                    using (Stream s = wq.GetResponseStream())
                    {
                        using (StreamReader sr = new StreamReader(s, Encoding.GetEncoding("utf-8")))
                        {
                            string html = sr.ReadToEnd();
                            
                            string pattern=@"(?i)<tr((?!.*?bgcolor)[^>]*?)>\s*?<td[^>]*?>\s*?<font>([^>]*?)</font>\s*?</td>\s*?<td[^>]*?>([^<>]*?)</td>\s*?<td[^>]*?>\s*?<table[^>]*?>[\s\S]*?(<td[^>]*?>((?<Num>\d+)|\s*?)</td>)*?\s*?</tr>\s*?[\s\S]*?</table>";
                            var result = Regex.Matches(html, pattern).OfType<Match>().Select(a => new { 
                                玩法=a.Groups[2].Value,
                                期号=a.Groups[3].Value,
                                开奖号=string.Join(" ",a.Groups["Num"].Captures.OfType<Capture>().Select(b=>b.Value))
                            });
                            /*
                             + [0] { 玩法 = "大乐透", 期号 = "13082 ", 开奖号 = "03 09 25 26 33 03 12" } <Anonymous Type>
                            + [1] { 玩法 = "排列3", 期号 = "13191 ", 开奖号 = "4 7 3" } <Anonymous Type>
                            + [2] { 玩法 = "排列5", 期号 = "13191 ", 开奖号 = "4 7 3 4 4" } <Anonymous Type>
                            + [3] { 玩法 = "22选5", 期号 = "13172 ", 开奖号 = "08 09 14 15 19" } <Anonymous Type>
                            + [4] { 玩法 = "7星彩", 期号 = "13082 ", 开奖号 = "6 0 8 0 1 4 8" } <Anonymous Type>                         */                    }
                    }
      

  5.   

    这就是爬取数据。
    主要还是 HTML 分析,可以使用 HtmlAgilityPack,参考:http://www.cnblogs.com/wangchuang/archive/2013/03/11/2953638.html