http://www.dce.com.cn/portal/cate?cid=1261730308135
我想把这个网页上的数据抓取下来,建立数据库。比如把品种“豆一”的成交量数据,从2009年1月1日到2012年12月31日全部抓取下来,该怎么做到?

解决方案 »

  1.   

    http://blog.csdn.net/supermanking/article/details/5989227
    http://blog.csdn.net/supermanking/article/details/3082860
    http://download.csdn.net/detail/SupermanKing/639058
      

  2.   

    用Excel:
    Sub 豆一的成交量()
        ' Queries the DCE MainServlet once per calendar day, from 2009-01-01
        ' through today, for variety code "a" and appends each day's result to
        ' the active worksheet below the last used cell of column A.
        '
        ' For every day that returns data, one heading cell is written (the text
        ' of the response's second <div>), followed by the rows of the response's
        ' second <table> (its first row is skipped).
        '
        ' Days the server reports as non-trading days are skipped. A day whose
        ' request or parsing fails is logged to the Immediate window and skipped,
        ' so a failure never causes the previous day's rows to be written again.
        ' A failure of the initial GET is not trapped and is raised to the caller.
        '
        ' Variety codes accepted by Pu00011_Input.variety:
        '   a   soybean No.1
        '   b   soybean No.2
        '   c   corn
        '   j   coke
        '   jm  coking coal
        '   l   polyethylene
        '   m   soybean meal
        '   p   palm oil
        '   v   polyvinyl chloride
        '   y   soybean oil
        '   s   soybean

        Const BASE_URL As String = "http://www.dce.com.cn/PublicWeb/MainServlet"
        Const SEARCH_URL As String = BASE_URL & "?action=Pu00011_search"
        Const VARIETY As String = "a"
        ' Marker text the server returns for a non-trading day.
        Const NOT_TRADING_DAY As String = "*不是交易日*"

        Dim oDoc As Object          ' scratch HTML document used to parse responses
        Dim http As Object          ' WinHttpRequest reused for every request
        Dim tbls As Object          ' all <table> elements of the current response
        Dim r As Object             ' rows of the result table
        Dim firstDay As Date
        Dim lastDay As Date
        Dim offset As Long
        Dim rq1 As String           ' query date formatted as yyyymmdd
        Dim html As String
        Dim n As Long, i As Long, j As Long

        Set oDoc = CreateObject("htmlfile")
        Set http = CreateObject("WinHttp.WinHttpRequest.5.1")

        ' Load the search page once before posting queries.
        ' NOTE(review): presumably this establishes a session/cookie - confirm.
        http.Open "GET", SEARCH_URL, False
        http.setRequestHeader "Connection", "Keep-Alive"
        http.Send

        ' Fix the date range once so a run that crosses midnight cannot shift it.
        firstDay = DateSerial(2009, 1, 1)
        lastDay = Date

        On Error GoTo DayFailed
        For offset = 0 To DateDiff("d", firstDay, lastDay)
            rq1 = Format(firstDay + offset, "yyyymmdd")

            http.Open "POST", BASE_URL, True
            http.setRequestHeader "Referer", SEARCH_URL
            http.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
            http.setRequestHeader "Connection", "Keep-Alive"
            ' Submit=%B2%E9+%D1%AF is the URL-encoded caption of the query button.
            http.Send "action=Pu00011_result&Pu00011_Input.trade_date=" & rq1 & _
                      "&Pu00011_Input.variety=" & VARIETY & _
                      "&Pu00011_Input.trade_type=0&Submit=%B2%E9+%D1%AF"
            http.WaitForResponse
            html = http.responseText

            If Not (html Like NOT_TRADING_DAY) Then
                oDoc.body.innerHTML = html
                Set tbls = oDoc.All.tags("table")
                ' The result table is the second table; skip the day if absent.
                If tbls.Length > 1 Then
                    Set r = tbls(1).Rows
                    n = Cells(Rows.Count, 1).End(xlUp).Row
                    ' Heading line for this day, taken from the second <div>.
                    Cells(n + 1, 1) = oDoc.All.tags("div")(1).innerText
                    ' Table row i lands on sheet row n + 1 + i, below the heading.
                    For i = 1 To r.Length - 1
                        For j = 0 To r(i).Cells.Length - 1
                            Cells(i + n + 1, j + 1) = r(i).Cells(j).innerText
                        Next j
                    Next i
                End If
            End If
    NextDay:
        Next offset
        Exit Sub

    DayFailed:
        ' Log the failed day and continue with the next one.
        Debug.Print "Skipped " & rq1 & ": error " & Err.Number & " - " & Err.Description
        Resume NextDay
    End Sub