这几天在研究数据抓取,现在想抓取芒果网站的酒店列表分页信息。
这个使用了POST请求验证,研究好半天也没有得到想要的信息,请大家指点一下。
地址:http://hotel.mangocity.com/list-she.html 。下面附上代码,postData 数据是通过 Firefox 监测得到的。
// Posts a captured GWT-RPC payload to the Mangocity hotel list URL and reads
// the whole response body into `content`.
// NOTE(review): GWT-RPC calls are usually posted to the service endpoint under the
// module base rather than to the HTML page itself — confirm the real request URL
// in the Firefox capture.
string url = "http://hotel.mangocity.com/list-she.html";

// Payload captured with Firefox. The header "7|0|12|" is followed by 12
// pipe-separated strings; the trailing integers come after them.
// NOTE(review): the check-in/check-out dates are hard-coded — presumably they must be
// current for the server to return results; verify.
string postData = "7|0|12|http://hotel.mangocity.com/hotelGWT/|BBFA476A78E8E6F35749FB2930CA9731|com.mangocity.client.HotelListSearchService|getHotelTemplate|com.mangocity.client.hotel.gwt.queryCondition.GWTQueryCondition/907978818||SHE|沈阳|1|您可以输入酒店名称查询|2012-08-16|2012-08-17|1|2|3|4|1|5|5|6|7|8|6|0|9|6|6|6|0|10|0|11|0|0|12|5|15|0|1|1|1|6|0|";

string content = null;
Encoding encoding = Encoding.UTF8;
byte[] data = encoding.GetBytes(postData);

// Configure the request.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.Method = "POST";
request.Host = "hotel.mangocity.com";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1";
request.ContentType = "text/x-gwt-rpc; charset=utf-8";
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
request.Referer = url;

CookieContainer cookieContainer = new CookieContainer();
request.CookieContainer = cookieContainer;

// Accept-Encoding is not sent: this code does not enable AutomaticDecompression,
// so a compressed body would be read as raw bytes.

// Both GWT headers must be sent. In the original paste the X-GWT-Module-Base call
// sat behind a `//` comment on the same line and was therefore never executed.
request.Headers.Add("X-GWT-Module-Base", "http://hotel.mangocity.com/hotelGWT/");
request.Headers.Add("X-GWT-Permutation", "3BE9A97BC33F30E2DDE95D4807626C8A");
request.ContentLength = data.Length;

// Write the request body; `using` closes the stream even if Write throws.
using (Stream outstream = request.GetRequestStream())
{
    outstream.Write(data, 0, data.Length);
}

// Get the response and read the returned body; every disposable is released
// on both the success and the failure path.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (Stream instream = response.GetResponseStream())
using (StreamReader sr = new StreamReader(instream, encoding))
{
    content = sr.ReadToEnd();
}
这个使用了POST请求验证,研究好半天也没有得到想要的信息,请大家指点一下。
地址:http://hotel.mangocity.com/list-she.html 。下面附上代码,postData 数据是通过 Firefox 监测得到的。
// Posts a captured GWT-RPC payload to the Mangocity hotel list URL and reads
// the whole response body into `content`.
// NOTE(review): GWT-RPC calls are usually posted to the service endpoint under the
// module base rather than to the HTML page itself — confirm the real request URL
// in the Firefox capture.
string url = "http://hotel.mangocity.com/list-she.html";

// Payload captured with Firefox. The header "7|0|12|" is followed by 12
// pipe-separated strings; the trailing integers come after them.
// NOTE(review): the check-in/check-out dates are hard-coded — presumably they must be
// current for the server to return results; verify.
string postData = "7|0|12|http://hotel.mangocity.com/hotelGWT/|BBFA476A78E8E6F35749FB2930CA9731|com.mangocity.client.HotelListSearchService|getHotelTemplate|com.mangocity.client.hotel.gwt.queryCondition.GWTQueryCondition/907978818||SHE|沈阳|1|您可以输入酒店名称查询|2012-08-16|2012-08-17|1|2|3|4|1|5|5|6|7|8|6|0|9|6|6|6|0|10|0|11|0|0|12|5|15|0|1|1|1|6|0|";

string content = null;
Encoding encoding = Encoding.UTF8;
byte[] data = encoding.GetBytes(postData);

// Configure the request.
HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
request.Method = "POST";
request.Host = "hotel.mangocity.com";
request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1";
request.ContentType = "text/x-gwt-rpc; charset=utf-8";
request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
request.Referer = url;

CookieContainer cookieContainer = new CookieContainer();
request.CookieContainer = cookieContainer;

// Accept-Encoding is not sent: this code does not enable AutomaticDecompression,
// so a compressed body would be read as raw bytes.

// Both GWT headers must be sent. In the original paste the X-GWT-Module-Base call
// sat behind a `//` comment on the same line and was therefore never executed.
request.Headers.Add("X-GWT-Module-Base", "http://hotel.mangocity.com/hotelGWT/");
request.Headers.Add("X-GWT-Permutation", "3BE9A97BC33F30E2DDE95D4807626C8A");
request.ContentLength = data.Length;

// Write the request body; `using` closes the stream even if Write throws.
using (Stream outstream = request.GetRequestStream())
{
    outstream.Write(data, 0, data.Length);
}

// Get the response and read the returned body; every disposable is released
// on both the success and the failure path.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
using (Stream instream = response.GetResponseStream())
using (StreamReader sr = new StreamReader(instream, encoding))
{
    content = sr.ReadToEnd();
}
postData 里面后面的那串数字是什么意思?