GET /comments/~ajax/vpcommentContent.html? 参数:__ap={"id":"XMzcyNjAwNzM2","sid":354909363,"page":4,"last_modify":1333031704} __callback=displayComments __ai=
// Truncated copy of the download snippet that appears in full further down:
// it opens the comments AJAX URL and the output file but never copies any data.
// The __ap query parameter is a JSON object with its double quotes written as %22.
URLConnection conn = new URL("http://comments.youku.com/comments/~ajax/vpcommentContent.html?__ap={%22id%22:%22XMzcyNjAwNzM2%22,%22sid%22:354909363,%22page%22:4,%22last_modify%22:1333031704}&__ai=&__callback=displayComments").openConnection();
// Stream from which the response body can be read.
InputStream is = conn.getInputStream();
// Destination file for the response body.
OutputStream os = new FileOutputStream("o:\\save.html");
参数:__ap={"id":"XMzcyNjAwNzM2","sid":354909363,"page":4,"last_modify":1333031704}
__callback=displayComments
__ai=
// Downloads one page of the comments AJAX response and saves the raw bytes to a file.
// The __ap query parameter is a JSON object with its double quotes written as %22.
URLConnection conn = new URL("http://comments.youku.com/comments/~ajax/vpcommentContent.html?__ap={%22id%22:%22XMzcyNjAwNzM2%22,%22sid%22:354909363,%22page%22:4,%22last_modify%22:1333031704}&__ai=&__callback=displayComments").openConnection();
InputStream is = conn.getInputStream();
try {
    OutputStream os = new FileOutputStream("o:\\save.html");
    try {
        byte[] buffer = new byte[2048];
        int length;
        // read() returns -1 at end of stream; otherwise the number of bytes placed in buffer.
        while (-1 != (length = is.read(buffer, 0, buffer.length))) {
            os.write(buffer, 0, length);
        }
    } finally {
        // Close the file even when reading or writing fails part-way through.
        os.close();
    }
} finally {
    // Close the network stream even when the output file could not be opened.
    is.close();
}
// Requests the video page and then one page of its comments through Commons HttpClient,
// printing both status codes and the raw comment response body.
HttpClient client = new HttpClient();
GetMethod get = new GetMethod("http://v.youku.com/v_playlist/f17252787o1p0.html#replyLocation");
GetMethod get2 = new GetMethod("http://comments.youku.com/comments/~ajax/vpcommentContent.html?__ap=%7B%22id%22%3A%22XMzcyOTQ0MTY4%22%2C%22sid%22%3A355016202%2C%22page%22%3A2%2C%22last_modify%22%3A1333080602%7D&__ai=&__callback=displayComments");
try {
    try {
        // NOTE(review): the body of this first response is never read; presumably the
        // request is made only for its side effects (e.g. cookies) - confirm.
        System.out.println(client.executeMethod(get));
    } finally {
        // Give the connection back before the next request reuses the client.
        get.releaseConnection();
    }
    System.out.println(client.executeMethod(get2));
    System.out.println(get2.getResponseBodyAsString());
} catch (HttpException e) {
    e.printStackTrace();
} catch (IOException e) {
    e.printStackTrace();
} finally {
    // Always release the connection, whether or not the request succeeded.
    get2.releaseConnection();
}
<div class=\"comment\" id=\"comment354915617\">\r\n<div class=\"bar\">\r\n\t <a href=\"http:\/\/u.youku.com\/user_show\/id_UMzI1MzM1MDUy.html\" charset=\"400-4\" target=\"_blank\" id=comment_name_354915617 name=coment_name_354915617>kaka123456782<\/a>\r\n<\/div>\r\n\t<div class=\"con\">\r\n\t\t\t\t\t\t\t\t<div class=\"text\" id=\"content_354915617\" name=\"content_354915617\">\r\n\t\t\t\t\t\t<p id=\"content_4f7475f62e5b94e433000002\">\u90a32\u4e2a\u8001\u4eba\u4e00\u4e2a\u662f\u4e2a\u8001\u6d41\u6c13 \u4e00\u4e2a\u662f\u5f53\u4fdd\u5b89\u90a3\u4e2a\u4e5f\u662f\u6d41\u6c13\u51fa\u8eab\u7684 \u5929\u5929\u6b3a\u8d1f\u90a3\u9ed1\u8863\u670d\u8001\u5988 \u4ed6\u8001\u7238\u6b7b\u7684\u65e9\u5c31\u8001\u5988\u5728\u90a3\u5de5\u5382\u4e0a\u73ed\u5929\u5929\u88ab\u90a3\u8001\u6d41\u6c13\u6027\u68a2\u6270 \u624d\u53eb\u4e0a\u4ed6\u90a3\u8fdc\u623f\u8868\u5f1f\u6765\u5e2e\u5fd9\u7684222222<br \/> <\/p>Unicode编码 可以转成汉字
package other;import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;public class 获取视频评论 {
/**
 * Percent-encodes a string into the {@code %xx} / {@code %uxxxx} form that
 * {@link #unescape(String)} reads back.
 *
 * <p>Characters for which {@link Character#isDigit(char)},
 * {@link Character#isLowerCase(char)} or {@link Character#isUpperCase(char)} is true
 * are copied unchanged. Any other character below 256 becomes {@code %} followed by
 * two lowercase hex digits; any other character becomes {@code %u} followed by four
 * lowercase hex digits.
 *
 * @param src the text to encode; must not be null
 * @return the encoded text
 */
public static String escape (String src) {
    // Worst case: every character expands to a six-character "%uxxxx" sequence.
    StringBuilder tmp = new StringBuilder(src.length() * 6);
    for (int i = 0; i < src.length(); i++) {
        char j = src.charAt(i);
        if (Character.isDigit(j) || Character.isLowerCase(j) || Character.isUpperCase(j)) {
            tmp.append(j);
        } else if (j < 256) {
            tmp.append('%');
            if (j < 16) {
                tmp.append('0');
            }
            tmp.append(Integer.toString(j, 16));
        } else {
            tmp.append("%u");
            // Values 0x100..0xFFF print as three hex digits; pad to four so that
            // unescape, which always consumes four digits after "%u", can decode them.
            if (j < 0x1000) {
                tmp.append('0');
            }
            tmp.append(Integer.toString(j, 16));
        }
    }
    return tmp.toString();
}
/**
 * Decodes {@code %xx} and {@code %uxxxx} sequences back into characters.
 *
 * <p>{@code %u} followed by four hex digits yields the character with that code;
 * {@code %} followed by two hex digits yields the character with that code.
 * A {@code %} that does not start a complete, valid sequence (for example a trailing
 * {@code %}, or {@code %zz}) is copied to the output unchanged instead of raising an
 * exception.
 *
 * @param src the text to decode; must not be null
 * @return the decoded text
 */
public static String unescape (String src) {
    int len = src.length();
    StringBuilder tmp = new StringBuilder(len);
    int pos = 0;
    while (pos < len) {
        int next = src.indexOf('%', pos);
        if (next < 0) {
            // No more escape sequences: copy the remainder verbatim.
            tmp.append(src, pos, len);
            break;
        }
        // Copy the literal run that precedes the '%'.
        tmp.append(src, pos, next);

        int code;
        if (next + 1 < len && src.charAt(next + 1) == 'u'
                && (code = parseHex(src, next + 2, 4)) >= 0) {
            tmp.append((char) code);
            pos = next + 6;
        } else if ((code = parseHex(src, next + 1, 2)) >= 0) {
            tmp.append((char) code);
            pos = next + 3;
        } else {
            // Malformed or truncated sequence: keep the '%' as an ordinary character.
            tmp.append('%');
            pos = next + 1;
        }
    }
    return tmp.toString();
}

/**
 * Parses exactly {@code count} hex digits of {@code s} starting at {@code start}.
 *
 * @return the parsed value, or -1 if the range runs past the end of {@code s}
 *         or contains a character that is not a hex digit
 */
private static int parseHex(String s, int start, int count) {
    if (start + count > s.length()) {
        return -1;
    }
    int value = 0;
    for (int k = start; k < start + count; k++) {
        int digit = Character.digit(s.charAt(k), 16);
        if (digit < 0) {
            return -1;
        }
        value = value * 16 + digit;
    }
    return value;
}
/**
 * Requests the video page, then one page of its comments, and prints the text of
 * every comment found in the response.
 *
 * <p>The comment response carries HTML with escaped quotes ({@code \"}) and
 * non-ASCII text written as {@code \}{@code uXXXX}. Each captured comment has its
 * backslashes turned into {@code %} so that {@link #unescape(String)} can decode it.
 *
 * @param args unused
 */
public static void main(String[]args){
    HttpClient client = new HttpClient();
    GetMethod get = new GetMethod("http://v.youku.com/v_playlist/f17252787o1p0.html#replyLocation");
    GetMethod get2 = new GetMethod("http://comments.youku.com/comments/~ajax/vpcommentContent.html?__ap=%7B%22id%22%3A%22XMzcyOTQ0MTY4%22%2C%22sid%22%3A355016202%2C%22page%22%3A2%2C%22last_modify%22%3A1333080602%7D&__ai=&__callback=displayComments");
    // Captures the text between the opening <p id="content_..."> tag and the next '<'.
    Pattern pp = Pattern.compile("<p id=\\\"content_.*?>(.*?)<");
    try {
        try {
            // NOTE(review): the body of this first response is never read; presumably the
            // request is made only for its side effects (e.g. cookies) - confirm.
            System.out.println(client.executeMethod(get));
        } finally {
            // Give the connection back before the next request reuses the client.
            get.releaseConnection();
        }
        System.out.println(client.executeMethod(get2));
        String rsult = get2.getResponseBodyAsString();
        if (rsult != null) {
            // Turn the escaped quotes (\") into plain quotes so the pattern can match.
            rsult = rsult.replace("\\\"", "\"");
            Matcher mm = pp.matcher(rsult);
            while (mm.find()) {
                // Rewrite every backslash as '%' so unescape() can decode the text.
                String yy = mm.group(1).replace("\\", "%");
                System.out.println(unescape(yy));
            }
        }
    } catch (HttpException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the connection, whether or not the request succeeded.
        get2.releaseConnection();
    }
    System.out.println();
}
}
花了一大晚上的时间才弄出了这个类……纠结的 Java 编码。
谢谢你哦,昨天没来看!还有一点问题再麻烦您一下:为什么控制台输出的 \,在 replaceAll 里面需要用 \\\\ 来转义?另外,unescape 函数具体能讲解一下吗?