我想获取 http://v.youku.com/v_show/id_XMzI5NTIyNzMy.html 页面源码中 <div class="stat stat_area">内容</div> 这个 DIV 里的内容,可是提取结果为空,正则表达式应该也没错,请高手进来帮个忙。
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small scraping helper: downloads a page and extracts every fragment that
 * matches a regular expression.
 */
public class testtest {

    /**
     * Downloads the resource behind {@code url} and returns its text.
     *
     * <p>The bytes are decoded as UTF-8 instead of the platform default
     * charset, so the result does not depend on the machine running the code.
     * NOTE(review): UTF-8 is assumed for the target page; confirm against the
     * page's declared charset if other sites are scraped.
     *
     * @param url location of the page to download
     * @return the text read so far; if an I/O error occurs the stack trace is
     *         printed and whatever was read before the failure is returned
     */
    public StringBuffer getContent(URL url)
    {
        StringBuffer contentBuffer = new StringBuffer();
        try {
            InputStreamReader istreamReader = new InputStreamReader(url.openStream(), "UTF-8");
            try {
                // Read in chunks; read() returns -1 once the end of the stream is reached.
                char[] chunk = new char[4096];
                int count;
                while ((count = istreamReader.read(chunk)) != -1) {
                    contentBuffer.append(chunk, 0, count);
                }
            } finally {
                // Always release the underlying connection, even on a failed read.
                istreamReader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return contentBuffer;
    }

    /**
     * Finds every match of {@code patternString} in {@code contentString} and
     * returns the matched fragments concatenated in order of appearance.
     *
     * <p>The pattern is compiled with {@link Pattern#DOTALL} so that {@code .}
     * also matches line terminators. Without it an expression such as
     * {@code <div ...>(.*?)</div>} can never match an element whose content
     * spans several lines, which is why the extraction used to come back empty.
     *
     * @param contentString text to search; {@code null} is treated as empty
     * @param patternString regular expression to look for
     * @return all complete matches joined together, or an empty string when
     *         nothing matches
     */
    public String getVideoInf(StringBuffer contentString, String patternString)
    {
        if (contentString == null) {
            return "";
        }
        Pattern pattern = Pattern.compile(patternString, Pattern.DOTALL);
        Matcher matcher = pattern.matcher(contentString);
        StringBuilder matches = new StringBuilder();
        while (matcher.find()) {
            matches.append(matcher.group());
        }
        return matches.toString();
    }

    /**
     * Downloads the sample video page and prints the statistics block.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        testtest ddTesttest = new testtest();
        String urlStr = "http://v.youku.com/v_show/id_XMzI5NTIyNzMy.html";
        try {
            StringBuffer stringBuffer = ddTesttest.getContent(new URL(urlStr));
            String temp = ddTesttest.getVideoInf(stringBuffer, "<div class=\"stat stat_area\">(.*?)</div>");
            System.out.println("test:\n" + temp);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }
}
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small scraping helper: downloads a page and extracts every fragment that
 * matches a regular expression.
 */
public class testtest {

    /**
     * Downloads the resource behind {@code url} and returns its text.
     *
     * <p>The bytes are decoded as UTF-8 instead of the platform default
     * charset, so the result does not depend on the machine running the code.
     * NOTE(review): UTF-8 is assumed for the target page; confirm against the
     * page's declared charset if other sites are scraped.
     *
     * @param url location of the page to download
     * @return the text read so far; if an I/O error occurs the stack trace is
     *         printed and whatever was read before the failure is returned
     */
    public StringBuffer getContent(URL url)
    {
        StringBuffer contentBuffer = new StringBuffer();
        try {
            InputStreamReader istreamReader = new InputStreamReader(url.openStream(), "UTF-8");
            try {
                // Read in chunks; read() returns -1 once the end of the stream is reached.
                char[] chunk = new char[4096];
                int count;
                while ((count = istreamReader.read(chunk)) != -1) {
                    contentBuffer.append(chunk, 0, count);
                }
            } finally {
                // Always release the underlying connection, even on a failed read.
                istreamReader.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return contentBuffer;
    }

    /**
     * Finds every match of {@code patternString} in {@code contentString} and
     * returns the matched fragments concatenated in order of appearance.
     *
     * <p>The pattern is compiled with {@link Pattern#DOTALL} so that {@code .}
     * also matches line terminators. Without it an expression such as
     * {@code <div ...>(.*?)</div>} can never match an element whose content
     * spans several lines, which is why the extraction used to come back empty.
     *
     * @param contentString text to search; {@code null} is treated as empty
     * @param patternString regular expression to look for
     * @return all complete matches joined together, or an empty string when
     *         nothing matches
     */
    public String getVideoInf(StringBuffer contentString, String patternString)
    {
        if (contentString == null) {
            return "";
        }
        Pattern pattern = Pattern.compile(patternString, Pattern.DOTALL);
        Matcher matcher = pattern.matcher(contentString);
        StringBuilder matches = new StringBuilder();
        while (matcher.find()) {
            matches.append(matcher.group());
        }
        return matches.toString();
    }

    /**
     * Downloads the sample video page and prints the statistics block.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        testtest ddTesttest = new testtest();
        String urlStr = "http://v.youku.com/v_show/id_XMzI5NTIyNzMy.html";
        try {
            StringBuffer stringBuffer = ddTesttest.getContent(new URL(urlStr));
            String temp = ddTesttest.getVideoInf(stringBuffer, "<div class=\"stat stat_area\">(.*?)</div>");
            System.out.println("test:\n" + temp);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }
}
解决方案 »
- 精通xsl的来帮忙
- tomcat6.0配置多个端口
- (在线)tomcat 控制台不显示(运转正常)
- 一个hibernate getSession()为空的问题
- 请问,怎样给Server2000里的一个已经存在的数据库设置密码
- 关于购物网站订单与会员等级联系
- 刚学java,用的是struts1.0,1.0里面怎么什么都是手写的呀,各位看看这是什么错误,怎么改呀
- 请问habernate怎么支持access?急~
- 如果import 两个包里都有同一个方法,那引用的时候怎么知道是引用哪个包里的?
- 紧急请教JAVA高手(在Servlet中读取文件)
- 如何实现文本的分页显示
- <s:iterator>设置只迭代出5行数据
document.getElementById("div的id").innerHTML
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads a web page over HTTP and extracts the first fragment that matches
 * a regular expression.
 */
public class Test {

    /**
     * Downloads the page behind {@code url} and returns it collapsed onto a
     * single line: every line is trimmed and the lines are joined without any
     * separator, so a regex whose {@code .} does not match line breaks can
     * still span what used to be several lines.
     *
     * <p>The response is decoded as UTF-8 rather than the platform default
     * charset. NOTE(review): UTF-8 is assumed for the target page; confirm
     * against the response's declared charset if other sites are scraped.
     *
     * @param url HTTP(S) location of the page
     * @return the collapsed page text, or {@code null} if the download fails
     *         (the stack trace is printed so the failure is not silent)
     */
    public String getContent(URL url) {
        StringBuilder builder = new StringBuilder();
        HttpURLConnection con = null;
        try {
            con = (HttpURLConnection) url.openConnection();
            con.connect();
            InputStream iStream = con.getInputStream();
            BufferedReader br = new BufferedReader(new InputStreamReader(iStream, "UTF-8"));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    // Blank lines trim to "" and therefore contribute nothing.
                    builder.append(line.trim());
                }
            } finally {
                br.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            if (con != null) {
                con.disconnect();
            }
        }
        return builder.toString();
    }

    /**
     * Returns the first capturing group of the first match of {@code regex}
     * in {@code content}.
     *
     * @param content text to search; may be {@code null}, e.g. when
     *                {@link #getContent(URL)} failed
     * @param regex   regular expression; if it declares no capturing group the
     *                whole match is returned instead
     * @return the captured text, or {@code null} when {@code content} is
     *         {@code null} or nothing matches
     */
    public String getVideoInf(String content, String regex) {
        if (content == null) {
            return null;
        }
        Matcher m = Pattern.compile(regex).matcher(content);
        if (!m.find()) {
            return null;
        }
        // group(1) would throw for a pattern without groups; fall back to the full match.
        return m.groupCount() >= 1 ? m.group(1) : m.group();
    }

    /**
     * Downloads the sample video page and prints the statistics block.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Test test = new Test();
        String urlStr = "http://v.youku.com/v_show/id_XMzI5NTIyNzMy.html";
        try {
            String str = test.getContent(new URL(urlStr));
            String regexp = "<div\\s*class=\"stat stat_area\"\\s*[^>]*>(.+?)</div>";
            String tmp = test.getVideoInf(str, regexp);
            System.out.println(tmp);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }
}
却能提取出<title></title>中的内容
使用 HTML 解析 jar 包 jsoup:
Document doc = Jsoup.connect("网页地址").get();
Elements hrefs = doc.select("div[class^=listtitle_b]");
System.out.println(hrefs.text());