我这段代码用于抓取当当网商品的产品名、图片和价格。
目前我的正则一次只能匹配产品名、图片、价格中的一个,我想一次把三个全部匹配出来,求指点。
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small scraping demo: downloads a Dangdang product page and prints the product
 * name, picture URL and price values found in it.
 *
 * NOTE(review): the regular expressions depend on the page markup at the time
 * this was written -- confirm against the current page before relying on them.
 */
public class test {
    /** Product name: the text between {@code <h1>} and {@code </h1>}. */
    private static final Pattern NAME_PATTERN = Pattern.compile("<h1>(.*?)</h1>");

    /** Product picture: an {@code src} attribute value ending in {@code b.jpg}. */
    private static final Pattern PICTURE_PATTERN = Pattern.compile("src=\"(.*?b\\.jpg)\"");

    /** Price: the first decimal number following {@code class="num"}. */
    private static final Pattern PRICE_PATTERN =
            Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)");

    /** Encoding used when the caller does not supply one. */
    private static final String DEFAULT_ENCODING = "gb2312";

    public static void main(String[] args) {
        String url = "http://product.dangdang.com/product.aspx?product_id=20689512";
        new test().spiderProduct(url);
    }

    /**
     * Downloads the page at {@code url} and prints, one per line, every product
     * name, then every picture URL, then every price found in it.
     *
     * <p>Previously the regex variable was overwritten three times before use, so
     * only the price expression was ever applied. Each expression is now applied
     * in turn. A blank or {@code null} URL prints nothing instead of failing with
     * a {@code NullPointerException}.
     *
     * @param url address of the product page; may be {@code null} or blank
     */
    public void spiderProduct(String url) {
        String content = getURLContent(url, DEFAULT_ENCODING);
        if (content == null) {
            // getURLContent returns null for a blank URL: nothing to scan.
            return;
        }
        Pattern[] patterns = {NAME_PATTERN, PICTURE_PATTERN, PRICE_PATTERN};
        for (Pattern pattern : patterns) {
            Matcher matcher = pattern.matcher(content);
            while (matcher.find()) {
                System.out.println(matcher.group(1));
            }
        }
    }

    /**
     * Reads the whole resource at {@code url} and decodes it as text.
     *
     * <p>I/O and URL errors are printed to {@code System.err}; whatever was read
     * before the failure (possibly an empty string) is still returned. The stream
     * is always closed before returning.
     *
     * @param url      address to read; {@code null} or blank yields {@code null}
     * @param encoding charset name used for decoding; {@code null} means "gb2312"
     * @return the decoded content, or {@code null} when {@code url} is null or blank
     */
    public String getURLContent(String url, String encoding) {
        if (url == null || "".equals(url.trim())) {
            return null;
        }

        StringBuilder content = new StringBuilder();
        InputStream in = null;
        try {
            // Open the URL and decode it with the requested (or default) charset.
            URL u = new URL(url);
            in = new BufferedInputStream(u.openStream());
            InputStreamReader theHTML = new InputStreamReader(in,
                    encoding != null ? encoding : DEFAULT_ENCODING);
            char[] buffer = new char[4096];
            int read;
            while ((read = theHTML.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Closing is best-effort; the content already read is still returned.
                }
            }
        }
        return content.toString();
    }
}
目前我的正则一次只能匹配产品名、图片、价格中的一个,我想一次把三个全部匹配出来,求指点。
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small scraping demo: downloads a Dangdang product page and prints the product
 * name, picture URL and price values found in it.
 *
 * NOTE(review): the regular expressions depend on the page markup at the time
 * this was written -- confirm against the current page before relying on them.
 */
public class test {
    /** Product name: the text between {@code <h1>} and {@code </h1>}. */
    private static final Pattern NAME_PATTERN = Pattern.compile("<h1>(.*?)</h1>");

    /** Product picture: an {@code src} attribute value ending in {@code b.jpg}. */
    private static final Pattern PICTURE_PATTERN = Pattern.compile("src=\"(.*?b\\.jpg)\"");

    /** Price: the first decimal number following {@code class="num"}. */
    private static final Pattern PRICE_PATTERN =
            Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)");

    /** Encoding used when the caller does not supply one. */
    private static final String DEFAULT_ENCODING = "gb2312";

    public static void main(String[] args) {
        String url = "http://product.dangdang.com/product.aspx?product_id=20689512";
        new test().spiderProduct(url);
    }

    /**
     * Downloads the page at {@code url} and prints, one per line, every product
     * name, then every picture URL, then every price found in it.
     *
     * <p>Previously the regex variable was overwritten three times before use, so
     * only the price expression was ever applied. Each expression is now applied
     * in turn. A blank or {@code null} URL prints nothing instead of failing with
     * a {@code NullPointerException}.
     *
     * @param url address of the product page; may be {@code null} or blank
     */
    public void spiderProduct(String url) {
        String content = getURLContent(url, DEFAULT_ENCODING);
        if (content == null) {
            // getURLContent returns null for a blank URL: nothing to scan.
            return;
        }
        Pattern[] patterns = {NAME_PATTERN, PICTURE_PATTERN, PRICE_PATTERN};
        for (Pattern pattern : patterns) {
            Matcher matcher = pattern.matcher(content);
            while (matcher.find()) {
                System.out.println(matcher.group(1));
            }
        }
    }

    /**
     * Reads the whole resource at {@code url} and decodes it as text.
     *
     * <p>I/O and URL errors are printed to {@code System.err}; whatever was read
     * before the failure (possibly an empty string) is still returned. The stream
     * is always closed before returning.
     *
     * @param url      address to read; {@code null} or blank yields {@code null}
     * @param encoding charset name used for decoding; {@code null} means "gb2312"
     * @return the decoded content, or {@code null} when {@code url} is null or blank
     */
    public String getURLContent(String url, String encoding) {
        if (url == null || "".equals(url.trim())) {
            return null;
        }

        StringBuilder content = new StringBuilder();
        InputStream in = null;
        try {
            // Open the URL and decode it with the requested (or default) charset.
            URL u = new URL(url);
            in = new BufferedInputStream(u.openStream());
            InputStreamReader theHTML = new InputStreamReader(in,
                    encoding != null ? encoding : DEFAULT_ENCODING);
            char[] buffer = new char[4096];
            int read;
            while ((read = theHTML.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Closing is best-effort; the content already read is still returned.
                }
            }
        }
        return content.toString();
    }
}
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Name, picture URL and price of a product scraped from a Dangdang product page.
 *
 * <p>Two factory methods are provided: {@link #createItem(String)} applies one
 * regular expression per field, {@link #getItem(String)} captures all three
 * fields with a single expression.
 *
 * NOTE(review): the regular expressions depend on the page markup at the time
 * this was written -- confirm against the current page before relying on them.
 */
class ProductItem {
    /** Product name: the text between {@code <h1>} and {@code </h1>}. */
    private static final Pattern NAME_PATTERN = Pattern.compile("<h1>(.*?)</h1>");

    /** Product picture: an {@code src} attribute value ending in {@code b.jpg}. */
    private static final Pattern PICTURE_PATTERN = Pattern.compile("src=\"(.*?b\\.jpg)\"");

    /** Price: the first decimal number following {@code class="num"}. */
    private static final Pattern PRICE_PATTERN =
            Pattern.compile("class=\"num\".*?(\\d+\\.\\d+)");

    /**
     * Name (group 1), picture (group 2) and price (group 3) in one expression.
     * DOTALL lets the lazy {@code .*?} gaps span line breaks. The original's
     * trailing {@code .*} was dropped: it only consumed the rest of the input
     * and did not affect the captured groups of the first match.
     */
    private static final Pattern COMBINED_PATTERN = Pattern.compile(
            "<h1>(.*?)</h1>.*?src=\"(.*?b\\.jpg)\".*?num\".*?(\\d+\\.\\d+)",
            Pattern.DOTALL);

    /** Encoding used when the caller does not supply one. */
    private static final String DEFAULT_ENCODING = "gb2312";

    String name;
    String picture;
    String price;

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getPicture() {
        return picture;
    }

    public void setPicture(String picture) {
        this.picture = picture;
    }

    /**
     * @param name    product name, or {@code null} when not found
     * @param picture picture URL, or {@code null} when not found
     * @param price   price text, or {@code null} when not found
     */
    public ProductItem(String name, String picture, String price) {
        this.name = name;
        this.picture = picture;
        this.price = price;
    }

    /**
     * Builds an item by applying one regular expression per field and taking the
     * first match of each.
     *
     * @param urlString address of the product page; may be {@code null} or blank
     * @return an item whose fields are {@code null} where nothing matched; all
     *         fields are {@code null} for a blank URL (previously this threw a
     *         {@code NullPointerException})
     */
    public static ProductItem createItem(String urlString) {
        String content = getURLContent(urlString, DEFAULT_ENCODING);
        if (content == null) {
            return new ProductItem(null, null, null);
        }
        return new ProductItem(
                firstGroup(NAME_PATTERN, content),
                firstGroup(PICTURE_PATTERN, content),
                firstGroup(PRICE_PATTERN, content));
    }

    /**
     * Builds an item by capturing name, picture and price with a single regular
     * expression.
     *
     * @param urlString address of the product page; may be {@code null} or blank
     * @return an item with all three fields set when the combined expression
     *         matches, otherwise an item whose fields are all {@code null}
     */
    public static ProductItem getItem(String urlString) {
        String content = getURLContent(urlString, DEFAULT_ENCODING);
        if (content == null) {
            return new ProductItem(null, null, null);
        }
        Matcher matcher = COMBINED_PATTERN.matcher(content);
        if (!matcher.find()) {
            return new ProductItem(null, null, null);
        }
        return new ProductItem(matcher.group(1), matcher.group(2), matcher.group(3));
    }

    /** Returns group 1 of the first match of {@code pattern}, or {@code null} if none. */
    private static String firstGroup(Pattern pattern, String content) {
        Matcher matcher = pattern.matcher(content);
        return matcher.find() ? matcher.group(1) : null;
    }

    /**
     * Reads the whole resource at {@code urlString} and decodes it as text.
     *
     * <p>I/O and URL errors are printed to {@code System.err}; whatever was read
     * before the failure (possibly an empty string) is still returned. The stream
     * is always closed before returning.
     *
     * @param urlString address to read; {@code null} or blank yields {@code null}
     * @param encoding  charset name used for decoding; {@code null} means "gb2312"
     * @return the decoded content, or {@code null} when {@code urlString} is null
     *         or blank
     */
    public static String getURLContent(String urlString, String encoding) {
        if (urlString == null || "".equals(urlString.trim())) {
            return null;
        }

        StringBuilder content = new StringBuilder();
        InputStream in = null;
        try {
            // Open the URL and decode it with the requested (or default) charset.
            URL url = new URL(urlString);
            in = new BufferedInputStream(url.openStream());
            InputStreamReader theHTML = new InputStreamReader(in,
                    encoding != null ? encoding : DEFAULT_ENCODING);
            char[] buffer = new char[4096];
            int read;
            while ((read = theHTML.read(buffer)) != -1) {
                content.append(buffer, 0, read);
            }
        } catch (MalformedURLException e) {
            System.err.println(e);
        } catch (IOException e) {
            System.err.println(e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Closing is best-effort; the content already read is still returned.
                }
            }
        }
        return content.toString();
    }

    @Override
    public String toString() {
        return "name = " + name + " \npicture = " + picture + " \nprice = "
                + price;
    }
}

/** Demo entry point: scrapes one product page with both factory methods and prints the results. */
public class dsfdsf {
    public static void main(String[] args) {
        String url = "http://product.dangdang.com/product.aspx?product_id=20689512";

        ProductItem productItem = ProductItem.createItem(url);
        System.out.println(productItem);
        productItem = ProductItem.getItem(url);
        System.out.println(productItem);
    }
}
/*
name = 目送:龙应台
picture = http://img32.ddimg.cn/96/0/20689512-1_b.jpg
price = 25.30
name = 目送:龙应台
picture = http://img32.ddimg.cn/96/0/20689512-1_b.jpg
price = 25.30
*/