请问,我有一个程序,代码如下:
package URLUtil;import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Downloads a web page, strips its tags with {@code FilterTags}, prints the
 * resulting text and stores it in a local file.
 */
public class URLUtil {
    /**
     * Downloads the document at the given URL and returns it as a single
     * string in which every line is terminated by {@code "\n"}.
     *
     * <p>The response is decoded with the platform default charset, because
     * no charset is passed to the {@link InputStreamReader}.
     *
     * @param urlString absolute HTTP URL of the page to download
     * @return the page content, or {@code null} if any exception occurs
     *         (the stack trace is printed)
     */
    public static String getHtml(String urlString) {
        BufferedReader br = null;
        try {
            StringBuffer html = new StringBuffer();
            URL url = new URL(urlString);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            br = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            String temp;
            while ((temp = br.readLine()) != null) {
                html.append(temp).append("\n");
            }
            return html.toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Close in finally so the stream is released even when reading fails;
            // closing the BufferedReader also closes the wrapped readers/stream.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Fetches http://www.sina.com.cn, filters the tags, prints the text and
     * writes it to d:/doc/temp.txt.
     *
     * @param args unused
     * @throws IOException if the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        FilterTags ft = new FilterTags();
        String s = ft.getFilterTags(URLUtil.getHtml("http://www.sina.com.cn"));
        System.out.println(s);
        if (s == null) {
            // Writer.write(null) would throw a NullPointerException after the
            // output file had already been truncated; skip writing instead.
            return;
        }
        BufferedWriter bw = new BufferedWriter(new FileWriter("d:/doc/temp.txt"));
        try {
            bw.write(s);
        } finally {
            bw.close();
        }
    }
}
输入的网址会放在一个txt文件里面,以
http://www.baidu.com
http://www.google.com.hk
http://www.sina.com.cn
这样单行排列的形式给出
希望把每个网址的抓取结果分别输出到各自的 txt 文件中,文件名取 http:// 后面的部分(例如 www.baidu.com.txt)。
请问应该怎么改进?
package URLUtil;import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
/**
 * Downloads a web page, strips its tags with {@code FilterTags}, prints the
 * resulting text and stores it in a local file.
 */
public class URLUtil {
    /**
     * Downloads the document at the given URL and returns it as a single
     * string in which every line is terminated by {@code "\n"}.
     *
     * <p>The response is decoded with the platform default charset, because
     * no charset is passed to the {@link InputStreamReader}.
     *
     * @param urlString absolute HTTP URL of the page to download
     * @return the page content, or {@code null} if any exception occurs
     *         (the stack trace is printed)
     */
    public static String getHtml(String urlString) {
        BufferedReader br = null;
        try {
            StringBuffer html = new StringBuffer();
            URL url = new URL(urlString);
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            br = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            String temp;
            while ((temp = br.readLine()) != null) {
                html.append(temp).append("\n");
            }
            return html.toString();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Close in finally so the stream is released even when reading fails;
            // closing the BufferedReader also closes the wrapped readers/stream.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Fetches http://www.sina.com.cn, filters the tags, prints the text and
     * writes it to d:/doc/temp.txt.
     *
     * @param args unused
     * @throws IOException if the output file cannot be written
     */
    public static void main(String[] args) throws IOException {
        FilterTags ft = new FilterTags();
        String s = ft.getFilterTags(URLUtil.getHtml("http://www.sina.com.cn"));
        System.out.println(s);
        if (s == null) {
            // Writer.write(null) would throw a NullPointerException after the
            // output file had already been truncated; skip writing instead.
            return;
        }
        BufferedWriter bw = new BufferedWriter(new FileWriter("d:/doc/temp.txt"));
        try {
            bw.write(s);
        } finally {
            bw.close();
        }
    }
}
输入的网址会放在一个txt文件里面,以
http://www.baidu.com
http://www.google.com.hk
http://www.sina.com.cn
这样单行排列的形式给出
希望把每个网址的抓取结果分别输出到各自的 txt 文件中,文件名取 http:// 后面的部分(例如 www.baidu.com.txt)。
请问应该怎么改进?
文件名里不能包含 “:”、“/” 这类字符,所以不能直接把整个 URL 当作文件名:你需要先把 URL 转义,或者只用 www.baidu.com 这一部分作为文件名。(我建议后者。)
循环做不就解决问题了
1、用字符流readline读取每一行网址数据A
2、根据网址A抓取网页内容
3、生成网页内容文件File,以你想要的任何命名形式,生成命名不就是个对A的字符串操作
urls.txt文件内容:
http://www.baidu.com
http://www.google.com.hk
http://www.sina.com.cn
代码如下:package csdn.impulsehu.june;import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * Reads a list of URLs from the file {@code urls.txt} (one URL per line) and
 * saves the content of each page to a local text file named after the URL.
 */
public class URLUtil {
    /**
     * Downloads every URL listed in {@code urls.txt} into its own file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> urls = getURLFromFile(new File("urls.txt"));
        for (String url : urls) {
            pageContentToLocalFile(url, new File(toFileName(url)));
        }
    }

    /**
     * Derives a local file name from a URL: the scheme prefix (everything up
     * to and including "://") is dropped, characters that are not allowed in
     * file names are replaced by '_', and ".txt" is appended.
     * For example {@code http://www.baidu.com} becomes {@code www.baidu.com.txt}.
     */
    private static String toFileName(String url) {
        int schemeEnd = url.indexOf("://");
        String name = schemeEnd >= 0 ? url.substring(schemeEnd + 3) : url;
        // A '/' left in the name would be treated as a directory separator.
        return name.replaceAll("[\\\\/:*?\"<>|]", "_") + ".txt";
    }

    /**
     * Copies the content found at {@code sourceUrl} line by line into
     * {@code destFileName}, echoing every line to standard output.
     * Each line is written followed by the platform line separator.
     * Any exception is caught and its stack trace printed.
     *
     * @param sourceUrl    URL of the content to download
     * @param destFileName target file; when {@code null} the content goes to
     *                     a file named {@code temp.tmp}
     */
    public static void pageContentToLocalFile(String sourceUrl, File destFileName) {
        InputStream is = null;
        BufferedReader br = null;
        FileWriter fw = null;
        BufferedWriter bw = null;
        try {
            URL url = new URL(sourceUrl);
            is = url.openConnection().getInputStream();
            // NOTE(review): decodes with the platform default charset.
            br = new BufferedReader(new InputStreamReader(is));
            if (destFileName != null) {
                fw = new FileWriter(destFileName);
            } else {
                fw = new FileWriter(new File("temp.tmp"));
            }
            bw = new BufferedWriter(fw);
            String line;
            while (null != (line = br.readLine())) {
                System.out.println(line);
                bw.write(line);
                bw.newLine();
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeIOStream(bw, fw, br, is);
        }
    }

    /**
     * Reads the given file and returns its non-blank lines, trimmed, in file
     * order. Each returned line is also echoed to standard output.
     * On failure the stack trace is printed and the lines read so far are
     * returned.
     */
    private static List<String> getURLFromFile(File file) {
        List<String> urls = new ArrayList<String>();
        FileReader fr = null;
        BufferedReader br = null;
        try {
            fr = new FileReader(file);
            br = new BufferedReader(fr);
            String line;
            while (null != (line = br.readLine())) {
                String trimmed = line.trim();
                if (trimmed.length() == 0) {
                    // Blank lines (e.g. a trailing newline) are not URLs.
                    continue;
                }
                System.out.println(trimmed);
                urls.add(trimmed);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(br);
            closeQuietly(fr);
        }
        return urls;
    }

    /**
     * Closes all four resources. Every resource is closed independently, so a
     * {@code null} or failing writer no longer prevents the readers and the
     * input stream from being closed.
     */
    private static void closeIOStream(BufferedWriter bw, FileWriter fw,
            BufferedReader br, InputStream is) {
        closeQuietly(bw);
        closeQuietly(fw);
        closeQuietly(br);
        closeQuietly(is);
    }

    /** Closes {@code resource} if it is not null, printing any IOException. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (null == resource) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
FilterTags ft = new FilterTags();
FilterTags 这个把网页内容文本化的类,我不知道应该加在代码的什么位置。
最好还是能基于1楼给的代码改进一下
// Read the URL list (one URL per line) from the file named by the first argument.
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(args[0])));
try {
    String url;
    while ((url = br.readLine()) != null) {
        url = url.trim();
        if (url.length() == 0) {
            // Blank lines would otherwise crash in substring(7).
            continue;
        }
        String content = URLUtil.getHtml(url);
        System.out.println(content);
        if (content == null) {
            // getHtml signals a failed download with null; Writer.write(null)
            // would throw a NullPointerException and leak the open writer.
            continue;
        }
        // File name: the part after "http://", with characters that are not
        // valid in file names (such as '/' and ':') replaced by '_'.
        String name = url.startsWith("http://") ? url.substring(7) : url;
        name = name.replaceAll("[\\\\/:*?\"<>|]", "_");
        BufferedWriter bw = new BufferedWriter(new FileWriter("d:/" + name + ".txt"));
        try {
            bw.write(content);
        } finally {
            bw.close();
        }
    }
} finally {
    // Release the URL list file even if a download or write fails.
    br.close();
}
}