比如这个网址的 PDF 文件:http://www.alfa.com/MSDSPDF/English_/L06278.pdf
我想将这个文件的内容读到一个字符串里,存储到数据库中。
我用 HttpClient 读取网页源文件的程序去读取它,读到的是乱码。有人说 iText 可以读取,请问具体应该怎么操作?有没有程序示例?下面附上我用 HttpClient 读取网页的程序:
// Fetches the resource at `url` with Commons HttpClient and collects the
// response body, line by line, into `htmldata`.
// NOTE(review): this is a text-oriented read. Binary payloads such as PDF
// cannot be represented faithfully this way; read them as bytes
// (getResponseBody()) and hand them to a PDF library instead.
String htmldata = "";
// Accumulates the body; copied into htmldata in the finally block so that
// whatever was read before a failure is still available, as before.
StringBuilder collected = new StringBuilder();
// Build the HttpClient instance.
HttpClient httpClient = new HttpClient();
httpClient.getParams().setCookiePolicy(
        CookiePolicy.BROWSER_COMPATIBILITY);
httpClient.getParams().setBooleanParameter(
        HttpMethodParams.SINGLE_COOKIE_HEADER, true);
// Create the GET method instance.
GetMethod getMethod = new GetMethod(url);
// Install a DefaultHttpMethodRetryHandler constructed with (10, false).
getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
        new DefaultHttpMethodRetryHandler(10, false));
try {
    // Execute the GET request.
    int statusCode = httpClient.executeMethod(getMethod);
    if (statusCode != HttpStatus.SC_OK) {
        System.err.println("Method failed: "
                + getMethod.getStatusLine());
    }
    InputStream is = getMethod.getResponseBodyAsStream();
    // The stream may be null when the response carries no body.
    if (is != null) {
        // Decode with the charset announced by the server rather than the
        // platform default, which is a common cause of garbled text.
        BufferedReader br = new BufferedReader(
                new InputStreamReader(is, getMethod.getResponseCharSet()));
        String tempbf;
        // Lines are joined without their terminators, as in the original.
        while ((tempbf = br.readLine()) != null) {
            collected.append(tempbf);
        }
    }
} catch (HttpException e) {
    // Fatal protocol-level failure: wrong protocol or a malformed response.
    System.out.println("Please check your provided http address!");
    e.printStackTrace();
} catch (IOException e) {
    // Network-level failure (also covers an unsupported response charset).
    e.printStackTrace();
} finally {
    htmldata = collected.toString();
    // Release the connection.
    getMethod.releaseConnection();
}
我想将这个文件的内容读到一个字符串里,存储到数据库中。
我用 HttpClient 读取网页源文件的程序去读取它,读到的是乱码。有人说 iText 可以读取,请问具体应该怎么操作?有没有程序示例?下面附上我用 HttpClient 读取网页的程序:
// Fetches the resource at `url` with Commons HttpClient and collects the
// response body, line by line, into `htmldata`.
// NOTE(review): this is a text-oriented read. Binary payloads such as PDF
// cannot be represented faithfully this way; read them as bytes
// (getResponseBody()) and hand them to a PDF library instead.
String htmldata = "";
// Accumulates the body; copied into htmldata in the finally block so that
// whatever was read before a failure is still available, as before.
StringBuilder collected = new StringBuilder();
// Build the HttpClient instance.
HttpClient httpClient = new HttpClient();
httpClient.getParams().setCookiePolicy(
        CookiePolicy.BROWSER_COMPATIBILITY);
httpClient.getParams().setBooleanParameter(
        HttpMethodParams.SINGLE_COOKIE_HEADER, true);
// Create the GET method instance.
GetMethod getMethod = new GetMethod(url);
// Install a DefaultHttpMethodRetryHandler constructed with (10, false).
getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
        new DefaultHttpMethodRetryHandler(10, false));
try {
    // Execute the GET request.
    int statusCode = httpClient.executeMethod(getMethod);
    if (statusCode != HttpStatus.SC_OK) {
        System.err.println("Method failed: "
                + getMethod.getStatusLine());
    }
    InputStream is = getMethod.getResponseBodyAsStream();
    // The stream may be null when the response carries no body.
    if (is != null) {
        // Decode with the charset announced by the server rather than the
        // platform default, which is a common cause of garbled text.
        BufferedReader br = new BufferedReader(
                new InputStreamReader(is, getMethod.getResponseCharSet()));
        String tempbf;
        // Lines are joined without their terminators, as in the original.
        while ((tempbf = br.readLine()) != null) {
            collected.append(tempbf);
        }
    }
} catch (HttpException e) {
    // Fatal protocol-level failure: wrong protocol or a malformed response.
    System.out.println("Please check your provided http address!");
    e.printStackTrace();
} catch (IOException e) {
    // Network-level failure (also covers an unsupported response charset).
    e.printStackTrace();
} finally {
    htmldata = collected.toString();
    // Release the connection.
    getMethod.releaseConnection();
}
楼主【abenbottle】截止到2008-07-08 13:25:57的历史汇总数据(不包括此帖):
发帖的总数量:5 发帖的总分数:130 每贴平均分数:26
回帖的总数量:2 得分贴总数量:0 回帖的得分率:0%
结贴的总数量:2 结贴的总分数:70
无满意结贴数:0 无满意结贴分:0
未结的帖子数:3 未结的总分数:60
结贴的百分比:40.00 % 结分的百分比:53.85 %
无满意结贴率:0.00 % 无满意结分率:0.00 %
楼主该结一些帖子了
import java.io.InputStream;
import java.io.File;
import java.net.URL;
import java.io.Closeable;
import java.io.FileOutputStream;
import java.io.IOException;

/**
 * Downloads the resource behind a URL and stores it as a local file.
 */
public class DownLoadFile {

    /** Size of the copy buffer, in bytes. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Copies the content of {@code source} to the local file system.
     *
     * <p>{@code path} is treated as a target directory when it contains no
     * {@code '.'} or when it names an existing directory; the file name is then
     * taken from the text after the last {@code '/'} of {@code source}.
     * Otherwise {@code path} is used as the full target file path. An existing
     * target file is overwritten.
     *
     * @param source URL of the resource to download
     * @param path   target directory or target file path
     * @return {@code true} if the whole resource was written, {@code false} if
     *         any error occurred while opening, reading or writing
     */
    public static boolean downLoadFile(String source, String path) {
        String filename = "";
        // An existing directory is always a directory, even with a dot in its
        // path; the dot test alone would try to open it as a file and fail.
        if (path.indexOf(".") == -1 || new File(path).isDirectory()) {
            filename = source.substring(source.lastIndexOf("/") + 1, source.length());
            if (!path.endsWith("/")) {
                path = path + "/";
            }
        }
        File file = new File(path + filename);
        if (file.exists()) {
            System.out.println("has a same file in : \n" + path + filename + "\n and now it will be overwrite");
        } else {
            System.out.println("文件存放路径为:" + path + filename);
        }

        InputStream is = null;
        FileOutputStream fos = null;
        try {
            is = new URL(source).openStream();
            fos = new FileOutputStream(path + filename);
            byte[] buffer = new byte[BUFFER_SIZE];
            int count;
            while ((count = is.read(buffer)) != -1) {
                fos.write(buffer, 0, count);
            }
            // Close on the success path inside the try so that a failing
            // close is still reported as a failed download.
            fos.close();
            return true;
        } catch (Exception e) {
            // The boolean contract is kept, but the cause is no longer lost.
            System.err.println("download failed: " + source + " -> " + path + filename + " : " + e);
            return false;
        } finally {
            // Always release both streams, including after a mid-copy failure.
            closeQuietly(is);
            closeQuietly(fos);
        }
    }

    /** Closes {@code resource} if it is non-null, ignoring any close failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best-effort cleanup; the download result was already decided.
        }
    }

    public static void main(String[] args) {
        System.out.println(downLoadFile("http://www.dajiadu.net/files/article/html/0/727/index.html", "E:/temp"));
    }
}