一个采集程序。持续运行一天后 jvm内存溢出。各位老大帮忙来看看,如何让他持续运行。。
/**
 * Entry point: builds the holder of crawler threads and starts them.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
    CrawlerThread mainThread = new CrawlerThread();
    mainThread.fiveOneJob.start();
    // CrawlerThread declares no chinaHr field; the ThreadCjol instance is held in cjol.
    mainThread.cjol.start();
    mainThread.zhaopin.start();
}

/**
 * Holds one thread object per crawled site. The threads are created here
 * but only started by the caller.
 */
public class CrawlerThread {
    public Thread51Job fiveOneJob = new Thread51Job();
    public ThreadCjol cjol = new ThreadCjol();
    public ThreadZhaopin zhaopin = new ThreadZhaopin();
}
class Thread51Job extends Thread
{
public void run()
{
while(true)
{
FiveOneJobDotCom fiveOneJob = new FiveOneJobDotCom();
fiveOneJob .run();
fiveOneJob = null;
crawler = null;
System.gc();
try {
sleep(3600*60*3);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}
class ThreadCjol extends Thread
{
public void run()
{
while(true){
CjolDotCom cjol = new CjolDotCom();
cjol.run();
cjol= null;
System.gc();
try {
sleep(3600*60*3);
}
catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}
class ThreadZhaopin extends Thread
{
public void run()
{
while(true){
ZhaoPinDotCom zhaopin = new ZhaoPinDotCom();
zhaopin.run();
zhaopin = null;
crawler=null;
System.gc();
try{
sleep(3600*60*3);
}
catch(InterruptedException e){
e.printStackTrace();
}
}
}
}
// NOTE(review): the method header that should enclose these start-up
// statements is missing from this copy of the listing.
CrawlerThread mainThread = new CrawlerThread();
mainThread.fiveOneJob.start();
// CrawlerThread declares no chinaHr field; the ThreadCjol instance is held in cjol.
mainThread.cjol.start();
mainThread.zhaopin.start();
}

/**
 * Holds one thread object per crawled site. The threads are created here
 * but only started by the caller.
 */
public class CrawlerThread {
    public Thread51Job fiveOneJob = new Thread51Job();
    public ThreadCjol cjol = new ThreadCjol();
    public ThreadZhaopin zhaopin = new ThreadZhaopin();
}
class Thread51Job extends Thread
{
public void run()
{
while(true)
{
FiveOneJobDotCom fiveOneJob = new FiveOneJobDotCom();
fiveOneJob .run();
fiveOneJob = null;
crawler = null;
System.gc();
try {
sleep(3600*60*3);
} catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}
class ThreadCjol extends Thread
{
public void run()
{
while(true){
CjolDotCom cjol = new CjolDotCom();
cjol.run();
cjol= null;
System.gc();
try {
sleep(3600*60*3);
}
catch (InterruptedException e) {
e.printStackTrace();
}
}
}
}
class ThreadZhaopin extends Thread
{
public void run()
{
while(true){
ZhaoPinDotCom zhaopin = new ZhaoPinDotCom();
zhaopin.run();
zhaopin = null;
crawler=null;
System.gc();
try{
sleep(3600*60*3);
}
catch(InterruptedException e){
e.printStackTrace();
}
}
}
}
解决方案 »
- 问个最基础的问题:类的访问控制符只有public
- 估计是一个连James Gosling(java之父)也没法回答的java问题
- [求助]我这个冒泡排序写的有什么问题?
- 帮小弟个忙,我会给分给大家的!谢谢了!:)
- 数据库系统的客户端和服务器问题
- 把二个图片合并起来,就是叠在一起,成为一个图片,有好办法吗?谢谢
- 为何我的java线程会引起oracle的TNSLSNR.EXE进程占用100%的CPU
- 如何在servlet 里控制页面跳转的target.
- 一个很菜的问题,困扰了我好久,5555555,菜鸟求救!!!!!!!
- socket多线程 外网通信输入流获取出问题
- 关于ZipFile的问题
- ClassNotFoundException 求助
3个(实际上有很多个)采集类,代码太多,执行流程是:
function void processOneListPage(){
}
for(int i=0; i<=需要采集的列表总页数; i++){
    //获取列表页html
    for(int j=0; j<=列表页的详细页链接数; j++){
        //解析每一个详细页链接,并写入数据库。
    }
}关键代码:
/**
 * Requests the job list pages of one search area one after another and hands
 * every page that came back non-empty to {@link #processOneJobListPage(String)}.
 *
 * @param searchAreaId value appended to the {@code JobLocation} query parameter
 * @return number of list pages that were fetched with non-empty content
 */
private int processOneAreaJob(String searchAreaId) {
    int count = 0;
    // NOTE(review): the loop condition is page < pageCount, so pages 1..159
    // are requested -- confirm that page 160 is meant to be excluded.
    int pageCount = 160;
    String baseUrl = "http://search.zhaopin.com/jobs/request.asp?SchAdv=1&PublishDate=3&CurPage=1&industry=&SchJobType=&subJobType=&KeyWord=&ref=homepage&JobLocation="+searchAreaId+"&page=";
    for (int page = 1; page < pageCount; page++) {
        String url = baseUrl + page;
        // listPage is not declared in this method (presumably a field of the
        // enclosing class); the assignment is kept as in the original.
        listPage = HttpClient.getGetMothedResponseString(url, "string", "UTF-8");
        // Test the content, not the reference: "" != s is also true for null
        // and for any empty string that is not the interned literal.
        if (listPage != null && listPage.length() > 0) {
            System.out.println("success get list page:"+url);
            count++;
            // Parse this list page.
            this.processOneJobListPage(listPage);
        } else {
            System.out.println("Fail to get list page:"+url);
        }
    }
    return count;
}

/**
 * Parses one list page, extracts the {@code tr} rows under the element with
 * id {@code joblist}, drops the first row and processes each remaining row.
 * A failure in one row is printed and does not stop the remaining rows.
 *
 * @param listPage HTML of the list page
 */
private void processOneJobListPage(String listPage) {
    Node[] rows;
    try {
        // parser is not declared in this method (presumably a field of the
        // enclosing class); the assignment is kept as in the original.
        parser = new Parser(listPage);
        NodeFilter filter = new AndFilter(new TagNameFilter("tr"), new HasParentFilter(new HasAttributeFilter("id","joblist")));
        NodeList nodeList = parser.parse(filter);
        if (nodeList.size() == 0) {
            // Nothing matched, so there is no first row to drop.
            return;
        }
        nodeList.remove(0);
        rows = nodeList.toNodeArray();
    } catch (Exception e) {
        System.out.println(""+e);
        return;
    }

    // One formatter for the whole page instead of one per row; it is only
    // used by the calling thread inside this method.
    SimpleDateFormat dateFormat = new SimpleDateFormat("yy-MM-dd");
    for (Node row : rows) {
        try {
            this.processOneJobRow(row, dateFormat);
        } catch (Exception e) {
            // Keep going: one malformed row must not drop the rest of the page.
            System.out.println(""+e);
        }
    }
}

/**
 * Handles a single row of the job list: reads link, company name and date,
 * then either refreshes the date of an already stored job or fetches and
 * stores the new job.
 *
 * @param row        one row node of the list table
 * @param dateFormat formatter used to parse the row's date text
 * @throws Exception whatever the parsing or persistence helpers raise; the
 *                   caller reports it and moves on to the next row
 */
private void processOneJobRow(Node row, SimpleDateFormat dateFormat) throws Exception {
    String jobUrl = ((LinkTag) (row.getChildren().elementAt(3).getFirstChild())).getLink().replace("?f=ss", "");
    System.out.println("job_url:"+jobUrl);
    String companyName = ((LinkTag) (row.getChildren().elementAt(5).getFirstChild())).getLinkText();
    this.setCompany_name(companyName);
    String showDateText = row.getChildren().elementAt(9).getFirstChild().toHtml();
    Date shownOn = dateFormat.parse(showDateText);
    this.setJ_id(jobUrl);
    // Epoch seconds by arithmetic; slicing the first ten characters of the
    // millisecond string is only correct while that string has 13 digits.
    long epochSeconds = shownOn.getTime() / 1000L;
    if (epochSeconds < Integer.MIN_VALUE || epochSeconds > Integer.MAX_VALUE) {
        throw new IllegalArgumentException("show date out of int range: " + showDateText);
    }
    this.setShow_date((int) epochSeconds);
    this.setJob_url(jobUrl);

    int jobId = this.isExistsJob();
    if (jobId > 0) {
        // The table already holds this j_id; use job_id as the row's primary key.
        System.out.println("Is exist: " + this.getJ_id());
        this.setJob_id(jobId);
        if (1 == this.updateJobShowDate()) {
            System.out.println("succes update date: " + this.getJ_id());
            this.addUpdateJobCount();
        } else {
            System.out.println("no update date: " + this.getJ_id());
        }
        return;
    }

    // Unknown job: fetch and parse its detail page.
    System.out.println("New job info: " + this.getJ_id());
    String detailHtml = HttpClient.getGetMothedResponseString(this.getJob_url(), "string", "UTF-8");
    // Same content check as for list pages (null or empty means the fetch failed).
    if (detailHtml != null && detailHtml.length() > 0) {
        System.out.println("succes get jobhtmlpage: " + this.getJob_url());
        // Parse the fetched page and store it when parsing succeeded.
        if (this.parseOneJobInfo(detailHtml)) {
            this.processData();
        }
    } else {
        System.out.println("fail to jobhtmlpage: " + this.getJob_url());
    }
}
现在来说用 cache 的比较多,《Effective Java》上对这两种方法都有介绍。也可以参考下论坛其他用 cache 的例子,都很有参考价值。
不知道怎么会这样 我动他一下 他才回收内存