四个线程做的mapreduce比单线程快140倍？怎么解释！！！

要求：计算1个3000维的点（1*3000的数组），到100000个3000维的点（100000*3000的数组）中点的最近欧式距离。
用四个线程做的mapreduce比单线程快140倍？怎么解释！！！代码如下，用的fork joinpackage cn.com.iscs.matho_digiters.forkjoin;import java.util.Arrays;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.Future;
import java.util.concurrent.RecursiveTask;public class FindRecentDistanceTask extends RecursiveTask{
private double[] point;
private double[][] points;

public FindRecentDistanceTask(double[] point, double[][] points) {
super();
this.point = point;
this.points = points;
} @Override
protected Double compute() {
//System.out.println(Thread.currentThread().getName());
double minDistance = Integer.MAX_VALUE;
int pointsLen = points.length;
if(pointsLen > 25000){               //拆分
int middle = pointsLen/2;
double[][] pointsA = Arrays.copyOfRange(points, 0, middle);
double[][] pointsB = Arrays.copyOfRange(points, middle, pointsLen);
FindRecentDistanceTask distanceTaskA = new FindRecentDistanceTask(point, pointsA);
FindRecentDistanceTask distanceTaskB = new FindRecentDistanceTask(point, pointsB);
//执行子任务
distanceTaskA.fork();
distanceTaskB.fork();
double distanceTaskAResult = (double) distanceTaskA.join();
double distanceTaskBResult = (double) distanceTaskB.join();
if(distanceTaskAResult < minDistance) minDistance = distanceTaskAResult;
if(distanceTaskBResult < minDistance) minDistance = distanceTaskBResult;
}else{                             //计算
for (int i = 0; i < points.length; i++) {
double[] row = points[i];
double distance = 0.0;
for (int j = 0; j < row.length; j++) {
distance += Math.pow(point[j]-row[j], 2);
}
distance = Math.sqrt(distance);
if(distance < minDistance) minDistance = distance;
}
}
return minDistance;
}

public static void main(String[] args) throws InterruptedException, ExecutionException {
double[] point = new double[3000];
double[][] points = new double[100000][3000];
for (int i = 0; i < points.length; i++) {
for (int j = 0; j < points[i].length; j++) {
points[i][j] = Math.random();
}
}
System.out.println("1*3000的点和 100000*3000的双精度矩阵生成完成");
//***************************************************************单线程
System.out.println("开始单线程计算...");
long t3 = System.currentTimeMillis();
double minDistance = Integer.MAX_VALUE;
for (int i = 0; i < points.length; i++) {
double[] row = points[i];
double distance = 0.0;
for (int j = 0; j < row.length; j++) {
distance += Math.pow(point[j]-row[j], 2);
}
distance = Math.sqrt(distance);
if(distance < minDistance) minDistance = distance;
}
long t4 = System.currentTimeMillis();
System.out.println("单线程耗时(ms)："+(t4-t3));
System.out.println("最近欧式距离："+minDistance);
//***************************************************************
System.out.println();

//***************************************************************多线程
System.out.println("开始mapreduce(多线程)计算...");
long t1 = System.currentTimeMillis();
ForkJoinPool forkJoinPool = new ForkJoinPool();
FindRecentDistanceTask distanceTask = new FindRecentDistanceTask(point, points);
Future result = forkJoinPool.submit(distanceTask);
long t2 = System.currentTimeMillis();
System.out.println("多线程耗时(ms)："+ (t2-t1));
System.out.println("最近欧式距离："+result.get());
//***************************************************************

//*****************************************************测试两层循环100000>3000
System.out.println();
long t5 = System.nanoTime();
int countA = 0;
for (int i = 0; i < 100000; i++) {
for (int j = 0; j < 3000; j++) {
countA++;
}
}
long t6 = System.nanoTime();
System.out.println("外层循环100000次内层循环3000次耗时（ns）："+(t6-t5));

//*****************************************************测试两层循环25000>3000
int countB = 0;
long t7 = System.nanoTime();
for (int i = 0; i < 25000; i++) {
for (int j = 0; j < 3000; j++) {
countB++;
}
}
long t8 = System.nanoTime();
System.out.println("外层循环   25000次内层循环3000次耗时（ns）："+(t8-t7));

}}

解决方案 »

免费领取超大流量手机卡，每月29元包185G流量+100分钟通话, 中国电信官方发货

submit是不是还没真开始运算,future返回结果才是真的运算完毕应该,不大懂
只想感慨下楼主的内存好大呀~
底层重新编译，提高了CPU利用率
没细看你的代码，直觉告诉我，这是时间复杂度的问题。
单线程时间复杂度：O(n^2) // 推测值，但肯定不是线性的，应该是个指数级别。
四线程后：4*O((n/4)^2) // 这个值比上一个值小140倍是有可能的。
简单的说，
就是单线程时，程序串行执行，cpu利用率不高。
4线程时，程序有些部分可以并行执行，提高了cpu利用率；当任务越多时，越能体现出性能优化效果。
用 fork join只跑一个线程，跟正常的单线程比，也相差140倍的时间？求解释