import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;import java.util.ArrayList;
import java.util.List;/**
* Computes an approximation to pi
* Usage: JavaSparkPi [slices]
*/
/**
 * Computes an approximation to pi by Monte Carlo sampling on a remote Spark cluster.
 * Usage: JavaSparkPi [slices]
 */
public final class RemoteDebug {

    public static void main(String[] args) throws Exception {
        // Remote-debug setup: point the driver at the standalone master and ship
        // this project's jar so executors can deserialize the closures below.
        SparkConf sparkConf = new SparkConf()
                .setAppName("JavaSparkPi")
                .setMaster("spark://10.3.9.135:7077")
                .setJars(new String[]{"D:\\****\\workspace\\SparkRemoteDebug\\out\\artifacts\\SparkRemoteDebug_jar\\SparkRemoteDebug.jar"});
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        try {
            int slices = (args.length == 1) ? Integer.parseInt(args[0]) : 2;
            int n = 100000 * slices; // total number of random samples
            List<Integer> l = new ArrayList<>(n);
            for (int i = 0; i < n; i++) {
                l.add(i);
            }
            JavaRDD<Integer> dataSet = jsc.parallelize(l, slices);
            // Each sample draws a uniform point in [-1,1]^2 and yields 1 if it
            // lands inside the unit circle, else 0; hits/n approximates pi/4.
            int count = dataSet
                    .map((Function<Integer, Integer>) integer -> {
                        double x = Math.random() * 2 - 1;
                        double y = Math.random() * 2 - 1;
                        return (x * x + y * y < 1) ? 1 : 0;
                    })
                    .reduce((Function2<Integer, Integer, Integer>) Integer::sum);
            System.out.println("Pi is roughly " + 4.0 * count / n);
        } finally {
            // Always release cluster resources, even if the job throws.
            jsc.stop();
        }
    }
}
出现错误:
16/08/24 12:55:15 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
16/08/24 12:55:15 INFO Utils: Successfully started service 'SparkUI' on port 4041.
16/08/24 12:55:15 INFO SparkUI: Started SparkUI at http://10.3.9.155:4041
16/08/24 12:55:15 INFO HttpFileServer: HTTP File server directory is C:\Users\lw_co\AppData\Local\Temp\spark-a3786942-b296-4123-99ab-68afc6a575b6\httpd-6b1db50c-6184-4dbe-91c5-477710823b53
16/08/24 12:55:15 INFO HttpServer: Starting HTTP Server
16/08/24 12:55:15 INFO Utils: Successfully started service 'HTTP file server' on port 2115.
16/08/24 12:55:15 INFO SparkContext: Added JAR D:\jiuzhouwork\workspace\SparkRemoteDebug\out\artifacts\SparkRemoteDebug_jar\SparkRemoteDebug.jar at http://10.3.9.155:2115/jars/SparkRemoteDebug.jar with timestamp 1472014515970
16/08/24 12:55:16 INFO AppClient$ClientEndpoint: Connecting to master spark://10.3.9.135:7077...
16/08/24 12:55:16 ERROR TransportClient: Failed to send RPC 6246311634782781203 to /10.3.9.135:7077: java.lang.AbstractMethodError: org.apache.spark.network.protocol.MessageWithHeader.touch(Ljava/lang/Object;)Lio/netty/util/ReferenceCounted;
java.lang.AbstractMethodError: org.apache.spark.network.protocol.MessageWithHeader.touch(Ljava/lang/Object;)Lio/netty/util/ReferenceCounted;
at io.netty.util.ReferenceCountUtil.touch(ReferenceCountUtil.java:73)
at io.netty.channel.DefaultChannelPipeline.touch(DefaultChannelPipeline.java:107)
at io.netty.channel.AbstractChannelHandlerContext.write(AbstractChannelHandlerContext.java:796)
at io.netty.channel.AbstractChannelHandlerContext.write(AbstractChannelHandlerContext.java:709)
at io.netty.handler.codec.MessageToMessageEncoder.write(MessageToMessageEncoder.java:111)
at io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:724)
其中 10.3.9.155 是本地 IP,并不是集群上的 IP,为什么呢?明明已经设置了 master 地址。
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;import java.util.ArrayList;
import java.util.List;/**
* Computes an approximation to pi
* Usage: JavaSparkPi [slices]
*/
/**
 * Computes an approximation to pi by Monte Carlo sampling on a remote Spark cluster.
 * Usage: JavaSparkPi [slices]
 */
public final class RemoteDebug {

    public static void main(String[] args) throws Exception {
        // Remote-debug setup: point the driver at the standalone master and ship
        // this project's jar so executors can deserialize the closures below.
        SparkConf sparkConf = new SparkConf()
                .setAppName("JavaSparkPi")
                .setMaster("spark://10.3.9.135:7077")
                .setJars(new String[]{"D:\\****\\workspace\\SparkRemoteDebug\\out\\artifacts\\SparkRemoteDebug_jar\\SparkRemoteDebug.jar"});
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        try {
            int slices = (args.length == 1) ? Integer.parseInt(args[0]) : 2;
            int n = 100000 * slices; // total number of random samples
            List<Integer> l = new ArrayList<>(n);
            for (int i = 0; i < n; i++) {
                l.add(i);
            }
            JavaRDD<Integer> dataSet = jsc.parallelize(l, slices);
            // Each sample draws a uniform point in [-1,1]^2 and yields 1 if it
            // lands inside the unit circle, else 0; hits/n approximates pi/4.
            int count = dataSet
                    .map((Function<Integer, Integer>) integer -> {
                        double x = Math.random() * 2 - 1;
                        double y = Math.random() * 2 - 1;
                        return (x * x + y * y < 1) ? 1 : 0;
                    })
                    .reduce((Function2<Integer, Integer, Integer>) Integer::sum);
            System.out.println("Pi is roughly " + 4.0 * count / n);
        } finally {
            // Always release cluster resources, even if the job throws.
            jsc.stop();
        }
    }
}
出现错误:
16/08/24 12:55:15 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
16/08/24 12:55:15 INFO Utils: Successfully started service 'SparkUI' on port 4041.
16/08/24 12:55:15 INFO SparkUI: Started SparkUI at http://10.3.9.155:4041
16/08/24 12:55:15 INFO HttpFileServer: HTTP File server directory is C:\Users\lw_co\AppData\Local\Temp\spark-a3786942-b296-4123-99ab-68afc6a575b6\httpd-6b1db50c-6184-4dbe-91c5-477710823b53
16/08/24 12:55:15 INFO HttpServer: Starting HTTP Server
16/08/24 12:55:15 INFO Utils: Successfully started service 'HTTP file server' on port 2115.
16/08/24 12:55:15 INFO SparkContext: Added JAR D:\jiuzhouwork\workspace\SparkRemoteDebug\out\artifacts\SparkRemoteDebug_jar\SparkRemoteDebug.jar at http://10.3.9.155:2115/jars/SparkRemoteDebug.jar with timestamp 1472014515970
16/08/24 12:55:16 INFO AppClient$ClientEndpoint: Connecting to master spark://10.3.9.135:7077...
16/08/24 12:55:16 ERROR TransportClient: Failed to send RPC 6246311634782781203 to /10.3.9.135:7077: java.lang.AbstractMethodError: org.apache.spark.network.protocol.MessageWithHeader.touch(Ljava/lang/Object;)Lio/netty/util/ReferenceCounted;
java.lang.AbstractMethodError: org.apache.spark.network.protocol.MessageWithHeader.touch(Ljava/lang/Object;)Lio/netty/util/ReferenceCounted;
at io.netty.util.ReferenceCountUtil.touch(ReferenceCountUtil.java:73)
at io.netty.channel.DefaultChannelPipeline.touch(DefaultChannelPipeline.java:107)
at io.netty.channel.AbstractChannelHandlerContext.write(AbstractChannelHandlerContext.java:796)
at io.netty.channel.AbstractChannelHandlerContext.write(AbstractChannelHandlerContext.java:709)
at io.netty.handler.codec.MessageToMessageEncoder.write(MessageToMessageEncoder.java:111)
at io.netty.channel.AbstractChannelHandlerContext.invokeWrite0(AbstractChannelHandlerContext.java:724)
其中 10.3.9.155 是本地 IP,并不是集群上的 IP,为什么呢?明明已经设置了 master 地址。
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <groupId>cn.arvidlw.jz.keywords</groupId>
<artifactId>keywords-news</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>hanlp</artifactId>
<version>portable-1.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0</version>
</dependency>
<!-- NOTE(review): HBase versions are inconsistent below (1.2.0 vs 1.2.4);
     align them on a single version to avoid mixed transitive dependencies. -->
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-thrift</artifactId>
<version>1.2.4</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-prefix-tree</artifactId>
<version>1.2.4</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-testing-util</artifactId>
<version>1.2.4</version> </dependency> <dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.10</artifactId>
<version>2.0.0</version> </dependency>
<!-- NOTE(review): spark-parent is Spark's aggregator/parent POM, not a runtime
     jar, and at 1.2.0 it conflicts with the 2.0.0 Spark artifacts above.
     Mixing the two pulls incompatible transitive netty classes — this looks
     like the likely cause of the logged
     AbstractMethodError: MessageWithHeader.touch(...) — verify and remove
     or align this dependency. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-mllib_2.10</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.10</artifactId>
<version>2.0.0</version>
</dependency>
<!-- NOTE(review): akka-actor_2.11 mixes Scala binary versions with the
     _2.10 Spark artifacts above; Spark 2.0 no longer uses this akka fork —
     confirm this dependency is still needed at all. -->
<dependency>
<groupId>org.spark-project.akka</groupId>
<artifactId>akka-actor_2.11</artifactId>
<version>2.3.4-spark</version>
</dependency>
</dependencies></project>
2. 你能够连接到 Spark 集群 master 的 7077 端口。一般第二种方式都能实现;但第一种方式不具备运行环境,即使端口通了也没法提交任务——
除非你在 Spark 集群的某个节点上安装 IDEA。