Spark scala 运行报错 .test$$anonfun$1

同样的写法在 Scala 中执行报错，然而在 Java 中能够正常执行。
以下是报错内容

 helloOff
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/C:/Users/zsts/.m2/repository/org/apache/logging/log4j/log4j-slf4j-impl/2.5/log4j-slf4j-impl-2.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/C:/Users/zsts/.m2/repository/org/slf4j/slf4j-log4j12/1.7.10/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
ERROR StatusLogger No log4j2 configuration file found. Using default configuration: logging only errors to the console.
19:25:11.875 [main] ERROR org.apache.hadoop.util.Shell - Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
    at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:355) ~[hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:370) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.util.Shell.<clinit>(Shell.java:363) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:79) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.Groups.parseStaticMapping(Groups.java:116) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.Groups.<init>(Groups.java:93) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.Groups.<init>(Groups.java:73) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.Groups.getUserToGroupsMappingService(Groups.java:293) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:283) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:260) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:789) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:774) [hadoop-common-2.6.4.jar:?]
    at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:647) [hadoop-common-2.6.4.jar:?]
    at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2198) [spark-core_2.10-1.6.2.jar:1.6.2]
    at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2198) [spark-core_2.10-1.6.2.jar:1.6.2]
    at scala.Option.getOrElse(Option.scala:120) [?:?]
    at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2198) [spark-core_2.10-1.6.2.jar:1.6.2]
    at org.apache.spark.SparkContext.<init>(SparkContext.scala:322) [spark-core_2.10-1.6.2.jar:1.6.2]
    at tarot.test$.main(test.scala:19) [bin/:?]
    at tarot.test.main(test.scala) [bin/:?]
Exception in thread "main" org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: hdfs://tarot1:9000/sparkTest/hello
    at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:285)
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:228)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:313)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:199)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:239)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:237)
    at scala.Option.getOrElse(Option.scala:120)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:237)
    at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1307)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.take(RDD.scala:1302)
    at tarot.test$.main(test.scala:26)
    at tarot.test.main(test.scala)

scala代码

 package tarot

import scala.tools.nsc.doc.model.Val

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.api.java.JavaSparkContext

object test {

  /** Entry point: reads an HDFS file, keeps lines containing "hello", and
    * prints the first 100 matches.
    *
    * NOTE(review): the reported `test$$anonfun$1` failure is the classic
    * symptom of running from an IDE against a standalone master
    * (spark://...): the jar holding this object's compiled closure classes
    * is never shipped to the executors, so they cannot deserialize the
    * filter lambda. Fix by launching via spark-submit, or by adding
    * `conf.setJars(Seq("/path/to/this-app.jar"))` below. The Java version
    * only appears to work when its classes already sit on the executor
    * classpath. The later InvalidInputException in the log is independent:
    * hdfs://tarot1:9000/sparkTest/hello does not exist at read time.
    */
  def main(hello: Array[String]): Unit = {
    print("helloOff")
    // SparkConf setters mutate and return `this`; the original discarded the
    // chained result, which worked only by accident of that mutation. Chain
    // directly into the val instead.
    val conf = new SparkConf()
      .setMaster("spark://tarot1:7077")
      .setAppName("hello_off")
      .set("spark.executor.memory", "4g")
      .set("spark.executor.cores", "4")
      .set("spark.cores.max", "4")
      .set("spark.sql.crossJoin.enabled", "true")
    // TODO(review): conf.setJars(Seq("target/your-app.jar")) so the closure
    // classes (test$$anonfun$*) reach the executors when not using spark-submit.
    val sc = new SparkContext(conf)
    sc.setLogLevel("ERROR")

    val file = sc.textFile("hdfs://tarot1:9000/sparkTest/hello")
    val filterRDD = file.filter(_.contains("hello"))
    // cache() marks the RDD for caching and returns the same RDD; reuse it.
    val f = filterRDD.cache()
    f.take(100).foreach(println)
  }

  /** Same filter/count demo driven by an externally created SparkContext. */
  def helloingTest(jsc: SparkContext): Unit = {
    val file = jsc.textFile("hdfs://tarot1:9000/sparkTest/hello")
    val filterRDD = file.filter(_.contains("hello"))

    val f = filterRDD.cache()
    println(f)
    val i = filterRDD.count()
    println(i)
  }

  /** Overload for callers holding a JavaSparkContext (Java interop). */
  def helloingTest(jsc: JavaSparkContext): Unit = {
    val file = jsc.textFile("hdfs://tarot1:9000/sparkTest/hello")
    val filterRDD = file.filter((ss: String) => ss.contains("hello"))

    val f = filterRDD.cache()
    println(f)
    val i = filterRDD.count()
    println(i)
  }

}

java代码

 package com.tarot.sparkToHdfsTest;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.rdd.RDD;

//import scalaModule.hello;


public class App 
{
    /**
     * Entry point: configures Spark for the standalone cluster at
     * tarot1:7077 and runs the filter/take demo through a JavaSparkContext.
     */
    public static void main( String[] args )
    {
        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("spark://tarot1:7077")
                .setAppName("hello off")
                .set("spark.executor.memory","4g")
                .set("spark.executor.cores", "4")
                .set("spark.cores.max","4")
                .set("spark.sql.crossJoin.enabled", "true");
        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
        jsc.setLogLevel("ERROR");
        text(jsc);
//      test.helloingTest(jsc);
    }

/**
 * Reads the HDFS file, keeps only lines containing "hello", and prints the
 * first 100 matching lines.
 *
 * BUG FIX: the original called {@code jr.filter(f)} and discarded the
 * result. RDD transformations are immutable and return a NEW RDD, so the
 * filter was a no-op and the loop printed unfiltered lines. The filtered
 * RDD is now captured and used; cache() is applied to it (the data that is
 * actually consumed) instead of the raw input.
 *
 * @param jsc active Spark context
 */
private static void text(JavaSparkContext jsc){
    JavaRDD<String> jr = jsc.textFile("hdfs://tarot1:9000/sparkTest/hello",1);
    // filter() returns a new RDD; keep the reference instead of ignoring it.
    JavaRDD<String> filtered = jr.filter(f);
    filtered.cache();

    for (String string : filtered.take(100)) {
        System.out.println(string);
    }
    System.out.println("hello off");
}

/** Predicate: true when the line contains the substring "hello". */
public static Function<String, Boolean> f = new Function<String, Boolean>() {
    public Boolean call(String s) {
        return s.contains("hello");
    }
};
}

坑了很久了,网上的解决办法不是让我去shell上就是让我上传jar包,但是java不用啊?都调用的JVM。
大神救命

1个回答

Csdn user default icon
上传中...
上传图片
插入图片
抄袭、复制答案,以达到刷声望分或其他目的的行为,在CSDN问答是严格禁止的,一经发现立刻封号。是时候展现真正的技术了!
立即提问