jieni_ 2018-11-14 16:00 采纳率: 0%
浏览 3545

spark--java.lang.ArrayIndexOutOfBoundsException: 10582

源文件:

package com.wy.movie;

import java.util.ArrayList;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.classification.NaiveBayes;
import org.apache.spark.mllib.classification.NaiveBayesModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.junit.Test;

public class BayesTest1 {

    @Test
    public void TestA(){

        /**
         * Local mode; "*" means use one worker thread per available CPU core.
         */
        SparkConf conf = new SparkConf().setAppName("NaiveBayesTest").setMaster("local[*]");

        /*
         * try-with-resources: JavaSparkContext implements Closeable, so the
         * context is released even if training/prediction throws. The original
         * code only called sc.close() on the success path, leaking the context
         * (and its local threads) on any exception.
         */
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {

            /**
             * MLlib local vectors come in two flavors: DenseVector and SparseVector.
             * Dense stores every component; sparse stores only the non-zero ones.
             */

            /**
             * Feature layout (index -> trait):
             * 0 short hair, 1 long hair, 2 sneakers, 3 high heels, 4 Adam's apple, 5 fair skin
             */

            /**
             * Two ways to build the same kind of vector:
             * (1.0, 0.0, 1.0, 0.0, 1.0, 0.0)  -- dense
             * (1.0, 1.0, 1.0, 1.0, 0.0, 1.0)  -- sparse
             */

            // Dense vector == every slot written out explicitly.
            Vector vMale = Vectors.dense(1, 0, 1, 0, 1, 0);

            // Sparse vector == only listed indices are set; unlisted slots default to 0.0.
            int len = 6;
            int[] index = new int[]{0, 1, 2, 3, 5};
            double[] values = new double[]{1, 1, 1, 1, 1};
            // Indices 0,1,2,3,5 are 1.0; index 4 is omitted, so it defaults to 0.0.
            Vector vFemale = Vectors.sparse(len, index, values);

            /**
             * A LabeledPoint pairs a local vector (dense or sparse) with a
             * double label. MLlib's supervised learners consume LabeledPoints.
             * Binary classification uses labels 0/1; multiclass uses class
             * indices 0, 1, 2, ...
             */

            // Training sample, dense vector, label 1.0 == male.
            LabeledPoint train_one = new LabeledPoint(1.0, vMale);     // (1.0, 0.0, 1.0, 0.0, 1.0, 0.0)
            // Training sample, sparse vector, label 2.0 == female.
            LabeledPoint train_two = new LabeledPoint(2.0, vFemale);   // (1.0, 1.0, 1.0, 1.0, 0.0, 1.0)
            // A class may have any number of training samples.
            LabeledPoint train_three = new LabeledPoint(2.0, Vectors.dense(0, 1, 1, 1, 0, 1));

            // Collect the three training samples.
            List<LabeledPoint> trains = new ArrayList<>();
            trains.add(train_one);
            trains.add(train_two);
            trains.add(train_three);

            /**
             * Spark's core abstraction is the RDD. JavaRDD is the Java-facing
             * wrapper; JavaSparkContext produces JavaRDDs, SparkContext
             * produces Scala RDDs.
             */
            JavaRDD<LabeledPoint> trainingRDD = sc.parallelize(trains);

            /**
             * NaiveBayes.train expects the Scala RDD, hence .rdd().
             */
            NaiveBayesModel nb_model = NaiveBayes.train(trainingRDD.rdd());

            // Test vector {0,0,0,0,1,0}: only feature 4 (Adam's apple) is set.
            // NOTE(review): the original comment claimed "short hair + sneakers",
            // which would be {1,0,1,0,0,0} — the comment, not the data, was wrong.
            double[] dTest = {0, 0, 0, 0, 1, 0};
            Vector vTest = Vectors.dense(dTest); // predict() accepts a single Vector (or an RDD of them)

            // Naive Bayes prediction: returns the label of the most likely class.
            int modelIndex = (int) nb_model.predict(vTest);
            System.out.println("标签分类编号:" + modelIndex);
            /**
             * Per-class membership probabilities for the test vector.
             */
            System.out.println(nb_model.predictProbabilities(vTest));
            if (modelIndex == 1) {
                System.out.println("答案:贝叶斯分类器推断这个人的性别是男性");
            } else if (modelIndex == 2) {
                System.out.println("答案:贝叶斯分类器推断这个人的性别是女性");
            }
        } // SparkContext closed automatically here.

    }
}

报错如下:

 java.lang.ArrayIndexOutOfBoundsException: 10582

    at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.accept(BytecodeReadingParanamer.java:563)
    at com.thoughtworks.paranamer.BytecodeReadingParanamer$ClassReader.access$200(BytecodeReadingParanamer.java:338)
    at com.thoughtworks.paranamer.BytecodeReadingParanamer.lookupParameterNames(BytecodeReadingParanamer.java:103)
    at com.thoughtworks.paranamer.CachingParanamer.lookupParameterNames(CachingParanamer.java:90)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.getCtorParams(BeanIntrospector.scala:44)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1(BeanIntrospector.scala:58)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$1$adapted(BeanIntrospector.scala:58)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:240)
    at scala.collection.Iterator.foreach(Iterator.scala:937)
    at scala.collection.Iterator.foreach$(Iterator.scala:937)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1425)
    at scala.collection.IterableLike.foreach(IterableLike.scala:70)
    at scala.collection.IterableLike.foreach$(IterableLike.scala:69)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:240)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:237)
    at scala.collection.AbstractTraversable.flatMap(Traversable.scala:104)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.findConstructorParam$1(BeanIntrospector.scala:58)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$19(BeanIntrospector.scala:176)
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:233)
    at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:32)
    at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:29)
    at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:194)
    at scala.collection.TraversableLike.map(TraversableLike.scala:233)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:226)
    at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:194)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14(BeanIntrospector.scala:170)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.$anonfun$apply$14$adapted(BeanIntrospector.scala:169)
    at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:240)
    at scala.collection.immutable.List.foreach(List.scala:388)
    at scala.collection.TraversableLike.flatMap(TraversableLike.scala:240)
    at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:237)
    at scala.collection.immutable.List.flatMap(List.scala:351)
    at com.fasterxml.jackson.module.scala.introspect.BeanIntrospector$.apply(BeanIntrospector.scala:169)
    at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$._descriptorFor(ScalaAnnotationIntrospectorModule.scala:21)
    at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.fieldName(ScalaAnnotationIntrospectorModule.scala:29)
    at com.fasterxml.jackson.module.scala.introspect.ScalaAnnotationIntrospector$.findImplicitPropertyName(ScalaAnnotationIntrospectorModule.scala:77)
    at com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair.findImplicitPropertyName(AnnotationIntrospectorPair.java:490)
    at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector._addFields(POJOPropertiesCollector.java:380)
    at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.collectAll(POJOPropertiesCollector.java:308)
    at com.fasterxml.jackson.databind.introspect.POJOPropertiesCollector.getJsonValueAccessor(POJOPropertiesCollector.java:196)
    at com.fasterxml.jackson.databind.introspect.BasicBeanDescription.findJsonValueAccessor(BasicBeanDescription.java:251)
    at com.fasterxml.jackson.databind.ser.BasicSerializerFactory.findSerializerByAnnotations(BasicSerializerFactory.java:346)
    at com.fasterxml.jackson.databind.ser.BeanSerializerFactory._createSerializer2(BeanSerializerFactory.java:216)
    at com.fasterxml.jackson.databind.ser.BeanSerializerFactory.createSerializer(BeanSerializerFactory.java:165)
    at com.fasterxml.jackson.databind.SerializerProvider._createUntypedSerializer(SerializerProvider.java:1388)
    at com.fasterxml.jackson.databind.SerializerProvider._createAndCacheUntypedSerializer(SerializerProvider.java:1336)
    at com.fasterxml.jackson.databind.SerializerProvider.findValueSerializer(SerializerProvider.java:510)
    at com.fasterxml.jackson.databind.SerializerProvider.findTypedValueSerializer(SerializerProvider.java:713)
    at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:308)
    at com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:3905)
    at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:3219)
    at org.apache.spark.rdd.RDDOperationScope.toJson(RDDOperationScope.scala:52)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:145)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.SparkContext.withScope(SparkContext.scala:699)
    at org.apache.spark.SparkContext.parallelize(SparkContext.scala:716)
    at org.apache.spark.api.java.JavaSparkContext.parallelize(JavaSparkContext.scala:134)
    at org.apache.spark.api.java.JavaSparkContext.parallelize(JavaSparkContext.scala:146)
    at com.wy.movie.BayesTest1.TestA(BayesTest1.java:83)

补充一个pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.wy</groupId>
<artifactId>movie</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>

<name>movie</name>
<description>Demo project for Spring Boot</description>

<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.1.0.RELEASE</version>
    <relativePath/> <!-- lookup parent from repository -->
</parent>

<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    <java.version>1.8</java.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
        <scope>test</scope>
    </dependency>

    <!-- JUnit单元测试 -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
    </dependency>

    <!-- HanLP汉语言处理包 -->
    <dependency>
        <groupId>com.hankcs</groupId>
        <artifactId>hanlp</artifactId>
        <version>portable-1.7.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-core -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.12</artifactId>
        <version>2.4.0</version>
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.apache.spark/spark-mllib -->
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-mllib_2.12</artifactId>
        <version>2.4.0</version>
        <!-- 注意:原来的 <scope>runtime</scope> 会让 NaiveBayes、LabeledPoint 等类在编译期不可见,
             这里应使用默认的 compile 作用域 -->
    </dependency>

    <!-- https://mvnrepository.com/artifact/org.codehaus.janino/janino -->
    <dependency>
        <groupId>org.codehaus.janino</groupId>
        <artifactId>janino</artifactId>
        <version>3.0.10</version>
    </dependency>
</dependencies>

<build>
    <plugins>
        <plugin>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-maven-plugin</artifactId>
        </plugin>
    </plugins>
</build>
</project>

就是执行到JavaRDD<LabeledPoint> trainingRDD = sc.parallelize(trains); 这句时出的错,请各位大佬帮忙看看 应该是个小问题 因为我没学过spark和scala什么的 所有很懵逼 先谢谢大家了!


  • 写回答

4条回答 默认 最新

  • 唯有杜康TM CSDN副总裁 2018-11-18 03:23
    关注

    把 paranamer 升级到 2.8。原因是旧版 paranamer 无法解析 JDK 1.8 编译器生成的新版字节码,在读取构造器参数名时发生数组越界(即你看到的 ArrayIndexOutOfBoundsException),升级后即可解决:

     <!-- https://mvnrepository.com/artifact/com.thoughtworks.paranamer/paranamer -->
    <dependency>
        <groupId>com.thoughtworks.paranamer</groupId>
        <artifactId>paranamer</artifactId>
        <version>2.8</version>
    </dependency>
    
    

    为了给你评论折腾半天,祝你在机器学习的路上早日成功!

    评论

报告相同问题?

悬赏问题

  • ¥60 版本过低apk如何修改可以兼容新的安卓系统
  • ¥25 由IPR导致的DRIVER_POWER_STATE_FAILURE蓝屏
  • ¥50 有数据,怎么建立模型求影响全要素生产率的因素
  • ¥50 有数据,怎么用matlab求全要素生产率
  • ¥15 TI的insta-spin例程
  • ¥15 完成下列问题完成下列问题
  • ¥15 C#算法问题, 不知道怎么处理这个数据的转换
  • ¥15 YoloV5 第三方库的版本对照问题
  • ¥15 请完成下列相关问题!
  • ¥15 drone 推送镜像时候 purge: true 推送完毕后没有删除对应的镜像,手动拷贝到服务器执行结果正确在样才能让指令自动执行成功删除对应镜像,如何解决?