为什么插上电源适配器之后代码运行不了?这么神奇?
这是没插电的时候!
这是插电的时候!
哪位大神看看有什么问题
代码如下
package L02
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.functions._
/** One housing-sales record parsed from a CSV row of house.csv.
  *
  * Numeric fields hold the listing's measurements and price; the three
  * String fields (`year_built`, `year_repair`, `sale_data`) are kept as raw
  * text exactly as they appear in the file.
  * NOTE(review): `sale_data` presumably means "sale date" — field name kept
  * for compatibility with existing callers.
  */
case class P2(
    selling_price: Double,  // sale price
    bedrooms_num: Double,   // number of bedrooms
    bathroom_num: Double,   // number of bathrooms
    housing_area: Double,   // living area
    parking_area: Double,   // parking lot area
    floor_num: Double,      // number of floors
    housing_rating: Double, // quality rating
    built_area: Double,     // above-ground built area
    basement_area: Double,  // basement area
    year_built: String,     // construction year (raw text)
    year_repair: String,    // renovation year (raw text)
    latitude: Double,
    longitude: Double,
    sale_data: String)      // sale date (raw text)
object L001 {
  /** Loads house.csv, prints per-column summary statistics, then reports
    * quarterly sales totals and per-rating price statistics.
    */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("test")
      .getOrCreate()
    val sc = spark.sparkContext
    import spark.implicits._

    // Relative to the working directory — fails if the app is launched from
    // anywhere other than the project root.
    val path = "src/main/scala/L02/house.csv"
    val rdd = sc.textFile(path, 1)
    rdd.collect().foreach(println)

    // Parse each CSV line into a P2 record.
    // NOTE(review): assumes no header row and no quoted fields containing
    // commas — confirm against house.csv.
    val df = rdd.map { line =>
      val f = line.split(",")
      P2(f(0).toDouble, f(1).toDouble, f(2).toDouble, f(3).toDouble,
        f(4).toDouble, f(5).toDouble, f(6).toDouble, f(7).toDouble,
        f(8).toDouble, f(9), f(10), f(11).toDouble, f(12).toDouble, f(13))
    }.toDF()
    df.show(5)

    // Count missing values in ONE column.
    // Fix: the original used data.na.drop(), which removes rows with a null
    // in ANY column, so every column reported the same "missing" count.
    def null_count(data: DataFrame, columnName: String): Unit = {
      val missing = data.filter(col(columnName).isNull).count()
      println(columnName + ":缺失值数量" + missing)
    }

    // Print max / min / mean / stddev of a numeric column, plus its null count.
    def mmas(data: DataFrame, columnName: String): Unit = {
      println(columnName + ":")
      // Single aggregation collected on the driver. The original ran four
      // separate jobs and printed inside foreach, which executes on the
      // executors — output only appears because master is local[*].
      val stats = data.agg(
        max(col(columnName)).as("max"),
        min(col(columnName)).as("min"),
        mean(col(columnName)).as("mean"),
        stddev(col(columnName)).as("stddev")
      ).head()
      println("max:" + stats.get(0))
      println("min:" + stats.get(1))
      println("mean:" + stats.get(2))
      println("stddev:" + stats.get(3))
      // Fix: the original hard-coded "selling_price" here, so every column's
      // stats block reported selling_price's null count instead of its own.
      null_count(data, columnName)
      println("*" * 30)
    }

    // String-typed columns only get a null count; numeric columns get full stats.
    for (columnName <- df.columns) {
      if (columnName == "year_built" || columnName == "year_repair" || columnName == "sale_data") {
        println(columnName + ":")
        null_count(df, columnName)
        println("-" * 20)
      } else {
        mmas(df, columnName)
      }
    }
    // (The original additionally re-ran null_count/mmas on selling_price here,
    // duplicating output the loop above already produced — removed.)

    // Drop incomplete rows, then derive a date and its calendar quarter.
    // NOTE(review): assumes sale_data is formatted yyyyMMdd — confirm; a
    // mismatched pattern yields null dates (or an exception under Spark 3's
    // CORRECTED datetime parsing).
    val houseDate = df.na.drop().withColumn("date", to_date(col("sale_data"), "yyyyMMdd"))
    val houseQuarter = houseDate.withColumn("quarter", quarter(col("date")))
    // Total sales per quarter.
    houseQuarter.groupBy("quarter").sum("selling_price").sort("quarter").show()
    // Listing count per rating, most common first.
    houseQuarter.groupBy("housing_rating").count().sort(desc("count")).show()
    // Average price per unit of living area, by rating.
    houseQuarter.groupBy("housing_rating")
      .agg(avg(col("selling_price") / col("housing_area")))
      .sort("housing_rating").show()

    // Release the local Spark resources (the original never stopped the session).
    spark.stop()
  }
}