为什么用 pyecharts 画不出图形?代码是在“海豚人工智能与大数据实验室”网站上运行的,希望分析结果能够以可视化图表的形式展示。
from pyspark import SparkConf,SparkContext
from pyspark.sql.types import Row
from pyspark.sql import SparkSession
import pandas as pd
# Single-node local Spark session used for the whole analysis.
spark = (
    SparkSession.builder
    .appName("test")
    .master("local")
    .getOrCreate()
)

# The CSV is read as GBK text; its first row supplies the column names.
df = spark.read.csv(
    'file:///home/dolphin/Downloads/data.csv',
    header=True,
    encoding="GBK",
)
df.show(5)

from pyspark.sql.types import IntegerType

# Cast the three salary columns to integers so they can be compared
# numerically in the filters below.
for salary_col in ('最低年薪资', '最高年薪资', '平均年薪资'):
    df = df.withColumn(salary_col, df[salary_col].cast(IntegerType()))

# One boolean condition per average-annual-salary band, in label order.
avg_salary = df['平均年薪资']
salary_conditions = [
    avg_salary < 10,
    (avg_salary >= 10) & (avg_salary < 20),
    (avg_salary >= 20) & (avg_salary < 30),
    (avg_salary >= 30) & (avg_salary < 40),
    (avg_salary >= 40) & (avg_salary < 50),
    avg_salary >= 50,
]
salary = ['0~10', '10~20', '20~30', '30~40', '40~50', '50+']
salary_count = [df.filter(cond).count() for cond in salary_conditions]
# Keep the individual per-band names available at module level.
(
    s0_10_num,
    s10_20_num,
    s20_30_num,
    s30_40_num,
    s40_50_num,
    s50_num,
) = salary_count
print(salary_count)

# Row counts per category for each descriptive column, most frequent first,
# collected to pandas DataFrames.
(
    companyind_data,
    companysize_data,
    companytype_data,
    jobcity_data,
    experience_data,
    educational_data,
    welfare_data,
) = (
    df.groupBy(column).count().sort('count', ascending=False).toPandas()
    for column in (
        '所属行业',
        '公司规模',
        '公司类型',
        '工作城市',
        '经验要求',
        '学历要求',
        '提供福利',
    )
)
def loc_salary_count(df, loc_list):
    """Count rows per average-annual-salary band for each given city.

    For every entry of ``loc_list``, the rows of ``df`` whose ``工作城市``
    equals that entry are counted in six bands of ``平均年薪资``:
    [0, 10), [10, 20), [20, 30), [30, 40), [40, 50) and >= 50.

    Args:
        df: DataFrame exposing ``平均年薪资`` and ``工作城市`` columns and
            supporting ``filter(...).count()``.
        loc_list: Iterable of city values to match against ``工作城市``.
            NOTE(review): a plain string is iterated character by character.

    Returns:
        A flat list holding six counts per city, in ``loc_list`` order.
    """
    avg_salary = df['平均年薪资']
    bands = [
        (avg_salary >= 0) & (avg_salary < 10),
        (avg_salary >= 10) & (avg_salary < 20),
        (avg_salary >= 20) & (avg_salary < 30),
        (avg_salary >= 30) & (avg_salary < 40),
        (avg_salary >= 40) & (avg_salary < 50),
        avg_salary >= 50,
    ]

    counts = []
    for city in loc_list:
        in_city = df['工作城市'] == city
        # Each count is a separate filter + count on the DataFrame.
        counts.extend([df.filter(band & in_city).count() for band in bands])
    return counts
# Pass the actual city values (distinct cities, most postings first) taken from
# the per-city summary. The original passed the column name '工作城市' itself,
# which made the function loop over that string's individual characters
# instead of over city names.
# NOTE: this rebinds `loc_salary_count` from the function to its result list,
# so the function cannot be called again after this line.
loc_salary_count = loc_salary_count(df, jobcity_data['工作城市'].tolist())
from pyecharts.charts import Line
import pyecharts.options as opts
def line_base():
    """Render the salary-band distribution as a pyecharts line chart.

    The x axis uses the module-level ``salary`` band labels and the single
    series uses the matching ``salary_count`` values.

    Returns:
        The object produced by ``Line.render_notebook()``, intended for
        display in a notebook cell.
    """
    # The original used `price` / `price_count`, which are not defined anywhere
    # in the visible script, so the call raised NameError and no chart was
    # rendered. The salary labels and counts computed earlier are used instead.
    line1 = (
        Line()
        .add_xaxis(salary)
        .add_yaxis('fenbu', salary_count)
        .set_global_opts(title_opts=opts.TitleOpts(title='zhexiantu'))
    ).render_notebook()
    return line1


# NOTE(review): presumably this must stay the last expression of the notebook
# cell so the returned chart object is displayed; confirm in the target
# notebook environment.
line_base()