为什么我已经改了数据类型还是报错啊,希望各位帮助解决一下
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 377, in main
process()
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/worker.py", line 372, in process
serializer.dump_stream(func(split_index, iterator), outfile)
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 400, in dump_stream
vs = list(itertools.islice(iterator, batch))
File "/usr/local/spark/python/lib/pyspark.zip/pyspark/util.py", line 99, in wrapper
return f(*args, **kwargs)
File "/usr/local/spark/python/pyspark/sql/session.py", line 730, in prepare
verify_func(obj)
File "/usr/local/spark/python/pyspark/sql/types.py", line 1391, in verify
verify_value(obj)
File "/usr/local/spark/python/pyspark/sql/types.py", line 1372, in verify_struct
verifier(v)
File "/usr/local/spark/python/pyspark/sql/types.py", line 1391, in verify
verify_value(obj)
File "/usr/local/spark/python/pyspark/sql/types.py", line 1317, in verify_integer
verify_acceptable_types(obj)
File "/usr/local/spark/python/pyspark/sql/types.py", line 1280, in verify_acceptable_types
% (dataType, obj, type(obj))))
TypeError: field id: IntegerType can not accept object '3' in type <class 'str'>
```python
from pyspark.sql import Row
from pyspark.sql.types import *
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession

spark = SparkSession.builder.config(conf=SparkConf()).getOrCreate()

# Schema: id/age are integers, name/gender are strings.
schema = StructType([
    StructField("id", IntegerType(), True),
    StructField("name", StringType(), True),
    StructField("gender", StringType(), True),
    StructField("age", IntegerType(), True),
])

# Parse the raw records into lists of strings, e.g. ["3", "Mary", "F", "26"].
employeeRDD = spark.sparkContext.parallelize(
    ["3 Mary F 26", "4 Tom M 23"]
).map(lambda x: x.split(" "))

# Convert each record to a Row, casting id/age to int so the values
# actually match the IntegerType fields declared in the schema.
rowRDD = employeeRDD.map(
    lambda p: Row(int(p[0].strip()), p[1].strip(), p[2].strip(), int(p[3].strip()))
)

# BUG FIX: build the DataFrame from rowRDD (typed Rows), NOT employeeRDD
# (lists of raw strings). Passing employeeRDD is what raised
# "field id: IntegerType can not accept object '3' in type <class 'str'>":
# the int() conversions in rowRDD were never applied to the data being verified.
employeeDF = spark.createDataFrame(rowRDD, schema)