While running XLNet for text classification, I ran into the following error. How can I fix it? Any advice would be appreciated:
def tokenize(text):
    tokenized = tokenizer(text,
                          padding=True,
                          truncation=True,
                          return_tensors='tf',
                          return_token_type_ids=False,
                          return_attention_mask=False,
                          verbose=True)
    return tokenized

x_train_enc = tokenize(x_train)
x_test_enc = tokenize(x_test)

y_train_enc_one_hot = to_categorical(y_train_enc, num_classes=num_classes)
y_test_enc_one_hot = to_categorical(y_test_enc, num_classes=num_classes)
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_10776/2763515379.py in <module>
     11     return tokenized
     12 
---> 13 x_train_enc = tokenize(x_train)
     14 x_test_enc = tokenize(x_test)
     15 

/tmp/ipykernel_10776/2763515379.py in tokenize(text)
      8                           return_token_type_ids=False,
      9                           return_attention_mask=False,
---> 10                           verbose=True)
     11     return tokenized
     12 

/environment/miniconda3/lib/python3.7/site-packages/transformers/tokenization_utils_base.py in __call__(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
   2391         if not _is_valid_text_input(text):
   2392             raise ValueError(
-> 2393                 "text input must of type str (single example), List[str] (batch or single pretokenized example) "
   2394                 "or List[List[str]] (batch of pretokenized examples)."
   2395             )

ValueError: text input must of type str (single example), List[str] (batch or single pretokenized example) or List[List[str]] (batch of pretokenized examples).
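
For context: the traceback comes from the tokenizer's __call__, which only accepts str, List[str], or List[List[str]]. Below is a minimal sketch of one common fix, assuming x_train / x_test are a pandas Series or NumPy array of raw text rather than a plain list of strings (their construction isn't shown above); the XLNet checkpoint name is a placeholder.

# Sketch only: converts the container into a plain list of Python strings
# before calling the tokenizer. 'xlnet-base-cased' is a placeholder checkpoint.
from transformers import XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')

def tokenize(texts):
    # __call__ accepts only str, List[str] or List[List[str]],
    # so coerce whatever container the texts live in into a list of str.
    texts = [str(t) for t in list(texts)]
    return tokenizer(texts,
                     padding=True,
                     truncation=True,
                     return_tensors='tf',
                     return_token_type_ids=False,
                     return_attention_mask=False)

x_train_enc = tokenize(x_train)
x_test_enc = tokenize(x_test)

The same conversion applies to x_test; the to_categorical calls for the labels are unaffected by this change.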