在跑LR分类的时候,遇到了以下的问题:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
# Defining the vectorizer function.
def cv(data):
count_vectorizer = CountVectorizer()
emb = count_vectorizer.fit_transform(data)
return emb, count_vectorizer
# Here I define my corpus and lables. These are used throughout all the other models/processes.
list_corpus = df_clean_list
list_labels = df_clean_text["sentiment"].values
X_train, X_test, y_train, y_test = train_test_split(list_corpus, list_labels, test_size=0.2,
random_state=40)
X_train_counts, count_vectorizer = cv(X_train)
X_test_counts = count_vectorizer.transform(X_test)
AttributeError Traceback (most recent call last)
<ipython-input-10-de04ef9a7d29> in <module>()
17 random_state=40)
18
---> 19 X_train_counts, count_vectorizer = cv(X_train)
20 X_test_counts = count_vectorizer.transform(X_test)
4 frames
/usr/local/lib/python3.7/dist-packages/sklearn/feature_extraction/text.py in _preprocess(doc, accent_function, lower)
69 """
70 if lower:
---> 71 doc = doc.lower()
72 if accent_function is not None:
73 doc = accent_function(doc)
AttributeError: 'float' object has no attribute 'lower'
请教大家该如何解决,非常感谢!