#问题遇到的现象和发生背景
对人口普查数据进行分析,但在数据分割后进行模型训练时出现错误。
采用的训练模型为SGDClassifier
#问题相关代码,请勿粘贴截图
# Fit an SGD-trained linear classifier (loss='log') on the training split and
# evaluate it on the test split with score().
# NOTE(review): trainx/trainy/testx/testy are defined outside this snippet.
# The traceback below shows fit() converting X to np.float64, so any string
# column still present in trainx (e.g. an identifier such as 'ID_122543')
# must be dropped or encoded before this call -- TODO confirm which column
# holds that value.
from sklearn.linear_model import SGDClassifier
lr=SGDClassifier(loss='log',max_iter=100) # max number of iterations; only affects fit(); default is 5 (author's note -- the default is version-dependent, verify)
lr.fit(trainx,trainy)
lr.score(testx,testy)
#运行结果及报错内容
ValueError: could not convert string to float: 'ID_122543'
ValueError Traceback (most recent call last)
<ipython-input> in <module>()
2
3 lr=SGDClassifier(loss='log',max_iter=100)
-> 4 lr.fit(trainx,trainy)
5 lr.score(testx,testy)
D:\anaconda\lib\site-packages\sklearn\linear_model\stochastic_gradient.py in fit(self, X, y, coef_init, intercept_init, sample_weight)
584 loss=self.loss, learning_rate=self.learning_rate,
585 coef_init=coef_init, intercept_init=intercept_init,
-> 586 sample_weight=sample_weight)
587
588
D:\anaconda\lib\site-packages\sklearn\linear_model\stochastic_gradient.py in fit(self, X, y, alpha, C, loss, learning_rate, coef_init, intercept_init, sample_weight)
416 self.classes = None
417
-> 418 X, y = check_X_y(X, y, 'csr', dtype=np.float64, order="C")
419 n_samples, n_features = X.shape
420
D:\anaconda\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
571 X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,
572 ensure_2d, allow_nd, ensure_min_samples,
-> 573 ensure_min_features, warn_on_dtype, estimator)
574 if multi_output:
575 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
D:\anaconda\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
431 force_all_finite)
432 else:
-> 433 array = np.array(array, dtype=dtype, order=order, copy=copy)
434
435 if ensure_2d:
#我的解答思路和尝试过的方法
通过百度查询,该错误的常见原因有:数据中含有空格、特殊字符,或格式不正确。但我核对了报错信息中的数据 ID(ID_122543),没有发现上述问题。
#我想要达到的结果