加载sklearn新闻数据集出错 fetch_20newsgroups() HTTPError: HTTP Error 403: Forbidden怎么解决?
程序:
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
news=fetch_20newsgroups(subset='all')
报错:
HTTPError Traceback (most recent call last)
Input In [7], in <cell line: 3>()
1 from sklearn.datasets import fetch_20newsgroups
2 from sklearn.model_selection import train_test_split
----> 3 news=fetch_20newsgroups(subset='all')
File I:\anaconda\lib\site-packages\sklearn\datasets\_twenty_newsgroups.py:264, in fetch_20newsgroups(data_home, subset, categories, shuffle, random_state, remove, download_if_missing, return_X_y)
262 if download_if_missing:
263 logger.info("Downloading 20news dataset. This may take a few minutes.")
--> 264 cache = _download_20newsgroups(
265 target_dir=twenty_home, cache_path=cache_path
266 )
267 else:
268 raise IOError("20Newsgroups dataset not found")
File I:\anaconda\lib\site-packages\sklearn\datasets\_twenty_newsgroups.py:74, in _download_20newsgroups(target_dir, cache_path)
71 os.makedirs(target_dir)
73 logger.info("Downloading dataset from %s (14 MB)", ARCHIVE.url)
---> 74 archive_path = _fetch_remote(ARCHIVE, dirname=target_dir)
76 logger.debug("Decompressing %s", archive_path)
77 tarfile.open(archive_path, "r:gz").extractall(path=target_dir)
File I:\anaconda\lib\site-packages\sklearn\datasets\_base.py:1454, in _fetch_remote(remote, dirname)
1432 """Helper function to download a remote dataset into path
1433
1434 Fetch a dataset pointed by remote's url, save into path using remote's
(...)
1450 Full path of the created file.
1451 """
1453 file_path = remote.filename if dirname is None else join(dirname, remote.filename)
-> 1454 urlretrieve(remote.url, file_path)
1455 checksum = _sha256(file_path)
1456 if remote.checksum != checksum:
File I:\anaconda\lib\urllib\request.py:239, in urlretrieve(url, filename, reporthook, data)
222 """
223 Retrieve a URL into a temporary location on disk.
224
(...)
235 data file as well as the resulting HTTPMessage object.
236 """
237 url_type, path = _splittype(url)
--> 239 with contextlib.closing(urlopen(url, data)) as fp:
240 headers = fp.info()
242 # Just return the local path and the "headers" for file://
243 # URLs. No sense in performing a copy unless requested.
File I:\anaconda\lib\urllib\request.py:214, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
212 else:
213 opener = _opener
--> 214 return opener.open(url, data, timeout)
File I:\anaconda\lib\urllib\request.py:523, in OpenerDirector.open(self, fullurl, data, timeout)
521 for processor in self.process_response.get(protocol, []):
522 meth = getattr(processor, meth_name)
--> 523 response = meth(req, response)
525 return response
File I:\anaconda\lib\urllib\request.py:632, in HTTPErrorProcessor.http_response(self, request, response)
629 # According to RFC 2616, "2xx" code indicates that the client's
630 # request was successfully received, understood, and accepted.
631 if not (200 <= code < 300):
--> 632 response = self.parent.error(
633 'http', request, response, code, msg, hdrs)
635 return response
File I:\anaconda\lib\urllib\request.py:561, in OpenerDirector.error(self, proto, *args)
559 if http_err:
560 args = (dict, 'default', 'http_error_default') + orig_args
--> 561 return self._call_chain(*args)
File I:\anaconda\lib\urllib\request.py:494, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
492 for handler in handlers:
493 func = getattr(handler, meth_name)
--> 494 result = func(*args)
495 if result is not None:
496 return result
File I:\anaconda\lib\urllib\request.py:641, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
640 def http_error_default(self, req, fp, code, msg, hdrs):
--> 641 raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: HTTP Error 403: Forbidden