Chaoy6565 2023-04-13 16:35

How can I fix fetch_20newsgroups() failing with HTTPError: HTTP Error 403: Forbidden when loading the sklearn 20 newsgroups dataset?

Code:

from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
news=fetch_20newsgroups(subset='all')

Error:

HTTPError                                 Traceback (most recent call last)
Input In [7], in <cell line: 3>()
      1 from sklearn.datasets import fetch_20newsgroups
      2 from sklearn.model_selection import train_test_split
----> 3 news=fetch_20newsgroups(subset='all')

File I:\anaconda\lib\site-packages\sklearn\datasets\_twenty_newsgroups.py:264, in fetch_20newsgroups(data_home, subset, categories, shuffle, random_state, remove, download_if_missing, return_X_y)
    262 if download_if_missing:
    263     logger.info("Downloading 20news dataset. This may take a few minutes.")
--> 264     cache = _download_20newsgroups(
    265         target_dir=twenty_home, cache_path=cache_path
    266     )
    267 else:
    268     raise IOError("20Newsgroups dataset not found")

File I:\anaconda\lib\site-packages\sklearn\datasets\_twenty_newsgroups.py:74, in _download_20newsgroups(target_dir, cache_path)
     71     os.makedirs(target_dir)
     73 logger.info("Downloading dataset from %s (14 MB)", ARCHIVE.url)
---> 74 archive_path = _fetch_remote(ARCHIVE, dirname=target_dir)
     76 logger.debug("Decompressing %s", archive_path)
     77 tarfile.open(archive_path, "r:gz").extractall(path=target_dir)

File I:\anaconda\lib\site-packages\sklearn\datasets\_base.py:1454, in _fetch_remote(remote, dirname)
   1432 """Helper function to download a remote dataset into path
   1433 
   1434 Fetch a dataset pointed by remote's url, save into path using remote's
   (...)
   1450     Full path of the created file.
   1451 """
   1453 file_path = remote.filename if dirname is None else join(dirname, remote.filename)
-> 1454 urlretrieve(remote.url, file_path)
   1455 checksum = _sha256(file_path)
   1456 if remote.checksum != checksum:

File I:\anaconda\lib\urllib\request.py:239, in urlretrieve(url, filename, reporthook, data)
    222 """
    223 Retrieve a URL into a temporary location on disk.
    224 
   (...)
    235 data file as well as the resulting HTTPMessage object.
    236 """
    237 url_type, path = _splittype(url)
--> 239 with contextlib.closing(urlopen(url, data)) as fp:
    240     headers = fp.info()
    242     # Just return the local path and the "headers" for file://
    243     # URLs. No sense in performing a copy unless requested.

File I:\anaconda\lib\urllib\request.py:214, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    212 else:
    213     opener = _opener
--> 214 return opener.open(url, data, timeout)

File I:\anaconda\lib\urllib\request.py:523, in OpenerDirector.open(self, fullurl, data, timeout)
    521 for processor in self.process_response.get(protocol, []):
    522     meth = getattr(processor, meth_name)
--> 523     response = meth(req, response)
    525 return response

File I:\anaconda\lib\urllib\request.py:632, in HTTPErrorProcessor.http_response(self, request, response)
    629 # According to RFC 2616, "2xx" code indicates that the client's
    630 # request was successfully received, understood, and accepted.
    631 if not (200 <= code < 300):
--> 632     response = self.parent.error(
    633         'http', request, response, code, msg, hdrs)
    635 return response

File I:\anaconda\lib\urllib\request.py:561, in OpenerDirector.error(self, proto, *args)
    559 if http_err:
    560     args = (dict, 'default', 'http_error_default') + orig_args
--> 561     return self._call_chain(*args)

File I:\anaconda\lib\urllib\request.py:494, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
    492 for handler in handlers:
    493     func = getattr(handler, meth_name)
--> 494     result = func(*args)
    495     if result is not None:
    496         return result

File I:\anaconda\lib\urllib\request.py:641, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
    640 def http_error_default(self, req, fp, code, msg, hdrs):
--> 641     raise HTTPError(req.full_url, code, msg, hdrs, fp)

HTTPError: HTTP Error 403: Forbidden
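
The traceback shows that fetch_20newsgroups() downloads the archive through urllib's urlretrieve()/urlopen() and the remote server answers 403 Forbidden. A common cause is the server rejecting Python's default User-Agent header. Below is a minimal workaround sketch, assuming (not confirmed for this server) that the blocked User-Agent is the cause; it installs a global opener with a browser-like User-Agent before calling fetch_20newsgroups(), which works because urlretrieve() uses the globally installed opener, as the traceback itself shows:

# Sketch of a possible workaround, not a confirmed fix: send a
# browser-like User-Agent so the download server does not reject
# the request made by sklearn's internal urlretrieve() call.
import urllib.request
from sklearn.datasets import fetch_20newsgroups

opener = urllib.request.build_opener()
opener.addheaders = [('User-Agent', 'Mozilla/5.0')]  # replace the default Python-urllib agent
urllib.request.install_opener(opener)                # affects urlopen()/urlretrieve() globally

news = fetch_20newsgroups(subset='all')              # retries the ~14 MB download

If the network itself blocks the download (for example a proxy or firewall), another option is to fetch the archive manually and let scikit-learn read it from its cache; the exact cache layout depends on the scikit-learn version, so check the _twenty_newsgroups.py file shown in the traceback.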
2 replies

  • ㋛㋛㋛㋛ 2023-07-25 18:18

    Have you solved this yet? I'm running into the same problem.
