import requests
from bs4 import BeautifulSoup
def search_pubmed(query):
    """Search PubMed through the NCBI E-utilities ``esearch`` endpoint.

    Args:
        query: Search term to send as the ``term`` parameter.

    Returns:
        A list with the text of every ``<Id>`` element found in the
        response (PubMed IDs as strings); empty when there are none.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    # Pass the query via ``params`` so spaces and characters such as '&' or
    # '#' are URL-encoded instead of corrupting the request.
    response = requests.get(
        base_url + "esearch.fcgi",
        params={"db": "pubmed", "term": query},
        timeout=30,
    )
    response.raise_for_status()
    # An empty feature string makes BeautifulSoup raise FeatureNotFound; the
    # response is XML, so request the XML tree builder (provided by lxml).
    soup = BeautifulSoup(response.text, "xml")
    return [id_tag.text for id_tag in soup.find_all("Id")]
def _first_text(node, name, attrs=None):
    """Return the text of the first matching tag under *node*, or None."""
    tag = node.find(name, attrs or {})
    return tag.text if tag is not None else None


def fetch_details(pubmed_id):
    """Fetch one PubMed record via ``efetch`` and extract basic metadata.

    Args:
        pubmed_id: PubMed ID of the article to fetch.

    Returns:
        A dict with the keys ``'title'``, ``'abstract'``, ``'journal'`` and
        ``'doi'``. Each value is the text of the first matching XML element,
        or ``None`` when the record has no such element.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    response = requests.get(
        base_url + "efetch.fcgi",
        params={"db": "pubmed", "id": pubmed_id, "retmode": "xml"},
        timeout=30,
    )
    response.raise_for_status()
    # Parse exactly once with the XML builder. The earlier extra parse used
    # an undefined name (``r``) and raised NameError before reaching this.
    soup = BeautifulSoup(response.text, "xml")

    journal = _first_text(soup, "JournalTitle")
    if journal is None:
        # PubMed efetch XML carries the full journal name in <Journal><Title>;
        # fall back to it so the field is not always None.
        journal_tag = soup.find("Journal")
        if journal_tag is not None:
            journal = _first_text(journal_tag, "Title")

    return {
        'title': _first_text(soup, 'ArticleTitle'),
        # Only the first <AbstractText> section is returned.
        'abstract': _first_text(soup, 'AbstractText'),
        'journal': journal,
        'doi': _first_text(soup, 'ArticleId', {'IdType': 'doi'}),
    }
# Example usage: print the metadata of every article matching a search term.
# Guarded so that importing this module does not trigger network requests.
if __name__ == '__main__':
    import time

    for pubmed_id in search_pubmed('human'):
        print(fetch_details(pubmed_id))
        # NCBI asks clients without an API key to stay at or below
        # 3 requests per second; pause between fetches to respect that.
        time.sleep(0.34)
报错如下：
Traceback (most recent call last):
File "F:/桌面/抓2.py", line 38, in <module>
ids = search_pubmed('human')
File "F:/桌面/抓2.py", line 9, in search_pubmed
soup = BeautifulSoup(response.text, '')
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python311\Lib\site-packages\bs4\__init__.py", line 249, in __init__
raise FeatureNotFound(
bs4.FeatureNotFound: Couldn't find a tree builder with the features you requested: . Do you need to install a parser library?
已经安装 lxml 仍然报错，求解
用于抓取文章的代码,爬取时报错,请解决
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
1条回答 默认 最新
关注 代码 存在问题,已经帮你修改好了,下面的代码能够正常运行。
如果对你有帮助,点个采纳谢谢!
# !/usr/bin/env python
# -*- coding: utf-8 -*-
# @author: yjp
# @software: PyCharm
# @file: main.py
# @time: 2022-08-08 16:49
import requests
from bs4 import BeautifulSoup


def search_pubmed(query):
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    search_url = base_url + "esearch.fcgi?db=pubmed&term=" + query
    print(search_url)
    response = requests.get(search_url)
    soup = BeautifulSoup(response.text, 'xml')
    id_list = [id.text for id in soup.find_all('Id')]
    print(id_list)
    return id_list


def fetch_details(pubmed_id):
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    fetch_url = base_url + "efetch.fcgi?db=pubmed&id=" + pubmed_id + "&retmode=xml"
    print(fetch_url)
    response = requests.get(fetch_url)
    soup = BeautifulSoup(response.text, 'xml')
    try:
        title = soup.find('ArticleTitle').text
    except AttributeError:
        title = None
    try:
        abstract = soup.find('AbstractText').text
    except AttributeError:
        abstract = None
    try:
        journal = soup.find('JournalTitle').text
    except AttributeError:
        journal = None
    try:
        doi = soup.find('ArticleId', {'IdType': 'doi'}).text
    except AttributeError:
        doi = None
    return {'title': title, 'abstract': abstract, 'journal': journal, 'doi': doi}


if __name__ == '__main__':
    # Example usage
    ids = search_pubmed('human')
    for id in ids:
        details = fetch_details(id)
        print(details)
本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 1无用
悬赏问题
- ¥15 DS18B20内部ADC模数转换器
- ¥15 做个有关计算的小程序
- ¥15 MPI读取tif文件无法正常给各进程分配路径
- ¥15 如何用MATLAB实现以下三个公式(有相互嵌套)
- ¥30 关于#算法#的问题:运用EViews第九版本进行一系列计量经济学的时间数列数据回归分析预测问题 求各位帮我解答一下
- ¥15 setInterval 页面闪烁,怎么解决
- ¥15 如何让企业微信机器人实现消息汇总整合
- ¥50 关于#ui#的问题:做yolov8的ui界面出现的问题
- ¥15 如何用Python爬取各高校教师公开的教育和工作经历
- ¥15 TLE9879QXA40 电机驱动