先按原文贴出问题:
“UnicodeEncodeError: 'gbk' codec can't encode character '\U0001f917' in position 3664416: illegal multibyte sequence”
查遍了网上的方法,不得其解,可能是我碰到的这个问题的背景与网上解决方案的背景有所不同。这个问题我是在学习Python API用”plotly“可视化"Github"上的”repositories"的过程中碰到的。程序代码如下:
import requests
from plotly.graph_objs import Bar
from plotly import offline
url = 'https://api.github.com/search/repositories?q=language:python&sort=stars'
headers = {'Accept': 'application/vnd.github.v3+json'}
r = resquests.get(url, headers=headers)
print(f"Status code: {r.status_code}")
response_dict = r.json()
repo_dicts = response_dict['items']
repo_links, stars, labels = [], [], []
for repo_dict in repo_dicts:
repo_name = repo_dict['name']
repo_url = repo_dict['html_url']
repo_link = f"<a href='{repo_url}'>{repo_name}</a>"
repo_links.append(repo_link)
stars.append(repo_dict['stargazers_count'])
owner = repo_dict['owner']['login']
description = repo_dict['description']
label = f"{owner}<br />{description}"
labels.append(label)
data = [{
'type': 'bar',
'x': repo_links,
'y': stars,
'hovertext': labels,
'marker': {
'color': 'rgb(60,100,150)',
'line': {'width': 1.5, 'color': 'rgb(25,25,25)'},
},
'opacity': 0.6,
}]
my_layout = {
'title': 'Most-Starred Python Projects on Github',
'titlefont': {'size': 18, 'color': 'rgb(100,0,100)'},
'xaxis': {
'title': 'Repository',
'titlefont': {'size': 14, 'color': 'rgb(100,0,100)'},
'tickfont': {'size': 12, 'color': 'rgb(100,0,100)'},
},
'yaxis': {
'title': 'Stars',
'titlefont': {'size': 14, 'color': 'rgb(100,0,100)'},
'tickfont': {'size': 12, 'color': 'rgb(100,0,100)'},
},
}
fig = {'data': data, 'layout': my_layou}
offline.plot(fig, filename='python_repos_visual.html')
运行结果:
PS C:\Users......
Status code: 200
Traceback (most recent call last):
File "c:/Users......\python_repos_visual.py", line 60, in
offline.plot(fig, filename='python_repos_visual.html')
File "C:\Users......\Anaconda3\lib\site-packages\plotly\offline\offline.py", line 597, in plot
auto_open=auto_open,
File "C:\Users......\Anaconda3\lib\site-packages\plotly\io_html.py", line 542, in write_html
path.write_text(html_str)
File "C:\Users......\Anaconda3\lib\pathlib.py", line 1198, in write_text
return f.write(data)
UnicodeEncodeError: 'gbk' codec can't encode character '\U0001f917' in position 3664416: illegal multibyte sequence
直觉上判断,可能是几处HTML标记导致报错,于是对部分代码作修改:
# repo_links, stars, labels = [], [], []
# 修改为:
repo_names, stars, labels = [], [], []
# repo_url = repo_dict['html_url']
# repo_link = f"<a href='{repo_url}'>{repo_name}</a>"
# repo_links.append(repo_link)
# 修改为:
repo_names.append(repo_dict['name'])
# label = f"{owner}<br />{description}"
# 修改为:
label = f"{owner}"
# 'x': repo_links,
# 修改为:
‘x’: repo_names,
则程序可以正常运行:
修改后的代码失去了可单击的主页链接、以及鼠标放置条形图上显示完整更多信息的功能。