# encoding=utf-8
import jieba
jieba.enable_paddle()# 启动paddle模式。 0.40版之后开始支持,早期版本不支持
strs=["我来到北京清华大学","乒乓球拍卖完了","中国科学技术大学"]
for str in strs:
seg_list = jieba.cut(str,use_paddle=True) # 使用paddle模式
print("Paddle Mode: " + '/'.join(list(seg_list)))
seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
print("Full Mode: " + "/ ".join(seg_list)) # 全模式
seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
print("Default Mode: " + "/ ".join(seg_list)) # 精确模式
seg_list = jieba.cut("他来到了网易杭研大厦") # 默认是精确模式
print(", ".join(seg_list))
seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") # 搜索引擎模式
print(", ".join(seg_list))
Traceback (most recent call last):
File "C:\Users\67597\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "C:\Users\67597\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "d:\app\vs2022\common7\ide\extensions\microsoft\python\core\debugpy\__main__.py", line 45, in <module>
cli.main()
File "d:\app\vs2022\common7\ide\extensions\microsoft\python\core\debugpy/..\debugpy\server\cli.py", line 444, in main
run()
File "d:\app\vs2022\common7\ide\extensions\microsoft\python\core\debugpy/..\debugpy\server\cli.py", line 285, in run_file
runpy.run_path(target_as_str, run_name=compat.force_str("__main__"))
File "C:\Users\67597\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 267, in run_path
code, fname = _get_code_from_file(run_name, path_name)
File "C:\Users\67597\AppData\Local\Programs\Python\Python39\lib\runpy.py", line 242, in _get_code_from_file
code = compile(f.read(), fname, 'exec')
File "D:\repos\Python\Test\Test.py", line 5
strs=["�����������廪��ѧ","ƹ������������","�й���ѧ������ѧ"]
^
SyntaxError: (unicode error) 'utf-8' codec can't decode byte 0xce in position 0: invalid continuation byte