问题遇到的现象和发生背景
问题相关代码,请勿粘贴截图
# -*- coding: utf-8 -*-
"""
Tushare社区股票数据抓取
"""
import urllib.request
import re
import pandas as pd
import pymysql
import os
def getHtml(url):
html = urllib.request.urlopen(url).read()
html = html.decode('gbk')
return html
def getStackCode(html):
s = r'<li><a target="_blank" href="http://quote.eastmoney.com/\S\S(.*?).html">'
pat = re.compile(s)
code = pat.findall(html)
return code
Url = 'http://quote.eastmoney.com/stocklist.html'
filepath = 'C:\\data\\'
code = getStackCode(getHtml(Url))
CodeList = []
for item in code:
if item[0]=='6':
CodeList.append(item)
for code in CodeList:
print('正在获取股票%s数据'%code)
url = 'http://quotes.money.163.com/service/chddata.html?code=0'+code+\
'&end=20161231&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP'
urllib.request.urlretrieve(url, filepath+code+'.csv')
运行结果及报错内容
Traceback (most recent call last):
File "C:/Python/WorkSpace/py_case/股票数据抓取.py", line 30, in
code = getStackCode(getHtml(Url))
File "C:/Python/WorkSpace/py_case/股票数据抓取.py", line 17, in getHtml
html = html.decode('gbk')
UnicodeDecodeError: 'gbk' codec can't decode byte 0xad in position 139: illegal multibyte sequence