# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import traceback
import re
import time
import requests
def GetHTMLSource(url):
try:
r=requests.get(url)
r.raise_for_status ()
r.encoding = r.apparent_encoding
return r.text
except:
print ( "异常" )
return ""
def SetFileName():
dirname = time.strftime ( '%Y%m%d' , time.localtime ( time.time () ) ) #获取当前日期
dirname += 'sh'
return dirname
def getStockList(lst , stock_list_url): # 获得东方财富网上以sh6开头的股票代码
html = GetHTMLSource ( stock_list_url )
soupdata = BeautifulSoup ( html , 'html.parser' )
a = soupdata.find_all ( 'a' ) # 用find_all方法遍历所有'a'标签,并取出在'a'标签里面的’href’数据
for i in a:
try:
href = i.attrs[ 'href' ]
lst.append ( re.findall ( r"sh6d{5}" , href )[ 0 ] )
except:
continue
def getStockInfo(lst , stock_info_url , fpath):
ndate = time.strftime ( '%Y%m%d' , time.localtime ( time.time () ) )
for stock in lst:
url = stock_info_url + stock + '.html'
html = GetHTMLSource ( url )
try:
if html == "":
continue
infoDict = {}
soup = BeautifulSoup ( html, 'html.parser' )
stockInfo = soup.find ( 'div' , attrs={'class': 'stock-bets'} )
if stockInfo == None:
continue
keyData = stockInfo.find_all ( 'dt' )
valueData = stockInfo.find_all ( 'dd' )
inp = stock + "," + ndate + ","
for i in range ( len ( keyData ) ):
key = keyData[ i ].text
val = valueData[ i ].text
infoDict[ key ] = val
inp += infoDict[ '最高' ] + "," + infoDict[ '换手率' ] + "," + infoDict[ '成交量' ] + "," + infoDict[ '成交额' ] + ""
with open ( fpath , 'a' , encoding='utf-8' ) as f:
f.write ( inp )
except:
traceback.print_exc ()
continue
def main():
stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
stock_info_url = 'https://gupiao.baidu.com/stock/'
output_file = 'D://a.txt'
slist = []
getStockList(slist,stock_list_url)
getStockInfo(slist,stock_info_url,output_file)
main()
爬取股票信息,python没报错但不能爬取出结果!急求大神啊!!???
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
2条回答 默认 最新
- BuXianShan 2019-05-03 22:51关注
你好,我调试了你的代码,发现 getStockList 函数的这一行出错了:
lst.append ( re.findall ( r"sh6d{5}" , href )[ 0 ] )
这里一直没有匹配到。所以lst列表一直为空,导致后面没有结果。
希望可以帮到你。解决 1无用
悬赏问题
- ¥50 导入文件到网吧的电脑并且在重启之后不会被恢复
- ¥15 (希望可以解决问题)ma和mb文件无法正常打开,打开后是空白,但是有正常内存占用,但可以在打开Maya应用程序后打开场景ma和mb格式。
- ¥20 ML307A在使用AT命令连接EMQX平台的MQTT时被拒绝
- ¥20 腾讯企业邮箱邮件可以恢复么
- ¥15 有人知道怎么将自己的迁移策略布到edgecloudsim上使用吗?
- ¥15 错误 LNK2001 无法解析的外部符号
- ¥50 安装pyaudiokits失败
- ¥15 计组这些题应该咋做呀
- ¥60 更换迈创SOL6M4AE卡的时候,驱动要重新装才能使用,怎么解决?
- ¥15 让node服务器有自动加载文件的功能