# Python 3
# Source code follows.
import os # 调用系统变量
import re # 正则表达式相关
import urllib
import urllib.request
import urllib.error
import urllib.parse
import json
import socket
import time
class ImoocSpider:
    """Fetch an imooc.com search-result page for a keyword and save it to disk."""

    # Browser-like User-Agent sent with every request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}

    def getPythonInfo(self, keyWord):
        """Download the search page for ``keyWord`` and hand it to ``saveFile``.

        Decoding errors, URL errors and socket timeouts are printed rather
        than raised; on success the decoded page text is saved under the
        keyword's name.

        Args:
            keyWord: Search term; it is percent-encoded before being
                appended to the search URL.
        """
        myKeyWord = urllib.parse.quote(keyWord)
        searchUrl = 'https://www.imooc.com/search/?words=' + myKeyWord
        try:
            request = urllib.request.Request(url=searchUrl, headers=self.headers)
            # The ``with`` block closes the response only if it was actually
            # opened, so a failed ``urlopen`` no longer triggers a second
            # error from closing an unbound variable during cleanup.
            with urllib.request.urlopen(request) as page:
                # NOTE(review): 'unicode_escape' decodes the bytes as Latin-1
                # plus backslash escapes, so non-ASCII UTF-8 text in the page
                # will not round-trip -- confirm whether 'utf-8' was intended.
                rsp = page.read().decode('unicode_escape')
        except UnicodeDecodeError as e:
            print(e)
            print('-----UnicodeDecodeErrorurl:', searchUrl)
        except urllib.error.URLError as e:
            print(e)
            print("-----urlErrorurl:", searchUrl)
        except socket.timeout as e:
            print(e)
            print("-----socket timout:", searchUrl)
        else:
            self.saveFile(rsp, keyWord)
        finally:
            print("get_finally")

    def saveFile(self, res, keyWord):
        """Write ``res`` to ``./<keyWord>.txt`` unless that path already exists.

        Write failures are reported on stdout instead of being raised.

        Args:
            res: Object to write; it is emitted with ``print``, so a trailing
                newline is appended.
            keyWord: Base name (without extension) of the output file.
        """
        b = "./" + keyWord + '.txt'
        # Never overwrite: do nothing if a file/folder is already at the path.
        if os.path.exists(b):
            return
        try:
            # ``with`` closes the handle only when ``open`` succeeded.
            with open(b, 'w') as fp:
                print(res, file=fp)  # print into the file
        except (OSError, ValueError) as e:
            # OSError: path/permission/disk problems.
            # ValueError: invalid path or text not encodable by the file's
            # encoding (UnicodeEncodeError is a ValueError subclass).
            print(e)
            print('文件写入有误')
        finally:
            print('save_finally')

    def start(self, keyWord):
        """Run the spider for ``keyWord`` (fetch the page, then save it)."""
        self.getPythonInfo(keyWord)
if __name__ == '__main__':
    # Script entry point: crawl and save the search results for "python".
    spider = ImoocSpider()
    spider.start('python')