问题遇到的现象和发生背景
由于最后一行代码一直要求申明 self,所以代码改为了K.parms_start_url(self= K)
这一问题解决后运行又发现报错
(K这个object并没有start_url的参数)
原文:
AttributeError: type object 'KSchengdu_event' has no attribute 'start_url'
问题相关代码,请勿粘贴截图
for page in range(3): # 模拟翻页场景,使用for循环
response = requests.post(self.start_url, headers=self.headers, json=self.data)
运行结果及报错内容
Traceback (most recent call last):
File "C:\Users\surface\PycharmProjects\爬虫程序\post实战,采集快手热门视频.py", line 60, in <module>
k.parms_start_url(self=k)
File "C:\Users\surface\PycharmProjects\爬虫程序\post实战,采集快手热门视频.py", line 28, in parms_start_url
response = requests.post(self.start_url, headers=self.headers, json=self.data)
AttributeError: type object 'KSchengdu_event' has no attribute 'start_url'
我的解答思路和尝试过的方法
我尝试过将 init中self的参数删除移植到 parms_start_url中直接定义,但是报错
我全部代码:
import requests
import os # 创建对应的保存文件夹
class KSchengdu_event(object):
os_path = os.getcwd() + '/成都活动视频/' #创建文件夹路径
if not os.path.exists(os_path):
os.mkdir(os_path)
def __init__(self):
self.start_url = 'https://www.kuaishou.com/graphql'
self.headers = {
'content - type': 'application / json',
'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_e6ce240f9ecffb9dc0cc2d826821e185; client_key=65890b29; userId=2759025539; kuaishou.server.web_st=ChZrdWFpc2hvdS5zZXJ2ZXIud2ViLnN0EqABe0zRDaO8HRKHXIbzw_P8Exx_YU0eZlfbGRGBS4X4pNpZ4xrdcfTHsJTuF-7_tXotCED7iBV4PuxWQxLtHciHQoPOIbJrFwQQYSyqnpbZmjyTlwoIOggQuut-iTagJB7ct4N5K1MxU9SfXoaOzOo4i-6zY3HB0Fj1eLv3agyYheN3LKcLGzJncYDrcUwump29samxzJfTuuD1qRqzuKRGwRoS-1Rj5-IBBNoxoIePYcxZFs4oIiDrpN2yGP1gXQvUJTQ-bFSIAdBNCsRqskqFDVw2LI50tSgFMAE; kuaishou.server.web_ph=4dcd55ac36f14b144915a410d1b3b4e7b559',
'Host': 'www.kuaishou.com',
'Origin': 'https: // www.kuaishou.com',
'Referer': 'https://www.kuaishou.com/search/video?searchKey=%E6%88%90%E9%83%BD%E6%B4%BB%E5%8A%A8',
'User - Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36'
}
self.data = {"operationName":"visionSearchPhoto","variables":{"keyword":"成都活动","pcursor":"1","page":"search","searchSessionId":"MTRfMjc1OTAyNTUzOV8xNjQ1Mzc1NzI1MTUwX-aIkOmDvea0u-WKqF82MDU4"},"query":"query visionSearchPhoto($keyword: String, $pcursor: String, $searchSessionId: String, $page: String, $webPageArea: String) {\n visionSearchPhoto(keyword: $keyword, pcursor: $pcursor, searchSessionId: $searchSessionId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n type\n author {\n id\n name\n following\n headerUrl\n headerUrls {\n cdn\n url\n __typename\n }\n __typename\n }\n tags {\n type\n name\n __typename\n }\n photo {\n id\n duration\n caption\n likeCount\n realLikeCount\n coverUrl\n photoUrl\n liked\n timestamp\n expTag\n coverUrls {\n cdn\n url\n __typename\n }\n photoUrls {\n cdn\n url\n __typename\n }\n animatedCoverUrl\n stereoType\n videoRatio\n __typename\n }\n canAddComment\n currentPcursor\n llsid\n status\n __typename\n }\n searchSessionId\n pcursor\n aladdinBanner {\n imgUrl\n link\n __typename\n }\n __typename\n }\n}\n"}
def parms_start_url(self):
for page in range(3): # 模拟翻页场景,使用for循环
response = requests.post(self.start_url, headers=self.headers, json=self.data)
# 将同类方法中数据传输需要加上self.
self.parms_response_data(response)
def parms_response_data(self, response):
# 将数据改为json模式
json_data = response.json()
# 数据提取
data_list = json_data['data']['visionSearchPhoto']['feeds']
# for 循环遍历大列表
for data_dict in data_list:
# 处理按照喜欢收集数据
likes = int(data_dict['photo']['realLikeCount'])
if likes > 10000:
title = data_dict['photo']['caption']
# 获取视频链接
video = data_dict['photo']['photoUrl']
# 发送请求,获取视频的字节流数据
data = requests.get(video).content
self.parms_save_data(title, data)
def parms_save_data(self, title, data):
with open(self.os_path + title + '.mp4' + 'wb') as f:
f.write(data)
print(f"{title}---------采集OK")
if __name__ == '__main__':
k = KSchengdu_event
k.parms_start_url(self=k)