import requests
import csv
from lxml import etree
def amd():
    """Scrape pages 1-5 of the spiderbuf level-4 exercise and save the cells.

    For every page the text of each ``<td>`` found under a ``<tr>`` is
    extracted with XPath ``string(.)``, which concatenates all descendant
    text nodes of the cell (so text wrapped in nested tags is included).

    Side effects per page:
      * prints the URL, a page banner and the ``|``-terminated cell text;
      * appends ONE row to ``训练3.csv`` with one column per cell;
      * appends ONE line to ``训练3.txt`` holding the ``|``-terminated text.

    Both output files are opened in append mode, so re-running the script
    adds to whatever they already contain.
    """
    # The headers never change between pages, so build them once.
    header = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
    }

    for i in range(1, 6):
        url = "http://www.spiderbuf.cn/beginner?level=4&pageno={}".format(i)
        print(url)

        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(url=url, headers=header, timeout=10).text
        s2 = etree.HTML(response)

        # string(.) returns the full text of the cell; element.text would stop
        # at the first child tag (and .text is an attribute, not a method).
        cells = [str(td.xpath("string(.)")) for td in s2.xpath('//tr/td')]

        # Same text the original built by repeated concatenation: every cell
        # followed by a "|" separator.
        s = "".join(cell + "|" for cell in cells)

        print("--------以下为第%d页数--------"%(i))
        print(s)

        # Everything below must stay inside the page loop (and inside the
        # function): `s` only exists here, which is why writing the file from
        # an outer indentation level raised "name 's' is not defined".
        with open("训练3.csv", "a+", newline="", encoding="utf8") as fin:
            writes = csv.writer(fin)
            # writerow() iterates its argument: handing it the string `s`
            # would emit one column per CHARACTER, so pass the list of cells.
            writes.writerow(cells)

        with open("训练3.txt", "a+", encoding="utf8") as fins:
            # Terminate the line so successive pages do not run together.
            fins.write(s + "\n")


if __name__ == '__main__':
    amd()
而在保存到 txt 文件时,提示变量 s 未定义,这是怎么回事?无论增加缩进还是取消缩进,结果都一样。
![img](https://img-mid.csdnimg.cn/release/static/image/mid/ask/956493124586176.png "#left")
python中txt保存不了
- 写回答
- 好问题 0 提建议
- 追加酬金
- 关注问题
- 邀请回答
-
3条回答 默认 最新
关注

```python
import requests
import csv
from lxml import etree

def amd():
    for i in range(1,6):
        url = "http://www.spiderbuf.cn/beginner?level=4&pageno={}".format(i)
        print(url)
        header = {
            "user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
        }
        response = requests.get(url=url,headers=header).text
        s2 = etree.HTML(response)
        s3 = s2.xpath('//tr/td')
        s = ""
        for j in s3:
            s += str(j.xpath("string(.)")) + "|"
        print("--------以下为第%d页数--------"%(i))
        print(s)
        with open("训练3.csv","a+",newline="",encoding="utf8") as fin:
            writes = csv.writer(fin)
            writes.writerow(s)
        with open("训练3.txt","a+",encoding="utf8") as fins:
            fins.write(s + "\n") # add a newline character at the end of each line

if __name__ == '__main__':
    amd()
```
解决 1无用
悬赏问题
- ¥15 使用百度地图api 位置函数报错?
- ¥15 metamask如何添加TRON自定义网络
- ¥66 关于川崎机器人调速问题
- ¥15 winFrom界面无法打开
- ¥30 crossover21 ARM64版本安装软件问题
- ¥15 mymetaobjecthandler没有进入
- ¥15 mmo能不能做客户端怪物
- ¥15 osm下载到arcgis出错
- ¥15 Dell g15 每次打开eiq portal后3分钟内自动退出
- ¥200 使用python编写程序,采用socket方式获取网页实时刷新的数据,能定时print()出来就行。