为什么python爬虫的结果存储不到MySQL中?
# coding:utf-8
import requests
from bs4 import BeautifulSoup
import time
import pymysql
# 爬取数据
def get_information(page=0):
url = 'https://tieba.baidu.com/f?ie=utf-8&kw=%E5%A4%8D%E6%97%A6%E5%A4%A7%E5%AD%A6' + str(page+1)
headers={
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36",
"Referer": "https://tieba.baidu.com/f?ie=utf-8&kw=%E5%A4%8D%E6%97%A6%E5%A4%A7%E5%AD%A6"
}
r = requests.get(url,headers=headers)
soup = BeautifulSoup(r.content.decode("utf-8"),"html.parser")
out = soup.find("ul",attrs={"class":"for-list"})
datas = out.find_all('li')
datas_list = []
try:
for data in datas:
title = data.find('a', attrs={"class":"truetit"}).text.split()[0]
artical_link = "https://bbs.hupu.com" + data.find('a', attrs={"class": "truetit"}).attrs['href']
author = data.find('a', class_="aulink").text
author_link = data.find('a', class_="aulink").attrs['href']
create_time = data.find('a', style="color:#808080;cursor: initial; ").text
lastest_reply = data.find('span', class_='endauthor').text
datas_list.append({"title":title,"artical_link":artical_link,"author":author,"author_link":author_link,"create_time":create_time,"lastest_reply":lastest_reply})
except:
None
return datas_list
if __name__ == "__main__":
config = {
'host':'localhost',
'port':3306,
'user':'root',
'password':'root',
'charset':'utf8',
'database':'xinxiz',
}
connection = pymysql.connect(**config) # 创建连接
try:
cur = connection.cursor() # 创建游标
for page in range(2):
datas = get_information(page)
for data in datas:
cur.execute("INSERT INTO hupu_datas (title, artical_link, author, author_link,create_time, lastest_reply) VALUES(%s,%s,%s,%s,%s,%s)",(data['title'], data['artical_link'], data['author'], data['author_link'], data['create_time'], data['lastest_reply']))
print("正在爬取第%s页"%(page+1))
time.sleep(1)
except:
connection.rollback() # 若出错了,则回滚
finally:
cur.close() # 关闭游标
connection.commit() # 提交事务
connection.close() # 关闭连接