想把两个表格按照最左边一列的标题进行合并,求解。合并后的数据需要用来做数据分析。
from bs4 import BeautifulSoup
import requests
import csv
import bs4
import easygui
import sys
# HTTP headers sent with every request; a desktop-browser User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
    ),
}
# Tag name looked up by get_contents(); also passed to main() by the crawl
# loop, where it ends up as the stem of the output file name.
name = "tr"
# NOTE(review): not referenced anywhere in the visible code. The path
# separator was lost in the pasted source; "d:/" is assumed -- confirm.
save = "d:/主板A股01.csv"
def check_link(url):
    """Fetch *url* with an HTTP POST and return the decoded response body.

    Args:
        url: Address of the page to download.

    Returns:
        The response text, decoded with the encoding that ``requests``
        detects from the body (``apparent_encoding``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 30 seconds.
    """
    # A timeout keeps one stalled page from hanging the whole crawl.
    r = requests.post(url, headers=headers, timeout=30)
    # Fail fast on error statuses before touching the body.
    r.raise_for_status()
    # Use the encoding detected from the body rather than the one taken
    # from the HTTP headers.
    r.encoding = r.apparent_encoding
    return r.text
def get_contents(ulist, rurl):
    """Parse HTML text and append one list of cell strings per matched tag.

    Every tag whose name equals the module-level ``name`` is located, and
    for each one the ``.string`` of each of its direct children is
    collected into a list that is appended to *ulist*.

    Args:
        ulist: List that receives the extracted rows; mutated in place.
        rurl: HTML text to parse (despite the parameter name, this is the
            page body, not a URL -- main() passes check_link()'s result).

    Returns:
        None. Results are delivered through *ulist*.
    """
    soup = BeautifulSoup(rurl, "html.parser")
    for tr in soup.find_all(name):
        # Iterating a tag yields its direct children; ``.string`` is None
        # for a child that does not wrap exactly one string.
        ulist.append([td.string for td in tr])
def save_contents(urlist, d, keyd, go, a, name):
    """Append the de-duplicated data rows of *urlist* to a CSV file.

    A row is written only when its index is past the leading block of
    rows and its first cell differs from the first cell of the row
    directly before it.

    Args:
        urlist: Sequence of rows, each a sequence of cell values.
        d: Unused; kept for interface compatibility.
        keyd: Unused; kept for interface compatibility.
        go: Unused; kept for interface compatibility.
        a: Unused; kept for interface compatibility.
        name: Stem of the output file name (".csv" is appended).

    Returns:
        None.
    """
    # NOTE(review): the path prefix lost its punctuation in the pasted
    # source; "d:/" is assumed -- confirm.
    # newline="" is required by the csv module; without it every row is
    # followed by a blank line on Windows.
    with open(
        "d:/" + name + ".csv", "a+", encoding="utf-8-sig", newline=""
    ) as f:
        writer = csv.writer(f)
        for i, row in enumerate(urlist):
            # NOTE(review): the comparison operator was lost in the pasted
            # source; ">" is assumed (skip the first 27 rows) -- confirm.
            if i > 26 and row[0] != urlist[i - 1][0]:
                writer.writerow(list(row))
def main(url, a, name):
    """Download one page, extract its rows and append them to the CSV file.

    Args:
        url: Address of the page to fetch.
        a: Forwarded unchanged to save_contents() (the crawl loop passes
            the page number).
        name: Forwarded to save_contents() as the output file name stem.

    Returns:
        None.
    """
    urli = []
    rs = check_link(url)
    get_contents(urli, rs)
    # The 0 / True / 0 arguments fill save_contents() parameters that it
    # does not use.
    save_contents(urli, 0, True, 0, a, name)
# Crawl: fetch result pages 1 through 5 and save each one.
for u in range(1, 6):
    print(u)
    urs1 = (
        "https://stock.cfi.cn/cfidata.aspx?sortfd=&sortway=&curpage="
        + str(u)
        + "&fr=content&ndk=A0A1934A1939A1959A1960&xztj=&mystock="
    )
    main(urs1, u, name)
可以直接在源代码上修改。这是一个爬虫的源码,爬取完成后会把数据存到表格里,需要把这 2 个表格合并,谢谢了。