import requests,re,os
from bs4 import BeautifulSoup
# Characters that are illegal or dangerous in file names: path separators,
# Windows-reserved punctuation and ASCII control characters.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _build_filename(alt, image_url):
    """Return a filesystem-safe file name for one image.

    The stem comes from the image's ``alt`` text and the extension from the
    last path segment of ``image_url`` (any query string is ignored).  If the
    cleaned alt text is empty, the URL's own base name is used instead, and
    ``'image'`` as a last resort.
    """
    # Last path segment of the URL without its query string.
    basename = image_url.rsplit('/', 1)[-1].split('?', 1)[0]
    url_stem, ext = os.path.splitext(basename)

    # Same punctuation stripping as before, followed by removal of anything
    # that could escape the target folder or be rejected by the filesystem.
    stem = re.sub(r'[,@??!!:。]', '', alt or '')
    stem = _UNSAFE_FILENAME_CHARS.sub('', stem).strip(' .')
    if not stem:
        stem = _UNSAFE_FILENAME_CHARS.sub('', url_stem).strip(' .') or 'image'
    return stem + _UNSAFE_FILENAME_CHARS.sub('', ext)


def get_url(url, save_dir=None, timeout=10):
    """Download every non-GIF image linked from a listing page.

    The page at ``url`` is fetched and parsed; each
    ``<a class="col-xs-6 col-sm-3">`` inside the
    ``<div class="page-content text-center">`` container is inspected.
    Anchors whose first ``<img>`` has exactly the class ``gif`` are skipped.
    For the others, the image referenced by ``data-original`` is downloaded
    and stored under a name derived from the ``alt`` text.  Files that already
    exist are not downloaded again.

    Args:
        url: Address of the listing page.
        save_dir: Folder to store images in.  Defaults to a ``斗图啦`` folder
            inside the current working directory; created if missing.
        timeout: Per-request timeout in seconds.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched.  Failures on individual images are reported and skipped.
    """
    headers = {
        # Correct HTTP header spellings: "User-Agent" and "Referer".
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
        'Referer': url,
    }

    res = requests.get(url, headers=headers, timeout=timeout)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'lxml')

    container = soup.find('div', class_='page-content text-center')
    if container is None:
        # Page layout differs from what this scraper expects; nothing to do.
        print('未找到图片列表')
        return

    if save_dir is None:
        save_dir = os.path.join(os.getcwd(), '斗图啦')
    # Create the folder here so this function also works when called alone.
    os.makedirs(save_dir, exist_ok=True)

    for a in container.find_all('a', attrs={'class': 'col-xs-6 col-sm-3'}):
        img = a.find('img')
        print(img)
        if img is None or img.get('class') == ['gif']:
            continue

        data = img.get('data-original')
        if not data:
            continue
        print(data)

        path = os.path.join(save_dir, _build_filename(img.get('alt', ''), data))
        if os.path.exists(path):
            print('文件已经存在')
            continue

        try:
            image = requests.get(data, headers=headers, timeout=timeout)
            image.raise_for_status()
        except requests.RequestException as exc:
            # One broken image must not abort the rest of the page.
            print('下载失败:', data, exc)
            continue

        print(path)
        # Images are binary: write the response bytes, not the URL text.
        with open(path, 'wb') as fp:
            fp.write(image.content)
def main():
    """Ensure the download folder exists, then scrape the first listing page.

    The folder is ``斗图啦`` inside the current working directory.  A message
    is printed when it is already present; otherwise it is created.
    """
    # os.path.join avoids the invalid "\斗" escape and the hard-coded
    # Windows separators of plain string concatenation.
    target_dir = os.path.join(os.getcwd(), '斗图啦')
    if os.path.isdir(target_dir):
        print('文件夹已存在')
    else:
        # exist_ok guards against the folder appearing between check and create.
        os.makedirs(target_dir, exist_ok=True)

    url = 'http://www.doutula.com/photo/list/?page=1'
    get_url(url)
# Run the scraper only when this file is executed as a script, not on import.
if "__main__" == __name__:
    main()