ZeBinYao 2020-03-27 19:22 采纳率: 0%
浏览 460

进不去第三个for循环 print("2")都输出不了

pyhton代码

import time
import re
import codecs
from selenium import webdriver
from pony.orm import *
db = Database()
class Star(db.Entity):
    id = PrimaryKey(int, column='id', auto=True)
    name = Optional(str, column='name')
    gender = Optional(str, column='gender')
    href = Optional(str, column='href')
    year = Optional(int, column='year')
    month = Optional(int, column='month')
    day = Optional(int, column='day')
    xz = Optional(str, column='xz')
    html = Optional(str, column='html')
    address = Optional(str, column='address')
    height = Optional(int, column='height')
# db.generate_mapping()
time.sleep(1)
db.bind(provider = 'sqlite', filename = 'e:/python/star.sqlite')
db.generate_mapping(create_tables = True)
set_sql_debug(True)

#打开网页
driver = webdriver.Chrome()
url = 'https://www.baidu.com/s?wd=%E6%98%8E%E6%98%9F&rsv_spt=1&rsv_iqid=0xbe4b76860031fb66&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=&tn=baiduhome_pg&ch=&rsv_enter=1&rsv_dl=ib&inputT=2978'
driver.get(url)

#点击女
driver.find_element_by_xpath("//div[@id='1']/div/div/div/div[2]/p/span[4]").click()

#点击内地
driver.find_element_by_xpath("//div[@id='1']/div/div/div/div[2]/p[2]/span[3]").click()
time.sleep(1)

f1 = True
while f1 == True:
    html = driver.page_source
    reobj = re.compile(r'<p class="c-gap-top-small"><a href="([\d\D]*?)" title="(.{1,20})" target="_blank">[\d\D]*?</a></p>')#采集每个明星的href
    for match in reobj.finditer(html):
        hrefs = match.group(1)
        driver2 = webdriver.Chrome()
        url2 = 'https://www.baidu.com' + hrefs
        driver2.get(url2)#打开浏览器进入明星的链接
        names = match.group(2)
        html2 = driver2.page_source
        reobj4 = re.compile(r"""<div class="result-op c-container xpath-log" srcid="1547"[\d\D]*?<h3 class="t c-gap-bottom-small">
        <a href="([\d\D]*?)" target="_blank"><em>.{1,30}</em>_(.{4})</a>
        </h3>""")
        for match4 in reobj4.finditer(html2):
            if match4.group(2) == "百度百科":#判断点击改明星链接之后是否有该明星的百度百科
                driver3 = webdriver.Chrome()
                url3 = match4.group(1)
                driver2.quit()#关闭明星链接的浏览器
                driver3.get(url3)#进入明星百度百科的链接
                time.sleep(1)
                html3 = driver3.page_source
                reobj3 = re.compile(r"""<dt class="basicInfo-item name">星&nbsp;&nbsp;&nbsp;&nbsp;座</dt>
                <dd class="basicInfo-item value">
                ([\d\D]*?)
                </dd>
                <dt class="basicInfo-item name">血&nbsp;&nbsp;&nbsp;&nbsp;型</dt>
                <dd class="basicInfo-item value">
                A型
                </dd>
                <dt class="basicInfo-item name">[\d\D]*?
                <dd class="basicInfo-item value">
                ([\d\D]*?)
                </dd>
                [\d\D]*?ss="basicInfo-item name">出生地</dt>
                <dd class="basicInfo-item value">
                ([\d\D]*?)
                </dd>
                <dt class="basicInfo-item name">出生日期</dt>
                <dd class="basicInfo-item value">
                ([\d\D]*?)年([\d\D]*?)月([\d\D]*?)日[\d\D]*?</dd>""")
                for match3 in reobj3.finditer(html3):
                    print("2")
                    #新增一条数据
                    s = Star(name = names,gender = "女",href = hrefs,year = match3.group(6),month = match3.group(7),day = match3.group(8),xz = match3.group(1),html = html3,address = match3.group(5),height = match3.group(3))
                    db.commit()
                driver3.quit()#关闭明星百度百科链接的浏览器
    #判断下一页按钮是否存在,若存在则点击下一页
    list = []
    reobj2 = re.compile(r'<span class="opui-page-next OP_LOG_BTN" style="display: ([\d\D]*?)">下一页</span>')
    for match2 in reobj2.finditer(html):
        list.append(match2.group(1))
    if len(list)>0:
        f1 = False
    else:
        driver.find_element_by_xpath("//div[@id='1']/div/div/div[2]/div[2]/p/span[6]").click()#点击下一页
    time.sleep(1)
f.close()
  • 写回答

1条回答 默认 最新

  • 关注
    评论

报告相同问题?

悬赏问题

  • ¥15 maple软件,用solve求反函数出现rootof,怎么办?
  • ¥50 汇编语言除法溢出问题
  • ¥65 C++实现删除N个数据列表共有的元素
  • ¥15 Visual Studio问题
  • ¥15 state显示变量是字符串形式,但是仍然红色,无法引用,并显示类型不匹配
  • ¥20 求一个html代码,有偿
  • ¥100 关于使用MATLAB中copularnd函数的问题
  • ¥20 在虚拟机的pycharm上
  • ¥15 jupyterthemes 设置完毕后没有效果
  • ¥15 matlab图像高斯低通滤波