爬取豆瓣电影存入数据库,报错TypeError: %d format: a number is required, not str
import requests
from lxml import etree
import pymysql
import re
import time
# Open the MySQL connection used by the whole script.
# `port` must be an int: PyMySQL formats it with "%d" while connecting, so the
# string '3306' raises "TypeError: %d format: a number is required, not str".
# NOTE(review): credentials are hard-coded; consider reading them from the
# environment or a config file before sharing this script.
conn = pymysql.connect(host='localhost', user='root', passwd='123456', db='mydb', port=3306, charset='utf8')
cursor = conn.cursor()  # cursor on the connection above, used for the INSERT statements
# Browser-like User-Agent sent with every HTTP request made by the scraper.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'}
def get_movie_url(url):
    """Fetch one listing page and scrape every movie detail page linked from it.

    Args:
        url: URL of a listing page to download.

    Each href found under ``<div class="hd">`` is passed to
    ``get_movie_info``; nothing is returned.
    """
    html = requests.get(url, headers=headers)
    selector = etree.HTML(html.text)
    # The predicate must be closed with "]" before the next path step;
    # the unclosed form is not a valid XPath expression.
    movie_hrefs = selector.xpath('//div[@class="hd"]/a/@href')
    for movie_href in movie_hrefs:
        get_movie_info(movie_href)

def get_movie_info(url):
    """Scrape one movie detail page and insert a row into ``doubanmovie``.

    Args:
        url: URL of the movie detail page to download.

    The row is inserted through the module-level ``cursor``; committing is
    left to the caller. If any expected field is missing from the page
    (the lookup raises ``IndexError``), the page is skipped with a message
    and nothing is inserted.
    """
    html = requests.get(url, headers=headers)
    selector = etree.HTML(html.text)
    try:
        name = selector.xpath('//div[@id="content"]/h1/span/text()')[0]
        director = selector.xpath('//div[@id="info"]/span[1]/span[2]/a/text()')[0]
        # Select the element itself (not its text()) so that string(.) can
        # flatten the text of everything nested inside it; a text() result
        # is a plain string and has no .xpath() method.
        actors = selector.xpath('//div[@id="info"]/span[3]/span[2]')[0]
        actor = actors.xpath('string(.)')
        style = re.findall('<span property="v:genre">(.*?)</span>', html.text, re.S)[0]
        country = re.findall('<span class="pl">制片国家/地区:</span>(.*?)<br>', html.text, re.S)[0]
        release_time = re.findall('上映日期:</span>.*?>(.*?)</span>', html.text, re.S)[0]
        # Named `duration` rather than `time` so the imported `time` module
        # name is not shadowed inside this function.
        duration = re.findall('片长:</span>.*?>(.*?)</span>', html.text, re.S)[0]
        # No stray quote at the end of the expression, which would make the
        # XPath invalid.
        score = selector.xpath('//*[@id="interest_sectl"]/div[1]/div[2]/strong/text()')[0]
    except IndexError:
        # Best-effort scraping: a page missing a field is skipped, but
        # reported instead of being dropped silently.
        print('skipped {}: a required field was not found'.format(url))
        return

    # Parameterized INSERT; values are converted to plain str first
    # (presumably because lxml returns str subclasses - TODO confirm).
    cursor.execute(
        "insert into doubanmovie (name,director,actor,style,country,release_time,time,score) values(%s,%s,%s,%s,%s,%s,%s,%s)",
        (str(name), str(director), str(actor), str(style), str(country), str(release_time), str(duration), str(score)))

if __name__ == '__main__':
    # Listing pages: start offsets 0, 25, ..., 225.
    urls = ['https://movie.douban.com/top250?start={}'.format(i) for i in range(0, 250, 25)]
    try:
        for url in urls:
            get_movie_url(url)
            # Commit after each listing page so rows already scraped are
            # kept even if a later page fails.
            conn.commit()
            time.sleep(2)  # pause between listing pages
    finally:
        # Always release the database resources, even when scraping fails.
        cursor.close()
        conn.close()



3个回答

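问题出在 port='3306'，应该是 port=3306。pymysql 在连接时用 "%d" 格式化端口号（见下面 traceback 中的 "socket %s:%d"），因此 port 必须传整数，传字符串就会报 TypeError: %d format: a number is required, not str。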

urls=['https://movie.douban.com/top250?start={}'.format(str(i)) for i in range(0,250,25)]
->
urls=['https://movie.douban.com/top250?start={}'.format(i) for i in range(0,250,25)]

问题如果解决,请点下我回答左上角的采纳,谢谢

weixin_43289424
2017133130 问题不是出在这
接近 2 年之前 回复
 Traceback (most recent call last):
  File "H:/python代码/爬取豆瓣电影存入数据库.py", line 6, in <module>
    conn=pymysql.connect(host='localhost',user='root',passwd='123456',db='mydb',port='3306',charset='utf8')
  File "H:\Anaconda\lib\site-packages\pymysql\__init__.py", line 94, in Connect
    return Connection(*args, **kwargs)
  File "H:\Anaconda\lib\site-packages\pymysql\connections.py", line 327, in __init__
    self.connect()
  File "H:\Anaconda\lib\site-packages\pymysql\connections.py", line 588, in connect
    self.host_info = "socket %s:%d" % (self.host, self.port)
TypeError: %d format: a number is required, not str

Csdn user default icon
上传中...
上传图片
插入图片
抄袭、复制答案,以达到刷声望分或其他目的的行为,在CSDN问答是严格禁止的,一经发现立刻封号。是时候展现真正的技术了!
立即提问
相关内容推荐