应该是有的用户没有生日那一行,才出现了错误。但我不知道怎么改,请帮忙看看,谢谢。
# 代码来自龙王山小青椒https://www.bilibili.com/video/BV1M64y1u7wE
import requests
from lxml import etree
from collections import OrderedDict
from urllib.parse import quote
import csv
import traceback
import random
import re
from time import sleep
import os
from datetime import datetime, timedelta
import sys
import numpy as np
import pandas as pd
import time
header = {'Content-Type':'xx','User-Agent':'xx'}
Cookie = {'Cookie':'xxx'}
# 导入用户id
weibo_comment_df = pd.read_csv('weibo_comment.csv')
weibo_comments = weibo_comment_df.values.tolist()
print(len(weibo_comments))
for i in range(len(weibo_comments)):
url_base_1 = "https://weibo.cn/"
url_base_2 = "/info"
url = url_base_1 + str(weibo_comments[i][0]) + url_base_2
print(i)
print(url)
html = requests.get(url, headers=header, cookies=Cookie)
html.encoding='utf-8'
nickname = re.findall(r'<div class="c">昵称:(.*?)<br/>', html.text)
sex = re.findall(r'<br/>性别:(.*?)<br/>', html.text)
location = re.findall(r'<br/>地区:(.*?)<br/>', html.text)
birthday = re.findall(r'<br/>生日:(.*?)<br/>', html.text)
if birthday == []:
data1 = [(nickname[0], sex[0], location[0], ' ')]
else:
data1 = [(nickname[0], sex[0], location[0], birthday[0])]
data2 = pd.DataFrame(data1)
print(data2)
print(type(data2))
data2.to_csv('id_2011.csv')
time.sleep(1)