我想获取网络上对联名产品发展的看法:选取微博、知乎两家网站,针对“联名产品发展前景”这一搜索词抓取用户评价并进行对比,最终得到词频统计和关键词统计。代码如下所示。


import requests
from bs4 import BeautifulSoup
from collections import Counter
import jieba.analyse
def fetch_weibo_content():
    """Fetch post texts from Weibo search results for the query
    "联名产品发展前景" (prospects of co-branded products).

    Returns:
        list[str]: visible text of each matched post paragraph; empty if
        the page layout differs or access is blocked.

    Raises:
        requests.RequestException: on network failure, timeout, or a
        non-2xx HTTP status.
    """
    # Query string is the URL-encoded form of "联名产品发展前景".
    url = "https://s.weibo.com/weibo?q=%E8%81%94%E5%90%8D%E4%BA%A7%E5%93%81%E5%8F%91%E5%B1%95%E5%89%8D%E6%99%AF"
    # Use a browser-like UA (the default python-requests UA is often blocked)
    # and a timeout so a stalled connection cannot hang the script forever.
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    # NOTE(review): Weibo search usually requires login cookies; without them
    # this may return a login/redirect page — verify before relying on results.
    soup = BeautifulSoup(response.text, "html.parser")
    # Post bodies are rendered as <p class="txt"> on the search result page.
    contents = soup.find_all("p", class_="txt")
    weibo_texts = [content.get_text() for content in contents]
    return weibo_texts
def fetch_zhihu_content():
    """Fetch answer/article texts from Zhihu search results for the query
    "联名产品发展前景" (prospects of co-branded products).

    Returns:
        list[str]: visible text of each matched content block; empty if
        the page layout differs or access is blocked.

    Raises:
        requests.RequestException: on network failure, timeout, or a
        non-2xx HTTP status.
    """
    # Query string is the URL-encoded form of "联名产品发展前景".
    url = "https://www.zhihu.com/search?q=%E8%81%94%E5%90%8D%E4%BA%A7%E5%93%81%E5%8F%91%E5%B1%95%E5%89%8D%E6%99%AF&type=content"
    # Use a browser-like UA and a timeout (same rationale as the Weibo fetch).
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly instead of silently parsing an error page.
    response.raise_for_status()
    # NOTE(review): Zhihu search is rendered client-side and guarded by
    # anti-bot checks; a plain GET likely returns little or no content —
    # confirm, or switch to Zhihu's API/a browser-automation approach.
    soup = BeautifulSoup(response.text, "html.parser")
    # Answer bodies are rendered as <div class="RichContent-inner">.
    contents = soup.find_all("div", class_="RichContent-inner")
    zhihu_texts = [content.get_text() for content in contents]
    return zhihu_texts
def word_frequency(texts):
    """Count word occurrences across *texts* after jieba segmentation.

    Args:
        texts: iterable of strings to segment and tally.

    Returns:
        collections.Counter mapping each segmented token to its count.
    """
    counts = Counter()
    # Feed each text's tokens straight into the Counter — no intermediate list.
    for text in texts:
        counts.update(jieba.cut(text))
    return counts
def extract_keywords(texts):
    """Extract the top-10 TF-IDF keywords from the concatenation of *texts*.

    Args:
        texts: iterable of strings to analyze as one document.

    Returns:
        list[str]: up to 10 keywords ranked by jieba's TF-IDF weighting.
    """
    combined = ' '.join(texts)
    return jieba.analyse.extract_tags(combined, topK=10)
if __name__ == "__main__":
    # Scrape both sites for the same search query.
    weibo_texts = fetch_weibo_content()
    zhihu_texts = fetch_zhihu_content()

    # Per-site word-frequency tables.
    weibo_word_counts = word_frequency(weibo_texts)
    zhihu_word_counts = word_frequency(zhihu_texts)

    # Per-site top keywords (TF-IDF).
    weibo_keywords = extract_keywords(weibo_texts)
    zhihu_keywords = extract_keywords(zhihu_texts)

    # Report results for side-by-side comparison.
    print("微博词频统计:", weibo_word_counts)
    print("知乎词频统计:", zhihu_word_counts)
    print("微博关键词:", weibo_keywords)
    print("知乎关键词:", zhihu_keywords)