OllerGrande 2024-12-31 12:45 采纳率: 14.3%
浏览 15

爬虫+数据库+可视化


 
import requests
from bs4 import BeautifulSoup
import time
import sqlite3
 
 
# Request headers shared by every HTTP call in this script; the User-Agent
# is a desktop browser (Edge/Chrome on Windows) identification string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36 Edg/128.0.0.0"
}
 
 
def getPositionInfo(detail_url):
    """Fetch one job-detail page and store its fields in the database.

    The ``job``, ``academic``, ``position`` and ``salary`` values are read
    from the first elements with the classes ``new_job_name``,
    ``job_academic``, ``job_position`` and ``job_money cutom_font``. A
    missing element is stored as ``"N/A"``. The row is written through
    ``insert_into_db``.

    Args:
        detail_url: URL of the job-detail page to download.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    res = requests.get(detail_url, headers=headers, timeout=10)
    if res.status_code != 200:
        print(f"请求失败,状态码:{res.status_code}")
        return

    soup = BeautifulSoup(res.text, "lxml")

    def text_of(css_class, strip=False):
        # Text of the first element carrying css_class, or "N/A" if absent.
        node = soup.find(class_=css_class)
        if not node:
            return "N/A"
        return node.text.strip() if strip else node.text

    insert_into_db(
        text_of("new_job_name", strip=True),  # only the job name is stripped
        text_of("job_academic"),
        text_of("job_position"),
        text_of("job_money cutom_font"),
    )
 
 
def insert_into_db(job, academic, position, salary):
    """Append one row to the ``positions`` table of ``internship_data.db``.

    The table is created on first use. The connection is always closed, and
    the insert is rolled back if any statement fails.

    Args:
        job: value for the ``job`` column.
        academic: value for the ``academic`` column.
        position: value for the ``position`` column.
        salary: value for the ``salary`` column.
    """
    conn = sqlite3.connect('internship_data.db')
    try:
        # The connection context manager commits on success and rolls back
        # on an exception; it does not close the connection.
        with conn:
            # Create the table if it does not exist yet.
            conn.execute('''
                CREATE TABLE IF NOT EXISTS positions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    job TEXT,
                    academic TEXT,
                    position TEXT,
                    salary TEXT
                )
            ''')
            # Parameterized insert keeps scraped text out of the SQL itself.
            conn.execute('''
                INSERT INTO positions (job, academic, position, salary)
                VALUES (?,?,?,?)
            ''', (job, academic, position, salary))
    finally:
        conn.close()
 
 
def main():
    """Crawl listing pages 1 through 15 and scrape every job they link to.

    For each listing page, every element with the class
    ``title ellipsis font`` is treated as a link to a job-detail page and
    passed to ``getPositionInfo``. The loop sleeps two seconds between
    listing pages.

    Raises:
        requests.RequestException: if a listing request fails or times out.
    """
    for i in range(1, 16):
        url = f"https://www.shixiseng.com/interns?page={i}&type=intern&keyword=%E7%AE%97%E6%B3%95%E5%AE%9E%E4%B9%A0%E7%94%9F&area=&months=&days=&degree=&official=entry&enterprise=&salary=-0&publishTime=&sortType=&city=%E5%85%A8%E5%9B%BD&internExtend="
        # A timeout keeps one stalled connection from hanging the crawl.
        res = requests.get(url, headers=headers, timeout=10)
        if res.status_code == 200:
            soup = BeautifulSoup(res.text, "lxml")
            for item in soup.find_all(class_="title ellipsis font"):
                # Skip elements without a link instead of aborting the
                # whole crawl with a KeyError.
                detail_url = item.attrs.get("href")
                if not detail_url:
                    continue
                getPositionInfo(detail_url)
        else:
            print(f"请求失败,状态码:{res.status_code}")
        time.sleep(2)
 
 
# Entry point of the scraping script: run the crawl when executed directly.
if __name__ == "__main__":
    main()
 
 
 
from pyecharts.charts import Line, Pie
import sqlite3
import pandas as pd
from pyecharts import options as opts
 
 
def fetch_data_from_db():
    """Load the ``position`` and ``salary`` columns of every stored row.

    Returns:
        pandas.DataFrame with the columns ``city`` (the ``position`` column
        of the ``positions`` table, renamed by the query) and ``salary``.
    """
    conn = sqlite3.connect('internship_data.db')
    query = """
    SELECT position AS city, salary
    FROM positions
    """
    try:
        return pd.read_sql_query(query, conn)
    finally:
        # Close the connection even when the query fails (e.g. no table yet).
        conn.close()
 
 
def process_data(df):
    """Aggregate salary rows into a per-city average and a per-city count.

    The part of each salary string before the first "/" is parsed either as
    a single integer or as an integer range ``low-high``; the value recorded
    for the row is the midpoint of that range.

    Rows are skipped when the salary is not a string, contains "薪资面议",
    has more than two "-"-separated parts, or has a part that is not an
    integer.

    Args:
        df: DataFrame with a ``city`` column and a ``salary`` column.

    Returns:
        Tuple ``(cityDict, city_num_dict)``. ``cityDict`` maps each city to
        the floor of the mean of its row midpoints. ``city_num_dict`` maps
        each city to the number of rows that were parsed for it.
    """
    samples = {}
    for _, row in df.iterrows():
        city = row['city']
        salary = row['salary']
        if not isinstance(salary, str) or "薪资面议" in salary:
            continue
        bounds = salary.split("/")[0].split("-")
        if len(bounds) > 2:
            # Unrecognised format: skip the row rather than reuse the bounds
            # left over from the previous row (or hit an unbound name).
            continue
        try:
            # bounds[-1] is bounds[0] for a single value, the upper bound
            # for a range.
            average = (int(bounds[0]) + int(bounds[-1])) / 2
        except ValueError:
            continue
        samples.setdefault(city, []).append(average)

    cityDict = {
        city: sum(values) // len(values) for city, values in samples.items()
    }
    city_num_dict = {city: len(values) for city, values in samples.items()}
    return cityDict, city_num_dict
 
 
def _render_line_chart(x_values, y_values, series_name, title, y_axis_name,
                       output_path):
    # Build one line chart (max/min mark points, average mark line) and
    # render it to the HTML file at output_path.
    extremes = opts.MarkPointOpts(
        data=[
            opts.MarkPointItem(type_="max", name="最大值"),
            opts.MarkPointItem(type_="min", name="最小值"),
        ]
    )
    mean_line = opts.MarkLineOpts(
        data=[opts.MarkLineItem(type_="average", name="平均值")]
    )

    chart = Line()
    chart.add_xaxis(x_values)
    chart.add_yaxis(
        series_name=series_name,
        y_axis=y_values,
        label_opts=opts.LabelOpts(is_show=False),  # hide point labels
        markpoint_opts=extremes,
        markline_opts=mean_line,
    )
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title=title, subtitle="实习岗位"),
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(rotate=45),  # rotate x labels 45 degrees
            name="城市",
            name_location="middle",
            name_gap=30,
        ),
        yaxis_opts=opts.AxisOpts(
            name=y_axis_name,
            name_location="middle",
            name_gap=30,
        ),
        toolbox_opts=opts.ToolboxOpts(is_show=True),  # show the toolbox
        legend_opts=opts.LegendOpts(is_show=True),
    )
    chart.render(output_path)


def visualize_data(cityDict, city_num_dict):
    """Render two line charts, one per input mapping.

    Args:
        cityDict: mapping of city to average salary, written to
            ``salary_line.html``.
        city_num_dict: mapping of city to number of positions, written to
            ``positions_line.html``.
    """
    # Average salary per city.
    _render_line_chart(
        list(cityDict.keys()),
        list(cityDict.values()),
        "工资平均值",
        "不同城市工资平均值",
        "工资平均值",
        "salary_line.html",
    )
    # Number of positions per city.
    _render_line_chart(
        list(city_num_dict.keys()),
        list(city_num_dict.values()),
        "职位数量",
        "不同城市职位数量",
        "职位数量",
        "positions_line.html",
    )
 
 
def visualize_pie(city_num_dict):
    """Render each city's share of positions as a pie chart.

    Args:
        city_num_dict: mapping of city to number of positions. The chart is
            written to ``positions_pie.html``.
    """
    heading = opts.TitleOpts(title="不同城市职位数量占比", subtitle="实习岗位")
    legend = opts.LegendOpts(orient="vertical", pos_left="left", pos_top="20%")

    chart = Pie()
    chart.add(
        series_name="不同城市职位数量占比",
        data_pair=list(city_num_dict.items()),  # (city, count) pairs
        label_opts=opts.LabelOpts(formatter="{b}: {d}%"),
    )
    chart.set_global_opts(title_opts=heading, legend_opts=legend)
    chart.render("positions_pie.html")
 
 
# Entry point of the visualization script: load the stored rows, aggregate
# them per city, then render the two line charts and the pie chart.
if __name__ == "__main__":
    df = fetch_data_from_db()
    cityDict, city_num_dict = process_data(df)
    visualize_data(cityDict, city_num_dict)
    visualize_pie(city_num_dict)

怎样在每张图表上加上一张背景图?

  • 写回答

1条回答 默认 最新

  • 道友老李 JWE233286一种基于机器视觉的水表指针读数识别及修正的方法 专利发明者 2024-12-31 12:50
    关注
    让【道友老李】来帮你解答,本回答参考通义千问大模型编写,并整理提供,如果还有疑问可以点击头像关注私信或评论。
    如果答案让您满意,请采纳、关注,非常感谢!
    要在每张图表上加上背景图,你可以使用 `pyecharts` 库中的 `graphic` 组件来添加背景图片。以下是如何在每张图表上添加背景图的详细步骤和代码示例。

    步骤

    1. 准备背景图片:确保你有一个背景图片文件(例如 background.png)。
    2. 修改图表生成代码:在每个图表的全局配置中添加 graphic 组件来设置背景图片。

    代码示例

    假设你已经有一个背景图片文件 background.png,以下是修改后的代码示例:

    from pyecharts.charts import Line, Pie
    import sqlite3
    import pandas as pd
    from pyecharts import options as opts
    from pyecharts.globals import ThemeType
    
    def fetch_data_from_db():
        """Load the ``position`` and ``salary`` columns of every stored row.

        Returns:
            pandas.DataFrame with the columns ``city`` (the ``position``
            column of the ``positions`` table, renamed by the query) and
            ``salary``.
        """
        conn = sqlite3.connect('internship_data.db')
        query = """
        SELECT position AS city, salary
        FROM positions
        """
        try:
            return pd.read_sql_query(query, conn)
        finally:
            # Close the connection even when the query fails.
            conn.close()
    
    def process_data(df):
        """Aggregate salary rows into a per-city average and a per-city count.

        The part of each salary string before the first "/" is parsed either
        as a single integer or as an integer range ``low-high``; the value
        recorded for the row is the midpoint of that range.

        Rows are skipped when the salary is not a string, contains
        "薪资面议", has more than two "-"-separated parts, or has a part that
        is not an integer.

        Args:
            df: DataFrame with a ``city`` column and a ``salary`` column.

        Returns:
            Tuple ``(cityDict, city_num_dict)``. ``cityDict`` maps each city
            to the floor of the mean of its row midpoints. ``city_num_dict``
            maps each city to the number of rows that were parsed for it.
        """
        samples = {}
        for _, row in df.iterrows():
            city = row['city']
            salary = row['salary']
            if not isinstance(salary, str) or "薪资面议" in salary:
                continue
            bounds = salary.split("/")[0].split("-")
            if len(bounds) > 2:
                # Unrecognised format: skip the row rather than reuse the
                # bounds left over from the previous row.
                continue
            try:
                # bounds[-1] is bounds[0] for a single value, the upper
                # bound for a range.
                average = (int(bounds[0]) + int(bounds[-1])) / 2
            except ValueError:
                continue
            samples.setdefault(city, []).append(average)

        cityDict = {
            city: sum(values) // len(values) for city, values in samples.items()
        }
        city_num_dict = {city: len(values) for city, values in samples.items()}
        return cityDict, city_num_dict
    
    def _render_line_chart_with_background(x_values, y_values, series_name,
                                           title, y_axis_name, output_path):
        # Build one line chart (max/min mark points, average mark line) with
        # a background image, and render it to the HTML file at output_path.
        extremes = opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ]
        )
        mean_line = opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        )
        # Image element with a negative z, anchored via right=0 / bottom=0.
        background = opts.GraphicImage(
            graphic_item=opts.GraphicItem(
                id_="background",
                right=0,
                bottom=0,
                z=-10,
                bounding="raw",
                origin=[75, 75],
            ),
            graphic_imagestyle_opts=opts.GraphicImageStyleOpts(
                image="path/to/your/background.png",
                width=800,
                height=600,
                opacity=0.5,
            ),
        )

        chart = Line(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
        chart.add_xaxis(x_values)
        chart.add_yaxis(
            series_name=series_name,
            y_axis=y_values,
            label_opts=opts.LabelOpts(is_show=False),
            markpoint_opts=extremes,
            markline_opts=mean_line,
        )
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title=title, subtitle="实习岗位"),
            tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
            xaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(rotate=45),
                name="城市",
                name_location="middle",
                name_gap=30,
            ),
            yaxis_opts=opts.AxisOpts(
                name=y_axis_name,
                name_location="middle",
                name_gap=30,
            ),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            legend_opts=opts.LegendOpts(is_show=True),
            graphic_opts=[background],
        )
        chart.render(output_path)

    def visualize_data(cityDict, city_num_dict):
        """Render two line charts with a background image.

        Args:
            cityDict: mapping of city to average salary, written to
                ``salary_line.html``.
            city_num_dict: mapping of city to number of positions, written
                to ``positions_line.html``.
        """
        # Average salary per city.
        _render_line_chart_with_background(
            list(cityDict.keys()),
            list(cityDict.values()),
            "工资平均值",
            "不同城市工资平均值",
            "工资平均值",
            "salary_line.html",
        )
        # Number of positions per city.
        _render_line_chart_with_background(
            list(city_num_dict.keys()),
            list(city_num_dict.values()),
            "职位数量",
            "不同城市职位数量",
            "职位数量",
            "positions_line.html",
        )
    
    def visualize_pie(city_num_dict):
        """Render each city's share of positions as a pie chart with a background image.

        Args:
            city_num_dict: mapping of city to number of positions. The chart
                is written to ``positions_pie.html``.
        """
        heading = opts.TitleOpts(title="不同城市职位数量占比", subtitle="实习岗位")
        legend = opts.LegendOpts(orient="vertical", pos_left="left", pos_top="20%")
        # Image element with a negative z, anchored via right=0 / bottom=0.
        background = opts.GraphicImage(
            graphic_item=opts.GraphicItem(
                id_="background",
                right=0,
                bottom=0,
                z=-10,
                bounding="raw",
                origin=[75, 75],
            ),
            graphic_imagestyle_opts=opts.GraphicImageStyleOpts(
                image="path/to/your/background.png",
                width=800,
                height=600,
                opacity=0.5,
            ),
        )

        chart = Pie(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
        chart.add(
            series_name="不同城市职位数量占比",
            data_pair=list(city_num_dict.items()),  # (city, count) pairs
            label_opts=opts.LabelOpts(formatter="{b}: {d}%"),
        )
        chart.set_global_opts(
            title_opts=heading,
            legend_opts=legend,
            graphic_opts=[background],
        )
        chart.render("positions_pie.html")
    
    # Entry point: load the stored rows, aggregate them per city, then
    # render the two line charts and the pie chart.
    if __name__ == "__main__":
        df = fetch_data_from_db()
        cityDict, city_num_dict = process_data(df)
        visualize_data(cityDict, city_num_dict)
        visualize_pie(city_num_dict)
    

    说明

    1. 背景图片路径:请将 path/to/your/background.png 替换为你的实际背景图片路径。该图片由浏览器在打开生成的 HTML 时加载,因此应填写相对于 HTML 文件的路径、图片 URL 或 dataURI。
    2. 透明度:opacity=0.5 控制背景图片的透明度,可以根据需要调整。
    3. 位置和尺寸:right=0、bottom=0 让背景图片贴靠图表的右下角,width=800、height=600 控制图片大小;origin=[75, 75] 是旋转和缩放的中心点。这些值都可以根据需要调整。

    通过这种方式,你可以在每张图表上添加背景图片,使图表更加美观和个性化。

    评论

报告相同问题?

问题事件

  • 创建了问题 12月31日