Bson蛋 2023-06-05 11:05 采纳率: 20%
浏览 22
已结题

Python爬虫出现 ('Connection aborted.', FileNotFoundError(2, 'No such file or directory')) 报错


# coding=utf-8
import csv
import random
import time
from urllib.parse import urljoin

import requests
import xlwt
from bs4 import BeautifulSoup
# Silence urllib3 warnings; the requests issued by this script pass verify=False.
requests.packages.urllib3.disable_warnings()
# NOTE(review): this rebinds a module attribute after `requests` was imported.
# HTTPAdapter presumably captured the old value as a default argument already,
# so this may have no effect on requests.get() -- confirm. Mounting
# HTTPAdapter(max_retries=...) on a Session is the documented way to retry.
requests.adapters.DEFAULT_RETRIES = 10  # increase the number of reconnection attempts
base_url = 'https://www.ti.com'  # prefix joined with image paths when the CSV is written
des_url = 'https://www.ti.com/product/'  # prefix of every product detail-page URL
url = 'https://www.ti.com/selectionmodel/api/gpn/result-list'  # endpoint queried for the part list
url_list = []  # not referenced anywhere else in the visible script
all_url = []  # detail-page URLs, filled from the part-list response
# User-agent strings used to build the header pool. Two of them appear twice;
# the duplicates are kept so the pool has the same 37 entries as before.
_user_agents = (
    'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (Linux; Android 8.0.0; SM-G955U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Mobile Safari/537.36',
    'Mozilla/5.0 (iPad; CPU OS 13_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/87.0.4280.77 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.109 Safari/537.36 CrKey/1.54.248666',
    'Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.188 Safari/537.36 CrKey/1.54.250320',
    'Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/10.0.9.2372 Mobile Safari/537.10+',
    'Mozilla/5.0 (PlayBook; U; RIM Tablet OS 2.1.0; en-US) AppleWebKit/536.2+ (KHTML like Gecko) Version/7.2.1.0 Safari/536.2+',
    'Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
    'Mozilla/5.0 (Linux; U; Android 4.1; en-us; GT-N7100 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
    'Mozilla/5.0 (Linux; U; Android 4.0; en-us; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
    'Mozilla/5.0 (Linux; Android 7.0; SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0.0; SM-G965U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.111 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.1.0; SM-T837A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.80 Safari/537.36',
    'Mozilla/5.0 (Linux; U; en-us; KFAPWI Build/JDQ39) AppleWebKit/535.19 (KHTML, like Gecko) Silk/3.13 Safari/535.19 Silk-Accelerated=true',
    'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; LGMS323 Build/KOT49I.MS32310c) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 550) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/14.14263',
    'Mozilla/5.0 (Linux; Android 6.0.1; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 10 Build/MOB31T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0.0; Nexus 5X Build/OPR4.170623.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0.0; Nexus 6P Build/OPP3.170518.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 7 Build/MOB30X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 520)',
    'Mozilla/5.0 (MeeGo; NokiaN9) AppleWebKit/534.13 (KHTML, like Gecko) NokiaBrowser/8.5.0 Mobile Safari/534.13',
    'Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PQ1A.181105.017.A1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.158 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 10; Pixel 4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 11; Pixel 3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.181 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1',
)

# One header dict per user agent, each paired with 'Connection': 'close'.
headers_list = [
    {'user-agent': agent, 'Connection': 'close'}
    for agent in _user_agents
]

# One header set is drawn at random here and reused for every request below.
headers = random.choice(headers_list)
# NOTE(review): `proxies` is never passed to any request in this script, and the
# values carry no scheme or port, so they would presumably not work as-is.
proxies = {
    'https': '47.100.69.29',
    'http': '47.100.69.29'
}
# Other destinationId values that were tried at some point:
# [57, 376, 346, 82, 3658, 1742, 361, 897, 1, 4, 2004, 367, 353, 2003, 727, 64]
params = {
    'destinationId': '64',
    'destinationType': 'GPT',
    'mode': 'parametric',
    'locale': 'en-US',
}
# Timeout is 600 seconds.
response = requests.get(url, verify=False, params=params, headers=headers, timeout=600)
# Surface HTTP errors here instead of an opaque JSON decoding error below.
response.raise_for_status()
json_data = response.json()  # decode the JSON body into Python objects
# Build one detail-page URL per entry of each result's 'opnList'; a missing or
# null 'opnList' contributes no URLs instead of raising.
all_url.extend(
    des_url + str(item['genericPartNumber']) + '/part-details/' + str(opn)
    for item in json_data['results']
    for opn in (item.get('opnList') or [])
)
# print(len(all_url))
def _safe_extract(page, getter, default=' '):
    """Return ``getter(page)``, falling back to *default* when markup is missing.

    Args:
        page: Parsed document handed to *getter*.
        getter: One-argument callable that walks *page* and returns the value.
        default: Value returned when the walk hits a missing element
            (``AttributeError`` on ``None``), a too-short result list
            (``IndexError``), or yields ``None``.

    Returns:
        The extracted value, or *default*.
    """
    try:
        value = getter(page)
    except (AttributeError, IndexError):
        return default
    return default if value is None else value


# Parallel result lists: index i of every list belongs to the same product page.
# Every scraped page appends exactly one value to each list so they stay aligned.
product_name = []
package = []
easy = []
description = []
img = []
rating = []
price = []
feature = []
qty = []
eccn = []

# CSS class strings of the page sections that are read below.
quality_cls = 'ti_ocb-quality-information ti_p-layout-space-small'
packaging_cls = 'ti_ocb-packaging-information ti_p-layout-space-small'
export_cls = 'ti_ocb-export-classification ti_p-layout-space-small'
short_desc_cls = 'ti_ocb-pdp-short-description u-header-3 u-margin-top-2 u-margin-bottom-0'
description_cls = 'ti_ocb-description ti_p-layout-space-small'
features_cls = 'ti_ocb-features ti_p-layout-space-small'

for url_base in all_url:
    time.sleep(random.randint(0, 1))  # pause 0 or 1 second between requests
    print(url_base)
    try:
        # Timeout is 600 seconds.
        response = requests.get(url_base, verify=False, headers=headers, timeout=600)
    except requests.RequestException as exc:
        # One unreachable page must not throw away everything scraped so far.
        print(f'request failed, skipping {url_base}: {exc}')
        continue
    soup = BeautifulSoup(response.text, 'lxml')
    # Pages that contain an <h1 class="mod-large"> are skipped entirely.
    if soup.find('h1', class_='mod-large') is not None:
        continue

    # Product name
    product_name.append(_safe_extract(
        soup, lambda page: page.find('span', class_='u-margin-right-4').text))
    # Rating
    rating.append(_safe_extract(
        soup,
        lambda page: page.find('div', class_=quality_cls)
        .find_next('tr').find_all_next('span')[1].text))
    # Package | Pins
    package.append(_safe_extract(
        soup,
        lambda page: page.find('div', class_=packaging_cls)
        .find_next('tr').find_next('a').text))
    # Package qty | Carrier
    qty.append(_safe_extract(
        soup,
        lambda page: page.find('div', class_=packaging_cls)
        .find_all_next('tr')[2].find_next('a').text))
    # US ECCN: the text after the first ':' of the first list item
    eccn.append(_safe_extract(
        soup,
        lambda page: page.find('div', class_=export_cls)
        .find_next('ul').find_next('li').text.split(':')[1]))
    # Product image path (the 'src' attribute)
    img.append(_safe_extract(
        soup,
        lambda page: page.find('div', class_='ti_p-row')
        .find_next('div').find_next('img').get('src')))
    # Short description
    easy.append(_safe_extract(
        soup, lambda page: page.find('h2', class_=short_desc_cls).text))
    # Detailed description
    description.append(_safe_extract(
        soup,
        lambda page: page.find('div', class_=description_cls).find_next('div').text))
    # Price, read from the 'data-priceitemval' attribute
    price.append(_safe_extract(
        soup,
        lambda page: page.find('tbody').find_all_next('tr')[3]
        .find_all_next('td')[1].find_next('span').get('data-priceitemval')))
    # Features
    feature.append(_safe_extract(
        soup,
        lambda page: page.find('div', class_=features_cls).find_next('div').text))
def _image_url(src):
    """Return an absolute image URL for *src*.

    A blank or missing *src* (the ' ' placeholder used for pages without an
    image) is returned as ' ' instead of being glued onto the site prefix.
    Other values are resolved against ``base_url``, so site-relative paths,
    absolute URLs and protocol-relative URLs all yield a valid link.
    """
    if not src or not src.strip():
        return ' '
    return urljoin(base_url, src)


# Append one CSV row per scraped product; the rows are built from the parallel
# result lists filled by the scraping loop.
with open('data.csv', 'a', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Header row (left disabled, as in the original script):
    # writer.writerow(['Img','Product Name','Easydescription','Rating','Package | Pins','Package qty | Carrier','Price','US ECCN','Description','Feature'])
    rows = zip(img, product_name, easy, rating, package, qty, price, eccn, description, feature)
    for count, row in enumerate(rows, start=1):
        (img_src, name, short_desc, rate, pack, pack_qty,
         unit_price, us_eccn, long_desc, feat) = row
        writer.writerow([
            _image_url(img_src),
            name,
            short_desc,
            rate,
            pack,
            pack_qty,
            '1000+/$' + str(unit_price),
            us_eccn,
            long_desc,
            feat,
        ])
        print(f'第{count}条写入成功')
# with open('data.csv', 'r',encoding='utf-8') as csvfile:
#     # 创建一个csv读取器
#     csvreader = csv.reader(csvfile)
#     # 创建一个新的excel文件和一个sheet
#     workbook = xlwt.Workbook(encoding='utf-8')
#     worksheet = workbook.add_sheet('Sheet1')
#     # 逐行读取csv文件,并将每行数据写入excel文件
#     for i, row in enumerate(csvreader):
#         for j, cell in enumerate(row):
#             worksheet.write(i, j, label=cell)
#     # 保存excel文件
#     workbook.save('example.xls')
  • 写回答

1条回答 默认 最新

  • lzl2040 人工智能领域新星创作者 2023-06-05 11:16
    关注

    在哪个位置报错的

    本回答被题主选为最佳回答 , 对您是否有帮助呢?
    评论

报告相同问题?

问题事件

  • 系统已结题 8月31日
  • 已采纳回答 8月23日
  • 创建了问题 6月5日

悬赏问题

  • ¥15 x趋于0时tanx-sinx极限可以拆开算吗
  • ¥500 把面具戴到人脸上,请大家贡献智慧
  • ¥15 任意一个散点图自己下载其js脚本文件并做成独立的案例页面,不要作在线的,要离线状态。
  • ¥15 各位 帮我看看如何写代码,打出来的图形要和如下图呈现的一样,急
  • ¥30 c#打开word开启修订并实时显示批注
  • ¥15 如何解决ldsc的这条报错/index error
  • ¥15 VS2022+WDK驱动开发环境
  • ¥30 关于#java#的问题,请各位专家解答!
  • ¥30 vue+element根据数据循环生成多个table,如何实现最后一列 平均分合并
  • ¥20 pcf8563时钟芯片不启振