# coding=utf-8
import csv
import random
import time
from urllib.parse import urljoin

import requests
import xlwt
from bs4 import BeautifulSoup
# Silence urllib3 warnings (the requests in this script are sent with
# verify=False, which would otherwise warn on every call).
requests.packages.urllib3.disable_warnings()
# Raise the reconnect count.
# NOTE(review): HTTPAdapter appears to bind its max_retries default when the
# class is defined, so this assignment likely does not change what
# requests.get() does -- confirm; mounting HTTPAdapter(max_retries=...) on a
# Session is the documented way to get retries.
requests.adapters.DEFAULT_RETRIES = 10

# Site root, the product-page prefix, and the JSON endpoint that returns the
# result list; the latter two are built from the site root.
base_url = 'https://www.ti.com'
des_url = f'{base_url}/product/'
url = f'{base_url}/selectionmodel/api/gpn/result-list'

# URL accumulators. all_url receives the part-details page URLs; url_list is
# not referenced anywhere else in the visible code.
url_list, all_url = [], []
# User-Agent strings used to build the request-header pool below. A couple of
# strings appear twice; the repeats are kept so that a random pick from the
# pool has the same odds per string.
_USER_AGENTS = (
    'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (Linux; Android 8.0.0; SM-G955U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Mobile Safari/537.36',
    'Mozilla/5.0 (iPad; CPU OS 13_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/87.0.4280.77 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.109 Safari/537.36 CrKey/1.54.248666',
    'Mozilla/5.0 (X11; Linux aarch64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.188 Safari/537.36 CrKey/1.54.250320',
    'Mozilla/5.0 (BB10; Touch) AppleWebKit/537.10+ (KHTML, like Gecko) Version/10.0.9.2372 Mobile Safari/537.10+',
    'Mozilla/5.0 (PlayBook; U; RIM Tablet OS 2.1.0; en-US) AppleWebKit/536.2+ (KHTML like Gecko) Version/7.2.1.0 Safari/536.2+',
    'Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
    'Mozilla/5.0 (Linux; U; Android 4.1; en-us; GT-N7100 Build/JRO03C) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
    'Mozilla/5.0 (Linux; U; Android 4.0; en-us; GT-I9300 Build/IMM76D) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
    'Mozilla/5.0 (Linux; Android 7.0; SM-G950U Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0.0; SM-G965U Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.111 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.1.0; SM-T837A) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.80 Safari/537.36',
    'Mozilla/5.0 (Linux; U; en-us; KFAPWI Build/JDQ39) AppleWebKit/535.19 (KHTML, like Gecko) Silk/3.13 Safari/535.19 Silk-Accelerated=true',
    'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; LGMS323 Build/KOT49I.MS32310c) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Windows Phone 10.0; Android 4.2.1; Microsoft; Lumia 550) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Mobile Safari/537.36 Edge/14.14263',
    'Mozilla/5.0 (Linux; Android 6.0.1; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 10 Build/MOB31T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0.0; Nexus 5X Build/OPR4.170623.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 7.1.1; Nexus 6 Build/N6F26U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0.0; Nexus 6P Build/OPP3.170518.006) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 7 Build/MOB30X) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows Phone 8.0; Trident/6.0; IEMobile/10.0; ARM; Touch; NOKIA; Lumia 520)',
    'Mozilla/5.0 (MeeGo; NokiaN9) AppleWebKit/534.13 (KHTML, like Gecko) NokiaBrowser/8.5.0 Mobile Safari/534.13',
    'Mozilla/5.0 (Linux; Android 9; Pixel 3 Build/PQ1A.181105.017.A1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.158 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 10; Pixel 4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 11; Pixel 3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.181 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (Linux; Android 8.0.0; Pixel 2 XL Build/OPD1.170816.004) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Mobile Safari/537.36',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
    'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
    'Mozilla/5.0 (iPad; CPU OS 11_0 like Mac OS X) AppleWebKit/604.1.34 (KHTML, like Gecko) Version/11.0 Mobile/15A5341f Safari/604.1',
)

# Pool of request-header dicts: one independent dict per User-Agent string,
# each also carrying `Connection: close`.
headers_list = [
    {'user-agent': agent, 'Connection': 'close'}
    for agent in _USER_AGENTS
]
# One header set (User-Agent + Connection), drawn at random for this run.
headers = random.choice(headers_list)

# NOTE(review): this mapping is not passed to any request in the visible code,
# and its values carry no scheme or port -- confirm before wiring it in.
proxies = {
    'https': '47.100.69.29',
    'http': '47.100.69.29'
}

# Query string for the result-list endpoint.
# destinationId values from an earlier, now-disabled multi-ID loop:
#   57, 376, 346, 82, 3658, 1742, 361, 897, 1, 4, 2004, 367, 353, 2003, 727, 64
params = {
    'destinationId': '64',
    'destinationType': 'GPT',
    'mode': 'parametric',
    'locale': 'en-US',
}

# verify=False skips TLS certificate validation; the timeout is 600 seconds.
response = requests.get(url, verify=False, params=params, headers=headers, timeout=600)
# Stop on an HTTP error status here, rather than failing later with an opaque
# JSON decode error on an error page.
response.raise_for_status()
json_data = response.json()  # parsed JSON body (indexed as a dict below)

# Build one part-details URL for every opnList entry of every result.
for item in json_data['results']:
    part_prefix = des_url + str(item['genericPartNumber']) + '/part-details/'
    # A result whose opnList is missing or null contributes no URLs.
    for item1 in item.get('opnList') or []:
        all_url.append(part_prefix + str(item1))
# Per-column accumulators: index i across all ten lists describes one product.
product_name = []
package = []
easy = []
description = []
img = []
rating = []
price = []
feature = []
qty = []
eccn = []

# Placeholder stored (and written to the CSV) when a page lacks a field.
_BLANK = ' '


def _section_text(soup, css_class, extract):
    """Return extract(section) for the <div> carrying css_class.

    The placeholder is returned when the <div> is absent, or when it is
    present but lacks the inner elements that extract navigates to.
    """
    section = soup.find('div', class_=css_class)
    if section is None:
        return _BLANK
    try:
        return extract(section)
    except (AttributeError, IndexError):
        return _BLANK


def _image_src(soup):
    """Return the src of the first <img> after the first 'ti_p-row' div, or the placeholder."""
    try:
        image = soup.find('div', class_='ti_p-row').find_next('div').find_next('img')
    except AttributeError:
        return _BLANK
    src = image.get('src') if image is not None else None
    return src or _BLANK


def _price_value(soup):
    """Return the data-priceitemval of the price cell, or the placeholder.

    The cell is located purely by position (fourth <tr> after the first
    <tbody>, then the second <td> after it), so any layout difference falls
    back to the placeholder instead of raising.
    """
    try:
        cell = soup.find('tbody').find_all_next('tr')[3].find_all_next('td')[1]
        value = cell.find_next('span').get('data-priceitemval')
    except (AttributeError, IndexError):
        return _BLANK
    return value if value is not None else _BLANK


def _parse_product_page(soup):
    """Extract one product's fields from a parsed part-details page.

    Returns a tuple ordered as
    (img, product_name, easy, rating, package, qty, price, eccn,
    description, feature), or None when the page has no product-name element.
    """
    name_tag = soup.find('span', class_='u-margin-right-4')
    if name_tag is None:
        return None

    short_tag = soup.find(
        'h2',
        class_='ti_ocb-pdp-short-description u-header-3 u-margin-top-2 u-margin-bottom-0',
    )
    packaging = 'ti_ocb-packaging-information ti_p-layout-space-small'
    return (
        _image_src(soup),
        name_tag.text,
        short_tag.text if short_tag is not None else _BLANK,
        # Rating: second <span> after the first row of the quality section.
        _section_text(
            soup, 'ti_ocb-quality-information ti_p-layout-space-small',
            lambda s: s.find_next('tr').find_all_next('span')[1].text),
        # Package | Pins: first link after the first row of the packaging section.
        _section_text(
            soup, packaging,
            lambda s: s.find_next('tr').find_next('a').text),
        # Package qty | Carrier: first link after the third row of that section.
        _section_text(
            soup, packaging,
            lambda s: s.find_all_next('tr')[2].find_next('a').text),
        _price_value(soup),
        # US ECCN: text between the first and second ':' of the first list item.
        _section_text(
            soup, 'ti_ocb-export-classification ti_p-layout-space-small',
            lambda s: s.find_next('ul').find_next('li').text.split(':')[1]),
        _section_text(
            soup, 'ti_ocb-description ti_p-layout-space-small',
            lambda s: s.find_next('div').text),
        _section_text(
            soup, 'ti_ocb-features ti_p-layout-space-small',
            lambda s: s.find_next('div').text),
    )


# requests.get() builds a throwaway session per call, so retries have to be
# configured on an explicit Session with a mounted adapter. A Session also
# carries cookies from one request to the next.
session = requests.Session()
for _scheme in ('https://', 'http://'):
    session.mount(_scheme, requests.adapters.HTTPAdapter(max_retries=10))

# Column lists in the order _parse_product_page returns its values.
columns = (img, product_name, easy, rating, package, qty, price, eccn, description, feature)

try:
    for url_base in all_url:
        time.sleep(random.randint(0, 1))
        print(url_base)
        try:
            # A fresh User-Agent per request; verify=False skips TLS
            # certificate validation; the timeout is 600 seconds.
            response = session.get(
                url_base,
                verify=False,
                headers=random.choice(headers_list),
                timeout=600,
            )
        except requests.RequestException as exc:
            # One unreachable page must not abort the whole crawl.
            print(f'request failed, skipping {url_base}: {exc}')
            continue
        soup = BeautifulSoup(response.text, 'lxml')
        # Pages with an <h1 class="mod-large"> are skipped
        # (presumably the site's error page -- confirm).
        if soup.find('h1', class_='mod-large') is not None:
            continue
        row = _parse_product_page(soup)
        if row is None:
            print(f'no product name found, skipping {url_base}')
            continue
        # Append to every column together so the lists never drift apart.
        for column, value in zip(columns, row):
            column.append(value)
finally:
    # Written in `finally` so rows gathered before an unexpected error or an
    # interrupt are still saved.
    with open('data.csv', 'a', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        # Column order: Img, Product Name, Easydescription, Rating,
        # Package | Pins, Package qty | Carrier, Price, US ECCN,
        # Description, Feature
        for i, record in enumerate(zip(*columns)):
            (src, name, summary, rate, pins, carrier,
             unit_price, export_code, details, features) = record
            # urljoin copes with relative, root-relative and absolute src
            # values; a missing image or price stays a blank cell.
            image_url = _BLANK if src == _BLANK else urljoin(base_url, src)
            price_label = _BLANK if unit_price == _BLANK else '1000+/$' + str(unit_price)
            writer.writerow([image_url, name, summary, rate, pins, carrier,
                             price_label, export_code, details, features])
            print(f'第{i + 1}条写入成功')
# Disabled: copy data.csv into an .xls workbook.
# with open('data.csv', 'r',encoding='utf-8') as csvfile:
#     # Create a CSV reader
#     csvreader = csv.reader(csvfile)
#     # Create a new Excel workbook with one sheet
#     workbook = xlwt.Workbook(encoding='utf-8')
#     worksheet = workbook.add_sheet('Sheet1')
#     # Read the CSV row by row and write every cell into the sheet
#     for i, row in enumerate(csvreader):
#         for j, cell in enumerate(row):
#             worksheet.write(i, j, label=cell)
#     # Save the Excel file
#     workbook.save('example.xls')