Aduirrrr
2021-08-16 16:57
采纳率: 100%
浏览 37
已结题

Python 爬中文页面但爬到的是英文 附码

import requests
from bs4 import BeautifulSoup
import pandas

# Request headers for fetching the shop page.
#
# No 'cookie' header is sent on purpose. The cookie string previously pasted
# here appeared to be Google-account session cookies (names such as SID, HSID,
# SAPISID, NID, adwords-*), not cookies issued by the target shop. Sending
# them to an unrelated host exposes account credentials, and the pasted
# literal was also split across two physical lines, which is a syntax error
# for a single-quoted string.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    'Accept-Language': 'zh-CN',
}
# Fetch the sale listing page and print the text of every <h3> element.
#
# NOTE: per the accepted answer on this page, the server returns the English
# page; the Chinese text is substituted client-side by a separate translation
# POST request, so the headings printed here are English regardless of the
# Accept-Language header.
url = 'https://bbbeauty.com.hk/collections/sale?page=1'

# The context manager releases the session's pooled connections; the timeout
# keeps the script from hanging forever on an unresponsive server.
with requests.Session() as s:
    res = s.get(url, headers=headers, timeout=10)

# Stop on 4xx/5xx instead of silently parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.text, "html.parser")

result = soup.find_all('h3')
for heading in result:
    print(heading.getText())

img

要爬中文产品名称,但怎么爬都是英文

img

是否和网页右下角的中文英文切换有关?

img

有没有解决的方法可以分享,谢谢!

  • 好问题 提建议
  • 收藏

4条回答 默认 最新

  • 喜欢摸鱼的程序员 2021-08-16 17:08
    已采纳

    本页面本身还是英文的,它这个转中文的按钮实际上是发送了一个 POST 请求,
    响应返回一批英文与中文的对应关系,然后再在页面上把英文替换成中文。

    img

    img

    已采纳该答案
    评论
    解决 1 无用
    打赏 举报
  • syh_c_python 2021-08-16 17:02

    看爬出来的英文,估计是和中英文切换有关,你试着切换一下,然后复制此时的链接,再爬取一次试试

    评论
    解决 无用
    打赏 举报
  • Pliosauroidea 2021-08-16 17:10

    访问了原页面,我看到的原页面就是全英文的。。

    评论
    解决 无用
    打赏 举报
  • Aduirrrr 2021-08-16 21:17

    已解決

    import json
    
    # Headers sent with the translation request below.
    headers = {
        'referer': 'https://www.google.com/',
        'accept-language': 'zh-TW,zh;q=0.9',
        'content-type': 'text/plain;charset=UTF-8',
        # The Cookie header is assembled from its individual name=value pairs;
        # joining with '; ' yields the same single-line cookie string.
        'cookie': '; '.join((
            'secure_customer_sig=',
            '_orig_referrer=',
            '_landing_page=%2Fcollections%2Fsale%3Fpage%3D1',
            '_y=c45fc6eb-95d9-42d1-8f95-f027580dc1c4',
            '_shopify_y=c45fc6eb-95d9-42d1-8f95-f027580dc1c4',
            '_ga=GA1.3.1753150338.1629094792',
            '_gid=GA1.3.1976097218.1629094792',
            'cart=d1dcb532d38ec5a83b84a5e42c7f2173',
            'cart_sig=9337995286bb8eac5de7c4615bb62e2f',
            '_fbp=fb.2.1629094793202.703685456',
            'fsb_total_price_396801=0',
            'shopify_recently_viewed=ultimune-power-infusing-eye-concentrate',
            'fsb_previous_pathname=/collections/sale',
            'cart_ts=1629095102',
            'cart_ver=gcp-us-east1%3A12',
        )),
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36',
    }
    payload={"l_from":"en","l_to":"tw","words":[{"t":9,"w":"\n    Sale \u2013 BB Beauty\n  "},{"t":1,"w":"Feed"},{"t":1,"w":"Home"},{"t":1,"w":"ULTIMUNE Power Infusing Eye Concentrate (EXP:04/2022)"},{"t":1,"w":"Hydrating Cleanser \uff08italian version\uff09"},{"t":1,"w":"B.A Wash"},{"t":1,"w":"anti-oxi+ skin refining cleansing oil (for any skin type)"},{"t":1,"w":"Beauty Travel Kit (9PCS)"},{"t":1,"w":"Eye Color Quad"},{"t":1,"w":"Softening Cleansing Foam"},{"t":1,"w":"Wild Bluebell Cologne"},{"t":1,"w":"Ultra Facial Cream"},{"t":1,"w":"The Moisturizing Soft Cream"},{"t":1,"w":"Optim-Eyes Contour Care - Essentials Set"},{"t":1,"w":"Korea Red Ginseng Extract Everytime Balance 10ml X30pcs"},{"t":1,"w":"Kincho Kaori Ring- Insect Repellent Ring 30pcs"},{"t":1,"w":"Intral Redness Relief Soothing Serum"},{"t":1,"w":"Vitamin C Suspension 23% + HA Spheres 2%"},{"t":1,"w":"Anti-Wrinkle & Firming Travel Exclusive Limited Edition"},{"t":1,"w":"COCO MADEMOISELLE Eau De Toilette Refillable Spray"},{"t":1,"w":"ULTIMUNE Power Infusing Concentrate"},{"t":1,"w":"Miss Dior EDP"},{"t":1,"w":"Balancing Cleansing Oil"},{"t":1,"w":"HTC Deep Charge Collagen 10 Bottles"},{"t":1,"w":"Intral - Soothing Cream"},{"t":1,"w":"Facial Treatment Essence"},{"t":1,"w":"Long-Lasting Hydrating Veil SPF25 PA++"},{"t":1,"w":"Bio-Cellular"},{"t":1,"w":"Shiseido Men Hydrating Lotion"},{"t":1,"w":"Papaw Ointment"},{"t":1,"w":"Prodigy Cellglow The Radiant Eye Treatment"},{"t":1,"w":"Twist Eau De Parfum"},{"t":1,"w":"Fleur D 'Argent Eau De Perfum Absolue"},{"t":1,"w":"The Collagen Powder W"},{"t":1,"w":"Double Serum Complete Age Control Concentrate"},{"t":1,"w":"YOUTHMUD Tinglexfoliate Treatment"},{"t":1,"w":"Cleanser"},{"t":1,"w":"Pitera BestSeller Trial Kit Set (4PCS)"},{"t":1,"w":"DR. 
ANDREW WEIL FOR ORIGINS Mega-Mushroom Skin Relief Soothing Face Lotion"},{"t":1,"w":"Dramatically Different Moisturizing Lotion+"},{"t":1,"w":"POWERCELL - 24H Eye Care"},{"t":1,"w":"Body Fit Cellulite Control (7th Generation)"},{"t":1,"w":"25 NEO Vitamin C Serum"},{"t":1,"w":"BLACK TEA Kombucha Facial Treatment Essence"},{"t":1,"w":"ADVANCED NIGHT REPAIR Eye Supercharged Complex"},{"t":1,"w":"L'Or Rose Super-activated Firming Oil with Pink Berries"},{"t":1,"w":"Moisture Surge 100H Auto-Replenishing Hydrator"},{"t":1,"w":"UV Physical Broad-Spectrum SPF41"},{"t":1,"w":"GENOPTICS Aura Essence"},{"t":1,"w":"True White Day Protector SPF26"},{"t":1,"w":"Primavista Ange Moisture Keep Base UV SPF16/PA++"},{"t":1,"w":"Powered by Neighborhood"},{"t":3,"w":"Search"},{"t":3,"w":"Enter your search terms"},{"t":7,"w":"BB Beauty"},{"t":1,"w":"** Sale **"},{"t":1,"w":"Trial Size "},{"t":1,"w":"Skin Care"},{"t":1,"w":"back"},{"t":1,"w":"Day Care"},{"t":1,"w":"Night Care"},{"t":1,"w":"Serum"},{"t":1,"w":"Face Oil"},{"t":1,"w":"Mask"},{"t":1,"w":"Eye Care"},{"t":1,"w":"Lip Care"},{"t":1,"w":"Neck Care"},{"t":1,"w":"UV Protection"},{"t":1,"w":"Toner"},{"t":1,"w":"Face Wash"},{"t":1,"w":"Makeup Remover"},{"t":1,"w":"Skin Care Sets"},{"t":1,"w":"Makeup"},{"t":1,"w":"Eyebrows"},{"t":1,"w":"Mascara"},{"t":1,"w":"Eyeliner"},{"t":1,"w":"Eyeshadow"},{"t":1,"w":"Concealer"},{"t":1,"w":"Face Powder"},{"t":1,"w":"Foundation"},{"t":1,"w":"Primer"},{"t":1,"w":"CC & BB Cream"},{"t":1,"w":"Blush"},{"t":1,"w":"Lipstick"},{"t":1,"w":"Lip Gloss"},{"t":1,"w":"Makeup Sets"},{"t":1,"w":"Tools"},{"t":1,"w":"Perfume"},{"t":1,"w":"Women"},{"t":1,"w":"Men"},{"t":1,"w":"Deodorant"},{"t":1,"w":"Perfume Sets"},{"t":1,"w":"Hair & Body Care"},{"t":1,"w":"Shampoo"},{"t":1,"w":"Conditioner"},{"t":1,"w":"Treatment"},{"t":1,"w":"Hair Styling"},{"t":1,"w":"Body Wash"},{"t":1,"w":"Body Lotion"},{"t":1,"w":"Hand/Foot Cream"},{"t":1,"w":"Slimming Care"},{"t":1,"w":"Feminine care"},{"t":1,"w":"Baby 
Products"},{"t":1,"w":"Pregnant Care"},{"t":1,"w":"For Men"},{"t":1,"w":"Hygiene Products"},{"t":1,"w":"Health & Beauty Supplements"},{"t":1,"w":"All Brands (A-Z)"},{"t":1,"w":"Europe"},{"t":1,"w":"Chanel"},{"t":1,"w":"Clarins"},{"t":1,"w":"Dior"},{"t":1,"w":"Giorgio Armani"},{"t":1,"w":"Jo Malone"},{"t":1,"w":"Lanc\u00f4me"},{"t":1,"w":"La Prairie"},{"t":1,"w":"Laura Mercier"},{"t":1,"w":"NARS"},{"t":1,"w":"Valmont"},{"t":1,"w":"YSL"},{"t":1,"w":"More..."},{"t":1,"w":"Japan"},{"t":1,"w":"ALBION"},{"t":1,"w":"Cl\u00e9 de Peau"},{"t":1,"w":"Cosme Decorte"},{"t":1,"w":"DHC"},{"t":1,"w":"FANCL"},{"t":1,"w":"IPSA"},{"t":1,"w":"KOS\u00c9"},{"t":1,"w":"Obagi"},{"t":1,"w":"Shiseido"},{"t":1,"w":"Shu Uemura"},{"t":1,"w":"Svelty"},{"t":1,"w":"SK-II"},{"t":1,"w":"USA"},{"t":1,"w":"Aesop"},{"t":1,"w":"Clinique"},{"t":1,"w":"Elizabath Arden"},{"t":1,"w":"Elta MD"},{"t":1,"w":"Est\u00e9e Lauder"},{"t":1,"w":"Fresh"},{"t":1,"w":"Kiehl's"},{"t":1,"w":"La Mer"},{"t":1,"w":"L'Or\u00e9al"},{"t":1,"w":"MAC"},{"t":1,"w":"Origins"},{"t":1,"w":"Korea"},{"t":1,"w":"3CE"},{"t":1,"w":"A.H.C."},{"t":1,"w":"Etude House"},{"t":1,"w":"Innisfree"},{"t":1,"w":"It's skin"},{"t":1,"w":"Jayjun"},{"t":1,"w":"JM Solution"},{"t":1,"w":"Laneige"},{"t":1,"w":"SU:M37\u00b0"},{"t":1,"w":"Sulwhasoo"},{"t":1,"w":"The History of WHOO"},{"t":1,"w":"All Brands"},{"t":1,"w":"About BB"},{"t":1,"w":"Shipping"},{"t":1,"w":"Visit us at Facebook for our latest offers!"},{"t":1,"w":"HKD"},{"t":1,"w":"USD"},{"t":1,"w":"EUR"},{"t":1,"w":"AUD"},{"t":1,"w":"MY CART&nbsp; <i wg-1=\"\"></i>&nbsp; <span wg-2=\"\">0</span>"},{"t":1,"w":"\nYour cart is currently empty.\n"},{"t":1,"w":"\n    <a wg-1=\"\">Home</a>\n    \n    \n    <span wg-2=\"\"> / </span>\n    <span wg-3=\"\">Sale</span>\n    \n    \n  "},{"t":1,"w":"\n  Sale\n"},{"t":1,"w":"Sort by"},{"t":1,"w":"Featured"},{"t":1,"w":"Price, low to high"},{"t":1,"w":"Price, high to low"},{"t":1,"w":"Alphabetically, A-Z"},{"t":1,"w":"Alphabetically, Z-A"},{"t":1,"w":"Date, 
old to new"},{"t":1,"w":"Date, new to old"},{"t":1,"w":"Best Selling"},{"t":1,"w":"SALE"},{"t":1,"w":"QUICK VIEW"},{"t":1,"w":"CeraVe"},{"t":1,"w":"Pola"},{"t":1,"w":"Tom Ford"},{"t":1,"w":"Cle de Peau"},{"t":1,"w":"FILORGA"},{"t":1,"w":"Cheong Kwan Jang"},{"t":1,"w":"Kincho"},{"t":1,"w":"Darphin"},{"t":1,"w":"The Ordinary"},{"t":1,"w":"L'Oreal"},{"t":1,"w":"THREE"},{"t":1,"w":"Fancl"},{"t":1,"w":"VALMONT"},{"t":1,"w":"Lucas"},{"t":1,"w":"Helena Rubinstein"},{"t":1,"w":"Miu Miu"},{"t":1,"w":"GLAMGLOW"},{"t":1,"w":"EVE LOM"},{"t":1,"w":"Melvita"},{"t":1,"w":"EltaMD"},{"t":1,"w":"ZA"},{"t":1,"w":"Sofina"},{"t":1,"w":"Navigation"},{"t":1,"w":"What's On Sale"},{"t":1,"w":"What's New"},{"t":1,"w":"About Us"},{"t":1,"w":"BB's Pick"},{"t":1,"w":"Terms"},{"t":1,"w":"Privacy"},{"t":1,"w":"Help"},{"t":1,"w":"Payment Methods"},{"t":1,"w":"Shipping Rate"},{"t":1,"w":"Return Policy"},{"t":1,"w":"Contact Us"},{"t":1,"w":"Connect"},{"t":1,"w":"Sign up to receive email updates on new products, special promotions, sales, and more; and receive 5,000 BB Points to use against your purchase.  200 Points = $1."},{"t":1,"w":"\n          Copyright \u00a9 2021 BB Beauty \u2022 \n          <a wg-1=\"\">Powered by Neighborhood</a>\n  \n        "},{"t":1,"w":"Search our store"}],"title":"Sale \u2013 BB Beauty","request_url":"https://bbbeauty.com.hk/collections/sale"}
    #因为这网站是英文翻中文的,所以需要"payload"这个输入json  
    
    # Post the JSON payload to the translation API the page itself calls, then
    # pair each source string with its translation.
    url = 'https://api.weglot.com/translate?api_key=wg_47d1dea51acd2905c7987aa5d44dd14d6'

    # The context manager closes the session's connections; the timeout keeps
    # the script from blocking forever if the API does not answer.
    with requests.Session() as s2:
        res2 = s2.post(url, headers=headers, json=payload, timeout=10)

    # Fail loudly on an HTTP error instead of trying to parse an error body.
    res2.raise_for_status()

    a = res2.text

    j = json.loads(a)  # parse the JSON response body into a dict

    print(j.keys())  # show which keys the response contains

    # from_words holds the strings before translation and to_words the strings
    # after translation; entries at the same index belong together.
    # NOTE(review): assumes both are equal-length lists, as the original note
    # about matching indexes implies; confirm against a real response.
    from_words = j['from_words']
    to_words = j['to_words']

    # Lookup table: original English text -> translated text.
    translations = dict(zip(from_words, to_words))
    
    评论
    解决 无用
    打赏 举报

相关推荐 更多相似问题