from selenium import webdriver
from lxml import etree
from bs4 import BeautifulSoup
from selenium import webdriver
from lxml import etree
from bs4 import BeautifulSoup
import time
import pandas as pd
proxies = {
'https': 'http://127.0.0.1:10818',
'http': 'http://127.0.0.1:10818'
}
hd = {
"User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Mobile Safari/537.36"
}
urls = ["https://celohub.org/apps/","https://celohub.org/dev-tools/","https://celohub.org/infrastructure/"]
total = []
for i in urls:
wd = webdriver.Chrome()
wd.get(i)
time.sleep(20)
resp =wd.page_source
html = BeautifulSoup(resp, "lxml")
div_each = html.find_all("article", class_="project card static svelte-17ykfzn")
for k in div_each:
print(k)
name = k.article['title'].strip()
url = k.a['href'].strip()
#total.append(url)
total.append([name,url])
ns = pd.DataFrame(total)
date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
path = "/Users/paul/jpn traffic/ana/celoprojects/"
ns.to_csv(path + date + '.csv', index=None)
print("celoprojects finished")
quit()
为什么取不到值?
<article class="project card static svelte-17ykfzn" title="Valora
https://valoraapp.com/"> <a href="https://valoraapp.com/" target="_blank"><img alt="Valora Thumbnail" class="thumbnail" loading="lazy" src="/images/thumbnails/Valora.png"/></a> <footer class="svelte-17ykfzn"><div class="project-links svelte-17ykfzn"><a href="https://play.google.com/store/apps/details?id=co.clabs.valora" target="_blank"><img alt="Android Play Store" class="svelte-17ykfzn" src="/images/link-icons/Android-Logo-Round@3x.png"/></a> <a href="https://apps.apple.com/app/id1520414263" target="_blank"><img alt="iOS App Store" class="svelte-17ykfzn" src="/images/link-icons/Apple-Logo-Round@3x.png"/></a> <a href="https://valoraapp.com/" target="_blank"><img alt="Website" class="svelte-17ykfzn" src="/images/link-icons/Web-Logo-Round@3x.png"/></a> <a href="https://github.com/celo-org/wallet" target="_blank"><img alt="Github" class="svelte-17ykfzn" src="/images/link-icons/GitHub-Logo-Round.png"/></a></div> <div class="tags svelte-jz5ix9 show-first-few"><span class="tag svelte-jz5ix9" style="--tag-color:var(--celo-gold);">identity</span><span class="tag svelte-jz5ix9" style="--tag-color:var(--celo-violet);">valora</span> </div></footer></article>
Traceback (most recent call last):
File "/Users/paul/PycharmProjects/pythonProject/official/celo-simple.py", line 32, in <module>
name = k.article['title'].strip()
TypeError: 'NoneType' object is not subscriptable
Process finished with exit code 1