def get_page(url):
try:
import urllib
return urllib.urlopen(url).read()
except:
return ''
def get_next_traget(page):
global url
page=get_page(url)
start_link=page.find('<a href=')
if start_link==-1:
return None,0
else:
start_quole=page.find('"',start_link)
end_quote=page.find('"',start_quole+1)
url=page[start_quole+1:end_quote]
return url,end_quote
def print_all_links(page):
while True:
url,endpos=get_next_traget(page)
if url:
print(url)
page=page[endpos:]
else:
break
print_all_links(get_page('https://www.baidu.com/'))
pycharm上显示
File "F:/learning/0001/尝试.py", line 29, in
print_all_links(get_page('https://www.baidu.com/'))
File "F:/learning/0001/尝试.py", line 22, in print_all_links
url,endpos=get_next_traget(page)
File "F:/learning/0001/尝试.py", line 10, in get_next_traget
page=get_page(url)
NameError: name 'url' is not defined
不明白什么意思,该怎么修改