我写了个个python方法想要归类wikipedia,然后在做递归式的时候想要把导入的parameter更换,可是却改变不了
import csv
from bs4 import BeautifulSoup
import urllib.request
string_set=[]
url_link=[]
def get_first_category(url):
k=urllib.request.urlopen(url)
soup=BeautifulSoup(k)
s=soup.find_all('a')
for i in s:
string_set.append(i.string)
for i in range(-len(string_set), 0):
if string_set[i] == ("Categories"):
return (string_set[i + 1])
def join_with(k):
return k.replace(" ","_")
def get_category_page(k):
p=["https://en.wikipedia.org/wiki/Category:",k]
return "".join(p)
def return_link(url):
return (get_category_page(join_with(get_first_category(url))))
file=open("Categories.csv")
categories=csv.reader(file)
categories=zip(*categories)
def find_category(url):
k=get_first_category(url)
for i in categories:
if k in i:
return [True,i[0]]
return [False,k]
category_url=''
def main(url):
j=find_category(url)
if j[0]:
return j[1]
else:
url=return_link(url)
print(url)
return main(url)
#print(return_link('https://en.wikipedia.org/wiki/Category:Mathematics_of_infinitesimals'))
print (main('https://en.wikipedia.org/wiki/Category:Charitable_organizations'))
这是我的code,然后
https://www.dropbox.com/s/647hpufvzq0fksk/Categories.csv?dl=0
这是csv的地址,按理说应该会一个个找下去可是parameter被改变一次就不再被改变,已经试了很多方法了可是还是没有办法