"""Decode the font-obfuscated digits on a Qidian book page.

The script downloads the book page, downloads the web font the page
references, reads the font's code point -> glyph name table, and replaces
every numeric character reference (``&#NNNNN;``) whose glyph is a digit or
a period with the plain character.  The page is saved before and after the
replacement so the two files can be compared.
"""
import re

BOOK_URL = 'https://book.qidian.com/info/1010868264'
REQUEST_HEADERS = {
    # No leading space in the value: recent requests releases reject header
    # values that start with whitespace.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36',
}
REQUEST_TIMEOUT = 30  # seconds; avoids hanging forever on a stalled server

RAW_PAGE_PATH = '替换前的网页.html'
FONT_PATH = '字体文件.woff'
FONT_XML_PATH = 'font.xml'
DECODED_PAGE_PATH = '替换hou的网页.html'

# Font file download address, as it appears in the page's inline CSS.
FONT_URL_PATTERN = re.compile(r"; src: url\((.*?)\)format")
# The original script takes the second match.
# NOTE(review): presumably the first match is another font format -- confirm
# against the live page.
FONT_URL_INDEX = 1

# Decimal numeric character reference, e.g. "&#100123;".
ENTITY_PATTERN = re.compile(r"&#(\d+);")

# Glyph names used by the font -> the character each glyph draws.
GLYPH_TO_CHAR = {
    'one': '1',
    'two': '2',
    'three': '3',
    'four': '4',
    'five': '5',
    'six': '6',
    'seven': '7',
    'eight': '8',
    'nine': '9',
    'zero': '0',
    'period': '.',
}


def extract_font_url(html):
    """Return the font download URL found in *html*.

    Raises:
        ValueError: if the page has fewer font ``src`` entries than expected.
    """
    matches = FONT_URL_PATTERN.findall(html)
    if len(matches) <= FONT_URL_INDEX:
        raise ValueError(
            f"expected at least {FONT_URL_INDEX + 1} font URLs in the page, "
            f"found {len(matches)}"
        )
    # CSS allows url('...') and url("..."); drop the quotes if present.
    return matches[FONT_URL_INDEX].strip(" '\"")


def build_codepoint_map(cmap):
    """Translate a ``{code point: glyph name}`` table to ``{code point: char}``.

    Glyph names missing from ``GLYPH_TO_CHAR`` are skipped instead of raising
    ``KeyError``, so fonts that carry extra glyphs are handled.
    """
    return {
        codepoint: GLYPH_TO_CHAR[glyph_name]
        for codepoint, glyph_name in cmap.items()
        if glyph_name in GLYPH_TO_CHAR
    }


def decode_entities(html, codepoint_map):
    """Return *html* with every mapped ``&#N;`` reference replaced.

    ``str`` is immutable, so the new text is returned rather than modified in
    place.  References whose code point is not in *codepoint_map* are left
    untouched.
    """
    def substitute(match):
        return codepoint_map.get(int(match.group(1)), match.group(0))

    return ENTITY_PATTERN.sub(substitute, html)


def main():
    """Download the page and its font, then write the decoded page."""
    # Third-party imports are local so the pure helpers above can be imported
    # and tested without requests / fontTools installed.
    import requests
    from fontTools.ttLib import TTFont

    response = requests.get(
        BOOK_URL, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    html_page = response.text
    with open(RAW_PAGE_PATH, mode='w', encoding='utf-8') as f:
        f.write(html_page)

    font_url = extract_font_url(html_page)
    font_response = requests.get(font_url, timeout=REQUEST_TIMEOUT)
    font_response.raise_for_status()
    with open(FONT_PATH, mode='wb') as f:
        f.write(font_response.content)

    font = TTFont(FONT_PATH)
    try:
        font.saveXML(FONT_XML_PATH)  # human-readable dump for inspection
        # Get the font mapping: code point -> glyph name.
        cmap = font['cmap'].getBestCmap() or {}
    finally:
        font.close()
    print(cmap)

    codepoint_map = build_codepoint_map(cmap)
    print(codepoint_map)

    with open(DECODED_PAGE_PATH, mode='w', encoding='utf-8') as f:
        f.write(decode_entities(html_page, codepoint_map))


if __name__ == '__main__':
    main()
1. 下载网页的部分我清楚,重点是如何在 Java 里修正映射关系并进行替换。