解析 XML 文件时报错:xml.etree.ElementTree.ParseError: out of memory: line 1, column 0
该 XML 文件大小约为 1.9 GB。
def load_xml(self, file):
    '''Stream an XML file and store one row per document in self.docs_df.

    The file is parsed incrementally with ET.iterparse instead of being
    read into a single string and handed to ET.fromstring. Building the
    whole tree for a multi-gigabyte file is what triggers
    "xml.etree.ElementTree.ParseError: out of memory".

    Every direct child of the root element is treated as one document.
    Its content is the concatenation, without separators, of the text of
    its TITLE, TAG, ARTIST, YEAR and LYRICS children; a missing or empty
    field contributes an empty string.

    Args:
        file (str): path to the xml file, decoded as UTF-8.

    Returns:
        None. Rows are written to self.docs_df, indexed by the DOCNO text.

    Raises:
        xml.etree.ElementTree.ParseError: if the file is not well-formed.
            Because parsing is incremental, documents that precede the
            malformed part have already been stored when this is raised.
        AttributeError: if a document has no DOCNO child.
    '''
    content_tags = ('TITLE', 'TAG', 'ARTIST', 'YEAR', 'LYRICS')

    with open(file, 'r', encoding='utf-8') as f:
        root = None
        depth = 0  # number of currently open elements
        for event, elem in ET.iterparse(f, events=('start', 'end')):
            if event == 'start':
                if root is None:
                    root = elem  # the first opened element is the root
                depth += 1
                continue

            depth -= 1
            if depth != 1:
                # Only a closing direct child of the root is a complete
                # document; deeper elements and the root itself are skipped.
                continue

            docID = elem.find('DOCNO').text  # get docID
            content = ''.join(
                elem.findtext(tag, default='') for tag in content_tags
            )
            # NOTE(review): adding rows one at a time with .loc may be slow
            # for very large inputs; kept as-is because the layout of
            # self.docs_df is not visible here.
            self.docs_df.loc[docID] = content

            # Drop the processed document so memory use stays bounded by
            # one document rather than the whole file.
            root.clear()