I want to measure the time it takes for GoLang to parse an XML file. So, I've decided the write a benchmark.
I do have a function which generates an io.Reader
that contains an XML document.
// PRIVATE: createSampleXMLReader creates an io.Reader instance that contains 10.000 '<Node />' elements which are
// suitable for running a benchmark test.
func createSampleXMLReader(
nodeElementCount int) io.Reader {
xmlContents := new(strings.Builder)
xmlContents.WriteString("<ROOT>
")
for i := 0; i < nodeElementCount; i++ {
appendNodeXMLElement(xmlContents)
}
xmlContents.WriteString("</ROOT>")
return strings.NewReader(xmlContents.String())
}
// PRIVATE: appendNodeXMLElement appends a '<Node />' elements to an existing io.Reader instance.
func appendNodeXMLElement(
xmlDocument *strings.Builder) {
xmlDocument.WriteString("<Node id=\"0\" position=\"0\" depth=\"0\" parent=\"0\">
")
xmlDocument.WriteString(" <Name>Name</Name>
")
xmlDocument.WriteString(" <Description>Description</Description>
")
xmlDocument.WriteString(" <OwnInformation>
")
xmlDocument.WriteString(" <Title>Title</Title>
")
xmlDocument.WriteString(" <Description>Description</Description>
")
xmlDocument.WriteString(" </OwnInformation>
")
xmlDocument.WriteString(" <Assets>
")
xmlDocument.WriteString(" <Asset id=\"0\" position=\"0\" type=\"0\" category=\"0\">
")
xmlDocument.WriteString(" <OriginalFile>OriginalFile</OriginalFile>
")
xmlDocument.WriteString(" <Description>Description</Description>
")
xmlDocument.WriteString(" <Uri>Uri</Uri>
")
xmlDocument.WriteString(" </Asset>
")
xmlDocument.WriteString(" <Asset id=\"1\" position=\"1\" type=\"1\" category=\"1\">
")
xmlDocument.WriteString(" <OriginalFile>OriginalFile</OriginalFile>
")
xmlDocument.WriteString(" <Description>Description</Description>
")
xmlDocument.WriteString(" <Uri>Uri</Uri>
")
xmlDocument.WriteString(" </Asset>
")
xmlDocument.WriteString(" <Asset id=\"2\" position=\"2\" type=\"2\" category=\"2\">
")
xmlDocument.WriteString(" <OriginalFile>OriginalFile</OriginalFile>
")
xmlDocument.WriteString(" <Description>Description</Description>
")
xmlDocument.WriteString(" <Uri>Uri</Uri>
")
xmlDocument.WriteString(" </Asset>
")
xmlDocument.WriteString(" <Asset id=\"3\" position=\"3\" type=\"3\" category=\"3\">
")
xmlDocument.WriteString(" <OriginalFile>OriginalFile</OriginalFile>
")
xmlDocument.WriteString(" <Description>Description</Description>
")
xmlDocument.WriteString(" <Uri>Uri</Uri>
")
xmlDocument.WriteString(" </Asset>
")
xmlDocument.WriteString(" <Asset id=\"4\" position=\"4\" type=\"4\" category=\"4\">
")
xmlDocument.WriteString(" <OriginalFile>OriginalFile</OriginalFile>
")
xmlDocument.WriteString(" <Description>Description</Description>
")
xmlDocument.WriteString(" <Uri>Uri</Uri>
")
xmlDocument.WriteString(" </Asset>
")
xmlDocument.WriteString(" </Assets>
")
xmlDocument.WriteString(" <Synonyms>
")
xmlDocument.WriteString(" <Synonym>Synonym 0</Synonym>
")
xmlDocument.WriteString(" <Synonym>Synonym 1</Synonym>
")
xmlDocument.WriteString(" <Synonym>Synonym 2</Synonym>
")
xmlDocument.WriteString(" <Synonym>Synonym 3</Synonym>
")
xmlDocument.WriteString(" <Synonym>Synonym 4</Synonym>
")
xmlDocument.WriteString(" </Synonyms>
")
xmlDocument.WriteString("</Node>
")
}
Then, I have the function that actually parses this XML document.
// PRIVATE: parseXML parses an io.Reader instance into a 'Node' struct.
func parseXML(
xmlReader io.Reader) {
xmlDecoder := xml.NewDecoder(xmlReader)
for {
token, _ := xmlDecoder.Token()
if token == nil {
break
}
switch element := token.(type) {
case xml.StartElement:
if element.Name.Local == "Node" {
decodeNodeElement(xmlDecoder, &element)
}
}
}
}
// PRIVATE: decodeNodeElement decodes a '<Node />' element into a 'Node' struct.
func decodeNodeElement(
xmlDecoder *xml.Decoder,
element *xml.StartElement) {
node := new(model.Node)
xmlDecoder.DecodeElement(node, element)
}
Then, I have the function that performs the benchmark:
// PRIVATE: runBenchmarkParseXML performs a benchmark that parses an XML document that contains the given number of
// '<Node />' element.
func runBenchmarkParseXML(
nodeCount int,
benchmark *testing.B) {
// Arrange.
xmlReader := createSampleXMLReader(nodeCount)
// Act.
for i := 0; i < benchmark.N; i++ {
parseXML(xmlReader)
}
}
And then I have 5 functions that performs the benchmark. These are for 1, 10, 100, 1000 and 10000 elements in the XML document.
func BenchmarkParseXML1(benchmark *testing.B) { runBenchmarkParseXML(1, benchmark) }
func BenchmarkParseXML10(benchmark *testing.B) { runBenchmarkParseXML(10, benchmark) }
func BenchmarkParseXML100(benchmark *testing.B) { runBenchmarkParseXML(100, benchmark) }
func BenchmarkParseXML1000(benchmark *testing.B) { runBenchmarkParseXML(1000, benchmark) }
func BenchmarkParseXML10000(benchmark *testing.B) { runBenchmarkParseXML(10000, benchmark) }
When I'm running this benchmark, I see the following output:
BenchmarkParseXML1-4 5000000 226 ns/op
BenchmarkParseXML10-4 10000000 230 ns/op
BenchmarkParseXML100-4 5000000 226 ns/op
BenchmarkParseXML1000-4 5000000 254 ns/op
BenchmarkParseXML10000-4 1 1690998100 ns/op
How is it possible that there's such a difference in the benchmark that parses an XML file that contains 10.000 elements, which the rest of the benchmark timings are stable?
Is my benchmark new or it the implementation of the ParseXML method not correct.
Edit: Node struct
// Node represents a '<Node />' element in the XML document.
type Node struct {
ID int `xml:"id,attr"`
Position int `xml:"position,attr"`
Depth int `xml:"depth,attr"`
Parent string `xml:"parent,attr"`
Name string `xml:"Name"`
Description string `xml:"Description"`
OwnInformation struct {
Title string `xml:"Title"`
Description string `xml:"Description"`
} `xml:"OwnInformation"`
Assets []struct {
ID string `xml:"id,attr"`
Position int `xml:"position,attr"`
Type string `xml:"type,attr"`
Category int `xml:"category,attr"`
OriginalFile string `xml:"OriginalFile"`
Description string `xml:"Description"`
URI string `xml:"Uri"`
} `xml:"Assets>Asset"`
Synonyms []string `xml:"Synonyms>Synonym"`
}
Thanks in advance for any guidance.