I'm working on a proof of concept to measure the time required to parse an XML document that contains a given number of elements.
First, here is the struct that models the entries in my XML document:
// Node is the decoding target for a single '<Node>' element of the XML
// document. Attributes of the element map to the first four fields, child
// elements to the remaining ones.
type Node struct {
	// Attributes of the '<Node>' element itself.
	ID       int    `xml:"id,attr"`
	Position int    `xml:"position,attr"`
	Depth    int    `xml:"depth,attr"`
	Parent   string `xml:"parent,attr"`

	// Direct '<Name>' and '<Description>' children.
	Name        string `xml:"Name"`
	Description string `xml:"Description"`

	// OwnInformation holds the content of the '<OwnInformation>' child.
	OwnInformation struct {
		Title       string `xml:"Title"`
		Description string `xml:"Description"`
	} `xml:"OwnInformation"`

	// Assets collects every '<Asset>' element nested in '<Assets>'.
	Assets []struct {
		// Attributes of the '<Asset>' element.
		ID       string `xml:"id,attr"`
		Position int    `xml:"position,attr"`
		Type     string `xml:"type,attr"`
		Category int    `xml:"category,attr"`

		// Child elements of the '<Asset>' element.
		OriginalFile string `xml:"OriginalFile"`
		Description  string `xml:"Description"`
		URI          string `xml:"Uri"`
	} `xml:"Assets>Asset"`

	// Synonyms collects the text of every '<Synonym>' nested in '<Synonyms>'.
	Synonyms []string `xml:"Synonyms>Synonym"`
}
Next, I have a factory that can generate any given number of elements:
func CreateNodeXMLDocumentBytes(
nodeElementCount int) []byte {
xmlContents := new(bytes.Buffer)
xmlContents.WriteString("<ROOT>
")
for iterationCounter := 0; iterationCounter < nodeElementCount; iterationCounter++ {
appendNodeXMLElement(iterationCounter, xmlContents)
}
xmlContents.WriteString("</ROOT>")
return xmlContents.Bytes()
}
// PRIVATE: appendNodeXMLElement appends one '<Node>' element to an existing
// bytes.Buffer instance.
//
// counter becomes the id attribute of the node. Every node carries the same
// fixed children: Name, Description, OwnInformation, five Asset elements
// (numbered 0-4) and five Synonym elements (numbered 0-4). Each line of the
// element, including the closing tag, is terminated by a newline.
func appendNodeXMLElement(counter int, xmlDocument *bytes.Buffer) {
	// Number of '<Asset>' and of '<Synonym>' children written per node.
	const childCount = 5

	xmlDocument.WriteString("<Node id=\"" + strconv.Itoa(counter) + "\" position=\"0\" depth=\"0\" parent=\"0\">\n")
	xmlDocument.WriteString(" <Name>Name</Name>\n")
	xmlDocument.WriteString(" <Description>Description</Description>\n")

	xmlDocument.WriteString(" <OwnInformation>\n")
	xmlDocument.WriteString(" <Title>Title</Title>\n")
	xmlDocument.WriteString(" <Description>Description</Description>\n")
	xmlDocument.WriteString(" </OwnInformation>\n")

	xmlDocument.WriteString(" <Assets>\n")
	for i := 0; i < childCount; i++ {
		// All four attributes of an asset carry the asset's index.
		index := strconv.Itoa(i)
		xmlDocument.WriteString(" <Asset id=\"" + index + "\" position=\"" + index + "\" type=\"" + index + "\" category=\"" + index + "\">\n")
		xmlDocument.WriteString(" <OriginalFile>OriginalFile</OriginalFile>\n")
		xmlDocument.WriteString(" <Description>Description</Description>\n")
		xmlDocument.WriteString(" <Uri>Uri</Uri>\n")
		xmlDocument.WriteString(" </Asset>\n")
	}
	xmlDocument.WriteString(" </Assets>\n")

	xmlDocument.WriteString(" <Synonyms>\n")
	for i := 0; i < childCount; i++ {
		xmlDocument.WriteString(" <Synonym>Synonym " + strconv.Itoa(i) + "</Synonym>\n")
	}
	xmlDocument.WriteString(" </Synonyms>\n")

	xmlDocument.WriteString("</Node>\n")
}
Next, I have the application that creates a sample document and decodes each '<Node />' element:
func main() {
nodeXMLDocumentBytes := factories.CreateNodeXMLDocumentBytes(100)
xmlDocReader := bytes.NewReader(nodeXMLDocumentBytes)
xmlDocDecoder := xml.NewDecoder(xmlDocReader)
xmlDocNodeElementCounter := 0
start := time.Now()
for {
token, _ := xmlDocDecoder.Token()
if token == nil {
break
}
switch element := token.(type) {
case xml.StartElement:
if element.Name.Local == "Node" {
xmlDocNodeElementCounter++
xmlDocDecoder.DecodeElement(new(entities.Node), &element)
}
}
}
fmt.Println("Total '<Node />' elements in the XML document: ", xmlDocNodeElementCounter)
fmt.Printf("Total elapsed time: %v
", time.Since(start))
}
This takes around 11ms on my machine.
Next, I used goroutines to decode the XML elements:
func main() {
nodeXMLDocumentBytes := factories.CreateNodeXMLDocumentBytes(100)
xmlDocReader := bytes.NewReader(nodeXMLDocumentBytes)
xmlDocDecoder := xml.NewDecoder(xmlDocReader)
xmlDocNodeElementCounter := 0
start := time.Now()
for {
token, _ := xmlDocDecoder.Token()
if token == nil {
break
}
switch element := token.(type) {
case xml.StartElement:
if element.Name.Local == "Node" {
xmlDocNodeElementCounter++
go xmlDocDecoder.DecodeElement(new(entities.Node), &element)
}
}
}
time.Sleep(time.Second * 5)
fmt.Println("Total '<Node />' elements in the XML document: ", xmlDocNodeElementCounter)
fmt.Printf("Total elapsed time: %v
", time.Since(start))
}
I use a simple 'Sleep' command to ensure that the goroutines are finished. I know it should be implemented with channels and a worker queue.
According to the output on my console only 3 elements are decoded. So what happened to the other elements? Perhaps something to do with the fact that I'm using streams?
Is there a way to make the decoding concurrent so that the time required to decode all the elements is reduced?