What you describe is called XML stream parsing, as done by any SAX parser, for example. Good news: `encoding/xml`
supports that, although it is a bit hidden.
What you actually have to do is create an instance of `xml.Decoder`, passing it an `io.Reader`. Then you call `Decoder.Token()` to read the input stream up to the next valid XML token. From there, you can decide what to do next.
Here is a little example, also available as a gist, or you can <kbd>Run it on the Playground</kbd>:
package main
import (
	"bytes"
	"encoding/xml"
	"fmt"
	"io"
)
// book is the sample XML document used by the example: a <book> root
// holding one <preface> and two <chapter> elements, each chapter carrying
// "num" and "title" attributes plus filler text as character data.
const book = `<?xml version="1.0" encoding="UTF-8"?>
<book>
<preface>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</preface>
<chapter num="1" title="Foo">Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</chapter>
<chapter num="2" title="Bar">Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</chapter>
</book>`
// Chapter is the decoding target for a single <chapter> element. The struct
// tags tell encoding/xml which parts of the element fill which field.
type Chapter struct {
	Num     int    `xml:"num,attr"`   // value of the "num" attribute
	Title   string `xml:"title,attr"` // value of the "title" attribute
	Content string `xml:",chardata"`  // character data inside the element
}
func main() {
// We emulate a file or network stream
b := bytes.NewBufferString(book)
// And set up a decoder
d := xml.NewDecoder(b)
for {
// We look for the next token
// Note that this only reads until the next positively identified
// XML token in the stream
t, err := d.Token()
if err != nil {
break
}
switch et := t.(type) {
case xml.StartElement:
// We now have to inspect wether we are interested in the element
// otherwise we will advance
if et.Name.Local == "chapter" {
// Most often/likely element first
c := &Chapter{}
// We decode the element into(automagically advancing the stream)
// If no matching token is found, there will be an error
// Note the search only happens within the parent.
if err := d.DecodeElement(&c, &et); err != nil {
panic(err)
}
// We have found what we are interested in, so we print it
fmt.Printf("%d: %s
", c.Num, c.Title)
} else if et.Name.Local == "book" {
fmt.Println("Book begins!")
}
case xml.EndElement:
if et.Name.Local != "book" {
continue
}
fmt.Println("Finished processing book!")
}
}
}