I'm trying to build a web scraper in Go. I'm fairly new to the language, and I'm not sure what I'm doing wrong with the HTML parser. I'm trying to parse the HTML to find anchor tags, but I keep getting `html.TokenTypeEnd` instead.
package main
import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strings"

	"golang.org/x/net/html"
)
// GetHtml fetches the page at url and returns its body as a string together
// with the HTTP response.
//
// Only the Employee Console URL is supported; for any other url a message is
// printed and the zero values ("", nil, nil) are returned.
//
// The network body is read to the end and closed here. resp.Body is then
// replaced with an in-memory reader over the same bytes, so callers can still
// read resp.Body even though the original stream has been consumed.
//
// If the request or the read fails, a message is printed and the error is
// returned with an empty text.
func GetHtml(url string) (text string, resp *http.Response, err error) {
	if url != "https://www.coastal.edu/scs/employee" {
		fmt.Println("Issue with finding URL. Looking for: " + url)
		return "", nil, nil
	}

	resp, err = http.Get(url)
	if err != nil {
		fmt.Println("There seems to ben an error with the Employee Console.")
		// When Get fails the response must not be read: resp is normally nil,
		// so touching resp.Body here would panic.
		return "", resp, err
	}

	// Keep a handle on the network body so it is the one that gets closed,
	// even after resp.Body is swapped for the in-memory copy below.
	body := resp.Body
	defer body.Close()

	var raw []byte
	raw, err = ioutil.ReadAll(body)
	if err != nil {
		fmt.Println("Cannot read byte response from Employee Console.")
		return "", resp, err
	}

	text = string(raw)
	// The stream above is now at EOF; give callers a fresh reader over the
	// same content so resp.Body remains usable.
	resp.Body = ioutil.NopCloser(strings.NewReader(text))
	return text, resp, nil
}
func main() {
htmlSrc, response, err := GetHtml("https://www.coastal.edu/scs/employee")
if err != nil {
fmt.Println("Cannot read HTML source code.")
}
_ = htmlSrc
htmlTokens := html.NewTokenizer(response.Body)
i := 0
for i < 1 {
tt := htmlTokens.Next()
fmt.Printf("%T", tt)
switch tt {
case html.ErrorToken:
fmt.Println("End")
i++
case html.TextToken:
fmt.Println(tt)
case html.StartTagToken:
t := htmlTokens.Token()
isAnchor := t.Data == "a"
if isAnchor {
fmt.Println("We found an anchor!")
}
}
}
I'm getting `html.TokenTypeEnd` as the output whenever I print the token type with:
fmt.Printf("%T", tt)