I am trying to scrape related words for a given word for which I am using BFS starting with the given word and searching through each related word on dictionary.com
I have tried this code without concurrency and it works just fine, but takes a lot of time hence, tried using go routines but my code gets stuck after the first iteration. The first level of BFS works just fine but then in the second level it hangs!
package main
import (
"fmt"
"github.com/gocolly/colly"
"sync"
)
var wg sync.WaitGroup
func buildURL(word string) string {
return "https://www.dictionary.com/browse/" + string(word)
}
func get(url string) []string {
c := colly.NewCollector()
c.IgnoreRobotsTxt = true
var ret []string
c.OnHTML("a.css-cilpq1.e15p0a5t2", func(e *colly.HTMLElement) {
ret = append(ret, string(e.Text))
})
c.Visit(url)
c.Wait()
return ret
}
func threading(c chan []string, word string) {
defer wg.Done()
var words []string
for _, w := range get(buildURL(word)) {
words = append(words, w)
}
c <- words
}
func main() {
fmt.Println("START")
word := "jump"
maxDepth := 2
//bfs
var q map[string]int
nq := map[string]int {
word: 0,
}
vis := make(map[string]bool)
queue := make(chan []string, 5000)
for i := 1; i <= maxDepth; i++ {
fmt.Println(i)
q, nq = nq, make(map[string]int)
for word := range q {
if _, ok := vis[word]; !ok {
wg.Add(1)
vis[word] = true
go threading(queue, word)
for v := range queue {
fmt.Println(v)
for _, w := range v {
nq[w] = i
}
}
}
}
}
wg.Wait()
close(queue)
fmt.Println("END")
}
OUTPUT:
START
1
[plunge dive rise upsurge bounce hurdle fall vault drop advance upturn inflation increment spurt boost plummet skip bound surge take]
hangs just here forever, counter = 2 is not printed!
Can check here https://www.dictionary.com/browse/jump for the related words.