diff --git a/main.go b/main.go index db46648..0a7c4c2 100644 --- a/main.go +++ b/main.go @@ -205,7 +205,7 @@ func crawl_dua(db *sql.DB) { bookCollector := c.Clone() // Find links to individual book pages - c.OnHTML("a[href*='/books/']", func(e *colly.HTMLElement) { + c.OnHTML("a", func(e *colly.HTMLElement) { link := e.Attr("href") // Make sure it's a valid book link and not something else if strings.Contains(link, "/books/") { @@ -217,10 +217,8 @@ func crawl_dua(db *sql.DB) { fmt.Println("Visiting", r.URL) }) - bookCollector.Visit(currenturl) - // On the book page, extract the details - bookCollector.OnHTML(`div.book-page`, func(e *colly.HTMLElement) { + bookCollector.OnHTML(`div`, func(e *colly.HTMLElement) { title := e.ChildText("h1") imgSrc := e.ChildAttr("img", "src") var month string @@ -242,6 +240,7 @@ func crawl_dua(db *sql.DB) { return } if exists { + log.Println("SKipping book: ", title) return // Silently skip if already exists }