fix errors
This commit is contained in:
parent
5d35f38292
commit
9e2fef2433
7
main.go
7
main.go
@ -205,7 +205,7 @@ func crawl_dua(db *sql.DB) {
|
|||||||
bookCollector := c.Clone()
|
bookCollector := c.Clone()
|
||||||
|
|
||||||
// Find links to individual book pages
|
// Find links to individual book pages
|
||||||
c.OnHTML("a[href*='/books/']", func(e *colly.HTMLElement) {
|
c.OnHTML("a", func(e *colly.HTMLElement) {
|
||||||
link := e.Attr("href")
|
link := e.Attr("href")
|
||||||
// Make sure it's a valid book link and not something else
|
// Make sure it's a valid book link and not something else
|
||||||
if strings.Contains(link, "/books/") {
|
if strings.Contains(link, "/books/") {
|
||||||
@ -217,10 +217,8 @@ func crawl_dua(db *sql.DB) {
|
|||||||
fmt.Println("Visiting", r.URL)
|
fmt.Println("Visiting", r.URL)
|
||||||
})
|
})
|
||||||
|
|
||||||
bookCollector.Visit(currenturl)
|
|
||||||
|
|
||||||
// On the book page, extract the details
|
// On the book page, extract the details
|
||||||
bookCollector.OnHTML(`div.book-page`, func(e *colly.HTMLElement) {
|
bookCollector.OnHTML(`div`, func(e *colly.HTMLElement) {
|
||||||
title := e.ChildText("h1")
|
title := e.ChildText("h1")
|
||||||
imgSrc := e.ChildAttr("img", "src")
|
imgSrc := e.ChildAttr("img", "src")
|
||||||
var month string
|
var month string
|
||||||
@ -242,6 +240,7 @@ func crawl_dua(db *sql.DB) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
if exists {
|
if exists {
|
||||||
|
log.Println("SKipping book: ", title)
|
||||||
return // Silently skip if already exists
|
return // Silently skip if already exists
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user