Skip to content

Commit

Permalink
Fix #1
Browse files Browse the repository at this point in the history
  • Loading branch information
Akenaide committed Feb 10, 2022
1 parent 9399d0a commit 6f44fbf
Show file tree
Hide file tree
Showing 3 changed files with 1,754 additions and 13 deletions.
56 changes: 43 additions & 13 deletions cmd/fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"net/url"
"os"
"path/filepath"
"strconv"
"sync"

"golang.org/x/net/publicsuffix"
Expand Down Expand Up @@ -126,7 +127,6 @@ func worker(id int, furni furniture, respChannel chan *http.Response, retry chan
*furni.Kanseru = true
log.Printf("Kanseru by : %v", link)
} else {
furni.Wg.Add(1)
proxy.Readd()
respChannel <- resp
}
Expand All @@ -135,6 +135,19 @@ func worker(id int, furni furniture, respChannel chan *http.Response, retry chan
log.Println("Nani", id)
}

// getLastPage returns the number of the last result page advertised by the
// pager found in doc.
//
// It selects the elements matching ".pager .next", moves to the sibling that
// immediately precedes them and parses the text of the first such sibling as
// a decimal integer. If that text is not a valid integer (for instance when
// the document holds no pager at all) the failure is logged and 0 is
// returned instead of being silently swallowed.
func getLastPage(doc *goquery.Document) int {
	// NOTE(review): presumably the element right before "next" is the link to
	// the highest page number — confirm against the site markup.
	lastLink := doc.Find(".pager .next").Prev().First()
	text := lastLink.Text()

	last, err := strconv.Atoi(text)
	if err != nil {
		// Report the problem: a caller receiving 0 would otherwise have no
		// hint as to why no page is going to be fetched.
		log.Printf("getLastPage: cannot parse last page number %q: %v", text, err)
		return 0
	}

	fmt.Printf("Go for %v pages\n", last)
	return last
}

// fetchCmd represents the fetch command
var fetchCmd = &cobra.Command{
Use: "fetch",
Expand Down Expand Up @@ -181,31 +194,48 @@ Use global switches to specify the set, by default it will fetch all sets.`,
Jar: jar,
}

proxy := biri.GetClient()
proxy.Client.Jar = furni.Jar

resp, err := http.PostForm(fmt.Sprintf("%v?page=%d", Baseurl, 1), furni.Values)

if err != nil {
log.Fatal("Error on getting last page")
}

doc, err := goquery.NewDocumentFromReader(resp.Body)

if err != nil {
log.Fatal("Error on getting last page parse")

}
maxPage := getLastPage(doc)
wg.Add(maxPage)

for i := 0; i < maxWorker; i++ {
go worker(i, furni, respChannel, retry)
go writeWorker(i, furni, writeChannel)
go writeWorker(i, furni, writeChannel)
go responseWorker(i, furni, respChannel, writeChannel, retry)
}

for {
select {
case retryLink := <-retry:
jobs <- retryLink
log.Printf("Retry: %v", retryLink)
default:
jobs <- fmt.Sprintf("%v?page=%d", Baseurl, page)
page = page + 1
go func() {
for i := 1; i <= maxPage; i++ {
jobs <- fmt.Sprintf("%v?page=%d", Baseurl, i)
}
}()

go func() {
for v := range retry {
jobs <- v
log.Printf("Retry: %v", v)

if kanseru {
log.Println("Kanseru at: ", page)
break
}
}
}()

log.Println("Waiting...")
wg.Wait()
close(jobs)
biri.Done()

},
Expand Down
26 changes: 26 additions & 0 deletions cmd/fetch_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package cmd

import (
"log"
"os"
"testing"

"github.com/PuerkitoBio/goquery"
)

// TestGetLastPage parses the saved HTML fixture mockws/bd.html and checks
// that getLastPage reports 69 as the last page.
//
// NOTE(review): log.Fatal terminates the whole test binary on a fixture
// error; t.Fatal would be the idiomatic choice, but switching requires
// dropping this file's "log" import at the same time.
func TestGetLastPage(t *testing.T) {
	fixture, openErr := os.Open("mockws/bd.html")
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer fixture.Close()

	page, parseErr := goquery.NewDocumentFromReader(fixture)
	if parseErr != nil {
		log.Fatal(parseErr)
	}

	if got := getLastPage(page); got != 69 {
		t.Errorf("%v is not last", got)
	}
}
Loading

0 comments on commit 6f44fbf

Please sign in to comment.