-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
97 lines (82 loc) · 2.57 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"net/http"
"strings"
"sync"
"github.com/PuerkitoBio/goquery"
)
// Listing is one job posting as scraped from a job detail page.
//
// parseJobPage fills the fields positionally, so their order must not change.
// Each field is written to JSON under the lower-case key given in its tag.
type Listing struct {
	Name     string `json:"name"`     // taken from the listing header's <h1>
	Location string `json:"location"` // taken from the header's span.location
	Company  string `json:"company"`  // taken from the header's span.company
	Region   string `json:"region"`   // taken from the header's span.region
}
// parseJobPage downloads a single job page and extracts its header fields.
//
// url is the site-relative path of the job page. It is appended to the site
// root; one leading "/" is dropped first so the request URL never contains a
// double slash after the host.
//
// The returned Listing holds the inner HTML of the first element matching
// each header selector; a field is empty when nothing matches. A failed
// request or an unparsable body terminates the program through log.Fatal.
// A non-200 status or a field that cannot be rendered is only logged, and
// the page is still parsed on a best-effort basis.
func parseJobPage(url string) Listing {
	const (
		siteRoot = "https://weworkremotely.com/"
		// Every field lives under the same header container, so the shared
		// part of the selector is spelled out once.
		header = "body > div > div.content > div.listing-header > div.listing-header-container"
	)

	// An href that already starts with "/" would otherwise produce
	// "https://weworkremotely.com//...".
	url = siteRoot + strings.TrimPrefix(url, "/")

	response, err := http.Get(url)
	if err != nil {
		log.Fatal(err)
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		// Not fatal: the body is still parsed, but the resulting Listing is
		// likely to be empty, so leave a trace of why.
		log.Printf("unexpected status %q fetching %s", response.Status, url)
	}

	document, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		log.Fatal("Error loading HTTP response body. ", err)
	}

	// field returns the inner HTML of the first element matching selector
	// below the header container, or "" when there is no match.
	field := func(selector string) string {
		content, err := document.Find(header + selector).First().Html()
		if err != nil {
			log.Printf("rendering %q on %s: %v", selector, url, err)
		}
		return content
	}

	return Listing{
		Name:     field(" > h1"),
		Location: field(" > h2 > span.location"),
		Company:  field(" > h2 > span.company"),
		Region:   field(" > h2 > span.region"),
	}
}
// getListings fetches the category page at url, collects the job links on it
// and scrapes every linked job page concurrently.
//
// A link is followed when it matches ".jobs li > a" and its href contains
// "remote-jobs". The returned slice has one Listing per followed link, in the
// order the links appear on the page; it is empty (never nil) when no link
// qualifies. A failed request or an unparsable body terminates the program
// through log.Fatal.
func getListings(url string) []Listing {
	// Upper bound on parseJobPage calls running at the same time.
	const maxConcurrent = 100

	response, err := http.Get(url)
	if err != nil {
		log.Fatal(err)
	}
	defer response.Body.Close()

	document, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		log.Fatal("Error loading HTTP response body. ", err)
	}

	remoteJobUrls := make([]string, 0)
	document.Find(".jobs li > a").Each(func(_ int, element *goquery.Selection) {
		href, exists := element.Attr("href")
		if exists && strings.Contains(href, "remote-jobs") {
			remoteJobUrls = append(remoteJobUrls, href)
		}
	})

	// One pre-allocated slot per URL: each goroutine writes only its own
	// element, so no lock is needed and there is no concurrent append on a
	// shared slice.
	listings := make([]Listing, len(remoteJobUrls))
	slots := make(chan struct{}, maxConcurrent)

	var wg sync.WaitGroup
	for i, jobURL := range remoteJobUrls {
		wg.Add(1)
		slots <- struct{}{} // blocks while maxConcurrent fetches are in flight
		go func(i int, jobURL string) {
			defer wg.Done()
			defer func() { <-slots }()
			listings[i] = parseJobPage(jobURL)
		}(i, jobURL)
	}
	wg.Wait()

	return listings
}
// main scrapes the remote programming jobs category and writes the result to
// listings.json in the current directory as a JSON array. It exits with a
// non-zero status if the listings cannot be encoded or the file cannot be
// written.
func main() {
	fmt.Println("About to start parsing jobs")

	const baseURL = "https://weworkremotely.com/categories/remote-programming-jobs"
	listings := getListings(baseURL)

	result, err := json.Marshal(listings)
	if err != nil {
		log.Fatalf("encoding listings as JSON: %v", err)
	}
	// Report a failed write instead of exiting successfully without output.
	if err := ioutil.WriteFile("listings.json", result, 0644); err != nil {
		log.Fatalf("writing listings.json: %v", err)
	}
}