diff --git a/goSpider.go b/goSpider.go index 3a39d17..4d41a8c 100644 --- a/goSpider.go +++ b/goSpider.go @@ -1481,3 +1481,36 @@ func FindNodes(node *html.Node, nodeExpression string) ([]*html.Node, error) { } return nil, errors.New("could not find specified node") } + +// GetElementAttributeFromNode retrieves the value of a specified attribute from an element +// located using an XPath expression within a given HTML node. +// Parameters: +// - node: The root HTML node to search within. +// - xpathExpr: The XPath expression that identifies the target element. +// - attribute: The attribute name whose value you want to retrieve. +// Returns: +// - The attribute value as a string. +// - An error if the element or attribute cannot be found. +func GetElementAttributeFromNode(node *html.Node, xpathExpr, attribute string) (string, error) { + // Locate the element using the provided XPath expression. + target := htmlquery.FindOne(node, xpathExpr) + if target == nil { + return "", fmt.Errorf("failed to find element for XPath: %s", xpathExpr) + } + + // Retrieve the attribute's value. + // Option 1: using a loop to search through the node's attributes. + for _, attr := range target.Attr { + if attr.Key == attribute { + return attr.Val, nil + } + } + + // Option 2: using htmlquery.SelectAttr (if you prefer a one-liner) + // value := htmlquery.SelectAttr(target, attribute) + // if value != "" { + // return value, nil + // } + + return "", fmt.Errorf("attribute %s not found in element", attribute) +} diff --git a/goSpider_test.go b/goSpider_test.go index 69966ef..97a4ac3 100644 --- a/goSpider_test.go +++ b/goSpider_test.go @@ -632,7 +632,6 @@ func TestPrintHtml(t *testing.T) { } -// TestParseStringToHtmlNode tests the ParseStringToHtmlNode function. func TestParseStringToHtmlNode(t *testing.T) { // Sample HTML string to parse htmlString := "

Hello, World!

" @@ -696,21 +695,40 @@ func TestDatepicker(t *testing.T) { } } -// Won't pass on test because 2FA requires input on the terminal by the user, for that reason alone the test will fail -//// TestLoginGoogle tests google single logon -//func TestLoginGoogle(t *testing.T) { -// profilePath := "/Users/USER_NAME/Library/Application Support/Google/Chrome/Profile 2\"" -// nav := NewNavigator(profilePath) -// defer nav.Close() -// -// err := nav.LoginWithGoogle("", "") -// if err != nil { -// t.Errorf("LoginWithGoogle error: %v", err) -// } -// -//} - -//Full Crawlers +func TestGetElementAttributeFromNode(t *testing.T) { + nav := NewNavigator("", true) + nav.DebugLogger = false + + err := nav.OpenURL("https://www.jusbrasil.com.br/jurisprudencia/busca?q=tjsp&dateFrom=2000-01-01&dateTo=2000-01-31") + if err != nil { + t.Errorf("OpenURL error: %v", err) + return + } + + htmlContent, err := nav.GetPageSource() + if err != nil { + t.Fatalf("FetchHTML error: %v", err) + } + if htmlContent == nil { + t.Error("FetchHTML returned empty content") + } + + nodes, err := FindNodes(htmlContent, "//*[@id=\"__next\"]/main/div[3]/div/div/div/section/ul/li") + if err != nil { + t.Errorf("FindNodes error: %v", err) + } + + var elements []string + for _, node := range nodes { + element, err := GetElementAttributeFromNode(node, "div/div/div/article/div/div/div[1]/h2/a", "href") + if err != nil { + t.Errorf("GetElementAttributeFromNode error: %v on node: %v", err, node) + } + elements = append(elements, element) + } + + log.Println(elements) +} func TestParallelRequests(t *testing.T) { users := []Request{