From 2945d308a9da89bd5941ee15c78d4086b951d70e Mon Sep 17 00:00:00 2001
From: Boris Nagaev
Date: Thu, 4 Apr 2024 22:09:52 -0300
Subject: [PATCH] golang: add test against false negatives

Check against the list from https://github.com/microlinkhq/top-user-agents

Fix https://github.com/monperrus/crawler-user-agents/issues/350
---
 validate_test.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)

diff --git a/validate_test.go b/validate_test.go
index 9eba6aa..6812ad2 100644
--- a/validate_test.go
+++ b/validate_test.go
@@ -1,7 +1,10 @@
 package agents
 
 import (
+	"encoding/json"
 	"fmt"
+	"net/http"
 	"testing"
+	"time"
 )
 
@@ -48,6 +51,55 @@ func TestPatterns(t *testing.T) {
 	}
 }
 
+// TestFalseNegatives downloads a published list of popular browser User
+// Agents and checks that none of them is classified as a crawler by
+// IsCrawler or MatchingCrawlers.
+//
+// The test needs network access to raw.githubusercontent.com.
+func TestFalseNegatives(t *testing.T) {
+	const browsersURL = "https://raw.githubusercontent.com/microlinkhq/top-user-agents/master/src/index.json"
+
+	// http.Get uses a client without a timeout; bound the request so that a
+	// stalled connection fails the test instead of hanging the whole run.
+	client := &http.Client{Timeout: 30 * time.Second}
+	resp, err := client.Get(browsersURL)
+	if err != nil {
+		t.Fatalf("Failed to fetch the list of browser User Agents from %s: %v.", browsersURL, err)
+	}
+
+	t.Cleanup(func() {
+		if err := resp.Body.Close(); err != nil {
+			t.Errorf("Failed to close the response body: %v.", err)
+		}
+	})
+
+	// An error page is not JSON; report the HTTP status rather than a
+	// confusing parse error.
+	if resp.StatusCode != http.StatusOK {
+		t.Fatalf("Failed to fetch the list of browser User Agents from %s: unexpected status %s.", browsersURL, resp.Status)
+	}
+
+	var browsers []string
+	if err := json.NewDecoder(resp.Body).Decode(&browsers); err != nil {
+		t.Fatalf("Failed to parse the list of browser User Agents: %v.", err)
+	}
+
+	// An empty list would make the loop below pass without checking anything.
+	if len(browsers) == 0 {
+		t.Fatalf("The list of browser User Agents from %s is empty.", browsersURL)
+	}
+
+	for _, userAgent := range browsers {
+		if IsCrawler(userAgent) {
+			t.Errorf("Browser User Agent %q is recognized as a crawler.", userAgent)
+		}
+		indices := MatchingCrawlers(userAgent)
+		if len(indices) != 0 {
+			t.Errorf("Browser User Agent %q matches with crawlers %v.", userAgent, indices)
+		}
+	}
+}
+
 const (
 	crawlerUA = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 Google (+https://developers.google.com/+/web/snippet/"
 	browserUA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) obsidian/1.5.3 Chrome/114.0.5735.289 Electron/25.8.1 Safari/537.36"