From 5104216eebcf1ac7b8eb004badb63775abcda43b Mon Sep 17 00:00:00 2001 From: Dionysos <75300347+ice-dionysos@users.noreply.github.com> Date: Thu, 27 Jun 2024 16:07:44 +0200 Subject: [PATCH] kyc/social: twitter: check for URL in the post body (#169) --- kyc/social/internal/contract.go | 4 ++- kyc/social/internal/scraper.go | 27 ++++++++++++++-- kyc/social/internal/scraper_test.go | 20 ++++++++++++ kyc/social/internal/social_test.go | 25 +++++++++++++++ kyc/social/internal/twitter.go | 50 ++++++++++++++++++++++------- kyc/social/internal/twitter_test.go | 8 +++++ 6 files changed, 120 insertions(+), 14 deletions(-) diff --git a/kyc/social/internal/contract.go b/kyc/social/internal/contract.go index cdcdd585..137dba30 100644 --- a/kyc/social/internal/contract.go +++ b/kyc/social/internal/contract.go @@ -43,10 +43,12 @@ type ( webScraper interface { Scrape(ctx context.Context, url string, opts webScraperOptions) (result *webScraperResult, err error) + Fetcher() dataFetcher } dataFetcher interface { Fetch(ctx context.Context, url string, retry req.RetryConditionFunc) (content []byte, httpCode int, err error) + Head(ctx context.Context, url string) (location string, err error) } censorer interface { @@ -54,7 +56,7 @@ type ( } webScraperImpl struct { - Fetcher dataFetcher + DataFetcher dataFetcher ScrapeAPIURL string APIKey string } diff --git a/kyc/social/internal/scraper.go b/kyc/social/internal/scraper.go index 18ff8a12..0d0e7ceb 100644 --- a/kyc/social/internal/scraper.go +++ b/kyc/social/internal/scraper.go @@ -30,6 +30,25 @@ func (c *censorerImpl) Censor(err error) error { return errors.New(msg) } +func (d *dataFetcherImpl) Head(ctx context.Context, target string) (location string, err error) { + resp, err := req.C().SetRedirectPolicy(req.NoRedirectPolicy()). + R(). + SetContext(ctx). + SetRetryBackoffInterval(0, 0). + SetRetryCount(0). + Head(target) + if err != nil { + return "", multierror.Append(ErrFetchFailed, d.Censorer.Censor(err)) + } + + u, err := resp.Location() + if err != nil { + return "", multierror.Append(ErrFetchReadFailed, d.Censorer.Censor(err)) + } + + return u.String(), nil +} + func (d *dataFetcherImpl) Fetch(ctx context.Context, target string, retry req.RetryConditionFunc) (data []byte, code int, err error) { resp, err := req.DefaultClient(). R(). @@ -63,8 +82,12 @@ func (d *dataFetcherImpl) Fetch(ctx context.Context, target string, retry req.Re return data, resp.GetStatusCode(), nil } +func (s *webScraperImpl) Fetcher() dataFetcher { + return s.DataFetcher +} + func (s *webScraperImpl) Scrape(ctx context.Context, target string, opts webScraperOptions) (*webScraperResult, error) { - data, code, err := s.Fetcher.Fetch(ctx, s.BuildQuery(target, opts.ProxyOptions), opts.Retry) + data, code, err := s.DataFetcher.Fetch(ctx, s.BuildQuery(target, opts.ProxyOptions), opts.Retry) if err != nil { return nil, err //nolint:wrapcheck // False-Positive. } @@ -114,6 +137,6 @@ func newMustWebScraper(apiURL, apiKey string) webScraper { return &webScraperImpl{ ScrapeAPIURL: apiURL, APIKey: apiKey, - Fetcher: &dataFetcherImpl{Censorer: censorer}, + DataFetcher: &dataFetcherImpl{Censorer: censorer}, } } diff --git a/kyc/social/internal/scraper_test.go b/kyc/social/internal/scraper_test.go index b755b33a..6d246277 100644 --- a/kyc/social/internal/scraper_test.go +++ b/kyc/social/internal/scraper_test.go @@ -3,12 +3,15 @@ package social import ( + "context" "testing" "github.com/stretchr/testify/require" ) func TestWebScrapperInvalidConfig(t *testing.T) { + t.Parallel() + sc := newMustWebScraper(string([]byte{0x00}), "") require.NotNil(t, sc) @@ -26,3 +29,20 @@ func TestWebScrapperInvalidConfig(t *testing.T) { }) }) } + +func TestDataFetcherHead(t *testing.T) { + t.Parallel() + + fetcher := &dataFetcherImpl{Censorer: new(censorerImpl)} + + t.Run("OK", func(t *testing.T) { + location, err := fetcher.Head(context.TODO(), "https://httpstat.us/301") + require.NoError(t, err) + require.Equal(t, "https://httpstat.us", location) + }) + t.Run("ServerError", func(t *testing.T) { + _, err := fetcher.Head(context.TODO(), "https://httpstat.us/500") + t.Logf("fetcher error: %v", err) + require.Error(t, err) + }) +} diff --git a/kyc/social/internal/social_test.go b/kyc/social/internal/social_test.go index 2c8e7c4e..522a5211 100644 --- a/kyc/social/internal/social_test.go +++ b/kyc/social/internal/social_test.go @@ -42,6 +42,31 @@ func TestTwitterKYC(t *testing.T) { }) } +func TestTwitterLinkInPostKYC(t *testing.T) { + t.Parallel() + + const ( + expectedText = `https://sunwavestoken.com/@xxyY` + targetURL = `https://x.com/JohnDoe1495747/status/1806293193408795126` + ) + + conf := loadConfig() + require.NotNil(t, conf) + + sc := newMustWebScraper(conf.WebScrapingAPI.URL, conf.WebScrapingAPI.APIKey) + require.NotNil(t, sc) + + verifier := newTwitterVerifier(sc, []string{"x.com"}, []string{"US", "MX", "CA"}) + require.NotNil(t, verifier) + + ctx, cancel := context.WithTimeout(context.TODO(), time.Minute) + defer cancel() + + username, err := verifier.VerifyPost(ctx, &Metadata{PostURL: targetURL, ExpectedPostText: expectedText}) + require.NoError(t, err) + require.Equal(t, "JohnDoe1495747", username) +} + func TestTwitterKYCNoRepost(t *testing.T) { t.Parallel() diff --git a/kyc/social/internal/twitter.go b/kyc/social/internal/twitter.go index b6e3ff52..da906def 100644 --- a/kyc/social/internal/twitter.go +++ b/kyc/social/internal/twitter.go @@ -17,10 +17,16 @@ import ( "github.com/imroc/req/v3" "github.com/pkg/errors" + "github.com/ice-blockchain/wintr/log" "github.com/ice-blockchain/wintr/time" ) -func (*twitterVerifierImpl) VerifyText(doc *goquery.Document, expectedText string) (found bool) { +func (t *twitterVerifierImpl) VerifyText(ctx context.Context, doc *goquery.Document, expectedText string) (found bool) { + isURL := strings.HasPrefix(expectedText, "http://") || strings.HasPrefix(expectedText, "https://") + if isURL { + return t.VerifyPostLink(ctx, doc, expectedText) + } + doc.Find("p").EachWithBreak(func(_ int, s *goquery.Selection) bool { found = found || strings.Contains(s.Text(), strings.TrimSpace(expectedText)) @@ -30,15 +36,37 @@ func (*twitterVerifierImpl) VerifyText(doc *goquery.Document, expectedText strin return } +func (t *twitterVerifierImpl) VerifyPostLinkOf(ctx context.Context, target, expectedURL string) bool { + if strings.EqualFold(target, expectedURL) { + return true + } + + if strings.HasPrefix(target, "https://t.co") { + loc, err := t.Scraper.Fetcher().Head(ctx, target) + if err != nil { + log.Warn("twitter: failed to fetch location header", "error", err) + // Fallthrough. + } else if strings.EqualFold(loc, expectedURL) { + return true + } + } + + result, err := t.Scrape(ctx, target) + if err != nil { + log.Warn("twitter: failed to scrape", "error", err) + + return false + } + + return strings.Contains(strings.ToLower(string(result.Content)), strings.ToLower(expectedURL)) +} + func (t *twitterVerifierImpl) VerifyPostLink(ctx context.Context, doc *goquery.Document, expectedPostURL string) (foundPost bool) { doc.Find("a").EachWithBreak(func(_ int, s *goquery.Selection) bool { for _, node := range s.Nodes { - for i := range node.Attr { - if node.Attr[i].Key == "href" && strings.HasPrefix(node.Attr[i].Val, "https://t.co") { - result, err := t.Scrape(ctx, node.Attr[i].Val) - foundPost = err == nil && strings.Contains(strings.ToLower(string(result.Content)), strings.ToLower(expectedPostURL)) - - break + for attrIndex := range node.Attr { + if node.Attr[attrIndex].Key == "href" { + foundPost = t.VerifyPostLinkOf(ctx, node.Attr[attrIndex].Val, expectedPostURL) } } } @@ -49,17 +77,17 @@ func (t *twitterVerifierImpl) VerifyPostLink(ctx context.Context, doc *goquery.D return } -func (t *twitterVerifierImpl) VerifyContent(ctx context.Context, oe *twitterOE, expectedText, expectedPostURL string) (err error) { +func (t *twitterVerifierImpl) VerifyContent(ctx context.Context, oe *twitterOE, meta *Metadata) (err error) { doc, err := goquery.NewDocumentFromReader(bytes.NewReader([]byte(oe.HTML))) if err != nil { return multierror.Append(ErrInvalidPageContent, err) } - if expectedText != "" && !t.VerifyText(doc, expectedText) { + if meta.ExpectedPostText != "" && !t.VerifyText(ctx, doc, meta.ExpectedPostText) { return ErrTextNotFound } - if expectedPostURL != "" && !t.VerifyPostLink(ctx, doc, expectedPostURL) { + if meta.ExpectedPostURL != "" && !t.VerifyPostLink(ctx, doc, meta.ExpectedPostURL) { return ErrPostNotFound } @@ -199,7 +227,7 @@ func (t *twitterVerifierImpl) VerifyPost(ctx context.Context, meta *Metadata) (u return username, err } - return username, t.VerifyContent(ctx, oe, meta.ExpectedPostText, meta.ExpectedPostURL) + return username, t.VerifyContent(ctx, oe, meta) } func (t *twitterVerifierImpl) countries() []string { diff --git a/kyc/social/internal/twitter_test.go b/kyc/social/internal/twitter_test.go index 20ef4c32..ec274653 100644 --- a/kyc/social/internal/twitter_test.go +++ b/kyc/social/internal/twitter_test.go @@ -40,6 +40,14 @@ func (*mockScraper) Fetch(context.Context, string, req.RetryConditionFunc) ([]by return []byte{}, 0, multierror.Append(ErrScrapeFailed, ErrFetchFailed) } +func (*mockScraper) Head(context.Context, string) (string, error) { + return "", multierror.Append(ErrScrapeFailed, ErrFetchFailed) +} + +func (*mockScraper) Fetcher() dataFetcher { + return nil +} + func TestTwitterVerifyFetch(t *testing.T) { t.Parallel()