Skip to content

Commit

Permalink
Fix social twitter private (#105)
Browse files Browse the repository at this point in the history
  • Loading branch information
ice-dionysos authored Jan 19, 2024
1 parent b712677 commit d0f4319
Show file tree
Hide file tree
Showing 8 changed files with 110 additions and 40 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ require (
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/distribution/reference v0.5.0 // indirect
github.com/docker/distribution v2.8.3+incompatible // indirect
github.com/docker/docker v24.0.7+incompatible // indirect
github.com/docker/docker v25.0.0+incompatible // indirect
github.com/docker/go-connections v0.5.0 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
Expand Down Expand Up @@ -140,7 +140,7 @@ require (
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/arch v0.7.0 // indirect
golang.org/x/crypto v0.18.0 // indirect
golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3 // indirect
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
golang.org/x/oauth2 v0.16.0 // indirect
golang.org/x/sync v0.6.0 // indirect
golang.org/x/sys v0.16.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3 h1:hNQpMuAJe5CtcUqCXaWga3FHu+kQvCqcsoVaQgSV60o=
golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08=
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA=
golang.org/x/exp v0.0.0-20240119083558-1b970713d09a/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
Expand Down
16 changes: 13 additions & 3 deletions kyc/social/internal/contract.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package social
import (
"context"

"github.com/imroc/req/v3"
"github.com/pkg/errors"
)

Expand All @@ -30,14 +31,22 @@ type (
)

type (
webScraperOptionsFunc func(map[string]string) map[string]string
webScraperOptions struct {
Retry req.RetryConditionFunc
ProxyOptions func(map[string]string) map[string]string
}

webScraperResult struct {
Content []byte
Code int
}

webScraper interface {
Scrape(ctx context.Context, url string, opts webScraperOptionsFunc) (content []byte, err error)
Scrape(ctx context.Context, url string, opts webScraperOptions) (result *webScraperResult, err error)
}

dataFetcher interface {
Fetch(ctx context.Context, url string) (content []byte, err error)
Fetch(ctx context.Context, url string, retry req.RetryConditionFunc) (content []byte, httpCode int, err error)
}

censorer interface {
Expand Down Expand Up @@ -142,4 +151,5 @@ var (
ErrFetchReadFailed = errors.New("cannot read fetched post")
ErrScrapeFailed = errors.New("cannot scrape target")
ErrInvalidToken = errors.New("invalid token")
ErrTweetPrivate = errors.New("tweet is private or does not exist")
)
4 changes: 2 additions & 2 deletions kyc/social/internal/facebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func (f *facebookVerifierImpl) BuildURL(endpoint string, args map[string]string)
}

func (f *facebookVerifierImpl) FetchFeed(ctx context.Context, targetURL string) (resp facebookFeedResponse, err error) {
data, err := f.Fetcher.Fetch(ctx, targetURL)
data, _, err := f.Fetcher.Fetch(ctx, targetURL, nil)
if err != nil {
return resp, multierror.Append(ErrScrapeFailed, err)
}
Expand Down Expand Up @@ -145,7 +145,7 @@ func (f *facebookVerifierImpl) VerifyToken(ctx context.Context, meta *Metadata)
"access_token": f.AppID + "|" + f.AppSecret,
})

data, err := f.Fetcher.Fetch(ctx, targetURL)
data, _, err := f.Fetcher.Fetch(ctx, targetURL, nil)
if err != nil {
return "", multierror.Append(ErrScrapeFailed, err)
}
Expand Down
29 changes: 17 additions & 12 deletions kyc/social/internal/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func (c *censorerImpl) Censor(err error) error {
return errors.New(msg)
}

func (d *dataFetcherImpl) Fetch(ctx context.Context, target string) ([]byte, error) {
func (d *dataFetcherImpl) Fetch(ctx context.Context, target string, retry req.RetryConditionFunc) (data []byte, code int, err error) {
resp, err := req.DefaultClient().
R().
SetContext(ctx).
Expand All @@ -44,30 +44,35 @@ func (d *dataFetcherImpl) Fetch(ctx context.Context, target string) ([]byte, err
}
}).
SetRetryCondition(func(resp *req.Response, err error) bool {
if retry != nil {
return retry(resp, err)
}

return !(err == nil && resp.GetStatusCode() == http.StatusOK)
}).
Get(target)
if err != nil {
return nil, multierror.Append(ErrFetchFailed, d.Censorer.Censor(err))
return nil, 0, multierror.Append(ErrFetchFailed, d.Censorer.Censor(err))
}

data, err := resp.ToBytes()
data, err = resp.ToBytes()
if err != nil {
return nil, multierror.Append(ErrFetchReadFailed, d.Censorer.Censor(err))
}

if resp.GetStatusCode() != http.StatusOK {
return nil, multierror.Append(ErrFetchFailed, errors.Errorf("unexpected status code: `%v`, response: `%v`", resp.GetStatusCode(), string(data)))
return nil, 0, multierror.Append(ErrFetchReadFailed, d.Censorer.Censor(err))
}

return data, nil
return data, resp.GetStatusCode(), nil
}

func (s *webScraperImpl) Scrape(ctx context.Context, target string, options webScraperOptionsFunc) ([]byte, error) {
return s.Fetcher.Fetch(ctx, s.BuildQuery(target, options)) //nolint:wrapcheck // False-Positive.
// Scrape builds the scraping query for target from opts.ProxyOptions, fetches it
// with opts.Retry as the retry condition, and returns the fetched body together
// with the HTTP status code reported by the fetcher.
//
// The status code is not interpreted here; any error from the fetcher is
// returned unchanged.
func (s *webScraperImpl) Scrape(ctx context.Context, target string, opts webScraperOptions) (*webScraperResult, error) {
	query := s.BuildQuery(target, opts.ProxyOptions)

	content, status, err := s.Fetcher.Fetch(ctx, query, opts.Retry)
	if err != nil {
		return nil, err //nolint:wrapcheck // False-Positive.
	}

	scraped := &webScraperResult{
		Content: content,
		Code:    status,
	}

	return scraped, nil
}

func (s *webScraperImpl) BuildQuery(target string, options webScraperOptionsFunc) string {
func (s *webScraperImpl) BuildQuery(target string, options func(map[string]string) map[string]string) string {
conf := map[string]string{
"render_js": "1",
"device": "mobile",
Expand Down
19 changes: 19 additions & 0 deletions kyc/social/internal/social_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,25 @@ func TestTwitterKYC(t *testing.T) {
})
}

// TestTwitterPrivate expects VerifyPost to fail with ErrTweetPrivate for the
// status URL below. It uses the web scraper built from the loaded configuration
// and bounds the whole verification with a one-minute timeout.
func TestTwitterPrivate(t *testing.T) {
	t.Parallel()

	cfg := loadConfig()
	require.NotNil(t, cfg)

	scraper := newMustWebScraper(cfg.WebScrapingAPI.URL, cfg.WebScrapingAPI.APIKey)
	require.NotNil(t, scraper)

	twitter := newTwitterVerifier(scraper, []string{"twitter.com"}, []string{"US", "MX", "CA"})
	require.NotNil(t, twitter)

	ctx, cancel := context.WithTimeout(context.TODO(), time.Minute)
	defer cancel()

	// The expected text and URL are placeholders: the test only asserts on the error.
	meta := &Metadata{
		PostURL:          `https://twitter.com/root/status/1748008059103039495`,
		ExpectedPostText: "foo",
		ExpectedPostURL:  "bar",
	}

	_, err := twitter.VerifyPost(ctx, meta)
	require.ErrorIs(t, err, ErrTweetPrivate)
}

func TestFacebookKYC(t *testing.T) {
t.Parallel()

Expand Down
65 changes: 50 additions & 15 deletions kyc/social/internal/twitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@ import (
"context"
"encoding/json"
"math/rand"
"net/http"
"net/url"
"slices"
"strings"

"github.com/PuerkitoBio/goquery"
"github.com/hashicorp/go-multierror"
"github.com/imroc/req/v3"
"github.com/pkg/errors"

"github.com/ice-blockchain/wintr/time"
Expand All @@ -33,8 +35,8 @@ func (t *twitterVerifierImpl) VerifyPostLink(ctx context.Context, doc *goquery.D
for _, node := range s.Nodes {
for i := range node.Attr {
if node.Attr[i].Key == "href" && strings.HasPrefix(node.Attr[i].Val, "https://t.co") {
data, err := t.Scrape(ctx, node.Attr[i].Val)
foundPost = err == nil && strings.Contains(strings.ToLower(string(data)), strings.ToLower(expectedPostURL))
result, err := t.Scrape(ctx, node.Attr[i].Val)
foundPost = err == nil && strings.Contains(strings.ToLower(string(result.Content)), strings.ToLower(expectedPostURL))

break
}
Expand Down Expand Up @@ -84,29 +86,62 @@ func (*twitterVerifierImpl) ExtractUsernameFromURL(postURL string) (username str
return
}

func (t *twitterVerifierImpl) Scrape(ctx context.Context, target string) (content []byte, err error) {
for _, country := range t.countries() {
if content, err = t.Scraper.Scrape(ctx, target, func(m map[string]string) map[string]string {
m["country"] = country
delete(m, "render_js")
delete(m, "wait_until")
// twitterRetryFn is the retry condition used for Twitter scraping requests.
//
// It asks for a retry when the request itself failed (err is non-nil) or when
// the response status is anything other than 200 OK or 403 Forbidden. Those two
// statuses are treated as final answers and are not retried.
func twitterRetryFn(resp *req.Response, err error) bool {
	if err != nil {
		return true
	}

	status := resp.GetStatusCode()
	final := status == http.StatusOK || status == http.StatusForbidden

	return !final
}

return m
}); err == nil {
// Scrape fetches target through the web scraper, trying each country returned
// by t.countries() in turn and stopping at the first attempt that completes
// without an error.
//
// The outcome is then decided by the HTTP status code of the scraped response:
//   - 200: the result is returned as is;
//   - 403 whose body contains the "not authorized to see this status" notice:
//     ErrTweetPrivate is returned;
//   - anything else: ErrFetchFailed is returned, carrying the status code and
//     the response body.
//
// If every attempt fails, the last error is returned wrapped in ErrFetchFailed.
// ErrFetchFailed is also returned when no result was produced at all (for
// example, when there are no countries to try), rather than dereferencing a nil
// result.
func (t *twitterVerifierImpl) Scrape(ctx context.Context, target string) (result *webScraperResult, err error) { //nolint:funlen // .
	for _, country := range t.countries() {
		opts := webScraperOptions{
			// twitterRetryFn does not retry on 403, so that the response body
			// can be inspected for the private-tweet notice below.
			Retry: twitterRetryFn,
			ProxyOptions: func(m map[string]string) map[string]string {
				m["country"] = country
				delete(m, "render_js")
				delete(m, "wait_until")

				return m
			},
		}

		if result, err = t.Scraper.Scrape(ctx, target, opts); err == nil {
			break
		}
	}
	if err != nil {
		return nil, multierror.Append(ErrFetchFailed, err)
	}
	if result == nil {
		// The loop body never ran or the scraper returned neither a result nor
		// an error; there is nothing to inspect.
		return nil, multierror.Append(ErrFetchFailed, errors.New("no scrape result"))
	}

	switch result.Code {
	case http.StatusOK:
		return result, nil

	case http.StatusForbidden:
		const errorText = `Sorry, you are not authorized to see this status.`

		if strings.Contains(string(result.Content), errorText) {
			return nil, ErrTweetPrivate
		}

		fallthrough

	default:
		return nil, multierror.Append(ErrFetchFailed, errors.Errorf("unexpected status code: `%v`, response: `%v`", result.Code, string(result.Content)))
	}
}

func (t *twitterVerifierImpl) FetchOE(ctx context.Context, postURL string) (*twitterOE, error) {
var (
data []byte
err error
result *webScraperResult
err error
)

target := url.URL{
Expand All @@ -116,12 +151,12 @@ func (t *twitterVerifierImpl) FetchOE(ctx context.Context, postURL string) (*twi
RawQuery: url.Values{"url": {postURL}}.Encode(),
}

if data, err = t.Scrape(ctx, target.String()); err != nil {
if result, err = t.Scrape(ctx, target.String()); err != nil {
return nil, err
}

var oe twitterOE
if err = json.Unmarshal(data, &oe); err != nil {
if err = json.Unmarshal(result.Content, &oe); err != nil {
return nil, multierror.Append(ErrInvalidPageContent, err)
} else if oe.HTML == "" {
return nil, errors.Wrap(ErrInvalidPageContent, "empty page")
Expand Down
9 changes: 5 additions & 4 deletions kyc/social/internal/twitter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"testing"

"github.com/hashicorp/go-multierror"
"github.com/imroc/req/v3"
"github.com/stretchr/testify/require"
)

Expand All @@ -31,12 +32,12 @@ func TestTwitterExtractUsernameFromURL(t *testing.T) {

type mockScraper struct{}

func (*mockScraper) Scrape(context.Context, string, webScraperOptionsFunc) ([]byte, error) {
return []byte{}, multierror.Append(ErrScrapeFailed, ErrFetchFailed)
func (*mockScraper) Scrape(context.Context, string, webScraperOptions) (*webScraperResult, error) {
return nil, multierror.Append(ErrScrapeFailed, ErrFetchFailed)
}

func (*mockScraper) Fetch(context.Context, string) ([]byte, error) {
return []byte{}, multierror.Append(ErrScrapeFailed, ErrFetchFailed)
func (*mockScraper) Fetch(context.Context, string, req.RetryConditionFunc) ([]byte, int, error) {
return []byte{}, 0, multierror.Append(ErrScrapeFailed, ErrFetchFailed)
}

func TestTwitterVerifyFetch(t *testing.T) {
Expand Down

0 comments on commit d0f4319

Please sign in to comment.