Skip to content
This repository has been archived by the owner on Feb 7, 2025. It is now read-only.

allow bots to bypass authorization and go directly to the default site #85

Merged
merged 3 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker-compose.test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ x-common-variables: &test-variables
COOKIE_NAME: _proxy
TOKEN_SECRET: Rm9yIEdvZCBzbyBsb3ZlZCB0aGUgd29ybGQgdGhhdCBoZSBnYXZlIGhpcyBvbmUgYW5kIG9ubHkgU29uLCB0aGF0IHdob2V2ZXIgYmVsaWV2ZXMgaW4gaGltIHNoYWxsIG5vdCBwZXJpc2ggYnV0IGhhdmUgZXRlcm5hbCBsaWZlLiAtIEpvaG4gMzoxNg==
SITES: one:server1:80,two:server2:80,three:server3:80
DEFAULT_SITE: server1:80
MANAGEMENT_API: http://fakemanagementapi:80

services:
Expand Down
5 changes: 5 additions & 0 deletions local-example.env
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ COOKIE_NAME=_auth_proxy
# Host name is required. Port is optional. Path is ignored.
# For example: "one:server1.org,two:server2.net,three:server3.com:8080"
SITES=
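# Fallback site for proxying. Trusted bots (see TRUSTED_BOTS) are sent here without a token, and so are
# tokens whose access level is not listed in SITES. Example: "server1.org"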
DEFAULT_SITE=
# The URL for the management API. Do not include a path or query string
# For example: https://www.example.com
MANAGEMENT_API=
Expand All @@ -19,3 +21,6 @@ TOKEN_PATH=
TOKEN_SECRET=
# Disable robots.txt and X-Robots-Tag handling. Default is "false".
ROBOTS_TXT_DISABLE=false
# List of trusted bots which are directly proxied to a specific site identified by DEFAULT_SITE. Use
# comma-separated user agent keywords. Example: "googlebot,duckduckgo"
TRUSTED_BOTS=
52 changes: 36 additions & 16 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,19 @@ type ProxyClaim struct {
}

type Proxy struct {
DefaultSite string `required:"true" split_words:"true"`
Host string `required:"true"`
TokenSecret string `required:"true" split_words:"true"`
Sites AuthSites `required:"true" split_words:"true"`
ManagementAPI string `required:"true" split_words:"true"`

// optional params
CookieName string `default:"_auth_proxy" split_words:"true"`
ReturnToParam string `default:"returnTo" split_words:"true"`
RobotsTxtDisable bool `default:"false" split_words:"true"`
TokenParam string `default:"token" split_words:"true"`
TokenPath string `default:"/auth/token" split_words:"true"`
CookieName string `split_words:"true" default:"_auth_proxy"`
ReturnToParam string `split_words:"true" default:"returnTo"`
RobotsTxtDisable bool `split_words:"true" default:"false"`
TokenParam string `split_words:"true" default:"token"`
TokenPath string `split_words:"true" default:"/auth/token"`
TrustedBots []string `split_words:"true" default:"googlebot"`

// Secret is the binary token secret. Must be exported to be valid after being passed back from Caddy.
Secret []byte `ignored:"true"`
Expand Down Expand Up @@ -124,6 +126,12 @@ func (p Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp.
}

func (p Proxy) handleRequest(w http.ResponseWriter, r *http.Request) error {
if p.isTrustedBot(r) {
upstream := p.DefaultSite
p.setVar(r, CaddyVarUpstream, upstream)
return nil
}

queryToken := p.getTokenFromQueryString(r)
queryClaim := p.getClaimFromToken(queryToken)
cookieToken := p.getTokenFromCookie(r)
Expand Down Expand Up @@ -176,10 +184,7 @@ func (p Proxy) handleRequest(w http.ResponseWriter, r *http.Request) error {
return nil
}

upstream, err := p.getSite(claim.Level)
if err != nil {
return err
}
upstream := p.getSite(claim.Level)

p.setVar(r, CaddyVarUpstream, upstream)
return nil
Expand Down Expand Up @@ -215,6 +220,10 @@ func newProxy() (Proxy, error) {
if err != nil {
return p, fmt.Errorf("unable to decode Proxy TokenSecret: %w", err)
}

for i := range p.TrustedBots {
p.TrustedBots[i] = strings.ToLower(p.TrustedBots[i])
}
return p, nil
}

Expand All @@ -230,16 +239,12 @@ func (p Proxy) getTokenFromCookie(r *http.Request) string {
return cookie.Value
}

func (p Proxy) getSite(level string) (string, error) {
func (p Proxy) getSite(level string) string {
upstream, ok := p.Sites[level]
if !ok {
return "", &Error{
err: fmt.Errorf("auth level '%v' not in sites: %v", level, p.Sites),
Message: "error: unrecognized access level",
Status: http.StatusBadRequest,
}
return p.DefaultSite
}
return upstream, nil
return upstream
}

func (p Proxy) clearQueryToken(r *http.Request) {
Expand Down Expand Up @@ -320,6 +325,21 @@ func (p Proxy) getNewToken(_ http.ResponseWriter, r *http.Request) error {
return nil
}

// isTrustedBot compares the user agent in the request against a list of trusted bots in the configuration and
// returns true if the user agent contains one of the configured keywords.
func (p Proxy) isTrustedBot(r *http.Request) bool {
userAgent := strings.ToLower(r.Header.Get("User-Agent"))
if userAgent == "" {
return false
}
for _, s := range p.TrustedBots {
if strings.Contains(userAgent, s) {
briskt marked this conversation as resolved.
Show resolved Hide resolved
return true
}
}
return false
}

// claimsAreValidAndDifferent reports whether a and b are both valid claims
// whose IssuedAt times are not equal.
func claimsAreValidAndDifferent(a, b ProxyClaim) bool {
	// The issue times are only compared once both claims are known to be valid.
	if !a.IsValid || !b.IsValid {
		return false
	}
	return !a.IssuedAt.Time.Equal(b.IssuedAt.Time)
}
54 changes: 50 additions & 4 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func Test_AuthProxy(t *testing.T) {
CookieName: cookieName,
Secret: tokenSecret,
Sites: authURLs,
DefaultSite: "default.example.com",
log: zap.L(),
ManagementAPI: managementAPI,
TokenPath: tokenPath,
Expand Down Expand Up @@ -59,10 +60,10 @@ func Test_AuthProxy(t *testing.T) {
wantRedirectURL: ptr(managementAPI + tokenPath + "?returnTo=%2F"),
},
{
name: "invalid level",
url: "/",
cookie: makeTestJWTCookie(cookieName, makeTestJWT(tokenSecret, "bad", validTime)),
wantErr: "not in sites",
name: "default site",
url: "/",
cookie: makeTestJWTCookie(cookieName, makeTestJWT(tokenSecret, "default", validTime)),
wantUpstream: ptr("default.example.com"),
},
{
name: "query valid -- redirect to set cookie",
Expand Down Expand Up @@ -233,3 +234,48 @@ func makeTestJWT(secret []byte, level string, expires time.Time) string {

return tokenString
}

// TestProxy_isTrustedBot checks the User-Agent keyword matching done by
// Proxy.isTrustedBot against several trusted-bot lists.
func TestProxy_isTrustedBot(t *testing.T) {
	cases := []struct {
		name      string
		trusted   []string
		userAgent string
		want      bool
	}{
		{name: "empty user agent", trusted: []string{"googlebot"}, userAgent: "", want: false},
		{
			name:      "empty trusted list",
			trusted:   nil,
			userAgent: "Googlebot/2.1 (+http://www.googlebot.com/bot.html)",
			want:      false,
		},
		{name: "not in trusted list", trusted: []string{"googlebot"}, userAgent: "duckduckgo", want: false},
		{
			name:      "in trusted list",
			trusted:   []string{"duckduckgo", "googlebot"},
			userAgent: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
			want:      true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Only TrustedBots is populated; the method under test reads nothing else.
			proxy := Proxy{TrustedBots: tc.trusted}

			req := httptest.NewRequest(http.MethodGet, "/", nil)
			req.Header.Set("User-Agent", tc.userAgent)

			got := proxy.isTrustedBot(req)
			assert.Equalf(t, tc.want, got, "user agent '%s', trusted %+v",
				req.Header.Get("User-Agent"), tc.trusted)
		})
	}
}