diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 18f5291..cb91281 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -5,6 +5,7 @@ x-common-variables: &test-variables COOKIE_NAME: _proxy TOKEN_SECRET: Rm9yIEdvZCBzbyBsb3ZlZCB0aGUgd29ybGQgdGhhdCBoZSBnYXZlIGhpcyBvbmUgYW5kIG9ubHkgU29uLCB0aGF0IHdob2V2ZXIgYmVsaWV2ZXMgaW4gaGltIHNoYWxsIG5vdCBwZXJpc2ggYnV0IGhhdmUgZXRlcm5hbCBsaWZlLiAtIEpvaG4gMzoxNg== SITES: one:server1:80,two:server2:80,three:server3:80 + DEFAULT_SITE: server1:80 MANAGEMENT_API: http://fakemanagementapi:80 services: diff --git a/local-example.env b/local-example.env index 14aa767..5880aaf 100644 --- a/local-example.env +++ b/local-example.env @@ -6,6 +6,8 @@ COOKIE_NAME=_auth_proxy # Host name is required. Port is optional. Path is ignored. # For example: "one:server1.org,two:server2.net,three:server3.com:8080" SITES= +# Site used for proxy when no token is available. Example: "server1.org" +DEFAULT_SITE= # The URL for the management API. Do not include a path or query string # For example: https://www.example.com MANAGEMENT_API= @@ -19,3 +21,6 @@ TOKEN_PATH= TOKEN_SECRET= # disable robots.txt and X-Robots-Tag handling. Default is "false". ROBOTS_TXT_DISABLE=false +# List of trusted bots which are directly proxied to a specific site identified by DEFAULT_SITE. Use +# comma-separated user agent keywords. Example: "googlebot,duckduckgo" +TRUSTED_BOTS= diff --git a/main.go b/main.go index 96ce00c..03ccb61 100644 --- a/main.go +++ b/main.go @@ -44,17 +44,19 @@ type ProxyClaim struct { } type Proxy struct { + DefaultSite string `required:"true" split_words:"true"` Host string `required:"true"` TokenSecret string `required:"true" split_words:"true"` Sites AuthSites `required:"true" split_words:"true"` ManagementAPI string `required:"true" split_words:"true"` // optional params - CookieName string `default:"_auth_proxy" split_words:"true"` - ReturnToParam string `default:"returnTo" split_words:"true"` - RobotsTxtDisable bool `default:"false" split_words:"true"` - TokenParam string `default:"token" split_words:"true"` - TokenPath string `default:"/auth/token" split_words:"true"` + CookieName string `split_words:"true" default:"_auth_proxy"` + ReturnToParam string `split_words:"true" default:"returnTo"` + RobotsTxtDisable bool `split_words:"true" default:"false"` + TokenParam string `split_words:"true" default:"token"` + TokenPath string `split_words:"true" default:"/auth/token"` + TrustedBots []string `split_words:"true" default:"googlebot"` // Secret is the binary token secret. Must be exported to be valid after being passed back from Caddy. Secret []byte `ignored:"true"` @@ -124,6 +126,12 @@ func (p Proxy) ServeHTTP(w http.ResponseWriter, r *http.Request, next caddyhttp. } func (p Proxy) handleRequest(w http.ResponseWriter, r *http.Request) error { + if p.isTrustedBot(r) { + upstream := p.DefaultSite + p.setVar(r, CaddyVarUpstream, upstream) + return nil + } + queryToken := p.getTokenFromQueryString(r) queryClaim := p.getClaimFromToken(queryToken) cookieToken := p.getTokenFromCookie(r) @@ -176,10 +184,7 @@ func (p Proxy) handleRequest(w http.ResponseWriter, r *http.Request) error { return nil } - upstream, err := p.getSite(claim.Level) - if err != nil { - return err - } + upstream := p.getSite(claim.Level) p.setVar(r, CaddyVarUpstream, upstream) return nil @@ -215,6 +220,10 @@ func newProxy() (Proxy, error) { if err != nil { return p, fmt.Errorf("unable to decode Proxy TokenSecret: %w", err) } + + for i := range p.TrustedBots { + p.TrustedBots[i] = strings.ToLower(p.TrustedBots[i]) + } return p, nil } @@ -230,16 +239,12 @@ func (p Proxy) getTokenFromCookie(r *http.Request) string { return cookie.Value } -func (p Proxy) getSite(level string) (string, error) { +func (p Proxy) getSite(level string) string { upstream, ok := p.Sites[level] if !ok { - return "", &Error{ - err: fmt.Errorf("auth level '%v' not in sites: %v", level, p.Sites), - Message: "error: unrecognized access level", - Status: http.StatusBadRequest, - } + return p.DefaultSite } - return upstream, nil + return upstream } func (p Proxy) clearQueryToken(r *http.Request) { @@ -320,6 +325,21 @@ func (p Proxy) getNewToken(_ http.ResponseWriter, r *http.Request) error { return nil } +// isTrustedBot compares the user agent in the request against a list of trusted bots in the configuration and +// returns true if the user agent contains one of the configured keywords. +func (p Proxy) isTrustedBot(r *http.Request) bool { + userAgent := strings.ToLower(r.Header.Get("User-Agent")) + if userAgent == "" { + return false + } + for _, s := range p.TrustedBots { + if strings.Contains(userAgent, s) { + return true + } + } + return false +} + func claimsAreValidAndDifferent(a, b ProxyClaim) bool { return a.IsValid && b.IsValid && !a.IssuedAt.Time.Equal(b.IssuedAt.Time) } diff --git a/main_test.go b/main_test.go index f1dda33..37becc4 100644 --- a/main_test.go +++ b/main_test.go @@ -27,6 +27,7 @@ func Test_AuthProxy(t *testing.T) { CookieName: cookieName, Secret: tokenSecret, Sites: authURLs, + DefaultSite: "default.example.com", log: zap.L(), ManagementAPI: managementAPI, TokenPath: tokenPath, @@ -59,10 +60,10 @@ func Test_AuthProxy(t *testing.T) { wantRedirectURL: ptr(managementAPI + tokenPath + "?returnTo=%2F"), }, { - name: "invalid level", - url: "/", - cookie: makeTestJWTCookie(cookieName, makeTestJWT(tokenSecret, "bad", validTime)), - wantErr: "not in sites", + name: "default site", + url: "/", + cookie: makeTestJWTCookie(cookieName, makeTestJWT(tokenSecret, "default", validTime)), + wantUpstream: ptr("default.example.com"), }, { name: "query valid -- redirect to set cookie", @@ -233,3 +234,48 @@ func makeTestJWT(secret []byte, level string, expires time.Time) string { return tokenString } + +func TestProxy_isTrustedBot(t *testing.T) { + tests := []struct { + name string + trusted []string + userAgent string + want bool + }{ + { + name: "empty user agent", + trusted: []string{"googlebot"}, + userAgent: "", + want: false, + }, + { + name: "empty trusted list", + trusted: nil, + userAgent: "Googlebot/2.1 (+http://www.googlebot.com/bot.html)", + want: false, + }, + { + name: "not in trusted list", + trusted: []string{"googlebot"}, + userAgent: "duckduckgo", + want: false, + }, + { + name: "in trusted list", + trusted: []string{"duckduckgo", "googlebot"}, + userAgent: "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)", + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + proxy := Proxy{ + TrustedBots: tt.trusted, + } + r := httptest.NewRequest(http.MethodGet, "/", nil) + r.Header.Set("User-Agent", tt.userAgent) + assert.Equalf(t, tt.want, proxy.isTrustedBot(r), "user agent '%s', trusted %+v", + r.Header.Get("User-Agent"), tt.trusted) + }) + } +}