Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: coredns/coredns
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: nicelocal/coredns
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Able to merge. These branches can be automatically merged.

Commits on Jan 16, 2025

  1. Add ECS support to cache and ECS plugin

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    ba0bbbb View commit details
  2. Finalize implementation

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    e509f4f View commit details
  3. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    4b0c1c6 View commit details
  4. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    d877e24 View commit details
  5. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    1a918ba View commit details
  6. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    4d5e4e3 View commit details
  7. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    43584bd View commit details
  8. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    82f0e22 View commit details
  9. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    80041f4 View commit details
  10. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    6694b7d View commit details
  11. Implement exact match logic

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    30b9595 View commit details
  12. Fixes

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    9055373 View commit details
  13. Fixes

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    ea06ca1 View commit details
  14. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    5526c5e View commit details
  15. Improve docs

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    e6a1896 View commit details
  16. Fix #7011

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    30bda2e View commit details
  17. Begin fixing tests

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    833eef0 View commit details
  18. Fix tests

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    351950d View commit details
  19. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    f3e1944 View commit details
  20. Remove leftover todo

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    25fb8b3 View commit details
  21. Add break

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    a4ea8b6 View commit details
  22. Cleanup

    Signed-off-by: Daniil Gentili <daniil@daniil.it>
    danog committed Jan 16, 2025
    Copy the full SHA
    aba6ae3 View commit details

Commits on Jan 21, 2025

  1. Copy the full SHA
    159059a View commit details

Commits on Feb 4, 2025

  1. Fix null dereference

    danog committed Feb 4, 2025
    Copy the full SHA
    ea48cc9 View commit details
Showing with 658 additions and 67 deletions.
  1. +1 −0 core/dnsserver/zdirectives.go
  2. +1 −0 core/plugin/zplugin.go
  3. +1 −0 plugin.cfg
  4. +4 −0 plugin/cache/README.md
  5. +208 −26 plugin/cache/cache.go
  6. +30 −9 plugin/cache/cache_test.go
  7. +157 −28 plugin/cache/handler.go
  8. +26 −0 plugin/cache/setup.go
  9. +34 −0 plugin/ecs/README.md
  10. +105 −0 plugin/ecs/ecs.go
  11. +5 −0 plugin/ecs/ready.go
  12. +60 −0 plugin/ecs/setup.go
  13. +26 −4 plugin/forward/forward.go
1 change: 1 addition & 0 deletions core/dnsserver/zdirectives.go
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ var Directives = []string{
"nsid",
"bufsize",
"bind",
"ecs",
"debug",
"trace",
"ready",
1 change: 1 addition & 0 deletions core/plugin/zplugin.go
Original file line number Diff line number Diff line change
@@ -20,6 +20,7 @@ import (
_ "github.com/coredns/coredns/plugin/dns64"
_ "github.com/coredns/coredns/plugin/dnssec"
_ "github.com/coredns/coredns/plugin/dnstap"
_ "github.com/coredns/coredns/plugin/ecs"
_ "github.com/coredns/coredns/plugin/erratic"
_ "github.com/coredns/coredns/plugin/errors"
_ "github.com/coredns/coredns/plugin/etcd"
1 change: 1 addition & 0 deletions plugin.cfg
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@ reload:reload
nsid:nsid
bufsize:bufsize
bind:bind
ecs:ecs
debug:debug
trace:trace
ready:ready
4 changes: 4 additions & 0 deletions plugin/cache/README.md
Original file line number Diff line number Diff line change
@@ -40,6 +40,8 @@ cache [TTL] [ZONES...] {
servfail DURATION
disable success|denial [ZONES...]
keepttl
mask_v4 32
mask_v6 128
}
~~~

@@ -75,6 +77,8 @@ cache [TTL] [ZONES...] {
of the remaining TTL. This can be useful if CoreDNS is used as an authoritative server and you want
to serve a consistent TTL to downstream clients. This is **NOT** recommended when CoreDNS is caching
records it is not authoritative for because it could result in downstream clients using stale answers.
* `mask_v4` specifies the maximum cacheable IPv4 prefix length for queries containing ECS (EDNS0 Client Subnet) data. Defaults to 32.
* `mask_v6` specifies the maximum cacheable IPv6 prefix length for queries containing ECS (EDNS0 Client Subnet) data. Defaults to 128.

## Capacity and Eviction

234 changes: 208 additions & 26 deletions plugin/cache/cache.go
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@ import (
"github.com/coredns/coredns/plugin/pkg/response"
"github.com/coredns/coredns/request"

"github.com/infobloxopen/go-trees/iptree"
"github.com/miekg/dns"
)

@@ -35,6 +36,9 @@ type Cache struct {
minpttl time.Duration
failttl time.Duration // TTL for caching SERVFAIL responses

mask_v4_size uint8
mask_v6_size uint8

// Prefetch.
prefetch int
duration time.Duration
@@ -59,27 +63,29 @@ type Cache struct {
// caller to set the Next handler.
func New() *Cache {
return &Cache{
Zones: []string{"."},
pcap: defaultCap,
pcache: cache.New(defaultCap),
pttl: maxTTL,
minpttl: minTTL,
ncap: defaultCap,
ncache: cache.New(defaultCap),
nttl: maxNTTL,
minnttl: minNTTL,
failttl: minNTTL,
prefetch: 0,
duration: 1 * time.Minute,
percentage: 10,
now: time.Now,
Zones: []string{"."},
pcap: defaultCap,
pcache: cache.New(defaultCap),
pttl: maxTTL,
minpttl: minTTL,
ncap: defaultCap,
ncache: cache.New(defaultCap),
nttl: maxNTTL,
minnttl: minNTTL,
failttl: minNTTL,
prefetch: 0,
duration: 1 * time.Minute,
percentage: 10,
now: time.Now,
mask_v4_size: 32,
mask_v6_size: 128,
}
}

// key reports whether the message should be stored and, if so, the key under which we store the item.
// Currently we do not cache truncated responses, errors, zone transfers or dynamic update messages.
// qname holds the already lowercased qname.
func key(qname string, m *dns.Msg, t response.Type, do, cd bool) (bool, uint64) {
func key(qname string, exactMatch *net.IPNet, m *dns.Msg, t response.Type, do, cd bool) (bool, uint64) {
// We don't store truncated responses.
if m.Truncated {
return false, 0
@@ -89,13 +95,13 @@ func key(qname string, m *dns.Msg, t response.Type, do, cd bool) (bool, uint64)
return false, 0
}

return true, hash(qname, m.Question[0].Qtype, do, cd)
return true, hash(qname, m.Question[0].Qtype, do, cd, exactMatch)
}

var one = []byte("1")
var zero = []byte("0")

func hash(qname string, qtype uint16, do, cd bool) uint64 {
func hash(qname string, qtype uint16, do, cd bool, exactMatch *net.IPNet) uint64 {
h := fnv.New64()

if do {
@@ -110,6 +116,8 @@ func hash(qname string, qtype uint16, do, cd bool) uint64 {
h.Write(zero)
}

h.Write(exactMatch.IP)
h.Write(exactMatch.Mask)
h.Write([]byte{byte(qtype >> 8)})
h.Write([]byte{byte(qtype)})
h.Write([]byte(qname))
@@ -131,8 +139,11 @@ func computeTTL(msgTTL, minTTL, maxTTL time.Duration) time.Duration {
type ResponseWriter struct {
dns.ResponseWriter
*Cache
state request.Request
server string // Server handling the request.
state request.Request
server string // Server handling the request.
subnet *net.IPNet
exactMatch *net.IPNet
ecs *dns.EDNS0_SUBNET

do bool // When true the original request had the DO bit set.
cd bool // When true the original request had the CD bit set.
@@ -149,7 +160,7 @@ type ResponseWriter struct {
// newPrefetchResponseWriter returns a Cache ResponseWriter to be used in
// prefetch requests. It ensures RemoteAddr() can be called even after the
// original connection has already been closed.
func newPrefetchResponseWriter(server string, state request.Request, c *Cache) *ResponseWriter {
func newPrefetchResponseWriter(server string, state request.Request, subnet *net.IPNet, exactMatch *net.IPNet, ecs *dns.EDNS0_SUBNET, c *Cache) *ResponseWriter {
// Resolve the address now, the connection might be already closed when the
// actual prefetch request is made.
addr := state.W.RemoteAddr()
@@ -169,6 +180,9 @@ func newPrefetchResponseWriter(server string, state request.Request, c *Cache) *
cd: state.Req.CheckingDisabled,
prefetch: true,
remoteAddr: addr,
subnet: subnet,
exactMatch: exactMatch,
ecs: ecs,
}
}

@@ -182,10 +196,160 @@ func (w *ResponseWriter) RemoteAddr() net.Addr {

// WriteMsg implements the dns.ResponseWriter interface.
func (w *ResponseWriter) WriteMsg(res *dns.Msg) error {
o := res.IsEdns0()
subnet := w.subnet
hadEcs := false
if o != nil {
for _, s := range o.Option {
if ecs, ok := s.(*dns.EDNS0_SUBNET); ok {
hadEcs = true

// https://www.rfc-editor.org/rfc/rfc7871#section-7.3
// If FAMILY, SOURCE PREFIX-LENGTH, and SOURCE PREFIX-LENGTH bits of
// ADDRESS in the response don't match the non-zero fields in the
// corresponding query, the full response MUST be dropped.

// If the query had no ECS, drop:
// the RFC doesn't explicitly require this,
// but it seems like the correct behavior.
if w.ecs == nil {
return nil
}

if ecs.Family != w.ecs.Family {
return nil
}

// This part is weird: https://www.rfc-editor.org/rfc/rfc7871#section-11 says that
// "the ECS option in a response packet MUST contain the
// full FAMILY, ADDRESS, and SOURCE PREFIX-LENGTH fields from the
// corresponding query"
//
// Which means that if there is a mismatch in the source netmask, we must drop;
//
// but https://www.rfc-editor.org/rfc/rfc7871#section-7.3 says
// "If FAMILY, SOURCE PREFIX-LENGTH, and SOURCE PREFIX-LENGTH bits of
// ADDRESS in the response don't match the non-zero fields in the
// corresponding query, the full response MUST be dropped."
//
// Which implies that if the source netmask is 0, comparison should be skipped;
//
// And also
// "In a response to a query that specified only SOURCE
// PREFIX-LENGTH for privacy masking, the FAMILY and ADDRESS fields MUST
// contain the appropriate non-zero information that the Authoritative
// Nameserver used to generate the answer, so that it can be cached
// accordingly."
//
// Which also implies that requests with a 0 source prefix may return a non-zero address...
//
// I choose to be safe, respecting section 11 and dropping all responses with non-matching
// source prefix and address, regardless of the mask.
if ecs.SourceNetmask != w.ecs.SourceNetmask {
return nil
}
if !ecs.Address.Equal(w.ecs.Address) {
return nil
}

// Records that are cached as /0 because of a query's SOURCE PREFIX-
// LENGTH of 0 MUST be distinguished from those that are cached as /0
// because of a response's SCOPE PREFIX-LENGTH of 0. The former should
// only be used for other /0 queries that the Intermediate Resolver
// receives, but the latter is suitable as a response for all networks.
if w.ecs.SourceNetmask == 0 {
subnet = &privateZeroSubnet
}

// If SCOPE PREFIX-LENGTH is not longer than SOURCE PREFIX-LENGTH, store
// SCOPE PREFIX-LENGTH bits of ADDRESS, and then mark the response as
// valid for all addresses that fall within that range.
if ecs.SourceScope < ecs.SourceNetmask { // The ecs.SourceScope == ecs.SourceNetmask case is handled by default
// req 10.0.0.0/24, resp valid for 10.0.0.0/8

if ecs.SourceScope == 0 {
subnet = &zeroSubnet
break
}
var mask net.IPMask
if ecs.Family == 1 {
mask = net.CIDRMask(int(ecs.SourceScope), 32)
} else {
mask = net.CIDRMask(int(ecs.SourceScope), 128)
}
subnet = &net.IPNet{
IP: subnet.IP.Mask(mask),
Mask: mask,
}
} else if ecs.SourceScope > ecs.SourceNetmask {
// req 10.0.0.0/8, resp valid only for 10.0.0.0/24 (and not e.g. for 10.0.1.0/24, which is also inside 10.0.0.0/8)

// A SCOPE PREFIX-LENGTH value longer than SOURCE PREFIX-LENGTH
// indicates that the provided prefix length was not specific enough to
// select the most appropriate Tailored Response. Future queries for
// the name within the specified network SHOULD use the longer SCOPE
// PREFIX-LENGTH. Factors affecting whether the Recursive Resolver
// would use the longer length include the amount of privacy masking the
// operator wants to provide their users, and the additional resource
// implications for the cache.
//
// If an Intermediate Nameserver receives a response that has a longer
// SCOPE PREFIX-LENGTH than SOURCE PREFIX-LENGTH that it provided in its
// query, it SHOULD still provide the result as the answer to the
// triggering client request even if the client is in a different
// address range.
//
//
// TODO: The Intermediate Nameserver MAY instead opt to retry
// with a longer SOURCE PREFIX-LENGTH to get a better reply before
// responding to its client, as long as it does not exceed a SOURCE
// PREFIX-LENGTH specified in the query that triggered resolution, but
// this obviously has implications for the latency of the overall
// lookup.

// Cache implications:

// Similarly, if SOURCE PREFIX-LENGTH is the maximum configured for the
// cache, store SOURCE PREFIX-LENGTH bits of ADDRESS, and then mark the
// response as valid for all addresses that fall within that range.
//
// Weirdly, this means to cache by the requested prefix, instead of the returned one.
// i.e. req: 10.0.0.0/8 (max is 8), response covers only 10.0.0.0/24, cache for 10.0.0.0/8
//
// Implemented by default (subnet = w.subnet)

// If SOURCE PREFIX-LENGTH is shorter than the configured maximum and
// SCOPE PREFIX-LENGTH is longer than SOURCE PREFIX-LENGTH, store SOURCE
// PREFIX-LENGTH bits of ADDRESS, and then mark the response as valid
// only to answer client queries that specify exactly the same SOURCE
// PREFIX-LENGTH in their own ECS option.
//
// Weirdly, this means to cache by the requested prefix, instead of the returned one
// i.e. req: 10.0.0.0/8 (max is 16), response covers only 10.0.0.0/24, cache for 10.0.0.0/8
// and only for queries that have an ECS option with subnet /8 and address 10.0.0.0, i.e.
// **exact matches only**, do NOT cache for 10.0.0.0/24 or 10.0.1.0/24 even if it falls inside of 10.0.0.0/8
//
if w.exactMatch != &zeroSubnet {
subnet = w.exactMatch
}
}

break
}
}
}

// If no ECS option is contained in the response, the Intermediate
// Nameserver SHOULD treat this as being equivalent to having received a
// SCOPE PREFIX-LENGTH of 0
if !hadEcs {
subnet = &zeroSubnet
}

mt, _ := response.Typify(res, w.now().UTC())

// key reports false for anything we don't want to cache.
hasKey, key := key(w.state.Name(), res, mt, w.do, w.cd)
hasKey, key := key(w.state.Name(), w.exactMatch, res, mt, w.do, w.cd)

msgTTL := dnsutil.MinimalTTL(res, mt)
var duration time.Duration
@@ -199,7 +363,7 @@ func (w *ResponseWriter) WriteMsg(res *dns.Msg) error {

if hasKey && duration > 0 {
if w.state.Match(res) {
w.set(res, key, mt, duration)
w.set(res, key, mt, subnet, duration)
cacheSize.WithLabelValues(w.server, Success, w.zonesMetricLabel, w.viewMetricLabel).Set(float64(w.pcache.Len()))
cacheSize.WithLabelValues(w.server, Denial, w.zonesMetricLabel, w.viewMetricLabel).Set(float64(w.ncache.Len()))
} else {
@@ -227,7 +391,7 @@ func (w *ResponseWriter) WriteMsg(res *dns.Msg) error {
return w.ResponseWriter.WriteMsg(res)
}

func (w *ResponseWriter) set(m *dns.Msg, key uint64, mt response.Type, duration time.Duration) {
func (w *ResponseWriter) set(m *dns.Msg, key uint64, mt response.Type, subnet *net.IPNet, duration time.Duration) {
// duration is expected > 0
// and key is valid
switch mt {
@@ -236,28 +400,46 @@ func (w *ResponseWriter) set(m *dns.Msg, key uint64, mt response.Type, duration
// zone is in exception list, do not cache
return
}
var tree *iptree.Tree
if _tree, ok := w.pcache.Get(key); ok {
tree = _tree.(*iptree.Tree)
} else {
tree = iptree.NewTree()
}

i := newItem(m, w.now(), duration)
if w.wildcardFunc != nil {
i.wildcard = w.wildcardFunc()
}
if w.pcache.Add(key, i) {
if w.pcache.Add(key, tree.InsertNet(subnet, i)) {
evictions.WithLabelValues(w.server, Success, w.zonesMetricLabel, w.viewMetricLabel).Inc()
}
// when pre-fetching, remove the negative cache entry if it exists
if w.prefetch {
w.ncache.Remove(key)
if _tree, ok := w.ncache.Get(key); ok {
tree = _tree.(*iptree.Tree)
if tree, ok = tree.DeleteByNet(subnet); ok {
w.ncache.Add(key, tree)
}
}
}

case response.NameError, response.NoData, response.ServerError:
if plugin.Zones(w.nexcept).Matches(m.Question[0].Name) != "" {
// zone is in exception list, do not cache
return
}
var tree *iptree.Tree
if _tree, ok := w.ncache.Get(key); ok {
tree = _tree.(*iptree.Tree)
} else {
tree = iptree.NewTree()
}
i := newItem(m, w.now(), duration)
if w.wildcardFunc != nil {
i.wildcard = w.wildcardFunc()
}
if w.ncache.Add(key, i) {
if w.ncache.Add(key, tree.InsertNet(subnet, i)) {
evictions.WithLabelValues(w.server, Denial, w.zonesMetricLabel, w.viewMetricLabel).Inc()
}

39 changes: 30 additions & 9 deletions plugin/cache/cache_test.go
Original file line number Diff line number Diff line change
@@ -3,6 +3,7 @@ package cache
import (
"context"
"fmt"
"net"
"testing"
"time"

@@ -13,6 +14,7 @@ import (
"github.com/coredns/coredns/plugin/test"
"github.com/coredns/coredns/request"

"github.com/infobloxopen/go-trees/iptree"
"github.com/miekg/dns"
)

@@ -287,15 +289,15 @@ func TestCacheInsertion(t *testing.T) {
state := request.Request{W: &test.ResponseWriter{}, Req: m}

mt, _ := response.Typify(m, utc)
valid, k := key(state.Name(), m, mt, state.Do(), state.Req.CheckingDisabled)
valid, k := key(state.Name(), &zeroSubnet, m, mt, state.Do(), state.Req.CheckingDisabled)

if valid {
// Insert cache entry
crr.set(m, k, mt, c.pttl)
crr.set(m, k, mt, &zeroSubnet, c.pttl)
}

// Attempt to retrieve cache entry
i := c.getIgnoreTTL(time.Now().UTC(), state, "dns://:53")
i := c.getIgnoreTTL(time.Now().UTC(), state, &zeroSubnet, &zeroSubnet, net.IPv4zero.To4(), "dns://:53", false)
found := i != nil

if !tc.shouldCache && found {
@@ -556,7 +558,18 @@ func TestNegativeStaleMaskingPositiveCache(t *testing.T) {
// Confirm that prefetch removes the negative cache item.
waitFor := 3
for i := 1; i <= waitFor; i++ {
if c.ncache.Len() != 0 {
isEmpty := true
c.ncache.Walk(func(sub map[uint64]interface{}, _ uint64) bool {
for _, _value := range sub {
value := _value.(*iptree.Tree)
for range value.Enumerate() {
isEmpty = false
return false
}
}
return true
})
if !isEmpty {
if i == waitFor {
t.Errorf("Test 2 NOERROR from Backend: item still exists in negative cache")
}
@@ -701,8 +714,16 @@ func TestCacheWildcardMetadata(t *testing.T) {
if c.pcache.Len() != 1 {
t.Errorf("Msg should have been cached")
}
_, k := key(qname, w.Msg, response.NoError, state.Do(), state.Req.CheckingDisabled)
i, _ := c.pcache.Get(k)
_, k := key(qname, &zeroSubnet, w.Msg, response.NoError, state.Do(), state.Req.CheckingDisabled)
_iNet, ok := c.pcache.Get(k)
if !ok {
t.Fatal("Msg should have been cached")
}
iNet := _iNet.(*iptree.Tree)
i, ok := iNet.GetByIP(zeroSubnet.IP)
if !ok {
t.Fatal("Msg should have been cached")
}
if i.(*item).wildcard != wildcard {
t.Errorf("expected wildcard response to enter cache with cache item's wildcard = %q, got %q", wildcard, i.(*item).wildcard)
}
@@ -858,19 +879,19 @@ func TestCacheSeparation(t *testing.T) {
state := request.Request{W: &test.ResponseWriter{}, Req: m}

mt, _ := response.Typify(m, utc)
valid, k := key(state.Name(), m, mt, state.Do(), state.Req.CheckingDisabled)
valid, k := key(state.Name(), &zeroSubnet, m, mt, state.Do(), state.Req.CheckingDisabled)

if valid {
// Insert cache entry
crr.set(m, k, mt, c.pttl)
crr.set(m, k, mt, &zeroSubnet, c.pttl)
}

// Attempt to retrieve cache entry
m = tc.query.Msg()
m = cacheMsg(m, tc.query)
state = request.Request{W: &test.ResponseWriter{}, Req: m}

item := c.getIgnoreTTL(time.Now().UTC(), state, "dns://:53")
item := c.getIgnoreTTL(time.Now().UTC(), state, &zeroSubnet, &zeroSubnet, net.IPv4zero.To4(), "dns://:53", false)
found := item != nil

if !tc.expectCached && found {
185 changes: 157 additions & 28 deletions plugin/cache/handler.go
Original file line number Diff line number Diff line change
@@ -3,16 +3,25 @@ package cache
import (
"context"
"math"
"net"
"time"

"github.com/coredns/coredns/plugin"
"github.com/coredns/coredns/plugin/metadata"
"github.com/coredns/coredns/plugin/metrics"
"github.com/coredns/coredns/request"

"github.com/infobloxopen/go-trees/iptree"
"github.com/miekg/dns"
)

// zeroSubnet (::/0) is the wildcard key for queries without ECS (both v4 and v6).
// It is also the subnet a response is cached under when it carries no ECS option
// or a SCOPE PREFIX-LENGTH of 0.
var zeroSubnet = net.IPNet{IP: net.IPv6zero, Mask: net.CIDRMask(0, 128)}

// privateZeroSubnet (0.0.0.0/0) is the wildcard key for private ECS queries (both v4 and v6),
// i.e. those where the client explicitly asked via ECS to not pass our IP to upstreams
// (the effective SOURCE PREFIX-LENGTH is 0).
var privateZeroSubnet = net.IPNet{IP: net.IPv4zero.To4(), Mask: net.CIDRMask(0, 32)}

// ServeDNS implements the plugin.Handler interface.
func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
rc := r.Copy() // We potentially modify r, to prevent other plugins from seeing this (r is a pointer), copy r into rc.
@@ -34,18 +43,101 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// in which upstream doesn't support DNSSEC, the two cache items will effectively be the same. Regardless, any
// DNSSEC RRs in the response are written to cache with the response.

subnet := &zeroSubnet
o := r.IsEdns0()
var ecs *dns.EDNS0_SUBNET
var ok bool
exactMatch := &zeroSubnet
if o != nil {
for _, s := range o.Option {
if ecs, ok = s.(*dns.EDNS0_SUBNET); ok {
// Section 7.1.1, "Recursive Resolvers".
// If the triggering query included an ECS option itself, it MUST be
// examined for its SOURCE PREFIX-LENGTH. The Recursive Resolver's
// outgoing query MUST then set SOURCE PREFIX-LENGTH to the shorter of
// the incoming query's SOURCE PREFIX-LENGTH or the server's maximum
// cacheable prefix length.

// Section 7.1.3, "Forwarding Resolvers".
// Forwarding Resolvers essentially appear to be Stub Resolvers to
// whatever Recursive Resolver is ultimately handling the query, but
// they look like a Recursive Resolver to their client. A Forwarding
// Resolver using this option MUST prepare it as described in
// Section 7.1.1, "Recursive Resolvers".

// Normalize
if temp := ecs.Address.To4(); temp != nil {
ecs.Address = temp
}

var mask net.IPMask
if ecs.Family == 1 {
// If SOURCE PREFIX-LENGTH is shorter than the configured maximum and
// SCOPE PREFIX-LENGTH is longer than SOURCE PREFIX-LENGTH, store SOURCE
// PREFIX-LENGTH bits of ADDRESS, and then mark the response as valid
// only to answer client queries that specify exactly the same SOURCE
// PREFIX-LENGTH in their own ECS option.
//
// Weirdly, this means to cache by the requested prefix, instead of the returned one
// i.e. req: 10.0.0.0/8 (max is 16), response covers only 10.0.0.0/24, cache for 10.0.0.0/8
// and only for queries that have an ECS option with subnet /8 and address 10.0.0.0, i.e.
// **exact matches only**, do NOT cache for 10.0.0.0/24 or 10.0.1.0/24 even if it falls inside of 10.0.0.0/8
//
if ecs.SourceNetmask < c.mask_v4_size {
exactMatch = &net.IPNet{IP: ecs.Address, Mask: net.CIDRMask(int(ecs.SourceNetmask), 32)}
}
ecs.SourceNetmask = min(ecs.SourceNetmask, c.mask_v4_size)
mask = net.CIDRMask(int(ecs.SourceNetmask), 32)
} else {
if ecs.SourceNetmask < c.mask_v6_size {
exactMatch = &net.IPNet{IP: ecs.Address, Mask: net.CIDRMask(int(ecs.SourceNetmask), 128)}
}
ecs.SourceNetmask = min(ecs.SourceNetmask, c.mask_v6_size)
mask = net.CIDRMask(int(ecs.SourceNetmask), 128)
}
ecs.Address = ecs.Address.Mask(mask)
if ecs.SourceNetmask == 0 {
subnet = &privateZeroSubnet
} else {
subnet = &net.IPNet{IP: ecs.Address, Mask: mask}
}
break
}
}
}

var srcOrig net.IP
ip := w.RemoteAddr()
if i, ok := ip.(*net.UDPAddr); ok {
srcOrig = i.IP
}
if i, ok := ip.(*net.TCPAddr); ok {
srcOrig = i.IP
}

// Normalize
if temp := srcOrig.To4(); temp != nil {
srcOrig = temp
}

ttl := 0
i := c.getIgnoreTTL(now, state, server)
i := c.getIgnoreTTL(now, state, subnet, exactMatch, srcOrig, server, false)
if i == nil {
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do, ad: ad, cd: cd,
nexcept: c.nexcept, pexcept: c.pexcept, wildcardFunc: wildcardFunc(ctx)}
subnet: subnet,
exactMatch: exactMatch,
ecs: ecs,
nexcept: c.nexcept, pexcept: c.pexcept, wildcardFunc: wildcardFunc(ctx)}
return c.doRefresh(ctx, state, crr)
}
ttl = i.ttl(now)
if ttl < 0 {
// serve stale behavior
if c.verifyStale {
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do, cd: cd}
crr := &ResponseWriter{ResponseWriter: w, Cache: c, state: state, server: server, do: do, cd: cd,
subnet: subnet,
exactMatch: exactMatch,
ecs: ecs}
cw := newVerifyStaleResponseWriter(crr)
ret, err := c.doRefresh(ctx, state, cw)
if cw.refreshed {
@@ -56,13 +148,13 @@ func (c *Cache) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg)
// Adjust the time to get a 0 TTL in the reply built from a stale item.
now = now.Add(time.Duration(ttl) * time.Second)
if !c.verifyStale {
cw := newPrefetchResponseWriter(server, state, c)
go c.doPrefetch(ctx, state, cw, i, now)
cw := newPrefetchResponseWriter(server, state, subnet, exactMatch, ecs, c)
go c.doPrefetch(ctx, state, cw, i, subnet, exactMatch, srcOrig, now, server)
}
servedStale.WithLabelValues(server, c.zonesMetricLabel, c.viewMetricLabel).Inc()
} else if c.shouldPrefetch(i, now) {
cw := newPrefetchResponseWriter(server, state, c)
go c.doPrefetch(ctx, state, cw, i, now)
cw := newPrefetchResponseWriter(server, state, subnet, exactMatch, ecs, c)
go c.doPrefetch(ctx, state, cw, i, subnet, exactMatch, srcOrig, now, server)
}

if i.wildcard != "" {
@@ -92,14 +184,14 @@ func wildcardFunc(ctx context.Context) func() string {
}
}

func (c *Cache) doPrefetch(ctx context.Context, state request.Request, cw *ResponseWriter, i *item, now time.Time) {
func (c *Cache) doPrefetch(ctx context.Context, state request.Request, cw *ResponseWriter, i *item, subnet *net.IPNet, exactMatch *net.IPNet, srcOrig net.IP, now time.Time, server string) {
cachePrefetches.WithLabelValues(cw.server, c.zonesMetricLabel, c.viewMetricLabel).Inc()
c.doRefresh(ctx, state, cw)

// When prefetching we lose the item i, and with it the frequency
// that we've gathered so far. So we copy the frequency info back
// into the new item that was stored in the cache.
if i1 := c.exists(state); i1 != nil {
if i1 := c.getIgnoreTTL(now, state, subnet, exactMatch, srcOrig, server, true); i1 != nil {
i1.Freq.Reset(now, i.Freq.Hits())
}
}
@@ -121,37 +213,74 @@ func (c *Cache) shouldPrefetch(i *item, now time.Time) bool {
func (c *Cache) Name() string { return "cache" }

// getIgnoreTTL unconditionally returns an item if it exists in the cache.
func (c *Cache) getIgnoreTTL(now time.Time, state request.Request, server string) *item {
k := hash(state.Name(), state.QType(), state.Do(), state.Req.CheckingDisabled)
func (c *Cache) getIgnoreTTL(now time.Time, state request.Request, subnet *net.IPNet, exactMatch *net.IPNet, srcOrig net.IP, server string, justCheckExists bool) *item {
cacheRequests.WithLabelValues(server, c.zonesMetricLabel, c.viewMetricLabel).Inc()

if i, ok := c.ncache.Get(k); ok {
itm := i.(*item)
ttl := itm.ttl(now)
if itm.matches(state) && (ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds()))) {
cacheHits.WithLabelValues(server, Denial, c.zonesMetricLabel, c.viewMetricLabel).Inc()
return i.(*item)
k := hash(state.Name(), state.QType(), state.Do(), state.Req.CheckingDisabled, &zeroSubnet)

// ECS, answering from cache
//
// 1. If no ECS option was provided, the client's address is used.
//
if subnet == nil {
i := c.getIgnoreTTLInner(k, now, state, srcOrig, server, justCheckExists)
if i != nil {
return i
}
}
if i, ok := c.pcache.Get(k); ok {
itm := i.(*item)
ttl := itm.ttl(now)
if itm.matches(state) && (ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds()))) {
cacheHits.WithLabelValues(server, Success, c.zonesMetricLabel, c.viewMetricLabel).Inc()
return i.(*item)
} else {
i := c.getIgnoreTTLInner(k, now, state, subnet.IP, server, justCheckExists)
if i != nil {
return i
}
//
// 2.2. If no covering entry is found and SOURCE PREFIX-LENGTH is shorter than the
// configured maximum length allowed for the cache, repeat the cache
// lookup for an entry that exactly matches SOURCE PREFIX-LENGTH.
// These special entries, which do not cover longer prefix lengths,
// occur as described in the previous section.

if exactMatch != &zeroSubnet {
subK := hash(state.Name(), state.QType(), state.Do(), state.Req.CheckingDisabled, exactMatch)
i := c.getIgnoreTTLInner(subK, now, state, subnet.IP, server, justCheckExists)
if i != nil {
return i
}
}
}

cacheMisses.WithLabelValues(server, c.zonesMetricLabel, c.viewMetricLabel).Inc()
return nil
}

func (c *Cache) exists(state request.Request) *item {
k := hash(state.Name(), state.QType(), state.Do(), state.Req.CheckingDisabled)
func (c *Cache) getIgnoreTTLInner(k uint64, now time.Time, state request.Request, src net.IP, server string, justCheckExists bool) *item {
if i, ok := c.ncache.Get(k); ok {
return i.(*item)
tree := i.(*iptree.Tree)
if ii, ok := tree.GetByIP(src); ok {
itm := ii.(*item)
if justCheckExists {
return itm
}
ttl := itm.ttl(now)
if itm.matches(state) && (ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds()))) {
cacheHits.WithLabelValues(server, Denial, c.zonesMetricLabel, c.viewMetricLabel).Inc()
return itm
}
}
}
if i, ok := c.pcache.Get(k); ok {
return i.(*item)
tree := i.(*iptree.Tree)
if ii, ok := tree.GetByIP(src); ok {
itm := ii.(*item)
if justCheckExists {
return itm
}
ttl := itm.ttl(now)
if itm.matches(state) && (ttl > 0 || (c.staleUpTo > 0 && -ttl < int(c.staleUpTo.Seconds()))) {
cacheHits.WithLabelValues(server, Success, c.zonesMetricLabel, c.viewMetricLabel).Inc()
return itm
}
}
}

return nil
}
26 changes: 26 additions & 0 deletions plugin/cache/setup.go
Original file line number Diff line number Diff line change
@@ -211,6 +211,32 @@ func cacheParse(c *caddy.Controller) (*Cache, error) {
return nil, errors.New("caching SERVFAIL responses over 5 minutes is not permitted")
}
ca.failttl = d
case "mask_v4":
args := c.RemainingArgs()
if len(args) != 1 {
return nil, c.ArgErr()
}
d, err := strconv.Atoi(args[0])
if err != nil {
return nil, err
}
if d < 0 || d > 32 {
return nil, errors.New("invalid IPv4 mask size")
}
ca.mask_v4_size = uint8(d)
case "mask_v6":
args := c.RemainingArgs()
if len(args) != 1 {
return nil, c.ArgErr()
}
d, err := strconv.Atoi(args[0])
if err != nil {
return nil, err
}
if d < 0 || d > 128 {
return nil, errors.New("invalid IPv6 mask size")
}
ca.mask_v6_size = uint8(d)
case "disable":
// disable [success|denial] [zones]...
args := c.RemainingArgs()
34 changes: 34 additions & 0 deletions plugin/ecs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# ecs

## Name

*ecs* - adds an EDNS Client Subnet (ECS) option to queries that do not already carry one.

## Description

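The *ecs* plugin adds an EDNS Client Subnet (ECS) option, containing the client's masked source address, to every query that does not already include one. Upstream resolvers and CDNs can use it to return answers better suited to the client's network location. Queries from private or loopback addresses are passed through unchanged, and the ECS option is removed from the response before it is returned to a client that did not send one.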

## Syntax

~~~ txt
ecs {
mask_v4 SIZE
mask_v6 SIZE
}
~~~

* `mask_v4` - Optional (defaults to 24). The prefix length, from 0 to 32 bits, to which the client's source IPv4 address is truncated for privacy protection.
* `mask_v6` - Optional (defaults to 56). The prefix length, from 0 to 128 bits, to which the client's source IPv6 address is truncated for privacy protection.

## Examples

Passes the client's source address in the EDNS client subnet option, forwarding to 8.8.8.8:

```
. {
ecs
# Do not use 1.1.1.1, it explicitly blocks ECS
forward . 8.8.8.8
}
```
105 changes: 105 additions & 0 deletions plugin/ecs/ecs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package ecs

import (
"context"
"net"

"github.com/coredns/coredns/plugin"

"github.com/miekg/dns"
)

// Ecs is a plugin that attaches an EDNS Client Subnet (ECS) option, built from
// the client's masked source address, to queries that do not already carry one.
type Ecs struct {
	// Next is the next handler in the plugin chain.
	Next plugin.Handler

	// v4Mask and v6Mask are applied to the client's source address before it
	// is placed in the ECS option.
	v4Mask, v6Mask net.IPMask

	// v4MaskSize and v6MaskSize are the prefix lengths reported as the ECS
	// source netmask for IPv4 and IPv6 clients respectively.
	v4MaskSize, v6MaskSize uint8
}

// setupEdns0Opt returns the EDNS0 OPT record of r. If r has none, one is added
// with a UDP buffer size of 4096 and the DO bit cleared, and that new record
// is returned.
func setupEdns0Opt(r *dns.Msg) *dns.OPT {
	if opt := r.IsEdns0(); opt != nil {
		return opt
	}
	r.SetEdns0(4096, false)
	return r.IsEdns0()
}

// ServeDNS implements the plugin.Handler interface.
//
// If the query carries no EDNS Client Subnet (ECS) option, one is appended
// containing the client's source address masked with the configured IPv4 or
// IPv6 mask, and the response writer is wrapped so that the ECS option is
// stripped from the response again.
//
// The query is passed to the next plugin unmodified when:
//   - it already contains an ECS option;
//   - the client address cannot be determined (the remote address is neither
//     a *net.UDPAddr nor a *net.TCPAddr, or holds no valid IP);
//   - the client address is private, loopback or link-local, since such an
//     address is meaningless to an upstream server.
func (e *Ecs) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
	// If we already have an ECS option, skip. Only inspect an existing OPT
	// record here so that skipped queries are not modified at all.
	if o := r.IsEdns0(); o != nil {
		for _, s := range o.Option {
			if _, ok := s.(*dns.EDNS0_SUBNET); ok {
				return plugin.NextOrFailure(e.Name(), e.Next, ctx, w, r)
			}
		}
	}

	var srcOrig net.IP
	switch addr := w.RemoteAddr().(type) {
	case *net.UDPAddr:
		srcOrig = addr.IP
	case *net.TCPAddr:
		srcOrig = addr.IP
	}

	// To16 returns nil for anything that is not a 4- or 16-byte address,
	// including the nil IP left behind by an unsupported address type.
	if srcOrig.To16() == nil {
		return plugin.NextOrFailure(e.Name(), e.Next, ctx, w, r)
	}

	// IsLoopback covers both 127.0.0.0/8 and ::1.
	if srcOrig.IsPrivate() || srcOrig.IsLoopback() || srcOrig.IsLinkLocalUnicast() {
		return plugin.NextOrFailure(e.Name(), e.Next, ctx, w, r)
	}

	ecs := &dns.EDNS0_SUBNET{Code: dns.EDNS0SUBNET, SourceScope: 0}
	if srcOrig.To4() != nil {
		ecs.Family = 1 // IPv4
		ecs.Address = srcOrig.Mask(e.v4Mask)
		ecs.SourceNetmask = e.v4MaskSize
	} else {
		ecs.Family = 2 // IPv6
		ecs.Address = srcOrig.Mask(e.v6Mask)
		ecs.SourceNetmask = e.v6MaskSize
	}

	// Only now create the OPT record (if missing) and attach the option.
	o := setupEdns0Opt(r)
	o.Option = append(o.Option, ecs)

	return plugin.NextOrFailure(e.Name(), e.Next, ctx, &ecsWriter{w}, r)
}

// Name implements the Handler interface and returns the plugin name, "ecs".
func (e *Ecs) Name() string {
	return "ecs"
}

// ecsWriter is a dns.ResponseWriter wrapper that removes the ECS option from
// responses to requests that DID NOT originally include one.
// See https://www.rfc-editor.org/rfc/rfc7871#section-7.2.2
type ecsWriter struct {
	// ResponseWriter is the wrapped writer; every method other than WriteMsg
	// is promoted from it unchanged.
	dns.ResponseWriter
}

// WriteMsg implements the dns.ResponseWriter interface. It removes every ECS
// option from the OPT record of res, if there is one, and then hands res to
// the wrapped writer. The remaining EDNS0 options keep their original order.
// res is modified in place.
func (w *ecsWriter) WriteMsg(res *dns.Msg) error {
	if o := res.IsEdns0(); o != nil {
		// Filter in place, reusing the backing array of o.Option.
		kept := o.Option[:0]
		for _, opt := range o.Option {
			if _, isECS := opt.(*dns.EDNS0_SUBNET); !isECS {
				kept = append(kept, opt)
			}
		}
		o.Option = kept
	}

	return w.ResponseWriter.WriteMsg(res)
}
5 changes: 5 additions & 0 deletions plugin/ecs/ready.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package ecs

// Ready implements the ready.Readiness interface. It reports true
// unconditionally; once a plugin reports true CoreDNS assumes it is ready for
// queries and does not check it again.
func (e Ecs) Ready() bool {
	return true
}
60 changes: 60 additions & 0 deletions plugin/ecs/setup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package ecs

import (
"net"
"strconv"

"github.com/coredns/caddy"
"github.com/coredns/coredns/core/dnsserver"
"github.com/coredns/coredns/plugin"
)

// init registers the setup function under the plugin name "ecs".
func init() {
	plugin.Register("ecs", setup)
}

// setup parses the ecs directive and adds the plugin to the server's chain.
//
// The optional block may contain any number of mask_v4 SIZE (0-32) and
// mask_v6 SIZE (0-128) lines, each taking exactly one argument. When a
// property is absent the defaults are 24 for IPv4 and 56 for IPv6. Any other
// property, a missing or surplus argument, a non-numeric size or a size out of
// range is reported as an error.
func setup(c *caddy.Controller) error {
	c.Next() // Consume the directive name.

	v4Mask := uint8(24)
	v6Mask := uint8(56)

	// Loop over every line in the block; the documented syntax allows both
	// mask_v4 and mask_v6 to be given together.
	for c.NextBlock() {
		maskType := c.Val()
		if maskType != "mask_v4" && maskType != "mask_v6" {
			return plugin.Error("ecs", c.ArgErr())
		}

		args := c.RemainingArgs()
		if len(args) != 1 {
			return plugin.Error("ecs", c.ArgErr())
		}
		valI, err := strconv.Atoi(args[0])
		if err != nil {
			return plugin.Error("ecs", err)
		}

		if maskType == "mask_v4" {
			if valI > 32 || valI < 0 {
				return plugin.Error("ecs", c.Err("Invalid ipv4 netmask size!"))
			}
			v4Mask = uint8(valI)
		} else {
			if valI > 128 || valI < 0 {
				return plugin.Error("ecs", c.Err("Invalid ipv6 netmask size!"))
			}
			v6Mask = uint8(valI)
		}
	}

	// Add the Plugin to CoreDNS, so Servers can use it in their plugin chain.
	dnsserver.GetConfig(c).AddPlugin(func(next plugin.Handler) plugin.Handler {
		return &Ecs{
			Next:       next,
			v4Mask:     net.CIDRMask(int(v4Mask), 32),
			v6Mask:     net.CIDRMask(int(v6Mask), 128),
			v4MaskSize: v4Mask,
			v6MaskSize: v6Mask,
		}
	})

	// All OK, return a nil error.
	return nil
}
30 changes: 26 additions & 4 deletions plugin/forward/forward.go
Original file line number Diff line number Diff line change
@@ -104,6 +104,17 @@ func (f *Forward) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
}
}

ecsKey := -1
o := r.IsEdns0()
if o != nil {
for k, s := range o.Option {
if _, ok := s.(*dns.EDNS0_SUBNET); ok {
ecsKey = k
break
}
}
}

fails := 0
var span, child ot.Span
var upstreamErr error
@@ -156,11 +167,22 @@ func (f *Forward) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dns.Msg
if err == ErrCachedClosed { // Remote side closed conn, can only happen with TCP.
continue
}
// Retry with TCP if truncated and prefer_udp configured.
if ret != nil && ret.Truncated && !opts.ForceTCP && opts.PreferUDP {
opts.ForceTCP = true
continue
if ret != nil {
// Retry with TCP if truncated and prefer_udp configured.
if ret.Truncated && !opts.ForceTCP && opts.PreferUDP {
opts.ForceTCP = true
continue
}

// Retry without ECS option in case of REFUSED as per RFC 7871
if ecsKey != -1 && ret.Rcode == dns.RcodeRefused {
o.Option[ecsKey] = o.Option[len(o.Option)-1]
o.Option = o.Option[:len(o.Option)-1]
ecsKey = -1
continue
}
}

break
}