From 13404a68a307e4673a677206a7c85c99437daba8 Mon Sep 17 00:00:00 2001 From: Ben Date: Wed, 27 Nov 2024 07:49:22 +0100 Subject: [PATCH] internal/eval: replace geohash library with redis-like implementation and finalize GEORADIUSBYMEMBER command --- go.mod | 1 - go.sum | 2 - internal/eval/geo/geo.go | 303 ++++++++++++++++++++++---- internal/eval/sortedset/sorted_set.go | 4 +- internal/eval/store_eval.go | 37 ++-- 5 files changed, 281 insertions(+), 66 deletions(-) diff --git a/go.mod b/go.mod index dcf33b828e..2f7781b611 100644 --- a/go.mod +++ b/go.mod @@ -43,7 +43,6 @@ require ( github.com/google/uuid v1.6.0 github.com/gorilla/websocket v1.5.3 github.com/mattn/go-sqlite3 v1.14.24 - github.com/mmcloughlin/geohash v0.10.0 github.com/ohler55/ojg v1.25.0 github.com/rs/xid v1.6.0 github.com/rs/zerolog v1.33.0 diff --git a/go.sum b/go.sum index 51567b2720..ebfb5659bc 100644 --- a/go.sum +++ b/go.sum @@ -74,8 +74,6 @@ github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWE github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-sqlite3 v1.14.24 h1:tpSp2G2KyMnnQu99ngJ47EIkWVmliIizyZBfPrBWDRM= github.com/mattn/go-sqlite3 v1.14.24/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= -github.com/mmcloughlin/geohash v0.10.0 h1:9w1HchfDfdeLc+jFEf/04D27KP7E2QmpDu52wPbJWRE= -github.com/mmcloughlin/geohash v0.10.0/go.mod h1:oNZxQo5yWJh0eMQEP/8hwQuVx9Z9tjwFUqcTB1SmG0c= github.com/ohler55/ojg v1.25.0 h1:sDwc4u4zex65Uz5Nm7O1QwDKTT+YRcpeZQTy1pffRkw= github.com/ohler55/ojg v1.25.0/go.mod h1:gQhDVpQLqrmnd2eqGAvJtn+NfKoYJbe/A4Sj3/Vro4o= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= diff --git a/internal/eval/geo/geo.go b/internal/eval/geo/geo.go index 872b15b6e7..a6e89cdd28 100644 --- a/internal/eval/geo/geo.go +++ b/internal/eval/geo/geo.go @@ -4,22 +4,23 @@ import ( "math" diceerrors "github.com/dicedb/dice/internal/errors" - "github.com/mmcloughlin/geohash" ) // Earth's radius in meters const earthRadius float64 = 6372797.560856 -// Bit precision for geohash - picked up to match redis -const bitPrecision = 52 +// Bit precision steps for geohash - picked up to match redis +const maxSteps = 26 const mercatorMax = 20037726.37 const ( - minLat = -85.05112878 - maxLat = 85.05112878 - minLon = -180 - maxLon = 180 + /* These are constraints from EPSG:900913 / EPSG:3785 / OSGEO:41001 */ + /* We can't geocode at the north/south pole. */ + globalMinLat = -85.05112878 + globalMaxLat = 85.05112878 + globalMinLon = -180.0 + globalMaxLon = 180.0 ) type Unit string @@ -31,20 +32,18 @@ const ( Feet Unit = "ft" ) +// DegToRad converts degrees to radians. func DegToRad(deg float64) float64 { return math.Pi * deg / 180.0 } +// RadToDeg converts radians to degrees. func RadToDeg(rad float64) float64 { return 180.0 * rad / math.Pi } -func GetDistance( - lon1, - lat1, - lon2, - lat2 float64, -) float64 { +// GetDistance calculates the distance between two geographical points specified by their longitude and latitude. +func GetDistance(lon1, lat1, lon2, lat2 float64) float64 { lon1r := DegToRad(lon1) lon2r := DegToRad(lon2) v := math.Sin((lon2r - lon1r) / 2) @@ -62,29 +61,73 @@ func GetDistance( return 2.0 * earthRadius * math.Asin(math.Sqrt(a)) } +// GetLatDistance calculates the distance between two latitudes. func GetLatDistance(lat1, lat2 float64) float64 { return earthRadius * math.Abs(DegToRad(lat2)-DegToRad(lat1)) } -// EncodeHash returns a geo hash for a given coordinate, and returns it in float64 so it can be used as score in a zset -func EncodeHash( - latitude, - longitude float64, -) float64 { - h := geohash.EncodeIntWithPrecision(latitude, longitude, bitPrecision) +// EncodeHash returns a geo hash for a given coordinate, and returns it in float64 so it can be used as score in a zset. +func EncodeHash(latitude, longitude float64) float64 { + h := encodeHash(longitude, latitude, maxSteps) + h = align52Bits(h, maxSteps) return float64(h) } -// DecodeHash returns the latitude and longitude from a geo hash -// The hash should be a float64, as it is used as score in a zset +// encodeHash encodes the latitude and longitude into a geohash with the specified number of steps. +func encodeHash(longitude, latitude float64, steps uint8) uint64 { + latOffset := (latitude - globalMinLat) / (globalMaxLat - globalMinLat) + longOffset := (longitude - globalMinLon) / (globalMaxLon - globalMinLon) + + latOffset *= float64(uint64(1) << steps) + longOffset *= float64(uint64(1) << steps) + return interleave64(uint32(latOffset), uint32(longOffset)) +} + +// DecodeHash returns the latitude and longitude from a geo hash. +// The hash should be a float64, as it is used as score in a sorted set. func DecodeHash(hash float64) (lat, lon float64) { - lat, lon = geohash.DecodeIntWithPrecision(uint64(hash), bitPrecision) + return decodeHash(uint64(hash), maxSteps) +} + +// decodeHash decodes the geohash into latitude and longitude with the specified number of steps. +func decodeHash(hash uint64, steps uint8) (lat float64, lon float64) { + hashSep := deinterleave64(hash) + + latScale := globalMaxLat - globalMinLat + longScale := globalMaxLon - globalMinLon + + ilato := uint32(hashSep) // lat part + ilono := uint32(hashSep >> 32) // lon part + + // divide by 2**step. + // Then, for 0-1 coordinate, multiply times scale and add + // to the min to get the absolute coordinate. + minLat := globalMinLat + (float64(ilato)*1.0/float64(uint64(1)< globalMaxLon { + lon = globalMaxLon + } + if lon < globalMinLon { + lon = globalMinLon + } + + lat = (minLat + maxLat) / 2 + if lat > globalMaxLat { + lat = globalMaxLat + } + if lat < globalMinLat { + lat = globalMinLat + } return lat, lon } -// ConvertDistance converts a distance from meters to the desired unit +// ConvertDistance converts a distance from meters to the desired unit. func ConvertDistance(distance float64, unit string) (float64, error) { switch Unit(unit) { case Meters: @@ -100,7 +143,7 @@ func ConvertDistance(distance float64, unit string) (float64, error) { } } -// ToMeters converts a distance and its unit to meters +// ToMeters converts a distance and its unit to meters. func ToMeters(distance float64, unit string) (float64, bool) { switch Unit(unit) { case Meters: @@ -116,6 +159,7 @@ func ToMeters(distance float64, unit string) (float64, bool) { } } +// geohashEstimateStepsByRadius estimates the number of steps required to cover a radius at a given latitude. func geohashEstimateStepsByRadius(radius, lat float64) uint8 { if radius == 0 { return 26 @@ -149,63 +193,89 @@ func geohashEstimateStepsByRadius(radius, lat float64) uint8 { return uint8(step) } +// boundingBox returns the bounding box for a given latitude, longitude and radius. +func boundingBox(lat, lon, radius float64) (float64, float64, float64, float64) { + latDelta := RadToDeg(radius / earthRadius) + lonDeltaTop := RadToDeg(radius / earthRadius / math.Cos(DegToRad(lat+latDelta))) + lonDeltaBottom := RadToDeg(radius / earthRadius / math.Cos(DegToRad(lat-latDelta))) + + isSouthernHemisphere := false + if lat < 0 { + isSouthernHemisphere = true + } + + minLon := lon - lonDeltaTop + if isSouthernHemisphere { + minLon = lon - lonDeltaBottom + } + + maxLon := lon + lonDeltaTop + if isSouthernHemisphere { + maxLon = lon + lonDeltaBottom + } + + minLat := lat - latDelta + maxLat := lat + latDelta + + return minLon, minLat, maxLon, maxLat +} + // Area returns the geohashes of the area covered by a circle with a given radius. It returns the center hash // and the 8 surrounding hashes. The second return value is the number of steps used to cover the area. func Area(centerHash, radius float64) ([9]uint64, uint8) { var result [9]uint64 - centerLat, centerLon := DecodeHash(centerHash) - + centerLat, centerLon := decodeHash(uint64(centerHash), maxSteps) + minLon, minLat, maxLon, maxLat := boundingBox(centerLat, centerLon, radius) steps := geohashEstimateStepsByRadius(radius, centerLat) + centerRadiusHash := encodeHash(centerLon, centerLat, steps) - centerRadiusHash := geohash.EncodeIntWithPrecision(centerLat, centerLon, uint(steps)*2) - - neighbors := geohash.NeighborsIntWithPrecision(centerRadiusHash, uint(steps)*2) - area := geohash.BoundingBoxInt(centerRadiusHash) + neighbors := geohashNeighbors(uint64(centerRadiusHash), steps) + area := areaBySteps(centerRadiusHash, steps) /* Check if the step is enough at the limits of the covered area. * Sometimes when the search area is near an edge of the * area, the estimated step is not small enough, since one of the * north / south / west / east square is too near to the search area * to cover everything. */ - north := geohash.BoundingBoxInt(neighbors[0]) - east := geohash.BoundingBoxInt(neighbors[2]) - south := geohash.BoundingBoxInt(neighbors[4]) - west := geohash.BoundingBoxInt(neighbors[6]) + north := areaBySteps(neighbors[0], steps) + south := areaBySteps(neighbors[4], steps) + east := areaBySteps(neighbors[2], steps) + west := areaBySteps(neighbors[6], steps) decreaseStep := false - if north.MaxLat < maxLat || south.MinLat < minLat || east.MaxLng < maxLon || west.MinLng < minLon { + if north.Lat.Max < maxLat || south.Lat.Min > minLat || east.Lon.Max < maxLon || west.Lon.Min > minLon { decreaseStep = true } if steps > 1 && decreaseStep { steps-- - centerRadiusHash = geohash.EncodeIntWithPrecision(centerLat, centerLon, uint(steps)*2) - neighbors = geohash.NeighborsIntWithPrecision(centerRadiusHash, uint(steps)*2) - area = geohash.BoundingBoxInt(centerRadiusHash) + centerRadiusHash = encodeHash(centerLat, centerLon, steps) + neighbors = geohashNeighbors(centerRadiusHash, steps) + area = areaBySteps(centerRadiusHash, steps) } // exclude useless areas if steps >= 2 { - if area.MinLat < minLat { + if area.Lat.Min < minLat { neighbors[3] = 0 // south east neighbors[4] = 0 // south neighbors[5] = 0 // south west } - if area.MaxLat > maxLat { + if area.Lat.Max > maxLat { neighbors[0] = 0 // north neighbors[1] = 0 // north east neighbors[7] = 0 // north west } - if area.MinLng < minLon { + if area.Lon.Min < minLon { neighbors[5] = 0 // south west neighbors[6] = 0 // west neighbors[7] = 0 // north west } - if area.MaxLng > maxLon { + if area.Lon.Max > maxLon { neighbors[1] = 0 // north east neighbors[2] = 0 // east neighbors[3] = 0 // south east @@ -223,13 +293,156 @@ func Area(centerHash, radius float64) ([9]uint64, uint8) { // HashMinMax returns the min and max hashes for a given hash and steps. This can be used to get the range of hashes // that a given hash and a radius (steps) will cover. func HashMinMax(hash uint64, steps uint8) (uint64, uint64) { - min := geohashAlign52Bits(hash, steps) + min := align52Bits(hash, steps) hash++ - max := geohashAlign52Bits(hash, steps) + max := align52Bits(hash, steps) return min, max } -func geohashAlign52Bits(hash uint64, steps uint8) uint64 { +// align52Bits aligns the hash to 52 bits. +func align52Bits(hash uint64, steps uint8) uint64 { hash <<= (52 - steps*2) return hash } + +type hashRange struct { + Min float64 + Max float64 +} + +type hashArea struct { + Lat hashRange + Lon hashRange +} + +// deinterleave64 deinterleaves a 64-bit integer. +func deinterleave64(interleaved uint64) uint64 { + x := interleaved & 0x5555555555555555 + y := (interleaved >> 1) & 0x5555555555555555 + + x = (x | (x >> 1)) & 0x3333333333333333 + y = (y | (y >> 1)) & 0x3333333333333333 + + x = (x | (x >> 2)) & 0x0f0f0f0f0f0f0f0f + y = (y | (y >> 2)) & 0x0f0f0f0f0f0f0f0f + + x = (x | (x >> 4)) & 0x00ff00ff00ff00ff + y = (y | (y >> 4)) & 0x00ff00ff00ff00ff + + x = (x | (x >> 8)) & 0x0000ffff0000ffff + y = (y | (y >> 8)) & 0x0000ffff0000ffff + + x = (x | (x >> 16)) & 0x00000000ffffffff + y = (y | (y >> 16)) & 0x00000000ffffffff + + return (y << 32) | x +} + +// interleave64 interleaves two 32-bit integers into a 64-bit integer. +func interleave64(xlo, ylo uint32) uint64 { + B := []uint64{ + 0x5555555555555555, + 0x3333333333333333, + 0x0F0F0F0F0F0F0F0F, + 0x00FF00FF00FF00FF, + 0x0000FFFF0000FFFF, + } + S := []uint{1, 2, 4, 8, 16} + + x := uint64(xlo) + y := uint64(ylo) + + x = (x | (x << S[4])) & B[4] + y = (y | (y << S[4])) & B[4] + + x = (x | (x << S[3])) & B[3] + y = (y | (y << S[3])) & B[3] + + x = (x | (x << S[2])) & B[2] + y = (y | (y << S[2])) & B[2] + + x = (x | (x << S[1])) & B[1] + y = (y | (y << S[1])) & B[1] + + x = (x | (x << S[0])) & B[0] + y = (y | (y << S[0])) & B[0] + + return x | (y << 1) +} + +// areaBySteps calculates the area covered by a hash at a given number of steps. +func areaBySteps(hash uint64, steps uint8) *hashArea { + hashSep := deinterleave64(hash) + + latScale := globalMaxLat - globalMinLat + longScale := globalMaxLon - globalMinLon + + ilato := uint32(hashSep) // lat part + ilono := uint32(hashSep >> 32) // lon part + + // divide by 2**step. + // Then, for 0-1 coordinate, multiply times scale and add + // to the min to get the absolute coordinate. + area := &hashArea{} + area.Lat.Min = globalMinLat + (float64(ilato)/float64(uint64(1)<> (64 - steps*2) + + if d > 0 { + x = x + uint64(zz+1) + } else { + x = x | uint64(zz) + x = x - uint64(zz+1) + } + + x &= (0xaaaaaaaaaaaaaaaa >> (64 - steps*2)) + return x | y +} + +// geohashMoveY moves the geohash in the y direction. +func geohashMoveY(hash uint64, steps uint8, d int8) uint64 { + x := hash & 0xaaaaaaaaaaaaaaaa + y := hash & 0x5555555555555555 + + zz := uint64(0xaaaaaaaaaaaaaaaa) >> (64 - steps*2) + + if d > 0 { + y = y + (zz + 1) + } else { + y = y | zz + y = y - (zz + 1) + } + + y &= (0x5555555555555555 >> (64 - steps*2)) + return x | y +} + +// geohashNeighbors returns the geohash neighbors of a given hash with a given number of steps. +func geohashNeighbors(hash uint64, steps uint8) [8]uint64 { + neighbors := [8]uint64{} + + neighbors[0] = geohashMoveY(hash, steps, 1) // North + neighbors[1] = geohashMoveX(geohashMoveY(hash, steps, 1), steps, 1) // North-East + neighbors[2] = geohashMoveX(hash, steps, 1) // East + neighbors[3] = geohashMoveX(geohashMoveY(hash, steps, -1), steps, 1) // South-East + neighbors[4] = geohashMoveY(hash, steps, -1) // South + neighbors[5] = geohashMoveX(geohashMoveY(hash, steps, -1), steps, -1) // South-West + neighbors[6] = geohashMoveX(hash, steps, -1) // West + neighbors[7] = geohashMoveX(geohashMoveY(hash, steps, 1), steps, -1) // North-West + + return neighbors +} diff --git a/internal/eval/sortedset/sorted_set.go b/internal/eval/sortedset/sorted_set.go index 9eeb17d02b..4e5f051e38 100644 --- a/internal/eval/sortedset/sorted_set.go +++ b/internal/eval/sortedset/sorted_set.go @@ -263,14 +263,14 @@ func (ss *Set) GetMemberScoresInRange(minScore, maxScore float64, count, max int if ssi.Score < minScore { return true } - if ssi.Score > maxScore { + if ssi.Score >= maxScore { return false } members = append(members, ssi.Member) scores = append(scores, ssi.Score) count++ - if max > 0 && count == max { + if max > 0 && count >= max { return false } diff --git a/internal/eval/store_eval.go b/internal/eval/store_eval.go index e015943b2b..555b3da4ec 100644 --- a/internal/eval/store_eval.go +++ b/internal/eval/store_eval.go @@ -7121,26 +7121,27 @@ func evalGEORADIUSBYMEMBER(args []string, store *dstore.Store) *EvalResponse { centerLat, centerLon := geo.DecodeHash(centerHash) - if opts.IsSorted || opts.WithDist || opts.WithCoord { - for i := range hashes { - msLat, msLon := geo.DecodeHash(hashes[i]) + for i := range hashes { + msLat, msLon := geo.DecodeHash(hashes[i]) - if opts.WithDist || opts.IsSorted { - dist := geo.GetDistance(centerLon, centerLat, msLon, msLat) - distance, err := geo.ConvertDistance(dist, unit) - if err != nil { - return &EvalResponse{ - Result: nil, - Error: err, - } - } - dists = append(dists, distance) - } + dist := geo.GetDistance(centerLon, centerLat, msLon, msLat) + + // Geohash scores are not linear. Therefore, we can sometimes receive results + // which are out of the geographical range and we need to post filter the results here. + if dist > radius { + members[i] = "" + } - if opts.WithCoord { - coords = append(coords, []float64{msLat, msLon}) + distance, err := geo.ConvertDistance(dist, unit) + if err != nil { + return &EvalResponse{ + Result: nil, + Error: err, } } + + dists = append(dists, distance) + coords = append(coords, []float64{msLat, msLon}) } // Sorting is done by distance. Since our output can be dynamic and we can avoid allocating memory @@ -7174,6 +7175,10 @@ func evalGEORADIUSBYMEMBER(args []string, store *dstore.Store) *EvalResponse { response := make([][]interface{}, 0, min(len(members), countVal)) for i := 0; i < cap(response); i++ { + if members[i] == "" { + continue + } + member := []interface{}{} member = append(member, members[indices[i]]) if opts.WithDist {