From 894aa810a147cf168c9d0722dac3e68e23f373c4 Mon Sep 17 00:00:00 2001 From: Jeff Henrikson <79888432+jeffhhk@users.noreply.github.com> Date: Fri, 26 Mar 2021 16:38:13 -0700 Subject: [PATCH 1/5] implement: lru.rkt refactor: extract struct payload implement: unit test demonstrating lru bug fix: lru-freshen: the case of oldest lrun and newest lrun fix: handling an empty linked list would be more work, and useless, so skip it refactor: extract define lru-remove refactor: move all lru-evict calls to lru-ref amend: refactor: move all lru-evict calls to lru-ref refactor: use lru-remove from lru-evict refactor: remove unnecessary lru-evict call refactor: make all the counting work the same way renames/comments/formatting --- medikanren/lru.rkt | 234 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 medikanren/lru.rkt diff --git a/medikanren/lru.rkt b/medikanren/lru.rkt new file mode 100644 index 00000000..6864af68 --- /dev/null +++ b/medikanren/lru.rkt @@ -0,0 +1,234 @@ +#lang racket +(provide + make-lru + lru-ref + ) +(require chk) + + +(define (assert k st) + (if (not k) + (raise (format "assertion failure: ~a" st)) + #f)) + +(struct payload (k v)) + +(struct lrun ( + (older #:mutable) + payload + (newer #:mutable))) + +(struct lru ( + behind-ref + hash + (lrun-oldest #:mutable) + (lrun-newest #:mutable) + (num-entries #:mutable) + num-entries-max)) + +(define (make-lru behind-ref #:num-entries-max (num-entries-max 1000)) + (lru + behind-ref + (make-hash) + #f + #f + 0 + num-entries-max)) + +;;; Add a newest entry to the lru. +(define (lru-put-newest ths payload1) + (let* ( + (lrun1 (lru-lrun-newest ths)) + (k (payload-k payload1)) + (lrun0 (lrun lrun1 payload1 #f)) + ) + ; connect lrun fields + ; lrun0.older is already connected + (set-lrun-older! lrun0 lrun1) + (if lrun1 + (set-lrun-newer! lrun1 lrun0) + #f) + ; connect lru fields + (set-lru-lrun-newest! ths lrun0) + (if (not (lru-lrun-oldest ths)) ; are we brand new? + (set-lru-lrun-oldest! ths lrun0) + #f) + (set-lru-num-entries! ths (+ (lru-num-entries ths) 1)) + (hash-set! (lru-hash ths) k lrun0))) + +(define (lru-remove ths lrun1) + (let* ( + (payload1 (lrun-payload lrun1)) + (k (payload-k payload1)) + (lrun0 (lrun-older lrun1)) + (lrun2 (lrun-newer lrun1)) + ) + (if lrun0 + (set-lrun-newer! lrun0 lrun2) + (begin + ; we are removing the oldest + (set-lru-lrun-oldest! ths lrun2) + (set-lrun-older! lrun2 #f))) + (if lrun2 + (set-lrun-older! lrun2 lrun0) + (begin + ; we are removing the newest + (set-lru-lrun-newest! ths lrun0) + (set-lrun-newer! lrun0 #f))) + (set-lru-num-entries! ths (- (lru-num-entries ths) 1)) + (hash-remove! (lru-hash ths) k))) + + +;;; If the lru is full, remove the oldest entry. +(define (lru-evict ths) + (if (> (lru-num-entries ths) (lru-num-entries-max ths)) + (let* ((lrun1 (lru-lrun-oldest ths))) + (lru-remove ths lrun1)) + #f)) + +;;; Make the entry with key k the newest entry. +(define (lru-freshen ths k) + (let* ( + (lrun1 (hash-ref (lru-hash ths) k)) + (payload1 (lrun-payload lrun1)) + ) + (lru-remove ths lrun1) + (lru-put-newest ths payload1))) + + +;;; Fetch item from lru cache, or if absent, from ref-behind. +(define (lru-ref ths k) + (match (hash-ref! (lru-hash ths) k #f) + (#f + (let ((v ((lru-behind-ref ths) k))) + (lru-put-newest ths (payload k v)) + (lru-evict ths) + v)) + (lrun + (if (>= (lru-num-entries ths) 2) ; freshen 1 entry is noop + (lru-freshen ths k) + #f) + (payload-v (lrun-payload lrun))))) + + + +(module+ test + (define (make-test-hash n) + (define h (make-hash)) + (for ((i (range n))) + (hash-set! h i (+ i 100))) + h) + + ;; do we get a correct value back on a miss? + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3)) + #:do (define v (lru-ref l 5)) + #:= v 105) + + ;; do we get a correct value back on a hit? + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3)) + #:do (lru-ref l 5) + #:do (define v (lru-ref l 5)) + #:= v 105) + + ;; do we initialize properly? + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3)) + #:do (define v (lru-ref l 5)) + #:t (lru-lrun-newest l) + #:t (lru-lrun-oldest l)) + + ;; when the oldest entry becomes the newest entry, is the next eviction correct? + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3)) + #:do (lru-ref l 5) + #:do (lru-ref l 6) + #:do (lru-ref l 7) + #:do (lru-ref l 5) + #:do (lru-ref l 8) + #:t (not (hash-has-key? (lru-hash l) 6))) + + ;; does num-entries grow as expeced? + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3)) + #:do (lru-ref l 5) + #:= (lru-num-entries l) 1) + + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3)) + #:do (lru-ref l 5) + #:do (lru-ref l 6) + #:= (lru-num-entries l) 2) + + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3)) + #:do (lru-ref l 5) + #:do (lru-ref l 6) + #:do (lru-ref l 7) + #:= (lru-num-entries l) 3) + + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define l (make-lru (lambda (k) (hash-ref h1 k #f)) #:num-entries-max 3)) + #:do (lru-ref l 5) + #:do (lru-ref l 6) + #:do (lru-ref l 7) + #:do (lru-ref l 8) + #:= (lru-num-entries l) 3) + + ;; do we make the expected number of upstream calls? + + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define num-calls (box 0)) + #:do (define (incr) (set-box! num-calls (+ 1 (unbox num-calls)))) + #:do (define l (make-lru + (lambda (k) (incr) (hash-ref h1 k #f)) + #:num-entries-max 2)) + #:do (lru-ref l 5) + #:= (unbox num-calls) 1) + + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define num-calls (box 0)) + #:do (define (incr) (set-box! num-calls (+ 1 (unbox num-calls)))) + #:do (define l (make-lru + (lambda (k) (incr) (hash-ref h1 k #f)) + #:num-entries-max 2)) + #:do (lru-ref l 5) + #:do (lru-ref l 5) + #:= (unbox num-calls) 1) + + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define num-calls (box 0)) + #:do (define (incr) (set-box! num-calls (+ 1 (unbox num-calls)))) + #:do (define l (make-lru + (lambda (k) (incr) (hash-ref h1 k #f)) + #:num-entries-max 2)) + #:do (lru-ref l 5) + #:do (lru-ref l 6) + #:do (lru-ref l 5) + #:= (unbox num-calls) 2) + + (chk + #:do (define h1 (make-test-hash 100)) + #:do (define num-calls (box 0)) + #:do (define (incr) (set-box! num-calls (+ 1 (unbox num-calls)))) + #:do (define l (make-lru + (lambda (k) (incr) (hash-ref h1 k #f)) + #:num-entries-max 2)) + #:do (lru-ref l 5) + #:do (lru-ref l 6) + #:do (lru-ref l 7) + #:do (lru-ref l 5) + #:= (unbox num-calls) 4) + ) From 2c51cec4a293e5f72cef08c0b5205e5dd7db4aa5 Mon Sep 17 00:00:00 2001 From: Jeff Henrikson <79888432+jeffhhk@users.noreply.github.com> Date: Thu, 25 Mar 2021 20:18:38 -0700 Subject: [PATCH 2/5] refactor: string:corpus-find*/disk: use argument lookup instead of in-index --- medikanren/db.rkt | 3 ++- medikanren/string-search.rkt | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/medikanren/db.rkt b/medikanren/db.rkt index 55e0a85b..97638ef0 100644 --- a/medikanren/db.rkt +++ b/medikanren/db.rkt @@ -140,7 +140,8 @@ (close-input-port in-concept-cui-index) (lambda (cui*) (string:corpus-find* cui-corpus cui-index cui*))) (else - (lambda (cui*) (string:corpus-find*/disk cid->concept in-concept-cui-index cui*))))) + (define (lookup cui) (string:corpus-find/disk cid->concept in-concept-cui-index cui)) + (lambda (cui*) (string:corpus-find*/disk cid->concept lookup cui*))))) (define ~name*->cid* (cond (in-memory-names? diff --git a/medikanren/string-search.rkt b/medikanren/string-search.rkt index dfe0c9f6..e563ffed 100644 --- a/medikanren/string-search.rkt +++ b/medikanren/string-search.rkt @@ -6,6 +6,7 @@ suffix:corpus-find*/disk string:corpus->index string:corpus-find* + string:corpus-find/disk string:corpus-find*/disk) (require "repr.rkt" @@ -328,7 +329,8 @@ (range rstart rend)))))) (else '())))) -(define (string:corpus-find*/disk cid->concept in-index str*) + +(define (string:corpus-find*/disk cid->concept lookup str*) (remove-duplicates - (sort (append* (map (lambda (s) (string:corpus-find/disk cid->concept in-index s)) str*)) + (sort (append* (map lookup str*)) <))) From c69be54fa1aa7b44e8f1934b6b1edaf35a28f5ff Mon Sep 17 00:00:00 2001 From: Jeff Henrikson <79888432+jeffhhk@users.noreply.github.com> Date: Thu, 25 Mar 2021 20:35:38 -0700 Subject: [PATCH 3/5] implement: config key num-cached-cuis --- medikanren/common.rkt | 7 ++++--- medikanren/config.defaults.scm | 1 + medikanren/db.rkt | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/medikanren/common.rkt b/medikanren/common.rkt index e84ce047..20d60025 100644 --- a/medikanren/common.rkt +++ b/medikanren/common.rkt @@ -128,12 +128,13 @@ (map (lambda (name) (define path (path/data (symbol->string name))) (define options (list (config-ref 'in-memory-names?) - (config-ref 'in-memory-cuis?))) + (config-ref 'in-memory-cuis?) + (config-ref 'num-cached-cuis))) (cond ((directory-exists? path) (when verbose? (printf "loading ~a\n" name)) (cons name (if verbose? - (time (apply make-db path options)) - (apply make-db path options)))) + (time (apply make-db (cons path options))) + (apply make-db (cons path options))))) (else (when verbose? (printf "cannot load ~a; " name) (printf "directory missing: ~a\n" path)) diff --git a/medikanren/config.defaults.scm b/medikanren/config.defaults.scm index a08b96c1..f9457d1a 100644 --- a/medikanren/config.defaults.scm +++ b/medikanren/config.defaults.scm @@ -11,6 +11,7 @@ (in-memory-names? . #t) (in-memory-cuis? . #t) + (num-cached-cuis . #f) (query-results.write-to-file? . #t) ;; #t will write the query and results to file, #f will not (query-results.file-name . "last.sx") diff --git a/medikanren/db.rkt b/medikanren/db.rkt index 97638ef0..588f9460 100644 --- a/medikanren/db.rkt +++ b/medikanren/db.rkt @@ -78,7 +78,7 @@ ;; category*, predicate* ;; memory-usage: 0 1 2 3 -(define (make-db db-dir (in-memory-names? #t) (in-memory-cuis? #t)) +(define (make-db db-dir (in-memory-names? #t) (in-memory-cuis? #t) (num-cached-cuis #f)) (define (db-path fname) (expand-user-path (build-path db-dir fname))) (define (open-db-path fname) (open-input-file (db-path fname))) (define (open-db-path/optional fname) From ebc671bf7b49420855a46695af5f7e1e0b25bf8e Mon Sep 17 00:00:00 2001 From: Jeff Henrikson <79888432+jeffhhk@users.noreply.github.com> Date: Fri, 26 Mar 2021 16:37:55 -0700 Subject: [PATCH 4/5] implement: lru caching for string:corpus-find/disk --- medikanren/db.rkt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/medikanren/db.rkt b/medikanren/db.rkt index 588f9460..6174ddad 100644 --- a/medikanren/db.rkt +++ b/medikanren/db.rkt @@ -38,6 +38,7 @@ (require "repr.rkt" "string-search.rkt" + "lru.rkt" racket/file racket/stream racket/string @@ -139,6 +140,13 @@ (define cui-index (port->string-keys in-concept-cui-index)) (close-input-port in-concept-cui-index) (lambda (cui*) (string:corpus-find* cui-corpus cui-index cui*))) + (num-cached-cuis + (define lru + (make-lru + (lambda (cui) (string:corpus-find/disk cid->concept in-concept-cui-index cui)) + #:num-entries-max num-cached-cuis)) + (define (lookup cui) (lru-ref lru cui)) + (lambda (cui*) (string:corpus-find*/disk cid->concept lookup cui*))) (else (define (lookup cui) (string:corpus-find/disk cid->concept in-concept-cui-index cui)) (lambda (cui*) (string:corpus-find*/disk cid->concept lookup cui*))))) From 8c00a95f4e52ac0d5ef504c061e750a72cfeb93e Mon Sep 17 00:00:00 2001 From: Jeff Henrikson <79888432+jeffhhk@users.noreply.github.com> Date: Mon, 5 Apr 2021 12:42:13 -0700 Subject: [PATCH 5/5] refactor: prefer (when x y) to (if x y #f) --- medikanren/lru.rkt | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/medikanren/lru.rkt b/medikanren/lru.rkt index 6864af68..e1d7f5e8 100644 --- a/medikanren/lru.rkt +++ b/medikanren/lru.rkt @@ -7,9 +7,8 @@ (define (assert k st) - (if (not k) - (raise (format "assertion failure: ~a" st)) - #f)) + (when (not k) + (raise (format "assertion failure: ~a" st)))) (struct payload (k v)) @@ -45,14 +44,12 @@ ; connect lrun fields ; lrun0.older is already connected (set-lrun-older! lrun0 lrun1) - (if lrun1 - (set-lrun-newer! lrun1 lrun0) - #f) + (when lrun1 + (set-lrun-newer! lrun1 lrun0)) ; connect lru fields (set-lru-lrun-newest! ths lrun0) - (if (not (lru-lrun-oldest ths)) ; are we brand new? - (set-lru-lrun-oldest! ths lrun0) - #f) + (when (not (lru-lrun-oldest ths)) ; are we brand new? + (set-lru-lrun-oldest! ths lrun0)) (set-lru-num-entries! ths (+ (lru-num-entries ths) 1)) (hash-set! (lru-hash ths) k lrun0))) @@ -81,10 +78,9 @@ ;;; If the lru is full, remove the oldest entry. (define (lru-evict ths) - (if (> (lru-num-entries ths) (lru-num-entries-max ths)) + (when (> (lru-num-entries ths) (lru-num-entries-max ths)) (let* ((lrun1 (lru-lrun-oldest ths))) - (lru-remove ths lrun1)) - #f)) + (lru-remove ths lrun1)))) ;;; Make the entry with key k the newest entry. (define (lru-freshen ths k) @@ -105,9 +101,8 @@ (lru-evict ths) v)) (lrun - (if (>= (lru-num-entries ths) 2) ; freshen 1 entry is noop - (lru-freshen ths k) - #f) + (when (>= (lru-num-entries ths) 2) ; freshen 1 entry is noop + (lru-freshen ths k)) (payload-v (lrun-payload lrun)))))