Skip to content

Commit

Permalink
Tracing GC.
Browse files Browse the repository at this point in the history
Based partially on datacrypt-project#24
Rewritten to primarily use core.async.

* src/hitchhiker/tree/konserve.cljc (create-id): new function; prepends
  the current timestamp as hex to the UUID key.
  (KonserveBackend.-write-node): use create-id to generate the storage ID.
* src/hitchhiker/tree/tracing-gc/konserve.cljc: new namespace.
* src/hitchhiker/tree/tracing-gc.cljc: new namespace.
* .gitignore: ignore IntelliJ files.
* project.clj: update konserve to 0.6.0-SNAPSHOT.
  • Loading branch information
noonhomie committed Nov 18, 2019
1 parent 5775714 commit 57c063e
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 3 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@ pom.xml.asc
*.swo
*.swn
*~
/.idea
*.iml
profiles.clj
/scripts
4 changes: 2 additions & 2 deletions project.clj
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
[com.taoensso/carmine "2.12.2" :scope "provided"]
[org.clojure/core.rrb-vector "0.0.14"]
[org.clojure/core.cache "0.7.2"]
[io.replikativ/konserve "0.5.1"]]
[io.replikativ/konserve "0.6.0-SNAPSHOT"]]
:aliases {"bench" ["with-profile" "profiling" "run" "-m" "hitchhiker.bench"]}
:jvm-opts ["-server" "-Xmx3700m" "-Xms3700m"]
:profiles {:test
Expand Down Expand Up @@ -42,7 +42,7 @@
:compiler {:main hitchhiker.tree.core
:asset-path "js/out"
:output-to "resources/public/js/core.js"
:output-dir "resources/public/js/out" }}
:output-dir "resources/public/js/out"}}
;; inspired by datascript project.clj
{:id "test"
:source-paths ["src" "test"]
Expand Down
11 changes: 10 additions & 1 deletion src/hitchhiker/tree/bootstrap/konserve.cljc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@
[key]
(ha/promise-chan key))

(defn create-id
"Generate a storage ID from a content UUID.
Adds a (hexadecimal) timestamp in milliseconds to the start of the
key; this is so the GC can properly delete keys that don't exist in
the tree anymore"
[uuid]
(format "%016x.%s" (System/currentTimeMillis) uuid))

(defrecord KonserveAddr [store last-key konserve-key storage-addr]
n/INode
(-last-key [_] last-key)
Expand Down Expand Up @@ -90,7 +99,7 @@
(ha/go-try
(swap! session update-in [:writes] inc)
(let [pnode (encode node)
id (h/uuid pnode)
id (create-id (h/uuid pnode))
ch (k/assoc-in store [id] node)]
(ha/<? ch)
(konserve-addr store
Expand Down
54 changes: 54 additions & 0 deletions src/hitchhiker/tree/tracing_gc.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
(ns hitchhiker.tree.tracing-gc
(:require [clojure.core.async :as async]
[clojure.tools.logging :as log]
[hitchhiker.tree :as hh]
[hitchhiker.tree.node :as n]
[hitchhiker.tree.utils.async :as ha]))

(defprotocol IGCScratch
(observe-addr! [this addr] "Marks the given addr as being currently active")
(observed? [this addr] "Returns true if the given addr was observed"))

(defmacro do-<!
"Force into an async taking call.
Evaluates to <! when in an async backend.
Wraps form in a thread when non-async."
[& form]
(ha/if-async?
`(async/<! ~@form)
`(async/<! (async/thread ~@form))))

(defn trace-gc!
"Does a tracing GC and frees up all unused keys.
This is a simple mark-sweep algorithm.
gc-scratch should be an instance of IGCScratch
gc-roots should be a list of the roots of currently active trees.
all-keys should be a core.async channel that will contain every key in storage.
delete-fn will be called on every key that should be deleted during the sweep phase. It is expected to return a channel that yields when the item is deleted."
[gc-scratch gc-roots all-keys delete-fn]
(let [mark-phase (async/go-loop [roots gc-roots]
(when-let [root (first roots)]
(loop [nodes [root]]
(when-let [node (first nodes)]
(log/debug :task ::trace-gc! :phase :marking :visiting-node (async/poll! (:storage-addr node)))
(let [node (if (hh/resolved? node)
node
(do-<! (do
(log/debug :task ::trace-gc! :phase :marking :resolve-node node)
(n/-resolve-chan node))))
nodes (if (hh/index-node? node)
(into (subvec nodes 1) (:children node))
(subvec nodes 1))]
(when-let [address (async/poll! (:storage-addr node))]
(async/<! (observe-addr! gc-scratch address)))
(recur nodes))))
(recur (rest roots))))]
(async/go
(async/<! mark-phase)
(loop []
(when-let [address (async/<! all-keys)]
(when-not (async/<! (observed? gc-scratch address))
(async/<! (delete-fn address)))
(recur))))))
28 changes: 28 additions & 0 deletions src/hitchhiker/tree/tracing_gc/konserve.cljc
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
(ns hitchhiker.tree.tracing-gc.konserve
(:require [clojure.tools.logging :as log]
[hitchhiker.tree.tracing-gc :as gc]
[konserve.core :as k]
#?(:clj [clojure.core.async :as async]
:cljs [cljs.core.async :as async :include-macros true])))

(defn- within-epoch?
[address epoch]
(if-let [addr-ts (some-> (when (string? address)
(re-matches #"([0-9a-f]{16})\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}" address))
(second))]
(not (pos? (compare addr-ts epoch)))
true))

(defrecord KonserveGCScratch [store epoch]
gc/IGCScratch
(observe-addr! [_ addr]
(log/debug :task ::gc/observe-addr! :addr addr)
(k/assoc store addr :marked))

(observed? [_ addr]
(async/go
(log/debug :task ::gc/observed? :phase :begin :addr addr)
(let [result (or (within-epoch? addr epoch)
(= :marked (async/<! (k/get store addr))))]
(log/debug :task ::gc/observed? :phase :end :addr addr :result result)
result))))

0 comments on commit 57c063e

Please sign in to comment.