-
Notifications
You must be signed in to change notification settings - Fork 1
Asynchronous cache misses in flight
The number of asynchronous cache-misses in flight affects performance, depending on the size of the cache, the latency of the backing-store algorithm, and the number of cache-hits. Here is a simple benchmark that tests how performance scales with the number of cache-misses in flight:
- allocates cache that is big enough for 100x100 pixels of image
- continuously increases size of image until 105x105
- hides latencies of cache-misses to keep re-computation times at a minimum
"use strict";
let Lru = require("./lrucache.js").Lru;
let cache = new Lru(10000 /* enough to hold 100x100 pixels */, async function(key,callback){
// cache-miss data-load algorithm
// Mandelbrot Set Generator
let xy=key.split(",");
const MAX_ITERATION = 100
let z = { x: 0, y: 0 }, n = 0, p, d;
do {
p = {
x: Math.pow(z.x, 2) - Math.pow(z.y, 2),
y: 2 * z.x * z.y
};
z = {
x: p.x + (xy[0]-200)/200,
y: p.y + (xy[1]-200)/200
};
d = Math.sqrt(Math.pow(z.x, 2) + Math.pow(z.y, 2));
n += 1;
} while (d <= 2 && n < MAX_ITERATION);
callback(n);
},100000000 /* cache element lifetime milliseconds */);
// avoiding callback-hell when multiple values are needed at once (all values are gathered asynchronously and joined as a result array)
let N = 1; // current image edge length; grows each pass up to 105
function test()
{
    let ctr = 0;          // completed getMultiple callbacks in this pass
    const t = Date.now(); // pass start time for the benchmark report
    for (let x = 0; x < N; x++)
    {
        for (let y = 0; y < N; y++)
        {
            // Fetch the pixel and its four diagonal neighbors in a single
            // call; `results` arrives once all five keys are resolved.
            cache.getMultiple(function(results){
                // 5-point average; the value itself is intentionally unused —
                // only the cache-access cost is being benchmarked.
                let smoothedPixel = results.reduce((a, b) => a + b) / 5.0;
                ctr++;
                // strict equality (===) instead of loose ==; fires once
                // every pixel of the current pass has reported back
                if (ctr === N * N)
                {
                    console.log("benchmark for image-smoothing for "+N+"x"+N+" sized image:");
                    console.log((Date.now()-t)+" milliseconds for "+(N*N*5)+" pixel accesses ==> "+( 1000 * (Date.now()-t) / (N*N*5) )+" nanoseconds per pixel");
                    console.log(" ");
                    // grow the image until 105x105 (past cache capacity at 100x100)
                    if (N < 105) { N++; test(); }
                }
            }, x+","+y, (x+1)+","+(y+1), (x-1)+","+(y-1), (x+1)+","+(y-1), (x-1)+","+(y+1));
        }
    }
}
test();
Output starts like this:
benchmark for image-smoothing for 1x1 sized image:
11 milliseconds for 5 pixel accesses ==> 2200 nanoseconds per pixel
benchmark for image-smoothing for 2x2 sized image:
12 milliseconds for 20 pixel accesses ==> 600 nanoseconds per pixel
benchmark for image-smoothing for 3x3 sized image:
3 milliseconds for 45 pixel accesses ==> 66.66666666666667 nanoseconds per pixel
and ends like this:
benchmark for image-smoothing for 97x97 sized image:
63 milliseconds for 47045 pixel accesses ==> 1.3391433733659261 nanoseconds per pixel
benchmark for image-smoothing for 98x98 sized image:
64 milliseconds for 48020 pixel accesses ==> 1.3327780091628487 nanoseconds per pixel
benchmark for image-smoothing for 99x99 sized image:
80 milliseconds for 49005 pixel accesses ==> 1.6324864809713295 nanoseconds per pixel
benchmark for image-smoothing for 100x100 sized image:
85 milliseconds for 50000 pixel accesses ==> 1.7 nanoseconds per pixel
benchmark for image-smoothing for 101x101 sized image:
93 milliseconds for 51005 pixel accesses ==> 1.823350651896873 nanoseconds per pixel
benchmark for image-smoothing for 102x102 sized image:
107 milliseconds for 52020 pixel accesses ==> 2.0569011918492888 nanoseconds per pixel
benchmark for image-smoothing for 103x103 sized image:
128 milliseconds for 53045 pixel accesses ==> 2.413045527382411 nanoseconds per pixel
benchmark for image-smoothing for 104x104 sized image:
123 milliseconds for 54080 pixel accesses ==> 2.274408284023669 nanoseconds per pixel
benchmark for image-smoothing for 105x105 sized image:
136 milliseconds for 55125 pixel accesses ==> 2.4671201814058956 nanoseconds per pixel
There is a sudden increase in timings at 100x100, as that is equal to the size of the cache. So at 100x100 the cache starts handling cache-miss operations, but each operation here is a synchronous computation whose latency cannot be hidden, so the timing keeps increasing.
To really hide latencies, the operations must be truly asynchronous within themselves. To simulate this, the algorithm can be replaced with a simple setTimeout with 150-millisecond waiting:
"use strict";
let Lru = require("./lrucache.js").Lru;
let cache = new Lru(10000 /* enough to hold 100x100 pixels */, async function(key,callback){
// cache-miss data-load algorithm
let randomResult = 5;
setTimeout(function(){
callback(randomResult);
},150);
},100000000 /* cache element lifetime milliseconds */);
// avoiding callback-hell when multiple values are needed at once (all values are gathered asynchronously and joined as a result array)
let N=1; // image edge length for the current benchmark pass
function test()
{
    // Number of getMultiple callbacks that have completed; the pass is
    // finished once every one of the N*N pixels has reported back.
    let completed = 0;
    const started = Date.now();
    for(let col=0;col<N;col++)
    {
        for(let row=0;row<N;row++)
        {
            // the center pixel plus its four diagonal neighbors
            const center = col+","+row;
            const se = (col+1)+","+(row+1);
            const nw = (col-1)+","+(row-1);
            const ne = (col+1)+","+(row-1);
            const sw = (col-1)+","+(row+1);
            cache.getMultiple(function(results){
                // 5-point average of the gathered values (benchmark work only)
                let smoothedPixel = results.reduce(function(a,b){ return a+b; }) /5.0;
                completed++;
                if(completed == N*N)
                {
                    console.log("benchmark for image-smoothing for "+N+"x"+N+" sized image:");
                    console.log((Date.now()-started)+" milliseconds for "+(N*N*5)+" pixel accesses ==> "+( 1000 * (Date.now()-started) / (N*N*5) )+" nanoseconds per pixel");
                    console.log(" ");
                    if(N<105){ N++; test(); }
                }
            }, center, se, nw, ne, sw);
        }
    }
}
test();
output:
benchmark for image-smoothing for 1x1 sized image:
163 milliseconds for 5 pixel accesses ==> 32600 nanoseconds per pixel
benchmark for image-smoothing for 2x2 sized image:
151 milliseconds for 20 pixel accesses ==> 7550 nanoseconds per pixel
benchmark for image-smoothing for 3x3 sized image:
152 milliseconds for 45 pixel accesses ==> 3377.777777777778 nanoseconds per pixel
benchmark for image-smoothing for 4x4 sized image:
152 milliseconds for 80 pixel accesses ==> 1900 nanoseconds per pixel
benchmark for image-smoothing for 5x5 sized image:
153 milliseconds for 125 pixel accesses ==> 1224 nanoseconds per pixel
...
...
...
benchmark for image-smoothing for 98x98 sized image:
220 milliseconds for 48020 pixel accesses ==> 4.581424406497293 nanoseconds per pixel
benchmark for image-smoothing for 99x99 sized image:
223 milliseconds for 49005 pixel accesses ==> 4.550556065707581 nanoseconds per pixel
benchmark for image-smoothing for 100x100 sized image:
227 milliseconds for 50000 pixel accesses ==> 4.54 nanoseconds per pixel
benchmark for image-smoothing for 101x101 sized image:
228 milliseconds for 51005 pixel accesses ==> 4.470149985295559 nanoseconds per pixel
benchmark for image-smoothing for 102x102 sized image:
232 milliseconds for 52020 pixel accesses ==> 4.459823144944252 nanoseconds per pixel
benchmark for image-smoothing for 103x103 sized image:
233 milliseconds for 53045 pixel accesses ==> 4.392496936563295 nanoseconds per pixel
benchmark for image-smoothing for 104x104 sized image:
239 milliseconds for 54080 pixel accesses ==> 4.419378698224852 nanoseconds per pixel
benchmark for image-smoothing for 105x105 sized image:
239 milliseconds for 55125 pixel accesses ==> 4.3356009070294785 nanoseconds per pixel
There is no performance drop at all, even at 105x105, as all of the cache-misses are hidden behind the other item accesses, including the cache-hits. In this particular example, the 50k cache-hits effectively hid all 5125 cache-misses; and even when they cannot, those 5125 misses can still hide behind each other (1x extra latency instead of 5125x extra latency).