forked from jvirkki/libbloom
-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
614 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
build |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
|
||
Copyright (c) 2012, Jyri J. Virkki | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are | ||
met: | ||
|
||
1. Redistributions of source code must retain the above copyright | ||
notice, this list of conditions and the following disclaimer. | ||
|
||
2. Redistributions in binary form must reproduce the above copyright | ||
notice, this list of conditions and the following disclaimer in the | ||
documentation and/or other materials provided with the distribution. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
|
||
# Copyright (c) 2012, Jyri J. Virkki | ||
# All rights reserved. | ||
# | ||
# This file is under BSD license. See LICENSE file. | ||
# | ||
# By default, builds optimized 32bit libbloom (under ./build) | ||
# Requires GNU Make, so invoke appropriately (make or gmake) | ||
# | ||
# Other build options: | ||
# | ||
# DEBUG=1 make to build debug instead of optimized | ||
# MM=-m64 make to build 64bit library | ||
# | ||
# Other build targets: | ||
# | ||
# make test to build and run test code | ||
# make gcov to build with code coverage and run gcov | ||
# make lint to run lint | ||
# make clean the usual | ||
# | ||
|
||
# Absolute path of the source tree and the host OS name; both are evaluated
# once, when the Makefile is parsed.
TOP := $(shell /bin/pwd)
BUILD_OS := $(shell uname)

# All build products go under $(BUILD).
BUILD=$(TOP)/build
INC=-I$(TOP) -I$(TOP)/murmur2
LIB=-lm
# OPT and MM are assigned further down; CC is a recursively expanded
# variable, so it picks up their final values when a recipe runs.
CC=gcc ${OPT} ${MM} -std=c99 -fPIC

# Default to a 32bit build unless the caller supplied MM (e.g. MM=-m64).
ifeq ($(MM),)
MM=-m32
endif

# Embed the build directory as a runtime library search path so that
# test-libbloom can locate libbloom.so without LD_LIBRARY_PATH.
ifeq ($(BUILD_OS),Linux)
RPATH=-Wl,-rpath,$(BUILD)
endif

ifeq ($(BUILD_OS),SunOS)
RPATH=-R$(BUILD)
endif

# DEBUG=1 selects a debug build; DEBUGOPT may carry extra flags (the gcov
# target uses it to pass the coverage options).
ifeq ($(DEBUG),1)
OPT=-g $(DEBUGOPT)
else
OPT=-O3
endif

# These targets do not produce a file of the same name. Declaring them phony
# keeps them working even if a file called "clean", "test", ... exists.
.PHONY: all clean lint test gcov

all: $(BUILD)/libbloom.so $(BUILD)/test-libbloom

$(BUILD)/libbloom.so: $(BUILD)/murmurhash2.o $(BUILD)/bloom.o
	(cd $(BUILD) && $(CC) bloom.o murmurhash2.o -shared $(LIB) -o libbloom.so)

$(BUILD)/test-libbloom: $(BUILD)/libbloom.so $(BUILD)/test.o
	(cd $(BUILD) && $(CC) test.o -L$(BUILD) $(RPATH) -lbloom -o test-libbloom)

# Objects also depend on the headers so that a header change triggers a
# rebuild. $< is still the .c file because it is the first prerequisite.
$(BUILD)/%.o: %.c bloom.h murmur2/murmurhash2.h
	mkdir -p $(BUILD)
	$(CC) $(INC) -c $< -o $@

$(BUILD)/murmurhash2.o: murmur2/MurmurHash2.c murmur2/murmurhash2.h
	mkdir -p $(BUILD)
	$(CC) $(INC) -c murmur2/MurmurHash2.c -o $(BUILD)/murmurhash2.o

clean:
	rm -rf $(BUILD)

lint:
	lint -x -errfmt=simple $(INC) $(LIB) *.c murmur2/*.c

test: $(BUILD)/test-libbloom
	$(BUILD)/test-libbloom

# Rebuild from scratch with coverage instrumentation, run the test binary,
# then report coverage for bloom.c. The sources are copied into $(BUILD)
# before gcov runs there.
gcov:
	$(MAKE) clean
	DEBUG=1 DEBUGOPT="-fprofile-arcs -ftest-coverage" $(MAKE) all
	(cd $(BUILD) && \
	    cp ../*.c . && \
	    ./test-libbloom && \
	    gcov -bf bloom.c)
	@echo Remember to make clean to remove instrumented objects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,45 @@ | ||
|
||
libbloom | ||
Introduction | ||
------------ | ||
This is libbloom, a simple and small bloom filter implementation in C. | ||
|
||
If you are reading this you probably already know about bloom filters | ||
and why you might use one. If not, the wikipedia article is a good intro: | ||
http://en.wikipedia.org/wiki/Bloom_filter | ||
|
||
|
||
Building | ||
-------- | ||
The Makefile assumes GNU Make, so run 'make' or 'gmake' as appropriate | ||
on your system. | ||
|
||
By default it builds an optimized 32bit libbloom. See Makefile comments | ||
for other build options. | ||
|
||
The shared library will be in ./build/libbloom.so | ||
|
||
|
||
Sample Usage | ||
------------ | ||
|
||
#include "bloom.h" | ||
|
||
struct bloom bloom; | ||
bloom_init(&bloom, 1000000, 0.01); | ||
bloom_add(&bloom, buffer, buflen); | ||
|
||
if (bloom_check(&bloom, buffer, buflen)) { | ||
printf("It may be there!\n"); | ||
} | ||
|
||
|
||
Documentation | ||
------------- | ||
Read bloom.h for more detailed documentation on the public interfaces. | ||
|
||
|
||
License | ||
------- | ||
This code (except MurmurHash2) is under BSD license. See LICENSE file. | ||
|
||
See murmur2/README for info on MurmurHash2. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
/* | ||
* Copyright (c) 2012, Jyri J. Virkki | ||
* All rights reserved. | ||
* | ||
* This file is under BSD license. See LICENSE file. | ||
*/ | ||
|
||
/* | ||
* Refer to bloom.h for documentation on the public interfaces. | ||
*/ | ||
|
||
#include <fcntl.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
|
||
#include "bloom.h" | ||
#include "murmurhash2.h" | ||
|
||
|
||
static int bloom_check_add(struct bloom * bloom, | ||
const void * buffer, int len, int add) | ||
{ | ||
if (bloom->ready == 0) { | ||
(void)printf("bloom at %p not initialized!\n", (void *)bloom); | ||
return -1; | ||
} | ||
|
||
int hits = 0; | ||
register unsigned int a = murmurhash2(buffer, len, 0x9747b28c); | ||
register unsigned int b = murmurhash2(buffer, len, a); | ||
register unsigned int x; | ||
register unsigned int i; | ||
register unsigned int byte; | ||
register unsigned int mask; | ||
register unsigned char c; | ||
|
||
for (i = 0; i < bloom->hashes; i++) { | ||
x = (a + i*b) % bloom->bits; | ||
byte = x >> 3; | ||
c = bloom->bf[byte]; // expensive memory access | ||
mask = 1 << (x % 8); | ||
|
||
if (c & mask) { | ||
hits++; | ||
} else { | ||
if (add) { | ||
bloom->bf[byte] = c | mask; | ||
} | ||
} | ||
} | ||
|
||
if (hits == bloom->hashes) { | ||
return 1; // 1 == element already in (or collision) | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
|
||
int bloom_init(struct bloom * bloom, int entries, double error) | ||
{ | ||
bloom->ready = 0; | ||
|
||
if (entries < 1 || error == 0) { | ||
return 1; | ||
} | ||
|
||
bloom->entries = entries; | ||
bloom->error = error; | ||
|
||
double num = log(bloom->error); | ||
double denom = 0.480453013918201; // ln(2)^2 | ||
bloom->bpe = -(num / denom); | ||
|
||
double dentries = (double)entries; | ||
bloom->bits = (int)(dentries * bloom->bpe); | ||
|
||
if (bloom->bits % 8) { | ||
bloom->bytes = (bloom->bits / 8) + 1; | ||
} else { | ||
bloom->bytes = bloom->bits / 8; | ||
} | ||
|
||
bloom->hashes = (int)ceil(0.693147180559945 * bloom->bpe); // ln(2) | ||
|
||
bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char)); | ||
if (bloom->bf == NULL) { | ||
return 1; | ||
} | ||
|
||
bloom->ready = 1; | ||
return 0; | ||
} | ||
|
||
|
||
/*
 * Membership query. Delegates to bloom_check_add() with add == 0, so the
 * filter's bit array is not modified.
 */
int bloom_check(struct bloom * bloom, const void * buffer, int len)
{
  const int rc = bloom_check_add(bloom, buffer, len, 0);

  return rc;
}
|
||
|
||
/*
 * Insert an element. Delegates to bloom_check_add() with add == 1, which
 * sets any of the element's bits that were still clear and reports whether
 * all of them had been set already.
 */
int bloom_add(struct bloom * bloom, const void * buffer, int len)
{
  const int rc = bloom_check_add(bloom, buffer, len, 1);

  return rc;
}
|
||
|
||
void bloom_print(struct bloom * bloom) | ||
{ | ||
(void)printf("bloom at %p\n", (void *)bloom); | ||
(void)printf(" ->entries = %d\n", bloom->entries); | ||
(void)printf(" ->error = %f\n", bloom->error); | ||
(void)printf(" ->bits = %d\n", bloom->bits); | ||
(void)printf(" ->bits per elem = %f\n", bloom->bpe); | ||
(void)printf(" ->bytes = %d\n", bloom->bytes); | ||
(void)printf(" ->hash functions = %d\n", bloom->hashes); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/* | ||
* Copyright (c) 2012, Jyri J. Virkki | ||
* All rights reserved. | ||
* | ||
* This file is under BSD license. See LICENSE file. | ||
*/ | ||
|
||
#ifndef _BLOOM_H | ||
#define _BLOOM_H | ||
|
||
/** *************************************************************************** | ||
* Structure to keep track of one bloom filter. Caller needs to | ||
* allocate this and pass it to the functions below. First call for | ||
* every struct must be to bloom_init(). | ||
* | ||
*/ | ||
struct bloom
{
  /*
   * Public, read-only part of the structure. Client code may inspect these
   * values but MUST NOT modify any of them.
   */
  int entries;            /* expected number of entries, as passed to init */
  double error;           /* requested error probability, as passed to init */
  int bits, bytes;        /* size of the bit field, in bits and in bytes */
  int hashes;             /* number of hash functions applied per element */

  /*
   * Private to the implementation. These may go away or change incompatibly
   * at any moment. Client code MUST NOT access or rely on them.
   */
  double bpe;             /* bits per element */
  unsigned char *bf;      /* the bit field itself */
  int ready;              /* non-zero once initialization has succeeded */
};
|
||
|
||
/** *************************************************************************** | ||
* Initialize the bloom filter for use. | ||
* | ||
* The filter is initialized with a bit field and number of hash functions | ||
* according to the computations from the wikipedia entry: | ||
* http://en.wikipedia.org/wiki/Bloom_filter | ||
* | ||
* Optimal number of bits is: | ||
* bits = -(entries * ln(error)) / ln(2)^2 | ||
* | ||
* Optimal number of hash functions is: | ||
* hashes = ceil(bpe * ln(2)), where bpe = bits / entries (bits per element) | ||
* | ||
* Parameters: | ||
* ----------- | ||
* bloom - Pointer to an allocated struct bloom (see above). | ||
* entries - The expected number of entries which will be inserted. | ||
* error - Desired false positive probability (holds as long as the | ||
* number of inserted entries does not exceed 'entries'). | ||
* | ||
* Return: | ||
* ------- | ||
* 0 - on success | ||
* 1 - on failure | ||
* | ||
*/ | ||
int bloom_init(struct bloom * bloom, int entries, double error); | ||
|
||
|
||
/** *************************************************************************** | ||
* Check if the given element is in the bloom filter. Remember this may | ||
* return a false positive if a collision occurred. | ||
* | ||
* Parameters: | ||
* ----------- | ||
* bloom - Pointer to an allocated struct bloom (see above). | ||
* buffer - Pointer to buffer containing element to check. | ||
* len - Size of 'buffer'. | ||
* | ||
* Return: | ||
* ------- | ||
* 0 - element is not present | ||
* 1 - element is present (or false positive due to collision) | ||
* -1 - bloom not initialized | ||
* | ||
*/ | ||
int bloom_check(struct bloom * bloom, const void * buffer, int len); | ||
|
||
|
||
/** *************************************************************************** | ||
* Add the given element to the bloom filter. | ||
* The return code indicates if the element (or a collision) was already in, | ||
* so for the common check+add use case, no need to call check separately. | ||
* | ||
* Parameters: | ||
* ----------- | ||
* bloom - Pointer to an allocated struct bloom (see above). | ||
* buffer - Pointer to buffer containing element to add. | ||
* len - Size of 'buffer'. | ||
* | ||
* Return: | ||
* ------- | ||
* 0 - element was not present and was added | ||
* 1 - element (or a collision) had already been added previously | ||
* -1 - bloom not initialized | ||
* | ||
*/ | ||
int bloom_add(struct bloom * bloom, const void * buffer, int len); | ||
|
||
|
||
/** *************************************************************************** | ||
* Print (to stdout) info about this bloom filter. Debugging aid. | ||
* | ||
*/ | ||
void bloom_print(struct bloom * bloom); | ||
|
||
#endif |
Oops, something went wrong.