-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbitcask.py
84 lines (71 loc) · 2.58 KB
/
bitcask.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from inmemory_store import BitCaskKeyDirEntry, BitCaskKVPair
import os
from typing import Dict
import struct
class BitCaskDataStore:
    """Append-only key-value store backed by a single data file.

    Every record in the data file is a 16-byte little-endian header
    (timestamp, key size, value size) followed by the key bytes and then
    the value bytes.  ``keydir`` maps each key to an entry holding the file
    offset of the latest record written for that key, so a later record for
    the same key supersedes earlier ones.
    """

    # Record header layout: uint64 timestamp, uint32 key size, uint32 value size.
    _HEADER = struct.Struct("<QLL")

    def __init__(self, datafile_name="database.db") -> None:
        """Open (creating if necessary) the data file and index its records.

        Args:
            datafile_name: Path of the data file to open.
        """
        self.datafile = datafile_name
        # "a+b" creates the file when it does not exist yet.
        self.file_handle = open(self.datafile, "a+b")
        self.keydir: Dict[str, BitCaskKeyDirEntry] = dict()
        try:
            self.build_keydir()
        except Exception:
            # Do not leak the open handle when indexing fails.
            self.file_handle.close()
            raise

    def __enter__(self):
        """Return the store itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the store when leaving a ``with`` block."""
        self.close()

    def build_keydir(self):
        """Populate ``keydir`` by scanning every record in the data file.

        Records are visited in file order, so the entry kept for a key is
        the one for its last record.  Scanning stops at the first record
        whose header, key or value is cut short by the end of the file
        (for example a partially written trailing record); records before
        it stay indexed.
        """
        filesize = os.path.getsize(self.datafile)
        if filesize == 0:
            return
        header = self._HEADER
        with open(self.datafile, "rb") as f:
            while f.tell() < filesize:
                offset = f.tell()
                byte_data = f.read(header.size)
                if len(byte_data) < header.size:
                    break  # truncated header at the tail of the file
                _timestamp, keysize, valuesize = header.unpack(byte_data)
                if offset + header.size + keysize + valuesize > filesize:
                    break  # key/value bytes are incomplete
                key = f.read(keysize).decode()
                # Only the size and position are indexed; skip the value
                # bytes instead of reading and decoding them.
                f.seek(valuesize, os.SEEK_CUR)
                self.keydir[key] = BitCaskKeyDirEntry(valuesize, offset)

    def get(self, key):
        """Return the value stored for ``key``.

        The record offset is looked up in ``keydir``; the header at that
        offset gives the key and value sizes used to read the value from
        the data file.

        Args:
            key: Key to look up.

        Returns:
            The decoded value, or ``""`` when the key is not in ``keydir``.
        """
        if key not in self.keydir:
            return ""
        file_offset = self.keydir[key].offset
        header = self._HEADER
        with open(self.datafile, "rb") as f:
            f.seek(file_offset)
            _timestamp, keysize, valuesize = header.unpack(f.read(header.size))
            f.seek(keysize, os.SEEK_CUR)  # skip over the key bytes
            value_bytes = f.read(valuesize)
        return value_bytes.decode()

    def put(self, key, value):
        """Append a key-value record to the data file and index it.

        The record is encoded and written before ``keydir`` is updated, so
        a failure while encoding or writing never leaves ``keydir``
        pointing at a record that is not on disk.

        Args:
            key: Key to store.
            value: Value to store.
        """
        byte_data = BitCaskKVPair(key, value).encode()
        # The new record starts where the file currently ends.
        offset = os.path.getsize(self.datafile)
        with open(self.datafile, "ab") as f:
            f.write(byte_data)
        # build_keydir() records the value size in bytes (taken from the
        # record header), so store a byte count here as well rather than a
        # character count.
        valsize = len(value.encode()) if isinstance(value, str) else len(value)
        self.keydir[key] = BitCaskKeyDirEntry(valsize, offset)

    def list_keys(self):
        """Return a list of all keys currently in ``keydir``."""
        return list(self.keydir)

    def sync(self):
        """Flush the store's file handle and fsync it.

        NOTE(review): put() appends through its own short-lived handle, not
        through ``file_handle``; this relies on fsync covering data written
        to the same file via another descriptor -- confirm on the target
        platform.
        """
        self.file_handle.flush()
        os.fsync(self.file_handle.fileno())

    def close(self):
        """Sync and close the file handle; a second call is a no-op."""
        if self.file_handle.closed:
            return
        self.sync()
        self.file_handle.close()