Last active
January 16, 2020 14:26
-
-
Save magiconair/5952535 to your computer and use it in GitHub Desktop.
Tool for extracting bucket/key data from riak's bitcask hint files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Riak key extractor
//
// Extracts bucket names and keys from Riak's bitcask hint files.
//
// Authors: The CAS Team 2013
//
package main | |
import ( | |
"bytes" | |
"flag" | |
"fmt" | |
"io" | |
"log" | |
"os" | |
"path/filepath" | |
"time" | |
) | |
// Command-line flags. The values are only meaningful after flag.Parse has
// been called.
var (
	// dirname is the bitcask directory whose hint files are scanned.
	dirname = flag.String("dirname", "", "bitcask directory")

	// printKeys controls whether each extracted bucket/key pair is printed.
	printKeys = flag.Bool("printKeys", true, "print the keys")

	// cache selects loading a hint file fully into memory before parsing it.
	cache = flag.Bool("cache", true, "read hint files in RAM first")
)
// readInt reads a 4-byte big-endian unsigned integer from r.
//
// buf is caller-supplied scratch space and must hold at least 4 bytes; its
// first four bytes are overwritten. When no bytes at all are available the
// error is io.EOF, returned unwrapped so callers can compare against it. When
// the input ends part-way through the integer a descriptive error is returned
// instead.
func readInt(r io.Reader, buf []uint8) (n uint32, err error) {
	// io.ReadFull keeps reading until all four bytes have arrived; a single
	// Read call is allowed to return fewer bytes without reporting an error.
	if _, err = io.ReadFull(r, buf[:4]); err != nil {
		if err == io.ErrUnexpectedEOF {
			return 0, fmt.Errorf("EOF when reading an int")
		}
		return 0, err
	}
	// Widen each byte to uint32 before shifting: shifting a uint8 left by 8
	// or more bits discards every bit and always yields 0.
	return uint32(buf[0])<<24 | uint32(buf[1])<<16 | uint32(buf[2])<<8 | uint32(buf[3]), nil
}
func readString(r io.ReadSeeker, buf []uint8) (s string, err error) { | |
_, err = r.Seek(1, 1) // skip 6D (field indicator?) | |
if err != nil { | |
return "", err | |
} | |
// read length | |
sz, err := readInt(r, buf) | |
if err != nil { | |
return "", err | |
} | |
// check length | |
if sz > uint32(len(buf)) { | |
return "", fmt.Errorf("String too long: %d > %d", sz, len(buf)) | |
} | |
// read string | |
count, err := r.Read(buf[:sz]) | |
if err != nil { | |
return "", err | |
} | |
if uint32(count) != sz { | |
return "", fmt.Errorf("EOF while reading string") | |
} | |
return (string)(buf[:sz]), nil | |
} | |
func extractKeys(r io.ReadSeeker) int { | |
var ( | |
err error | |
count int | |
buf []uint8 | |
) | |
count = 0 | |
buf = make([]uint8, 256) | |
for { | |
// skip timestamp (4) + key_sz (2) + total_sz (4) + offset (8) + start_of_key (3) = 21 bytes | |
_, err = r.Seek(21, 1) | |
if err != nil { | |
break | |
} | |
bucket, err := readString(r, buf) | |
if err != nil { | |
break | |
} | |
key, err := readString(r, buf) | |
if err != nil { | |
break | |
} | |
if *printKeys { | |
fmt.Printf("%s/%s\n", bucket, key) | |
} | |
count++ | |
} | |
if err != nil { | |
fmt.Fprintf(os.Stderr, "%v", err) | |
} | |
return count | |
} | |
func extractKeysFromHintFile(filename string) int { | |
f, err := os.Open(filename) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer f.Close() | |
fi, err := f.Stat() | |
if err != nil { | |
log.Fatal(err) | |
} | |
var r io.ReadSeeker = f | |
if *cache { | |
buf := make([]byte, fi.Size()) | |
_, err = io.ReadFull(f, buf) | |
if err != nil { | |
log.Fatal(err) | |
} | |
r = bytes.NewReader(buf) | |
} | |
return extractKeys(r) | |
} | |
func extractKeysFromBitcaskDir(dirname string) { | |
start := time.Now() | |
hintFiles, err := filepath.Glob(dirname + "/*/*.hint") | |
if err != nil { | |
log.Fatal(err) | |
} | |
counts := make(chan int) | |
for _, f := range hintFiles { | |
go func(filename string) { | |
counts <- extractKeysFromHintFile(filename) | |
}(f) | |
} | |
total := 0 | |
for i := 0; i < len(hintFiles); i++ { | |
total += <-counts | |
} | |
duration := time.Since(start) | |
throughput := int(float64(total) / duration.Seconds()) | |
fmt.Fprintf(os.Stderr, "Extracted %d keys in %2.3f seconds (%d keys/sec)\n", total, duration.Seconds(), throughput) | |
} | |
func main() { | |
flag.Parse() | |
if *dirname == "" { | |
flag.Usage() | |
return | |
} | |
extractKeysFromBitcaskDir(*dirname) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
So Java, Scala and Go now read 338780 keys out of the data file.
[cas-go master] $ GOMAXPROCS=1 riak-key-extractor -dirname=/Users/frschroeder/Temp/bitcask -printKeys=true -cache=false > /dev/null
Extracted 338780 keys in 1.697 seconds (199576 keys/sec)
[cas-go master] $ GOMAXPROCS=8 riak-key-extractor -dirname=/Users/frschroeder/Temp/bitcask -printKeys=true -cache=false > /dev/null
Extracted 338780 keys in 0.885 seconds (382933 keys/sec)
[cas-go master] $ GOMAXPROCS=1 riak-key-extractor -dirname=/Users/frschroeder/Temp/bitcask -printKeys=true -cache=true > /dev/null
Extracted 338780 keys in 0.465 seconds (727859 keys/sec)
[cas-go master] $ GOMAXPROCS=8 riak-key-extractor -dirname=/Users/frschroeder/Temp/bitcask -printKeys=true -cache=true > /dev/null
Extracted 338780 keys in 0.212 seconds (1601749 keys/sec)
[cas-go master] $ GOMAXPROCS=8 riak-key-extractor -dirname=/Users/frschroeder/Temp/bitcask -printKeys=false -cache=true
Extracted 338780 keys in 0.045 seconds (7498993 keys/sec)