Info command: Show histogram of key/value sizes and number of keys per table (#730)
* Add ShowKeyValueHistogram method
This commit adds ShowKeyValueHistogram method and types associated with it.
Signed-off-by: Ibrahim Jarif <[email protected]>
* Remove whitespace
Signed-off-by: Ibrahim Jarif <[email protected]>
* Fix comment
Signed-off-by: Ibrahim Jarif <[email protected]>
* Move histogram code to histogram.go and add histogram_test.go file
Signed-off-by: Ibrahim Jarif <[email protected]>
* Show number of keys present per SSTable
The output of `badger info` command now shows the number of keys per SSTable
Signed-off-by: Ibrahim Jarif <[email protected]>
* Rename ShowKeyValueSizeHistogram to PrintKeyValueHistogram
Signed-off-by: Ibrahim Jarif <[email protected]>
diff --git a/badger/cmd/info.go b/badger/cmd/info.go
index fd3ef92..b84fd6d 100644
--- a/badger/cmd/info.go
+++ b/badger/cmd/info.go
@@ -33,6 +33,23 @@
"github.com/spf13/cobra"
)
+type flagOptions struct {
+ showTables bool
+ sizeHistogram bool
+}
+
+var (
+ opt flagOptions
+)
+
+func init() {
+ RootCmd.AddCommand(infoCmd)
+ infoCmd.Flags().BoolVarP(&opt.showTables, "show-tables", "s", false,
+ "If set to true, show tables as well.")
+ infoCmd.Flags().BoolVar(&opt.sizeHistogram, "histogram", false,
+ "Show a histogram of the key and value sizes.")
+}
+
var infoCmd = &cobra.Command{
Use: "info",
Short: "Health info about Badger database.",
@@ -48,25 +65,35 @@
fmt.Println("Error:", err.Error())
os.Exit(1)
}
- if !showTables {
+ if !opt.showTables {
return
}
- err = tableInfo(sstDir, vlogDir)
+ // Open DB
+ opts := badger.DefaultOptions
+ opts.TableLoadingMode = options.MemoryMap
+ opts.Dir = sstDir
+ opts.ValueDir = vlogDir
+ opts.ReadOnly = true
+
+ db, err := badger.Open(opts)
if err != nil {
fmt.Println("Error:", err.Error())
os.Exit(1)
}
+ defer db.Close()
+
+ err = tableInfo(sstDir, vlogDir, db)
+ if err != nil {
+ fmt.Println("Error:", err.Error())
+ os.Exit(1)
+ }
+ if opt.sizeHistogram {
+ // A nil prefix matches every key, so the histogram covers the whole DB.
+ db.PrintKeyValueHistogram(nil)
+ }
},
}
-var showTables bool
-
-func init() {
- RootCmd.AddCommand(infoCmd)
- infoCmd.Flags().BoolVarP(&showTables, "show-tables", "s", false,
- "If set to true, show tables as well.")
-}
-
func hbytes(sz int64) string {
return humanize.Bytes(uint64(sz))
}
@@ -75,27 +102,20 @@
return humanize.RelTime(dst, src, "earlier", "later")
}
-func tableInfo(dir, valueDir string) error {
- // Open DB
- opts := badger.DefaultOptions
- opts.TableLoadingMode = options.MemoryMap
- opts.Dir = sstDir
- opts.ValueDir = vlogDir
- opts.ReadOnly = true
-
- db, err := badger.Open(opts)
- if err != nil {
- return err
- }
- defer db.Close()
-
+func tableInfo(dir, valueDir string, db *badger.DB) error {
tables := db.Tables()
+ fmt.Printf("\n%s SSTables %[1]s\n", strings.Repeat("=", 45))
+ fmt.Printf("%-5s\t%-10s\t%-30s\t%-30s\t%-7s\n", "ID", "Level",
+ "Left-Key(in hex) (Time)", "Right-Key(in hex) (Time)", "Total Keys")
+ fmt.Printf("%s\n", strings.Repeat("=", 100))
for _, t := range tables {
- lk, lv := y.ParseKey(t.Left), y.ParseTs(t.Left)
- rk, rv := y.ParseKey(t.Right), y.ParseTs(t.Right)
- fmt.Printf("SSTable [L%d, %03d] [%20X, v%-10d -> %20X, v%-10d]\n",
- t.Level, t.ID, lk, lv, rk, rv)
+ lk, lt := y.ParseKey(t.Left), y.ParseTs(t.Left)
+ rk, rt := y.ParseKey(t.Right), y.ParseTs(t.Right)
+
+ fmt.Printf("%-5d\tL%-9d\t%-30s\t%-30s\t%-7d\n", t.ID, t.Level,
+ fmt.Sprintf("%X (v%d)", lk, lt), fmt.Sprintf("%X (v%d)", rk, rt), t.KeyCount)
}
+ fmt.Println()
return nil
}
@@ -135,7 +155,6 @@
fmt.Println()
var baseTime time.Time
- // fmt.Print("\n[Manifest]\n")
manifestTruncated := false
manifestInfo, ok := fileinfoByName[badger.ManifestFilename]
if ok {
diff --git a/histogram.go b/histogram.go
new file mode 100644
index 0000000..0d46439
--- /dev/null
+++ b/histogram.go
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2019 Dgraph Labs, Inc. and Contributors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package badger
+
+import (
+ "fmt"
+ "math"
+)
+
+// PrintKeyValueHistogram builds and displays the key-value size histogram.
+// When keyPrefix is set, only the keys that have prefix "keyPrefix" are
+// considered for creating the histogram
+func (db *DB) PrintKeyValueHistogram(keyPrefix []byte) {
+ if db == nil {
+ fmt.Println("\nCannot build histogram: DB is nil.")
+ return
+ }
+ histogram := db.buildKeyValueSizeHistogram(keyPrefix)
+ fmt.Printf("Histogram of key sizes (in bytes)\n")
+ histogram.keySizeHistogram.printHistogram()
+ fmt.Printf("Histogram of value sizes (in bytes)\n")
+ histogram.valueSizeHistogram.printHistogram()
+}
+
+// histogramData stores information about a histogram
+type histogramData struct {
+ bins []int64
+ countPerBin []int64
+ totalCount int64
+ min int64
+ max int64
+ sum int64
+}
+
+// keyValueSizeHistogram contains keySize histogram and valueSize histogram
+type keyValueSizeHistogram struct {
+ keySizeHistogram, valueSizeHistogram histogramData
+}
+
+// newKeyValueSizeHistogram returns a new instance of keyValueSizeHistogram with
+// properly initialized fields.
+func newKeyValueSizeHistogram() *keyValueSizeHistogram {
+ // TODO(ibrahim): find appropriate bin size.
+ keyBins := createHistogramBins(1, 16)
+ valueBins := createHistogramBins(1, 30)
+ return &keyValueSizeHistogram{
+ keySizeHistogram: histogramData{
+ bins: keyBins,
+ countPerBin: make([]int64, len(keyBins)+1),
+ max: math.MinInt64,
+ min: math.MaxInt64,
+ sum: 0,
+ },
+ valueSizeHistogram: histogramData{
+ bins: valueBins,
+ countPerBin: make([]int64, len(valueBins)+1),
+ max: math.MinInt64,
+ min: math.MaxInt64,
+ sum: 0,
+ },
+ }
+}
+
+// createHistogramBins creates bins for a histogram. The bin boundaries are
+// powers of two of the form [2^minExponent, ..., 2^maxExponent].
+func createHistogramBins(minExponent, maxExponent uint32) []int64 {
+ var bins []int64
+ for i := minExponent; i <= maxExponent; i++ {
+ bins = append(bins, int64(1)<<i)
+ }
+ return bins
+}
+
+// Update records value in the histogram: it adjusts min/max, adds value to
+// sum, and increments totalCount and the count of the bin containing value.
+func (histogram *histogramData) Update(value int64) {
+ if value > histogram.max {
+ histogram.max = value
+ }
+ if value < histogram.min {
+ histogram.min = value
+ }
+
+ histogram.sum += value
+ histogram.totalCount++
+
+ for index := 0; index <= len(histogram.bins); index++ {
+ // Allocate value in the last bucket if we reached the end of the bins slice.
+ if index == len(histogram.bins) {
+ histogram.countPerBin[index]++
+ break
+ }
+
+ // Check if the value should be added to the "index" bin
+ if value < int64(histogram.bins[index]) {
+ histogram.countPerBin[index]++
+ break
+ }
+ }
+}
+
+// buildKeyValueSizeHistogram builds the key-value size histogram.
+// When keyPrefix is set, only the keys that have prefix "keyPrefix" are
+// considered for creating the histogram
+func (db *DB) buildKeyValueSizeHistogram(keyPrefix []byte) *keyValueSizeHistogram {
+ txn := db.NewTransaction(false)
+ defer txn.Discard()
+
+ itr := txn.NewIterator(DefaultIteratorOptions)
+ defer itr.Close()
+
+ badgerHistogram := newKeyValueSizeHistogram()
+
+ // Collect key and value sizes.
+ for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() {
+ item := itr.Item()
+ badgerHistogram.keySizeHistogram.Update(item.KeySize())
+ badgerHistogram.valueSizeHistogram.Update(item.ValueSize())
+ }
+ return badgerHistogram
+}
+
+// printHistogram prints the histogram data in a human-readable format.
+//
+// It writes the total count, min, max and mean to stdout, followed by one
+// "[lower, upper) count" row for every non-empty bin. For an empty histogram
+// only the total count is printed: min and max are meaningful only after at
+// least one Update, and the mean would be computed as 0/0.
+func (histogram histogramData) printHistogram() {
+	fmt.Printf("Total count: %d\n", histogram.totalCount)
+	if histogram.totalCount == 0 {
+		// Nothing was recorded, so there are no statistics or bins to show.
+		fmt.Println()
+		return
+	}
+	fmt.Printf("Min value: %d\n", histogram.min)
+	fmt.Printf("Max value: %d\n", histogram.max)
+	fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount))
+	fmt.Printf("%24s %9s\n", "Range", "Count")
+
+	numBins := len(histogram.bins)
+	for index, count := range histogram.countPerBin {
+		if count == 0 {
+			continue
+		}
+
+		// The last count is the overflow bin: it covers the range from the
+		// largest bin boundary up to infinity, so it is printed differently
+		// than the other bins.
+		if index == len(histogram.countPerBin)-1 {
+			lowerBound := int(histogram.bins[numBins-1])
+			fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count)
+			continue
+		}
+
+		// Bin "index" covers [bins[index-1], bins[index]); the first bin
+		// starts at 0.
+		upperBound := int(histogram.bins[index])
+		lowerBound := 0
+		if index > 0 {
+			lowerBound = int(histogram.bins[index-1])
+		}
+
+		fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count)
+	}
+	fmt.Println()
+}
diff --git a/histogram_test.go b/histogram_test.go
new file mode 100644
index 0000000..240fca6
--- /dev/null
+++ b/histogram_test.go
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2019 Dgraph Labs, Inc. and Contributors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package badger
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestBuildKeyValueSizeHistogram(t *testing.T) {
+ t.Run("All same size key-values", func(t *testing.T) {
+ runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+ entries := int64(40)
+ err := db.Update(func(txn *Txn) error {
+ for i := int64(0); i < entries; i++ {
+ // string(rune(i)) yields the same one-byte keys as before but makes the int-to-rune conversion explicit, which go vet requires.
+ err := txn.Set([]byte(string(rune(i))), []byte("B"))
+ if err != nil {
+ return err
+ }
+ }
+ return nil
+ })
+ require.NoError(t, err)
+
+ histogram := db.buildKeyValueSizeHistogram(nil)
+ keyHistogram := histogram.keySizeHistogram
+ valueHistogram := histogram.valueSizeHistogram
+
+ require.Equal(t, entries, keyHistogram.totalCount)
+ require.Equal(t, entries, valueHistogram.totalCount)
+
+ // Each entry is of size one. So the sum of sizes should be the same
+ // as number of entries
+ require.Equal(t, entries, valueHistogram.sum)
+ require.Equal(t, entries, keyHistogram.sum)
+
+ // All value sizes are same. The first bin should have all the values.
+ require.Equal(t, entries, valueHistogram.countPerBin[0])
+ require.Equal(t, entries, keyHistogram.countPerBin[0])
+
+ require.Equal(t, int64(1), keyHistogram.max)
+ require.Equal(t, int64(1), keyHistogram.min)
+ require.Equal(t, int64(1), valueHistogram.max)
+ require.Equal(t, int64(1), valueHistogram.min)
+ })
+ })
+
+ t.Run("different size key-values", func(t *testing.T) {
+ runBadgerTest(t, nil, func(t *testing.T, db *DB) {
+ entries := int64(3)
+ err := db.Update(func(txn *Txn) error {
+ if err := txn.Set([]byte("A"), []byte("B")); err != nil {
+ return err
+ }
+
+ if err := txn.Set([]byte("AA"), []byte("BB")); err != nil {
+ return err
+ }
+
+ if err := txn.Set([]byte("AAA"), []byte("BBB")); err != nil {
+ return err
+ }
+ return nil
+ })
+ require.NoError(t, err)
+
+ histogram := db.buildKeyValueSizeHistogram(nil)
+ keyHistogram := histogram.keySizeHistogram
+ valueHistogram := histogram.valueSizeHistogram
+
+ require.Equal(t, entries, keyHistogram.totalCount)
+ require.Equal(t, entries, valueHistogram.totalCount)
+
+ // Keys and values have sizes 1, 2 and 3. So the sum of sizes should
+ // be 6 for both keys and values
+ require.Equal(t, int64(6), valueHistogram.sum)
+ require.Equal(t, int64(6), keyHistogram.sum)
+
+ // Length 1 key is in first bucket, length 2 and 3 are in the second
+ // bucket
+ require.Equal(t, int64(1), valueHistogram.countPerBin[0])
+ require.Equal(t, int64(2), valueHistogram.countPerBin[1])
+ require.Equal(t, int64(1), keyHistogram.countPerBin[0])
+ require.Equal(t, int64(2), keyHistogram.countPerBin[1])
+
+ require.Equal(t, int64(3), keyHistogram.max)
+ require.Equal(t, int64(1), keyHistogram.min)
+ require.Equal(t, int64(3), valueHistogram.max)
+ require.Equal(t, int64(1), valueHistogram.min)
+ })
+ })
+}
diff --git a/iterator.go b/iterator.go
index 7e79266..f6df716 100644
--- a/iterator.go
+++ b/iterator.go
@@ -246,6 +246,12 @@
return int64(vp.Len) // includes key length.
}
+// KeySize returns the size of the key, not counting the timestamp.
+// Exact size of the stored key is this size + 8 bytes of timestamp.
+func (item *Item) KeySize() int64 {
+ return int64(len(item.key))
+}
+
// ValueSize returns the exact size of the value.
//
// This can be called to quickly estimate the size of a value without fetching
diff --git a/levels.go b/levels.go
index 9c8a490..9d2b501 100644
--- a/levels.go
+++ b/levels.go
@@ -945,20 +945,27 @@
// TableInfo represents the information about a table.
type TableInfo struct {
- ID uint64
- Level int
- Left []byte
- Right []byte
+ ID uint64
+ Level int
+ Left []byte
+ Right []byte
+ KeyCount uint64 // Number of keys in the table
}
func (s *levelsController) getTableInfo() (result []TableInfo) {
for _, l := range s.levels {
for _, t := range l.tables {
+ it := t.NewIterator(false)
+ var count uint64
+ for it.Rewind(); it.Valid(); it.Next() {
+ count++
+ }
+ // Close the iterator once counting is done so one is not left open per
+ // table. NOTE(review): confirm table.Iterator.Close releases the table ref.
+ it.Close()
info := TableInfo{
- ID: t.ID(),
- Level: l.level,
- Left: t.Smallest(),
- Right: t.Biggest(),
+ ID: t.ID(),
+ Level: l.level,
+ Left: t.Smallest(),
+ Right: t.Biggest(),
+ KeyCount: count,
}
result = append(result, info)
}
diff --git a/table/table.go b/table/table.go
index 4e57b91..5ec2b30 100644
--- a/table/table.go
+++ b/table/table.go
@@ -59,7 +59,7 @@
tableSize int // Initialized in OpenTable, using fd.Stat().
blockIndex []keyOffset
- ref int32 // For file garbage collection. Atomic.
+ ref int32 // For file garbage collection. Atomic.
loadingMode options.FileLoadingMode
mmap []byte // Memory mapped.