compaction.go - external/github.com/dgraph-io/badger - Git at Google

 /*
  * Copyright 2017 Dgraph Labs, Inc. and Contributors
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package badger

 import (
 	"bytes"
 	"fmt"
 	"log"
 	"math"
 	"sync"

 	"github.com/dgraph-io/badger/v3/table"
 	"github.com/dgraph-io/badger/v3/y"
 )

 type keyRange struct {
 	left  []byte
 	right []byte
 	inf   bool
 	size  int64 // size is used for Key splits.
 }

 func (r keyRange) isEmpty() bool {
 	return len(r.left) == 0 && len(r.right) == 0 && !r.inf
 }

 var infRange = keyRange{inf: true}

 func (r keyRange) String() string {
 	return fmt.Sprintf("[left=%x, right=%x, inf=%v]", r.left, r.right, r.inf)
 }

 func (r keyRange) equals(dst keyRange) bool {
 	return bytes.Equal(r.left, dst.left) &&
 		bytes.Equal(r.right, dst.right) &&
 		r.inf == dst.inf
 }

 func (r *keyRange) extend(kr keyRange) {
 	// TODO(ibrahim): Is this needed?
 	if kr.isEmpty() {
 		return
 	}
 	if r.isEmpty() {
 		*r = kr
 	}
 	if len(r.left) == 0 || y.CompareKeys(kr.left, r.left) < 0 {
 		r.left = kr.left
 	}
 	if len(r.right) == 0 || y.CompareKeys(kr.right, r.right) > 0 {
 		r.right = kr.right
 	}
 	if kr.inf {
 		r.inf = true
 	}
 }

 func (r keyRange) overlapsWith(dst keyRange) bool {
 	// Empty keyRange always overlaps.
 	if r.isEmpty() {
 		return true
 	}
 	// TODO(ibrahim): Do you need this?
 	// Empty dst doesn't overlap with anything.
 	if dst.isEmpty() {
 		return false
 	}
 	if r.inf || dst.inf {
 		return true
 	}

 	// [dst.left, dst.right] ... [r.left, r.right]
 	// If my left is greater than dst right, we have no overlap.
 	if y.CompareKeys(r.left, dst.right) > 0 {
 		return false
 	}
 	// [r.left, r.right] ... [dst.left, dst.right]
 	// If my right is less than dst left, we have no overlap.
 	if y.CompareKeys(r.right, dst.left) < 0 {
 		return false
 	}
 	// We have overlap.
 	return true
 }

 // getKeyRange returns the smallest and the biggest in the list of tables.
 // TODO(naman): Write a test for this. The smallest and the biggest should
 // be the smallest of the leftmost table and the biggest of the right most table.
 func getKeyRange(tables ...*table.Table) keyRange {
 	if len(tables) == 0 {
 		return keyRange{}
 	}
 	smallest := tables[0].Smallest()
 	biggest := tables[0].Biggest()
 	for i := 1; i < len(tables); i++ {
 		if y.CompareKeys(tables[i].Smallest(), smallest) < 0 {
 			smallest = tables[i].Smallest()
 		}
 		if y.CompareKeys(tables[i].Biggest(), biggest) > 0 {
 			biggest = tables[i].Biggest()
 		}
 	}

 	// We pick all the versions of the smallest and the biggest key. Note that version zero would
 	// be the rightmost key, considering versions are default sorted in descending order.
 	return keyRange{
 		left:  y.KeyWithTs(y.ParseKey(smallest), math.MaxUint64),
 		right: y.KeyWithTs(y.ParseKey(biggest), 0),
 	}
 }

 type levelCompactStatus struct {
 	ranges  []keyRange
 	delSize int64
 }

 func (lcs *levelCompactStatus) debug() string {
 	var b bytes.Buffer
 	for _, r := range lcs.ranges {
 		b.WriteString(r.String())
 	}
 	return b.String()
 }

 func (lcs *levelCompactStatus) overlapsWith(dst keyRange) bool {
 	for _, r := range lcs.ranges {
 		if r.overlapsWith(dst) {
 			return true
 		}
 	}
 	return false
 }

 func (lcs *levelCompactStatus) remove(dst keyRange) bool {
 	final := lcs.ranges[:0]
 	var found bool
 	for _, r := range lcs.ranges {
 		if !r.equals(dst) {
 			final = append(final, r)
 		} else {
 			found = true
 		}
 	}
 	lcs.ranges = final
 	return found
 }

 type compactStatus struct {
 	sync.RWMutex
 	levels []*levelCompactStatus
 	tables map[uint64]struct{}
 }

 func (cs *compactStatus) overlapsWith(level int, this keyRange) bool {
 	cs.RLock()
 	defer cs.RUnlock()

 	thisLevel := cs.levels[level]
 	return thisLevel.overlapsWith(this)
 }

 func (cs *compactStatus) delSize(l int) int64 {
 	cs.RLock()
 	defer cs.RUnlock()
 	return cs.levels[l].delSize
 }

 type thisAndNextLevelRLocked struct{}

 // compareAndAdd will check whether we can run this compactDef. That it doesn't overlap with any
 // other running compaction. If it can be run, it would store this run in the compactStatus state.
 func (cs *compactStatus) compareAndAdd(_ thisAndNextLevelRLocked, cd compactDef) bool {
 	cs.Lock()
 	defer cs.Unlock()

 	tl := cd.thisLevel.level
 	y.AssertTruef(tl < len(cs.levels), "Got level %d. Max levels: %d", tl, len(cs.levels))
 	thisLevel := cs.levels[cd.thisLevel.level]
 	nextLevel := cs.levels[cd.nextLevel.level]

 	if thisLevel.overlapsWith(cd.thisRange) {
 		return false
 	}
 	if nextLevel.overlapsWith(cd.nextRange) {
 		return false
 	}
 	// Check whether this level really needs compaction or not. Otherwise, we'll end up
 	// running parallel compactions for the same level.
 	// Update: We should not be checking size here. Compaction priority already did the size checks.
 	// Here we should just be executing the wish of others.

 	thisLevel.ranges = append(thisLevel.ranges, cd.thisRange)
 	nextLevel.ranges = append(nextLevel.ranges, cd.nextRange)
 	thisLevel.delSize += cd.thisSize
 	for _, t := range append(cd.top, cd.bot...) {
 		cs.tables[t.ID()] = struct{}{}
 	}
 	return true
 }

 func (cs *compactStatus) delete(cd compactDef) {
 	cs.Lock()
 	defer cs.Unlock()

 	tl := cd.thisLevel.level
 	y.AssertTruef(tl < len(cs.levels), "Got level %d. Max levels: %d", tl, len(cs.levels))

 	thisLevel := cs.levels[cd.thisLevel.level]
 	nextLevel := cs.levels[cd.nextLevel.level]

 	thisLevel.delSize -= cd.thisSize
 	found := thisLevel.remove(cd.thisRange)
 	// The following check makes sense only if we're compacting more than one
 	// table. In case of the max level, we might rewrite a single table to
 	// remove stale data.
 	if cd.thisLevel != cd.nextLevel && !cd.nextRange.isEmpty() {
 		found = nextLevel.remove(cd.nextRange) && found
 	}

 	if !found {
 		this := cd.thisRange
 		next := cd.nextRange
 		fmt.Printf("Looking for: %s in this level %d.\n", this, tl)
 		fmt.Printf("This Level:\n%s\n", thisLevel.debug())
 		fmt.Println()
 		fmt.Printf("Looking for: %s in next level %d.\n", next, cd.nextLevel.level)
 		fmt.Printf("Next Level:\n%s\n", nextLevel.debug())
 		log.Fatal("keyRange not found")
 	}
 	for _, t := range append(cd.top, cd.bot...) {
 		_, ok := cs.tables[t.ID()]
 		y.AssertTrue(ok)
 		delete(cs.tables, t.ID())
 	}
 }
	/*
	* Copyright 2017 Dgraph Labs, Inc. and Contributors
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package badger

	import (
	"bytes"
	"fmt"
	"log"
	"math"
	"sync"

	"github.com/dgraph-io/badger/v3/table"
	"github.com/dgraph-io/badger/v3/y"
	)

	type keyRange struct {
	left []byte
	right []byte
	inf bool
	size int64 // size is used for Key splits.
	}

	func (r keyRange) isEmpty() bool {
	return len(r.left) == 0 && len(r.right) == 0 && !r.inf
	}

	var infRange = keyRange{inf: true}

	func (r keyRange) String() string {
	return fmt.Sprintf("[left=%x, right=%x, inf=%v]", r.left, r.right, r.inf)
	}

	func (r keyRange) equals(dst keyRange) bool {
	return bytes.Equal(r.left, dst.left) &&
	bytes.Equal(r.right, dst.right) &&
	r.inf == dst.inf
	}

	func (r *keyRange) extend(kr keyRange) {
	// TODO(ibrahim): Is this needed?
	if kr.isEmpty() {
	return
	}
	if r.isEmpty() {
	*r = kr
	}
	if len(r.left) == 0 \|\| y.CompareKeys(kr.left, r.left) < 0 {
	r.left = kr.left
	}
	if len(r.right) == 0 \|\| y.CompareKeys(kr.right, r.right) > 0 {
	r.right = kr.right
	}
	if kr.inf {
	r.inf = true
	}
	}

	func (r keyRange) overlapsWith(dst keyRange) bool {
	// Empty keyRange always overlaps.
	if r.isEmpty() {
	return true
	}
	// TODO(ibrahim): Do you need this?
	// Empty dst doesn't overlap with anything.
	if dst.isEmpty() {
	return false
	}
	if r.inf \|\| dst.inf {
	return true
	}

	// [dst.left, dst.right] ... [r.left, r.right]
	// If my left is greater than dst right, we have no overlap.
	if y.CompareKeys(r.left, dst.right) > 0 {
	return false
	}
	// [r.left, r.right] ... [dst.left, dst.right]
	// If my right is less than dst left, we have no overlap.
	if y.CompareKeys(r.right, dst.left) < 0 {
	return false
	}
	// We have overlap.
	return true
	}

	// getKeyRange returns the smallest and the biggest in the list of tables.
	// TODO(naman): Write a test for this. The smallest and the biggest should
	// be the smallest of the leftmost table and the biggest of the right most table.
	func getKeyRange(tables ...*table.Table) keyRange {
	if len(tables) == 0 {
	return keyRange{}
	}
	smallest := tables[0].Smallest()
	biggest := tables[0].Biggest()
	for i := 1; i < len(tables); i++ {
	if y.CompareKeys(tables[i].Smallest(), smallest) < 0 {
	smallest = tables[i].Smallest()
	}
	if y.CompareKeys(tables[i].Biggest(), biggest) > 0 {
	biggest = tables[i].Biggest()
	}
	}

	// We pick all the versions of the smallest and the biggest key. Note that version zero would
	// be the rightmost key, considering versions are default sorted in descending order.
	return keyRange{
	left: y.KeyWithTs(y.ParseKey(smallest), math.MaxUint64),
	right: y.KeyWithTs(y.ParseKey(biggest), 0),
	}
	}

	type levelCompactStatus struct {
	ranges []keyRange
	delSize int64
	}

	func (lcs *levelCompactStatus) debug() string {
	var b bytes.Buffer
	for _, r := range lcs.ranges {
	b.WriteString(r.String())
	}
	return b.String()
	}

	func (lcs *levelCompactStatus) overlapsWith(dst keyRange) bool {
	for _, r := range lcs.ranges {
	if r.overlapsWith(dst) {
	return true
	}
	}
	return false
	}

	func (lcs *levelCompactStatus) remove(dst keyRange) bool {
	final := lcs.ranges[:0]
	var found bool
	for _, r := range lcs.ranges {
	if !r.equals(dst) {
	final = append(final, r)
	} else {
	found = true
	}
	}
	lcs.ranges = final
	return found
	}

	type compactStatus struct {
	sync.RWMutex
	levels []*levelCompactStatus
	tables map[uint64]struct{}
	}

	func (cs *compactStatus) overlapsWith(level int, this keyRange) bool {
	cs.RLock()
	defer cs.RUnlock()

	thisLevel := cs.levels[level]
	return thisLevel.overlapsWith(this)
	}

	func (cs *compactStatus) delSize(l int) int64 {
	cs.RLock()
	defer cs.RUnlock()
	return cs.levels[l].delSize
	}

	type thisAndNextLevelRLocked struct{}

	// compareAndAdd will check whether we can run this compactDef. That it doesn't overlap with any
	// other running compaction. If it can be run, it would store this run in the compactStatus state.
	func (cs *compactStatus) compareAndAdd(_ thisAndNextLevelRLocked, cd compactDef) bool {
	cs.Lock()
	defer cs.Unlock()

	tl := cd.thisLevel.level
	y.AssertTruef(tl < len(cs.levels), "Got level %d. Max levels: %d", tl, len(cs.levels))
	thisLevel := cs.levels[cd.thisLevel.level]
	nextLevel := cs.levels[cd.nextLevel.level]

	if thisLevel.overlapsWith(cd.thisRange) {
	return false
	}
	if nextLevel.overlapsWith(cd.nextRange) {
	return false
	}
	// Check whether this level really needs compaction or not. Otherwise, we'll end up
	// running parallel compactions for the same level.
	// Update: We should not be checking size here. Compaction priority already did the size checks.
	// Here we should just be executing the wish of others.

	thisLevel.ranges = append(thisLevel.ranges, cd.thisRange)
	nextLevel.ranges = append(nextLevel.ranges, cd.nextRange)
	thisLevel.delSize += cd.thisSize
	for _, t := range append(cd.top, cd.bot...) {
	cs.tables[t.ID()] = struct{}{}
	}
	return true
	}

	func (cs *compactStatus) delete(cd compactDef) {
	cs.Lock()
	defer cs.Unlock()

	tl := cd.thisLevel.level
	y.AssertTruef(tl < len(cs.levels), "Got level %d. Max levels: %d", tl, len(cs.levels))

	thisLevel := cs.levels[cd.thisLevel.level]
	nextLevel := cs.levels[cd.nextLevel.level]

	thisLevel.delSize -= cd.thisSize
	found := thisLevel.remove(cd.thisRange)
	// The following check makes sense only if we're compacting more than one
	// table. In case of the max level, we might rewrite a single table to
	// remove stale data.
	if cd.thisLevel != cd.nextLevel && !cd.nextRange.isEmpty() {
	found = nextLevel.remove(cd.nextRange) && found
	}

	if !found {
	this := cd.thisRange
	next := cd.nextRange
	fmt.Printf("Looking for: %s in this level %d.\n", this, tl)
	fmt.Printf("This Level:\n%s\n", thisLevel.debug())
	fmt.Println()
	fmt.Printf("Looking for: %s in next level %d.\n", next, cd.nextLevel.level)
	fmt.Printf("Next Level:\n%s\n", nextLevel.debug())
	log.Fatal("keyRange not found")
	}
	for _, t := range append(cd.top, cd.bot...) {
	_, ok := cs.tables[t.ID()]
	y.AssertTrue(ok)
	delete(cs.tables, t.ID())
	}
	}