Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2017 Dgraph Labs, Inc. and Contributors |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package badger |
| 18 | |
| 19 | import ( |
| 20 | "sync" |
| 21 | "time" |
| 22 | |
aman bansal | b69163b | 2021-01-13 05:25:37 | [diff] [blame] | 23 | "github.com/dgraph-io/badger/v3/y" |
Ibrahim Jarif | 0beefbc | 2020-09-07 17:50:46 | [diff] [blame] | 24 | "github.com/dgraph-io/ristretto/z" |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 25 | "github.com/pkg/errors" |
| 26 | ) |
| 27 | |
// MergeOperator represents a Badger merge operator. It is bound to a single
// key: values recorded with Add are stored as separate versions of that key
// and are folded together with the merge function, either on demand by Get or
// periodically by a background goroutine.
type MergeOperator struct {
	// The embedded lock is taken exclusively while compacting and shared
	// while serving Get.
	sync.RWMutex
	// f combines an older value with a newer one.
	f MergeFunc
	// db is the database the operator reads from and writes to.
	db *DB
	// key is the key whose versions are merged.
	key []byte
	// closer is used by Stop to signal the background goroutine and to wait
	// for it to finish.
	closer *z.Closer
}
| 36 | |
// MergeFunc accepts two byte slices, one representing an existing value, and
// another representing a new value that needs to be 'merged' into it. MergeFunc
// contains the logic to perform the 'merge' and return an updated value.
// MergeFunc could perform operations like integer addition, list appends etc.
//
// Note that the ordering of the operands is maintained: existingVal is always
// the older value and newVal the newer one (or the result of merging all newer
// values), so functions that are not commutative still see their operands in
// a consistent order.
type MergeFunc func(existingVal, newVal []byte) []byte
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 43 | |
| 44 | // GetMergeOperator creates a new MergeOperator for a given key and returns a |
| 45 | // pointer to it. It also fires off a goroutine that performs a compaction using |
| 46 | // the merge function that runs periodically, as specified by dur. |
| 47 | func (db *DB) GetMergeOperator(key []byte, |
| 48 | f MergeFunc, dur time.Duration) *MergeOperator { |
| 49 | op := &MergeOperator{ |
| 50 | f: f, |
| 51 | db: db, |
| 52 | key: key, |
Ibrahim Jarif | 0beefbc | 2020-09-07 17:50:46 | [diff] [blame] | 53 | closer: z.NewCloser(1), |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 54 | } |
| 55 | |
| 56 | go op.runCompactions(dur) |
| 57 | return op |
| 58 | } |
| 59 | |
// errNoMerge is an internal sentinel returned by iterateAndMerge when the key
// has exactly one live version, i.e. there is nothing to merge. compact and
// Get both translate it into a nil error, so it is never returned to callers
// of the public API.
var errNoMerge = errors.New("No need for merge")
| 61 | |
Ibrahim Jarif | 18f8a33 | 2019-06-04 08:08:32 | [diff] [blame] | 62 | func (op *MergeOperator) iterateAndMerge() (newVal []byte, latest uint64, err error) { |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 63 | txn := op.db.NewTransaction(false) |
| 64 | defer txn.Discard() |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 65 | opt := DefaultIteratorOptions |
| 66 | opt.AllVersions = true |
Ibrahim Jarif | 53b1bee | 2019-03-07 18:13:24 | [diff] [blame] | 67 | it := txn.NewKeyIterator(op.key, opt) |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 68 | defer it.Close() |
| 69 | |
| 70 | var numVersions int |
Ibrahim Jarif | 53b1bee | 2019-03-07 18:13:24 | [diff] [blame] | 71 | for it.Rewind(); it.Valid(); it.Next() { |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 72 | item := it.Item() |
Naman Jain | e6c9d4e | 2021-03-03 08:11:05 | [diff] [blame] | 73 | if item.IsDeletedOrExpired() { |
| 74 | break |
| 75 | } |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 76 | numVersions++ |
| 77 | if numVersions == 1 { |
Ibrahim Jarif | 18f8a33 | 2019-06-04 08:08:32 | [diff] [blame] | 78 | // This should be the newVal, considering this is the latest version. |
| 79 | newVal, err = item.ValueCopy(newVal) |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 80 | if err != nil { |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 81 | return nil, 0, err |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 82 | } |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 83 | latest = item.Version() |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 84 | } else { |
Ibrahim Jarif | 18f8a33 | 2019-06-04 08:08:32 | [diff] [blame] | 85 | if err := item.Value(func(oldVal []byte) error { |
| 86 | // The merge should always be on the newVal considering it has the merge result of |
| 87 | // the latest version. The value read should be the oldVal. |
| 88 | newVal = op.f(oldVal, newVal) |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 89 | return nil |
| 90 | }); err != nil { |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 91 | return nil, 0, err |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 92 | } |
| 93 | } |
| 94 | if item.DiscardEarlierVersions() { |
| 95 | break |
| 96 | } |
| 97 | } |
| 98 | if numVersions == 0 { |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 99 | return nil, latest, ErrKeyNotFound |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 100 | } else if numVersions == 1 { |
Ibrahim Jarif | 18f8a33 | 2019-06-04 08:08:32 | [diff] [blame] | 101 | return newVal, latest, errNoMerge |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 102 | } |
Ibrahim Jarif | 18f8a33 | 2019-06-04 08:08:32 | [diff] [blame] | 103 | return newVal, latest, nil |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 104 | } |
| 105 | |
| 106 | func (op *MergeOperator) compact() error { |
| 107 | op.Lock() |
| 108 | defer op.Unlock() |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 109 | val, version, err := op.iterateAndMerge() |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 110 | if err == ErrKeyNotFound || err == errNoMerge { |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 111 | return nil |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 112 | } else if err != nil { |
| 113 | return err |
| 114 | } |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 115 | entries := []*Entry{ |
| 116 | { |
| 117 | Key: y.KeyWithTs(op.key, version), |
| 118 | Value: val, |
Manish R Jain | b21f591 | 2021-04-27 04:37:45 | [diff] [blame] | 119 | meta: BitDiscardEarlierVersions, |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 120 | }, |
| 121 | } |
| 122 | // Write value back to the DB. It is important that we do not set the bitMergeEntry bit |
| 123 | // here. When compaction happens, all the older merged entries will be removed. |
| 124 | return op.db.batchSetAsync(entries, func(err error) { |
| 125 | if err != nil { |
| 126 | op.db.opt.Errorf("failed to insert the result of merge compaction: %s", err) |
| 127 | } |
| 128 | }) |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 129 | } |
| 130 | |
| 131 | func (op *MergeOperator) runCompactions(dur time.Duration) { |
| 132 | ticker := time.NewTicker(dur) |
| 133 | defer op.closer.Done() |
| 134 | var stop bool |
| 135 | for { |
| 136 | select { |
| 137 | case <-op.closer.HasBeenClosed(): |
| 138 | stop = true |
| 139 | case <-ticker.C: // wait for tick |
| 140 | } |
| 141 | if err := op.compact(); err != nil { |
Martin Martinez Rivera | 3f66663 | 2019-01-03 00:07:41 | [diff] [blame] | 142 | op.db.opt.Errorf("failure while running merge operation: %s", err) |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 143 | } |
| 144 | if stop { |
| 145 | ticker.Stop() |
| 146 | break |
| 147 | } |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | // Add records a value in Badger which will eventually be merged by a background |
| 152 | // routine into the values that were recorded by previous invocations to Add(). |
| 153 | func (op *MergeOperator) Add(val []byte) error { |
| 154 | return op.db.Update(func(txn *Txn) error { |
Ashish Goswami | e9447c9 | 2019-05-28 05:28:37 | [diff] [blame] | 155 | return txn.SetEntry(NewEntry(op.key, val).withMergeBit()) |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 156 | }) |
| 157 | } |
| 158 | |
| 159 | // Get returns the latest value for the merge operator, which is derived by |
| 160 | // applying the merge function to all the values added so far. |
| 161 | // |
| 162 | // If Add has not been called even once, Get will return ErrKeyNotFound. |
| 163 | func (op *MergeOperator) Get() ([]byte, error) { |
| 164 | op.RLock() |
| 165 | defer op.RUnlock() |
| 166 | var existing []byte |
| 167 | err := op.db.View(func(txn *Txn) (err error) { |
Ibrahim Jarif | e74d5a7 | 2019-05-31 07:21:43 | [diff] [blame] | 168 | existing, _, err = op.iterateAndMerge() |
Manish R Jain | 4738bcf | 2018-12-27 03:22:33 | [diff] [blame] | 169 | return err |
| 170 | }) |
| 171 | if err == errNoMerge { |
| 172 | return existing, nil |
| 173 | } |
| 174 | return existing, err |
| 175 | } |
| 176 | |
// Stop waits for any pending merge to complete and then stops the background
// goroutine. It signals the operator's closer and waits on it; the goroutine
// reacts to the signal by running one final compaction before it exits.
func (op *MergeOperator) Stop() {
	op.closer.SignalAndWait()
}