| /* |
| * Copyright 2018 Dgraph Labs, Inc. and Contributors |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package badger |
| |
| import ( |
| "bytes" |
| "context" |
| "math" |
| "sync" |
| "sync/atomic" |
| "time" |
| |
| "github.com/dgraph-io/badger/pb" |
| "github.com/dgraph-io/badger/y" |
| humanize "github.com/dustin/go-humanize" |
| ) |
| |
// pageSize is the approximate number of bytes (4MB) a producer accumulates in a KVList before
// handing it over to the sender.
const pageSize = 4 * 1024 * 1024
| |
| // Stream provides a framework to concurrently iterate over a snapshot of Badger, pick up |
| // key-values, batch them up and call Send. Stream does concurrent iteration over many smaller key |
| // ranges. It does NOT send keys in lexicographical sorted order. To get keys in sorted |
| // order, use Iterator. |
| type Stream struct { |
| // Prefix to only iterate over certain range of keys. If set to nil (default), Stream would |
| // iterate over the entire DB. |
| Prefix []byte |
| |
| // Number of goroutines to use for iterating over key ranges. Defaults to 16. |
| NumGo int |
| |
| // Badger would produce log entries in Infof to indicate the progress of Stream. LogPrefix can |
| // be used to help differentiate them from other activities. Default is "Badger.Stream". |
| LogPrefix string |
| |
| // ChooseKey is invoked each time a new key is encountered. Note that this is not called |
| // on every version of the value, only the first encountered version (i.e. the highest version |
| // of the value a key has). ChooseKey can be left nil to select all keys. |
| // |
| // Note: Calls to ChooseKey are concurrent. |
| ChooseKey func(item *Item) bool |
| |
| // KeyToList, similar to ChooseKey, is only invoked on the highest version of the value. It |
| // is upto the caller to iterate over the versions and generate zero, one or more KVs. It |
| // is expected that the user would advance the iterator to go through the versions of the |
| // values. However, the user MUST immediately return from this function on the first encounter |
| // with a mismatching key. See example usage in ToList function. Can be left nil to use ToList |
| // function by default. |
| // |
| // Note: Calls to KeyToList are concurrent. |
| KeyToList func(key []byte, itr *Iterator) (*pb.KVList, error) |
| |
| // This is the method where Stream sends the final output. All calls to Send are done by a |
| // single goroutine, i.e. logic within Send method can expect single threaded execution. |
| Send func(*pb.KVList) error |
| |
| readTs uint64 |
| db *DB |
| rangeCh chan keyRange |
| kvChan chan *pb.KVList |
| nextStreamId uint32 |
| } |
| |
| // ToList is a default implementation of KeyToList. It picks up all valid versions of the key, |
| // skipping over deleted or expired keys. |
| func (st *Stream) ToList(key []byte, itr *Iterator) (*pb.KVList, error) { |
| list := &pb.KVList{} |
| for ; itr.Valid(); itr.Next() { |
| item := itr.Item() |
| if item.IsDeletedOrExpired() { |
| break |
| } |
| if !bytes.Equal(key, item.Key()) { |
| // Break out on the first encounter with another key. |
| break |
| } |
| |
| valCopy, err := item.ValueCopy(nil) |
| if err != nil { |
| return nil, err |
| } |
| kv := &pb.KV{ |
| Key: item.KeyCopy(nil), |
| Value: valCopy, |
| UserMeta: []byte{item.UserMeta()}, |
| Version: item.Version(), |
| ExpiresAt: item.ExpiresAt(), |
| } |
| list.Kv = append(list.Kv, kv) |
| if st.db.opt.NumVersionsToKeep == 1 { |
| break |
| } |
| |
| if item.DiscardEarlierVersions() { |
| break |
| } |
| } |
| return list, nil |
| } |
| |
| // keyRange is [start, end), including start, excluding end. Do ensure that the start, |
| // end byte slices are owned by keyRange struct. |
| func (st *Stream) produceRanges(ctx context.Context) { |
| splits := st.db.KeySplits(st.Prefix) |
| |
| // We don't need to create more key ranges than NumGo goroutines. This way, we will have limited |
| // number of "streams" coming out, which then helps limit the memory used by SSWriter. |
| { |
| pickEvery := int(math.Floor(float64(len(splits)) / float64(st.NumGo))) |
| if pickEvery < 1 { |
| pickEvery = 1 |
| } |
| filtered := splits[:0] |
| for i, split := range splits { |
| if (i+1)%pickEvery == 0 { |
| filtered = append(filtered, split) |
| } |
| } |
| splits = filtered |
| } |
| |
| start := y.SafeCopy(nil, st.Prefix) |
| for _, key := range splits { |
| st.rangeCh <- keyRange{left: start, right: y.SafeCopy(nil, []byte(key))} |
| start = y.SafeCopy(nil, []byte(key)) |
| } |
| // Edge case: prefix is empty and no splits exist. In that case, we should have at least one |
| // keyRange output. |
| st.rangeCh <- keyRange{left: start} |
| close(st.rangeCh) |
| } |
| |
| // produceKVs picks up ranges from rangeCh, generates KV lists and sends them to kvChan. |
| func (st *Stream) produceKVs(ctx context.Context) error { |
| var size int |
| var txn *Txn |
| if st.readTs > 0 { |
| txn = st.db.NewTransactionAt(st.readTs, false) |
| } else { |
| txn = st.db.NewTransaction(false) |
| } |
| defer txn.Discard() |
| |
| iterate := func(kr keyRange) error { |
| iterOpts := DefaultIteratorOptions |
| iterOpts.AllVersions = true |
| iterOpts.Prefix = st.Prefix |
| iterOpts.PrefetchValues = false |
| itr := txn.NewIterator(iterOpts) |
| defer itr.Close() |
| |
| // This unique stream id is used to identify all the keys from this iteration. |
| streamId := atomic.AddUint32(&st.nextStreamId, 1) |
| |
| outList := new(pb.KVList) |
| var prevKey []byte |
| for itr.Seek(kr.left); itr.Valid(); { |
| // it.Valid would only return true for keys with the provided Prefix in iterOpts. |
| item := itr.Item() |
| if bytes.Equal(item.Key(), prevKey) { |
| itr.Next() |
| continue |
| } |
| prevKey = append(prevKey[:0], item.Key()...) |
| |
| // Check if we reached the end of the key range. |
| if len(kr.right) > 0 && bytes.Compare(item.Key(), kr.right) >= 0 { |
| break |
| } |
| // Check if we should pick this key. |
| if st.ChooseKey != nil && !st.ChooseKey(item) { |
| continue |
| } |
| |
| // Now convert to key value. |
| list, err := st.KeyToList(item.KeyCopy(nil), itr) |
| if err != nil { |
| return err |
| } |
| if list == nil || len(list.Kv) == 0 { |
| continue |
| } |
| outList.Kv = append(outList.Kv, list.Kv...) |
| size += list.Size() |
| if size >= pageSize { |
| for _, kv := range outList.Kv { |
| kv.StreamId = streamId |
| } |
| select { |
| case st.kvChan <- outList: |
| case <-ctx.Done(): |
| return ctx.Err() |
| } |
| outList = new(pb.KVList) |
| size = 0 |
| } |
| } |
| if len(outList.Kv) > 0 { |
| for _, kv := range outList.Kv { |
| kv.StreamId = streamId |
| } |
| // TODO: Think of a way to indicate that a stream is over. |
| select { |
| case st.kvChan <- outList: |
| case <-ctx.Done(): |
| return ctx.Err() |
| } |
| } |
| return nil |
| } |
| |
| for { |
| select { |
| case kr, ok := <-st.rangeCh: |
| if !ok { |
| // Done with the keys. |
| return nil |
| } |
| if err := iterate(kr); err != nil { |
| return err |
| } |
| case <-ctx.Done(): |
| return ctx.Err() |
| } |
| } |
| } |
| |
| func (st *Stream) streamKVs(ctx context.Context) error { |
| var count int |
| var bytesSent uint64 |
| t := time.NewTicker(time.Second) |
| defer t.Stop() |
| now := time.Now() |
| |
| slurp := func(batch *pb.KVList) error { |
| loop: |
| for { |
| select { |
| case kvs, ok := <-st.kvChan: |
| if !ok { |
| break loop |
| } |
| y.AssertTrue(kvs != nil) |
| batch.Kv = append(batch.Kv, kvs.Kv...) |
| default: |
| break loop |
| } |
| } |
| sz := uint64(batch.Size()) |
| bytesSent += sz |
| count += len(batch.Kv) |
| t := time.Now() |
| if err := st.Send(batch); err != nil { |
| return err |
| } |
| st.db.opt.Infof("%s Created batch of size: %s in %s.\n", |
| st.LogPrefix, humanize.Bytes(sz), time.Since(t)) |
| return nil |
| } |
| |
| outer: |
| for { |
| var batch *pb.KVList |
| select { |
| case <-ctx.Done(): |
| return ctx.Err() |
| |
| case <-t.C: |
| dur := time.Since(now) |
| durSec := uint64(dur.Seconds()) |
| if durSec == 0 { |
| continue |
| } |
| speed := bytesSent / durSec |
| st.db.opt.Infof("%s Time elapsed: %s, bytes sent: %s, speed: %s/sec\n", st.LogPrefix, |
| y.FixedDuration(dur), humanize.Bytes(bytesSent), humanize.Bytes(speed)) |
| |
| case kvs, ok := <-st.kvChan: |
| if !ok { |
| break outer |
| } |
| y.AssertTrue(kvs != nil) |
| batch = kvs |
| if err := slurp(batch); err != nil { |
| return err |
| } |
| } |
| } |
| |
| st.db.opt.Infof("%s Sent %d keys\n", st.LogPrefix, count) |
| return nil |
| } |
| |
| // Orchestrate runs Stream. It picks up ranges from the SSTables, then runs NumGo number of |
| // goroutines to iterate over these ranges and batch up KVs in lists. It concurrently runs a single |
| // goroutine to pick these lists, batch them up further and send to Output.Send. Orchestrate also |
| // spits logs out to Infof, using provided LogPrefix. Note that all calls to Output.Send |
| // are serial. In case any of these steps encounter an error, Orchestrate would stop execution and |
| // return that error. Orchestrate can be called multiple times, but in serial order. |
| func (st *Stream) Orchestrate(ctx context.Context) error { |
| st.rangeCh = make(chan keyRange, 3) // Contains keys for posting lists. |
| |
| // kvChan should only have a small capacity to ensure that we don't buffer up too much data if |
| // sending is slow. Page size is set to 4MB, which is used to lazily cap the size of each |
| // KVList. To get 128MB buffer, we can set the channel size to 32. |
| st.kvChan = make(chan *pb.KVList, 32) |
| |
| if st.KeyToList == nil { |
| st.KeyToList = st.ToList |
| } |
| |
| // Picks up ranges from Badger, and sends them to rangeCh. |
| go st.produceRanges(ctx) |
| |
| errCh := make(chan error, 1) // Stores error by consumeKeys. |
| var wg sync.WaitGroup |
| for i := 0; i < st.NumGo; i++ { |
| wg.Add(1) |
| go func() { |
| defer wg.Done() |
| // Picks up ranges from rangeCh, generates KV lists, and sends them to kvChan. |
| if err := st.produceKVs(ctx); err != nil { |
| select { |
| case errCh <- err: |
| default: |
| } |
| } |
| }() |
| } |
| |
| // Pick up key-values from kvChan and send to stream. |
| kvErr := make(chan error, 1) |
| go func() { |
| // Picks up KV lists from kvChan, and sends them to Output. |
| kvErr <- st.streamKVs(ctx) |
| }() |
| wg.Wait() // Wait for produceKVs to be over. |
| close(st.kvChan) // Now we can close kvChan. |
| |
| select { |
| case err := <-errCh: // Check error from produceKVs. |
| return err |
| default: |
| } |
| |
| // Wait for key streaming to be over. |
| err := <-kvErr |
| return err |
| } |
| |
| func (db *DB) newStream() *Stream { |
| return &Stream{db: db, NumGo: 16, LogPrefix: "Badger.Stream"} |
| } |
| |
| // NewStream creates a new Stream. |
| func (db *DB) NewStream() *Stream { |
| if db.opt.managedTxns { |
| panic("This API can not be called in managed mode.") |
| } |
| return db.newStream() |
| } |
| |
| // NewStreamAt creates a new Stream at a particular timestamp. Should only be used with managed DB. |
| func (db *DB) NewStreamAt(readTs uint64) *Stream { |
| if !db.opt.managedTxns { |
| panic("This API can only be called in managed mode.") |
| } |
| stream := db.newStream() |
| stream.readTs = readTs |
| return stream |
| } |