Skip to content

Commit

Permalink
use left right diff instead of top down due to bugs in top down (dolt…
Browse files Browse the repository at this point in the history
  • Loading branch information
Brian Hendriks authored Mar 18, 2021
1 parent 05e316a commit 3c46382
Showing 5 changed files with 4 additions and 168 deletions.
4 changes: 2 additions & 2 deletions go/store/diff/diff.go
Original file line number Diff line number Diff line change
@@ -305,7 +305,7 @@ func (d differ) diffMapsInRange(ctx context.Context, p types.Path, v1, v2 types.
panic("not implemented")
}

return v2.DiffHybrid(ctx, v1, cc)
return v2.Diff(ctx, v1, cc)
}
},
func(k types.Value) (types.Value, error) {
@@ -364,7 +364,7 @@ func (d differ) diffSets(ctx context.Context, p types.Path, v1, v2 types.Set) er
if d.leftRight {
return v2.DiffLeftRight(ctx, v1, cc)
} else {
return v2.DiffHybrid(ctx, v1, cc)
return v2.Diff(ctx, v1, cc)
}
},
func(k types.Value) (types.Value, error) { return k, nil },
11 changes: 1 addition & 10 deletions go/store/types/map.go
Original file line number Diff line number Diff line change
@@ -191,16 +191,7 @@ func (m Map) Diff(ctx context.Context, last Map, changes chan<- ValueChanged) er
if m.Equals(last) {
return nil
}
return orderedSequenceDiffTopDown(ctx, last.orderedSequence, m.orderedSequence, changes)
}

// DiffHybrid computes the diff from |last| to |m| using a hybrid algorithm
// which balances returning results early vs completing quickly, if possible.
func (m Map) DiffHybrid(ctx context.Context, last Map, changes chan<- ValueChanged) error {
if m.Equals(last) {
return nil
}
return orderedSequenceDiffBest(ctx, last.orderedSequence, m.orderedSequence, changes)
return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, m.orderedSequence, changes)
}

// DiffLeftRight computes the diff from |last| to |m| using a left-to-right
140 changes: 0 additions & 140 deletions go/store/types/ordered_sequences_diff.go
Original file line number Diff line number Diff line change
@@ -24,11 +24,7 @@ package types
import (
"context"

"golang.org/x/sync/errgroup"

"github.com/dolthub/dolt/go/libraries/utils/async"
"github.com/dolthub/dolt/go/store/d"
"github.com/dolthub/dolt/go/store/util/functions"
)

type DiffChangeType uint8
@@ -53,142 +49,6 @@ func sendChange(ctx context.Context, changes chan<- ValueChanged, change ValueCh
}
}

// Streams the diff from |last| to |current| into |changes|, using both left-right and top-down approach in parallel.
// The left-right diff is expected to return results earlier, whereas the top-down approach is faster overall. This "best" algorithm runs both:
// - early results from left-right are sent to |changes|.
// - if/when top-down catches up, left-right is stopped and the rest of the changes are streamed from top-down.
func orderedSequenceDiffBest(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
lrChanges := make(chan ValueChanged)
tdChanges := make(chan ValueChanged)

eg, ctx := errgroup.WithContext(ctx)

lrCancel := async.GoWithCancel(ctx, eg, func(ctx context.Context) error {
defer close(lrChanges)
return orderedSequenceDiffLeftRight(ctx, last, current, lrChanges)
})
tdCancel := async.GoWithCancel(ctx, eg, func(ctx context.Context) error {
defer close(tdChanges)
return orderedSequenceDiffTopDown(ctx, last, current, tdChanges)
})

eg.Go(func() error {
defer lrCancel()
defer tdCancel()

// Stream left-right changes while the top-down diff algorithm catches up.
var lrChangeCount, tdChangeCount int
for multiplexing := true; multiplexing; {
select {
case c, ok := <-lrChanges:
if !ok {
// Left-to-right diff completed.
return nil
}
lrChangeCount++
if err := sendChange(ctx, changes, c); err != nil {
return err
}
case c, ok := <-tdChanges:
if !ok {
// Top-down diff completed.
return nil
}
tdChangeCount++
if tdChangeCount > lrChangeCount {
// Top-down diff has overtaken left-right diff.
if err := sendChange(ctx, changes, c); err != nil {
return err
}
lrCancel()
multiplexing = false
}
}
}

for c := range tdChanges {
if err := sendChange(ctx, changes, c); err != nil {
return err
}
}

return nil
})

return eg.Wait()
}

// Streams the diff from |last| to |current| into |changes|, using a top-down approach.
// Top-down is parallel and efficiently returns the complete diff, but compared to left-right it's slow to start streaming changes.
func orderedSequenceDiffTopDown(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
return orderedSequenceDiffInternalNodes(ctx, last, current, changes)
}

// TODO - something other than the literal edit-distance, which is way too much cpu work for this case - https://github.com/attic-labs/noms/issues/2027
func orderedSequenceDiffInternalNodes(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
if ctx.Err() != nil {
return ctx.Err()
}

if last.treeLevel() > current.treeLevel() {
lastChild, err := last.getCompositeChildSequence(ctx, 0, uint64(last.seqLen()))
if err != nil {
return err
}
return orderedSequenceDiffInternalNodes(ctx, lastChild.(orderedSequence), current, changes)
}

if current.treeLevel() > last.treeLevel() {
currentChild, err := current.getCompositeChildSequence(ctx, 0, uint64(current.seqLen()))
if err != nil {
return err
}
return orderedSequenceDiffInternalNodes(ctx, last, currentChild.(orderedSequence), changes)
}

if last.isLeaf() && current.isLeaf() {
return orderedSequenceDiffLeftRight(ctx, last, current, changes)
}

compareFn := last.getCompareFn(current)
initialSplices, err := calcSplices(uint64(last.seqLen()), uint64(current.seqLen()), DEFAULT_MAX_SPLICE_MATRIX_SIZE,
func(i uint64, j uint64) (bool, error) { return compareFn(int(i), int(j)) })
if err != nil {
return err
}

for _, splice := range initialSplices {
var lastChild, currentChild orderedSequence
err := functions.All(
func() error {
seq, err := last.getCompositeChildSequence(ctx, splice.SpAt, splice.SpRemoved)
if err != nil {
return err
}
lastChild = seq.(orderedSequence)
return nil
},
func() error {
seq, err := current.getCompositeChildSequence(ctx, splice.SpFrom, splice.SpAdded)
if err != nil {
return err
}
currentChild = seq.(orderedSequence)
return nil
},
)
if err != nil {
return err
}

if err := orderedSequenceDiffInternalNodes(ctx, lastChild, currentChild, changes); err != nil {
return err
}
}

return nil
}

// Streams the diff from |last| to |current| into |changes|, using a left-right approach.
// Left-right immediately descends to the first change and starts streaming changes, but compared to top-down it's serial and much slower to calculate the full diff.
func orderedSequenceDiffLeftRight(ctx context.Context, last orderedSequence, current orderedSequence, changes chan<- ValueChanged) error {
6 changes: 0 additions & 6 deletions go/store/types/ordered_sequences_diff_test.go
Original file line number Diff line number Diff line change
@@ -111,9 +111,7 @@ func (suite *diffTestSuite) TestDiff() {
}

runTest := func(name string, vf valFn, cf colFn) {
//runTestDf(name, vf, cf, orderedSequenceDiffTopDown)
runTestDf(name, vf, cf, orderedSequenceDiffLeftRight)
//runTestDf(name, vf, cf, orderedSequenceDiffBest)
}

newSetAsCol := func(vals []Value) (Collection, error) { return NewSet(context.Background(), vs, vals...) }
@@ -246,9 +244,7 @@ func TestOrderedSequencesDiffCloseWithoutReading(t *testing.T) {
assert.Equal(t, err, context.Canceled)
}

t.Run("Best", func(t *testing.T) { runTest(t, orderedSequenceDiffBest) })
t.Run("LeftRight", func(t *testing.T) { runTest(t, orderedSequenceDiffLeftRight) })
t.Run("TopDown", func(t *testing.T) { runTest(t, orderedSequenceDiffTopDown) })
}

func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) {
@@ -320,7 +316,5 @@ func TestOrderedSequenceDiffWithMetaNodeGap(t *testing.T) {
require.NoError(t, err)
}

runTest(orderedSequenceDiffBest)
runTest(orderedSequenceDiffLeftRight)
runTest(orderedSequenceDiffTopDown)
}
11 changes: 1 addition & 10 deletions go/store/types/set.go
Original file line number Diff line number Diff line change
@@ -127,16 +127,7 @@ func (s Set) Diff(ctx context.Context, last Set, changes chan<- ValueChanged) er
if s.Equals(last) {
return nil
}
return orderedSequenceDiffTopDown(ctx, last.orderedSequence, s.orderedSequence, changes)
}

// DiffHybrid computes the diff from |last| to |s| using a hybrid algorithm
// which balances returning results early vs completing quickly, if possible.
func (s Set) DiffHybrid(ctx context.Context, last Set, changes chan<- ValueChanged) error {
if s.Equals(last) {
return nil
}
return orderedSequenceDiffBest(ctx, last.orderedSequence, s.orderedSequence, changes)
return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, s.orderedSequence, changes)
}

// DiffLeftRight computes the diff from |last| to |s| using a left-to-right

0 comments on commit 3c46382

Please sign in to comment.