From 1ea9f03f578f8e935fcc817968da689db6b176d7 Mon Sep 17 00:00:00 2001 From: mcrakhman Date: Mon, 15 Aug 2022 23:31:00 +0200 Subject: [PATCH] Refactoring doc tree --- pkg/acl/account/accountdata.go | 3 +- pkg/acl/tree/acltree.go | 3 +- pkg/acl/tree/change.go | 32 +++++++ pkg/acl/tree/doctree.go | 165 ++++++++++++++------------------- pkg/acl/tree/treebuilder.go | 71 ++++---------- 5 files changed, 123 insertions(+), 151 deletions(-) diff --git a/pkg/acl/account/accountdata.go b/pkg/acl/account/accountdata.go index 7b0c773b..07530083 100644 --- a/pkg/acl/account/accountdata.go +++ b/pkg/acl/account/accountdata.go @@ -1,6 +1,7 @@ package account import ( + "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys" "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys/asymmetric/encryptionkey" "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys/asymmetric/signingkey" ) @@ -9,5 +10,5 @@ type AccountData struct { // TODO: create a convenient constructor for this Identity string // TODO: this is essentially the same as sign key SignKey signingkey.PrivKey EncKey encryptionkey.PrivKey - Decoder signingkey.PubKeyDecoder + Decoder keys.Decoder } diff --git a/pkg/acl/tree/acltree.go b/pkg/acl/tree/acltree.go index c0724c97..6b148f36 100644 --- a/pkg/acl/tree/acltree.go +++ b/pkg/acl/tree/acltree.go @@ -458,12 +458,11 @@ func (a *aclTree) ChangesAfterCommonSnapshot(theirPath []string) ([]*aclpb.RawCh return nil, err } - aclChange, err := a.treeBuilder.makeUnverifiedACLChange(raw) + ch, err := NewFromRawChange(raw) if err != nil { return nil, err } - ch := NewChange(id, aclChange) rawChanges = append(rawChanges, raw) return ch, nil } diff --git a/pkg/acl/tree/change.go b/pkg/acl/tree/change.go index ac0a55c2..7924ea5d 100644 --- a/pkg/acl/tree/change.go +++ b/pkg/acl/tree/change.go @@ -3,6 +3,8 @@ package tree import ( "fmt" "github.com/anytypeio/go-anytype-infrastructure-experiments/pkg/acl/aclchanges/aclpb" + "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys" + "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys/asymmetric/signingkey" "github.com/gogo/protobuf/proto" "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys/symmetric" @@ -59,6 +61,36 @@ func NewFromRawChange(rawChange *aclpb.RawChange) (*Change, error) { return ch, nil } +func NewFromVerifiedRawChange( + rawChange *aclpb.RawChange, + identityKeys map[string]signingkey.PubKey, + decoder keys.Decoder) (*Change, error) { + unmarshalled := &aclpb.Change{} + err := proto.Unmarshal(rawChange.Payload, unmarshalled) + if err != nil { + return nil, err + } + + identityKey, exists := identityKeys[unmarshalled.Identity] + if !exists { + key, err := decoder.DecodeFromString(unmarshalled.Identity) + if err != nil { + return nil, err + } + identityKey = key.(signingkey.PubKey) + identityKeys[unmarshalled.Identity] = identityKey + } + res, err := identityKey.Verify(rawChange.Payload, rawChange.Signature) + if err != nil { + return nil, err + } + if !res { + return nil, fmt.Errorf("change has incorrect signature") + } + + return NewChange(rawChange.Id, unmarshalled), nil +} + func NewChange(id string, ch *aclpb.Change) *Change { return &Change{ Next: nil, diff --git a/pkg/acl/tree/doctree.go b/pkg/acl/tree/doctree.go index 00960554..63933983 100644 --- a/pkg/acl/tree/doctree.go +++ b/pkg/acl/tree/doctree.go @@ -7,6 +7,7 @@ import ( "github.com/anytypeio/go-anytype-infrastructure-experiments/pkg/acl/treestorage" "github.com/anytypeio/go-anytype-infrastructure-experiments/pkg/acl/treestorage/treepb" "github.com/anytypeio/go-anytype-infrastructure-experiments/util/cid" + "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys" "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys/asymmetric/signingkey" "github.com/gogo/protobuf/proto" "go.uber.org/zap" @@ -38,6 +39,12 @@ type docTree struct { treeBuilder *treeBuilder validator DocTreeValidator + difSnapshotBuf []*aclpb.RawChange + tmpChangesBuf []*Change + notSeenIdxBuf []int + + identityKeys map[string]signingkey.PubKey + sync.RWMutex } @@ -52,16 +59,12 @@ func BuildDocTreeWithIdentity(t treestorage.TreeStorage, acc *account.AccountDat treeBuilder: treeBuilder, validator: validator, updateListener: listener, + tmpChangesBuf: make([]*Change, 0, 10), + difSnapshotBuf: make([]*aclpb.RawChange, 0, 10), + notSeenIdxBuf: make([]int, 0, 10), + identityKeys: make(map[string]signingkey.PubKey), } - err := docTree.rebuildFromStorage(aclTree) - if err != nil { - return nil, err - } - err = docTree.removeOrphans() - if err != nil { - return nil, err - } - err = t.SetHeads(docTree.Heads()) + err := docTree.rebuildFromStorage(aclTree, nil) if err != nil { return nil, err } @@ -81,7 +84,7 @@ func BuildDocTreeWithIdentity(t treestorage.TreeStorage, acc *account.AccountDat return docTree, nil } -func BuildDocTree(t treestorage.TreeStorage, decoder signingkey.PubKeyDecoder, listener TreeUpdateListener, aclTree ACLTree) (DocTree, error) { +func BuildDocTree(t treestorage.TreeStorage, decoder keys.Decoder, listener TreeUpdateListener, aclTree ACLTree) (DocTree, error) { treeBuilder := newTreeBuilder(t, decoder) validator := newTreeValidator() @@ -91,16 +94,12 @@ func BuildDocTree(t treestorage.TreeStorage, decoder signingkey.PubKeyDecoder, l treeBuilder: treeBuilder, validator: validator, updateListener: listener, + tmpChangesBuf: make([]*Change, 0, 10), + difSnapshotBuf: make([]*aclpb.RawChange, 0, 10), + notSeenIdxBuf: make([]int, 0, 10), + identityKeys: make(map[string]signingkey.PubKey), } - err := docTree.rebuildFromStorage(aclTree) - if err != nil { - return nil, err - } - err = docTree.removeOrphans() - if err != nil { - return nil, err - } - err = t.SetHeads(docTree.Heads()) + err := docTree.rebuildFromStorage(aclTree, nil) if err != nil { return nil, err } @@ -120,29 +119,10 @@ func BuildDocTree(t treestorage.TreeStorage, decoder signingkey.PubKeyDecoder, l return docTree, nil } -func (d *docTree) removeOrphans() error { - // removing attached or invalid orphans - var toRemove []string +func (d *docTree) rebuildFromStorage(aclTree ACLTree, newChanges []*Change) (err error) { + d.treeBuilder.Init(d.identityKeys) - orphans, err := d.treeStorage.Orphans() - if err != nil { - return err - } - for _, orphan := range orphans { - if _, exists := d.tree.attached[orphan]; exists { - toRemove = append(toRemove, orphan) - } - if _, exists := d.tree.invalidChanges[orphan]; exists { - toRemove = append(toRemove, orphan) - } - } - return d.treeStorage.RemoveOrphans(toRemove...) -} - -func (d *docTree) rebuildFromStorage(aclTree ACLTree) (err error) { - d.treeBuilder.Init() - - d.tree, err = d.treeBuilder.Build(false) + d.tree, err = d.treeBuilder.Build(false, newChanges) if err != nil { return err } @@ -233,29 +213,34 @@ func (d *docTree) AddContent(ctx context.Context, aclTree ACLTree, content proto return rawCh, nil } -func (d *docTree) AddRawChanges(ctx context.Context, aclTree ACLTree, rawChanges ...*aclpb.RawChange) (AddResult, error) { - // TODO: make proper error handling, because there are a lot of corner cases where this will break - var err error +func (d *docTree) AddRawChanges(ctx context.Context, aclTree ACLTree, rawChanges ...*aclpb.RawChange) (addResult AddResult, err error) { var mode Mode - var changes []*Change // TODO: = addChangesBuf[:0] ... - var notSeenIdx []int + // resetting buffers + d.tmpChangesBuf = d.tmpChangesBuf[:0] + d.notSeenIdxBuf = d.notSeenIdxBuf[:0] + d.difSnapshotBuf = d.difSnapshotBuf[:0] + prevHeads := d.tree.Heads() + + // filtering changes, verifying and unmarshalling them for idx, ch := range rawChanges { if d.HasChange(ch.Id) { continue } - change, err := NewFromRawChange(ch) - // TODO: think what if we will have incorrect signatures on rawChanges, how everything will work + var change *Change + change, err = NewFromVerifiedRawChange(ch, d.identityKeys, d.treeBuilder.signingPubKeyDecoder) if err != nil { - continue + return AddResult{}, err } - changes = append(changes, change) - notSeenIdx = append(notSeenIdx, idx) + + d.tmpChangesBuf = append(d.tmpChangesBuf, change) + d.notSeenIdxBuf = append(d.notSeenIdxBuf, idx) } - if len(notSeenIdx) == 0 { + // if no new changes, then returning + if len(d.notSeenIdxBuf) == 0 { return AddResult{ OldHeads: prevHeads, Heads: prevHeads, @@ -268,11 +253,15 @@ func (d *docTree) AddRawChanges(ctx context.Context, aclTree ACLTree, rawChanges return } - err = d.removeOrphans() - if err != nil { - return + // adding to database all the added changes only after they are good + for _, ch := range addResult.Added { + err = d.treeStorage.AddRawChange(ch) + if err != nil { + return + } } + // setting heads err = d.treeStorage.SetHeads(d.tree.Heads()) if err != nil { return @@ -292,9 +281,10 @@ func (d *docTree) AddRawChanges(ctx context.Context, aclTree ACLTree, rawChanges } }() + // returns changes that we added to the tree getAddedChanges := func() []*aclpb.RawChange { var added []*aclpb.RawChange - for _, idx := range notSeenIdx { + for _, idx := range d.notSeenIdxBuf { rawChange := rawChanges[idx] if _, exists := d.tree.attached[rawChange.Id]; exists { added = append(added, rawChange) @@ -303,49 +293,37 @@ func (d *docTree) AddRawChanges(ctx context.Context, aclTree ACLTree, rawChanges return added } - rebuild := func() (AddResult, error) { - err = d.rebuildFromStorage(aclTree) - if err != nil { - return AddResult{}, err - } + // checking if we have some changes with different snapshot and then rebuilding + for _, ch := range d.tmpChangesBuf { + if ch.SnapshotId != d.tree.RootId() && ch.SnapshotId != "" { + err = d.rebuildFromStorage(aclTree, d.tmpChangesBuf) + if err != nil { + return AddResult{}, err + } - return AddResult{ - OldHeads: prevHeads, - Heads: d.tree.Heads(), - Added: getAddedChanges(), - Summary: AddResultSummaryRebuild, - }, nil - } - - for _, ch := range changes { - err = d.treeStorage.AddChange(ch) - if err != nil { - return AddResult{}, err - } - err = d.treeStorage.AddOrphans(ch.Id) - if err != nil { - return AddResult{}, err + addResult = AddResult{ + OldHeads: prevHeads, + Heads: d.tree.Heads(), + Added: getAddedChanges(), + Summary: AddResultSummaryRebuild, + } + err = nil + return } } - mode = d.tree.Add(changes...) + // normal mode of operation, where we don't need to rebuild from database + mode = d.tree.Add(d.tmpChangesBuf...) switch mode { case Nothing: - for _, ch := range changes { - // rebuilding if the snapshot is different from the root - if ch.SnapshotId != d.tree.RootId() && ch.SnapshotId != "" { - return rebuild() - } - } - - return AddResult{ + addResult = AddResult{ OldHeads: prevHeads, Heads: prevHeads, Summary: AddResultSummaryNothing, - }, nil + } + err = nil + return - case Rebuild: - return rebuild() default: // just rebuilding the state from start without reloading everything from tree storage // as an optimization we could've started from current heads, but I didn't implement that @@ -354,13 +332,15 @@ func (d *docTree) AddRawChanges(ctx context.Context, aclTree ACLTree, rawChanges return AddResult{}, err } - return AddResult{ + addResult = AddResult{ OldHeads: prevHeads, Heads: d.tree.Heads(), Added: getAddedChanges(), Summary: AddResultSummaryAppend, - }, nil + } + err = nil } + return } func (d *docTree) Iterate(f func(change *Change) bool) { @@ -434,12 +414,11 @@ func (d *docTree) ChangesAfterCommonSnapshot(theirPath []string) ([]*aclpb.RawCh return nil, err } - aclChange, err := d.treeBuilder.makeUnverifiedACLChange(raw) + ch, err := NewFromRawChange(raw) if err != nil { return nil, err } - ch := NewChange(id, aclChange) rawChanges = append(rawChanges, raw) return ch, nil } diff --git a/pkg/acl/tree/treebuilder.go b/pkg/acl/tree/treebuilder.go index 5376bee6..e63d2292 100644 --- a/pkg/acl/tree/treebuilder.go +++ b/pkg/acl/tree/treebuilder.go @@ -5,11 +5,10 @@ import ( "errors" "fmt" "github.com/anytypeio/go-anytype-infrastructure-experiments/app/logger" - "github.com/anytypeio/go-anytype-infrastructure-experiments/pkg/acl/aclchanges/aclpb" "github.com/anytypeio/go-anytype-infrastructure-experiments/pkg/acl/treestorage" + "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys" "github.com/anytypeio/go-anytype-infrastructure-experiments/util/keys/asymmetric/signingkey" "github.com/anytypeio/go-anytype-infrastructure-experiments/util/slice" - "github.com/gogo/protobuf/proto" "go.uber.org/zap" "time" ) @@ -22,38 +21,39 @@ var ( type treeBuilder struct { cache map[string]*Change identityKeys map[string]signingkey.PubKey - signingPubKeyDecoder signingkey.PubKeyDecoder + signingPubKeyDecoder keys.Decoder tree *Tree treeStorage treestorage.TreeStorage } -func newTreeBuilder(t treestorage.TreeStorage, decoder signingkey.PubKeyDecoder) *treeBuilder { +func newTreeBuilder(t treestorage.TreeStorage, decoder keys.Decoder) *treeBuilder { return &treeBuilder{ signingPubKeyDecoder: decoder, treeStorage: t, } } -func (tb *treeBuilder) Init() { +func (tb *treeBuilder) Init(identityKeys map[string]signingkey.PubKey) { tb.cache = make(map[string]*Change) - tb.identityKeys = make(map[string]signingkey.PubKey) + tb.identityKeys = identityKeys tb.tree = &Tree{} } -func (tb *treeBuilder) Build(fromStart bool) (*Tree, error) { +func (tb *treeBuilder) Build(fromStart bool, newChanges []*Change) (*Tree, error) { var headsAndOrphans []string - orphans, err := tb.treeStorage.Orphans() - if err != nil { - return nil, err - } heads, err := tb.treeStorage.Heads() if err != nil { return nil, err } - headsAndOrphans = append(headsAndOrphans, orphans...) - headsAndOrphans = append(headsAndOrphans, heads...) - log.With(zap.Strings("heads", heads), zap.Strings("orphans", orphans)).Debug("building tree") + headsAndOrphans = append(headsAndOrphans, heads...) + tb.cache = make(map[string]*Change) + for _, ch := range newChanges { + headsAndOrphans = append(headsAndOrphans, ch.Id) + tb.cache[ch.Id] = ch + } + + log.With(zap.Strings("heads", heads)).Debug("building tree") if fromStart { if err := tb.buildTreeFromStart(headsAndOrphans); err != nil { return nil, fmt.Errorf("buildTree error: %v", err) @@ -69,8 +69,6 @@ func (tb *treeBuilder) Build(fromStart bool) (*Tree, error) { } } - tb.cache = make(map[string]*Change) - return tb.tree, nil } @@ -184,53 +182,16 @@ func (tb *treeBuilder) loadChange(id string) (ch *Change, err error) { return nil, err } - verifiedChange, err := tb.makeVerifiedChange(change) + // TODO: maybe we can use unverified changes here, because we shouldn't put bad changes in the DB in the first place + ch, err = NewFromVerifiedRawChange(change, tb.identityKeys, tb.signingPubKeyDecoder) if err != nil { return nil, err } - ch = NewChange(id, verifiedChange) tb.cache[id] = ch return ch, nil } -func (tb *treeBuilder) verify(identity string, payload, signature []byte) (isVerified bool, err error) { - identityKey, exists := tb.identityKeys[identity] - if !exists { - identityKey, err = tb.signingPubKeyDecoder.DecodeFromString(identity) - if err != nil { - return - } - tb.identityKeys[identity] = identityKey - } - return identityKey.Verify(payload, signature) -} - -func (tb *treeBuilder) makeVerifiedChange(change *aclpb.RawChange) (aclChange *aclpb.Change, err error) { - aclChange = new(aclpb.Change) - - // TODO: think what should we do with such cases, because this can be used by attacker to break our Tree - if err = proto.Unmarshal(change.Payload, aclChange); err != nil { - return - } - var verified bool - verified, err = tb.verify(aclChange.Identity, change.Payload, change.Signature) - if err != nil { - return - } - if !verified { - err = fmt.Errorf("the signature of the payload cannot be verified") - return - } - return -} - -func (tb *treeBuilder) makeUnverifiedACLChange(change *aclpb.RawChange) (aclChange *aclpb.Change, err error) { - aclChange = new(aclpb.Change) - err = proto.Unmarshal(change.Payload, aclChange) - return -} - func (tb *treeBuilder) findBreakpoint(heads []string) (breakpoint string, err error) { var ( ch *Change