Optimize and simplify tree algorithm

This commit is contained in:
mcrakhman 2022-08-20 16:57:32 +02:00 committed by Mikhail Iudin
parent 86118686ad
commit 0d6bbd9258
No known key found for this signature in database
GPG Key ID: FAAAA8BAABDFF1C0
2 changed files with 24 additions and 49 deletions

View File

@ -19,7 +19,6 @@ type ChangeContent struct {
// Change is an abstract type for all types of changes // Change is an abstract type for all types of changes
type Change struct { type Change struct {
Next []*Change Next []*Change
Unattached []*Change
PreviousIds []string PreviousIds []string
Id string Id string
SnapshotId string SnapshotId string

View File

@ -33,10 +33,6 @@ type Tree struct {
duplicateEvents int duplicateEvents int
} }
func (t *Tree) GetUnattachedChanges(changes ...*Change) []*Change {
return nil
}
func (t *Tree) RootId() string { func (t *Tree) RootId() string {
if t.root != nil { if t.root != nil {
return t.root.Id return t.root.Id
@ -111,6 +107,7 @@ func (t *Tree) RemoveInvalidChange(id string) {
t.invalidChanges[top] = struct{}{} t.invalidChanges[top] = struct{}{}
if rem, exists = t.unAttached[top]; exists { if rem, exists = t.unAttached[top]; exists {
delete(t.unAttached, top) delete(t.unAttached, top)
// TODO: delete waitlist, this can only help for memory/performance
} else if rem, exists = t.attached[top]; exists { } else if rem, exists = t.attached[top]; exists {
// remove from all prev changes // remove from all prev changes
for _, id := range rem.PreviousIds { for _, id := range rem.PreviousIds {
@ -128,9 +125,6 @@ func (t *Tree) RemoveInvalidChange(id string) {
} }
delete(t.attached, top) delete(t.attached, top)
} }
for _, el := range rem.Unattached {
stack = append(stack, el.Id)
}
for _, el := range rem.Next { for _, el := range rem.Next {
stack = append(stack, el.Id) stack = append(stack, el.Id)
} }
@ -161,16 +155,13 @@ func (t *Tree) add(c *Change) (attached bool) {
} }
// attaching only if all prev ids are attached // attaching only if all prev ids are attached
attached = true attached = true
// the logic below is the following
for _, pid := range c.PreviousIds { for _, pid := range c.PreviousIds {
if prev, ok := t.attached[pid]; ok { if _, ok := t.attached[pid]; ok {
prev.Unattached = append(prev.Unattached, c)
continue continue
} }
attached = false attached = false
if prev, ok := t.unAttached[pid]; ok { // updating wait list for either unseen or unAttached changes
prev.Unattached = append(prev.Unattached, c)
continue
}
wl := t.waitList[pid] wl := t.waitList[pid]
wl = append(wl, c.Id) wl = append(wl, c.Id)
t.waitList[pid] = wl t.waitList[pid] = wl
@ -178,11 +169,6 @@ func (t *Tree) add(c *Change) (attached bool) {
if attached { if attached {
t.attach(c, true) t.attach(c, true)
} else { } else {
// clearing wait list
for _, wid := range t.waitList[c.Id] {
c.Unattached = append(c.Unattached, t.unAttached[wid])
}
delete(t.waitList, c.Id)
t.unAttached[c.Id] = c t.unAttached[c.Id] = c
} }
return return
@ -196,6 +182,7 @@ func (t *Tree) canAttach(c *Change) (attach bool) {
for _, id := range c.PreviousIds { for _, id := range c.PreviousIds {
if _, exists := t.attached[id]; !exists { if _, exists := t.attached[id]; !exists {
attach = false attach = false
break
} }
} }
return return
@ -209,37 +196,40 @@ func (t *Tree) attach(c *Change, newEl bool) {
// add next to all prev changes // add next to all prev changes
for _, id := range c.PreviousIds { for _, id := range c.PreviousIds {
// prev id must be attached if we attach this id // prev id must already be attached if we attach this id, so we don't need to check if it exists
prev := t.attached[id] prev := t.attached[id]
prev.Next = append(prev.Next, c) // appending c to next changes of all previous changes
if len(prev.Next) > 1 { if len(prev.Next) == 0 || prev.Next[len(prev.Next)-1].Id <= c.Id {
sort.Sort(sortChanges(prev.Next)) prev.Next = append(prev.Next, c)
} } else {
for i, next := range prev.Unattached { // inserting in correct position, before the change which is greater or equal
if next.Id == c.Id { insertIdx := 0
prev.Unattached[i] = nil for idx, el := range prev.Next {
prev.Unattached = append(prev.Unattached[:i], prev.Unattached[i+1:]...) if el.Id >= c.Id {
break insertIdx = idx
break
}
} }
prev.Next = append(prev.Next[:insertIdx+1], prev.Next[insertIdx:]...)
prev.Next[insertIdx] = c
} }
} }
// TODO: as a future optimization we can actually sort next later after we finished building the tree
// clearing wait list // clearing wait list
if waitIds, ok := t.waitList[c.Id]; ok { if waitIds, ok := t.waitList[c.Id]; ok {
for _, wid := range waitIds { for _, wid := range waitIds {
// next can only be in unAttached, because if next is attached then previous (we) are attached
// which is obviously not true, because we are attaching previous only now
next := t.unAttached[wid] next := t.unAttached[wid]
if t.canAttach(next) { if t.canAttach(next) {
t.attach(next, false) t.attach(next, false)
} }
// if we can't attach next that means that some other change will trigger attachment later,
// so we don't care about those changes
} }
delete(t.waitList, c.Id) delete(t.waitList, c.Id)
} }
for _, next := range c.Unattached {
if t.canAttach(next) {
t.attach(next, false)
}
}
} }
func (t *Tree) after(id1, id2 string) (found bool) { func (t *Tree) after(id1, id2 string) (found bool) {
@ -371,17 +361,3 @@ func (t *Tree) String() string {
func (t *Tree) Get(id string) *Change { func (t *Tree) Get(id string) *Change {
return t.attached[id] return t.attached[id]
} }
type sortChanges []*Change
func (s sortChanges) Len() int {
return len(s)
}
func (s sortChanges) Less(i, j int) bool {
return s[i].Id < s[j].Id
}
func (s sortChanges) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}