// Copyright 2022 The Dawn Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package cov import ( "log" "sort" "sync" ) // Optimize optimizes the Tree by de-duplicating common spans into a tree of SpanGroups. // // Breaking down tests into group hierarchies provide a natural way to structure // coverage data, as tests of the same suite, file or test are likely to have // similar coverage spans. // // For each source file in the codebase, we create a tree of SpanGroups, where the // leaves are the test cases. // // For example, given the following Paths: // // a.b.d.h // a.b.d.i.n // a.b.d.i.o // a.b.e.j // a.b.e.k.p // a.b.e.k.q // a.c.f // a.c.g.l.r // a.c.g.m // // We would construct the following tree: // // a // ╭──────┴──────╮ // b c // ╭───┴───╮ ╭───┴───╮ // d e f g // ╭─┴─╮ ╭─┴─╮ ╭─┴─╮ // h i j k l m // ╭┴╮ ╭┴╮ │ // n o p q r // // Each leaf node in this tree (`h`, `n`, `o`, `j`, `p`, `q`, `f`, `r`, `m`) // represent a test case, and non-leaf nodes (`a`, `b`, `c`, `d`, `e`, `g`, `i`, // `k`, `l`) are suite, file or tests. // // To begin, we create a test tree structure, and associate the full list of test // coverage spans with every leaf node (test case) in this tree. // // This data structure hasn't given us any compression benefits yet, but we can // now do a few tricks to dramatically reduce number of spans needed to describe // the graph: // // ~ Optimization 1: Common span promotion ~ // // The first compression scheme is to promote common spans up the tree when they // are common for all children. This will reduce the number of spans needed to be // encoded in the final file. // // For example, if the test group `a` has 4 children that all share the same span // `X`: // // a // ╭───┬─┴─┬───╮ // b c d e // [X,Y] [X] [X] [X,Z] // // Then span `X` can be promoted up to `a`: // // [X] // a // ╭───┬─┴─┬───╮ // b c d e // [Y] [] [] [Z] // // ~ Optimization 2: Span XOR promotion ~ // // This idea can be extended further, by not requiring all the children to share // the same span before promotion. If *most* child nodes share the same span, we // can still promote the span, but this time we *remove* the span from the // children *if they had it*, and *add* the span to children *if they didn't // have it*. // // For example, if the test group `a` has 4 children with 3 that share the span // `X`: // // a // ╭───┬─┴─┬───╮ // b c d e // [X,Y] [X] [] [X,Z] // // Then span `X` can be promoted up to `a` by flipping the presence of `X` on the // child nodes: // // [X] // a // ╭───┬─┴─┬───╮ // b c d e // [Y] [] [X] [Z] // // This process repeats up the tree. // // With this optimization applied, we now need to traverse the tree from root to // leaf in order to know whether a given span is in use for the leaf node (test case): // // * If the span is encountered an *odd* number of times during traversal, then // the span is *covered*. // * If the span is encountered an *even* number of times during traversal, then // the span is *not covered*. // // See tools/src/cov/coverage_test.go for more examples of this optimization. // // ~ Optimization 3: Common span grouping ~ // // With real world data, we encounter groups of spans that are commonly found // together. To further reduce coverage data, the whole graph is scanned for common // span patterns, and are indexed by each tree node. // The XOR'ing of spans as described above is performed as if the spans were not // grouped. // // ~ Optimization 4: Lookup tables ~ // // All spans, span-groups and strings are stored in de-duplicated tables, and are // indexed wherever possible. func (t *Tree) Optimize() { log.Printf("Optimizing coverage tree...") // Start by gathering all of the unique spansets wg := sync.WaitGroup{} wg.Add(len(t.files)) for _, file := range t.files { file := file go func() { defer wg.Done() o := optimizer{} for idx, tc := range file.tcm { o.invertForCommon(tc, &t.testRoot.children[idx]) } o.createGroups(file) }() } wg.Wait() } type optimizer struct{} // createGroups looks for common SpanSets, and creates indexable span groups // which are then used instead. func (o *optimizer) createGroups(f *treeFile) { const minSpansInGroup = 2 type spansetKey string spansetMap := map[spansetKey]SpanSet{} f.tcm.traverse(func(tc *TestCoverage) { if len(tc.Spans) >= minSpansInGroup { key := spansetKey(tc.Spans.String()) if _, ok := spansetMap[key]; !ok { spansetMap[key] = tc.Spans } } }) if len(spansetMap) == 0 { return } type spansetInfo struct { key spansetKey set SpanSet // fully expanded set grp SpanGroup id SpanGroupID } spansets := make([]*spansetInfo, 0, len(spansetMap)) for key, set := range spansetMap { spansets = append(spansets, &spansetInfo{ key: key, set: set, grp: SpanGroup{Spans: set}, }) } // Sort by number of spans in each sets starting with the largest. sort.Slice(spansets, func(i, j int) bool { a, b := spansets[i].set, spansets[j].set switch { case len(a) > len(b): return true case len(a) < len(b): return false } return a.List().Compare(b.List()) == -1 // Just to keep output stable }) // Assign IDs now that we have stable order. for i := range spansets { spansets[i].id = SpanGroupID(i) } // Loop over the spanGroups starting from the largest, and try to fold them // into the larger sets. // This is O(n^2) complexity. nextSpan: for i, a := range spansets[:len(spansets)-1] { for _, b := range spansets[i+1:] { if len(a.set) > len(b.set) && a.set.containsAll(b.set) { extend := b.id // Do not take address of iterator! a.grp.Spans = a.set.removeAll(b.set) a.grp.Extend = &extend continue nextSpan } } } // Rebuild a map of spansetKey to SpanGroup spangroupMap := make(map[spansetKey]*spansetInfo, len(spansets)) for _, s := range spansets { spangroupMap[s.key] = s } // Store the groups in the tree f.spangroups = make(map[SpanGroupID]SpanGroup, len(spansets)) for _, s := range spansets { f.spangroups[s.id] = s.grp } // Update all the uses. f.tcm.traverse(func(tc *TestCoverage) { key := spansetKey(tc.Spans.String()) if g, ok := spangroupMap[key]; ok { tc.Spans = nil tc.Group = &g.id } }) } // invertCommon looks for tree nodes with the majority of the child nodes with // the same spans. This span is promoted up to the parent, and the children // have the span inverted. func (o *optimizer) invertForCommon(tc *TestCoverage, t *Test) { wg := sync.WaitGroup{} wg.Add(len(tc.Children)) for id, child := range tc.Children { id, child := id, child go func() { defer wg.Done() o.invertForCommon(child, &t.children[id]) }() } wg.Wait() counts := map[SpanID]int{} for _, child := range tc.Children { for span := range child.Spans { counts[span] = counts[span] + 1 } } for span, count := range counts { if count > len(t.children)/2 { tc.Spans = tc.Spans.invert(span) for _, idx := range t.indices { child := tc.Children.index(idx) child.Spans = child.Spans.invert(span) if child.deletable() { delete(tc.Children, idx) } } } } }