// Copyright 2021 The Tint Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // Package lut provides a look up table, which compresses indexed data package lut import ( "sort" "dawn.googlesource.com/dawn/tools/src/list" ) // LUT is a look up table. // The table holds a number of items that are stored in a linear list. type LUT interface { // Add adds a sequence of items to the table. // items can be a single element, a slice of element, or a List of element. // Returns a pointer to the offset of the first item in the table's list. // The sequence of items stored at [offset, offset+N), where N is the // number of items added will remain equal, even after calling Compact(). Add(items interface{}) *int // Compact reorders the table items so that the table storage is compacted // by shuffling data around and de-duplicating sequences of common data. // Each originally added sequence is preserved in the resulting table, with // the same contiguous ordering, but with a potentially different offset. // Heuristics are used to shorten the table length, by exploiting common // subsequences, and removing duplicate sequences. // Note that shortest common superstring is NP-hard, so heuristics are used. // Compact updates pointers returned by Add(). Compact() } // New returns a new look up table func New(storage list.List) LUT { return &lut{storage: storage} } // A sequence represents a span of entries in the table type sequence struct { offset *int // Pointer to the start index of the sequence count int // Length of the sequence } // lut implements LUT type lut struct { storage list.List // The List that backs this LUT sequences []sequence // The entries in the LUT } func (t *lut) Add(items interface{}) *int { offset := t.storage.Count() t.storage.Append(items) count := t.storage.Count() - offset offsetPtr := &offset t.sequences = append(t.sequences, sequence{offsetPtr, count}) return offsetPtr } func (t lut) Compact() { // Generate int32 identifiers for each unique item in the table. // We use these to compare items instead of comparing the real data as this // function is comparison-heavy, and integer compares are cheap. srcIDs := t.itemIDs() dstIDs := make([]int32, len(srcIDs)) // Make a copy the data held in the table, use the copy as the source, and // t.storage as the destination. srcData := list.Copy(t.storage) dstData := t.storage // Sort all the sequences by length, with the largest first. // This helps 'seed' the compacted form with the largest items first. // This can improve the compaction as small sequences can pack into larger, // placed items. sort.SliceStable(t.sequences, func(i, j int) bool { return t.sequences[i].count > t.sequences[j].count }) // unplaced is the list of sequences that have not yet been placed. // All sequences are initially unplaced. unplaced := make([]sequence, len(t.sequences)) copy(unplaced, t.sequences) // placed is the list of sequences that have been placed. // Nothing is initially placed. placed := make([]sequence, 0, len(t.sequences)) // remove removes the sequence in unplaced with the index i. remove := func(i int) { placed = append(placed, unplaced[i]) if i > 0 { if i < len(unplaced)-1 { copy(unplaced[i:], unplaced[i+1:]) } unplaced = unplaced[:len(unplaced)-1] } else { unplaced = unplaced[1:] } } // cp copies data from [srcOffset:srcOffset+count] to [dstOffset:dstOffset+count]. cp := func(dstOffset, srcOffset, count int) { dstData.CopyFrom(srcData, dstOffset, srcOffset, count) copy( dstIDs[dstOffset:dstOffset+count], srcIDs[srcOffset:srcOffset+count], ) } // match describes a sequence that can be placed. type match struct { dst int // destination offset src sequence // source sequence len int // number of items that matched idx int // sequence index } // number of items that have been placed. newSize := 0 // While there's sequences to place... for len(unplaced) > 0 { // Place the next largest, unplaced sequence at the end of the new list cp(newSize, *unplaced[0].offset, unplaced[0].count) *unplaced[0].offset = newSize newSize += unplaced[0].count remove(0) for { // Look for the sequence with the longest match against the // currently placed data. Any mismatches with currently placed data // will nullify the match. The head or tail of this sequence may // extend the currently placed data. best := match{} // For each unplaced sequence... for i := 0; i < len(unplaced); i++ { seq := unplaced[i] if best.len >= seq.count { // The best match is already at least as long as this // sequence and sequences are sorted by size, so best cannot // be beaten. Stop searching. break } // Perform a full sweep from left to right, scoring the match... for shift := -seq.count + 1; shift < newSize; shift++ { dstS := max(shift, 0) dstE := min(shift+seq.count, newSize) count := dstE - dstS srcS := *seq.offset - min(shift, 0) srcE := srcS + count if best.len < count { if equal(srcIDs[srcS:srcE], dstIDs[dstS:dstE]) { best = match{shift, seq, count, i} } } } } if best.src.offset == nil { // Nothing matched. Not even one element. // Resort to placing the next largest sequence at the end. break } if best.dst < 0 { // Best match wants to place the sequence to the left of the // current output. We have to shuffle everything... n := -best.dst dstData.Copy(n, 0, newSize) copy(dstIDs[n:n+newSize], dstIDs) newSize += n best.dst = 0 for _, p := range placed { *p.offset += n } } // Place the best matching sequence. cp(best.dst, *best.src.offset, best.src.count) newSize = max(newSize, best.dst+best.src.count) *best.src.offset = best.dst remove(best.idx) } } // Shrink the output buffer to the new size. dstData.Resize(newSize) // All done. } // Generate a set of identifiers for all the unique items in storage func (t lut) itemIDs() []int32 { storageSize := t.storage.Count() keys := make([]int32, storageSize) dataToKey := map[interface{}]int32{} for i := 0; i < storageSize; i++ { data := t.storage.Get(i) key, found := dataToKey[data] if !found { key = int32(len(dataToKey)) dataToKey[data] = key } keys[i] = key } return keys } func max(a, b int) int { if a < b { return b } return a } func min(a, b int) int { if a > b { return b } return a } func equal(a, b []int32) bool { for i, v := range a { if b[i] != v { return false } } return true }