Add tools/src/lut: A lookup table with compaction
Used by the intrinsic definition generator. Bug: tint:832 Change-Id: I000b1ebe067117e533b3edd8a3fac67a3cbc14c9 Reviewed-on: https://dawn-review.googlesource.com/c/tint/+/52601 Kokoro: Kokoro <noreply+kokoro@google.com> Commit-Queue: Ben Clayton <bclayton@google.com> Reviewed-by: David Neto <dneto@google.com>
This commit is contained in:
parent
b487c71e59
commit
e8177dade6
|
@ -0,0 +1,245 @@
|
|||
// Copyright 2021 The Tint Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Package lut provides a look up table, which compresses indexed data
|
||||
package lut
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"dawn.googlesource.com/tint/tools/src/list"
|
||||
)
|
||||
|
||||
// LUT is a look up table.
|
||||
// The table holds a number of items that are stored in a linear list.
|
||||
type LUT interface {
|
||||
// Add adds a sequence of items to the table.
|
||||
// items can be a single element, a slice of element, or a List of element.
|
||||
// Returns a pointer to the offset of the first item in the table's list.
|
||||
// The sequence of items stored at [offset, offset+N), where N is the
|
||||
// number of items added will remain equal, even after calling Compact().
|
||||
Add(items interface{}) *int
|
||||
// Compact reorders the table items so that the table storage is compacted
|
||||
// by shuffling data around and de-duplicating sequences of common data.
|
||||
// Each originally added sequence is preserved in the resulting table, with
|
||||
// the same contiguous ordering, but with a potentially different offset.
|
||||
// Heuristics are used to shorten the table length, by exploiting common
|
||||
// subsequences, and removing duplicate sequences.
|
||||
// Note that shortest common superstring is NP-hard, so heuristics are used.
|
||||
// Compact updates pointers returned by Add().
|
||||
Compact()
|
||||
}
|
||||
|
||||
// New returns a new look up table
|
||||
func New(storage list.List) LUT {
|
||||
return &lut{storage: storage}
|
||||
}
|
||||
|
||||
// A sequence represents a span of entries in the table
|
||||
type sequence struct {
|
||||
offset *int // Pointer to the start index of the sequence
|
||||
count int // Length of the sequence
|
||||
}
|
||||
|
||||
// lut implements LUT
|
||||
type lut struct {
|
||||
storage list.List // The List that backs this LUT
|
||||
sequences []sequence // The entries in the LUT
|
||||
}
|
||||
|
||||
func (t *lut) Add(items interface{}) *int {
|
||||
offset := t.storage.Count()
|
||||
t.storage.Append(items)
|
||||
count := t.storage.Count() - offset
|
||||
offsetPtr := &offset
|
||||
t.sequences = append(t.sequences, sequence{offsetPtr, count})
|
||||
return offsetPtr
|
||||
}
|
||||
|
||||
func (t lut) Compact() {
|
||||
// Generate int32 identifiers for each unique item in the table.
|
||||
// We use these to compare items instead of comparing the real data as this
|
||||
// function is comparison-heavy, and integer compares are cheap.
|
||||
srcIDs := t.itemIDs()
|
||||
dstIDs := make([]int32, len(srcIDs))
|
||||
|
||||
// Make a copy the data held in the table, use the copy as the source, and
|
||||
// t.storage as the destination.
|
||||
srcData := list.Copy(t.storage)
|
||||
dstData := t.storage
|
||||
|
||||
// Sort all the sequences by length, with the largest first.
|
||||
// This helps 'seed' the compacted form with the largest items first.
|
||||
// This can improve the compaction as small sequences can pack into larger,
|
||||
// placed items.
|
||||
sort.Slice(t.sequences, func(i, j int) bool {
|
||||
return t.sequences[i].count > t.sequences[j].count
|
||||
})
|
||||
|
||||
// unplaced is the list of sequences that have not yet been placed.
|
||||
// All sequences are initially unplaced.
|
||||
unplaced := make([]sequence, len(t.sequences))
|
||||
copy(unplaced, t.sequences)
|
||||
|
||||
// placed is the list of sequences that have been placed.
|
||||
// Nothing is initially placed.
|
||||
placed := make([]sequence, 0, len(t.sequences))
|
||||
|
||||
// remove removes the sequence in unplaced with the index i.
|
||||
remove := func(i int) {
|
||||
placed = append(placed, unplaced[i])
|
||||
if i > 0 {
|
||||
if i < len(unplaced)-1 {
|
||||
copy(unplaced[i:], unplaced[i+1:])
|
||||
}
|
||||
unplaced = unplaced[:len(unplaced)-1]
|
||||
} else {
|
||||
unplaced = unplaced[1:]
|
||||
}
|
||||
}
|
||||
|
||||
// cp copies data from [srcOffset:srcOffset+count] to [dstOffset:dstOffset+count].
|
||||
cp := func(dstOffset, srcOffset, count int) {
|
||||
dstData.CopyFrom(srcData, dstOffset, srcOffset, count)
|
||||
copy(
|
||||
dstIDs[dstOffset:dstOffset+count],
|
||||
srcIDs[srcOffset:srcOffset+count],
|
||||
)
|
||||
}
|
||||
|
||||
// match describes a sequence that can be placed.
|
||||
type match struct {
|
||||
dst int // destination offset
|
||||
src sequence // source sequence
|
||||
len int // number of items that matched
|
||||
idx int // sequence index
|
||||
}
|
||||
|
||||
// number of items that have been placed.
|
||||
newSize := 0
|
||||
|
||||
// While there's sequences to place...
|
||||
for len(unplaced) > 0 {
|
||||
// Place the next largest, unplaced sequence at the end of the new list
|
||||
cp(newSize, *unplaced[0].offset, unplaced[0].count)
|
||||
*unplaced[0].offset = newSize
|
||||
newSize += unplaced[0].count
|
||||
remove(0)
|
||||
|
||||
for {
|
||||
// Look for the sequence with the longest match against the
|
||||
// currently placed data. Any mismatches with currently placed data
|
||||
// will nullify the match. The head or tail of this sequence may
|
||||
// extend the currently placed data.
|
||||
best := match{}
|
||||
|
||||
// For each unplaced sequence...
|
||||
for i := 0; i < len(unplaced); i++ {
|
||||
seq := unplaced[i]
|
||||
|
||||
if best.len >= seq.count {
|
||||
// The best match is already at least as long as this
|
||||
// sequence and sequences are sorted by size, so best cannot
|
||||
// be beaten. Stop searching.
|
||||
break
|
||||
}
|
||||
|
||||
// Perform a full sweep from left to right, scoring the match...
|
||||
for shift := -seq.count + 1; shift < newSize; shift++ {
|
||||
dstS := max(shift, 0)
|
||||
dstE := min(shift+seq.count, newSize)
|
||||
count := dstE - dstS
|
||||
srcS := *seq.offset - min(shift, 0)
|
||||
srcE := srcS + count
|
||||
|
||||
if best.len < count {
|
||||
if equal(srcIDs[srcS:srcE], dstIDs[dstS:dstE]) {
|
||||
best = match{shift, seq, count, i}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if best.src.offset == nil {
|
||||
// Nothing matched. Not even one element.
|
||||
// Resort to placing the next largest sequence at the end.
|
||||
break
|
||||
}
|
||||
|
||||
if best.dst < 0 {
|
||||
// Best match wants to place the sequence to the left of the
|
||||
// current output. We have to shuffle everything...
|
||||
n := -best.dst
|
||||
dstData.Copy(n, 0, newSize)
|
||||
copy(dstIDs[n:n+newSize], dstIDs)
|
||||
newSize += n
|
||||
best.dst = 0
|
||||
for _, p := range placed {
|
||||
*p.offset += n
|
||||
}
|
||||
}
|
||||
|
||||
// Place the best matching sequence.
|
||||
cp(best.dst, *best.src.offset, best.src.count)
|
||||
newSize = max(newSize, best.dst+best.src.count)
|
||||
*best.src.offset = best.dst
|
||||
remove(best.idx)
|
||||
}
|
||||
}
|
||||
|
||||
// Shrink the output buffer to the new size.
|
||||
dstData.Resize(newSize)
|
||||
|
||||
// All done.
|
||||
}
|
||||
|
||||
// Generate a set of identifiers for all the unique items in storage
|
||||
func (t lut) itemIDs() []int32 {
|
||||
storageSize := t.storage.Count()
|
||||
keys := make([]int32, storageSize)
|
||||
dataToKey := map[interface{}]int32{}
|
||||
for i := 0; i < storageSize; i++ {
|
||||
data := t.storage.Get(i)
|
||||
key, found := dataToKey[data]
|
||||
if !found {
|
||||
key = int32(len(dataToKey))
|
||||
dataToKey[data] = key
|
||||
}
|
||||
keys[i] = key
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
func max(a, b int) int {
|
||||
if a < b {
|
||||
return b
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func min(a, b int) int {
|
||||
if a > b {
|
||||
return b
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func equal(a, b []int32) bool {
|
||||
for i, v := range a {
|
||||
if b[i] != v {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
// Copyright 2021 The Tint Authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package lut_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"dawn.googlesource.com/tint/tools/src/list"
|
||||
"dawn.googlesource.com/tint/tools/src/lut"
|
||||
)
|
||||
|
||||
func TestCompactWithFragments(t *testing.T) {
|
||||
runes := []rune{}
|
||||
lut := lut.New(list.Wrap(&runes))
|
||||
indices := []*int{
|
||||
lut.Add([]rune("the life in your")),
|
||||
lut.Add([]rune("in your life that count")),
|
||||
lut.Add([]rune("In the end,")),
|
||||
lut.Add([]rune("the life in")),
|
||||
lut.Add([]rune("count. It's the")),
|
||||
lut.Add([]rune("years")),
|
||||
lut.Add([]rune("in your years.")),
|
||||
lut.Add([]rune("it's not the years in")),
|
||||
lut.Add([]rune("not the years")),
|
||||
lut.Add([]rune("not the years in your")),
|
||||
lut.Add([]rune("end, it's")),
|
||||
}
|
||||
|
||||
lut.Compact()
|
||||
|
||||
expect := "In the end, it's not the years in your life that count. It's the life in your years."
|
||||
got := string(runes)
|
||||
if got != expect {
|
||||
t.Errorf("Compact result was not as expected\nExpected: '%v'\nGot: '%v'", expect, got)
|
||||
}
|
||||
expectedIndices := []int{
|
||||
61, // the life in your
|
||||
31, // in your life that count
|
||||
0, // In the end,
|
||||
61, // the life in
|
||||
49, // count. It's the
|
||||
25, // years
|
||||
70, // in your years.
|
||||
12, // it's not the years in
|
||||
17, // not the years
|
||||
17, // not the years in your
|
||||
7, // end, it's
|
||||
}
|
||||
for i, p := range indices {
|
||||
if expected, got := expectedIndices[i], *p; expected != got {
|
||||
t.Errorf("Index %v was not expected. Expected %v, got %v", i, expected, got)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue