255 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			Go
		
	
	
	
			
		
		
	
	
			255 lines
		
	
	
		
			6.2 KiB
		
	
	
	
		
			Go
		
	
	
	
//  Copyright (c) 2017 Couchbase, Inc.
 | 
						|
//
 | 
						|
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
// you may not use this file except in compliance with the License.
 | 
						|
// You may obtain a copy of the License at
 | 
						|
//
 | 
						|
// 		http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
//
 | 
						|
// Unless required by applicable law or agreed to in writing, software
 | 
						|
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
// See the License for the specific language governing permissions and
 | 
						|
// limitations under the License.
 | 
						|
 | 
						|
package vellum
 | 
						|
 | 
						|
import (
 | 
						|
	"io"
 | 
						|
 | 
						|
	"github.com/willf/bitset"
 | 
						|
)
 | 
						|
 | 
						|
// FST is an in-memory representation of a finite state transducer,
 | 
						|
// capable of returning the uint64 value associated with
 | 
						|
// each []byte key stored, as well as enumerating all of the keys
 | 
						|
// in order.
 | 
						|
type FST struct {
 | 
						|
	f       io.Closer
 | 
						|
	ver     int
 | 
						|
	len     int
 | 
						|
	typ     int
 | 
						|
	data    []byte
 | 
						|
	decoder decoder
 | 
						|
}
 | 
						|
 | 
						|
func new(data []byte, f io.Closer) (rv *FST, err error) {
 | 
						|
	rv = &FST{
 | 
						|
		data: data,
 | 
						|
		f:    f,
 | 
						|
	}
 | 
						|
 | 
						|
	rv.ver, rv.typ, err = decodeHeader(data)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	rv.decoder, err = loadDecoder(rv.ver, rv.data)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	rv.len = rv.decoder.getLen()
 | 
						|
 | 
						|
	return rv, nil
 | 
						|
}
 | 
						|
 | 
						|
// Contains returns true if this FST contains the specified key.
 | 
						|
func (f *FST) Contains(val []byte) (bool, error) {
 | 
						|
	_, exists, err := f.Get(val)
 | 
						|
	return exists, err
 | 
						|
}
 | 
						|
 | 
						|
// Get returns the value associated with the key.  NOTE: a value of zero
 | 
						|
// does not imply the key does not exist, you must consult the second
 | 
						|
// return value as well.
 | 
						|
func (f *FST) Get(input []byte) (uint64, bool, error) {
 | 
						|
	return f.get(input, nil)
 | 
						|
}
 | 
						|
 | 
						|
func (f *FST) get(input []byte, prealloc fstState) (uint64, bool, error) {
 | 
						|
	var total uint64
 | 
						|
	curr := f.decoder.getRoot()
 | 
						|
	state, err := f.decoder.stateAt(curr, prealloc)
 | 
						|
	if err != nil {
 | 
						|
		return 0, false, err
 | 
						|
	}
 | 
						|
	for i := range input {
 | 
						|
		_, curr, output := state.TransitionFor(input[i])
 | 
						|
		if curr == noneAddr {
 | 
						|
			return 0, false, nil
 | 
						|
		}
 | 
						|
 | 
						|
		state, err = f.decoder.stateAt(curr, state)
 | 
						|
		if err != nil {
 | 
						|
			return 0, false, err
 | 
						|
		}
 | 
						|
 | 
						|
		total += output
 | 
						|
	}
 | 
						|
 | 
						|
	if state.Final() {
 | 
						|
		total += state.FinalOutput()
 | 
						|
		return total, true, nil
 | 
						|
	}
 | 
						|
	return 0, false, nil
 | 
						|
}
 | 
						|
 | 
						|
// Version returns the encoding version used by this FST instance.
 | 
						|
func (f *FST) Version() int {
 | 
						|
	return f.ver
 | 
						|
}
 | 
						|
 | 
						|
// Len returns the number of entries in this FST instance.
 | 
						|
func (f *FST) Len() int {
 | 
						|
	return f.len
 | 
						|
}
 | 
						|
 | 
						|
// Type returns the type of this FST instance.
 | 
						|
func (f *FST) Type() int {
 | 
						|
	return f.typ
 | 
						|
}
 | 
						|
 | 
						|
// Close will unmap any mmap'd data (if managed by vellum) and it will close
 | 
						|
// the backing file (if managed by vellum).  You MUST call Close() for any
 | 
						|
// FST instance that is created.
 | 
						|
func (f *FST) Close() error {
 | 
						|
	if f.f != nil {
 | 
						|
		err := f.f.Close()
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
	}
 | 
						|
	f.data = nil
 | 
						|
	f.decoder = nil
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
// Start returns the start state of this Automaton
 | 
						|
func (f *FST) Start() int {
 | 
						|
	return f.decoder.getRoot()
 | 
						|
}
 | 
						|
 | 
						|
// IsMatch returns if this state is a matching state in this Automaton
 | 
						|
func (f *FST) IsMatch(addr int) bool {
 | 
						|
	match, _ := f.IsMatchWithVal(addr)
 | 
						|
	return match
 | 
						|
}
 | 
						|
 | 
						|
// CanMatch returns if this state can ever transition to a matching state
 | 
						|
// in this Automaton
 | 
						|
func (f *FST) CanMatch(addr int) bool {
 | 
						|
	if addr == noneAddr {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	return true
 | 
						|
}
 | 
						|
 | 
						|
// WillAlwaysMatch returns if from this state the Automaton will always
 | 
						|
// be in a matching state
 | 
						|
func (f *FST) WillAlwaysMatch(int) bool {
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
// Accept returns the next state for this Automaton on input of byte b
 | 
						|
func (f *FST) Accept(addr int, b byte) int {
 | 
						|
	next, _ := f.AcceptWithVal(addr, b)
 | 
						|
	return next
 | 
						|
}
 | 
						|
 | 
						|
// IsMatchWithVal returns if this state is a matching state in this Automaton
 | 
						|
// and also returns the final output value for this state
 | 
						|
func (f *FST) IsMatchWithVal(addr int) (bool, uint64) {
 | 
						|
	s, err := f.decoder.stateAt(addr, nil)
 | 
						|
	if err != nil {
 | 
						|
		return false, 0
 | 
						|
	}
 | 
						|
	return s.Final(), s.FinalOutput()
 | 
						|
}
 | 
						|
 | 
						|
// AcceptWithVal returns the next state for this Automaton on input of byte b
 | 
						|
// and also returns the output value for the transition
 | 
						|
func (f *FST) AcceptWithVal(addr int, b byte) (int, uint64) {
 | 
						|
	s, err := f.decoder.stateAt(addr, nil)
 | 
						|
	if err != nil {
 | 
						|
		return noneAddr, 0
 | 
						|
	}
 | 
						|
	_, next, output := s.TransitionFor(b)
 | 
						|
	return next, output
 | 
						|
}
 | 
						|
 | 
						|
// Iterator returns a new Iterator capable of enumerating the key/value pairs
 | 
						|
// between the provided startKeyInclusive and endKeyExclusive.
 | 
						|
func (f *FST) Iterator(startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
 | 
						|
	return newIterator(f, startKeyInclusive, endKeyExclusive, nil)
 | 
						|
}
 | 
						|
 | 
						|
// Search returns a new Iterator capable of enumerating the key/value pairs
 | 
						|
// between the provided startKeyInclusive and endKeyExclusive that also
 | 
						|
// satisfy the provided automaton.
 | 
						|
func (f *FST) Search(aut Automaton, startKeyInclusive, endKeyExclusive []byte) (*FSTIterator, error) {
 | 
						|
	return newIterator(f, startKeyInclusive, endKeyExclusive, aut)
 | 
						|
}
 | 
						|
 | 
						|
// Debug is only intended for debug purposes, it simply asks the underlying
 | 
						|
// decoder visit each state, and pass it to the provided callback.
 | 
						|
func (f *FST) Debug(callback func(int, interface{}) error) error {
 | 
						|
 | 
						|
	addr := f.decoder.getRoot()
 | 
						|
	set := bitset.New(uint(addr))
 | 
						|
	stack := addrStack{addr}
 | 
						|
 | 
						|
	stateNumber := 0
 | 
						|
	stack, addr = stack[:len(stack)-1], stack[len(stack)-1]
 | 
						|
	for addr != noneAddr {
 | 
						|
		if set.Test(uint(addr)) {
 | 
						|
			stack, addr = stack.Pop()
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		set.Set(uint(addr))
 | 
						|
		state, err := f.decoder.stateAt(addr, nil)
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		err = callback(stateNumber, state)
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		for i := 0; i < state.NumTransitions(); i++ {
 | 
						|
			tchar := state.TransitionAt(i)
 | 
						|
			_, dest, _ := state.TransitionFor(tchar)
 | 
						|
			stack = append(stack, dest)
 | 
						|
		}
 | 
						|
		stateNumber++
 | 
						|
		stack, addr = stack.Pop()
 | 
						|
	}
 | 
						|
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
type addrStack []int
 | 
						|
 | 
						|
func (a addrStack) Pop() (addrStack, int) {
 | 
						|
	l := len(a)
 | 
						|
	if l < 1 {
 | 
						|
		return a, noneAddr
 | 
						|
	}
 | 
						|
	return a[:l-1], a[l-1]
 | 
						|
}
 | 
						|
 | 
						|
// Reader() returns a Reader instance that a single thread may use to
 | 
						|
// retrieve data from the FST
 | 
						|
func (f *FST) Reader() (*Reader, error) {
 | 
						|
	return &Reader{f: f}, nil
 | 
						|
}
 | 
						|
 | 
						|
// A Reader is meant for a single threaded use
 | 
						|
type Reader struct {
 | 
						|
	f        *FST
 | 
						|
	prealloc fstStateV1
 | 
						|
}
 | 
						|
 | 
						|
func (r *Reader) Get(input []byte) (uint64, bool, error) {
 | 
						|
	return r.f.get(input, &r.prealloc)
 | 
						|
}
 |