mirror of https://github.com/0xERR0R/blocky.git
192 lines
3.8 KiB
Go
192 lines
3.8 KiB
Go
package trie
|
|
|
|
import (
|
|
"github.com/0xERR0R/blocky/log"
|
|
"strings"
|
|
)
|
|
|
|
// Trie stores a set of strings and can quickly check
|
|
// if it contains an element, or one of its parents.
|
|
//
|
|
// It implements a semi-radix/semi-compressed trie:
|
|
// a node that would be a single child is merged with
|
|
// its parent, if it is a terminal.
|
|
//
|
|
// The word "prefix" is avoided because in practice
|
|
// we use the `Trie` with `SplitTLD` so parents are
|
|
// suffixes even if in the datastructure they are
|
|
// prefixes.
|
|
type Trie struct {
|
|
split SplitFunc
|
|
root parent
|
|
}
|
|
|
|
func NewTrie(split SplitFunc) *Trie {
|
|
return &Trie{
|
|
split: split,
|
|
root: parent{},
|
|
}
|
|
}
|
|
|
|
func (t *Trie) IsEmpty() bool {
|
|
return t.root.children == nil
|
|
}
|
|
|
|
func (t *Trie) Insert(key string) {
|
|
t.root.insert(key, t.split)
|
|
}
|
|
|
|
func (t *Trie) HasParentOf(key string) bool {
|
|
return t.root.hasParentOf(key, t.split)
|
|
}
|
|
|
|
type node interface {
|
|
hasParentOf(key string, split SplitFunc) bool
|
|
}
|
|
|
|
// We save memory by not keeping track of children of
|
|
// nodes that are terminals (part of the set) as we only
|
|
// ever need to know if a domain, or any of its parents,
|
|
// is in the `Trie`.
|
|
// Example: if the `Trie` contains "example.com", inserting
|
|
// "www.example.com" has no effect as we already know it
|
|
// is contained in the set.
|
|
// Conversely, if it contains "www.example.com" and we insert
|
|
// "example.com", then "www.example.com" is removed as it is
|
|
// no longer useful.
|
|
//
|
|
// This means that all terminals are leafs and vice-versa.
|
|
// So we save slightly more memory by avoiding a `isTerminal bool`
|
|
// per parent.
|
|
type parent struct {
|
|
children map[string]node
|
|
}
|
|
|
|
func newParent() *parent {
|
|
return &parent{
|
|
children: make(map[string]node, 1),
|
|
}
|
|
}
|
|
|
|
func (n *parent) insert(key string, split SplitFunc) {
|
|
if len(key) == 0 {
|
|
return
|
|
}
|
|
|
|
for {
|
|
if n.children == nil {
|
|
n.children = make(map[string]node, 1)
|
|
}
|
|
|
|
label, rest := split(key)
|
|
|
|
child, ok := n.children[label]
|
|
if !ok || len(rest) == 0 {
|
|
n.children[label] = terminal(rest)
|
|
|
|
return
|
|
}
|
|
|
|
switch child := child.(type) {
|
|
case *parent:
|
|
// Continue down the trie
|
|
key = rest
|
|
n = child
|
|
|
|
continue
|
|
|
|
case terminal:
|
|
if child.hasParentOf(rest, split) {
|
|
// Found a parent/"prefix" in the set
|
|
return
|
|
}
|
|
|
|
p := newParent()
|
|
n.children[label] = p
|
|
|
|
p.insert(child.String(), split) // keep existing terminal
|
|
p.insert(rest, split) // add new value
|
|
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (n *parent) hasParentOf(key string, split SplitFunc) bool {
|
|
searchString := key
|
|
rule := ""
|
|
|
|
for {
|
|
label, rest := split(key)
|
|
rule = strings.Join([]string{label, rule}, ".")
|
|
|
|
child, ok := n.children[label]
|
|
if !ok {
|
|
return false
|
|
}
|
|
|
|
switch child := child.(type) {
|
|
case *parent:
|
|
if len(rest) == 0 {
|
|
// The trie only contains children/"suffixes" of the
|
|
// key we're searching for
|
|
return false
|
|
}
|
|
|
|
// Continue down the trie
|
|
key = rest
|
|
n = child
|
|
|
|
continue
|
|
|
|
case terminal:
|
|
// Continue down the trie
|
|
matched := child.hasParentOf(rest, split)
|
|
if matched {
|
|
rule = strings.Join([]string{child.String(), rule}, ".")
|
|
rule = strings.Trim(rule, ".")
|
|
log.PrefixedLog("trie").Debugf("wildcard block rule '%s' matched with '%s'", rule, searchString)
|
|
}
|
|
|
|
return matched
|
|
}
|
|
}
|
|
}
|
|
|
|
// terminal is a member of the set. Its value is the part of the
// inserted key that is left after the labels leading to it have been
// split off, and may be empty.
type terminal string

// String returns the terminal's value as a plain string.
func (t terminal) String() string {
	return string(t)
}
|
|
|
|
func (t terminal) hasParentOf(searchKey string, split SplitFunc) bool {
|
|
tKey := t.String()
|
|
if tKey == "" {
|
|
return true
|
|
}
|
|
|
|
for {
|
|
tLabel, tRest := split(tKey)
|
|
|
|
searchLabel, searchRest := split(searchKey)
|
|
if searchLabel != tLabel {
|
|
return false
|
|
}
|
|
|
|
if len(tRest) == 0 {
|
|
// Found a parent/"prefix" in the set
|
|
return true
|
|
}
|
|
|
|
if len(searchRest) == 0 {
|
|
// The trie only contains children/"suffixes" of the
|
|
// key we're searching for
|
|
return false
|
|
}
|
|
|
|
// Continue down the trie
|
|
searchKey = searchRest
|
|
tKey = tRest
|
|
}
|
|
}
|