mirror of https://github.com/0xERR0R/blocky.git
regex support for matching (#12)
This commit is contained in:
parent
925c6f97eb
commit
e7ddab714b
|
@ -25,6 +25,7 @@ Blocky is a DNS proxy and ad-blocker for the local network written in Go with fo
|
|||
|
||||
* Definition of black and white lists per client group (Kids, Smart home devices, etc.)
|
||||
* Periodical reload of external black and white lists
|
||||
* Regex support
|
||||
* Blocking of request domain, response CNAME (deep CNAME inspection) and response IP addresses (against IP lists)
|
||||
|
||||
- **Advanced DNS configuration** - not just an ad-blocker
|
||||
|
|
|
@ -53,6 +53,8 @@ blocking:
|
|||
# inline definition with YAML literal block scalar style
|
||||
# hosts format
|
||||
whitelistdomain.com
|
||||
# this is a regex
|
||||
/^banners?[_.-]/
|
||||
# definition: which groups should be applied for which client
|
||||
clientGroupsBlock:
|
||||
# default will be used, if no special definition for a client name exists
|
||||
|
|
|
@ -176,7 +176,8 @@ contains a map of client name and multiple IP addresses.
|
|||
|
||||
Blocky can download and use external lists with domains or IP addresses to block DNS queries (e.g. advertisement, malware,
|
||||
trackers, adult sites). You can group several list sources together and define the blocking behavior per client.
|
||||
External blacklists must be in the well-known [Hosts format](https://en.wikipedia.org/wiki/Hosts_(file)).
|
||||
External blacklists must be either in the well-known [Hosts format](https://en.wikipedia.org/wiki/Hosts_(file)) or just
|
||||
a plain domain list (one domain per line). Blocky also supports regex as a more powerful tool to define patterns to block.
|
||||
|
||||
Blocky uses the [DNS sinkhole](https://en.wikipedia.org/wiki/DNS_sinkhole) approach to block a DNS query. Domain name from
|
||||
the request, IP address from the response, and the CNAME record will be checked against configured blacklists.
|
||||
|
@ -200,6 +201,8 @@ in hosts format (YAML literal block scalar style). All Urls must be grouped to a
|
|||
# inline definition with YAML literal block scalar style
|
||||
someadsdomain.com
|
||||
anotheradsdomain.com
|
||||
# this is a regex
|
||||
/^banners?[_.-]/
|
||||
special:
|
||||
- https://raw.githubusercontent.com/StevenBlack/hosts/master/alternates/fakenews/hosts
|
||||
whiteLists:
|
||||
|
@ -218,6 +221,15 @@ in hosts format (YAML literal block scalar style). All Urls must be grouped to a
|
|||
If a group has **only** whitelist entries -> this means only domains from this list are allowed, all other domains will
|
||||
be blocked
|
||||
|
||||
#### Regex support
|
||||
|
||||
You can use regex to define patterns to block. A regex entry must start and end with the slash character (/). Some
|
||||
examples:
|
||||
|
||||
- `/baddomain/` will block `www.baddomain.com`, `baddomain.com`, but also `mybaddomain-sometext.com`
|
||||
- `/^baddomain/` will block `baddomain.com`, but not `www.baddomain.com`
|
||||
- `/^apple\.(de|com)$/` will only block `apple.de` and `apple.com`
|
||||
|
||||
### Client groups
|
||||
|
||||
In this configuration section, you can define, which blocking group(s) should be used for which client in your network.
|
||||
|
|
|
@ -22,4 +22,5 @@
|
|||
*[CSV]: Comma-separated values
|
||||
*[SAMBA]: Server Message Block Protocol (Windows Network File System)
|
||||
*[DHCP]: Dynamic Host Configuration Protocol
|
||||
*[duration format]: Example: "300ms", "1.5h" or "2h45m". Valid time units are "ns", "us", "ms", "s", "m", "h".
|
||||
*[duration format]: Example: "300ms", "1.5h" or "2h45m". Valid time units are "ns", "us", "ms", "s", "m", "h".
|
||||
*[regex]: Regular expression
|
|
@ -12,6 +12,7 @@ Blocky is a DNS proxy and ad-blocker for the local network written in Go with fo
|
|||
|
||||
* Definition of black and white lists per client group (Kids, Smart home devices, etc.)
|
||||
* Periodical reload of external black and white lists
|
||||
* Regex support
|
||||
* Blocking of request domain, response CNAME (deep CNAME inspection) and response IP addresses (against IP lists)
|
||||
|
||||
- **Advanced DNS configuration** - :nerd: not just an ad-blocker
|
||||
|
|
|
@ -0,0 +1,182 @@
|
|||
package lists
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/0xERR0R/blocky/log"
|
||||
|
||||
"github.com/0xERR0R/blocky/util"
|
||||
)
|
||||
|
||||
// cache is a read-only set of entries built once by a cacheFactory.
type cache interface {
	// elementCount returns the number of entries held by the cache.
	elementCount() int
	// contains reports whether searchString matches an entry of the cache.
	contains(searchString string) bool
}

// cacheFactory collects entries one by one and then builds the final cache.
// Implementations may skip duplicates or entries they cannot process
// (e.g. invalid regular expressions).
type cacheFactory interface {
	// addEntry adds a single entry to the factory.
	addEntry(entry string)
	// create builds the cache from all previously added entries.
	create() cache
}
|
||||
|
||||
// stringCache stores all known strings in buckets keyed by string length.
// Each bucket is the sorted concatenation of all entries of that length, so
// membership testing is a binary search over fixed-width slots instead of a
// huge map.
type stringCache map[int]string

// elementCount returns the total number of strings in the cache.
func (cache stringCache) elementCount() int {
	count := 0

	for k, v := range cache {
		// a bucket of key k holds len(v)/k entries, each exactly k bytes wide
		count += len(v) / k
	}

	return count
}

// contains reports whether searchString is present in the cache. The check
// is case-insensitive: the needle is lowercased once up front (buckets are
// expected to hold lowercase entries).
func (cache stringCache) contains(searchString string) bool {
	// Fix: lowercase BEFORE the binary search. The previous code lowercased
	// only in the final equality check, so a mixed-case needle could steer
	// sort.Search to the wrong slot and miss an existing entry.
	searchString = strings.ToLower(searchString)

	searchLen := len(searchString)
	if searchLen == 0 {
		return false
	}

	// number of fixed-width slots in the bucket for this length
	searchBucketLen := len(cache[searchLen]) / searchLen
	idx := sort.Search(searchBucketLen, func(i int) bool {
		return cache[searchLen][i*searchLen:i*searchLen+searchLen] >= searchString
	})

	if idx < searchBucketLen {
		return cache[searchLen][idx*searchLen:idx*searchLen+searchLen] == searchString
	}

	return false
}
|
||||
|
||||
type stringCacheFactory struct {
|
||||
cache stringCache
|
||||
keys map[string]struct{}
|
||||
tmp map[int]*strings.Builder
|
||||
}
|
||||
|
||||
func newStringCacheFactory() cacheFactory {
|
||||
return &stringCacheFactory{
|
||||
cache: make(stringCache),
|
||||
// temporary map to remove duplicates
|
||||
keys: make(map[string]struct{}),
|
||||
tmp: make(map[int]*strings.Builder),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *stringCacheFactory) addEntry(entry string) {
|
||||
if _, value := s.keys[entry]; !value {
|
||||
s.keys[entry] = struct{}{}
|
||||
if s.tmp[len(entry)] == nil {
|
||||
s.tmp[len(entry)] = &strings.Builder{}
|
||||
}
|
||||
|
||||
s.tmp[len(entry)].WriteString(entry)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *stringCacheFactory) create() cache {
|
||||
for k, v := range s.tmp {
|
||||
chunks := util.Chunks(v.String(), k)
|
||||
sort.Strings(chunks)
|
||||
|
||||
s.cache[k] = strings.Join(chunks, "")
|
||||
|
||||
v.Reset()
|
||||
}
|
||||
|
||||
return s.cache
|
||||
}
|
||||
|
||||
type regexCache []*regexp.Regexp
|
||||
|
||||
func (cache regexCache) elementCount() int {
|
||||
return len(cache)
|
||||
}
|
||||
|
||||
func (cache regexCache) contains(searchString string) bool {
|
||||
for _, regex := range cache {
|
||||
if regex.MatchString(searchString) {
|
||||
log.PrefixedLog("regexCache").Debugf("regex '%s' matched with '%s'", regex, searchString)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
type regexCacheFactory struct {
|
||||
cache regexCache
|
||||
}
|
||||
|
||||
func (r *regexCacheFactory) addEntry(entry string) {
|
||||
compile, err := regexp.Compile(entry)
|
||||
if err != nil {
|
||||
log.Log().Warnf("invalid regex '%s'", entry)
|
||||
} else {
|
||||
r.cache = append(r.cache, compile)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *regexCacheFactory) create() cache {
|
||||
return r.cache
|
||||
}
|
||||
|
||||
func newRegexCacheFactory() cacheFactory {
|
||||
return ®exCacheFactory{
|
||||
cache: make(regexCache, 0),
|
||||
}
|
||||
}
|
||||
|
||||
type chainedCache struct {
|
||||
caches []cache
|
||||
}
|
||||
|
||||
func (cache chainedCache) elementCount() int {
|
||||
sum := 0
|
||||
for _, c := range cache.caches {
|
||||
sum += c.elementCount()
|
||||
}
|
||||
|
||||
return sum
|
||||
}
|
||||
|
||||
func (cache chainedCache) contains(searchString string) bool {
|
||||
for _, c := range cache.caches {
|
||||
if c.contains(searchString) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
type chainedCacheFactory struct {
|
||||
stringCacheFactory cacheFactory
|
||||
regexCacheFactory cacheFactory
|
||||
}
|
||||
|
||||
var regexPattern = regexp.MustCompile("^/.*/$")
|
||||
|
||||
func (r *chainedCacheFactory) addEntry(entry string) {
|
||||
if regexPattern.MatchString(entry) {
|
||||
entry = strings.TrimSpace(strings.Trim(entry, "/"))
|
||||
r.regexCacheFactory.addEntry(entry)
|
||||
} else {
|
||||
r.stringCacheFactory.addEntry(entry)
|
||||
}
|
||||
}
|
||||
|
||||
func (r *chainedCacheFactory) create() cache {
|
||||
return &chainedCache{
|
||||
caches: []cache{r.stringCacheFactory.create(), r.regexCacheFactory.create()},
|
||||
}
|
||||
}
|
||||
|
||||
func newChainedCacheFactory() cacheFactory {
|
||||
return &chainedCacheFactory{
|
||||
stringCacheFactory: newStringCacheFactory(),
|
||||
regexCacheFactory: newRegexCacheFactory(),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
package lists
|
||||
|
||||
import (
|
||||
. "github.com/onsi/ginkgo"
|
||||
. "github.com/onsi/gomega"
|
||||
)
|
||||
|
||||
// Ginkgo spec for the cache implementations. Note that the factories and
// caches are built at spec-tree construction time (directly inside the
// When blocks), so every It below runs against the same prebuilt cache.
var _ = Describe("Caches", func() {
	Describe("String cache", func() {
		When("string cache was created", func() {
			factory := newStringCacheFactory()
			factory.addEntry("google.com")
			factory.addEntry("apple.com")
			cache := factory.create()
			It("should match if cache contains string", func() {
				Expect(cache.contains("apple.com")).Should(BeTrue())
				Expect(cache.contains("google.com")).Should(BeTrue())
				// only exact matches — no substring/subdomain semantics
				Expect(cache.contains("www.google.com")).Should(BeFalse())
			})
			It("should return correct element count", func() {
				Expect(cache.elementCount()).Should(Equal(2))
			})
		})
	})

	Describe("Regex cache", func() {
		When("regex cache was created", func() {
			factory := newRegexCacheFactory()
			factory.addEntry(".*google.com")
			factory.addEntry("^apple\\.(de|com)$")
			// unanchored pattern: matches "amazon" anywhere in the string
			factory.addEntry("amazon")
			// this is not a valid regex, will be ignored by the factory
			factory.addEntry("(wrongRegex")
			cache := factory.create()
			It("should match if one regex in cache matches string", func() {
				Expect(cache.contains("google.com")).Should(BeTrue())
				// unanchored '.' in the pattern also matches these variants
				Expect(cache.contains("google.coma")).Should(BeTrue())
				Expect(cache.contains("agoogle.com")).Should(BeTrue())
				Expect(cache.contains("www.google.com")).Should(BeTrue())
				Expect(cache.contains("apple.com")).Should(BeTrue())
				Expect(cache.contains("apple.de")).Should(BeTrue())
				// the apple pattern is fully anchored: no other TLDs,
				// no subdomains, and the dot must be present
				Expect(cache.contains("apple.it")).Should(BeFalse())
				Expect(cache.contains("www.apple.com")).Should(BeFalse())
				Expect(cache.contains("applecom")).Should(BeFalse())
				Expect(cache.contains("www.amazon.com")).Should(BeTrue())
				Expect(cache.contains("amazon.com")).Should(BeTrue())
				Expect(cache.contains("myamazon.com")).Should(BeTrue())
			})
			It("should return correct element count", func() {
				// 3 valid patterns; the invalid one was dropped
				Expect(cache.elementCount()).Should(Equal(3))
			})
		})
	})

	Describe("Chained cache", func() {
		When("chained cache was created", func() {
			factory := newChainedCacheFactory()
			// slash-delimited entries go to the regex cache …
			factory.addEntry("/.*google.com/")
			factory.addEntry("/^apple\\.(de|com)$/")
			// … plain entries go to the string cache
			factory.addEntry("amazon.com")
			cache := factory.create()
			It("should match if one regex in cache matches string", func() {
				Expect(cache.contains("google.com")).Should(BeTrue())
				Expect(cache.contains("google.coma")).Should(BeTrue())
				Expect(cache.contains("agoogle.com")).Should(BeTrue())
				Expect(cache.contains("www.google.com")).Should(BeTrue())
				Expect(cache.contains("apple.com")).Should(BeTrue())
				Expect(cache.contains("amazon.com")).Should(BeTrue())
				Expect(cache.contains("apple.de")).Should(BeTrue())
				Expect(cache.contains("www.apple.com")).Should(BeFalse())
				Expect(cache.contains("applecom")).Should(BeFalse())
			})
			It("should return correct element count", func() {
				// 2 regex entries + 1 string entry
				Expect(cache.elementCount()).Should(Equal(3))
			})
		})
	})

})
|
|
@ -9,15 +9,12 @@ import (
|
|||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/0xERR0R/blocky/evt"
|
||||
"github.com/0xERR0R/blocky/log"
|
||||
"github.com/0xERR0R/blocky/util"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
|
@ -32,36 +29,6 @@ const (
|
|||
// )
|
||||
type ListCacheType int
|
||||
|
||||
type stringCache map[int]string
|
||||
|
||||
func (cache stringCache) elementCount() int {
|
||||
count := 0
|
||||
|
||||
for k, v := range cache {
|
||||
count += len(v) / k
|
||||
}
|
||||
|
||||
return count
|
||||
}
|
||||
|
||||
func (cache stringCache) contains(searchString string) bool {
|
||||
searchLen := len(searchString)
|
||||
if searchLen == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
searchBucketLen := len(cache[searchLen]) / searchLen
|
||||
idx := sort.Search(searchBucketLen, func(i int) bool {
|
||||
return cache[searchLen][i*searchLen:i*searchLen+searchLen] >= searchString
|
||||
})
|
||||
|
||||
if idx < searchBucketLen {
|
||||
return cache[searchLen][idx*searchLen:idx*searchLen+searchLen] == strings.ToLower(searchString)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// Matcher checks if a domain is in a list
|
||||
type Matcher interface {
|
||||
// Match matches passed domain name against cached list entries
|
||||
|
@ -73,7 +40,7 @@ type Matcher interface {
|
|||
|
||||
// ListCache generic cache of strings divided in groups
|
||||
type ListCache struct {
|
||||
groupCaches map[string]stringCache
|
||||
groupCaches map[string]cache
|
||||
lock sync.RWMutex
|
||||
|
||||
groupToLinks map[string][]string
|
||||
|
@ -115,7 +82,7 @@ func (b *ListCache) Configuration() (result []string) {
|
|||
// NewListCache creates new list instance
|
||||
func NewListCache(t ListCacheType, groupToLinks map[string][]string, refreshPeriod time.Duration,
|
||||
downloadTimeout time.Duration) *ListCache {
|
||||
groupCaches := make(map[string]stringCache)
|
||||
groupCaches := make(map[string]cache)
|
||||
|
||||
timeout := downloadTimeout
|
||||
if downloadTimeout == 0 {
|
||||
|
@ -159,15 +126,12 @@ func logger() *logrus.Entry {
|
|||
}
|
||||
|
||||
// downloads and reads files with domain names and creates cache for them
|
||||
func (b *ListCache) createCacheForGroup(links []string) stringCache {
|
||||
cache := make(stringCache)
|
||||
|
||||
keys := make(map[string]struct{})
|
||||
|
||||
func (b *ListCache) createCacheForGroup(links []string) cache {
|
||||
var wg sync.WaitGroup
|
||||
|
||||
c := make(chan []string, len(links))
|
||||
|
||||
// loop over links (http/local) or inline definitions
|
||||
for _, link := range links {
|
||||
wg.Add(1)
|
||||
|
||||
|
@ -176,7 +140,7 @@ func (b *ListCache) createCacheForGroup(links []string) stringCache {
|
|||
|
||||
wg.Wait()
|
||||
|
||||
tmp := make(map[int]*strings.Builder)
|
||||
factory := newChainedCacheFactory()
|
||||
|
||||
Loop:
|
||||
for {
|
||||
|
@ -186,13 +150,7 @@ Loop:
|
|||
return nil
|
||||
}
|
||||
for _, entry := range res {
|
||||
if _, value := keys[entry]; !value {
|
||||
keys[entry] = struct{}{}
|
||||
if tmp[len(entry)] == nil {
|
||||
tmp[len(entry)] = &strings.Builder{}
|
||||
}
|
||||
tmp[len(entry)].WriteString(entry)
|
||||
}
|
||||
factory.addEntry(entry)
|
||||
}
|
||||
default:
|
||||
close(c)
|
||||
|
@ -200,16 +158,7 @@ Loop:
|
|||
}
|
||||
}
|
||||
|
||||
for k, v := range tmp {
|
||||
chunks := util.Chunks(v.String(), k)
|
||||
sort.Strings(chunks)
|
||||
|
||||
cache[k] = strings.Join(chunks, "")
|
||||
|
||||
v.Reset()
|
||||
}
|
||||
|
||||
return cache
|
||||
return factory.create()
|
||||
}
|
||||
|
||||
// Match matches passed domain name against cached list entries
|
||||
|
@ -218,7 +167,7 @@ func (b *ListCache) Match(domain string, groupsToCheck []string) (found bool, gr
|
|||
defer b.lock.RUnlock()
|
||||
|
||||
for _, g := range groupsToCheck {
|
||||
if b.groupCaches[g].contains(domain) {
|
||||
if c, ok := b.groupCaches[g]; ok && c.contains(domain) {
|
||||
return true, g
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@ var _ = Describe("ListCache", func() {
|
|||
emptyFile, file1, file2, file3 *os.File
|
||||
server1, server2, server3 *httptest.Server
|
||||
)
|
||||
|
||||
BeforeEach(func() {
|
||||
emptyFile = TempFile("#empty file\n\n")
|
||||
server1 = TestServer("blocked1.com\nblocked1a.com\n192.168.178.55")
|
||||
|
@ -259,6 +258,23 @@ var _ = Describe("ListCache", func() {
|
|||
Expect(group).Should(Equal("gr1"))
|
||||
})
|
||||
})
|
||||
When("inline regex content is defined", func() {
|
||||
It("should match", func() {
|
||||
lists := map[string][]string{
|
||||
"gr1": {"/^apple\\.(de|com)$/\n"},
|
||||
}
|
||||
|
||||
sut := NewListCache(ListCacheTypeBlacklist, lists, 0, 0)
|
||||
|
||||
found, group := sut.Match("apple.com", []string{"gr1"})
|
||||
Expect(found).Should(BeTrue())
|
||||
Expect(group).Should(Equal("gr1"))
|
||||
|
||||
found, group = sut.Match("apple.de", []string{"gr1"})
|
||||
Expect(found).Should(BeTrue())
|
||||
Expect(group).Should(Equal("gr1"))
|
||||
})
|
||||
})
|
||||
})
|
||||
Describe("Configuration", func() {
|
||||
When("refresh is enabled", func() {
|
||||
|
|
Loading…
Reference in New Issue