Add maxErrorsPerFile blocking configuration (#986)

* Add maxErrorsPerFile blocking configuration

The default max errors per file of 5 is too small IMHO.
This commit makes this number user-configurable.

* squash: fix lint

* squash: docs

* squash: change type to int to allow -1

* squash: test that the `maxErrorsPerFile` is actually used

---------

Co-authored-by: ThinkChaos <ThinkChaos@users.noreply.github.com>
This commit is contained in:
mandrav 2023-04-12 21:43:49 +03:00 committed by GitHub
parent 74516ca0b9
commit 015b565137
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 64 additions and 27 deletions

View File

@ -21,6 +21,7 @@ type BlockingConfig struct {
FailStartOnListError bool `yaml:"failStartOnListError" default:"false"` // Deprecated
ProcessingConcurrency uint `yaml:"processingConcurrency" default:"4"`
StartStrategy StartStrategyType `yaml:"startStrategy" default:"blocking"`
MaxErrorsPerFile int `yaml:"maxErrorsPerFile" default:"5"`
}
// IsEnabled implements `config.Configurable`.
@ -46,6 +47,8 @@ func (c *BlockingConfig) LogConfig(logger *logrus.Entry) {
logger.Infof("startStrategy = %s", c.StartStrategy)
logger.Infof("maxErrorsPerFile = %d", c.MaxErrorsPerFile)
if c.RefreshPeriod > 0 {
logger.Infof("refresh = every %s", c.RefreshPeriod)
} else {

View File

@ -111,6 +111,10 @@ blocking:
downloadCooldown: 10s
# optional: if failOnError, application startup will fail if at least one list can't be downloaded / opened. Default: blocking
startStrategy: failOnError
# Number of errors allowed in a list before it is considered invalid.
# A value of -1 disables the limit.
# Default: 5
maxErrorsPerFile: 5
# optional: configuration for caching of DNS responses
caching:

View File

@ -496,7 +496,7 @@ You can configure the list download attempts according to your internet connecti
### Start strategy
You can configure the blocking behavior during application start of blocky.
If no starategy is selected blocking will be used.
If no strategy is selected blocking will be used.
| startStrategy | Description |
|---------------|-------------------------------------------------------------------------------------------------------|
@ -511,6 +511,18 @@ If no starategy is selected blocking will be used.
startStrategy: failOnError
```
### Max Errors per file
Number of errors allowed in a list before it is considered invalid and parsing stops.
A value of -1 disables the limit.
!!! example
```yaml
blocking:
maxErrorsPerFile: 10
```
### Concurrency
Blocky downloads and processes links in a single group concurrently. With parameter `processingConcurrency` you can adjust

View File

@ -23,7 +23,6 @@ import (
const (
defaultProcessingConcurrency = 4
chanCap = 1000
maxErrorsPerFile = 5
)
// ListCacheType represents the type of cached list ENUM(
@ -47,6 +46,7 @@ type ListCache struct {
downloader FileDownloader
listType ListCacheType
processingConcurrency uint
maxErrorsPerFile int
}
// LogConfig implements `config.Configurable`.
@ -64,7 +64,7 @@ func (b *ListCache) LogConfig(logger *logrus.Entry) {
// NewListCache creates new list instance
func NewListCache(t ListCacheType, groupToLinks map[string][]string, refreshPeriod time.Duration,
downloader FileDownloader, processingConcurrency uint, async bool,
downloader FileDownloader, processingConcurrency uint, async bool, maxErrorsPerFile int,
) (*ListCache, error) {
if processingConcurrency == 0 {
processingConcurrency = defaultProcessingConcurrency
@ -80,6 +80,7 @@ func NewListCache(t ListCacheType, groupToLinks map[string][]string, refreshPeri
downloader: downloader,
listType: t,
processingConcurrency: processingConcurrency,
maxErrorsPerFile: maxErrorsPerFile,
}
var initError error
@ -261,7 +262,7 @@ func (b *ListCache) parseFile(ctx context.Context, name, link string, resultCh c
}
defer r.Close()
p := parsers.AllowErrors(parsers.Hosts(r), maxErrorsPerFile)
p := parsers.AllowErrors(parsers.Hosts(r), b.maxErrorsPerFile)
p.OnErr(func(err error) {
logger().Warnf("parse error: %s, trying to continue", err)
})

View File

@ -12,7 +12,7 @@ func BenchmarkRefresh(b *testing.B) {
"gr1": {file1, file2, file3},
}
cache, _ := NewListCache(ListCacheTypeBlacklist, lists, -1, NewDownloader(), 5, false)
cache, _ := NewListCache(ListCacheTypeBlacklist, lists, -1, NewDownloader(), 5, false, 5)
b.ReportAllocs()

View File

@ -27,8 +27,10 @@ var _ = Describe("ListCache", func() {
tmpDir *TmpFolder
emptyFile, file1, file2, file3 *TmpFile
server1, server2, server3 *httptest.Server
maxErrorsPerFile int
)
BeforeEach(func() {
maxErrorsPerFile = 5
tmpDir = NewTmpFolder("ListCache")
Expect(tmpDir.Error).Should(Succeed())
DeferCleanup(tmpDir.Clean)
@ -56,7 +58,9 @@ var _ = Describe("ListCache", func() {
lists := map[string][]string{
"gr0": {emptyFile.Path},
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false)
sut, err := NewListCache(
ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false, maxErrorsPerFile,
)
Expect(err).Should(Succeed())
group := sut.Match("", []string{"gr0"})
@ -69,7 +73,9 @@ var _ = Describe("ListCache", func() {
lists := map[string][]string{
"gr1": {emptyFile.Path},
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false)
sut, err := NewListCache(
ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false, maxErrorsPerFile,
)
Expect(err).Should(Succeed())
group := sut.Match("google.com", []string{"gr1"})
@ -93,6 +99,7 @@ var _ = Describe("ListCache", func() {
mockDownloader,
defaultProcessingConcurrency,
false,
maxErrorsPerFile,
)
Expect(err).Should(Succeed())
@ -119,7 +126,7 @@ var _ = Describe("ListCache", func() {
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(),
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).Should(Succeed())
group := sut.Match("inlinedomain1.com", []string{"gr1"})
@ -148,6 +155,7 @@ var _ = Describe("ListCache", func() {
mockDownloader,
defaultProcessingConcurrency,
false,
maxErrorsPerFile,
)
Expect(err).Should(Succeed())
@ -186,7 +194,7 @@ var _ = Describe("ListCache", func() {
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, mockDownloader,
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).Should(Succeed())
By("Lists loaded without err", func() {
@ -209,7 +217,9 @@ var _ = Describe("ListCache", func() {
"gr2": {server3.URL},
}
sut, _ := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false)
sut, _ := NewListCache(
ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false, maxErrorsPerFile,
)
group := sut.Match("blocked1.com", []string{"gr1", "gr2"})
Expect(group).Should(ContainElement("gr1"))
@ -228,7 +238,9 @@ var _ = Describe("ListCache", func() {
"gr2": {server3.URL, "someotherfile"},
}
sut, _ := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false)
sut, _ := NewListCache(
ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false, maxErrorsPerFile,
)
group := sut.Match("blocked1.com", []string{"gr1", "gr2"})
Expect(group).Should(ContainElement("gr1"))
@ -252,7 +264,9 @@ var _ = Describe("ListCache", func() {
resultCnt = cnt
})
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false)
sut, err := NewListCache(
ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false, maxErrorsPerFile,
)
Expect(err).Should(Succeed())
group := sut.Match("blocked1.com", []string{})
@ -267,7 +281,9 @@ var _ = Describe("ListCache", func() {
"gr2": {"file://" + file3.Path},
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false)
sut, err := NewListCache(
ListCacheTypeBlacklist, lists, 0, NewDownloader(), defaultProcessingConcurrency, false, maxErrorsPerFile,
)
Expect(err).Should(Succeed())
Expect(sut.groupedCache.ElementCount("gr1")).Should(Equal(3))
@ -293,7 +309,7 @@ var _ = Describe("ListCache", func() {
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(),
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).Should(Succeed())
Expect(sut.groupedCache.ElementCount("gr1")).Should(Equal(lines1 + lines2 + lines3))
@ -310,7 +326,7 @@ var _ = Describe("ListCache", func() {
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(),
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).Should(Succeed())
Expect(sut.groupedCache.ElementCount("gr1")).Should(Equal(2))
@ -333,7 +349,7 @@ var _ = Describe("ListCache", func() {
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(),
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).Should(Succeed())
group := sut.Match("inlinedomain1.com", []string{"gr1"})
@ -341,17 +357,18 @@ var _ = Describe("ListCache", func() {
})
})
When("Text file has too many errors", func() {
It("should fail parsing", func() {
BeforeEach(func() {
maxErrorsPerFile = 0
})
FIt("should fail parsing", func() {
lists := map[string][]string{
"gr1": {
inlineList(
strings.Repeat("invaliddomain!\n", maxErrorsPerFile+1), // too many errors
),
inlineList("invaliddomain!"), // too many errors since `maxErrorsPerFile` is 0
},
}
_, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(),
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).ShouldNot(Succeed())
Expect(err).Should(MatchError(parsers.ErrTooManyErrors))
})
@ -363,7 +380,7 @@ var _ = Describe("ListCache", func() {
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(),
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).Should(Succeed())
group := sut.Match("inlinedomain1.com", []string{"gr1"})
@ -377,7 +394,7 @@ var _ = Describe("ListCache", func() {
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, 0, NewDownloader(),
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).Should(Succeed())
group := sut.Match("apple.com", []string{"gr1"})
@ -405,7 +422,7 @@ var _ = Describe("ListCache", func() {
}
sut, err := NewListCache(ListCacheTypeBlacklist, lists, time.Hour, NewDownloader(),
defaultProcessingConcurrency, false)
defaultProcessingConcurrency, false, maxErrorsPerFile)
Expect(err).Should(Succeed())
sut.LogConfig(logger)
@ -424,7 +441,7 @@ var _ = Describe("ListCache", func() {
}
_, err := NewListCache(ListCacheTypeBlacklist, lists, -1, NewDownloader(),
defaultProcessingConcurrency, true)
defaultProcessingConcurrency, true, maxErrorsPerFile)
Expect(err).Should(Succeed())
})
})

View File

@ -105,10 +105,10 @@ func NewBlockingResolver(
downloader := createDownloader(cfg, bootstrap)
blacklistMatcher, blErr := lists.NewListCache(lists.ListCacheTypeBlacklist, cfg.BlackLists,
refreshPeriod, downloader, cfg.ProcessingConcurrency,
(cfg.StartStrategy == config.StartStrategyTypeFast))
(cfg.StartStrategy == config.StartStrategyTypeFast), cfg.MaxErrorsPerFile)
whitelistMatcher, wlErr := lists.NewListCache(lists.ListCacheTypeWhitelist, cfg.WhiteLists,
refreshPeriod, downloader, cfg.ProcessingConcurrency,
(cfg.StartStrategy == config.StartStrategyTypeFast))
(cfg.StartStrategy == config.StartStrategyTypeFast), cfg.MaxErrorsPerFile)
whitelistOnlyGroups := determineWhitelistOnlyGroups(&cfg)
err = multierror.Append(err, blErr, wlErr).ErrorOrNil()