forked from nats-io/nats-server
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsublist.go
691 lines (631 loc) · 15 KB
/
sublist.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
// Copyright 2016-2017 Apcera Inc. All rights reserved.
// Package sublist is a routing mechanism to handle subject distribution
// and provides a facility to match subjects from published messages to
// interested subscribers. Subscribers can have wildcard subjects to match
// multiple published subjects.
package server
import (
"bytes"
"errors"
"strings"
"sync"
"sync/atomic"
)
// Wildcard characters and the token separator used when parsing subjects.
const (
// pwc is the partial wildcard character. It is handled as a wildcard only
// when it forms a token on its own.
pwc = '*'
// fwc is the full wildcard character. Insert and Remove reject any token
// that follows a full wildcard token.
fwc = '>'
// tsep is the token separator in string form, as passed to strings.Split.
tsep = "."
// btsep is the token separator as a character, used for byte-wise scans.
btsep = '.'
)
// Sublist related errors
var (
// ErrInvalidSubject is returned by Insert and Remove when a subject has an
// empty token or has any token following a full wildcard token.
ErrInvalidSubject = errors.New("sublist: Invalid Subject")
// ErrNotFound is returned by Remove when the subscription could not be
// located in the sublist.
ErrNotFound = errors.New("sublist: No Matches Found")
)
// slCacheMax bounds the number of entries kept in the frontend match cache.
// Match deletes one arbitrary entry whenever the cache grows past this size.
const slCacheMax = 1024
// SublistResult holds the subscriptions matched for a subject. It keeps
// plain subscriptions and queue subscriptions apart so queue groups can be
// processed as units.
type SublistResult struct {
// psubs are the matching plain (non-queue) subscriptions.
psubs []*subscription
// qsubs holds one slice per queue group; all entries of an inner slice share
// the same queue name.
qsubs [][]*subscription // don't make this a map, too expensive to iterate
}
// A Sublist stores and efficiently retrieves subscriptions.
// Subscriptions live in a trie keyed by subject token, fronted by a cache of
// previously computed match results.
type Sublist struct {
// The embedded lock is taken by Insert, Remove, Match, Count, CacheCount
// and Stats around their access to the fields below.
sync.RWMutex
// genid is incremented atomically on every successful Insert and Remove.
genid uint64
// matches counts calls to Match; updated atomically.
matches uint64
// cacheHits counts Match calls answered from the cache; updated atomically.
cacheHits uint64
// inserts counts successful Insert calls.
inserts uint64
// removes counts successful Remove calls.
removes uint64
// cache maps a literal subject to its previously computed match result.
cache map[string]*SublistResult
// root is the top level of the subject trie.
root *level
// count is the number of subscriptions currently stored.
count uint32
}
// A node contains subscriptions and a pointer to the next level.
// It represents one token position within a subject.
type node struct {
// next is the level holding the nodes for the token that follows this one.
next *level
// psubs are the plain subscriptions whose subject ends at this node.
psubs []*subscription
// qsubs are the queue subscriptions whose subject ends at this node, with
// one inner slice per queue group.
qsubs [][]*subscription
}
// A level represents a group of nodes and special pointers to
// wildcard nodes.
type level struct {
// nodes maps a literal token to its node.
nodes map[string]*node
// pwc and fwc are the nodes for the partial ('*') and full ('>') wildcard
// tokens at this level, or nil when no such token has been inserted.
pwc, fwc *node
}
// newNode returns an empty node whose plain subscription list is
// pre-allocated with room for four entries.
func newNode() *node {
	n := new(node)
	n.psubs = make([]*subscription, 0, 4)
	return n
}
// newLevel returns an empty level with an initialized token map and no
// wildcard nodes. Tokens are used directly as keys of a regular Go map.
func newLevel() *level {
	l := new(level)
	l.nodes = make(map[string]*node)
	return l
}
// NewSublist creates an empty Sublist with a fresh root level and an empty
// match cache.
func NewSublist() *Sublist {
	sl := new(Sublist)
	sl.root = newLevel()
	sl.cache = make(map[string]*SublistResult)
	return sl
}
// Insert adds a subscription into the sublist.
//
// The subscription's subject is split on '.' into tokens. A subject with an
// empty token, or with any token following a full wildcard ('>') token, is
// rejected with ErrInvalidSubject. Validation happens before the trie is
// modified, so a rejected insert never leaves empty nodes behind.
//
// On success the subscription is stored on the node for its last token
// (grouped by queue name for queue subscriptions), matching cache entries
// are updated, and genid is incremented.
func (s *Sublist) Insert(sub *subscription) error {
	// copy the subject since we hold this and this might be part of a large byte slice.
	subject := string(sub.subject)
	tsa := [32]string{}
	tokens := tsa[:0]
	start := 0
	for i := 0; i < len(subject); i++ {
		if subject[i] == btsep {
			tokens = append(tokens, subject[start:i])
			start = i + 1
		}
	}
	tokens = append(tokens, subject[start:])

	// Validate every token up front so that an invalid subject cannot create
	// nodes for its leading tokens and then bail out.
	sfwc := false
	for _, t := range tokens {
		if len(t) == 0 || sfwc {
			return ErrInvalidSubject
		}
		if len(t) == 1 && t[0] == fwc {
			sfwc = true
		}
	}

	s.Lock()
	defer s.Unlock()

	l := s.root
	var n *node
	for _, t := range tokens {
		// Only single character tokens can be wildcards.
		isPwc := len(t) == 1 && t[0] == pwc
		isFwc := len(t) == 1 && t[0] == fwc

		switch {
		case isPwc:
			n = l.pwc
		case isFwc:
			n = l.fwc
		default:
			n = l.nodes[t]
		}
		if n == nil {
			n = newNode()
			switch {
			case isPwc:
				l.pwc = n
			case isFwc:
				l.fwc = n
			default:
				l.nodes[t] = n
			}
		}
		if n.next == nil {
			n.next = newLevel()
		}
		l = n.next
	}

	if sub.queue == nil {
		n.psubs = append(n.psubs, sub)
	} else if i := findQSliceForSub(sub, n.qsubs); i >= 0 {
		// Existing queue group on this node.
		n.qsubs[i] = append(n.qsubs[i], sub)
	} else {
		// First member of this queue group on this node.
		n.qsubs = append(n.qsubs, []*subscription{sub})
	}

	s.count++
	s.inserts++
	s.addToCache(subject, sub)
	atomic.AddUint64(&s.genid, 1)
	return nil
}
// copyResult returns a copy of r whose psubs slice and every per-queue slice
// have their own backing arrays. The subscriptions themselves are shared.
func copyResult(r *SublistResult) *SublistResult {
	dup := new(SublistResult)
	dup.psubs = append(dup.psubs, r.psubs...)
	for i := range r.qsubs {
		var group []*subscription
		group = append(group, r.qsubs[i]...)
		dup.qsubs = append(dup.qsubs, group)
	}
	return dup
}
// addToCache folds a newly inserted subscription into every cached result
// whose literal subject matches the subscription's subject.
// Assumes write lock is held.
func (s *Sublist) addToCache(subject string, sub *subscription) {
	for key, cached := range s.cache {
		if !matchLiteral(key, subject) {
			continue
		}
		// Others may still reference the cached result, so extend a private
		// copy and swap it in rather than modifying it in place.
		updated := copyResult(cached)
		if sub.queue != nil {
			if idx := findQSliceForSub(sub, updated.qsubs); idx < 0 {
				updated.qsubs = append(updated.qsubs, []*subscription{sub})
			} else {
				updated.qsubs[idx] = append(updated.qsubs[idx], sub)
			}
		} else {
			updated.psubs = append(updated.psubs, sub)
		}
		s.cache[key] = updated
	}
}
// removeFromCache drops every cache entry whose literal subject matches the
// given subscription subject. Assumes write lock is held.
func (s *Sublist) removeFromCache(subject string, sub *subscription) {
	for key := range s.cache {
		if matchLiteral(key, subject) {
			// Someone else may still be referencing the cached result, so it
			// cannot be edited safely; drop it and let it re-populate.
			delete(s.cache, key)
		}
	}
}
// Match returns the plain and queue subscriptions interested in the given
// literal subject. Results are served from the cache when present; otherwise
// the trie is walked and the outcome is cached.
func (s *Sublist) Match(subject string) *SublistResult {
	s.RLock()
	atomic.AddUint64(&s.matches, 1)
	cached, hit := s.cache[subject]
	s.RUnlock()
	if hit {
		atomic.AddUint64(&s.cacheHits, 1)
		return cached
	}

	// Split the subject into tokens, using a fixed array as initial storage.
	var backing [32]string
	toks := backing[:0]
	from := 0
	for pos := 0; pos < len(subject); pos++ {
		if subject[pos] != btsep {
			continue
		}
		toks = append(toks, subject[from:pos])
		from = pos + 1
	}
	toks = append(toks, subject[from:])

	// FIXME(dlc) - Make shared pool between sublist and client readLoop?
	res := new(SublistResult)

	s.Lock()
	matchLevel(s.root, toks, res)
	// Remember the result for subsequent lookups.
	s.cache[subject] = res
	// Keep the cache bounded by slCacheMax: drop one arbitrary entry.
	if len(s.cache) > slCacheMax {
		for victim := range s.cache {
			delete(s.cache, victim)
			break
		}
	}
	s.Unlock()

	return res
}
// addNodeToResults appends the subscriptions stored on node n to results.
//
// Plain subscriptions are appended to results.psubs. Each non-empty queue
// group of the node is merged into the result group with the same queue
// name, or added as a new group when the result has none yet.
//
// A new group is added as a copy of the node's slice, never the slice
// itself. Results are cached and handed to callers, while Remove edits a
// node's slices in place; sharing the backing array would let those edits
// (and later merge appends) corrupt results that are still in use.
func addNodeToResults(n *node, results *SublistResult) {
	results.psubs = append(results.psubs, n.psubs...)
	for _, qr := range n.qsubs {
		if len(qr) == 0 {
			continue
		}
		// Need to find matching list in results
		if i := findQSliceForSub(qr[0], results.qsubs); i >= 0 {
			results.qsubs[i] = append(results.qsubs[i], qr...)
		} else {
			results.qsubs = append(results.qsubs, append([]*subscription(nil), qr...))
		}
	}
}
// findQSliceForSub returns the index of the slice in qsl whose first
// subscription has the same queue name as sub, or -1 when sub is not a queue
// subscription or no such slice exists.
//
// A map is deliberately not used here: iteration needs to stay fast when
// publishes are processed on the client, so the groups are walked
// sequentially. Keep an eye on this in case the number of different queue
// groups for the same subject gets large.
func findQSliceForSub(sub *subscription, qsl [][]*subscription) int {
	if sub.queue != nil {
		for idx := range qsl {
			group := qsl[idx]
			if len(group) == 0 {
				continue
			}
			if bytes.Equal(sub.queue, group[0].queue) {
				return idx
			}
		}
	}
	return -1
}
// matchLevel is used to recursively descend into the trie.
// Starting at level l it consumes toks one by one and adds to results the
// subscriptions of every matching node: full wildcard nodes met along the
// way, partial wildcard branches (followed recursively) and the literal path.
// Note that the local variable pwc shadows the package level constant.
func matchLevel(l *level, toks []string, results *SublistResult) {
var pwc, n *node
for i, t := range toks {
// The previous token had no literal node (or there is no level at all):
// nothing further can match on this path.
if l == nil {
return
}
// A full wildcard at this level covers this token and everything after it.
if l.fwc != nil {
addNodeToResults(l.fwc, results)
}
// A partial wildcard covers just this token; match the remaining tokens
// against the level below it.
if pwc = l.pwc; pwc != nil {
matchLevel(pwc.next, toks[i+1:], results)
}
// Follow the literal token if it exists at this level.
n = l.nodes[t]
if n != nil {
l = n.next
} else {
l = nil
}
}
// All tokens were consumed: n and pwc are the literal and partial wildcard
// nodes found for the last token (both nil when toks is empty).
if n != nil {
addNodeToResults(n, results)
}
if pwc != nil {
addNodeToResults(pwc, results)
}
}
// lnt is used to track descent into levels for a removal for pruning.
// Remove records one entry per token it resolves.
type lnt struct {
// l is the level in which the token was looked up.
l *level
// n is the node found for the token in l.
n *node
// t is the token itself, needed to delete n from l's map.
t string
}
// Remove deletes a subscription from the sublist.
//
// It returns ErrInvalidSubject when the subject has an empty token or a token
// following a full wildcard, and ErrNotFound when the subject's path or the
// subscription itself is not present. On success, nodes left empty along the
// path are pruned, matching cache entries are dropped and genid is
// incremented.
func (s *Sublist) Remove(sub *subscription) error {
	subject := string(sub.subject)

	// Split the subject into tokens, using a fixed array as initial storage.
	var backing [32]string
	toks := backing[:0]
	from := 0
	for pos := 0; pos < len(subject); pos++ {
		if subject[pos] != btsep {
			continue
		}
		toks = append(toks, subject[from:pos])
		from = pos + 1
	}
	toks = append(toks, subject[from:])

	s.Lock()
	defer s.Unlock()

	var (
		seenFwc  bool
		cur      = s.root
		target   *node
		trailBuf [32]lnt
	)
	// Remember every (level, node, token) step so empty nodes can be pruned.
	trail := trailBuf[:0]

	for _, tok := range toks {
		if len(tok) == 0 || seenFwc {
			return ErrInvalidSubject
		}
		if cur == nil {
			return ErrNotFound
		}
		// Only single character tokens can be wildcards.
		switch {
		case len(tok) > 1:
			target = cur.nodes[tok]
		case tok[0] == pwc:
			target = cur.pwc
		case tok[0] == fwc:
			target = cur.fwc
			seenFwc = true
		default:
			target = cur.nodes[tok]
		}
		if target == nil {
			cur = nil
			continue
		}
		trail = append(trail, lnt{cur, target, tok})
		cur = target.next
	}

	if !s.removeFromNode(target, sub) {
		return ErrNotFound
	}
	s.count--
	s.removes++

	// Prune from the deepest node back towards the root.
	for i := len(trail) - 1; i >= 0; i-- {
		if step := trail[i]; step.n.isEmpty() {
			step.l.pruneNode(step.n, step.t)
		}
	}

	s.removeFromCache(subject, sub)
	atomic.AddUint64(&s.genid, 1)
	return nil
}
// pruneNode detaches node n from level l. A wildcard node is cleared from its
// dedicated slot; any other node is deleted from the token map under key t.
// A nil n is a no-op.
func (l *level) pruneNode(n *node, t string) {
	switch {
	case n == nil:
		return
	case l.fwc == n:
		l.fwc = nil
	case l.pwc == n:
		l.pwc = nil
	default:
		delete(l.nodes, t)
	}
}
// isEmpty reports whether the node holds no subscriptions and has no nodes
// in the level below it. Used in pruning.
func (n *node) isEmpty() bool {
	if len(n.psubs) > 0 || len(n.qsubs) > 0 {
		return false
	}
	return n.next == nil || n.next.numNodes() == 0
}
// numNodes returns how many nodes the level holds: its literal token nodes
// plus one for each wildcard node that is set.
func (l *level) numNodes() int {
	total := len(l.nodes)
	for _, wc := range [...]*node{l.pwc, l.fwc} {
		if wc != nil {
			total++
		}
	}
	return total
}
// removeSubFromList removes the first occurrence of sub from sl by moving the
// last element into its place; element order is not preserved. It returns the
// (possibly reallocated) list and whether sub was found.
func removeSubFromList(sub *subscription, sl []*subscription) ([]*subscription, bool) {
	for idx, cand := range sl {
		if cand != sub {
			continue
		}
		tail := len(sl) - 1
		// Swap in the last element and clear the vacated slot.
		sl[idx], sl[tail] = sl[tail], nil
		return shrinkAsNeeded(sl[:tail]), true
	}
	return sl, false
}
// removeFromNode removes sub from node n and reports whether it was found.
// Plain subscriptions are taken out of n.psubs; queue subscriptions are taken
// out of their queue group, and a group left empty is dropped from n.qsubs.
func (s *Sublist) removeFromNode(n *node, sub *subscription) (found bool) {
	switch {
	case n == nil:
		return false
	case sub.queue == nil:
		n.psubs, found = removeSubFromList(sub, n.psubs)
		return found
	}

	// We have a queue group subscription here
	idx := findQSliceForSub(sub, n.qsubs)
	if idx < 0 {
		return false
	}
	n.qsubs[idx], found = removeSubFromList(sub, n.qsubs[idx])
	if len(n.qsubs[idx]) > 0 {
		return found
	}

	// The group is now empty: swap in the last group and shrink the list.
	tail := len(n.qsubs) - 1
	n.qsubs[idx] = n.qsubs[tail]
	n.qsubs[tail] = nil
	n.qsubs = n.qsubs[:tail]
	if tail == 0 {
		n.qsubs = nil
	}
	return found
}
// shrinkAsNeeded reallocates sl into a right-sized slice when its capacity
// exceeds 8 and more than half of that capacity is unused; otherwise sl is
// returned as is. This covers very large growth followed by a return to a
// more normal size after unsubscribes.
func shrinkAsNeeded(sl []*subscription) []*subscription {
	capacity := cap(sl)
	// Don't bother if list not too big
	if capacity <= 8 {
		return sl
	}
	unused := capacity - len(sl)
	if float32(unused)/float32(capacity) > 0.50 {
		return append([]*subscription(nil), sl...)
	}
	return sl
}
// Count returns the number of subscriptions currently stored, read under the
// read lock.
func (s *Sublist) Count() uint32 {
	s.RLock()
	total := s.count
	s.RUnlock()
	return total
}
// CacheCount returns the number of result sets currently held in the match
// cache, read under the read lock.
func (s *Sublist) CacheCount() int {
	s.RLock()
	entries := len(s.cache)
	s.RUnlock()
	return entries
}
// SublistStats is the public snapshot of sublist statistics returned by Stats.
type SublistStats struct {
// NumSubs is the number of subscriptions stored.
NumSubs uint32 `json:"num_subscriptions"`
// NumCache is the number of entries in the match cache.
NumCache uint32 `json:"num_cache"`
// NumInserts is the number of successful inserts.
NumInserts uint64 `json:"num_inserts"`
// NumRemoves is the number of successful removes.
NumRemoves uint64 `json:"num_removes"`
// NumMatches is the number of Match calls.
NumMatches uint64 `json:"num_matches"`
// CacheHitRate is cache hits divided by NumMatches; zero when there have
// been no matches.
CacheHitRate float64 `json:"cache_hit_rate"`
// MaxFanout is the largest fanout among cached results, where fanout is the
// number of plain subscriptions plus the number of queue groups.
MaxFanout uint32 `json:"max_fanout"`
// AvgFanout is the mean fanout over all cached results.
AvgFanout float64 `json:"avg_fanout"`
}
// Stats returns a snapshot of the sublist's counters and cache fanout.
//
// Stats only reads state, so it takes the read lock: counters and the cache
// are only written under the write lock, and a read lock lets concurrent
// Match cache lookups proceed while statistics are gathered.
//
// Fanout for a cached result is its number of plain subscriptions plus its
// number of queue groups; MaxFanout and AvgFanout are computed over the
// entries currently in the cache.
func (s *Sublist) Stats() *SublistStats {
	s.RLock()
	defer s.RUnlock()

	st := &SublistStats{}
	st.NumSubs = s.count
	st.NumCache = uint32(len(s.cache))
	st.NumInserts = s.inserts
	st.NumRemoves = s.removes
	st.NumMatches = atomic.LoadUint64(&s.matches)
	if st.NumMatches > 0 {
		st.CacheHitRate = float64(atomic.LoadUint64(&s.cacheHits)) / float64(st.NumMatches)
	}
	// whip through cache for fanout stats
	tot, max := 0, 0
	for _, r := range s.cache {
		l := len(r.psubs) + len(r.qsubs)
		tot += l
		if l > max {
			max = l
		}
	}
	st.MaxFanout = uint32(max)
	// tot > 0 implies the cache is non-empty, so the division is safe.
	if tot > 0 {
		st.AvgFanout = float64(tot) / float64(len(s.cache))
	}
	return st
}
// numLevels will return the maximum number of levels
// contained in the Sublist tree, as computed by visitLevel from the root.
// NOTE(review): s.root is read without taking the lock; presumably callers
// must make sure no Insert or Remove runs concurrently — confirm.
func (s *Sublist) numLevels() int {
return visitLevel(s.root, 0)
}
// visitLevel descends the Sublist tree recursively and returns the depth of
// the deepest non-empty level reachable from l, where depth is the number of
// non-empty levels already above l. A nil or empty level adds nothing.
func visitLevel(l *level, depth int) int {
	if l == nil || l.numNodes() == 0 {
		return depth
	}
	here := depth + 1
	deepest := here

	// descend follows a single child node and keeps track of the maximum.
	descend := func(n *node) {
		if n == nil {
			return
		}
		if d := visitLevel(n.next, here); d > deepest {
			deepest = d
		}
	}

	for _, n := range l.nodes {
		descend(n)
	}
	descend(l.pwc)
	descend(l.fwc)
	return deepest
}
// IsValidSubject reports whether subject is well formed: it must be
// non-empty, contain no empty tokens, and have no token after a full
// wildcard ('>') token.
func IsValidSubject(subject string) bool {
	if subject == "" {
		return false
	}
	seenFwc := false
	for _, tok := range strings.Split(subject, tsep) {
		if tok == "" || seenFwc {
			return false
		}
		// Only a token made of the single '>' character is a full wildcard.
		if len(tok) == 1 && tok[0] == fwc {
			seenFwc = true
		}
	}
	return true
}
// IsValidLiteralSubject reports whether subject is valid and literal: it has
// no empty tokens and no token that is a lone wildcard character.
func IsValidLiteralSubject(subject string) bool {
	for _, tok := range strings.Split(subject, tsep) {
		switch len(tok) {
		case 0:
			return false
		case 1:
			if c := tok[0]; c == pwc || c == fwc {
				return false
			}
		}
	}
	return true
}
// matchLiteral is used to test literal subjects, those that do not have any
// wildcards, with a target subject. This is used in the cache layer.
//
// literal is the wildcard-free subject; subject may contain wildcards. The
// two strings are scanned in parallel, byte by byte. It returns true when
// subject matches literal.
func matchLiteral(literal, subject string) bool {
// li is the read position in literal; i (below) is the position in subject.
li := 0
ll := len(literal)
ls := len(subject)
for i := 0; i < ls; i++ {
// The literal ran out while the subject still has characters left.
if li >= ll {
return false
}
// This function has been optimized for speed.
// For instance, do not set b:=subject[i] here since
// we may bump `i` in this loop to avoid `continue` or
// skipping common test in a particular test.
// Run Benchmark_SublistMatchLiteral before making any change.
switch subject[i] {
case pwc:
// NOTE: This is not testing validity of a subject, instead ensures
// that wildcards are treated as such if they follow some basic rules,
// namely that they are a token on their own.
if i == 0 || subject[i-1] == btsep {
if i == ls-1 {
// There is no more token in the subject after this wildcard.
// Skip token in literal and expect to not find a separator.
for {
// End of literal, this is a match.
if li >= ll {
return true
}
// Presence of separator, this can't be a match.
if literal[li] == btsep {
return false
}
li++
}
} else if subject[i+1] == btsep {
// There is another token in the subject after this wildcard.
// Skip token in literal and expect to get a separator.
for {
// We found the end of the literal before finding a separator,
// this can't be a match.
if li >= ll {
return false
}
if literal[li] == btsep {
break
}
li++
}
// Bump `i` since we know there is a `.` following, we are
// safe. The common test below is going to check `.` with `.`
// which is good. A `continue` here is too costly.
i++
}
}
case fwc:
// For `>` to be a wildcard, it means being the only or last character
// in the string preceded by a `.`
if (i == 0 || subject[i-1] == btsep) && i == ls-1 {
return true
}
}
// Common test: the current characters must be identical. A wildcard
// character that is not a token on its own is compared literally here.
if subject[i] != literal[li] {
return false
}
li++
}
// Make sure we have processed all of the literal's chars..
return li >= ll
}