Simple Architecture
This commit is contained in:
@@ -3,7 +3,6 @@ package indexer
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -43,19 +42,69 @@ type NameIndexEvent struct {
|
||||
|
||||
// nameIndexState holds the local in-memory name index and the sender-side
// deduplication tracker.
//
// Search strategy: trigram inverted index.
//   - byName: lowercased name → peerID → DID (for delete and exact resolution)
//   - byPeer: peerID → lowercased name (to recompute trigrams on delete)
//   - trigrams: 3-byte substring → set of peerIDs (narrows the candidate set
//     for a substring search to the peers sharing that trigram)
//
// For needles shorter than 3 bytes the trigram index cannot help; a linear
// scan of byName is used as fallback (rare and fast enough at small N).
type nameIndexState struct {
	byName   map[string]map[string]string   // lowercased name → peerID → DID
	byPeer   map[string]string              // peerID → lowercased name
	trigrams map[string]map[string]struct{} // trigram → peerID set
	// indexMu guards byName, byPeer and trigrams.
	indexMu sync.RWMutex

	// emitted records the last emission time of each "action:name:peerID" key
	// so duplicate GossipSub emissions within nameIndexDedupWindow can be
	// suppressed. It is purged periodically to prevent unbounded growth.
	emitted map[string]time.Time
	// emittedMu guards emitted.
	emittedMu sync.Mutex
}
|
||||
|
||||
// trigramsOf returns every overlapping 3-byte window of s, in order of
// appearance. s is expected to be lowercased by the caller.
//
// Windows are taken over bytes, not runes, so a window may split a multi-byte
// UTF-8 character; this stays consistent as long as names and needles are
// both tokenised by this function.
//
// If s is shorter than 3 bytes (including the empty string), s itself is
// returned as the sole token.
func trigramsOf(s string) []string {
	if len(s) < 3 {
		return []string{s}
	}
	out := make([]string, 0, len(s)-2)
	for i := 0; i+3 <= len(s); i++ {
		out = append(out, s[i:i+3])
	}
	return out
}
|
||||
|
||||
// addTrigrams inserts peerID into every trigram bucket for name.
|
||||
func (s *nameIndexState) addTrigrams(name, peerID string) {
|
||||
for _, tg := range trigramsOf(name) {
|
||||
if s.trigrams[tg] == nil {
|
||||
s.trigrams[tg] = map[string]struct{}{}
|
||||
}
|
||||
s.trigrams[tg][peerID] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// removeTrigrams deletes peerID from every trigram bucket for name,
|
||||
// cleaning up empty buckets to keep memory tight.
|
||||
func (s *nameIndexState) removeTrigrams(name, peerID string) {
|
||||
for _, tg := range trigramsOf(name) {
|
||||
if m := s.trigrams[tg]; m != nil {
|
||||
delete(m, peerID)
|
||||
if len(m) == 0 {
|
||||
delete(s.trigrams, tg)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// shouldEmit returns true if the (action, name, peerID) tuple has not been
|
||||
// emitted within nameIndexDedupWindow, updating the tracker if so.
|
||||
//
|
||||
// On DELETE: the ADD entry for the same peer is immediately removed — the peer
|
||||
// is gone, keeping it would cause the map to grow with departed peers forever.
|
||||
// The DELETE entry itself is kept for the dedup window to absorb duplicate
|
||||
// delete events, then cleaned by the purgeEmitted ticker.
|
||||
func (s *nameIndexState) shouldEmit(action NameIndexAction, name, peerID string) bool {
|
||||
key := string(action) + ":" + name + ":" + peerID
|
||||
s.emittedMu.Lock()
|
||||
@@ -64,9 +113,27 @@ func (s *nameIndexState) shouldEmit(action NameIndexAction, name, peerID string)
|
||||
return false
|
||||
}
|
||||
s.emitted[key] = time.Now()
|
||||
if action == NameIndexDelete {
|
||||
// Peer is leaving: drop its ADD entry — no longer needed.
|
||||
delete(s.emitted, string(NameIndexAdd)+":"+name+":"+peerID)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// purgeEmitted removes stale DELETE entries from the emitted dedup map.
|
||||
// ADD entries are cleaned eagerly on DELETE, so only short-lived DELETE
|
||||
// entries remain here; the ticker just trims those stragglers.
|
||||
func (s *nameIndexState) purgeEmitted() {
|
||||
now := time.Now()
|
||||
s.emittedMu.Lock()
|
||||
defer s.emittedMu.Unlock()
|
||||
for k, t := range s.emitted {
|
||||
if now.Sub(t) >= nameIndexDedupWindow {
|
||||
delete(s.emitted, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// onEvent applies a received NameIndexEvent to the local index.
|
||||
// "add" inserts/updates the mapping; "delete" removes it.
|
||||
// Operations are idempotent — duplicate events from multiple indexers are harmless.
|
||||
@@ -74,19 +141,40 @@ func (s *nameIndexState) onEvent(evt NameIndexEvent) {
|
||||
if evt.Name == "" || evt.PeerID == "" {
|
||||
return
|
||||
}
|
||||
nameLow := strings.ToLower(evt.Name)
|
||||
s.indexMu.Lock()
|
||||
defer s.indexMu.Unlock()
|
||||
switch evt.Action {
|
||||
case NameIndexAdd:
|
||||
if s.index[evt.Name] == nil {
|
||||
s.index[evt.Name] = map[string]string{}
|
||||
// If the peer previously had a different name, clean up old trigrams.
|
||||
if old, ok := s.byPeer[evt.PeerID]; ok && old != nameLow {
|
||||
s.removeTrigrams(old, evt.PeerID)
|
||||
if s.byName[old] != nil {
|
||||
delete(s.byName[old], evt.PeerID)
|
||||
if len(s.byName[old]) == 0 {
|
||||
delete(s.byName, old)
|
||||
}
|
||||
}
|
||||
}
|
||||
s.index[evt.Name][evt.PeerID] = evt.DID
|
||||
if s.byName[nameLow] == nil {
|
||||
s.byName[nameLow] = map[string]string{}
|
||||
}
|
||||
s.byName[nameLow][evt.PeerID] = evt.DID
|
||||
s.byPeer[evt.PeerID] = nameLow
|
||||
s.addTrigrams(nameLow, evt.PeerID)
|
||||
|
||||
case NameIndexDelete:
|
||||
if s.index[evt.Name] != nil {
|
||||
delete(s.index[evt.Name], evt.PeerID)
|
||||
if len(s.index[evt.Name]) == 0 {
|
||||
delete(s.index, evt.Name)
|
||||
// Use stored name so trigrams match exactly what was indexed.
|
||||
name := nameLow
|
||||
if stored, ok := s.byPeer[evt.PeerID]; ok {
|
||||
name = stored
|
||||
}
|
||||
s.removeTrigrams(name, evt.PeerID)
|
||||
delete(s.byPeer, evt.PeerID)
|
||||
if s.byName[name] != nil {
|
||||
delete(s.byName[name], evt.PeerID)
|
||||
if len(s.byName[name]) == 0 {
|
||||
delete(s.byName, name)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -96,10 +184,22 @@ func (s *nameIndexState) onEvent(evt NameIndexEvent) {
|
||||
// Must be called after ix.PS is ready.
|
||||
func (ix *IndexerService) initNameIndex(ps *pubsub.PubSub) {
|
||||
logger := oclib.GetLogger()
|
||||
ix.nameIndex = &nameIndexState{
|
||||
index: map[string]map[string]string{},
|
||||
emitted: map[string]time.Time{},
|
||||
state := &nameIndexState{
|
||||
byName: map[string]map[string]string{},
|
||||
byPeer: map[string]string{},
|
||||
trigrams: map[string]map[string]struct{}{},
|
||||
emitted: map[string]time.Time{},
|
||||
}
|
||||
ix.nameIndex = state
|
||||
|
||||
// Periodically purge the emitted dedup map so it doesn't grow forever.
|
||||
go func() {
|
||||
t := time.NewTicker(nameIndexDedupWindow)
|
||||
defer t.Stop()
|
||||
for range t.C {
|
||||
state.purgeEmitted()
|
||||
}
|
||||
}()
|
||||
|
||||
ps.RegisterTopicValidator(TopicNameIndex, func(_ context.Context, _ pp.ID, _ *pubsub.Message) bool {
|
||||
return true
|
||||
@@ -149,23 +249,72 @@ func (ix *IndexerService) publishNameEvent(action NameIndexAction, name, peerID,
|
||||
|
||||
// LookupNameIndex searches the distributed name index for peers whose name
|
||||
// contains needle (case-insensitive). Returns peerID → DID for matched peers.
|
||||
// Returns nil if the name index is not initialised (e.g. native indexers).
|
||||
// Returns nil if the name index is not initialised.
|
||||
//
|
||||
// Algorithm:
|
||||
// - needle ≥ 3 chars: trigram intersection → O(|candidates|) verify pass.
|
||||
// The trigram index immediately narrows the candidate set; false positives
|
||||
// are eliminated by the full-string contains check.
|
||||
// - needle < 3 chars: linear scan of byName (rare, still fast at small N).
|
||||
func (ix *IndexerService) LookupNameIndex(needle string) map[string]string {
|
||||
if ix.nameIndex == nil {
|
||||
return nil
|
||||
}
|
||||
result := map[string]string{}
|
||||
needleLow := strings.ToLower(needle)
|
||||
result := map[string]string{}
|
||||
|
||||
ix.nameIndex.indexMu.RLock()
|
||||
defer ix.nameIndex.indexMu.RUnlock()
|
||||
for name, peers := range ix.nameIndex.index {
|
||||
fmt.Println(strings.Contains(strings.ToLower(name), needleLow), needleLow, strings.ToLower(name))
|
||||
if strings.Contains(strings.ToLower(name), needleLow) {
|
||||
for peerID, did := range peers {
|
||||
result[peerID] = did
|
||||
|
||||
if len(needleLow) < 3 {
|
||||
// Short needle: linear scan fallback.
|
||||
for name, peers := range ix.nameIndex.byName {
|
||||
if strings.Contains(name, needleLow) {
|
||||
for peerID, did := range peers {
|
||||
result[peerID] = did
|
||||
}
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Trigram intersection: start with the first trigram's set, then
|
||||
// progressively intersect with each subsequent trigram's set.
|
||||
tgs := trigramsOf(needleLow)
|
||||
var candidates map[string]struct{}
|
||||
for _, tg := range tgs {
|
||||
set := ix.nameIndex.trigrams[tg]
|
||||
if len(set) == 0 {
|
||||
return result // any empty trigram set → no possible match
|
||||
}
|
||||
if candidates == nil {
|
||||
candidates = make(map[string]struct{}, len(set))
|
||||
for pid := range set {
|
||||
candidates[pid] = struct{}{}
|
||||
}
|
||||
} else {
|
||||
for pid := range candidates {
|
||||
if _, ok := set[pid]; !ok {
|
||||
delete(candidates, pid)
|
||||
}
|
||||
}
|
||||
}
|
||||
if len(candidates) == 0 {
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
// Full-string verification pass: trigrams admit false positives
|
||||
// (e.g. "abc" and "bca" share the trigram "bc_" with a rotated name).
|
||||
for peerID := range candidates {
|
||||
name := ix.nameIndex.byPeer[peerID]
|
||||
if strings.Contains(name, needleLow) {
|
||||
did := ""
|
||||
if m := ix.nameIndex.byName[name]; m != nil {
|
||||
did = m[peerID]
|
||||
}
|
||||
result[peerID] = did
|
||||
}
|
||||
}
|
||||
fmt.Println("RESULT", result)
|
||||
return result
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user