feat: 优化ac自动机
This commit is contained in:
@@ -53,7 +53,7 @@ func SensitiveWordReplace(text string, returnImmediately bool) (bool, []string,
|
||||
return false, nil, text
|
||||
}
|
||||
checkText := strings.ToLower(text)
|
||||
m := InitAc(setting.SensitiveWords)
|
||||
m := getOrBuildAC(setting.SensitiveWords)
|
||||
hits := m.MultiPatternSearch([]rune(checkText), returnImmediately)
|
||||
if len(hits) > 0 {
|
||||
words := make([]string, 0, len(hits))
|
||||
|
||||
@@ -3,8 +3,12 @@ package service
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
goahocorasick "github.com/anknown/ahocorasick"
|
||||
"hash/fnv"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
goahocorasick "github.com/anknown/ahocorasick"
|
||||
)
|
||||
|
||||
func SundaySearch(text string, pattern string) bool {
|
||||
@@ -56,26 +60,73 @@ func RemoveDuplicate(s []string) []string {
|
||||
return result
|
||||
}
|
||||
|
||||
func InitAc(words []string) *goahocorasick.Machine {
|
||||
func InitAc(dict []string) *goahocorasick.Machine {
|
||||
m := new(goahocorasick.Machine)
|
||||
dict := readRunes(words)
|
||||
if err := m.Build(dict); err != nil {
|
||||
runes := readRunes(dict)
|
||||
if err := m.Build(runes); err != nil {
|
||||
fmt.Println(err)
|
||||
return nil
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func readRunes(words []string) [][]rune {
|
||||
var dict [][]rune
|
||||
var acCache sync.Map
|
||||
|
||||
for _, word := range words {
|
||||
func acKey(dict []string) string {
|
||||
if len(dict) == 0 {
|
||||
return ""
|
||||
}
|
||||
normalized := make([]string, 0, len(dict))
|
||||
for _, w := range dict {
|
||||
w = strings.ToLower(strings.TrimSpace(w))
|
||||
if w != "" {
|
||||
normalized = append(normalized, w)
|
||||
}
|
||||
}
|
||||
if len(normalized) == 0 {
|
||||
return ""
|
||||
}
|
||||
sort.Strings(normalized)
|
||||
hasher := fnv.New64a()
|
||||
for _, w := range normalized {
|
||||
hasher.Write([]byte{0})
|
||||
hasher.Write([]byte(w))
|
||||
}
|
||||
return fmt.Sprintf("%x", hasher.Sum64())
|
||||
}
|
||||
|
||||
func getOrBuildAC(dict []string) *goahocorasick.Machine {
|
||||
key := acKey(dict)
|
||||
if key == "" {
|
||||
return nil
|
||||
}
|
||||
if v, ok := acCache.Load(key); ok {
|
||||
if m, ok2 := v.(*goahocorasick.Machine); ok2 {
|
||||
return m
|
||||
}
|
||||
}
|
||||
m := InitAc(dict)
|
||||
if m == nil {
|
||||
return nil
|
||||
}
|
||||
if actual, loaded := acCache.LoadOrStore(key, m); loaded {
|
||||
if cached, ok := actual.(*goahocorasick.Machine); ok {
|
||||
return cached
|
||||
}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
func readRunes(dict []string) [][]rune {
|
||||
var runes [][]rune
|
||||
|
||||
for _, word := range dict {
|
||||
word = strings.ToLower(word)
|
||||
l := bytes.TrimSpace([]byte(word))
|
||||
dict = append(dict, bytes.Runes(l))
|
||||
runes = append(runes, bytes.Runes(l))
|
||||
}
|
||||
|
||||
return dict
|
||||
return runes
|
||||
}
|
||||
|
||||
func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []string) {
|
||||
@@ -85,7 +136,7 @@ func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []str
|
||||
if len(findText) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
m := InitAc(dict)
|
||||
m := getOrBuildAC(dict)
|
||||
if m == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user