feat: 优化ac自动机
This commit is contained in:
@@ -53,7 +53,7 @@ func SensitiveWordReplace(text string, returnImmediately bool) (bool, []string,
|
|||||||
return false, nil, text
|
return false, nil, text
|
||||||
}
|
}
|
||||||
checkText := strings.ToLower(text)
|
checkText := strings.ToLower(text)
|
||||||
m := InitAc(setting.SensitiveWords)
|
m := getOrBuildAC(setting.SensitiveWords)
|
||||||
hits := m.MultiPatternSearch([]rune(checkText), returnImmediately)
|
hits := m.MultiPatternSearch([]rune(checkText), returnImmediately)
|
||||||
if len(hits) > 0 {
|
if len(hits) > 0 {
|
||||||
words := make([]string, 0, len(hits))
|
words := make([]string, 0, len(hits))
|
||||||
|
|||||||
@@ -3,8 +3,12 @@ package service
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
goahocorasick "github.com/anknown/ahocorasick"
|
"hash/fnv"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
goahocorasick "github.com/anknown/ahocorasick"
|
||||||
)
|
)
|
||||||
|
|
||||||
func SundaySearch(text string, pattern string) bool {
|
func SundaySearch(text string, pattern string) bool {
|
||||||
@@ -56,26 +60,73 @@ func RemoveDuplicate(s []string) []string {
|
|||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
func InitAc(words []string) *goahocorasick.Machine {
|
func InitAc(dict []string) *goahocorasick.Machine {
|
||||||
m := new(goahocorasick.Machine)
|
m := new(goahocorasick.Machine)
|
||||||
dict := readRunes(words)
|
runes := readRunes(dict)
|
||||||
if err := m.Build(dict); err != nil {
|
if err := m.Build(runes); err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return m
|
return m
|
||||||
}
|
}
|
||||||
|
|
||||||
func readRunes(words []string) [][]rune {
|
var acCache sync.Map
|
||||||
var dict [][]rune
|
|
||||||
|
|
||||||
for _, word := range words {
|
func acKey(dict []string) string {
|
||||||
|
if len(dict) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
normalized := make([]string, 0, len(dict))
|
||||||
|
for _, w := range dict {
|
||||||
|
w = strings.ToLower(strings.TrimSpace(w))
|
||||||
|
if w != "" {
|
||||||
|
normalized = append(normalized, w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(normalized) == 0 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
sort.Strings(normalized)
|
||||||
|
hasher := fnv.New64a()
|
||||||
|
for _, w := range normalized {
|
||||||
|
hasher.Write([]byte{0})
|
||||||
|
hasher.Write([]byte(w))
|
||||||
|
}
|
||||||
|
return fmt.Sprintf("%x", hasher.Sum64())
|
||||||
|
}
|
||||||
|
|
||||||
|
func getOrBuildAC(dict []string) *goahocorasick.Machine {
|
||||||
|
key := acKey(dict)
|
||||||
|
if key == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if v, ok := acCache.Load(key); ok {
|
||||||
|
if m, ok2 := v.(*goahocorasick.Machine); ok2 {
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
}
|
||||||
|
m := InitAc(dict)
|
||||||
|
if m == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if actual, loaded := acCache.LoadOrStore(key, m); loaded {
|
||||||
|
if cached, ok := actual.(*goahocorasick.Machine); ok {
|
||||||
|
return cached
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return m
|
||||||
|
}
|
||||||
|
|
||||||
|
func readRunes(dict []string) [][]rune {
|
||||||
|
var runes [][]rune
|
||||||
|
|
||||||
|
for _, word := range dict {
|
||||||
word = strings.ToLower(word)
|
word = strings.ToLower(word)
|
||||||
l := bytes.TrimSpace([]byte(word))
|
l := bytes.TrimSpace([]byte(word))
|
||||||
dict = append(dict, bytes.Runes(l))
|
runes = append(runes, bytes.Runes(l))
|
||||||
}
|
}
|
||||||
|
|
||||||
return dict
|
return runes
|
||||||
}
|
}
|
||||||
|
|
||||||
func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []string) {
|
func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []string) {
|
||||||
@@ -85,7 +136,7 @@ func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []str
|
|||||||
if len(findText) == 0 {
|
if len(findText) == 0 {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
m := InitAc(dict)
|
m := getOrBuildAC(dict)
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user