diff --git a/service/sensitive.go b/service/sensitive.go index 25cfd46f..ed033daa 100644 --- a/service/sensitive.go +++ b/service/sensitive.go @@ -53,7 +53,7 @@ func SensitiveWordReplace(text string, returnImmediately bool) (bool, []string, return false, nil, text } checkText := strings.ToLower(text) - m := InitAc(setting.SensitiveWords) + m := getOrBuildAC(setting.SensitiveWords) hits := m.MultiPatternSearch([]rune(checkText), returnImmediately) if len(hits) > 0 { words := make([]string, 0, len(hits)) diff --git a/service/str.go b/service/str.go index 4390e99b..61054bdc 100644 --- a/service/str.go +++ b/service/str.go @@ -3,8 +3,12 @@ package service import ( "bytes" "fmt" - goahocorasick "github.com/anknown/ahocorasick" + "hash/fnv" + "sort" "strings" + "sync" + + goahocorasick "github.com/anknown/ahocorasick" ) func SundaySearch(text string, pattern string) bool { @@ -56,26 +60,73 @@ func RemoveDuplicate(s []string) []string { return result } -func InitAc(words []string) *goahocorasick.Machine { +func InitAc(dict []string) *goahocorasick.Machine { m := new(goahocorasick.Machine) - dict := readRunes(words) - if err := m.Build(dict); err != nil { + runes := readRunes(dict) + if err := m.Build(runes); err != nil { fmt.Println(err) return nil } return m } -func readRunes(words []string) [][]rune { - var dict [][]rune +var acCache sync.Map - for _, word := range words { +func acKey(dict []string) string { + if len(dict) == 0 { + return "" + } + normalized := make([]string, 0, len(dict)) + for _, w := range dict { + w = strings.ToLower(strings.TrimSpace(w)) + if w != "" { + normalized = append(normalized, w) + } + } + if len(normalized) == 0 { + return "" + } + sort.Strings(normalized) + hasher := fnv.New64a() + for _, w := range normalized { + hasher.Write([]byte{0}) + hasher.Write([]byte(w)) + } + return fmt.Sprintf("%x", hasher.Sum64()) +} + +func getOrBuildAC(dict []string) *goahocorasick.Machine { + key := acKey(dict) + if key == "" { + return nil + } + if v, ok := acCache.Load(key); ok { + if m, ok2 := v.(*goahocorasick.Machine); ok2 { + return m + } + } + m := InitAc(dict) + if m == nil { + return nil + } + if actual, loaded := acCache.LoadOrStore(key, m); loaded { + if cached, ok := actual.(*goahocorasick.Machine); ok { + return cached + } + } + return m +} + +func readRunes(dict []string) [][]rune { + var runes [][]rune + + for _, word := range dict { word = strings.ToLower(word) l := bytes.TrimSpace([]byte(word)) - dict = append(dict, bytes.Runes(l)) + runes = append(runes, bytes.Runes(l)) } - return dict + return runes } func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []string) { @@ -85,7 +136,7 @@ func AcSearch(findText string, dict []string, stopImmediately bool) (bool, []str if len(findText) == 0 { return false, nil } - m := InitAc(dict) + m := getOrBuildAC(dict) if m == nil { return false, nil }