组件地址
https://github.com/houbb/sensitive-word

网上博客介绍的版本大多不是最新版;本文参考官方文档,基于 0.16.1 版本整理总结,便于快速上手。

pom文件引入

<dependency>
    <groupId>com.github.houbb</groupId>
    <artifactId>sensitive-word</artifactId>
    <version>0.16.1</version>
</dependency>

配置类

package com.huida.tzly.sensitive;

import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that exposes a customized {@link SensitiveWordBs} bean.
 *
 * <p>The bean combines the library's default allow/deny word lists with the
 * project-specific {@link MyDdWordAllow} and {@link MyDdWordDeny} sources.
 * {@code WordDenys} must be imported from
 * {@code com.github.houbb.sensitive.word.support.deny} for this class to compile.
 */
@Configuration
public class SpringSensitiveWordConfig {

    @Autowired
    private MyDdWordAllow myDdWordAllow;

    @Autowired
    private MyDdWordDeny myDdWordDeny;

    /**
     * Builds and initializes the sensitive-word bootstrap object.
     *
     * @return the initialized {@link SensitiveWordBs}
     * @since 1.0.0
     */
    @Bean
    public SensitiveWordBs sensitiveWordBs() {
        return SensitiveWordBs.newInstance()
                // Allow list (whitelist): library defaults chained with the custom source.
                .wordAllow(WordAllows.chains(WordAllows.defaults(), myDdWordAllow))
                // Deny list (blacklist): library defaults chained with the custom source.
                .wordDeny(WordDenys.chains(WordDenys.defaults(), myDdWordDeny))
                // Whether sensitive-word detection is enabled.
                .enableWordCheck(true)
                // Ignore letter case.
                .ignoreCase(true)
                // Ignore full-width / half-width differences.
                .ignoreWidth(true)
                // Ignore simplified / traditional Chinese differences.
                .ignoreChineseStyle(true)
                // Ignore English letter styles (e.g. circled or parenthesized letters such as Ⓕ ⓤ ⒦).
                .ignoreEnglishStyle(true)
                // Whether to ignore repeated characters; disabled here.
                .ignoreRepeat(false)
                // Consecutive-digit detection (filters phone numbers, QQ ids, etc.); left disabled.
                //.enableNumCheck(true)
                // Length threshold for consecutive-digit detection.
                //.numCheckLen(8)
                // E-mail detection.
                //.enableEmailCheck(true)
                // URL detection, e.g. www.baidu.com
                //.enableUrlCheck(true)
                // Ignore digit styles, e.g. 9⓿二肆⁹₈③⑸⒋➃㈤㊄
                //.ignoreNumStyle(true)
                // Tags associated with a word (e.g. a category); none are configured.
                .wordTag(WordTags.none())
                // Characters to skip while matching.
                .charIgnore(SensitiveWordCharIgnores.defaults())
                // Extra condition applied to each match (e.g. could require whole-word English matches).
                .wordResultCondition(WordResultConditions.alwaysTrue())
                .init();
    }

}

从数据库加载自定义敏感词白名单黑名单

package com.huida.tzly.sensitive;

import com.github.houbb.sensitive.word.api.IWordAllow;
import com.huida.tzly.domain.TzLySensitiveWord;
import com.huida.tzly.service.TzLySensitiveWordService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Custom allow-list (whitelist) source for the sensitive-word engine.
 *
 * <p>Words are fetched through {@link TzLySensitiveWordService} using a query
 * object whose {@code type} is {@code 1}.
 *
 * @author binbin.hou
 * @since 1.0.0
 */
@Component
public class MyDdWordAllow implements IWordAllow {

    @Autowired
    private TzLySensitiveWordService tzLySensitiveWordService;

    /**
     * Loads the allow-list entries and returns their names.
     *
     * @return the {@code name} of every record returned by the service for type 1
     */
    @Override
    public List<String> allow() {
        // Query object selecting the records of type 1 (the allow-list entries here).
        TzLySensitiveWord criteria = TzLySensitiveWord.builder().type(1).build();
        return tzLySensitiveWordService.selectTzLySensitiveWord(criteria)
                .stream()
                .map(entry -> entry.getName())
                .collect(Collectors.toList());
    }

}

package com.huida.tzly.sensitive;

import com.github.houbb.sensitive.word.api.IWordDeny;
import com.huida.tzly.domain.TzLySensitiveWord;
import com.huida.tzly.service.TzLySensitiveWordService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

/**
 * Custom deny-list (blacklist) source for the sensitive-word engine.
 *
 * <p>Words are fetched through {@link TzLySensitiveWordService} using a query
 * object whose {@code type} is {@code 0}.
 *
 * @author binbin.hou
 * @since 1.0.0
 */
@Component
public class MyDdWordDeny implements IWordDeny {

    @Autowired
    private TzLySensitiveWordService tzLySensitiveWordService;

    /**
     * Loads the deny-list entries and returns their names.
     *
     * @return the {@code name} of every record returned by the service for type 0
     */
    @Override
    public List<String> deny() {
        // Query object selecting the records of type 0 (the deny-list entries here).
        TzLySensitiveWord criteria = TzLySensitiveWord.builder().type(0).build();
        return tzLySensitiveWordService.selectTzLySensitiveWord(criteria)
                .stream()
                .map(entry -> entry.getName())
                .collect(Collectors.toList());
    }

}


    // 自定义 SensitiveWordBs 之后,不要再使用官方工具类 SensitiveWordHelper,应注入并使用自定义的 SensitiveWordBs
    // The SensitiveWordBs instance injected by Spring; all calls below go through it.
    @Autowired
    private SensitiveWordBs sensitiveWordBs;
    /**
     * Calls {@code init()} on the injected {@link SensitiveWordBs}.
     *
     * <p>Per the original author's note: intended to be triggered manually by a task to
     * initialize the sensitive words; no differential (incremental) sync is performed.
     */
    public void manualInitSensitiveWord() {
        sensitiveWordBs.init();
    }
    /**
     * Detects sensitive words in a message, records them, and returns the masked message.
     *
     * <p>Every detected word is masked with one {@code '*'} per character. Masking is done
     * by position against the original message, so a word that is a prefix or substring of
     * another detected word (e.g. "ab" and "abc") cannot prevent the longer one from being
     * masked in full.
     *
     * @param messageId    identifier forwarded to {@code mergeBatch}
     * @param message      the text to check
     * @param tzLyUserList users forwarded to {@code mergeBatch}
     * @return the masked message, or {@code null} when no sensitive word was found
     */
    @Override
    public String checkMessage(Long messageId, String message, List<TzLyUser> tzLyUserList) {
        // Find every sensitive word contained in the message.
        List<String> sensitiveWordList = sensitiveWordBs.findAll(message);
        // Nothing detected: callers receive null (contract kept from the original).
        if (CollectionUtils.isEmpty(sensitiveWordList)) {
            return null;
        }
        String finalMessage = maskSensitiveWords(message, sensitiveWordList);
        // Batch-insert the sensitive-word records (comma-joined words plus the original text).
        mergeBatch(messageId, tzLyUserList, StrUtil.join(",", sensitiveWordList), message);
        return finalMessage;
    }

    /**
     * Replaces every occurrence of each given word in {@code message} with asterisks.
     *
     * @param message the original text
     * @param words   the words to mask; null or empty entries are skipped
     * @return a copy of {@code message} with all occurrences masked
     */
    private static String maskSensitiveWords(String message, List<String> words) {
        char[] masked = message.toCharArray();
        for (String word : words) {
            // An empty word would never advance the search below; null cannot be searched.
            if (word == null || word.isEmpty()) {
                continue;
            }
            // Search the ORIGINAL message so earlier masking cannot hide later matches.
            int start = message.indexOf(word);
            while (start >= 0) {
                int end = start + word.length();
                for (int i = start; i < end; i++) {
                    masked[i] = '*';
                }
                start = message.indexOf(word, end);
            }
        }
        return new String(masked);
    }
Logo

网易易盾是国内领先的数字内容风控服务商,依托网易二十余年的先进技术和一线实践经验沉淀,为客户提供专业可靠的安全服务,涵盖内容安全、业务安全、应用安全、安全专家服务四大领域,全方位保障客户业务合规、稳健和安全运营。

更多推荐