若依 ruoyi-vue SpringBoot聊天评论内容敏感词过滤sensitive-word(一)
import com/**// 数据库查询 List < TzLySensitiveWord > list = tzLySensitiveWordService . selectTzLySensitiveWord(TzLySensitiveWord . builder() . type(1) . build());} }/**
·
组件地址
https://github.com/houbb/sensitive-word
网上博客介绍的版本大多不是最新的，本文参考官方文档，基于 0.16.1 版本整理总结，便于快速上手。
pom文件引入
<dependency>
<groupId>com.github.houbb</groupId>
<artifactId>sensitive-word</artifactId>
<version>0.16.1</version>
</dependency>
配置类
package com.huida.tzly.sensitive;
import com.github.houbb.sensitive.word.bs.SensitiveWordBs;
import com.github.houbb.sensitive.word.support.allow.WordAllows;
import com.github.houbb.sensitive.word.support.deny.WordDenys;
import com.github.houbb.sensitive.word.support.ignore.SensitiveWordCharIgnores;
import com.github.houbb.sensitive.word.support.resultcondition.WordResultConditions;
import com.github.houbb.sensitive.word.support.tag.WordTags;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Spring configuration that builds the application's {@link SensitiveWordBs} engine.
 *
 * <p>The library's default allow/deny word lists are chained with the injected
 * {@link MyDdWordAllow} and {@link MyDdWordDeny} providers.
 */
@Configuration
public class SpringSensitiveWordConfig {

    @Autowired
    private MyDdWordAllow myDdWordAllow;

    @Autowired
    private MyDdWordDeny myDdWordDeny;

    /**
     * Creates and initialises the sensitive-word bootstrap object.
     *
     * @return the initialised bootstrap object
     * @since 1.0.0
     */
    @Bean
    public SensitiveWordBs sensitiveWordBs() {
        return SensitiveWordBs.newInstance()
                // Allow list (whitelist): library defaults followed by the custom provider.
                .wordAllow(WordAllows.chains(WordAllows.defaults(), myDdWordAllow))
                // Deny list (blacklist): library defaults followed by the custom provider.
                .wordDeny(WordDenys.chains(WordDenys.defaults(), myDdWordDeny))
                // Whether sensitive-word detection is enabled.
                .enableWordCheck(true)
                // Ignore letter case.
                .ignoreCase(true)
                // Ignore the difference between full-width and half-width characters.
                .ignoreWidth(true)
                // Ignore the difference between traditional and simplified Chinese.
                .ignoreChineseStyle(true)
                // Ignore English letter styles, e.g. "Ⓕⓤc⒦ the bad words".
                .ignoreEnglishStyle(true)
                // Whether to ignore repeated characters, e.g. "ⒻⒻⒻfⓤuⓤ⒰cⓒ⒦ the bad words"; disabled here.
                .ignoreRepeat(false)
                // Optional: detect runs of consecutive digits (filters phone numbers, QQ numbers, ...).
                //.enableNumCheck(true)
                // Optional: length of the digit run that triggers the check above.
                //.numCheckLen(8)
                // Optional: e-mail address detection.
                //.enableEmailCheck(true)
                // Optional: URL detection, e.g. www.baidu.com.
                //.enableUrlCheck(true)
                // Optional: ignore digit styles, e.g. "9⓿二肆⁹₈③⑸⒋➃㈤㊄".
                //.ignoreNumStyle(true)
                // Tags attached to a word (e.g. politics, personal attack); none are configured.
                .wordTag(WordTags.none())
                // Characters that are skipped while matching.
                .charIgnore(SensitiveWordCharIgnores.defaults())
                // Extra condition applied to each match (e.g. it could require whole English words);
                // every match is accepted here.
                .wordResultCondition(WordResultConditions.alwaysTrue())
                .init();
    }
}
从数据库加载自定义敏感词的白名单和黑名单
package com.huida.tzly.sensitive;
import com.github.houbb.sensitive.word.api.IWordAllow;
import com.huida.tzly.domain.TzLySensitiveWord;
import com.huida.tzly.service.TzLySensitiveWordService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Allow-list provider backed by {@link TzLySensitiveWordService}.
 *
 * @author binbin.hou
 * @since 1.0.0
 */
@Component
public class MyDdWordAllow implements IWordAllow {

    @Autowired
    private TzLySensitiveWordService tzLySensitiveWordService;

    /**
     * Queries the sensitive-word records whose type is 1 and returns their names.
     *
     * @return the names of the matching records, in the order the service returned them
     */
    @Override
    public List<String> allow() {
        // Query template: only records with type == 1 are used for the allow list.
        TzLySensitiveWord allowQuery = TzLySensitiveWord.builder().type(1).build();
        return tzLySensitiveWordService.selectTzLySensitiveWord(allowQuery)
                .stream()
                .map(TzLySensitiveWord::getName)
                .collect(Collectors.toList());
    }
}
package com.huida.tzly.sensitive;
import com.github.houbb.sensitive.word.api.IWordDeny;
import com.huida.tzly.domain.TzLySensitiveWord;
import com.huida.tzly.service.TzLySensitiveWordService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Deny-list provider backed by {@link TzLySensitiveWordService}.
 *
 * @author binbin.hou
 * @since 1.0.0
 */
@Component
public class MyDdWordDeny implements IWordDeny {

    @Autowired
    private TzLySensitiveWordService tzLySensitiveWordService;

    /**
     * Queries the sensitive-word records whose type is 0 and returns their names.
     *
     * @return the names of the matching records, in the order the service returned them
     */
    @Override
    public List<String> deny() {
        // Query template: only records with type == 0 are used for the deny list.
        TzLySensitiveWord denyQuery = TzLySensitiveWord.builder().type(0).build();
        return tzLySensitiveWordService.selectTzLySensitiveWord(denyQuery)
                .stream()
                .map(TzLySensitiveWord::getName)
                .collect(Collectors.toList());
    }
}
// Once the engine has been customised, the official utility class SensitiveWordHelper
// can no longer be used; the configured SensitiveWordBs bean is injected instead.
@Autowired
private SensitiveWordBs sensitiveWordBs;
// Invoked manually by a task to re-initialise the sensitive words.
// This calls init() again on the injected engine; no incremental (delta) sync is performed.
public void manualInitSensitiveWord() {
sensitiveWordBs.init();
}
/**
 * Checks a message for sensitive words, records the hits and returns the masked text.
 *
 * @param messageId    id of the message; passed through to {@code mergeBatch}
 * @param message      the raw message text to check
 * @param tzLyUserList users passed through to {@code mergeBatch}
 * @return the message with its sensitive words masked, or {@code null} when the
 *         engine finds no sensitive word in the message
 */
@Override
public String checkMessage(Long messageId, String message, List<TzLyUser> tzLyUserList) {
    // Collect every sensitive word the engine detects in the message.
    List<String> sensitiveWordList = sensitiveWordBs.findAll(message);
    // Nothing detected: callers receive null as the "clean message" signal.
    if (CollectionUtils.isEmpty(sensitiveWordList)) {
        return null;
    }
    // Let the engine mask the text itself. It replaces exactly the spans it matched,
    // whereas a global String.replace per found word could also mask occurrences the
    // engine did not flag. No custom wordReplace is configured, so the library's
    // default strategy applies (one '*' per masked character).
    String finalMessage = sensitiveWordBs.replace(message);
    // Persist the hit records in one batch: joined words plus the original message.
    mergeBatch(messageId, tzLyUserList, StrUtil.join(",", sensitiveWordList), message);
    return finalMessage;
}
网易易盾是国内领先的数字内容风控服务商,依托网易二十余年的先进技术和一线实践经验沉淀,为客户提供专业可靠的安全服务,涵盖内容安全、业务安全、应用安全、安全专家服务四大领域,全方位保障客户业务合规、稳健和安全运营。
更多推荐


所有评论(0)