using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace SensitiveWordFilter
{
/// <summary>
/// Dictionary-driven sensitive-word filter. The word list is compiled into a
/// character trie made of nested dictionaries; each node maps a character to its
/// child node and additionally stores an end-of-word marker under a reserved key.
/// </summary>
public class SensitiveWord
{
    // Reserved key under which every trie node stores its end-of-word marker
    // ("0" = not a word end, "1" = word end). Because it is used as a dictionary
    // key inside the nodes, this character can never be part of a stored word.
    private static readonly char IsEndChar = '$';

    /// <summary>
    /// Reads the word list from disk and builds the trie used for matching.
    /// </summary>
    public class SensitiveWordInit
    {
        // Name of the text encoding used to read the word list.
        private static readonly String ENCODING = "UTF-8";

        /// <summary>
        /// Loads the word list and compiles it into a trie.
        /// </summary>
        /// <returns>The root node of the trie.</returns>
        public Dictionary<char, object> initKeyWord()
        {
            HashSet<String> wordSet = readSensitiveWordFile();
            return addSensitiveWordToHashMap(wordSet);
        }

        /// <summary>
        /// Builds the trie from a set of words. For the words "ab" and "abc" the
        /// result looks like:
        ///   a = { $ = "0", b = { $ = "1", c = { $ = "1" } } }
        /// The root node itself carries no marker entry.
        /// </summary>
        /// <param name="wordSet">Words to insert.</param>
        /// <returns>The root node of the trie.</returns>
        private Dictionary<char, object> addSensitiveWordToHashMap(HashSet<String> wordSet)
        {
            Dictionary<char, object> root = new Dictionary<char, object>();
            foreach (String word in wordSet)
            {
                Dictionary<char, object> node = root;
                bool advanced = false;
                foreach (char keyChar in word)
                {
                    // The marker character cannot be used as an edge; drop it from the word.
                    if (keyChar == IsEndChar)
                    {
                        continue;
                    }

                    object existing;
                    if (node.TryGetValue(keyChar, out existing))
                    {
                        node = (Dictionary<char, object>)existing;
                    }
                    else
                    {
                        // New nodes start out as "not a word end".
                        Dictionary<char, object> child = new Dictionary<char, object>();
                        child.Add(IsEndChar, "0");
                        node.Add(keyChar, child);
                        node = child;
                    }
                    advanced = true;
                }

                // Mark the end after the loop so that a word whose last character is
                // the (skipped) marker character still gets its end flag.
                if (advanced)
                {
                    node[IsEndChar] = "1";
                }
            }
            return root;
        }

        /// <summary>
        /// Reads "dic.txt" (one word per line) from the current directory.
        /// </summary>
        /// <returns>The distinct lines of the file.</returns>
        private HashSet<String> readSensitiveWordFile()
        {
            // The HashSet constructor enumerates the lazy line sequence to the end,
            // which also closes the underlying reader.
            return new HashSet<String>(File.ReadLines("dic.txt", Encoding.GetEncoding(ENCODING)));
        }
    }

    /// <summary>
    /// Detects, lists and masks sensitive words in text using the trie produced by
    /// <see cref="SensitiveWordInit"/>.
    /// </summary>
    public class SensitivewordFilter
    {
        private Dictionary<char, object> sensitiveWordMap = null;

        // Minimum match rule.
        public static int minMatchTYpe = 1;

        // Maximum match rule.
        public static int maxMatchType = 2;

        // Lazily created shared instance.
        private static SensitivewordFilter inst = null;

        /// <summary>
        /// Builds the trie from the word list file.
        /// </summary>
        private SensitivewordFilter()
        {
            sensitiveWordMap = new SensitiveWordInit().initKeyWord();
        }

        /// <summary>
        /// Returns the shared filter, creating it on first use. The creation is not
        /// synchronized.
        /// </summary>
        /// <returns>The shared filter instance.</returns>
        public static SensitivewordFilter getInstance()
        {
            if (inst == null)
            {
                inst = new SensitivewordFilter();
            }
            return inst;
        }

        /// <summary>
        /// Tells whether the text contains at least one sensitive word.
        /// </summary>
        /// <param name="txt">Text to scan; null or empty yields false.</param>
        /// <param name="matchType">Match rule, see <see cref="CheckSensitiveWord"/>.</param>
        /// <returns>true when a sensitive word is found.</returns>
        public bool isContaintSensitiveWord(String txt, int matchType = 1)
        {
            if (string.IsNullOrEmpty(txt))
            {
                return false;
            }

            for (int i = 0; i < txt.Length; i++)
            {
                // The first hit decides the answer; no need to scan the rest.
                if (CheckSensitiveWord(txt, i, matchType) > 0)
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Collects the distinct sensitive words that occur in the text.
        /// </summary>
        /// <param name="txt">Text to scan; null or empty yields an empty set.</param>
        /// <param name="matchType">Match rule, see <see cref="CheckSensitiveWord"/>.</param>
        /// <returns>The set of matched substrings.</returns>
        public HashSet<String> getSensitiveWord(String txt, int matchType = 1)
        {
            HashSet<String> sensitiveWordList = new HashSet<String>();
            if (string.IsNullOrEmpty(txt))
            {
                return sensitiveWordList;
            }

            for (int i = 0; i < txt.Length; i++)
            {
                int length = CheckSensitiveWord(txt, i, matchType);
                if (length > 0)
                {
                    sensitiveWordList.Add(txt.Substring(i, length));
                    // Minus one because the loop header advances i once more.
                    i = i + length - 1;
                }
            }
            return sensitiveWordList;
        }

        /// <summary>
        /// Returns a copy of the text in which every character of every sensitive
        /// word is replaced by <paramref name="replaceChar"/>.
        /// </summary>
        /// <param name="txt">Text to mask; null or empty is returned unchanged.</param>
        /// <param name="replaceChar">String emitted once per masked character.</param>
        /// <param name="matchType">Match rule, see <see cref="CheckSensitiveWord"/>.</param>
        /// <returns>The masked text.</returns>
        public String replaceSensitiveWord(String txt, String replaceChar, int matchType = 1)
        {
            if (string.IsNullOrEmpty(txt))
            {
                return txt;
            }

            // The output is assembled left to right instead of patching a copy of the
            // input in place, so positions stay correct even when replaceChar is not
            // exactly one character long.
            StringBuilder sb = new StringBuilder(txt.Length);
            int i = 0;
            while (i < txt.Length)
            {
                int length = CheckSensitiveWord(txt, i, matchType);
                if (length > 0)
                {
                    sb.Append(getReplaceChars(replaceChar, length));
                    i += length;
                }
                else
                {
                    sb.Append(txt[i]);
                    i++;
                }
            }
            return sb.ToString();
        }

        /// <summary>
        /// Repeats <paramref name="replaceChar"/> <paramref name="length"/> times.
        /// </summary>
        /// <param name="replaceChar">String to repeat.</param>
        /// <param name="length">Number of repetitions.</param>
        /// <returns>The repeated string.</returns>
        private String getReplaceChars(String replaceChar, int length)
        {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < length; i++)
            {
                sb.Append(replaceChar);
            }
            return sb.ToString();
        }

        /// <summary>
        /// Checks whether a sensitive word starts at <paramref name="beginIndex"/>.
        /// Only words of two or more characters count as a match.
        /// Under the minimum rule the scan looks past the first completed word for a
        /// longer entry and stops at the second completed word; if no longer entry
        /// materializes, the first completed word is returned. Under any other rule
        /// the longest word starting at the position is returned.
        /// </summary>
        /// <param name="txt">Text to inspect; null yields 0.</param>
        /// <param name="beginIndex">Index in <paramref name="txt"/> where the word must start.</param>
        /// <param name="matchType"><see cref="minMatchTYpe"/> or <see cref="maxMatchType"/>.</param>
        /// <returns>Length of the matched word, or 0 when there is none.</returns>
        public int CheckSensitiveWord(String txt, int beginIndex, int matchType)
        {
            if (txt == null)
            {
                return 0;
            }

            bool minimumRule = SensitivewordFilter.minMatchTYpe == matchType;
            Dictionary<char, object> node = sensitiveWordMap;
            int consumed = 0;      // characters matched so far
            int bestLength = 0;    // length of the last completed word seen
            int boundaries = 0;    // completed words seen so far

            for (int i = beginIndex; i < txt.Length; i++)
            {
                object next;
                node.TryGetValue(txt[i], out next);

                // A missing edge ends the walk. The marker character maps to a string
                // rather than a node, so it ends the walk here as well.
                Dictionary<char, object> child = next as Dictionary<char, object>;
                if (child == null)
                {
                    break;
                }

                node = child;
                consumed++;

                // Single characters are never reported as words.
                if (consumed < 2 || !isWordEnd(node))
                {
                    continue;
                }

                // Remember the completed word so it can serve as the result when the
                // walk later fails or the text ends inside a longer entry.
                bestLength = consumed;
                boundaries++;

                if (minimumRule && boundaries > 1)
                {
                    break;
                }
            }
            return bestLength;
        }

        // Tells whether the node's marker entry flags the end of a word.
        private static bool isWordEnd(Dictionary<char, object> node)
        {
            object marker;
            return node.TryGetValue(IsEndChar, out marker) && (marker as string) == "1";
        }
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace SensitiveWordFilter
{
/// <summary>
/// Console demo: masks the sensitive words in a sample sentence and prints the
/// text before and after masking.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Prints both versions of the sample text and waits for a key press.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        String original = "$fuckfuck you你麻痹e菜太菜了fuckyou从飞啊 fuck you";

        // Every character of a matched word is replaced by "*".
        String masked = SensitiveWord.SensitivewordFilter.getInstance().replaceSensitiveWord(original, "*");

        Console.WriteLine("替换前的文字为:" + original);
        Console.WriteLine("替换后的文字为:" + masked);
        Console.ReadKey();
    }
}
} (编辑:李大同)
【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容!
|