asp.Net   发布时间:2022-04-07  发布网站:大佬教程  code.js-code.com
大佬教程收集整理的这篇文章主要介绍了敏感词汇过滤DFA算法。大佬教程觉得挺不错的,现在分享给大家,也给大家做个参考。

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace SensitiveWordFilter
{
/// <summary>
/// Sensitive-word filtering based on a character trie (the "DFA" model).
/// <see cref="SensitiveWordInit"/> builds the trie from a word list file and
/// <see cref="SensitivewordFilter"/> uses it to detect, collect and mask words.
/// </summary>
public class SensitiveWord
{
    /// <summary>
    /// Reserved key present in every trie node. Its value is the string "1" when a
    /// word ends at that node and "0" otherwise. Because the key lives in the same
    /// dictionary as the child characters, '$' itself can never be part of a word.
    /// </summary>
    private const char IsEndChar = '$';

    /// <summary>
    /// Loads the sensitive-word list and builds the trie used by the filter.
    /// </summary>
    public class SensitiveWordInit
    {
        // Word list file, one word per line. The path is relative, so it is
        // resolved against the process's current working directory.
        private const string DictionaryPath = "Dic.txt";

        /// <summary>
        /// Reads the word list file and builds the trie from it.
        /// </summary>
        /// <returns>
        /// The trie root: each key is a character mapping to a child node of the
        /// same dictionary type; child nodes additionally carry the '$' end marker.
        /// </returns>
        public Dictionary<char, object> initKeyWord()
        {
            HashSet<string> wordSet = readSensitiveWordFile();
            return addSensitiveWordToHashMap(wordSet);
        }

        /// <summary>
        /// Inserts every word of <paramref name="wordSet"/> into a new trie.
        /// </summary>
        /// <param name="wordSet">The words to insert.</param>
        /// <returns>The trie root.</returns>
        // Shape of the result for the words "ab" and "abcd":
        //
        //   a = {
        //         $ = "0"
        //         b = {
        //               $ = "1"            <- "ab" ends here
        //               c = {
        //                     $ = "0"
        //                     d = { $ = "1" }   <- "abcd" ends here
        //               }
        //         }
        //   }
        private Dictionary<char, object> addSensitiveWordToHashMap(HashSet<string> wordSet)
        {
            // Pre-size the root to limit rehashing while the trie is filled.
            Dictionary<char, object> wordMap = new Dictionary<char, object>(wordSet.Count);

            foreach (string word in wordSet)
            {
                if (string.IsNullOrEmpty(word))
                {
                    continue;
                }

                Dictionary<char, object> node = wordMap;
                foreach (char keyChar in word)
                {
                    // '$' is the end-marker key and cannot be stored as a child.
                    if (keyChar == IsEndChar)
                    {
                        continue;
                    }

                    object existing;
                    if (node.TryGetValue(keyChar, out existing))
                    {
                        node = (Dictionary<char, object>)existing;
                    }
                    else
                    {
                        // New node: not a word end until proven otherwise.
                        Dictionary<char, object> child = new Dictionary<char, object>();
                        child.Add(IsEndChar, "0");
                        node.Add(keyChar, child);
                        node = child;
                    }
                }

                // Mark the end on the last node actually reached, so a word whose
                // final character is '$' is still terminated correctly. The root
                // is never marked (a word consisting only of '$' inserts nothing).
                if (!ReferenceEquals(node, wordMap))
                {
                    node[IsEndChar] = "1";
                }
            }

            return wordMap;
        }

        /// <summary>
        /// Reads the word list file as UTF-8, one word per line. Lines are trimmed
        /// and blank lines are skipped.
        /// </summary>
        /// <returns>The distinct words found in the file.</returns>
        private HashSet<string> readSensitiveWordFile()
        {
            HashSet<string> wordSet = new HashSet<string>();
            foreach (string line in File.ReadLines(DictionaryPath, Encoding.UTF8))
            {
                string word = line.Trim();
                if (word.Length > 0)
                {
                    wordSet.Add(word);
                }
            }
            return wordSet;
        }
    }

    /// <summary>
    /// Detects, extracts and masks sensitive words in text using the trie built
    /// by <see cref="SensitiveWordInit"/>. Obtain the shared instance through
    /// <see cref="geTinstance"/>.
    /// </summary>
    public class SensitivewordFilter
    {
        private readonly Dictionary<char, object> sensitiveWordMap;

        /// <summary>Match type: stop at the shortest word starting at a position.</summary>
        public static int minMatchTYpe = 1;

        /// <summary>Match type: take the longest word starting at a position.</summary>
        public static int maxMatchType = 2;

        // Lazily created shared instance.
        private static SensitivewordFilter inst = null;

        /// <summary>
        /// Builds the trie from the word list file.
        /// </summary>
        private SensitivewordFilter()
        {
            sensitiveWordMap = new SensitiveWordInit().initKeyWord();
        }

        /// <summary>
        /// Returns the shared filter, creating it on first use.
        /// </summary>
        /// <returns>The shared filter instance.</returns>
        // NOTE(review): creation is not synchronized; concurrent first calls may
        // each build an instance (the last assignment wins).
        public static SensitivewordFilter geTinstance()
        {
            if (inst == null)
            {
                inst = new SensitivewordFilter();
            }
            return inst;
        }

        /// <summary>
        /// Tells whether <paramref name="txt"/> contains at least one sensitive word.
        /// </summary>
        /// <param name="txt">Text to scan; null or empty yields false.</param>
        /// <param name="matchType">Either minMatchTYpe or maxMatchType.</param>
        /// <returns>True when a sensitive word starts at some position of the text.</returns>
        public bool isContaintSensitiveWord(string txt, int matchType = 1)
        {
            if (string.IsNullOrEmpty(txt))
            {
                return false;
            }

            for (int i = 0; i < txt.Length; i++)
            {
                // The first hit settles the answer; no need to scan further.
                if (checkSensitiveWord(txt, i, matchType) > 0)
                {
                    return true;
                }
            }
            return false;
        }

        /// <summary>
        /// Collects the distinct sensitive words found in <paramref name="txt"/>,
        /// scanning left to right without overlapping matches.
        /// </summary>
        /// <param name="txt">Text to scan; null or empty yields an empty set.</param>
        /// <param name="matchType">Either minMatchTYpe or maxMatchType.</param>
        /// <returns>The set of matched words.</returns>
        public HashSet<string> getSensitiveWord(string txt, int matchType = 1)
        {
            HashSet<string> sensitiveWordList = new HashSet<string>();
            if (string.IsNullOrEmpty(txt))
            {
                return sensitiveWordList;
            }

            int i = 0;
            while (i < txt.Length)
            {
                int length = checkSensitiveWord(txt, i, matchType);
                if (length > 0)
                {
                    sensitiveWordList.Add(txt.Substring(i, length));
                    i += length; // resume right after the matched word
                }
                else
                {
                    i++;
                }
            }

            return sensitiveWordList;
        }

        /// <summary>
        /// Returns a copy of <paramref name="txt"/> in which every character of each
        /// matched sensitive word is replaced by <paramref name="replaceChar"/>.
        /// </summary>
        /// <param name="txt">Text to mask; null or empty is returned unchanged.</param>
        /// <param name="replaceChar">String emitted once per masked character.</param>
        /// <param name="matchType">Either minMatchTYpe or maxMatchType.</param>
        /// <returns>The masked text.</returns>
        public string replaceSensitiveWord(string txt, string replaceChar, int matchType = 1)
        {
            if (string.IsNullOrEmpty(txt))
            {
                return txt;
            }

            // Build the output by appending rather than replacing in place, so the
            // scan position in txt stays valid whatever the length of replaceChar.
            StringBuilder sb = new StringBuilder(txt.Length);
            int i = 0;
            while (i < txt.Length)
            {
                int length = checkSensitiveWord(txt, i, matchType);
                if (length > 0)
                {
                    sb.Append(BuildMask(replaceChar, length));
                    i += length;
                }
                else
                {
                    sb.Append(txt[i]);
                    i++;
                }
            }

            return sb.ToString();
        }

        /// <summary>
        /// Repeats <paramref name="replaceChar"/> <paramref name="length"/> times.
        /// </summary>
        private static string BuildMask(string replaceChar, int length)
        {
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < length; i++)
            {
                sb.Append(replaceChar);
            }
            return sb.ToString();
        }

        /// <summary>
        /// Checks whether a sensitive word starts exactly at
        /// <paramref name="beginIndex"/> in <paramref name="txt"/>.
        /// </summary>
        /// <param name="txt">Text to inspect; null or empty yields 0.</param>
        /// <param name="beginIndex">Index at which the word must start.</param>
        /// <param name="matchTypE">
        /// minMatchTYpe returns the shortest word starting here; any other value
        /// returns the longest one.
        /// </param>
        /// <returns>The length of the matched word, or 0 when none starts here.</returns>
        public int checkSensitiveWord(string txt, int beginIndex, int matchTypE)
        {
            if (string.IsNullOrEmpty(txt))
            {
                return 0;
            }

            Dictionary<char, object> node = sensitiveWordMap;
            int matched = 0; // length of the last complete word seen on this path

            for (int i = beginIndex; i < txt.Length; i++)
            {
                char current = txt[i];

                // '$' is the end-marker key: its value is a flag string, not a
                // child node, so it must be treated as a non-matching character.
                if (current == IsEndChar)
                {
                    break;
                }

                object child;
                if (!node.TryGetValue(current, out child))
                {
                    break;
                }
                node = (Dictionary<char, object>)child;

                object endFlag;
                if (node.TryGetValue(IsEndChar, out endFlag) && (endFlag as string) == "1")
                {
                    // A whole word ends here; remember it. A longer candidate that
                    // later fails simply leaves this length in place.
                    matched = i - beginIndex + 1;
                    if (matchTypE == minMatchTYpe)
                    {
                        break;
                    }
                }
            }

            return matched;
        }
    }
}

}

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace SensitiveWordFilter
{
/// <summary>
/// Console demo: masks the sensitive words in a sample string and prints the
/// text before and after.
/// </summary>
class Program
{
    /// <summary>
    /// Entry point. Obtains the shared filter, masks the sample text with '*',
    /// prints both versions and waits for a key press.
    /// </summary>
    static void Main(String[] args)
    {
        SensitiveWord.SensitivewordFilter filter = SensitiveWord.SensitivewordFilter.geTinstance();
        String txt = "$fuckfuck you你麻痹e菜太菜了fuckyou从飞啊 fuck you";
        String hou = filter.replaceSensitiveWord(txt, "*");
        Console.WriteLine("替换前的文字为:" + txt);
        Console.WriteLine("替换后的文字为:" + hou);
        Console.ReadKey();
    }
}
}

大佬总结

以上是大佬教程为你收集整理的敏感词汇过滤DFA算法的全部内容,希望文章能够帮你解决在实现敏感词汇过滤DFA算法时所遇到的程序开发问题。

如果觉得大佬教程网站内容还不错,欢迎将大佬教程推荐给程序员好友。

本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
如您有任何意见或建议可联系处理。小编QQ:384754419,请注明来意。
标签: