/// <summary>
/// Dictionary-based dirty-word filter.
/// The word list is read from <c>DirtyWordData.DirtyKeyword</c> ('|'-separated entries, the
/// word being the part before the first '&amp;'), and the replacement for each word is looked
/// up in <c>DirtyWordData.DirtyHT</c>.
/// The static lookup tables are filled lazily on the first call to <see cref="Replace"/>;
/// that lazy initialisation is not synchronised.
/// </summary>
public class DirtyWordOper
{
    // Set of known dirty words (only the keys are used).
    private static Dictionary<string, object> hash = new Dictionary<string, object>();

    // Flags every character that starts at least one dirty word.
    // Sized char.MaxValue + 1 so that every char value, including '\uffff', is a valid index.
    private static BitArray firstCharCheck = new BitArray(char.MaxValue + 1);

    // Flags every character that occurs anywhere in a dirty word.
    private static BitArray allCharCheck = new BitArray(char.MaxValue + 1);

    // Length of the longest dirty word; bounds the candidate substring length while scanning.
    private static int maxLength = 0;

    // True until the lookup tables have been built once.
    private static bool onlyOne = true;

    /// <summary>
    /// Returns <paramref name="text"/> with every dirty word replaced by its mapped replacement.
    /// </summary>
    /// <param name="text">Text to filter; null or empty input is returned unchanged.</param>
    /// <returns>The filtered text.</returns>
    public string Replace(string text)
    {
        if (onlyOne)
        {
            Init(); // build the lookup tables; runs only until it has succeeded once
            onlyOne = false;
        }

        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        if (!isDirtyword(text))
        {
            return text;
        }

        // Collect the replacement operations, then apply them.
        List<DetailRepModel> drlist = GetList(text);
        return Replace2(text, drlist);
    }

    /// <summary>
    /// Builds the word set, the two character bitmaps and <c>maxLength</c> from
    /// <c>DirtyWordData.DirtyKeyword</c>.
    /// </summary>
    private static void Init()
    {
        string[] badwords = DirtyWordData.DirtyKeyword.Split('|');
        foreach (string bw in badwords)
        {
            // Only the part before the first '&' is the word itself.
            string word = bw.Split('&')[0].Trim();

            // NOTE(review): these strip the literal two-character sequences "/r" and "/n";
            // if "\r" / "\n" was intended, Trim() above already removes those at the ends.
            word = word.Replace("/r", "");
            word = word.Replace("/n", "");

            if (word == "")
            {
                // Skip blank entries instead of abandoning the rest of the list.
                continue;
            }

            if (hash.ContainsKey(word))
            {
                continue;
            }

            hash.Add(word, null);
            maxLength = Math.Max(maxLength, word.Length);
            firstCharCheck[word[0]] = true;
            foreach (char c in word)
            {
                allCharCheck[c] = true;
            }
        }
    }

    /// <summary>
    /// Tells whether <paramref name="text"/> contains at least one dirty word.
    /// </summary>
    /// <param name="text">Non-null text to scan.</param>
    /// <returns>True when a dirty word occurs in the text.</returns>
    private static bool isDirtyword(string text)
    {
        int index = 0;
        while (index < text.Length)
        {
            // A match can only start on a character that starts some dirty word.
            if (!firstCharCheck[text[index]])
            {
                index++;
                continue;
            }

            int limit = Math.Min(maxLength, text.Length - index);
            for (int j = 1; j <= limit; j++)
            {
                // A character that appears in no dirty word ends every candidate starting here.
                if (!allCharCheck[text[index + j - 1]])
                {
                    break;
                }

                if (hash.ContainsKey(text.Substring(index, j)))
                {
                    return true;
                }
            }

            index++;
        }

        return false;
    }

    /// <summary>
    /// Scans <paramref name="text"/> left to right and returns one operation per dirty word
    /// found. At each position the longest matching word wins, and scanning resumes after it,
    /// so the returned operations are ordered by index and never overlap.
    /// </summary>
    /// <param name="text">Non-null text to scan.</param>
    /// <returns>The replacement operations, in ascending index order.</returns>
    private static List<DetailRepModel> GetList(string text)
    {
        List<DetailRepModel> detailList = new List<DetailRepModel>();
        int index = 0;
        while (index < text.Length)
        {
            if (!firstCharCheck[text[index]])
            {
                index++;
                continue;
            }

            // Find the longest dirty word starting at index.
            int limit = Math.Min(maxLength, text.Length - index);
            int matchLength = 0;
            for (int j = 1; j <= limit; j++)
            {
                if (!allCharCheck[text[index + j - 1]])
                {
                    break;
                }

                if (hash.ContainsKey(text.Substring(index, j)))
                {
                    matchLength = j;
                }
            }

            if (matchLength == 0)
            {
                index++;
                continue;
            }

            DetailRepModel detail = new DetailRepModel();
            detail.index = index;
            detail.number = matchLength;
            detail.content = text.Substring(index, matchLength);
            detailList.Add(detail);

            // Continue right after the matched word.
            index += matchLength;
        }

        return detailList;
    }

    /// <summary>
    /// Applies the replacement operations to <paramref name="text"/>.
    /// The result is assembled from slices of the original text, so the offsets stored in the
    /// operations stay valid even when a replacement is longer or shorter than the word it
    /// replaces.
    /// </summary>
    /// <param name="text">The original text the operations were computed on.</param>
    /// <param name="drlist">Operations in ascending index order, as produced by GetList.</param>
    /// <returns>The text with all mapped words replaced.</returns>
    private static string Replace2(string text, List<DetailRepModel> drlist)
    {
        if (drlist == null || drlist.Count == 0 || text == "")
        {
            return text;
        }

        System.Text.StringBuilder result = new System.Text.StringBuilder(text.Length);
        int cursor = 0; // first character of the original text not yet copied to result

        foreach (DetailRepModel dr in drlist)
        {
            if (dr == null)
            {
                continue;
            }

            // Ignore operations that overlap an earlier one or fall outside the text.
            if (dr.index < cursor || dr.number < 0 || dr.index + dr.number > text.Length)
            {
                continue;
            }

            string word = text.Substring(dr.index, dr.number);
            object replacement = DirtyWordData.DirtyHT[(object)word];
            if (replacement == null)
            {
                // No replacement is configured for this word: leave it as it is and keep
                // processing the remaining operations.
                continue;
            }

            result.Append(text, cursor, dr.index - cursor);
            result.Append(replacement.ToString());
            cursor = dr.index + dr.number;
        }

        result.Append(text, cursor, text.Length - cursor);
        return result.ToString();
    }
}
效果还行，不过我们老大告诉了我一个更厉害的方法，据说比上面这种要快 50 倍，只是写起来有点麻烦：
/// <summary>
/// Contract implemented by the replacer class that <see cref="ReplaceDW"/> generates and
/// compiles at run time.
/// </summary>
public interface IReplaceDW
{
    /// <summary>Returns <paramref name="s"/> with every dirty word replaced.</summary>
    string Replace(string s);
}

/// <summary>
/// Dirty-word replacer that turns the word table (<c>KeyWord.DirtyWordData.DirtyHT</c>) into a
/// character trie, emits C# source consisting of nested <c>switch</c> statements for that trie,
/// compiles it in memory and delegates <see cref="Replace"/> to the compiled class.
/// The compiled replacer is built once and shared by all instances.
/// </summary>
public class ReplaceDW
{
    // Replacer used by this instance; null when code generation or compilation failed.
    private IReplaceDW _r = null;

    // True until the word table has been copied into tmp.
    private static bool isfirst = true;

    // Snapshot of the (word, replacement) pairs taken on first use.
    private static List<KeyValuePair<string, string>> tmp = new List<KeyValuePair<string, string>>();

    // Guards the one-time build of the shared compiled replacer.
    private static readonly object gate = new object();

    // Compiled replacer shared by all instances; null until a build has succeeded.
    private static IReplaceDW shared = null;

    // Diagnostics of the most recent failed build, reported by Replace.
    private static string lastError = "";

    /// <summary>
    /// Inserts word <paramref name="s"/> with replacement <paramref name="t"/> into the trie
    /// rooted at <paramref name="parent"/>. The node of the last character gets
    /// <c>Target = t</c>; nodes created on the way get an empty <c>Target</c>.
    /// </summary>
    /// <param name="parent">Node under which the word is inserted.</param>
    /// <param name="s">The word; null or empty input is ignored.</param>
    /// <param name="t">Replacement text for the word.</param>
    public static void AddToWords(DirtyChar parent, string s, string t)
    {
        if (string.IsNullOrEmpty(s))
        {
            return;
        }

        DirtyChar node = parent;
        for (int k = 0; k < s.Length; k++)
        {
            char ch = s[k];
            DirtyChar child = node.Children.Find(o => o.Orienginal == ch);
            if (child == null)
            {
                child = new DirtyChar() { Orienginal = ch, Children = new List<DirtyChar>(), Target = "" };
                node.Children.Add(child);
            }

            node = child;
        }

        node.Target = t;
    }

    /// <summary>
    /// Emits the C# statements that handle the input once the characters leading to
    /// <paramref name="dc"/> have matched at <c>s[i] .. s[i + deepLevel - 1]</c>.
    /// The emitted code uses the locals <c>s</c>, <c>i</c>, <c>len</c> and <c>sb</c> of the
    /// generated method and always advances <c>i</c> by at least one.
    /// </summary>
    /// <param name="dc">Trie node reached so far.</param>
    /// <param name="deepLevel">Number of characters matched so far (1 for a first character).</param>
    /// <returns>The generated statements.</returns>
    public static string BuildChildren(DirtyChar dc, int deepLevel)
    {
        return BuildNode(dc, deepLevel, null, 0);
    }

    // Emits the statements for one trie node. fallbackTarget / fallbackLength describe the
    // longest complete word matched on the path so far (fallbackLength == 0: none yet).
    private static string BuildNode(DirtyChar dc, int depth, string fallbackTarget, int fallbackLength)
    {
        StringBuilder code = new StringBuilder();
        string pad = new string(' ', depth + 4);

        if (dc.Children.Count == 0)
        {
            // Leaf: a complete word of depth characters has been matched.
            code.Append(pad).Append("sb.Append(").Append(ToStringLiteral(dc.Target)).AppendLine(");");
            code.Append(pad).Append("i += ").Append(depth).AppendLine(";");
            return code.ToString();
        }

        // An inner node with a non-empty Target is itself a complete word and becomes the
        // new fallback. (An inner word whose replacement is the empty string cannot be told
        // apart from a plain prefix node and is therefore not treated as a word.)
        if (!string.IsNullOrEmpty(dc.Target))
        {
            fallbackTarget = dc.Target;
            fallbackLength = depth;
        }

        // What to do when no longer word matches: emit the longest word matched so far,
        // or, when there is none, copy the current character through unchanged.
        string fallback = fallbackLength > 0
            ? "sb.Append(" + ToStringLiteral(fallbackTarget) + "); i += " + fallbackLength + ";"
            : "sb.Append(s[i]); i++;";

        // Guard the read of s[i + depth] before switching on it.
        code.Append(pad).Append("if (i + ").Append(depth).Append(" >= len) { ").Append(fallback).AppendLine(" }");
        code.Append(pad).Append("else switch (s[i + ").Append(depth).AppendLine("])");
        code.Append(pad).AppendLine("{");
        foreach (DirtyChar c in dc.Children)
        {
            code.Append(pad).Append("case ").Append(ToCharLiteral(c.Orienginal)).AppendLine(":");
            code.Append(BuildNode(c, depth + 1, fallbackTarget, fallbackLength));
            code.Append(pad).AppendLine("break;");
        }

        code.Append(pad).AppendLine("default:");
        code.Append(pad).AppendLine(fallback);
        code.Append(pad).AppendLine("break;");
        code.Append(pad).AppendLine("}");
        return code.ToString();
    }

    // Renders c as a C# character literal using a \uXXXX escape, so quotes, backslashes and
    // control characters cannot break the generated source.
    private static string ToCharLiteral(char c)
    {
        return "'\\u" + ((int)c).ToString("x4") + "'";
    }

    // Renders value as a C# string literal; everything except plain printable ASCII is
    // written as a \uXXXX escape. Null is rendered as the empty string.
    private static string ToStringLiteral(string value)
    {
        StringBuilder literal = new StringBuilder("\"");
        if (value != null)
        {
            foreach (char c in value)
            {
                if (c >= ' ' && c <= '~' && c != '"' && c != '\\')
                {
                    literal.Append(c);
                }
                else
                {
                    literal.Append("\\u").Append(((int)c).ToString("x4"));
                }
            }
        }

        literal.Append('"');
        return literal.ToString();
    }

    /// <summary>
    /// Replaces every dirty word in <paramref name="s"/>.
    /// </summary>
    /// <param name="s">Text to filter; null or empty input is returned unchanged.</param>
    /// <returns>The filtered text.</returns>
    /// <exception cref="InvalidOperationException">
    /// The replacer could not be generated or compiled; the message carries the diagnostics.
    /// </exception>
    public string Replace(string s)
    {
        if (_r == null)
        {
            throw new InvalidOperationException("The dirty-word replacer could not be compiled. " + lastError);
        }

        if (string.IsNullOrEmpty(s))
        {
            return s;
        }

        return _r.Replace(s);
    }

    /// <summary>
    /// Creates a replacer. The first successful construction generates and compiles the
    /// replacer class; later constructions reuse it. The constructor does not throw when
    /// compilation fails; the failure is reported by <see cref="Replace"/>.
    /// </summary>
    public ReplaceDW()
    {
        lock (gate)
        {
            if (shared == null)
            {
                shared = Compile();
            }

            _r = shared;
        }
    }

    // Generates the replacer source from the word table, compiles it in memory and returns
    // an instance, or null (with lastError set) when that fails. Called under the gate lock.
    private static IReplaceDW Compile()
    {
        if (isfirst)
        {
            foreach (DictionaryEntry d in KeyWord.DirtyWordData.DirtyHT)
            {
                if (d.Key == null || d.Value == null)
                {
                    continue;
                }

                tmp.Add(new KeyValuePair<string, string>(d.Key.ToString(), d.Value.ToString()));
            }

            // Descending key order, as before; it only affects the order of the case labels.
            tmp.Sort((a, b) => { return b.Key.CompareTo(a.Key); });
            isfirst = false;
        }

        // Build the trie.
        DirtyChar words = new DirtyChar() { Children = new List<DirtyChar>() };
        foreach (KeyValuePair<string, string> kv in tmp)
        {
            AddToWords(words, kv.Key, kv.Value);
        }

        // Emit the source of the replacer class. The interface is referenced by its full
        // name so the generated code does not depend on which namespace declares it.
        string interfaceName = "global::" + typeof(IReplaceDW).FullName.Replace('+', '.');
        StringBuilder source = new StringBuilder();
        source.AppendLine("using System;");
        source.AppendLine("namespace KeyWord");
        source.AppendLine("{");
        source.AppendLine("public class ReplaceDW_ : " + interfaceName);
        source.AppendLine("{");
        source.AppendLine("public string Replace( string s )");
        source.AppendLine("{");
        source.AppendLine("int len = s.Length, i = 0;");
        source.AppendLine("System.Text.StringBuilder sb = new System.Text.StringBuilder(len);");
        source.AppendLine("while (i < len)");
        source.AppendLine("{");
        source.AppendLine("switch (s[i])");
        source.AppendLine("{");
        foreach (DirtyChar c in words.Children)
        {
            source.AppendLine("case " + ToCharLiteral(c.Orienginal) + ":");
            source.Append(BuildChildren(c, 1));
            source.AppendLine("break;");
        }

        source.AppendLine("default:");
        source.AppendLine("sb.Append(s[i++]);");
        source.AppendLine("break;");
        source.AppendLine("}");
        source.AppendLine("}");
        source.AppendLine("return sb.ToString();");
        source.AppendLine("}");
        source.AppendLine("}");
        source.AppendLine("}");

        // Compile on the fly.
        CompilerParameters options = new CompilerParameters();
        options.CompilerOptions = "/o";
        options.GenerateExecutable = false;
        options.GenerateInMemory = true;
        options.ReferencedAssemblies.Add("System.dll");
        // The generated class only needs the assembly that declares IReplaceDW.
        options.ReferencedAssemblies.Add(typeof(IReplaceDW).Assembly.Location);

        CompilerResults result;
        using (CSharpCodeProvider compiler = new CSharpCodeProvider())
        {
            result = compiler.CompileAssemblyFromSource(options, source.ToString());
        }

        if (result.Errors.HasErrors)
        {
            StringBuilder message = new StringBuilder();
            foreach (CompilerError error in result.Errors)
            {
                message.AppendLine(error.ToString());
            }

            lastError = message.ToString();
            return null;
        }

        if (result.CompiledAssembly == null)
        {
            lastError = "The compiler produced no assembly.";
            return null;
        }

        IReplaceDW instance = null;
        try
        {
            Type type = result.CompiledAssembly.GetType("KeyWord.ReplaceDW_");
            if (type != null)
            {
                instance = Activator.CreateInstance(type) as IReplaceDW;
            }

            if (instance == null)
            {
                lastError = "The generated type KeyWord.ReplaceDW_ could not be instantiated.";
                return null;
            }

            // Warm-up calls so the generated method is JIT-compiled before first real use.
            instance.Replace("x");
            instance.Replace("x");
        }
        catch (Exception ex)
        {
            lastError = ex.ToString();
            Console.WriteLine(ex);
        }

        return instance;
    }
}
点击下载本例源码
|
请发表评论