C#正则Groups高级使用方法

OStack程序员社区-中国程序员成长平台 › 门户 › 编程› Node.js›Node.js教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

正则表达式号称开发者的瑞士军刀，用好正则表达式尤为重要。

拆分多个正则：

 /// <summary>
 /// Splits <paramref name="text"/> into consecutive segments, one per sub-pattern, where
 /// every sub-pattern must occur in the given order.
 /// </summary>
 /// <param name="text">The text to split.</param>
 /// <param name="subRegexStrings">Regular expressions marking the start of each segment, in order.</param>
 /// <returns>
 /// An array of length <c>subRegexStrings.Length + 1</c>: the text before the first marker,
 /// followed by one segment per marker (each segment starts with its marker). When the text
 /// does not contain all markers in order, or an argument is null, a single-element array
 /// holding <paramref name="text"/> is returned.
 /// </returns>
 public static string[] SplitByManyRegex(string text, string[] subRegexStrings)
        {
            // Same "return the input untouched" convention used by SplitByManyRegex_AnyOrder.
            if (text == null || subRegexStrings == null)
            {
                return new string[] { text };
            }

            // Builds: ^(?<mySubGroup0>.*?)(?<mySubGroup1>(?:p1).*?)(?<mySubGroup2>(?:p2).*?)...$
            string allRegexString = "^(?<mySubGroup0>.*?)";
            for (int i = 0; i < subRegexStrings.Length; i++)
            {
                // The (?:...) wrapper keeps a top-level alternation such as "A|B" confined to the
                // marker; without it the trailing ".*?" would bind to the last alternative only.
                allRegexString += "(?<mySubGroup" + (i + 1) + ">(?:" + subRegexStrings[i] + ").*?)";
            }
            allRegexString += "$";

            Regex subRegex = new Regex(allRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);

            // The pattern is anchored at the start of the text, so only the first match matters.
            Match match = subRegex.Match(text);
            if (!match.Success)
            {
                return new string[] { text };
            }

            // The groups are contiguous, so each segment runs from its own start index
            // to the start of the next group (or to the end of the text for the last one).
            string[] result = new string[subRegexStrings.Length + 1];
            for (int i = 0; i < result.Length; i++)
            {
                int start = match.Groups["mySubGroup" + i].Index;
                int end = (i + 1 < result.Length)
                    ? match.Groups["mySubGroup" + (i + 1)].Index
                    : text.Length;
                result[i] = text.Substring(start, end - start);
            }
            return result;
        }

　　调用：

// Example array of marker patterns; presumably passed as the subRegexStrings argument of the
// split helpers (the call itself is not shown in this snippet).
string[] tags = { "【答案】", "【解析】" };

　　拆分单个正则：

 /// <summary>
 /// Cuts <paramref name="text"/> at the start of every match of <paramref name="subRegexString"/>
 /// (matched with Singleline and IgnoreCase).
 /// </summary>
 /// <param name="text">The text to split.</param>
 /// <param name="subRegexString">Regular expression marking the start of each piece.</param>
 /// <returns>
 /// The text before the first match (possibly empty) followed by one piece per match, each
 /// beginning with the matched text. With no match, a single-element array holding the input.
 /// </returns>
 public static string[] SplitByRegex(string text, string subRegexString)
        {
            Regex splitter = new Regex(subRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);

            // Collect the start offset of every match.
            List<int> starts = new List<int>();
            foreach (Match hit in splitter.Matches(text))
            {
                starts.Add(hit.Index);
            }

            if (starts.Count == 0)
            {
                return new string[] { text };
            }

            List<string> pieces = new List<string>();

            // Leading part: everything in front of the first match.
            pieces.Add(text.Substring(0, starts[0]));

            // Each remaining piece spans from one match start up to the next one (or the end of the text).
            for (int k = 0; k < starts.Count; k++)
            {
                int end = (k + 1 < starts.Count) ? starts[k + 1] : text.Length;
                pieces.Add(text.Substring(starts[k], end - starts[k]));
            }

            return pieces.ToArray();
        }

　　不返回第一条：

/// <summary>
/// Splits like SplitByRegex and then drops the first element of its result
/// via TrimFirstElementOfArray.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexString">Regular expression marking the start of each piece.</param>
/// <returns>The SplitByRegex result without its first element.</returns>
public static string[] SplitByRegexNoFirtPart(string text, string subRegexString)
        {
            return TrimFirstElementOfArray(SplitByRegex(text, subRegexString));
        }

 /// <summary>
 /// Returns a copy of <paramref name="ary"/> without its first element.
 /// </summary>
 /// <param name="ary">Source array; may be null or empty.</param>
 /// <returns>A new array holding elements 1..n-1; an empty array when the input is null or empty.</returns>
 private static string[] TrimFirstElementOfArray(string[] ary)
        {
            if (ary == null || ary.Length == 0)
            {
                return new string[0];
            }

            int remaining = ary.Length - 1;
            string[] tail = new string[remaining];
            for (int dst = 0; dst < remaining; dst++)
            {
                // Shift by one: destination slot dst receives source slot dst + 1.
                tail[dst] = ary[dst + 1];
            }
            return tail;
        }

　　拆分形如 (A(B(C?)?)?) 的结构（靠后的部分可以缺失）：

  /// <summary>
  /// Splits <paramref name="text"/> by an ordered list of marker patterns where trailing
  /// markers may be missing, i.e. the nested shape (A(B(C?)?)?): a later marker is only
  /// looked for when every earlier marker was found.
  /// </summary>
  /// <param name="text">The text to split.</param>
  /// <param name="subRegexStrings">Regular expressions marking the start of each segment, in order.</param>
  /// <returns>
  /// The text before the first marker followed by one segment for each marker that was found
  /// (each segment starts with its marker). When the overall pattern does not match, or an
  /// argument is null, a single-element array holding <paramref name="text"/> is returned.
  /// </returns>
  public static string[] SplitByManyRegex_MayLess(string text, string[] subRegexStrings)
        {
            // Same "return the input untouched" convention used by SplitByManyRegex_AnyOrder.
            if (text == null || subRegexStrings == null)
            {
                return new string[] { text };
            }

            string allRegexString = "^(?<mySubGroup0>.*?)";

            // Open one wrapper per marker: (?:(?<mySubGroupN>(?:pN).*?)
            // The inner (?:...) confines a top-level alternation such as "A|B" to the marker;
            // the wrappers are non-capturing so they do not add numbered groups.
            for (int i = 0; i < subRegexStrings.Length; i++)
            {
                allRegexString += "(?:(?<mySubGroup" + (i + 1) + ">(?:" + subRegexStrings[i] + ").*?)";
            }

            // Close the wrappers from the inside out; each "?" makes the construct just before it optional.
            for (int i = subRegexStrings.Length - 1; i >= 0; i--)
            {
                allRegexString += "?)";
            }

            allRegexString += "$";

            Regex subRegex = new Regex(allRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);

            // The pattern is anchored at the start of the text, so only the first match matters.
            Match match = subRegex.Match(text);
            if (!match.Success)
            {
                return new string[] { text };
            }

            // Only groups that actually took part in the match contribute a cut position.
            List<int> positions = new List<int>();
            for (int m = 0; m < subRegexStrings.Length + 1; m++)
            {
                Group group = match.Groups["mySubGroup" + m];
                if (group.Success)
                {
                    positions.Add(group.Index);
                }
            }

            List<string> result = new List<string>();
            for (int i = 0; i < positions.Count; i++)
            {
                int nextPos = (i < positions.Count - 1) ? positions[i + 1] : text.Length;
                result.Add(text.Substring(positions[i], nextPos - positions[i]));
            }
            return result.ToArray();
        }

　　可以任意顺序，任意个数：

  /// <summary>
  /// Splits <paramref name="text"/> at every occurrence of any of the marker patterns,
  /// which may appear in any order and any number of times.
  /// </summary>
  /// <param name="text">The text to split.</param>
  /// <param name="subRegexStrings">Marker patterns; each found part starts with one of them.</param>
  /// <param name="resultChangeOrder">
  /// When true, the parts are re-arranged to follow the order of <paramref name="subRegexStrings"/>;
  /// when false, the parts are returned in the order they occur in the text.
  /// </param>
  /// <returns>
  /// With re-ordering: an array of length <c>subRegexStrings.Length + 1</c> whose element 0 is the
  /// text before the first marker and whose element k+1 is the part starting with marker k
  /// (null when that marker was not found; the last occurrence wins when it appears several times).
  /// Without re-ordering: the raw result of SplitByRegex. For a null/empty text or pattern list,
  /// a single-element array holding <paramref name="text"/>.
  /// </returns>
  public static string[] SplitByManyRegex_AnyOrder(string text, string[] subRegexStrings, bool resultChangeOrder = true )
        {
            if (string.IsNullOrEmpty(text) || subRegexStrings == null || subRegexStrings.Length == 0)
            {
                return new string[] { text };
            }

            string allReg = "(" + string.Join("|", subRegexStrings) + ")";
            string[] result = SplitByRegex(text, allReg);

            if (!resultChangeOrder)
            {
                return result;
            }

            // Must be the same options SplitByRegex splits with; otherwise a part that was found
            // case-insensitively would not be recognised here and would be dropped.
            RegexOptions options = RegexOptions.Singleline | RegexOptions.IgnoreCase;

            string[] ordered = new string[subRegexStrings.Length + 1];
            ordered[0] = result[0];
            for (int i = 1; i < result.Length; i++)
            {
                // Put each part into the slot of the marker it starts with.
                for (int k = 0; k < subRegexStrings.Length; k++)
                {
                    // Anchored at the start of the part: every part begins with the marker that
                    // produced it. The first matching pattern wins, mirroring alternation order.
                    if (Regex.IsMatch(result[i], "\\A(?:" + subRegexStrings[k] + ")", options))
                    {
                        ordered[k + 1] = result[i];
                        break;
                    }
                }
                // Slots of markers that were not found stay null.
            }
            return ordered;
        }

　　用正则表达式替换文本中的内容：

 /// <summary>
 /// Replaces every &lt;handanswer&gt;...&lt;/handanswer&gt; element in <paramref name="html"/>
 /// (matched case-insensitively, content may span lines) with a replacement text.
 /// </summary>
 /// <param name="html">The markup to transform.</param>
 /// <returns>The markup with each handanswer element replaced.</returns>
 public static string TranformHandAnswer(string html)
        {
            // The "hand" group captures the whole element, tags included.
            string strReg = "(?<hand>(<handanswer>(.*?)</handanswer>))";
            Regex regex = new Regex(strReg, RegexOptions.Singleline | RegexOptions.IgnoreCase);

            return regex.Replace(html, (Match match) =>
            {
                // The matched element is available here for building a real replacement;
                // this sample ignores it and returns a fixed placeholder text.
                string handContent = match.Groups["hand"].Value;
                string result = "替换得文本";

                return result;
            });
        }

　　有了以上几个辅助方法，再难的正则拆分都能搞定。