在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
中文分词太麻烦了。有些中文分词组件也不错,但要自己维护词库,我还是觉得麻烦。于是我尝试使用SCWS中文分词,直接调用它的API,其他什么都不需要做,在此感谢一下。废话不多说了,直接上菜。 1 /// <summary>
/// <summary>
/// Segments Chinese text into words by POSTing it to the remote SCWS HTTP API
/// and joining the words found in the JSON reply.
/// </summary>
/// <remarks>
/// This is a best-effort call and never throws: if the request, the response
/// stream or the JSON parsing fails, the failure is traced and whatever words
/// were collected so far (possibly none) are returned.
/// NOTE(review): the text is sent over plain HTTP to a third-party host;
/// confirm this is acceptable for the data being segmented.
/// </remarks>
/// <param name="str">The text to segment. Null or empty yields an empty result without any network call.</param>
/// <returns>
/// The segmented words, each followed by a single space (so a non-empty result
/// ends with a trailing space), or an empty string when nothing was segmented.
/// </returns>
public static string Segment(string str)
{
    System.Text.StringBuilder sb = new System.Text.StringBuilder();

    // Nothing to segment: skip the network round trip entirely.
    if (string.IsNullOrEmpty(str))
    {
        return sb.ToString();
    }

    try
    {
        // UrlEncode emits only ASCII characters (non-ASCII input is percent-encoded
        // as UTF-8, matching the charset=utf8 form field), so ASCII bytes are lossless.
        byte[] postData = System.Text.Encoding.ASCII.GetBytes(
            "data=" + System.Web.HttpUtility.UrlEncode(str, System.Text.Encoding.UTF8)
            + "&respond=json&charset=utf8&ignore=yes&duality=no&traditional=no&multi=0");

        // Configure the form POST.
        System.Net.HttpWebRequest request =
            (System.Net.HttpWebRequest)System.Net.WebRequest.Create("http://www.ftphp.com/scws/api.php");
        request.Method = "POST";
        request.KeepAlive = false;
        request.ContentType = "application/x-www-form-urlencoded";
        request.CookieContainer = new System.Net.CookieContainer();
        request.ContentLength = postData.Length;

        // Send the request body; the using block closes the stream even if Write throws.
        using (System.IO.Stream outputStream = request.GetRequestStream())
        {
            outputStream.Write(postData, 0, postData.Length);
        }

        // Read the whole reply as UTF-8 text, releasing the connection afterwards.
        string val;
        using (System.Net.WebResponse response = request.GetResponse())
        using (System.IO.Stream responseStream = response.GetResponseStream())
        using (System.IO.StreamReader reader = new System.IO.StreamReader(responseStream, System.Text.Encoding.UTF8))
        {
            val = reader.ReadToEnd();
        }

        // The code reads a "words" collection whose entries each carry a "word" value.
        Newtonsoft.Json.Linq.JObject results = Newtonsoft.Json.Linq.JObject.Parse(val);
        Newtonsoft.Json.Linq.JToken words = results["words"];
        if (words != null)
        {
            foreach (Newtonsoft.Json.Linq.JToken item in words.Children())
            {
                // Skip entries that are not objects or lack a "word" value
                // instead of aborting the whole result.
                Newtonsoft.Json.Linq.JObject entry = item as Newtonsoft.Json.Linq.JObject;
                if (entry == null)
                {
                    continue;
                }

                Newtonsoft.Json.Linq.JToken word = entry["word"];
                if (word == null)
                {
                    continue;
                }

                sb.Append(word.ToString()).Append(' ');
            }
        }
    }
    catch (System.Exception ex)
    {
        // Deliberately best-effort: callers get what was collected, but the
        // failure is no longer invisible.
        System.Diagnostics.Trace.TraceWarning("SCWS segmentation failed: " + ex);
    }

    return sb.ToString();
}
请发表评论