在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
// Context carried over from the surrounding article text (translated):
// first register an API key with Baidu, see
// http://developer.baidu.com/wiki/index.php?title=%E5%B8%AE%E5%8A%A9%E6%96%87%E6%A1%A3%E9%A6%96%E9%A1%B5/%E7%99%BE%E5%BA%A6%E7%BF%BB%E8%AF%91API
// then use the code below (the article said only the parts it highlighted in red matter).
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Script.Serialization;
using System.Web.UI;
using System.Web.UI.WebControls;

namespace Fangyi
{
    /// <summary>
    /// Web Forms page that, when loaded, walks a fixed range of story pages on
    /// www.52mvc.com, sends each story's title and body to the Baidu translate
    /// endpoint, and stores the translated result through <c>GSEntities</c>.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        // NOTE(review): credential committed to source. It is kept here so behavior is
        // unchanged, but it should be moved to configuration and rotated.
        private const string ApiKey = "WqLOfG9o2VS1lriX4mz3mDj8";

        // Story ids visited by Page_Load: [FirstStoryId, EndStoryIdExclusive).
        private const int FirstStoryId = 740;
        private const int EndStoryIdExclusive = 900000;

        // Stories whose extracted body is this long or longer are not translated.
        private const int MaxBodyLength = 2000;

        // Throttling delays, in milliseconds.
        private const int DelayBeforePageMs = 100;
        private const int DelayAfterFailureMs = 1500;
        private const int DelayAfterSentenceMs = 1000;
        private const int DelayAfterBatchMs = 15000;
        private const int SentencesPerBatch = 5;

        // Matches anything between '<' and the next '>' (used to strip markup).
        private static readonly Regex TagPattern = new Regex(@"<[\s\S]*?>");

        // One serializer per page instance instead of one per translated sentence.
        private readonly JavaScriptSerializer _serializer = new JavaScriptSerializer();

        /// <summary>
        /// Crawls every story id in the configured range and hands each page URL to
        /// <see cref="GetUrl"/>. A failure on one story is traced and followed by a
        /// pause; the loop then continues with the next id.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // 'using' guarantees the context is disposed even if the loop is abandoned.
            using (GSEntities db = new GSEntities())
            {
                for (int i = FirstStoryId; i < EndStoryIdExclusive; i++)
                {
                    try
                    {
                        System.Threading.Thread.Sleep(DelayBeforePageMs);
                        GetUrl("http://www.52mvc.com/story/love/" + i + ".html", db); // scrape
                    }
                    catch (Exception ex)
                    {
                        // Best-effort crawl: record why this story was skipped, then
                        // back off before trying the next one.
                        System.Diagnostics.Trace.TraceWarning("Skipping story {0}: {1}", i, ex);
                        System.Threading.Thread.Sleep(DelayAfterFailureMs);
                    }
                }
            }
        }

        /// <summary>
        /// Downloads one story page, extracts its title and body, translates both, and
        /// saves them as a new <c>content</c> row.
        /// </summary>
        /// <param name="url">Address of the story page to download.</param>
        /// <param name="db">Entity context that receives the translated record.</param>
        /// <exception cref="InvalidOperationException">
        /// A translation response contained no result entry.
        /// </exception>
        private void GetUrl(string url, GSEntities db)
        {
            var html = GetHtml.getHtml(url, null);

            var title = Regex.Match(
                html.Replace("\n", ""),
                @"<font style\=""font\-size\: 18px\; font\-weight\: bold\;""\>(.+?)\<\/font\>").Groups[1].Value;

            // Flatten to a single line and drop markup before locating the story text.
            var body = Regex.Replace(html, @"\r|\n", "");
            body = ReHtml(body);

            // NOTE(review): this pattern is reproduced exactly as it appears in the
            // published listing, where the text after "(.+?)" is a space followed by a
            // line break. Line breaks were removed from 'body' above, so as written it
            // cannot match; the real terminator was probably lost when the page was
            // extracted. Confirm against the original source.
            body = Regex.Match(body, "双击或拖选\\)(.+?) \n").Groups[1].Value;
            body = body.Trim();

            // Nothing to translate, or too long: stop before spending any API calls.
            if (body.Length == 0 || body.Length >= MaxBodyLength)
            {
                return;
            }

            title = Translate(title);

            // NOTE(review): Replace(" ", "") is kept as displayed and removes every
            // space. The original literal may have been an HTML entity that the page
            // extraction decoded - confirm.
            // The second Replace swaps straight double quotes for a left curly quote;
            // the listing showed it as an unescaped quote, which does not compile.
            var sentences = body.Replace(" ", "").Replace("\"", "“").Split('.');

            var translated = new StringBuilder("\t\t");
            int count = 0;
            foreach (var sentence in sentences)
            {
                // Split('.') yields empty pieces for consecutive or trailing dots.
                if (sentence.Trim().Length == 0)
                {
                    continue;
                }

                translated.Append(Translate(sentence)).Append("。");
                ++count;

                if (count % SentencesPerBatch == 0)
                {
                    // Longer pause and a new indented paragraph after every batch.
                    System.Threading.Thread.Sleep(DelayAfterBatchMs);
                    translated.Append("\n\t\t");
                }

                System.Threading.Thread.Sleep(DelayAfterSentenceMs);
            }

            if (count == 0)
            {
                return;
            }

            content record = new content();
            record.title = title;
            record.val = translated.ToString();
            db.content.AddObject(record);
            db.SaveChanges();
        }

        /// <summary>
        /// Sends <paramref name="text"/> to the translate endpoint and returns the
        /// <c>dst</c> value of the first result entry.
        /// </summary>
        /// <param name="text">Text to translate; escaped before being placed in the query string.</param>
        /// <returns>The first translation returned by the service.</returns>
        /// <exception cref="InvalidOperationException">
        /// The response could not be read as a result list with at least one entry.
        /// </exception>
        private string Translate(string text)
        {
            // Escape the text so characters such as '&', '#', '+' and '%' cannot break
            // or truncate the query string.
            // NOTE(review): assumes GetHtml.getHtml does not escape the URL again - confirm.
            var requestUrl = "http://openapi.baidu.com/public/2.0/bmt/translate?client_id=" + ApiKey
                + "&q=" + Uri.EscapeDataString(text) + "&from=auto&to=auto";

            var json = GetHtml.getHtml(requestUrl, null);
            var parsed = _serializer.Deserialize<jsonss>(json);

            if (parsed == null || parsed.trans_result == null || !parsed.trans_result.Any())
            {
                throw new InvalidOperationException("Translation response contained no result for: " + text);
            }

            return parsed.trans_result.First().dst;
        }

        /// <summary>
        /// Removes every span that starts with '&lt;' and ends at the next '&gt;'.
        /// </summary>
        /// <param name="HTML">Markup to strip.</param>
        /// <returns>The input with all such spans deleted.</returns>
        public string ReHtml(string HTML)
        {
            return TagPattern.Replace(HTML, "");
        }

        /// <summary>
        /// Converts every character of <paramref name="str"/> to a <c>\uXXXX</c> escape
        /// (lower-case hex, zero-padded to four digits).
        /// </summary>
        /// <param name="str">Text to encode; may be null or empty.</param>
        /// <returns>The escaped text, or an empty string for null or empty input.</returns>
        public string ctu(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return "";
            }

            var escaped = new StringBuilder(str.Length * 6);
            foreach (char ch in str)
            {
                // Pad to four digits so that code points below 0x1000 still form a
                // well-formed \uXXXX escape.
                escaped.Append("\\u").Append(((int)ch).ToString("x4"));
            }

            return escaped.ToString();
        }

        /// <summary>
        /// Converts a run of <c>\uXXXX</c> escapes back to text.
        /// </summary>
        /// <remarks>
        /// Backslashes are removed and the remainder is split on the letter 'u'; each
        /// piece after the first is parsed as a hexadecimal number and cast to a char.
        /// Anything before the first 'u' is ignored. If a piece is not valid hex, the
        /// parse error's message is returned in place of the decoded text.
        /// </remarks>
        /// <param name="str">Escaped text; may be null or empty.</param>
        /// <returns>The decoded text, an empty string for null or empty input, or the
        /// <see cref="FormatException"/> message when a piece cannot be parsed.</returns>
        public string utc(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return "";
            }

            string[] pieces = str.Replace("\\", "").Split('u');
            var decoded = new StringBuilder(pieces.Length);
            try
            {
                for (int i = 1; i < pieces.Length; i++)
                {
                    decoded.Append((char)int.Parse(
                        pieces[i],
                        System.Globalization.NumberStyles.HexNumber,
                        System.Globalization.CultureInfo.InvariantCulture));
                }
            }
            catch (FormatException ex)
            {
                return ex.Message;
            }

            return decoded.ToString();
        }
    }
}
请发表评论