在学习HTML Xpath之前,我们先来下载一下Dll文件。下载地址:http://htmlagilitypack.codeplex.com/ 大家单击如下图片下载就行了 <ignore_js_op> 下载后在项目中引用这个Dll,就可以直接调用了,大家看看代码吧
普通浏览复制代码
-
-
// Parse the HTML held in strhtml and fetch the markup of the element whose id is "e_font".
HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();
hd.LoadHtml(strhtml);

// SelectSingleNode returns null when no node matches the expression, so the
// result has to be checked before OuterHtml is read.
HtmlAgilityPack.HtmlNode matched = hd.DocumentNode.SelectSingleNode("//*[@id='e_font']");
string str = matched != null ? matched.OuterHtml : string.Empty;
这样就可以得到一个标签的Html代码了。OuterHtml取的是包含标签本身在内的Html;InnerHtml取的则是包含在这个标签之内的所有Html代码,这点大家要注意。如果大家想知道怎样获取Html代码的Xpath路径,也就是上面代码中 //*[@id='e_font'] 这部分,
这个其实很简单,只要大家安装一个Firebug就行了,看下图片 <ignore_js_op> 大家只要进入选择模式,选择你要的内容,然后右键复制一下就行了。然后放在SelectSingleNode()方法里就OK了。下面我说说几个方法和属性的意思吧。 方法
SelectNodes 获取的是一个集合 SelectSingleNode 获取一个标签 SetAttributeValue 设置标签的属性值例如:SetAttributeValue("name","xpath-89");这说明把name属性的值修改为xpath-89 属性
OuterHtml 取的是包含标签本身在内的Html InnerHtml 取的是包含在这个标签之内的所有Html代码 XPath 获取相对应的Xpath值 Attributes 获取标签的属性集合,例如用 Attributes["name"].Value 获取name属性的值 也可以进行添加属性例如:
普通浏览复制代码
-
// Look the node up first: SelectSingleNode returns null when item.Key matches
// nothing, and calling Attributes.Add on that null would throw.
HtmlAgilityPack.HtmlNode target = hd.DocumentNode.SelectSingleNode(item.Key);
if (target != null)
{
    // Tag the matched element with an extra "xpathid" attribute.
    target.Attributes.Add("xpathid", "xpath_1");
}
下面我写了一个递归获取Html页面所有Xpath值的方法大家看一下吧
普通浏览复制代码
-
-
// Every XPath entry collected by the most recent SartNode() run, in visiting order.
public List<ObjXpath> XpathList = new List<ObjXpath>();

// HTML text that SartNode() parses; starts out empty.
public string strhtml = string.Empty;

// Running counter used as the id of the next recorded entry (starts at 0).
private int Index;
-
-
/// <summary>
/// Parses <c>strhtml</c> and refills <c>XpathList</c> with one entry per element
/// found in the document. Any results from a previous run are discarded first.
/// </summary>
private void SartNode()
{
    // Reset the shared state before anything else so that a run on empty input
    // still leaves the list in a consistent (empty) state.
    Index = 0;
    XpathList.Clear();

    // LoadHtml rejects null, and an empty string has nothing to walk.
    if (string.IsNullOrEmpty(strhtml))
    {
        return;
    }

    HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();
    hd.LoadHtml(strhtml);

    // Setxpath records the children of the node it is given, not the node itself.
    // Starting at the document root therefore also records the top-level elements
    // (e.g. /html[1]), which were skipped when the walk started one level lower.
    Setxpath(hd.DocumentNode);
}
-
-
-
-
-
/// <summary>
/// Walks the subtree below <paramref name="node"/> depth-first and appends one
/// <see cref="ObjXpath"/> entry to <c>XpathList</c> for every descendant whose
/// XPath contains no '#'. <paramref name="node"/> itself is not recorded.
/// </summary>
/// <param name="node">Node whose children (and their descendants) are visited.</param>
private void Setxpath(HtmlNode node)
{
    foreach (HtmlNode child in node.ChildNodes)
    {
        // Paths containing '#' are skipped together with everything below them
        // (presumably HtmlAgilityPack's #text / #comment pseudo-nodes - confirm).
        if (child.XPath.Contains("#"))
        {
            continue;
        }

        // Record the node; Index supplies a sequential id in visiting order.
        XpathList.Add(new ObjXpath() { id = Index.ToString(), Key = child.XPath, Value = "" });
        Index++;

        // Descend only when there is something below this node.
        if (child.ChildNodes.Count > 0)
        {
            Setxpath(child);
        }
    }
}
-
/// <summary>
/// One recorded XPath entry: a sequence id, the XPath itself, and a free-form value.
/// </summary>
public class ObjXpath
{
    private string _id;
    private string _key;
    private string _value;

    /// <summary>Identifier of the entry; Setxpath fills it with the running index as text.</summary>
    public string id
    {
        get { return _id; }
        set { _id = value; }
    }

    /// <summary>The XPath expression of the recorded node.</summary>
    public string Key
    {
        get { return _key; }
        set { _key = value; }
    }

    /// <summary>Associated value; Setxpath stores an empty string here.</summary>
    public string Value
    {
        get { return _value; }
        set { _value = value; }
    }
}
XpathList 就是获取的所有Xpath值了,大家有兴趣的话可以试试 我们先来看看效果吧 <ignore_js_op> 好了下面放出所有代码给大家
普通浏览复制代码
-
using System;
-
using System.Collections.Generic;
-
using System.ComponentModel;
-
using System.Data;
-
using System.Drawing;
-
using System.Linq;
-
using System.Text;
-
using System.Windows.Forms;
-
using System.Text.RegularExpressions;
-
using System.Threading;
-
using HtmlAgilityPack;
-
using System.IO;
-
using System.Runtime.Serialization.Json;
-
-
namespace AutoXpathTools
-
{
-
/// <summary>
/// Main window of the XPath tool. It downloads a page, lists the XPath of every
/// element found in it, and shows the outer HTML of the node selected by the
/// XPath currently in <c>txtPath</c>.
/// </summary>
public partial class Form1 : Form
{
    public Form1()
    {
        InitializeComponent();
    }

    #region Private fields and helpers

    // Delegate used by UIContorol to marshal a list-box update onto the UI thread.
    private delegate void SetListBox(string str);

    // Every XPath recorded by the most recent walk, in visiting order.
    List<ObjXpath> XpathList = new List<ObjXpath>();

    // Running counter that supplies the id of the next recorded entry.
    private int Index = 0;

    // Parsed copy of the downloaded page; button3_Click runs its queries against it.
    HtmlAgilityPack.HtmlDocument hd = new HtmlAgilityPack.HtmlDocument();

    #endregion

    /// <summary>
    /// Downloads the page whose address is in <c>textBox1</c>, shows its HTML in
    /// <c>txtXml</c>, and starts a worker thread that lists the XPath of every element.
    /// </summary>
    private void btnGetXpath_Click(object sender, EventArgs e)
    {
        try
        {
            HttpHelper http = new HttpHelper();
            HttpItem item = new HttpItem() { URL = textBox1.Text.Trim(), IsToLower = false, Encoding = "gbk" };
            txtXml.Text = http.GetHtml(item);

            // Snapshot the text on the UI thread: controls must not be read from
            // the worker thread started below.
            string html = txtXml.Text;

            // A blank body, or one that is just the word "error" in any casing, takes
            // the failure branch (presumably what HttpHelper returns on failure - confirm).
            if (!string.IsNullOrWhiteSpace(html)
                && !string.Equals(html.Trim(), "error", StringComparison.OrdinalIgnoreCase))
            {
                hd.LoadHtml(html);

                // XpathList and Index are reset for every run, so the visible list has
                // to be reset too; otherwise entries from earlier pages pile up.
                listBox1.Items.Clear();

                Thread pingTask = new Thread(new ThreadStart(delegate
                {
                    SartNode(html);
                }));

                // A background thread does not keep the process alive after the form closes.
                pingTask.IsBackground = true;
                pingTask.Start();
            }
            else
            {
                txtXml.Text = "根据您的的ULR:" + textBox1.Text.Trim() + "无法得到任何内容";
            }
        }
        catch (Exception ex)
        {
            txtXml.Text = ex.Message.Trim();
        }
    }

    /// <summary>
    /// Parses <paramref name="strhtml"/> and refills <c>XpathList</c> (and, through
    /// UIContorol, the list box) with one entry per element in the document.
    /// </summary>
    /// <param name="strhtml">HTML text to walk.</param>
    private void SartNode(string strhtml)
    {
        // Separate document instance, so the walk does not touch the hd field
        // that button3_Click queries.
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(strhtml);

        Index = 0;
        XpathList.Clear();

        // Setxpath records the children of the node it is given, not the node itself.
        // Starting at the document root therefore also records the top-level elements
        // (e.g. /html[1]), which were skipped when the walk started one level lower.
        Setxpath(doc.DocumentNode);
    }

    /// <summary>
    /// Walks the subtree below <paramref name="node"/> depth-first, recording every
    /// descendant whose XPath contains no '#' in <c>XpathList</c> and the list box.
    /// </summary>
    /// <param name="node">Node whose children (and their descendants) are visited.</param>
    private void Setxpath(HtmlNode node)
    {
        foreach (HtmlNode child in node.ChildNodes)
        {
            // Paths containing '#' are skipped together with everything below them
            // (presumably HtmlAgilityPack's #text / #comment pseudo-nodes - confirm).
            if (child.XPath.Contains("#"))
            {
                continue;
            }

            XpathList.Add(new ObjXpath() { id = Index.ToString(), Key = child.XPath, Value = "" });
            UIContorol(child.XPath);
            Index++;

            if (child.ChildNodes.Count > 0)
            {
                Setxpath(child);
            }
        }
    }

    /// <summary>
    /// Appends <paramref name="str"/> to the list box and shows it in the status bar.
    /// Safe to call from the worker thread: the update is marshalled to the UI thread.
    /// </summary>
    /// <param name="str">XPath text to display.</param>
    private void UIContorol(string str)
    {
        // Nothing to update once the window is gone (e.g. closed mid-walk).
        if (IsDisposed || !IsHandleCreated)
        {
            return;
        }

        // Setxpath runs on the thread started in btnGetXpath_Click, and WinForms
        // controls may only be touched from the thread that created them.
        if (InvokeRequired)
        {
            Invoke(new SetListBox(UIContorol), new object[] { str });
            return;
        }

        listBox1.Items.Add(str);
        toolStripStatusLabel1.Text = str;
    }

    /// <summary>Copies the XPath selected in the list box into <c>txtPath</c>.</summary>
    private void listBox1_SelectedValueChanged(object sender, EventArgs e)
    {
        if (listBox1.SelectedItem != null)
        {
            txtPath.Text = listBox1.SelectedItem.ToString().Trim();
        }
    }

    /// <summary>
    /// Evaluates the XPath in <c>txtPath</c> against the loaded page and shows the
    /// outer HTML of the first match in <c>txtContents</c>.
    /// </summary>
    private void button3_Click(object sender, EventArgs e)
    {
        try
        {
            // SelectSingleNode returns null when nothing matches; show an empty
            // result in that case instead of dereferencing null.
            HtmlNode match = hd.DocumentNode.SelectSingleNode(txtPath.Text.Trim());
            txtContents.Text = match != null ? match.OuterHtml : string.Empty;
        }
        catch (System.Xml.XPath.XPathException ex)
        {
            // The box is free-form user input, so an empty or malformed expression
            // is an expected failure: report it the same way btnGetXpath_Click does.
            txtContents.Text = ex.Message.Trim();
        }
    }

    /// <summary>Load handler; intentionally empty.</summary>
    private void Form1_Load(object sender, EventArgs e)
    {
    }
}
-
/// <summary>
/// One recorded XPath entry: a sequence id, the XPath itself, and a free-form value.
/// </summary>
public class ObjXpath
{
    private string _id;
    private string _key;
    private string _value;

    /// <summary>Identifier of the entry; Setxpath fills it with the running index as text.</summary>
    public string id
    {
        get { return _id; }
        set { _id = value; }
    }

    /// <summary>The XPath expression of the recorded node.</summary>
    public string Key
    {
        get { return _key; }
        set { _key = value; }
    }

    /// <summary>Associated value; Setxpath stores an empty string here.</summary>
    public string Value
    {
        get { return _value; }
        set { _value = value; }
    }
}
-
}
就到这里吧,大家可以下载我的源代码试试手。打包下载: <ignore_js_op> (76.32 KB, 下载次数: 0) 如果你感觉可以的话就给我推荐一下吧。感谢大家
|
请发表评论