在线时间:8:00-16:00
迪恩网络APP
随时随地掌握行业动态
扫描二维码
关注迪恩网络微信公众号
最近经常需要模拟网页提交并取回网页源码,然后获取网页中相应的元素,于是常常要解析 HTML 中的各种元素。网络是个好东西,搜索一番,就找到了好几个 Delphi 版本的 HtmlParser 类库,试着使用了几个,发现解析起来都不完整,或多或少会出现一些问题!于是想到:如果界面上有一个浏览器,我们可以通过 WebBrowser 的 Document 接口对网页元素进行操作,很是方便!但是模拟网页提交时,界面上不一定要出现 WebBrowser,肯定有办法不通过 WebBrowser 就直接解析 HTML——那便是不要 WebBrowser 这个外壳,只要它里面的 Document 文档接口对象,就能实现对 HTML 的解析了。查找了一番 MSDN,然后 Google 了一下,果然可行,构建方法如下: //创建IHTMLDocument2接口 CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER, IID_IHTMLDocument2, FHtmlDoc); 接口创建好了之后就能够对文档元素进行解析了,很是爽快! 结合我自己的特有操作,我对 Combobox、Table、Frame 等一些网页元素做了相应的封装,实现了一个 HTMLParser,大致代码如下(这里只给出声明,完整代码请在最后下载):
(******************************************************)
(*                  Dexian Studio                     *)
(*     Web page element manipulation class library    *)
(*                                                    *)
(*              DxHtmlElement Unit                    *)
(*      Copyright(c) 2008-2010 Budexian (不得闲)      *)
(*      email:[email protected]  QQ:75492895          *)
(******************************************************)
{ Interface of the DxHtmlElement unit: free functions taking MSHTML element /
  document interfaces, plus wrapper classes around frames, element
  collections, tables and <select> elements.
  NOTE: only the declarations are listed here; the implementation section
  at the bottom is empty in this listing, so the unit does not compile on
  its own as shown. }
unit DxHtmlElement;

interface

uses Windows,sysUtils,Clipbrd,MSHTML,ActiveX,OleCtrls,Graphics,TypInfo;

{ Element-type predicates. Each takes one IHTMLElement and returns a Boolean.
  The bodies are not visible here; presumably each reports whether the
  element is of the kind named in the function - confirm against the
  implementation. }
function IsSelectElement(eleElement: IHTMLElement): Boolean;
function IsPwdElement(eleElement: IHTMLElement): Boolean;
function IsTextElement(element: IHTMLElement): boolean;
function IsTableElement(element: IHTMLElement): Boolean;
function IsElementCollection(element: IHTMLElement): Boolean;
function IsChkElement(element: IHTMLElement): boolean;
function IsRadioBtnElement(element: IHTMLElement): boolean;
function IsMemoElement(element: IHTMLElement): boolean;
function IsFormElement(element: IHTMLElement): boolean;
function IsIMGElement(element: IHTMLElement): boolean;
function IsInIMGElement(element: IHTMLElement): boolean;
function IsLabelElement(element: IHTMLElement): boolean;
function IsLinkElement(element: IHTMLElement): boolean;
function IsListElement(element: IHTMLElement): boolean;
function IsControlElement(element: IHTMLElement): boolean;
function IsObjectElement(element: IHTMLElement): boolean;
function IsFrameElement(element: IHTMLElement): boolean;
function IsInPutBtnElement(element: IHTMLElement): boolean;
function IsInHiddenElement(element: IHTMLElement): boolean;
function IsSubmitElement(element: IHTMLElement): boolean;

{ Image helpers. All take the document first; images are addressed by
  name / src / alt strings, by integer index, or by an IHTMLIMGElement.
  The GetRegCodePic overloads return a TPicture; who owns (frees) that
  TPicture cannot be determined from the declarations - TODO confirm. }
function GetPicIndex(doc: IHTMLDocument2; Src: string; Alt: string): Integer;
function GetPicElement(doc: IHTMLDocument2;imgName: string;src: string;Alt: string): IHTMLImgElement;
function GetRegCodePic(doc: IHTMLDocument2;ImgName: string; Src: string; Alt: string): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;Index: integer): TPicture; overload;
function GetRegCodePic(doc: IHTMLDocument2;element: IHTMLIMGElement): TPicture;overload;

type
  { stdcall function-pointer type; the signature resembles the
    ObjectFromLresult API - presumably used to bind it dynamically
    (not shown here). }
  TObjectFromLResult = function(LRESULT: lResult;const IID: TIID; WPARAM: wParam;out pObject): HRESULT; stdcall;
  { Element kinds reported by GetElementType. }
  TEleMentType = (ELE_UNKNOW,ELE_TEXT,ELE_PWD,ELE_SELECT,ELE_CHECKBOX,ELE_RADIOBTN,ELE_MEMO,ELE_FORM,ELE_IMAGE,
    ELE_LABEL,ELE_LINK,ELE_LIST,ELE_CONTROL,ELE_OBJECT,ELE_FRAME,ELE_INPUTBTN,ELE_INIMAGE,ELE_INHIDDEN);

{ Element classification returning a TEleMentType value or a string name. }
function GetElementType(element: IHTMLELEMENT): TEleMentType;
function GetElementTypeName(element: IHTMLELEMENT): string;

{ Table helpers. The GetWebBrowser* variants address a table by index within
  a document, return a Boolean and deliver their result through the
  var parameter ResValue. }
function GetHtmlTableCell(aTable: IHTMLTable;aRow,aCol: Integer): IHTMLElement;
function GetHtmlTable(aDoc: IHTMLDocument2; aIndex: Integer): IHTMLTable;
function GetWebBrowserHtmlTableCellText(Doc: IHTMLDocument2; const TableIndex, RowIndex, ColIndex: Integer;var ResValue: string): Boolean;
function GetHtmlTableRowHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableCellHtml(Doc: IHTMLDocument2; const TableIndex,RowIndex,ColIndex: Integer;var ResValue: string): Boolean;
{ NOTE(review): the name is spelled "Ge..." (no "t") - presumably meant
  GetHtmlTableHtml; it is part of the public interface, so it is left as
  declared. }
function GeHtmlTableHtml(aTable: IHTMLTable; aRow: Integer): IHTMLElement;
function GetWebBrowserHtmlTableHtml(Doc: IHTMLDocument2; const TableIndex,RowIndex: Integer;var ResValue: string): Boolean;

type
  { Forward declarations: TDxWebFrame refers to both collection classes
    before they are fully declared below. }
  TDxWebFrameCollection = class;
  TDxWebElementCollection = class;

  { Document load state as reported by TDxWebFrame.ReadyState. }
  TLoadState = (Doc_Loading,Doc_Completed,Doc_Invalidate);

  { Wrapper around one IHTMLWINDOW2 (field FFrame). Apart from Frame, which
    is read/write, every property is read-only and backed by a getter. }
  TDxWebFrame = class
  private
    FFrame: IHTMLWINDOW2;
    FElementCollections: TDxWebElementCollection;
    FWebFrameCollections: TDxWebFrameCollection;
    function GetSrc: string;
    function GetElementCount: integer;
    function GetWebFrameCollections: TDxWebFrameCollection;
    function GetElementCollections: TDxWebElementCollection;
    function GetDocument: IHTMLDOCUMENT2;
    function GetReadState: TLoadState;
    function GetIsLoaded: boolean;
    procedure SetFrame(const Value: IHTMLWINDOW2);
    function GetName: string;
  public
    Constructor Create(IFrame: IHTMLWINDOW2);
    Destructor Destroy;override;
    property Frame: IHTMLWINDOW2 read FFrame write SetFrame;
    property Src: string read GetSrc;
    property Document: IHTMLDOCUMENT2 read GetDocument;
    property Name: string read GetName;
    property Frames: TDxWebFrameCollection read GetWebFrameCollections;
    property ElementCount: integer read GetElementCount;
    property ElementCollections: TDxWebElementCollection read GetElementCollections;
    property ReadyState: TLoadState read GetReadState;
    property IsLoaded: boolean read GetIsLoaded;
  end;

  { Wrapper around an IHTMLFramesCollection2. Frames are reachable by index
    or by name, either as the raw IHTMLWINDOW2 interface or as a
    TDxWebFrame. The class holds a single private TDxWebFrame field (Frame);
    presumably the FrameBy* getters reuse that one instance, so a returned
    wrapper may be re-targeted by the next lookup - TODO confirm. }
  TDxWebFrameCollection = Class
  private
    FFrameCollection: IHTMLFramesCollection2;
    Frame: TDxWebFrame;
    function GetCount: integer;
    function GetFrameInterfaceByIndex(index: integer): IHTMLWINDOW2;
    function GetFrameInterfaceByName(Name: string): IHTMLWINDOW2;
    function GetFrameByIndex(index: integer): TDxWebFrame;
    function GetFrameByName(Name: string): TDxWebFrame;
    procedure SetFrameCollection(const Value: IHTMLFramesCollection2);
  public
    Constructor Create(ACollection: IHTMLFramesCollection2);
    Destructor Destroy;override;
    property FrameCollection: IHTMLFramesCollection2 read FFrameCollection write SetFrameCollection;
    property Count: integer read GetCount;
    property FrameInterfaceByIndex[index: integer]: IHTMLWINDOW2 read GetFrameInterfaceByIndex;
    property FrameInterfaceByName[Name: string]: IHTMLWINDOW2 read GetFrameInterfaceByName;
    property FrameByIndex[index: integer]: TDxWebFrame read GetFrameByIndex;
    property FrameByName[Name: string]: TDxWebFrame read GetFrameByName;
  end;

  { Wrapper around an IHTMLElementCollection. Elements are reachable by
    name, by index, or by name plus index; ChildElementCollection returns
    another TDxWebElementCollection keyed by a string. }
  TDxWebElementCollection = class
  private
    FCollection: IHTMLElementCollection;
    FChildCollection: TDxWebElementCollection;
    function GetCollection(index: String): TDxWebElementCollection;
    function GetCount: integer;
    function GetElement(itemName: string; index: integer): IHTMLElement;
    function GetElementByName(itemName: string): IHTMLELEMENT;
    function GetElementByIndex(index: integer): IHTMLELEMENT;
    procedure SetCollection(const Value: IHTMLElementCollection);
  public
    Constructor Create(ACollection: IHTMLElementCollection);
    Destructor Destroy;override;
    property Collection: IHTMLElementCollection read FCollection write SetCollection;
    property ChildElementCollection[index: String]: TDxWebElementCollection read GetCollection;
    property ElementCount: integer read GetCount;
    property Element[itemName: string;index: integer]: IHTMLElement read GetElement;
    property ElementByName[itemName: string]: IHTMLELEMENT read GetElementByName;
    property ElementByIndex[index: integer]: IHTMLELEMENT read GetElementByIndex;
  end;

  { Empty subclass: adds nothing to TDxWebElementCollection in this listing. }
  TLinkCollection = class(TDxWebElementCollection)
  end;

  { Forward declaration: TDxTableCollection refers to TDxWebTable. }
  TDxWebTable = class;

  { Table lookup for one document (property Document is read/write).
    Tables are reachable by name or index, either as the raw IHTMLTABLE
    interface or as a TDxWebTable wrapper. }
  TDxTableCollection = class
  private
    FTableCollection: IHTMLElementCollection;
    FDocument: IHTMLDOCUMENT2;
    FWebTable: TDxWebTable;
    function GetTableInterfaceByName(AName: string): IHTMLTABLE;
    procedure SetDocument(Value: IHTMLDOCUMENT2);
    function GetTableInterfaceByIndex(index: integer): IHTMLTABLE;
    function GetCount: integer;
    function GetTableByIndex(index: integer): TDxWebTable;
    function GetTableByName(AName: string): TDxWebTable;
  public
    Constructor Create(Doc: IHTMLDOCUMENT2);
    destructor Destroy;override;
    property TableInterfaceByName[AName: string]: IHTMLTABLE read GetTableInterfaceByName;
    property TableInterfaceByIndex[index: integer]: IHTMLTABLE read GetTableInterfaceByIndex;
    property TableByName[AName: string]: TDxWebTable read GetTableByName;
    property TableByIndex[index: integer]: TDxWebTable read GetTableByIndex;
    property Document: IHTMLDOCUMENT2 read FDocument write SetDocument;
    property Count: integer read GetCount;
  end;

  { Wrapper around one IHTMLTABLE. Note the index order of Cell and
    CellElement: column first, then row. }
  TDxWebTable = class
  private
    FTableInterface: IHTMLTABLE;
    function GetRowCount: integer;
    procedure SetTableInterface(const Value: IHTMLTABLE);
    function GetCell(ACol, ARow: integer): string;
    function GetRowColCount(RowIndex: integer): integer;
    function GetInnerHtml: string;
    function GetInnerText: string;
    function GetCellElement(ACol, ARow: Integer): IHTMLTableCell;
  public
    Constructor Create(ATable: IHTMLTABLE);
    property TableInterface: IHTMLTABLE read FTableInterface write SetTableInterface;
    property RowCount: integer read GetRowCount;
    property Cell[ACol: integer;ARow: integer]: string read GetCell;
    property CellElement[ACol: Integer;ARow: Integer]: IHTMLTableCell read GetCellElement;
    property RowColCount[RowIndex: integer]: integer read GetRowColCount;
    property InnerHtml: string read GetInnerHtml;
    property InnerText: string read GetInnerText;
  end;

  { Wrapper around one IHTMLSelectElement (field FHtmlSelect), exposing
    item count, selected index, items by index / name, per-item attributes,
    and the element's name and value. }
  TDxWebCombobox = class
  private
    FHtmlSelect: IHTMLSelectElement;
    function GetCount: Integer;
    procedure SetItemIndex(const Value: Integer);
    function GetItemIndex: Integer;
    function GetName: string;
    procedure SetName(const Value: string);
    function GetValue: string;
    procedure SetValue(const Value: string);
    procedure SetCombInterface(const Value: IHTMLSelectElement);
    function GetItemByName(EleName: string): string;
    function GetItemByIndex(index: integer): string;
    function GetItemAttribute(index: Integer; AttribName: string): OleVariant;
  public
    constructor Create(AWebCombo: IHTMLSelectElement);
    procedure Add(Ele: IHTMLElement);
    procedure Insert(Ele: IHTMLElement;Index: Integer);
    procedure Remove(index: Integer);
    property CombInterface: IHTMLSelectElement read FHtmlSelect write SetCombInterface;
    property Count: Integer read GetCount;
    property ItemIndex: Integer read GetItemIndex write SetItemIndex;
    property ItemByIndex[index: integer]: string read GetItemByIndex;
    property ItemByName[EleName: string]: string read GetItemByName;
    property ItemAttribute[index: Integer;AttribName: string]: OleVariant read GetItemAttribute;
    property Name: string read GetName write SetName;
    property value: string read GetValue write SetValue;
  end;

implementation

end.
HTMLParser解析类的代码实现单元
(******************************************************)
(*                  Dexian Studio                     *)
(*              HTML parsing unit library             *)
(*                                                    *)
(*               DxHtmlParser Unit                    *)
(*      Copyright(c) 2008-2010 Budexian (不得闲)      *)
(*      email:[email protected]  QQ:75492895          *)
(******************************************************)
{ Parses an HTML string without a visible browser control by creating a
  stand-alone MSHTML document object and exposing its tables, elements and
  <select> elements through the DxHtmlElement wrapper classes. }
unit DxHtmlParser;

interface

uses
  Windows, SysUtils, MSHTML, ActiveX, DxHtmlElement, Forms;

type
  { Owns one IHTMLDocument2 plus the wrapper objects built on top of it.
    Assign HTML to (re)load the document body; WebTables, WebElements and
    WebCombobox then reflect the new content. }
  TDxHtmlParser = class
  private
    FHtmlDoc: IHTMLDocument2;
    FHTML: string;
    FWebTables: TDxTableCollection;
    FWebElements: TDxWebElementCollection;
    FWebComb: TDxWebCombobox;
    { True only when our CoInitialize call succeeded and therefore must be
      balanced by exactly one CoUninitialize in the destructor. }
    FComInitialized: Boolean;
    procedure SetHTML(const Value: string);
    function GetWebCombobox(AName: string): TDxWebCombobox;
  public
    constructor Create;
    destructor Destroy; override;
    property HTML: string read FHTML write SetHTML;
    property WebTables: TDxTableCollection read FWebTables;
    property WebElements: TDxWebElementCollection read FWebElements;
    property WebCombobox[Name: string]: TDxWebCombobox read GetWebCombobox;
  end;

implementation

{ TDxHtmlParser }

{ Initialises COM for the calling thread, creates the HTML document object,
  switches it to design mode, pumps messages until the document reports
  readyState 'complete', and creates the wrapper objects.
  Raises Exception when the document object cannot be created. }
constructor TDxHtmlParser.Create;
var
  CreateResult: HRESULT;
begin
  inherited Create;
  // S_OK and S_FALSE both require a matching CoUninitialize; a failure
  // (e.g. RPC_E_CHANGED_MODE) must NOT be balanced, so remember the outcome.
  FComInitialized := Succeeded(CoInitialize(nil));
  // Create the IHTMLDocument2 interface
  CreateResult := CoCreateInstance(CLASS_HTMLDocument, nil, CLSCTX_INPROC_SERVER,
    IID_IHTMLDocument2, FHtmlDoc);
  // Explicit check instead of Assert: assertions can be compiled out ($C-),
  // which would turn a creation failure into a nil-interface access violation.
  if Failed(CreateResult) or (FHtmlDoc = nil) then
    raise Exception.Create('构建HTMLDocument接口失败');
  FHtmlDoc.Set_designMode('On'); // switch to design mode so scripts are not executed
  // Pump the message queue until the document finishes loading.
  while FHtmlDoc.readyState <> 'complete' do
  begin
    Sleep(1);
    Application.ProcessMessages;
  end;
  FWebTables := TDxTableCollection.Create(FHtmlDoc);
  FWebElements := TDxWebElementCollection.Create(nil);
  FWebComb := TDxWebCombobox.Create(nil);
end;

{ Frees the wrapper objects, releases the document interface and only then
  tears down COM (if this instance initialised it). }
destructor TDxHtmlParser.Destroy;
begin
  FWebTables.Free;
  FWebElements.Free;
  FWebComb.Free;
  // Interface fields are normally released by the compiler AFTER the
  // destructor body has run, i.e. after CoUninitialize. Drop the reference
  // explicitly so the COM object is released while COM is still initialised.
  FHtmlDoc := nil;
  if FComInitialized then
    CoUninitialize;
  inherited;
end;

{ Returns the shared combobox wrapper re-targeted at the element named AName,
  or nil when no element collection has been loaded yet (HTML never set).
  The same TDxWebCombobox instance is returned on every call. }
function TDxHtmlParser.GetWebCombobox(AName: string): TDxWebCombobox;
begin
  if FWebElements.Collection <> nil then
  begin
    FWebComb.CombInterface := FWebElements.ElementByName[AName] as IHTMLSelectElement;
    Result := FWebComb;
  end
  else
    Result := nil;
end;

{ Replaces the document body with Value and refreshes the element collection.
  Does nothing when Value equals the text already loaded. }
procedure TDxHtmlParser.SetHTML(const Value: string);
begin
  if FHTML <> Value then
  begin
    FHtmlDoc.body.innerHTML := Value;
    FWebElements.Collection := FHtmlDoc.all;
    // Cache the text only after the document accepted it; otherwise a failed
    // update would make a retry with the same text a silent no-op.
    FHTML := Value;
  end;
end;

end. |
2023-10-27
2022-08-15
2022-08-17
2022-09-23
2022-08-13
请发表评论