本文整理汇总了Java中net.htmlparser.jericho.HTMLElementName类的典型用法代码示例。如果您正苦于以下问题：Java HTMLElementName类的具体用法？Java HTMLElementName怎么用？Java HTMLElementName使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
HTMLElementName类属于net.htmlparser.jericho包,在下文中一共展示了HTMLElementName类的17个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: isValidTRTag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Checks whether a TR tag sits in an acceptable place in the table structure:
 * either directly below a validated TABLE, or below a validated TBODY/THEAD
 * whose own parent is a validated TABLE.
 *
 * @param tag a TR tag; its element is dereferenced, so it must belong to one
 * @return {@code true} when the enclosing structure is accepted, otherwise {@code false}
 */
private static boolean isValidTRTag(Tag tag) {
    final Element parent = tag.getElement().getParentElement();
    // No parent at all, or a parent that was not marked as valid: reject.
    if (parent == null || parent.getStartTag().getUserData() != VALID_MARKER) {
        return false;
    }
    // Identity comparison is kept as in the original code.
    // NOTE(review): presumably relies on Jericho handing back the HTMLElementName
    // constant instances for standard element names - confirm.
    final String parentName = parent.getName();
    if (parentName == HTMLElementName.TABLE) {
        return true;
    }
    if (parentName != HTMLElementName.TBODY && parentName != HTMLElementName.THEAD) {
        return false;
    }
    // TBODY/THEAD parent: the grandparent must be a validated TABLE.
    final Element grandParent = parent.getParentElement();
    return grandParent != null
            && grandParent.getStartTag().getUserData() == VALID_MARKER
            && grandParent.getName() == HTMLElementName.TABLE;
}
开发者ID:trackplus,项目名称:Genji,代码行数:20,代码来源:HTMLSanitiser.java
示例2: printHTMLPage
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Writes the content of the document's BODY element to {@code this.out} as UTF-8.
 * <p>
 * The text of all STYLE elements is collected and handed to
 * {@code Style.getStyles}; every start tag that follows the BODY start tag is
 * then passed to {@code replaceAttrs} together with the resulting list before
 * the BODY content is written. Nothing is written when the document has no
 * BODY start tag.
 * <p>
 * Element content is obtained through {@code Element.getContent()}, so STYLE
 * and BODY elements whose end tag is missing no longer cause a
 * {@code NullPointerException} (the BODY end tag is optional in HTML).
 *
 * @param source the parsed HTML document
 * @throws UnsupportedEncodingException declared for the UTF-8 writer
 * @throws IOException if writing to the output stream fails
 */
public void printHTMLPage(Source source)
        throws UnsupportedEncodingException, IOException {
    // Collect the text of every STYLE element.
    final StringBuilder css = new StringBuilder();
    for (final StartTag styleTag : source.getAllStartTags(HTMLElementName.STYLE)) {
        css.append(styleTag.getElement().getContent().toString());
    }
    final Vector<ReplaceRight> rights = Style.getStyles(css.toString(), styles, counter);

    // Generate the page with the updated styles.
    final Iterator<StartTag> iterator = source.getAllStartTags().iterator();
    StartTag body = null;
    while (body == null && iterator.hasNext()) {
        final StartTag candidate = iterator.next();
        if (HTMLElementName.BODY.equals(candidate.getName())) {
            body = candidate;
        }
    }
    if (body == null) {
        return;
    }

    // The iterator is now positioned just after BODY: only later tags are processed.
    final OutputDocument document = new OutputDocument(source);
    while (iterator.hasNext()) {
        replaceAttrs(iterator.next(), document, rights);
    }

    final Segment bodyContent = body.getElement().getContent();
    final OutputStreamWriter writer = new OutputStreamWriter(this.out, "UTF-8");
    document.writeTo(writer, bodyContent.getBegin(), bodyContent.getEnd());
    // Flush only; the underlying stream is a field of this object and is left open.
    writer.flush();
}
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:41,代码来源:Out.java
示例3: main
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Ad-hoc driver: reads {@code d:/test.html}, concatenates the text of all
 * STYLE elements, passes it to {@code getStyles} and prints each resulting
 * style on its own line to {@code d:/res1.txt}.
 * <p>
 * Both files are closed in {@code finally} blocks so they are released even
 * when parsing or style extraction fails. The output file is opened only
 * after the styles have been extracted. An {@code IOException} is reported
 * on standard error.
 *
 * @param args ignored
 */
public static void main(final String[] args) {
    try {
        final String text;
        final FileInputStream is = new FileInputStream("d:/test.html");
        try {
            final Source source = new Source(is);
            final StringBuilder css = new StringBuilder();
            for (final StartTag tag : source.getAllStartTags(HTMLElementName.STYLE)) {
                // getContent() also copes with a STYLE element whose end tag is missing.
                css.append(tag.getElement().getContent().toString());
            }
            text = css.toString();
        } finally {
            is.close();
        }

        final Vector<Style> styles = new Vector<Style>();
        getStyles(text, styles, createCounter());

        final PrintStream out = new PrintStream(new FileOutputStream("d:/res1.txt"));
        try {
            for (final Style style : styles) {
                out.println(style);
            }
        } finally {
            // Closing the PrintStream also closes the wrapped FileOutputStream.
            out.close();
        }
    } catch (final IOException e) {
        e.printStackTrace();
    }
}
开发者ID:Vitaliy-Yakovchuk,项目名称:ramus,代码行数:30,代码来源:Style.java
示例4: modifyDocument
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Rewrites the URL-carrying attributes of LINK ({@code href}), SCRIPT
 * ({@code src}) and IMG ({@code src}) start tags by delegating to
 * {@code replaceUrlAttribute}, in that order.
 *
 * @param html the document
 * @param baseDir html location
 * @param out processed document
 */
protected void modifyDocument( Source html, PathOrigin baseDir, OutputDocument out ) {
  // element name -> attribute holding the URL; processed top to bottom
  final String[][] urlAttributes = {
    { HTMLElementName.LINK, "href" },
    { HTMLElementName.SCRIPT, "src" },
    { HTMLElementName.IMG, "src" },
  };
  for ( final String[] entry : urlAttributes ) {
    replaceUrlAttribute( html.getAllStartTags( entry[0] ), entry[1], baseDir, out );
  }
}
开发者ID:webdetails,项目名称:cte,代码行数:14,代码来源:ProcessedHtmlPage.java
示例5: processTag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Validates one tag and, if it is accepted, replaces it in the output
 * document with regenerated start/end tag HTML.
 * <p>
 * A tag is rejected when its name is not in {@code VALID_ELEMENT_NAMES},
 * when it is neither a normal start tag nor a normal end tag, when a
 * table-related or LI tag sits in an invalid parent, when a start tag lacks
 * its required end tag, or when an end tag has no element. A missing
 * optional end tag is inserted at the end of the element.
 *
 * @param tag the tag to examine
 * @param outputDocument the document receiving replacements and insertions
 * @return {@code true} if the tag was accepted and rewritten, {@code false} if rejected
 */
private static boolean processTag(Tag tag, OutputDocument outputDocument) {
    final String name = tag.getName();
    if (!VALID_ELEMENT_NAMES.contains(name)) {
        return false;
    }

    if (tag.getTagType() == StartTagType.NORMAL) {
        final Element element = tag.getElement();
        if (!hasValidTableContext(name, tag)) {
            return false;
        }
        if (HTMLElements.getEndTagRequiredElementNames().contains(name)) {
            // reject start tag if its required end tag is missing
            if (element.getEndTag() == null) {
                return false;
            }
        } else if (HTMLElements.getEndTagOptionalElementNames().contains(name)) {
            // reject invalid LI tags
            if (name == HTMLElementName.LI && !isValidLITag(tag)) {
                return false;
            }
            // insert optional end tag if it is missing
            if (element.getEndTag() == null) {
                outputDocument.insert(element.getEnd(), getEndTagHTML(name));
            }
        }
        outputDocument.replace(tag, getStartTagHTML(element.getStartTag()));
        return true;
    }

    if (tag.getTagType() == EndTagType.NORMAL) {
        // reject end tags that aren't associated with a start tag
        if (tag.getElement() == null) {
            return false;
        }
        if (!hasValidTableContext(name, tag)) {
            return false;
        }
        // reject invalid LI tags
        if (name == HTMLElementName.LI && !isValidLITag(tag)) {
            return false;
        }
        outputDocument.replace(tag, getEndTagHTML(name));
        return true;
    }

    // reject abnormal tags
    return false;
}

/** Runs the parent-structure check matching a THEAD/TBODY/TR/TD/TH tag; other names pass. */
private static boolean hasValidTableContext(String name, Tag tag) {
    if (name == HTMLElementName.THEAD || name == HTMLElementName.TBODY) {
        return isValidTbodyTHeadTag(tag);
    }
    if (name == HTMLElementName.TR) {
        return isValidTRTag(tag);
    }
    if (name == HTMLElementName.TD || name == HTMLElementName.TH) {
        return isValidTDTHTag(tag);
    }
    return true;
}
开发者ID:trackplus,项目名称:Genji,代码行数:35,代码来源:HTMLSanitiser.java
示例6: getMetaValue
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Looks up the {@code content} attribute of the first META start tag whose
 * {@code name} attribute matches {@code key}.
 * <p>
 * Start tags of other elements that carry a matching {@code name} attribute
 * are skipped.
 *
 * @param source the document to search
 * @param key the value of the {@code name} attribute to look for
 *            (NOTE(review): the {@code false} argument presumably makes the
 *            match case-insensitive - confirm against the Jericho API)
 * @return the {@code content} attribute value, or {@code null} if no such META tag exists
 */
private static String getMetaValue(Source source, String key) {
    int searchFrom = 0;
    while (searchFrom < source.length()) {
        final StartTag candidate = source.getNextStartTag(searchFrom, "name", key, false);
        if (candidate == null) {
            break;
        }
        if (candidate.getName() == HTMLElementName.META) {
            // Attribute values are automatically decoded
            return candidate.getAttributeValue("content");
        }
        // Not a META element: continue searching after this tag.
        searchFrom = candidate.getEnd();
    }
    return null;
}
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:11,代码来源:ExtractGuardian.java
示例7: processTag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Validates one tag against {@code allowedTags} and, if accepted, replaces
 * it in the output document with regenerated start/end tag HTML.
 * <p>
 * A tag is rejected when its name is not allowed, when it is neither a
 * normal start tag nor a normal end tag, when a start tag lacks its required
 * end tag, when an end tag has no element, or when an LI tag fails
 * {@code isValidLITag}. A missing optional end tag is inserted at the end of
 * the element.
 *
 * @param tag the tag to examine
 * @param output the document receiving replacements and insertions
 * @return {@code true} if the tag was accepted and rewritten, {@code false} if rejected
 */
private boolean processTag(Tag tag, OutputDocument output) {
    // Locale.ROOT keeps the lower-casing independent of the default locale.
    final String elementName = tag.getName().toLowerCase(java.util.Locale.ROOT);
    if (!allowedTags.contains(elementName)) {
        return false;
    }
    // The lower-cased name is not guaranteed to be the same String instance as
    // the HTMLElementName constant, so it is compared with equals(), not ==.
    final boolean isListItem = HTMLElementName.LI.equals(elementName);

    if (tag.getTagType() == StartTagType.NORMAL) {
        final Element element = tag.getElement();
        if (HTMLElements.getEndTagRequiredElementNames().contains(elementName)) {
            // reject start tag if its required end tag is missing
            if (element.getEndTag() == null) {
                return false;
            }
        } else if (HTMLElements.getEndTagOptionalElementNames().contains(elementName)) {
            // reject invalid LI tags
            if (isListItem && !isValidLITag(tag)) {
                return false;
            }
            // insert optional end tag if it is missing
            if (element.getEndTag() == null) {
                output.insert(element.getEnd(), getEndTagHTML(elementName));
            }
        }
        output.replace(tag, getStartTagHTML(element.getStartTag()));
    } else if (tag.getTagType() == EndTagType.NORMAL) {
        // reject end tags that aren't associated with a start tag
        if (tag.getElement() == null) {
            return false;
        }
        // reject invalid LI tags
        if (isListItem && !isValidLITag(tag)) {
            return false;
        }
        output.replace(tag, getEndTagHTML(elementName));
    } else {
        // reject abnormal tags
        return false;
    }
    return true;
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:39,代码来源:HtmlStripperDiscussion.java
示例8: isValidLITag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Accepts an LI tag only when its immediate parent element exists, carries
 * the {@code VALID_MARKER} user data on its start tag, and is a UL or OL.
 *
 * @param tag an LI tag; its element is dereferenced, so it must belong to one
 * @return {@code true} if the LI is directly inside a validated UL or OL
 */
private boolean isValidLITag(Tag tag) {
    final Element parent = tag.getElement().getParentElement();
    // LI elements without a parent, or below a non-validated parent, are ignored.
    if (parent == null || parent.getStartTag().getUserData() != VALID_MARKER) {
        return false;
    }
    if (parent.getName() == HTMLElementName.UL) {
        return true;
    }
    return parent.getName() == HTMLElementName.OL;
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:15,代码来源:HtmlStripperDiscussion.java
示例9: processTag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Checks a tag against {@code allowedTags} and rewrites it in {@code output}
 * when it is acceptable.
 * <p>
 * Rejected are: names that are not allowed, tags that are neither normal
 * start tags nor normal end tags, start tags whose required end tag is
 * missing, end tags without an element, and LI tags that fail
 * {@code isValidLITag}. When an optional end tag is missing it is inserted
 * at the end of the element.
 *
 * @param tag the tag to examine
 * @param output the document receiving replacements and insertions
 * @return {@code true} if the tag was kept and rewritten, {@code false} if rejected
 */
private boolean processTag(Tag tag, OutputDocument output) {
    // Lower-case independently of the default locale.
    final String elementName = tag.getName().toLowerCase(java.util.Locale.ROOT);
    if (!allowedTags.contains(elementName)) {
        return false;
    }

    if (tag.getTagType() == StartTagType.NORMAL) {
        final Element element = tag.getElement();
        final boolean endTagMissing = element.getEndTag() == null;
        if (HTMLElements.getEndTagRequiredElementNames().contains(elementName)) {
            // reject start tag if its required end tag is missing
            if (endTagMissing) {
                return false;
            }
        } else if (HTMLElements.getEndTagOptionalElementNames().contains(elementName)) {
            // reject invalid LI tags; equals() is used because toLowerCase() is
            // not guaranteed to return the HTMLElementName constant instance
            if (HTMLElementName.LI.equals(elementName) && !isValidLITag(tag)) {
                return false;
            }
            // insert optional end tag if it is missing
            if (endTagMissing) {
                output.insert(element.getEnd(), getEndTagHTML(elementName));
            }
        }
        output.replace(tag, getStartTagHTML(element.getStartTag()));
        return true;
    }

    if (tag.getTagType() == EndTagType.NORMAL) {
        // reject end tags that aren't associated with a start tag
        if (tag.getElement() == null) {
            return false;
        }
        // reject invalid LI tags (value comparison, see above)
        if (HTMLElementName.LI.equals(elementName) && !isValidLITag(tag)) {
            return false;
        }
        output.replace(tag, getEndTagHTML(elementName));
        return true;
    }

    // reject abnormal tags
    return false;
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:31,代码来源:HtmlStripper.java
示例10: isValidLITag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Tells whether an LI tag is directly nested in a validated UL or OL element.
 *
 * @param tag an LI tag; its element is dereferenced, so it must belong to one
 * @return {@code false} when there is no parent element, when the parent's
 *         start tag does not carry {@code VALID_MARKER}, or when the parent
 *         is neither UL nor OL; {@code true} otherwise
 */
private boolean isValidLITag(Tag tag) {
    final Element listElement = tag.getElement().getParentElement();
    if (listElement == null) {
        // ignore LI elements without a parent
        return false;
    }
    final boolean parentValidated = listElement.getStartTag().getUserData() == VALID_MARKER;
    if (!parentValidated) {
        // ignore LI elements whose parent is not valid
        return false;
    }
    // only accept LI tags whose immediate parent is UL or OL
    final String listName = listElement.getName();
    return listName == HTMLElementName.UL || listName == HTMLElementName.OL;
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:10,代码来源:HtmlStripper.java
示例11: parseHtml
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Extracts image entries from a result page.
 * <p>
 * Starting from the first TABLE element, {@code getFirstElement()} is applied
 * four times to reach the container whose P elements are scanned. For each
 * paragraph that contains an A element with an IMG inside it, an
 * {@code Image} is created from the IMG {@code src} and the A {@code href};
 * the paragraph's source text is stored in {@code Image.debug}. Scanning
 * stops at the first paragraph whose content contains the marker text
 * "이미지 결과 더 보기".
 * <p>
 * Pages without a TABLE or without the expected container yield an empty
 * list, and anchors without an IMG are skipped, instead of failing with an
 * {@code IndexOutOfBoundsException} or {@code NullPointerException}.
 *
 * @param html the HTML text of the page
 * @return the images found, in document order; never {@code null}
 */
public static ArrayList<Image> parseHtml(String html) {
    final ArrayList<Image> images = new ArrayList<Image>();
    final Source source = new Source(html);

    Element container = source.getFirstElement(HTMLElementName.TABLE);
    // Same four getFirstElement() steps as before, but tolerant of a missing level.
    for (int step = 0; step < 4 && container != null; step++) {
        container = container.getFirstElement();
    }
    if (container == null) {
        return images;
    }

    for (Element p : container.getAllElements(HTMLElementName.P)) {
        if (p.getContent().toString().contains("이미지 결과 더 보기")) {
            // Nothing at or after the marker paragraph is collected.
            break;
        }
        final Element a = p.getFirstElement(HTMLElementName.A);
        if (a == null) {
            continue;
        }
        final Element img = a.getFirstElement(HTMLElementName.IMG);
        if (img == null) {
            // Anchor without an image: nothing to extract from this paragraph.
            continue;
        }
        final String from = a.getAttributeValue("href");
        final String url = img.getAttributeValue("src");
        final Image image = new Image(url, from);
        image.debug = p.toString();
        images.add(image);
    }
    return images;
}
开发者ID:rishubil,项目名称:GalbijjimSearcher,代码行数:29,代码来源:HTMLParser.java
示例12: isValidLITag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Accepts an LI tag only if its immediate parent is a UL or OL element whose
 * start tag carries {@code VALID_MARKER}.
 *
 * @param tag an LI tag; its element is dereferenced, so it must belong to one
 * @return {@code true} if the parent is a validated UL or OL, otherwise {@code false}
 */
private static boolean isValidLITag(Tag tag) {
    final Element parent = tag.getElement().getParentElement();
    if (parent == null) {
        return false; // ignore LI elements without a parent
    }
    if (parent.getStartTag().getUserData() != VALID_MARKER) {
        return false; // ignore LI elements whose parent is not valid
    }
    final String parentName = parent.getName();
    final boolean inUnorderedList = parentName == HTMLElementName.UL;
    return inUnorderedList || parentName == HTMLElementName.OL;
}
开发者ID:trackplus,项目名称:Genji,代码行数:7,代码来源:HTMLSanitiser.java
示例13: isValidTDTHTag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Accepts a TD or TH tag only if its immediate parent is a TR element whose
 * start tag carries {@code VALID_MARKER}.
 *
 * @param tag a TD or TH tag; its element is dereferenced, so it must belong to one
 * @return {@code true} if the parent is a validated TR, otherwise {@code false}
 */
private static boolean isValidTDTHTag(Tag tag) {
    final Element row = tag.getElement().getParentElement();
    // Cells without a parent, or below a non-validated parent, are ignored.
    final boolean parentValidated =
            row != null && row.getStartTag().getUserData() == VALID_MARKER;
    if (!parentValidated) {
        return false;
    }
    return row.getName() == HTMLElementName.TR;
}
开发者ID:trackplus,项目名称:Genji,代码行数:7,代码来源:HTMLSanitiser.java
示例14: isValidTbodyTHeadTag
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Accepts a TBODY or THEAD tag only if its immediate parent is a TABLE
 * element whose start tag carries {@code VALID_MARKER}.
 *
 * @param tag a TBODY or THEAD tag; its element is dereferenced, so it must belong to one
 * @return {@code true} if the parent is a validated TABLE, otherwise {@code false}
 */
private static boolean isValidTbodyTHeadTag(Tag tag) {
    final Element table = tag.getElement().getParentElement();
    if (table == null) {
        return false;
    }
    return table.getStartTag().getUserData() == VALID_MARKER
            && table.getName() == HTMLElementName.TABLE;
}
开发者ID:trackplus,项目名称:Genji,代码行数:7,代码来源:HTMLSanitiser.java
示例15: main
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Converts every file in the input directory into a {@code .txt} file
 * (title, description meta value, text of each DIV with id
 * {@code article-body-blocks}) and a {@code .key} file (keywords meta value,
 * one keyword per line).
 * <p>
 * {@code args[0]} overrides the input directory and {@code args[1]} the
 * output directory; without {@code args[1]} the output goes to the input
 * directory. Both directory strings are concatenated directly with the file
 * name, as before.
 * <p>
 * Changes from the previous version: a missing or unreadable input
 * directory raises {@code IllegalArgumentException} instead of a
 * {@code NullPointerException}; directory entries that are not regular
 * files are skipped; the {@code .html} suffix is stripped with a literal dot
 * rather than a regex wildcard; both output streams are closed in
 * {@code finally} blocks; tag types are registered once rather than per file.
 *
 * @param args optional input directory and output directory
 * @throws Exception if a document cannot be read or an output file cannot be written
 */
public static void main(String[] args) throws Exception {
    if (args.length > 0) {
        inDir = args[0];
    }
    if (args.length > 1) {
        outDir = args[1];
    } else {
        outDir = inDir;
    }

    final File[] docs = new File(inDir).listFiles();
    if (docs == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IllegalArgumentException("Not a readable directory: " + inDir);
    }

    // Tag type registration does not depend on the document, so it is done once.
    MicrosoftConditionalCommentTagTypes.register();
    PHPTagTypes.register();
    PHPTagTypes.PHP_SHORT.deregister(); // remove PHP short tags for this example otherwise they override processing instructions
    MasonTagTypes.register();

    for (File doc : docs) {
        if (!doc.isFile()) {
            continue;
        }
        final String sourceUrlString = "file:" + inDir + doc.getName();
        final Source source = new Source(new URL(sourceUrlString));
        // "\\." matches a literal dot; the unescaped ".html" also matched e.g. "xhtml".
        final String outputBase = outDir + doc.getName().replaceFirst("\\.html", "");

        final PrintStream ps = new PrintStream(outputBase + ".txt");
        try {
            final PrintStream psKey = new PrintStream(outputBase + ".key");
            try {
                // Call fullSequentialParse manually as most of the source will be parsed.
                source.fullSequentialParse();

                final String title = getTitle(source);
                ps.print(title + "\n\n");

                final String description = getMetaValue(source, "description");
                ps.println(description + "\n\n");

                final String keywords = getMetaValue(source, "keywords");
                if (keywords != null) {
                    psKey.print(keywords.replaceAll("\\s*,\\s*", "\n"));
                }
            } finally {
                psKey.close();
            }

            final List<Element> divElements = source.getAllElements(HTMLElementName.DIV);
            for (Element div : divElements) {
                final String id = div.getAttributeValue("id");
                if (id != null && id.equals("article-body-blocks")) {
                    ps.println(div.getContent().getTextExtractor().toString());
                }
            }
        } finally {
            ps.close();
        }
    }
}
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:55,代码来源:ExtractGuardian.java
示例16: getTitle
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Returns the decoded, whitespace-collapsed content of the document's first
 * TITLE element.
 *
 * @param source the document to inspect
 * @return the title text, or {@code null} if the document has no TITLE element
 */
private static String getTitle(Source source) {
    final Element title = source.getFirstElement(HTMLElementName.TITLE);
    // TITLE element never contains other tags so just decode it collapsing whitespace.
    return (title == null)
            ? null
            : CharacterReference.decodeCollapseWhiteSpace(title.getContent());
}
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:7,代码来源:ExtractGuardian.java
示例17: criarHtmlStripper
import net.htmlparser.jericho.HTMLElementName; //导入依赖的package包/类
/**
 * Builds an {@code HtmlStripper} to which the DIV, HR and FONT tags and the
 * {@code size}, {@code face}, {@code style} and {@code color} attributes
 * have been added as allowed.
 *
 * @return the newly configured stripper
 */
private HtmlStripper criarHtmlStripper() {
    final HtmlStripper configured = new HtmlStripper();
    configured.addAllowedTags(
            HTMLElementName.DIV,
            HTMLElementName.HR,
            HTMLElementName.FONT);
    configured.addAllowedAttributes(
            "size",
            "face",
            "style",
            "color");
    return configured;
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:7,代码来源:ChatRoomEditBean.java
注：本文中的net.htmlparser.jericho.HTMLElementName类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论