本文整理汇总了Java中net.htmlparser.jericho.CharacterReference类的典型用法代码示例。如果您正苦于以下问题:Java CharacterReference类的具体用法?Java CharacterReference怎么用?Java CharacterReference使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
CharacterReference类属于net.htmlparser.jericho包,在下文中一共展示了CharacterReference类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getStartTagHTML
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Rebuilds the markup of a start tag, keeping only the attributes whose key
 * is contained in {@code VALID_ATTRIBUTE_NAMES}.
 *
 * <p>Each retained attribute that has a value is written as
 * {@code name="value"}, with the value passed through
 * {@link CharacterReference#encode}. A {@code " /"} is inserted before the
 * closing {@code '>'} when the element has no end tag and the tag name is not
 * listed in {@code HTMLElements.getEndTagOptionalElementNames()}.
 *
 * @param startTag the start tag to serialise
 * @return the rebuilt start tag markup
 */
private static CharSequence getStartTagHTML(StartTag startTag) {
    final StringBuilder html = new StringBuilder();
    html.append('<').append(startTag.getName());

    for (Attribute attr : startTag.getAttributes()) {
        // Attributes outside the approved set are dropped entirely.
        if (!VALID_ATTRIBUTE_NAMES.contains(attr.getKey())) {
            continue;
        }
        html.append(' ').append(attr.getName());
        final String value = attr.getValue();
        if (value != null) {
            html.append("=\"").append(CharacterReference.encode(value)).append('"');
        }
    }

    final boolean hasNoEndTag = startTag.getElement().getEndTag() == null;
    if (hasNoEndTag && !HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName())) {
        html.append(" /");
    }
    html.append('>');
    return html;
}
开发者ID:trackplus,项目名称:Genji,代码行数:19,代码来源:HTMLSanitiser.java
示例2: getStartTagHTML
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Serialises a start tag back to HTML, dropping every attribute whose key is
 * not contained in {@code allowedAttributes}.
 *
 * <p>Attribute values are encoded with {@link CharacterReference#encode} and
 * wrapped in double quotes; attributes without a value are emitted as a bare
 * name. The tag is closed with {@code " />"} instead of {@code ">"} when the
 * element has no end tag and its name is not in
 * {@code HTMLElements.getEndTagOptionalElementNames()}.
 *
 * @param startTag the start tag to serialise
 * @return the filtered start tag markup
 */
private CharSequence getStartTagHTML(StartTag startTag) {
    StringBuilder out = new StringBuilder();
    out.append('<');
    out.append(startTag.getName());

    for (Attribute attr : startTag.getAttributes()) {
        boolean permitted = allowedAttributes.contains(attr.getKey());
        if (permitted) {
            out.append(' ');
            out.append(attr.getName());
            if (attr.getValue() != null) {
                out.append("=\"")
                   .append(CharacterReference.encode(attr.getValue()))
                   .append('"');
            }
        }
    }

    if (startTag.getElement().getEndTag() == null) {
        // No end tag in the source: self-close unless the end tag is optional for this element.
        if (!HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName())) {
            out.append(" /");
        }
    }
    out.append('>');
    return out;
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:22,代码来源:HtmlStripperDiscussion.java
示例3: getStartTagHTML
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Produces a cleaned-up copy of a start tag that contains only attributes
 * whose key appears in {@code allowedAttributes}.
 *
 * <p>Values are re-encoded through {@link CharacterReference#encode} and
 * double-quoted. When the element has no end tag and the tag name is not one
 * of {@code HTMLElements.getEndTagOptionalElementNames()}, {@code " /"} is
 * written before the closing bracket.
 *
 * @param startTag the start tag to serialise
 * @return the filtered start tag markup
 */
private CharSequence getStartTagHTML(StartTag startTag) {
    StringBuilder markup = new StringBuilder();
    markup.append('<').append(startTag.getName());

    for (Attribute candidate : startTag.getAttributes()) {
        if (!allowedAttributes.contains(candidate.getKey())) {
            continue;
        }
        markup.append(' ').append(candidate.getName());
        if (candidate.getValue() == null) {
            // Value-less attribute: the bare name is all that is written.
            continue;
        }
        markup.append("=\"");
        markup.append(CharacterReference.encode(candidate.getValue()));
        markup.append('"');
    }

    boolean endTagPresentOrOptional = startTag.getElement().getEndTag() != null
            || HTMLElements.getEndTagOptionalElementNames().contains(startTag.getName());
    if (!endTagPresentOrOptional) {
        markup.append(" /");
    }
    markup.append('>');
    return markup;
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:20,代码来源:HtmlStripper.java
示例4: parseText
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Returns the text of {@code source} between two offsets with each character
 * reference replaced by the character it denotes.
 *
 * <p>Only references that lie completely inside {@code [start, end)} are
 * decoded; everything else in the range is copied verbatim.
 *
 * @param start offset in {@code source} of the first character to read (inclusive)
 * @param end   offset in {@code source} at which reading stops (exclusive)
 * @return the decoded text
 */
private String parseText(int start, int end) {
    StringBuilder sb = new StringBuilder();
    int position = start;
    while (position < end) {
        CharacterReference ref = source.getNextCharacterReference(position);
        // Stop at the first reference that is missing, starts past the range, or
        // straddles its end: consuming a straddling reference would move the
        // position beyond 'end' and make the final subSequence call fail.
        if (ref == null || ref.getBegin() >= end || ref.getEnd() > end) {
            break;
        }
        sb.append(source.subSequence(position, ref.getBegin()));
        int codePoint = ref.getCodePoint();
        if (Character.isSupplementaryCodePoint(codePoint)) {
            // A single char cannot hold code points above U+FFFF; emit the surrogate pair.
            sb.appendCodePoint(codePoint);
        } else {
            sb.append((char) codePoint);
        }
        position = ref.getEnd();
    }
    sb.append(source.subSequence(position, end));
    return sb.toString();
}
开发者ID:konsoletyper,项目名称:teavm-flavour,代码行数:15,代码来源:Parser.java
示例5: reencodeTextSegment
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Replaces the text between two offsets in the output document with a
 * decoded-then-re-encoded copy of itself.
 *
 * <p>Does nothing when {@code begin >= end}.
 *
 * @param source           the source document the offsets refer to
 * @param outputDocument   the output document that receives the replacement
 * @param begin            start offset of the text segment (inclusive)
 * @param end              end offset of the text segment (exclusive)
 * @param formatWhiteSpace if {@code true}, re-encode with
 *                         {@code CharacterReference.encodeWithWhiteSpaceFormatting};
 *                         otherwise with {@code CharacterReference.encode}
 */
private static void reencodeTextSegment(Source source, OutputDocument outputDocument, int begin, int end, boolean formatWhiteSpace) {
    if (begin < end) {
        final Segment segment = new Segment(source, begin, end);
        final String plain = CharacterReference.decode(segment);
        final String reencoded;
        if (formatWhiteSpace) {
            reencoded = CharacterReference.encodeWithWhiteSpaceFormatting(plain);
        } else {
            reencoded = CharacterReference.encode(plain);
        }
        outputDocument.replace(segment, reencoded);
    }
}
开发者ID:trackplus,项目名称:Genji,代码行数:8,代码来源:HTMLSanitiser.java
示例6: reencodeTextSegment
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Decodes the character references in a span of the source and writes the
 * freshly encoded text to the output document in place of that span.
 *
 * <p>An empty or inverted range ({@code begin >= end}) is ignored.
 *
 * @param source the source document the offsets refer to
 * @param output the output document that receives the replacement
 * @param begin  start offset of the span (inclusive)
 * @param end    end offset of the span (exclusive)
 */
private void reencodeTextSegment(Source source, OutputDocument output, int begin, int end) {
    if (begin < end) {
        Segment span = new Segment(source, begin, end);
        output.replace(span, CharacterReference.encode(CharacterReference.decode(span)));
    }
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:9,代码来源:HtmlStripper.java
示例7: reencodeTextSegment
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Re-encodes a stretch of source text: its character references are decoded
 * and the result is encoded again before being substituted in the output
 * document.
 *
 * <p>Returns immediately when {@code begin >= end}.
 *
 * @param source the source document the offsets refer to
 * @param output the output document that receives the replacement
 * @param begin  start offset of the text (inclusive)
 * @param end    end offset of the text (exclusive)
 */
private void reencodeTextSegment(Source source, OutputDocument output,
        int begin, int end) {
    boolean nothingToDo = begin >= end;
    if (nothingToDo) {
        return;
    }
    Segment region = new Segment(source, begin, end);
    String rawText = CharacterReference.decode(region);
    output.replace(region, CharacterReference.encode(rawText));
}
开发者ID:camaradosdeputadosoficial,项目名称:edemocracia,代码行数:10,代码来源:HtmlStripper.java
示例8: mosesPostprocess
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Post-processes a translated line.
 *
 * <p>Steps, in order: decode character references, run
 * {@code DeNormalize.processSingleLine}, collapse every whitespace run to a
 * single space and trim. If the target language is not space-delimited,
 * whitespace sitting between two characters in the range U+3001..U+9FA0 is
 * removed. The result is finally passed, together with the original
 * {@code text}, to {@code cleanSpacesAroundTags}.
 *
 * @param text           the raw text to post-process
 * @param targetLanguage the language of {@code text}
 * @return the value returned by {@code cleanSpacesAroundTags}
 */
private String mosesPostprocess(String text, Language targetLanguage) {
    String decoded = CharacterReference.decode(text);
    String denormalized = DeNormalize.processSingleLine(decoded);
    String cleaned = denormalized.replaceAll("\\s+", " ").trim();
    if (targetLanguage.isSpaceDelimited()) {
        return cleanSpacesAroundTags(cleaned, text);
    }
    String withoutInnerSpaces = cleaned.replaceAll("(?<=[\u3001-\u9fa0])\\s+(?=[\u3001-\u9fa0])", "");
    return cleanSpacesAroundTags(withoutInnerSpaces, text);
}
开发者ID:amake,项目名称:omegat-moses-mt,代码行数:12,代码来源:MosesTranslate.java
示例9: getTitle
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Returns the content of the first TITLE element found in the given source,
 * with character references decoded and whitespace collapsed.
 *
 * @param source the parsed document to search
 * @return the decoded title, or {@code null} if there is no TITLE element
 */
private static String getTitle(Source source) {
    final net.htmlparser.jericho.Element title =
            source.getFirstElement(net.htmlparser.jericho.HTMLElementName.TITLE);
    // A TITLE element never contains other tags, so decoding its content is sufficient.
    return title == null
            ? null
            : CharacterReference.decodeCollapseWhiteSpace(title.getContent());
}
开发者ID:shiftdirector,项目名称:youseer,代码行数:16,代码来源:Worker.java
示例10: getTitle
import net.htmlparser.jericho.CharacterReference; //导入依赖的package包/类
/**
 * Looks up the first TITLE element of the source and returns its content
 * after decoding character references and collapsing whitespace.
 *
 * @param source the parsed document to search
 * @return the decoded title, or {@code null} when no TITLE element exists
 */
private static String getTitle(Source source) {
    Element title = source.getFirstElement(HTMLElementName.TITLE);
    if (title != null) {
        // TITLE holds no nested tags, so its content can be decoded directly.
        return CharacterReference.decodeCollapseWhiteSpace(title.getContent());
    }
    return null;
}
开发者ID:EUMSSI,项目名称:EUMSSI-tools,代码行数:7,代码来源:ExtractGuardian.java
注:本文中的net.htmlparser.jericho.CharacterReference类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论