本文整理汇总了Java中org.apache.pdfbox.util.TextPosition类的典型用法代码示例。如果您正苦于以下问题:Java TextPosition类的具体用法?Java TextPosition怎么用?Java TextPosition使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
TextPosition类属于org.apache.pdfbox.util包,在下文中一共展示了TextPosition类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: processTextPosition
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Routes a single glyph into the word that is currently being collected.
 * <p>
 * The result of {@code matchCharLine(text)} is stored in {@code lineMatch}
 * before any decision is taken. A whitespace glyph closes the current word via
 * {@code endWord()}. A non-whitespace glyph either opens a new word
 * ({@code setWordCoord}) when {@code is1stChar} is set, or is appended to the
 * current word when {@code lineMatch} is true; otherwise it is dropped.
 *
 * @param text the glyph to process; only the first char of its character
 *             string is inspected for whitespace
 */
@Override
protected void processTextPosition(TextPosition text)
{
    final String glyph = text.getCharacter();
    // NOTE(review): throws if the character string is null or empty - confirm callers never pass such glyphs.
    final char leading = glyph.charAt(0);
    lineMatch = matchCharLine(text);

    if (Character.isWhitespace(leading))
    {
        endWord();
        return;
    }

    if (is1stChar)
    {
        // First glyph of a word: record its coordinates together with the glyph.
        setWordCoord(text, glyph);
    }
    else if (lineMatch)
    {
        appendChar(glyph);
    }
}
开发者ID:mkl-public,项目名称:testarea-pdfbox1,代码行数:24,代码来源:PrintTextLocations.java
示例2: strikesThrough
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Decides whether this rectangle (corners {@code p0} and {@code p2}) can be
 * regarded as a strike-through line for the given glyph.
 *
 * @param textPosition the glyph to test against
 * @return {@code true} if the rectangle spans the glyph horizontally, starts
 *         between the baseline and the scaled font ascent, and is less than
 *         2 units high
 */
boolean strikesThrough(TextPosition textPosition)
{
    // TODO: This is a very simplistic implementation only working for horizontal text without page rotation
    // and horizontal rectangular strikeThroughs with p0 at the left bottom and p2 at the right top
    final Matrix textMatrix = textPosition.getTextPos();

    // Horizontal coverage: the rectangle must start within the first 10% of the
    // glyph width and must reach at least 90% of it.
    if (p0.getX() > textMatrix.getXPosition() + textPosition.getWidth() * .1f)
    {
        return false;
    }
    if (p2.getX() < textMatrix.getXPosition() + textPosition.getWidth() * .9f)
    {
        return false;
    }

    // Vertical placement: the rectangle must start at or above the baseline ...
    final double aboveBaseline = p0.getY() - textMatrix.getYPosition();
    if (aboveBaseline < 0)
    {
        return false;
    }
    // ... and not higher than the ascent (glyph space units, hence the division by 1000).
    if (aboveBaseline > textPosition.getFont().getFontDescriptor().getAscent() * textPosition.getFontSizeInPt() / 1000.0)
    {
        return false;
    }

    // Finally the rectangle has to be thin enough to count as a line.
    return Math.abs(p2.getY() - p0.getY()) < 2;
}
开发者ID:mkl-public,项目名称:testarea-pdfbox1,代码行数:17,代码来源:PDFStyledTextStripper.java
示例3: underlines
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Decides whether this rectangle (corners {@code p0} and {@code p2}) can be
 * regarded as an underline for the given glyph.
 *
 * @param textPosition the glyph to test against
 * @return {@code true} if the rectangle spans the glyph horizontally, starts
 *         between the baseline and the lower limit derived from the font
 *         descent, and is less than 2 units high
 */
boolean underlines(TextPosition textPosition)
{
    // TODO: This is a very simplistic implementation only working for horizontal text without page rotation
    // and horizontal rectangular underlines with p0 at the left bottom and p2 at the right top
    final Matrix textMatrix = textPosition.getTextPos();

    // Horizontal coverage: the rectangle must start within the first 10% of the
    // glyph width and must reach at least 90% of it.
    if (p0.getX() > textMatrix.getXPosition() + textPosition.getWidth() * .1f)
    {
        return false;
    }
    if (p2.getX() < textMatrix.getXPosition() + textPosition.getWidth() * .9f)
    {
        return false;
    }

    // Vertical placement: the rectangle must start at or below the baseline ...
    final double relativeToBaseline = p0.getY() - textMatrix.getYPosition();
    if (relativeToBaseline > 0)
    {
        return false;
    }
    // ... but not below descent * fontSize / 500, i.e. twice the scaled descent.
    if (relativeToBaseline < textPosition.getFont().getFontDescriptor().getDescent() * textPosition.getFontSizeInPt() / 500.0)
    {
        return false;
    }

    // Finally the rectangle has to be thin enough to count as a line.
    return Math.abs(p2.getY() - p0.getY()) < 2;
}
开发者ID:mkl-public,项目名称:testarea-pdfbox1,代码行数:17,代码来源:PDFStyledTextStripper.java
示例4: extractWordLocations
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Extracts the document text with position sorting enabled and appends, after
 * every string the stripper writes, a marker of the form
 * {@code [startX - endX / y]} built from the direction-adjusted coordinates of
 * the first and last glyph of that string.
 *
 * @param document the document to extract text from
 * @return the extracted text including the location markers
 * @throws IOException if the stripper cannot be created or extraction fails
 */
String extractWordLocations(PDDocument document) throws IOException
{
    PDFTextStripper locatingStripper = new PDFTextStripper()
    {
        @Override
        protected void writeString(String text, List<TextPosition> textPositions) throws IOException
        {
            super.writeString(text, textPositions);

            // NOTE(review): assumes textPositions is never empty - confirm against the stripper's contract.
            TextPosition first = textPositions.get(0);
            TextPosition last = textPositions.get(textPositions.size() - 1);

            String marker = String.format("[%s - %s / %s]",
                    first.getXDirAdj(),
                    last.getXDirAdj() + last.getWidthDirAdj(),
                    first.getYDirAdj());
            writeString(marker);
        }
    };

    locatingStripper.setSortByPosition(true);
    return locatingStripper.getText(document);
}
开发者ID:mkl-public,项目名称:testarea-pdfbox1,代码行数:18,代码来源:ExtractText.java
示例5: CharSegment
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Creates a {@code CharSegment} from a PDFBox {@code TextPosition}.
 * <p>
 * The vertical coordinates are mirrored against {@code pageDim.getY2()}, which
 * converts the co-ordinates back to the original system. The height used for
 * the upper edge, and the value passed as the last superclass argument, is
 * {@code getFontSize() * getYScale()}.
 *
 * @param tPos    the text position supplying coordinates, width, character
 *                string, font and font size; its font must not be
 *                {@code null}
 * @param pageDim the page dimensions; only {@code getY2()} is read, in order
 *                to convert the vertical co-ordinates
 */
public CharSegment(TextPosition tPos, GenericSegment pageDim)
{
    super(tPos.getX(),
            tPos.getX() + tPos.getWidth(),
            pageDim.getY2() - tPos.getY(),
            pageDim.getY2() - tPos.getY() + (tPos.getFontSize() * tPos.getYScale()),
            tPos.getCharacter(),
            tPos.getFont().getBaseFont(),
            tPos.getFontSize() * tPos.getYScale());
    // todo: trim the name of the font
}
开发者ID:tamirhassan,项目名称:pdfxtk,代码行数:32,代码来源:CharSegment.java
示例6: writeStringBefore
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Emits the {@code <span>} bookkeeping that has to precede a glyph: when the
 * style changes, the previously opened span (if any) is closed and a new one
 * is opened for the new style (if any).
 * <p>
 * A glyph whose character string is {@code null} inherits {@code lastStyle};
 * every other glyph gets its style from {@code parseStyle}.
 *
 * @param text       the glyph about to be written
 * @param c          the raw character string (not used here)
 * @param normalized the normalized character string (not used here)
 * @throws IOException if writing to the output fails
 */
@Override
protected void writeStringBefore(TextPosition text, String c, String normalized) throws IOException {
    final String currentStyle = (text.getCharacter() == null) ? lastStyle : parseStyle(text);

    // Same non-null style as before: the open span stays as it is.
    if (lastStyle != null && lastStyle.equals(currentStyle)) {
        return;
    }

    if (lastStyle != null) {
        output.write("</span>");
    }
    if (currentStyle != null) {
        output.write("<span style='" + currentStyle + "'>");
    }
    lastStyle = currentStyle;
}
开发者ID:wx5223,项目名称:fileconvert,代码行数:22,代码来源:PDFText2HTML.java
示例7: setWordCoord
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Starts a new word: appends the glyph to {@code tWord} and fills row
 * {@code p} of {@code coordTab} with, in column order, the page number, the
 * rounded x and y position, the word collected so far, the font size and the
 * base font name. Afterwards {@code is1stChar} is cleared.
 *
 * @param text  the glyph supplying position and font information
 * @param tChar the character string of the glyph
 */
protected void setWordCoord(TextPosition text, String tChar)
{
    tWord.append(tChar);

    coordTab[p][0] = String.valueOf(pageNo);
    coordTab[p][1] = String.valueOf(roundVal(Float.valueOf(text.getX())));
    coordTab[p][2] = String.valueOf(roundVal(Float.valueOf(text.getY())));
    coordTab[p][3] = tWord.toString();
    coordTab[p][4] = String.valueOf(text.getFontSize());
    coordTab[p][5] = String.valueOf(text.getFont().getBaseFont());

    // Subsequent glyphs belong to the word that was just opened.
    is1stChar = false;
}
开发者ID:mkl-public,项目名称:testarea-pdfbox1,代码行数:14,代码来源:PrintTextLocations.java
示例8: determineStyle
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Collects the style names that apply to a glyph.
 * <p>
 * "Bold" and "Italic" are derived from the base font name by a
 * case-insensitive substring test. "Underline" and "StrikeThrough" are added
 * when any entry of {@code rectangles} reports {@code underlines} or
 * {@code strikesThrough} for the glyph.
 *
 * @param textPosition the glyph to classify
 * @return a new, possibly empty set of style names
 */
Set<String> determineStyle(TextPosition textPosition)
{
    Set<String> result = new HashSet<>();

    // A font or base font name may be missing; in that case only the
    // rectangle-based styles can be determined.
    String baseFont = textPosition.getFont() != null ? textPosition.getFont().getBaseFont() : null;
    if (baseFont != null)
    {
        // Locale.ROOT keeps the lower-casing independent of the default locale
        // (e.g. in a Turkish locale "I" would not become "i").
        String lowerName = baseFont.toLowerCase(java.util.Locale.ROOT);
        if (lowerName.contains("bold"))
            result.add("Bold");
        if (lowerName.contains("italic"))
            result.add("Italic");
    }

    if (rectangles.stream().anyMatch(r -> r.underlines(textPosition)))
        result.add("Underline");
    if (rectangles.stream().anyMatch(r -> r.strikesThrough(textPosition)))
        result.add("StrikeThrough");

    return result;
}
开发者ID:mkl-public,项目名称:testarea-pdfbox1,代码行数:19,代码来源:PDFStyledTextStripper.java
示例9: extractNoSpaces
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Extracts the document text with position sorting enabled while ignoring
 * every glyph whose character string is {@code null} or blank after trimming.
 * Such glyphs are never handed to the base stripper.
 *
 * @param document the document to extract text from
 * @return the extracted text
 * @throws IOException if the stripper cannot be created or extraction fails
 */
String extractNoSpaces(PDDocument document) throws IOException
{
    PDFTextStripper blankSkippingStripper = new PDFTextStripper()
    {
        @Override
        protected void processTextPosition(TextPosition text)
        {
            String glyph = text.getCharacter();
            if (glyph == null || glyph.trim().isEmpty())
            {
                return;
            }
            super.processTextPosition(text);
        }
    };

    blankSkippingStripper.setSortByPosition(true);
    return blankSkippingStripper.getText(document);
}
开发者ID:mkl-public,项目名称:testarea-pdfbox1,代码行数:16,代码来源:ExtractWithoutExtraSpaces.java
示例10: writeCharacters
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Forwards the character string of a glyph to {@code handler}.
 *
 * @param text the glyph whose characters are written
 * @throws IOException wrapping the {@code SAXException} if the handler rejects
 *                     the characters
 */
@Override
protected void writeCharacters(TextPosition text) throws IOException {
    final String chars = text.getCharacter();
    try {
        handler.characters(chars);
    } catch (SAXException e) {
        // Keep the SAX failure as the cause so the origin stays visible.
        throw new IOExceptionWithCause("Unable to write a character: " + chars, e);
    }
}
开发者ID:kolbasa,项目名称:OCRaptor,代码行数:10,代码来源:PDF2XHTML.java
示例11: processTextPosition
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Event hook invoked for each piece of text that needs processing. This
 * implementation records the glyph as a {@code CharInfo} (character string,
 * direction-adjusted x/y, direction-adjusted width, height, and x/y scale)
 * and adds it to {@code collect_chars_info}.
 *
 * @param text
 *            The text to be processed
 */
protected void processTextPosition(TextPosition text) {
    final CharInfo info = new CharInfo(
            text.getCharacter(),
            text.getXDirAdj(),
            text.getYDirAdj());

    info.w = text.getWidthDirAdj();
    info.h = text.getHeightDir();
    info.xscale = text.getXScale();
    info.yscale = text.getYScale();

    collect_chars_info.add(info);
}
开发者ID:mfit,项目名称:PdfTableAnnotator,代码行数:21,代码来源:ParsePdf.java
示例12: getFontSize
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Computes an integer font-size score for a glyph as
 * {@code fontSizeInPt * FONT_OFFSET + capHeight * CAP_OFFSET + italicAngle},
 * truncated to {@code int}. Cap height and italic angle count as 0 when the
 * glyph has no font or the font has no descriptor.
 *
 * @param text the glyph to score
 * @return the truncated score
 */
private int getFontSize(TextPosition text)
{
    final float sizeInPt = text.getFontSizeInPt();
    final PDFont font = text.getFont();
    final boolean hasDescriptor = font != null && font.getFontDescriptor() != null;

    final float capHeight = hasDescriptor ? font.getFontDescriptor().getCapHeight() : 0;
    final float italicAngle = hasDescriptor ? font.getFontDescriptor().getItalicAngle() : 0;

    return (int) (sizeInPt * FONT_OFFSET + capHeight * CAP_OFFSET + italicAngle);
}
开发者ID:nemausus,项目名称:research-paper-parser,代码行数:14,代码来源:PdfReader.java
示例13: TextFragment
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Creates a {@code TextFragment} from a PDFBox {@code TextPosition}.
 * <p>
 * The superclass receives, in order: x, x plus width, y, y plus
 * {@code getFontSize() * getYScale()}, the character string, the font name
 * resolved by {@code findFontName}, and {@code getFontSize() * getYScale()}.
 * The font is only ever handed to {@code findFontName}; it is not dereferenced
 * here.
 *
 * @param tPos the text position supplying coordinates, width, character
 *             string, font and font size
 */
public TextFragment(TextPosition tPos)
{
    super(tPos.getX(),
            tPos.getX() + (tPos.getWidth()),
            tPos.getY(),
            tPos.getY() + (tPos.getFontSize() * tPos.getYScale()),
            tPos.getCharacter(),
            findFontName(tPos.getFont()),
            tPos.getFontSize() * tPos.getYScale());
    // todo: trim the name of the font
}
开发者ID:tamirhassan,项目名称:pdfxtk,代码行数:15,代码来源:TextFragment.java
示例14: printImage
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Writes an {@code <img>} tag for every pending page image whose key is
 * smaller than the y position of the first trimmed glyph of the line, and
 * removes those images from {@code pageImages}.
 * <p>
 * For each emitted image {@code prevLineY} is set to the line's y position
 * plus the image height. When {@code pageBreak} is set, a {@code style}
 * attribute filled by {@code addPageBreak} is added to the tag.
 *
 * @param line the glyphs of the line about to be written
 * @throws IOException if printing an image or writing to the output fails
 */
protected void printImage(List<TextPosition> line) throws IOException {
    final float lineY = getFirstTrimmed(line).getY();

    // An explicit iterator is required because matching entries are removed while iterating.
    for (Iterator<Entry<Float, Image>> pending = pageImages.entrySet().iterator(); pending.hasNext();) {
        final Entry<Float, Image> candidate = pending.next();
        if (!(candidate.getKey() < lineY)) {
            continue;
        }

        final Image image = candidate.getValue();
        final String name = imageStripper.printImage(image);
        pending.remove();
        prevLineY = lineY + image.image.getHeight();

        final StringBuilder tag = new StringBuilder("<img ")
                .append("src='")
                .append(name)
                .append("' ");
        if (pageBreak) {
            tag.append(" style='");
            addPageBreak(tag);
            tag.append('\'');
        }
        tag.append("/>");
        output.write(tag.toString());
    }
}
开发者ID:wx5223,项目名称:fileconvert,代码行数:31,代码来源:PDFText2HTML.java
示例15: parseStyle
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Builds the inline CSS for a glyph from its relative font size
 * ({@code parseFont}) and the bold/italic verdicts of {@code statisticParser}.
 *
 * @param text the glyph to describe
 * @return the CSS declarations, or {@code null} when the font has no
 *         descriptor or no declaration applies
 */
private String parseStyle(TextPosition text) {
    // Without a font descriptor nothing can be said about the style.
    if (text.getFont().getFontDescriptor() == null) {
        return null;
    }

    final StringBuilder css = new StringBuilder();

    final int relativeSize = parseFont(text);
    if (relativeSize > 0) {
        css.append("font-size: ").append(relativeSize).append("%;");
    }
    if (statisticParser.isBold(text.getFont().getFontDescriptor())) {
        css.append("font-weight: bold;");
    }
    if (statisticParser.isItalic(text)) {
        css.append("font-style: italic;");
    }

    return css.length() > 0 ? css.toString() : null;
}
开发者ID:wx5223,项目名称:fileconvert,代码行数:22,代码来源:PDFText2HTML.java
示例16: parseFont
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Expresses the font size of a glyph as a rounded percentage of
 * {@code getAverangeFontSize()}.
 *
 * @param text the glyph to inspect
 * @return the percentage, or {@code -1} for a {@code WordSeparator} and for a
 *         glyph whose size equals the average
 */
private int parseFont(TextPosition text) {
    if (text instanceof WordSeparator) {
        return -1;
    }
    if (text.getFontSizeInPt() == getAverangeFontSize()) {
        return -1;
    }
    return Math.round(text.getFontSizeInPt() * 100 / getAverangeFontSize());
}
开发者ID:wx5223,项目名称:fileconvert,代码行数:13,代码来源:PDFText2HTML.java
示例17: writeLineStart
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Opens a line in the output. Lines reported empty by {@code isLineEmpty} are
 * skipped entirely. Otherwise the per-line state ({@code align},
 * {@code lineSpacing}, {@code endP}) is reset, pending images are printed, the
 * base implementation runs, alignment and line spacing are parsed, and the
 * start tag returned by {@code writeStartTag()} is appended if there is one.
 *
 * @param line the glyphs of the line
 * @throws IOException if writing to the output fails
 */
@Override
protected void writeLineStart(List<TextPosition> line) throws IOException {
    if (!isLineEmpty(line)) {
        // Reset the state left over from the previous line.
        align = null;
        lineSpacing = null;
        endP = false;

        printImage(line);
        super.writeLineStart(line);
        parseAlign(line);
        parseLineSpace(line);

        final String startTag = writeStartTag();
        if (startTag != null) {
            output.append(startTag);
        }
    }
}
开发者ID:wx5223,项目名称:fileconvert,代码行数:18,代码来源:PDFText2HTML.java
示例18: writeLineEnd
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Closes a line in the output. Lines reported empty by {@code isLineEmpty} are
 * skipped entirely. Otherwise the base implementation runs, a still open
 * style span is closed (and {@code lastStyle} cleared), and the end tag
 * returned by {@code writeEndTag()} is appended if there is one.
 *
 * @param line the glyphs of the line
 * @throws IOException if writing to the output fails
 */
@Override
protected void writeLineEnd(List<TextPosition> line) throws IOException {
    if (!isLineEmpty(line)) {
        super.writeLineEnd(line);

        // A span opened for the last glyph must not leak into the next line.
        if (lastStyle != null) {
            output.append("</span>");
            lastStyle = null;
        }

        final String endTag = writeEndTag();
        if (endTag != null) {
            output.append(endTag);
        }
    }
}
开发者ID:wx5223,项目名称:fileconvert,代码行数:18,代码来源:PDFText2HTML.java
示例19: writeLine
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Writes all glyphs of a line. A {@code WordSeparator} is written via
 * {@code writeWordSeparator()}; every other glyph is normalized with
 * {@code normalize.normalizePres} and written between the
 * {@code writeStringBefore} and {@code writeStringAfter} hooks.
 *
 * @param line          the glyphs of the line
 * @param isRtlDominant not used by this implementation
 * @param hasRtl        not used by this implementation
 * @throws IOException if writing to the output fails
 */
protected void writeLine(List<TextPosition> line, boolean isRtlDominant, boolean hasRtl) throws IOException {
    for (final TextPosition position : line) {
        if (position instanceof WordSeparator) {
            writeWordSeparator();
            continue;
        }

        final String raw = position.getCharacter();
        final String presentable = normalize.normalizePres(raw);

        writeStringBefore(position, raw, presentable);
        writeString(presentable);
        writeStringAfter(position, raw, presentable);
    }
}
开发者ID:wx5223,项目名称:fileconvert,代码行数:16,代码来源:LocalPDFTextStripper.java
示例20: getLastTrimmed
import org.apache.pdfbox.util.TextPosition; //导入依赖的package包/类
/**
 * Finds the last glyph of a line that carries visible content, i.e. whose
 * character string is neither {@code null} nor blank after trimming.
 * {@code null} list entries are skipped.
 *
 * @param line the glyphs of the line; may be {@code null} or empty
 * @return the last non-blank glyph; the last list entry (which may itself be
 *         {@code null}) if no glyph qualifies; or {@code null} if the line is
 *         {@code null} or empty
 */
protected static TextPosition getLastTrimmed(List<TextPosition> line) {
    // An empty line has no last element; without this guard the fallback
    // below would ask for index -1.
    if (line == null || line.isEmpty()) {
        return null;
    }

    for (int i = line.size() - 1; i >= 0; i--) {
        // Look the element up once instead of on every use.
        TextPosition candidate = line.get(i);
        if (candidate == null) {
            continue;
        }
        String c = candidate.getCharacter();
        if (c != null && c.trim().length() > 0) {
            return candidate;
        }
    }

    // Nothing visible on the line: fall back to the very last entry.
    return line.get(line.size() - 1);
}
开发者ID:wx5223,项目名称:fileconvert,代码行数:14,代码来源:LocalPDFTextStripper.java
注:本文中的org.apache.pdfbox.util.TextPosition类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论