本文整理汇总了Java中edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation类的典型用法代码示例。如果您正苦于以下问题：Java CharacterOffsetBeginAnnotation类的具体用法？Java CharacterOffsetBeginAnnotation怎么用？Java CharacterOffsetBeginAnnotation使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
CharacterOffsetBeginAnnotation类是edu.stanford.nlp.ling包中CoreAnnotations类的嵌套类，在下文中一共展示了CharacterOffsetBeginAnnotation类的11个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: TokenizedCoreLabelWrapper
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Builds a wrapper around one Stanford token by copying the annotations
 * read below out of the given {@link CoreLabel} into this object's fields.
 *
 * <p>Note on naming: {@code startSentenceOffset}/{@code endSentenceOffset}
 * are filled from the <em>character</em> offset annotations, whereas
 * {@code startOffset}/{@code endOffset} are filled from the <em>token</em>
 * begin/end annotations.
 *
 * @param cl the token label whose annotations are copied
 */
public TokenizedCoreLabelWrapper(final CoreLabel cl) {
  // Surface forms of the token and the text surrounding it.
  value = cl.get(ValueAnnotation.class);
  text = cl.get(TextAnnotation.class);
  LOGGER.trace("Wrapping token text: {}", text);
  originalText = cl.get(OriginalTextAnnotation.class);
  before = cl.get(BeforeAnnotation.class);
  after = cl.get(AfterAnnotation.class);

  // Character offsets of the token.
  startSentenceOffset = cl.get(CharacterOffsetBeginAnnotation.class);
  endSentenceOffset = cl.get(CharacterOffsetEndAnnotation.class);

  // Token begin/end are wrapped with ofNullable, so a missing annotation
  // becomes an empty Optional rather than a null field.
  startOffset = Optional.ofNullable(cl.get(TokenBeginAnnotation.class));
  endOffset = Optional.ofNullable(cl.get(TokenEndAnnotation.class));
  LOGGER.trace("TokenBegin: {}", startOffset);
  LOGGER.trace("TokenEnd: {}", endOffset);

  // Position of the token and of the sentence that contains it.
  idx = cl.get(IndexAnnotation.class);
  sentenceIdx = cl.get(SentenceIndexAnnotation.class);
  LOGGER.trace("Got sentence idx: {}", sentenceIdx);
}
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:24,代码来源:TokenizedCoreLabelWrapper.java
示例2: addTreebankNodeToIndexes
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Populates {@code node} from {@code tree} (character span, node type,
 * children, leaf flag) and then adds it to the indexes.
 *
 * @param node      the treebank node to fill in and index
 * @param jCas      passed through to {@code addTreebankNodeChildrenToIndexes}
 * @param tree      the parse (sub)tree this node represents; its label must be a {@link CoreMap}
 * @param tokenAnns the tokens that the tree label's begin/end indexes point into
 */
private void addTreebankNodeToIndexes(
    TreebankNode node,
    JCas jCas,
    Tree tree,
    List<CoreLabel> tokenAnns) {
  final CoreMap treeLabel = (CoreMap) tree.label();

  // Resolve the first and last token covered by this subtree. The end index
  // is decremented by one to reach the last covered token.
  final int firstTokenIndex = treeLabel.get(BeginIndexAnnotation.class);
  final CoreMap firstToken = tokenAnns.get(firstTokenIndex);
  final int lastTokenIndex = treeLabel.get(EndIndexAnnotation.class) - 1;
  final CoreMap lastToken = tokenAnns.get(lastTokenIndex);

  // The node spans from the start of the first token to the end of the last.
  final int spanBegin = firstToken.get(CharacterOffsetBeginAnnotation.class);
  final int spanEnd = lastToken.get(CharacterOffsetEndAnnotation.class);

  node.setBegin(spanBegin);
  node.setEnd(spanEnd);
  node.setNodeType(tree.value());
  // Children are built by the sibling method (mutual recursion).
  node.setChildren(this.addTreebankNodeChildrenToIndexes(node, jCas, tokenAnns, tree));
  node.setLeaf(node.getChildren().size() == 0);
  node.addToIndexes();
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:21,代码来源:StanfordCoreNlpAnnotator.java
示例3: adjustCharacterOffsets
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Rewrites the character offsets of every token so that, within each
 * sentence, tokens are laid out back to back starting at offset 0.
 *
 * <p>Each token's width is the length of its {@code OriginalTextAnnotation}.
 * When {@code setOriginalText} is true that annotation is first overwritten
 * with the token's {@code TextAnnotation} followed by a single space.
 *
 * <p>The sentences and tokens are modified <em>in place</em>; the returned
 * list is the very same instance that was passed in, not a copy.
 * Sentence-level offsets are not touched by this method.
 *
 * @param sentences       sentences whose tokens should be re-offset
 * @param setOriginalText whether to regenerate each token's original text
 *                        as {@code text + " "} before measuring it
 * @return {@code sentences}, after modification
 */
public static List<CoreMap> adjustCharacterOffsets(List<CoreMap> sentences, boolean setOriginalText)
{
    for (CoreMap sentence : sentences)
    {
        List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
        if (sentenceTokens == null)
        {
            // Nothing to re-offset for a sentence that carries no tokens.
            continue;
        }

        // Offsets restart at 0 for every sentence.
        int characterCount = 0;
        for (CoreLabel token : sentenceTokens)
        {
            if (setOriginalText)
            {
                token.set(CoreAnnotations.OriginalTextAnnotation.class, token.get(CoreAnnotations.TextAnnotation.class) + " ");
            }

            // Fall back to the plain text (or an empty width) when no original
            // text is present, instead of failing with a NullPointerException.
            String originalText = token.get(CoreAnnotations.OriginalTextAnnotation.class);
            if (originalText == null)
            {
                String text = token.get(CoreAnnotations.TextAnnotation.class);
                originalText = (text == null) ? "" : text;
            }

            int startCharacterCount = characterCount;
            int endCharacterCount = startCharacterCount + originalText.length();
            token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, startCharacterCount);
            token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, endCharacterCount);
            characterCount = endCharacterCount;
        }
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
    }
    return sentences;
}
开发者ID:dmnapolitano,项目名称:stanford-thrift,代码行数:27,代码来源:CoreNLPThriftUtil.java
示例4: concreteSectionToCoreMapList
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Converts every sentence of a section into a Stanford {@link CoreMap}.
 *
 * <p>Each resulting map carries the sentence index, the sentence's character
 * begin/end offsets, the sentence text cut out of {@code commText} with
 * those offsets, the running token begin/end counters and the token list.
 *
 * @param sect     the section whose sentences are converted
 * @param commText the text that the sentence character offsets index into
 * @return one {@link CoreMap} per sentence, in sentence order
 */
public static List<CoreMap> concreteSectionToCoreMapList(final Section sect, final String commText) {
  final List<CoreMap> coreMaps = new ArrayList<>();
  // Running token counter across the sentences of this section.
  int nextTokenBegin = 0;
  int sentenceIndex = 0;
  for (final Sentence sentence : sect.getSentenceList()) {
    final CoreMap sentenceMap = new ArrayCoreMap();
    sentenceMap.set(SentenceIndexAnnotation.class, sentenceIndex);

    // Character span of the sentence.
    final TextSpan span = sentence.getTextSpan();
    final int charBegin = span.getStart();
    final int charEnd = span.getEnding();
    LOGGER.debug("Setting stanford sentence BeginChar = {}", charBegin);
    sentenceMap.set(CharacterOffsetBeginAnnotation.class, charBegin);
    LOGGER.debug("Setting stanford sentence EndChar = {}", charEnd);
    sentenceMap.set(CharacterOffsetEndAnnotation.class, charEnd);

    // Sentence text, sliced from the full text by the span above.
    final String sentenceText = commText.substring(charBegin, charEnd);
    LOGGER.debug("Setting text: {}", sentenceText);
    sentenceMap.set(TextAnnotation.class, sentenceText);

    // Tokens, plus the token range this sentence occupies.
    final Tokenization tokenization = sentence.getTokenization();
    final List<CoreLabel> tokenLabels = tokenizationToCoreLabelList(tokenization, sentenceIndex, charBegin);
    final int tokenCount = tokenLabels.size();
    LOGGER.debug("Setting stanford sentence token begin: {}", nextTokenBegin);
    sentenceMap.set(TokenBeginAnnotation.class, nextTokenBegin);
    final int tokenEnd = nextTokenBegin + tokenCount;
    LOGGER.debug("Setting stanford sentence token end: {}", tokenEnd);
    sentenceMap.set(TokenEndAnnotation.class, tokenEnd);
    sentenceMap.set(TokensAnnotation.class, tokenLabels);

    nextTokenBegin = tokenEnd;
    coreMaps.add(sentenceMap);
    sentenceIndex++;
  }
  return coreMaps;
}
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:36,代码来源:ConcreteToStanfordMapper.java
示例5: CoreMapWrapper
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Builds a wrapper around one Stanford sentence-level {@link CoreMap} by
 * copying the annotations read below into this object's fields.
 *
 * @param cm  the map to read text, index, offsets and tokens from
 * @param gen the UUID generator stored on this wrapper
 */
public CoreMapWrapper(final CoreMap cm, final AnalyticUUIDGenerator gen) {
  this.gen = gen;

  // Text and position of the sentence.
  text = cm.get(TextAnnotation.class);
  idx = cm.get(SentenceIndexAnnotation.class);

  // Character span.
  startOffset = cm.get(CharacterOffsetBeginAnnotation.class);
  endOffset = cm.get(CharacterOffsetEndAnnotation.class);

  // Token span.
  tokenBeginOffset = cm.get(TokenBeginAnnotation.class);
  tokenEndOffset = cm.get(TokenEndAnnotation.class);

  // The tokens themselves.
  clList = cm.get(TokensAnnotation.class);
  LOGGER.trace("CoreLabel list has {} elements.", clList.size());
}
开发者ID:hltcoe,项目名称:concrete-stanford-deprecated2,代码行数:17,代码来源:CoreMapWrapper.java
示例6: makeToken
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Creates a {@link CoreLabel} whose word and current value are {@code str}.
 * When {@code addIndices} is set, the label also gets character offsets
 * {@code begin} and {@code begin + length}.
 * The string is used as given; no substring is taken.
 *
 * @param str    the token string
 * @param begin  character offset where the token starts
 * @param length number of characters the token covers
 * @return the newly created label
 */
public CoreLabel makeToken(String str, int begin, int length) {
  final CoreLabel token = new CoreLabel();
  token.setWord(str);
  token.setCurrent(str);
  if (!addIndices) {
    return token;
  }
  final int end = begin + length;
  token.set(CharacterOffsetBeginAnnotation.class, begin);
  token.set(CharacterOffsetEndAnnotation.class, end);
  return token;
}
开发者ID:FabianFriedrich,项目名称:Text2Process,代码行数:15,代码来源:CoreLabelTokenFactory.java
示例7: getAnnotationFromParseTrees
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Builds an {@link Annotation} from a list of bracketed parse-tree strings.
 *
 * <p>For each tree the token strings are pulled out of the bracketing, a
 * sentence is created from them (text, character offsets, tokens, token
 * begin/end) and the parse annotations are filled in from the tree. The
 * resulting sentences are passed through
 * {@code adjustCharacterOffsets(sentences, true)} and stored as the
 * sentences of an annotation whose text is all tokens joined together.
 *
 * <p>Each sentence's character span is {@code [0, length of its text)}.
 *
 * @param parseTrees one bracketed parse tree per sentence; every
 *                   "(TAG token)" leaf is expected to contain a space
 *                   between tag and token
 * @return an annotation holding one sentence per parse tree
 */
public static Annotation getAnnotationFromParseTrees(List<String> parseTrees)
{
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<String> allTokens = new ArrayList<String>();
    // Running token counter across all sentences.
    int tokenOffset = 0;
    for (String tree : parseTrees)
    {
        // Pull the token out of every "(TAG token)" leaf of the bracketing.
        List<String> tokens = new ArrayList<String>();
        for (String chunk : tree.split("\\) "))
        {
            String[] pieces = chunk.split("\\(");
            String[] tagAndToken = pieces[pieces.length - 1].trim().replaceAll("\\)+$", "").split(" ");
            tokens.add(tagAndToken[1]);
        }
        allTokens.addAll(tokens);

        String[] tokensArr = tokens.toArray(new String[tokens.size()]);
        List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
        String originalText = Sentence.listToString(tokens);

        CoreMap sentence = new Annotation(originalText);
        sentence.set(CharacterOffsetBeginAnnotation.class, 0);
        // The sentence ends where its text ends. (Previously this used the
        // length of the last token only, which understated the span.)
        sentence.set(CharacterOffsetEndAnnotation.class, originalText.length());
        sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
        sentence.set(CoreAnnotations.TokenBeginAnnotation.class, tokenOffset);
        tokenOffset += sentenceTokens.size();
        sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokenOffset);

        ParserAnnotatorUtils.fillInParseAnnotations(false, true, new EnglishGrammaticalStructureFactory(), sentence, Tree.valueOf(tree));
        sentences.add(sentence);
    }

    Annotation allSentences = new Annotation(Sentence.listToString(allTokens));
    allSentences.set(CoreAnnotations.SentencesAnnotation.class,
            adjustCharacterOffsets(sentences, true));
    return allSentences;
}
开发者ID:dmnapolitano,项目名称:stanford-thrift,代码行数:40,代码来源:CoreNLPThriftUtil.java
示例8: getAnnotationFromTokens
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Builds an {@link Annotation} containing one new sentence made from
 * {@code tokens}, optionally followed by the sentences of an existing
 * annotation.
 *
 * <p>The new sentence gets character span {@code [0, length of its text)}
 * and token span {@code [0, number of tokens)}. It is placed <em>before</em>
 * any sentences taken from {@code existingAnnotation}. All sentences are then
 * passed through {@code adjustCharacterOffsets(sentences, true)}.
 *
 * @param tokens             the token strings of the new sentence
 * @param existingAnnotation an annotation to extend, or {@code null} to
 *                           start a fresh one; when given, the result is a
 *                           copy of it with its sentences replaced
 * @return the new or extended annotation
 */
public static Annotation getAnnotationFromTokens(List<String> tokens, Annotation existingAnnotation)
{
    String[] tokensArr = tokens.toArray(new String[tokens.size()]);
    List<CoreLabel> sentenceTokens = Sentence.toCoreLabelList(tokensArr);
    String originalText = Sentence.listToString(tokens);

    CoreMap sentence = new Annotation(originalText);
    sentence.set(CharacterOffsetBeginAnnotation.class, 0);
    // The sentence ends where its text ends. (Previously this used the length
    // of the last token only, and failed outright on an empty token list.)
    sentence.set(CharacterOffsetEndAnnotation.class, originalText.length());
    sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens);
    sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
    sentence.set(CoreAnnotations.TokenEndAnnotation.class, sentenceTokens.size());

    List<CoreMap> sentences = new ArrayList<CoreMap>();
    sentences.add(sentence);

    Annotation allSentences;
    if (existingAnnotation != null)
    {
        // An existing annotation may not have been sentence-split yet.
        List<CoreMap> existingSentences = existingAnnotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (existingSentences != null)
        {
            sentences.addAll(existingSentences);
        }
        allSentences = existingAnnotation.copy();
    }
    else
    {
        allSentences = new Annotation(originalText);
    }
    allSentences.set(CoreAnnotations.SentencesAnnotation.class,
            adjustCharacterOffsets(sentences, true));
    return allSentences;
}
开发者ID:dmnapolitano,项目名称:stanford-thrift,代码行数:37,代码来源:CoreNLPThriftUtil.java
示例9: makeToken
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Creates a {@link CoreLabel} for a token whose original text differs from
 * its token text. Value and word are set to {@code tokenText}, the original
 * text to {@code originalText}. When {@code addIndices} is set, the label
 * also gets character offsets {@code begin} and {@code begin + length}.
 * The strings are used as given; no substring is taken.
 *
 * @param tokenText    the token's value and word
 * @param originalText the token's original text
 * @param begin        character offset where the token starts
 * @param length       number of characters the token covers
 * @return the newly created label
 */
public CoreLabel makeToken(String tokenText, String originalText, int begin, int length) {
  final CoreLabel token;
  if (addIndices) {
    // NOTE(review): the argument 5 is presumably an initial capacity for the
    // annotations set below; confirm against the CoreLabel constructor.
    token = new CoreLabel(5);
  } else {
    token = new CoreLabel();
  }
  token.setValue(tokenText);
  token.setWord(tokenText);
  token.setOriginalText(originalText);
  if (!addIndices) {
    return token;
  }
  final int end = begin + length;
  token.set(CharacterOffsetBeginAnnotation.class, begin);
  token.set(CharacterOffsetEndAnnotation.class, end);
  return token;
}
开发者ID:amark-india,项目名称:eventspotter,代码行数:17,代码来源:CoreLabelTokenFactory.java
示例10: addEntityMentions
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Converts the entity mentions of a sentence into JSON-ready maps and stores
 * them in {@code sent_info} under the key {@code "entitymentions"}.
 *
 * <p>Each mention map holds {@code "tokspan"} (token begin, token end),
 * {@code "charspan"} (character begin, character end), {@code "sentence"}
 * (sentence index) and {@code "type"} (entity type). {@code "normalized"}
 * and {@code "timex_xml"} are added only when the mention carries the
 * corresponding annotation.
 *
 * <p>A sentence without a mentions annotation yields an empty list rather
 * than a {@code NullPointerException}.
 *
 * @param sent_info the per-sentence output map to add the mentions to
 * @param sentence  the sentence whose mentions are exported
 */
static void addEntityMentions(Map<String,Object> sent_info, CoreMap sentence) {
  List<Map<String, Object>> jsonMentions = new ArrayList<>();
  List<CoreMap> coreMentions = sentence.get(MentionsAnnotation.class);
  /* trying to figure out the keys in each mention. here's a printout from one.
  MENTION August 2014
  class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation August 2014
  class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation 3
  class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation 14
  class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation [August-2, 2014-3]
  class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation 1
  class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation 3
  class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation DATE
  class edu.stanford.nlp.ling.CoreAnnotations$NormalizedNamedEntityTagAnnotation 2014-08
  class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation DATE
  class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation 0
  class edu.stanford.nlp.time.TimeAnnotations$TimexAnnotation <TIMEX3 tid="t1" type="DATE" value="2014-08">August 2014</TIMEX3>
  MENTION Barack Obama
  class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation Barack Obama
  class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation 17
  class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation 29
  class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation [Barack-5, Obama-6]
  class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation 4
  class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation 6
  class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation PERSON
  class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation PERSON
  class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation 0
  MENTION Paris
  class edu.stanford.nlp.ling.CoreAnnotations$TextAnnotation Paris
  class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetBeginAnnotation 66
  class edu.stanford.nlp.ling.CoreAnnotations$CharacterOffsetEndAnnotation 71
  class edu.stanford.nlp.ling.CoreAnnotations$TokensAnnotation [Paris-5]
  class edu.stanford.nlp.ling.CoreAnnotations$TokenBeginAnnotation 14
  class edu.stanford.nlp.ling.CoreAnnotations$TokenEndAnnotation 15
  class edu.stanford.nlp.ling.CoreAnnotations$NamedEntityTagAnnotation LOCATION
  class edu.stanford.nlp.ling.CoreAnnotations$EntityTypeAnnotation LOCATION
  class edu.stanford.nlp.ling.CoreAnnotations$SentenceIndexAnnotation 1
  */
  if (coreMentions != null) {
    for (CoreMap mention : coreMentions) {
      Map<String, Object> m = new HashMap<String, Object>();
      m.put("tokspan", Lists.newArrayList(
          mention.get(TokenBeginAnnotation.class).intValue(),
          mention.get(TokenEndAnnotation.class).intValue()));
      m.put("charspan", Lists.newArrayList(
          mention.get(CharacterOffsetBeginAnnotation.class).intValue(),
          mention.get(CharacterOffsetEndAnnotation.class).intValue()));
      m.put("sentence", mention.get(SentenceIndexAnnotation.class).intValue());
      String entityType = mention.get(EntityTypeAnnotation.class);
      m.put("type", entityType);
      // Optional keys: only present on some mentions (see printout above).
      if (mention.containsKey(NormalizedNamedEntityTagAnnotation.class)) {
        m.put("normalized", mention.get(NormalizedNamedEntityTagAnnotation.class));
      }
      if (mention.containsKey(TimexAnnotation.class)) {
        m.put("timex_xml", mention.get(TimexAnnotation.class).toString());
      }
      jsonMentions.add(m);
    }
  }
  sent_info.put("entitymentions", jsonMentions);
}
开发者ID:UKPLab,项目名称:tac2015-event-detection,代码行数:64,代码来源:JsonPipeline.java
示例11: getTypeSurfaceForms
import edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation; //导入依赖的package包/类
/**
 * Rewrites the document as a "text_lemma_POS" token string in which the
 * named entity at {@code [neStartPos, neEndPos)} is replaced by a single
 * {@code TypeExtractor.ENTITY_MARKING}, runs the type extractor on that
 * string and hands the extracted types to {@code generateNEsForTypes}.
 *
 * <p>For every token, its character span in the original text and its span
 * in the generated string are recorded in a position table. Tokens of the
 * entity after the first one contribute nothing to the generated string and
 * get an empty span there.
 *
 * <p>If no token overlaps the entity span an error is logged and nothing
 * else happens; if the extractor returns {@code null} a warning is logged.
 *
 * @param document   the annotated document providing the tokens
 * @param neStartPos character offset where the named entity starts
 * @param neEndPos   character offset where the named entity ends
 * @param results    passed through to {@code generateNEsForTypes}
 */
private void getTypeSurfaceForms(Annotation document, int neStartPos, int neEndPos,
    List<ExtendedTypedNamedEntity> results) {
  final StringBuilder generated = new StringBuilder();
  final List<CoreLabel> tokens = document.get(TokensAnnotation.class);
  final int[][] tokenPositions = new int[tokens.size()][4];
  boolean entityFound = false;
  int id = 0;
  for (CoreLabel token : tokens) {
    final int[] position = tokenPositions[id];
    final int origStart = token.get(CharacterOffsetBeginAnnotation.class);
    position[ORIG_TEXT_START] = origStart;
    final int origEnd = token.get(CharacterOffsetEndAnnotation.class);
    position[ORIG_TEXT_END] = origEnd;

    final boolean outsideEntity = (origEnd <= neStartPos) || (origStart >= neEndPos);

    // A blank separates generated chunks. Ordinary tokens and the first
    // entity token each produce a chunk; later entity tokens produce none.
    final boolean producesChunk = outsideEntity || !entityFound;
    if (producesChunk && generated.length() > 0) {
      generated.append(' ');
    }

    position[GEN_TEXT_START] = generated.length();
    if (outsideEntity) {
      generated.append(token.getString(TextAnnotation.class))
          .append('_')
          .append(token.getString(LemmaAnnotation.class))
          .append('_')
          .append(token.getString(PartOfSpeechAnnotation.class));
    } else if (!entityFound) {
      generated.append(TypeExtractor.ENTITY_MARKING);
      entityFound = true;
    }
    position[GEN_TEXT_END] = generated.length();

    ++id;
  }

  if (!entityFound) {
    LOGGER.error("Couldn't find the named entity (" + neStartPos + ", " + neEndPos + ") inside the document \""
        + document.toString() + "\".");
    return;
  }

  final String parseableText = generated.toString();
  final List<String> types = extractor.extractTypeStrings(parseableText);
  if (types == null) {
    LOGGER.warn("Extractor was not able to process the text \"" + parseableText + "\".");
    return;
  }
  if (LOGGER.isInfoEnabled()) {
    LOGGER.info("Found types " + Arrays.toString(types.toArray()) + " inside the sentence \"" + parseableText + "\".");
  }
  generateNEsForTypes(document.get(TokensAnnotation.class), tokenPositions, parseableText, types, results);
}
开发者ID:dice-group,项目名称:Cetus,代码行数:53,代码来源:CetusSurfaceFormExtractor.java
注：本文中的edu.stanford.nlp.ling.CoreAnnotations.CharacterOffsetBeginAnnotation类示例整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论