本文整理汇总了Java中org.apache.uima.cas.FSIndex类的典型用法代码示例。如果您正苦于以下问题：Java FSIndex类的具体用法？Java FSIndex怎么用？Java FSIndex使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
FSIndex类属于org.apache.uima.cas包,在下文中一共展示了FSIndex类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: process
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Categorizes the document text and records the outcome on the document annotation.
 *
 * <p>The first {@code DocumentAnnotation} found in the annotation index is reused; when the
 * index holds none, a new one covering the whole document text is created and indexed.
 *
 * @param aJCas the CAS whose document text is categorized
 */
public void process(JCas aJCas) {
    // Ask the categorizer for its best category for this document's text.
    String category = categorizer.getBestCategory(categorizer.categorize(aJCas.getDocumentText()));

    FSIndex documentAnnotations = aJCas.getAnnotationIndex(DocumentAnnotation.type);
    Iterator cursor = documentAnnotations.iterator();
    DocumentAnnotation target;
    if (!cursor.hasNext()) {
        // No document annotation yet: create one spanning [0, text length) and index it.
        target = new DocumentAnnotation(aJCas);
        target.setBegin(0);
        target.setEnd(aJCas.getDocumentText().length());
        target.addToIndexes();
    } else {
        target = (DocumentAnnotation) cursor.next();
    }

    // The "process" flag is set only for the "contract" category; "classified" is always set.
    target.setProcess(category.equals("contract"));
    target.setClassified(true);
}
开发者ID:IE4OpenData,项目名称:Octroy,代码行数:20,代码来源:ContractClassifier.java
示例2: process
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Stores, on every indexed {@code Token}, the number of lower-case characters in its string.
 *
 * <p>Tokens whose string is {@code null} are left untouched.
 *
 * @param jCas the CAS whose tokens are updated
 * @param rs   the result specification (not used by this method)
 * @throws AnnotatorProcessException declared by the annotator contract
 */
public void process(JCas jCas, ResultSpecification rs)
        throws AnnotatorProcessException {
    FSIndex tokenIndex = jCas.getJFSIndexRepository().getAnnotationIndex(Token.type);
    for (Iterator cursor = tokenIndex.iterator(); cursor.hasNext();) {
        Token current = (Token) cursor.next();
        String text = current.getString();
        if (text == null) {
            continue;
        }
        // Count the characters that Character.isLowerCase accepts.
        int lowerCaseCount = 0;
        int position = 0;
        while (position < text.length()) {
            if (Character.isLowerCase(text.charAt(position))) {
                lowerCaseCount++;
            }
            position++;
        }
        current.setLowerCaseLetters(lowerCaseCount);
    }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:21,代码来源:CountLowercaseAnnotator.java
示例3: process
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Removes from the index repository every {@code Token} whose span has an even length.
 *
 * <p>Matching tokens are collected while iterating the token index and are removed only
 * after that iteration has finished.
 *
 * @param jCas the CAS whose tokens are filtered
 * @param rs   the result specification (not used by this method)
 * @throws AnnotatorProcessException declared by the annotator contract
 */
public void process(JCas jCas, ResultSpecification rs)
        throws AnnotatorProcessException {
    JFSIndexRepository jcasIndexes = jCas.getJFSIndexRepository();
    FSIndexRepository underlyingIndexes = jcasIndexes.getFSIndexRepository();
    FSIndex tokenIndex = jcasIndexes.getAnnotationIndex(Token.type);

    // Pass 1: collect the tokens whose (end - begin) is even.
    List evenLengthTokens = new ArrayList();
    for (Iterator cursor = tokenIndex.iterator(); cursor.hasNext();) {
        Token candidate = (Token) cursor.next();
        int spanLength = candidate.getEnd() - candidate.getBegin();
        if (spanLength % 2 == 0) {
            evenLengthTokens.add(candidate);
        }
    }

    // Pass 2: remove the collected tokens from the index repository.
    for (Iterator pending = evenLengthTokens.iterator(); pending.hasNext();) {
        underlyingIndexes.removeFS((Token) pending.next());
    }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:23,代码来源:RemoveEvenLengthTokens.java
示例4: hasProcess
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Returns the "classified" flag of the first indexed {@code DocumentAnnotation}.
 *
 * @param aJCas the CAS to inspect
 * @return the flag of the first document annotation, or {@code false} when none is indexed
 */
private boolean hasProcess(JCas aJCas) {
    FSIndex documentAnnotations = aJCas.getAnnotationIndex(DocumentAnnotation.type);
    Iterator cursor = documentAnnotations.iterator();
    if (!cursor.hasNext()) {
        return false;
    }
    DocumentAnnotation first = (DocumentAnnotation) cursor.next();
    return first.getClassified();
}
开发者ID:IE4OpenData,项目名称:Octroy,代码行数:8,代码来源:ContractFlowController.java
示例5: doProcess
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Removes from the CAS every entity of the configured type whose covered text is contained
 * in {@code thingsToRemove}.
 *
 * <p>When {@code caseSensitive} is false the covered text is lower-cased before the lookup.
 * Matches are gathered during iteration and removed afterwards.
 *
 * @param aJCas the CAS to filter
 * @throws AnalysisEngineProcessException if the entity type cannot be instantiated
 */
@Override
public void doProcess(JCas aJCas) throws AnalysisEngineProcessException {
    // Instantiate the configured entity class reflectively; it is used to obtain the UIMA type.
    Entity prototype;
    try {
        prototype = (Entity) et.getConstructor(JCas.class).newInstance(aJCas);
    } catch (Exception cause) {
        throw new AnalysisEngineProcessException(cause);
    }

    FSIndex<Annotation> candidates = aJCas.getAnnotationIndex(prototype.getType());
    Set<Entity> blacklisted = new HashSet<Entity>();
    for (Annotation annotation : candidates) {
        Entity candidate = (Entity) annotation;
        String lookupKey = candidate.getCoveredText();
        if (!caseSensitive) {
            lookupKey = lookupKey.toLowerCase();
        }
        if (!thingsToRemove.contains(lookupKey)) {
            continue;
        }
        getMonitor().info("Removing entity '{}' because it appears on the blacklist", candidate.getCoveredText());
        blacklisted.add(candidate);
    }

    getMonitor().debug("{} has removed {} entities", this.getClass().getName(), blacklisted.size());
    for (Entity doomed : blacklisted) {
        removeFromJCasIndex(doomed);
    }
}
开发者ID:dstl,项目名称:baleen,代码行数:31,代码来源:Blacklist.java
示例6: testTimes
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over two sentences containing a clock time and abbreviated years and
 * checks the resulting tokens, in index order.
 */
@Test
public void testTimes() throws Exception {
    String[] expectedTokens = {
        "I", "said", "at", "4:45", "pm", ".",
        "I", "was", "born", "in", "'80", ",", "not", "the", "'70s", "."
    };

    this.createSentences(
        "I said at 4:45pm.",
        " I was born in '80, not the '70s.");
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(16, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:30,代码来源:TokenizerTest.java
示例7: testDollars
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over sentences containing currency amounts (surrounded by blank
 * sentences) and checks the resulting tokens, in index order.
 */
@Test
public void testDollars() throws Exception {
    String[] expectedTokens = {
        "You", "`", "paid", "'", "US$", "170,000", "?!",
        "You", "should", "'ve", "paid", "only$16.75", "."
    };

    this.createSentences(
        " ",
        "You `paid' US$170,000?!",
        " You should've paid only$16.75.",
        " ",
        " ");
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(13, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:30,代码来源:TokenizerTest.java
示例8: testPercents
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over a single sentence containing percentages and an abbreviation and
 * checks the resulting tokens, in index order.
 */
@Test
public void testPercents() throws Exception {
    String[] expectedTokens = {
        "1.", "Buy", "a", "new", "Chevrolet", "(", "37", "%", "-", "owned",
        "in", "the", "U.S", "..", ")", ".", "15", "%"
    };

    jCas.setDocumentText(" 1. Buy a new Chevrolet (37%-owned in the U.S..) . 15%");
    // One sentence annotation over offsets 0..54 is indexed before the pipeline runs.
    new Sentence(jCas, 0, 54).addToIndexes();
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(18, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:30,代码来源:TokenizerTest.java
示例9: testTimes
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over two sentences containing a clock time and abbreviated years and
 * checks the resulting tokens, in index order.
 */
@Test
public void testTimes() throws Exception {
    String[] expectedTokens = {
        "I", "said", "at", "4:45", "pm", ".",
        "I", "was", "born", "in", "'80", ",", "not", "the", "'70s", "."
    };

    this.createSentences(
        "I said at 4:45pm.",
        " I was born in '80, not the '70s.");
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(16, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:30,代码来源:TokenizerAndTokenAnnotatorTest.java
示例10: testDollars
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over sentences containing currency amounts (surrounded by blank
 * sentences) and checks the resulting tokens, in index order.
 */
@Test
public void testDollars() throws Exception {
    String[] expectedTokens = {
        "You", "`", "paid", "'", "US$", "170,000", "?", "!",
        "You", "should", "'ve", "paid", "only", "$", "16.75", "."
    };

    this.createSentences(
        " ",
        "You `paid' US$170,000?!",
        " You should've paid only$16.75.",
        " ",
        " ");
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(16, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:32,代码来源:TokenizerAndTokenAnnotatorTest.java
示例11: testPercents
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over a single sentence containing percentages and an abbreviation and
 * checks the resulting tokens, in index order.
 */
@Test
public void testPercents() throws Exception {
    String[] expectedTokens = {
        "1", ".", "Buy", "a", "new", "Chevrolet", "(", "37%-owned",
        "in", "the", "U.S.", ".", ")", ".", "15", "%"
    };

    jCas.setDocumentText(" 1. Buy a new Chevrolet (37%-owned in the U.S..) . 15%");
    // One sentence annotation over offsets 0..54 is indexed before the pipeline runs.
    new Sentence(jCas, 0, 54).addToIndexes();
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(16, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:28,代码来源:TokenizerAndTokenAnnotatorTest.java
示例12: craftTest1
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Feeds a bracketed treebank parse through {@code TreebankGoldAnnotator} via the treebank
 * view, then checks the single sentence created in the default view and the terminals of
 * the first {@code TopTreebankNode}.
 */
@Test
public void craftTest1() throws Exception {
    String treebankParse = "( (X (NP (NP (NML (NN Complex ) (NN trait )) (NN analysis )) (PP (IN of ) (NP (DT the ) (NN mouse ) (NN striatum )))) (: : ) (S (NP-SBJ (JJ independent ) (NNS QTLs )) (VP (VBP modulate ) (NP (NP (NN volume )) (CC and ) (NP (NN neuron ) (NN number)))))) )";
    String expectedText = "Complex trait analysis of the mouse striatum: independent QTLs modulate volume and neuron number";
    String[] expectedTerminals = {
        "Complex", "trait", "analysis", "of", "the", "mouse", "striatum", ":",
        "independent", "QTLs", "modulate", "volume", "and", "neuron", "number"
    };

    AnalysisEngine engine = AnalysisEngineFactory.createEngine(TreebankGoldAnnotator.class);
    JCas treebankView = jCas.createView(PennTreebankReader.TREEBANK_VIEW);
    treebankView.setDocumentText(treebankParse);
    engine.process(jCas);

    // Exactly one sentence is expected in the default view, covering the expected text.
    JCas goldView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
    FSIndex<Annotation> sentenceIndex = goldView.getAnnotationIndex(Sentence.type);
    assertEquals(1, sentenceIndex.size());
    assertEquals(expectedText, JCasUtil.selectByIndex(goldView, Sentence.class, 0).getCoveredText());

    // The terminals of the first top node must match the leaf words of the parse, in order.
    FSIndex<Annotation> topNodeIndex = goldView.getAnnotationIndex(TopTreebankNode.type);
    TopTreebankNode topNode = (TopTreebankNode) topNodeIndex.iterator().next();
    for (int position = 0; position < expectedTerminals.length; position++) {
        assertEquals(expectedTerminals[position], topNode.getTerminals(position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:41,代码来源:TreebankGoldAnnotatorTest.java
示例13: testWhenDefaultViewDocumentTextIsSet
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Checks {@code TreebankGoldAnnotator} when the default view already carries document text
 * before the annotator runs: one sentence is expected, covering that pre-set text.
 */
@Test
public void testWhenDefaultViewDocumentTextIsSet() throws Exception {
    String treebankParse = "( (X (NP (NP (NML (NN Complex ) (NN trait )) (NN analysis )) (PP (IN of ) (NP (DT the ) (NN mouse ) (NN striatum )))) (: : ) (S (NP-SBJ (JJ independent ) (NNS QTLs )) (VP (VBP modulate ) (NP (NP (NN volume )) (CC and ) (NP (NN neuron ) (NN number)))))) )";
    // Note the space before the colon ("striatum :") in the pre-set text.
    String expectedText = "Complex trait analysis of the mouse striatum : independent QTLs modulate volume and neuron number";

    // Set the default view's text up front, as a collection reader might do
    // (e.g. {@link FilesCollectionReader}).
    JCas defaultView = ViewCreatorAnnotator.createViewSafely(jCas, CAS.NAME_DEFAULT_SOFA);
    defaultView.setSofaDataString(expectedText, "text/plain");

    AnalysisEngine engine = AnalysisEngineFactory.createEngine(TreebankGoldAnnotator.class);
    JCas treebankView = jCas.createView(PennTreebankReader.TREEBANK_VIEW);
    treebankView.setDocumentText(treebankParse);
    engine.process(jCas);

    JCas goldView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
    FSIndex<Annotation> sentenceIndex = goldView.getAnnotationIndex(Sentence.type);
    assertEquals(1, sentenceIndex.size());
    assertEquals(expectedText, JCasUtil.selectByIndex(goldView, Sentence.class, 0).getCoveredText());
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:30,代码来源:TreebankGoldAnnotatorTest.java
示例14: process
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Prints one line per indexed {@code Token} to standard output, showing its string, kind
 * and orth values.
 *
 * @param jCas the CAS whose tokens are printed
 * @param rs   the result specification (not used by this method)
 * @throws AnnotatorProcessException declared by the annotator contract
 */
public void process(JCas jCas, ResultSpecification rs)
        throws AnnotatorProcessException {
    FSIndex tokenIndex = jCas.getJFSIndexRepository().getAnnotationIndex(Token.type);
    for (Iterator cursor = tokenIndex.iterator(); cursor.hasNext();) {
        Token current = (Token) cursor.next();
        // Three fields on one line; only the last call terminates the line.
        System.out.print("Token: String=\"" + current.getString() + "\", ");
        System.out.print("Kind=\"" + current.getKind() + "\", ");
        System.out.println("Orth=\"" + current.getOrth() + "\"");
    }
}
开发者ID:Network-of-BioThings,项目名称:GettinCRAFTy,代码行数:13,代码来源:TokenPrinterAnnotator.java
示例15: shouldProcess
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Returns the "process" flag of the first indexed {@code DocumentAnnotation}.
 *
 * <p>The original called {@code next()} unconditionally, which throws
 * {@code NoSuchElementException} when no document annotation is indexed. An empty index
 * now yields {@code false}, matching how {@code hasProcess} treats the same situation.
 *
 * @param aJCas the CAS to inspect
 * @return the flag of the first document annotation, or {@code false} when none is indexed
 */
private boolean shouldProcess(JCas aJCas) {
    FSIndex docAnnIndex = aJCas.getAnnotationIndex(DocumentAnnotation.type);
    Iterator docAnnIter = docAnnIndex.iterator();
    if (!docAnnIter.hasNext()) {
        // NOTE(review): false is assumed to be the right default here — confirm with the flow logic.
        return false;
    }
    return ((DocumentAnnotation) docAnnIter.next()).getProcess();
}
开发者ID:IE4OpenData,项目名称:Octroy,代码行数:6,代码来源:ContractFlowController.java
示例16: testMarysDog
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over sentences rich in quotes, possessives and symbol runs and checks
 * the resulting tokens, in index order.
 */
@Test
public void testMarysDog() throws Exception {
    // The quotes following AT&T are expected as one "'''" token here,
    // not as separate "'" and "''" tokens.
    String[] expectedTokens = {
        "\"", "John", "&", "Mary", "'s", "'", "dog", "'", "...", "\"", ",",
        "Jane", "thought", "(", "to", "herself", ")", ".",
        "\"", "What", "a", "@#$%*", "!",
        "a", "-", "``", "I", "like", "'", "AT&T", "'''", "."
    };

    this.createSentences(
        "\"John & Mary's 'dog'...\", Jane thought (to herself).",
        "\"What a @#$%*!",
        "a- ``I like 'AT&T'''.");
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(32, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:50,代码来源:TokenizerTest.java
示例17: testWatcha
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over sentences containing contractions, colloquial forms and mixed
 * quote styles and checks the resulting tokens, in index order.
 */
@Test
public void testWatcha() throws Exception {
    String[] expectedTokens = {
        "I", "ca", "n't", "believe", "they", "wan", "na", "keep", "40", "%",
        "of", "that", ".", "\"",
        "``", "What", "cha", "think", "?", "''",
        "\"", "I", "do", "n't", "---", "think", "so", "...", ",", "\""
    };

    this.createSentences(
        "I can't believe they wanna keep 40% of that.\"",
        " ``Whatcha think?''",
        " \"I don't --- think so...,\"");
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(30, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:45,代码来源:TokenizerTest.java
示例18: testMarysDog
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over sentences rich in quotes, possessives and symbol runs and checks
 * the resulting tokens, in index order.
 */
@Test
public void testMarysDog() throws Exception {
    String[] expectedTokens = {
        "\"", "John", "&", "Mary", "'s", "'", "dog", "'", "...", "\"", ",",
        "Jane", "thought", "(", "to", "herself", ")", ".",
        "\"", "What", "a", "@", "#", "$", "%", "*", "!",
        "a", "-", "``", "I", "like", "'", "AT&T", "'", "''", "."
    };

    this.createSentences(
        "\"John & Mary's 'dog'...\", Jane thought (to herself).",
        "\"What a @#$%*!",
        "a- ``I like 'AT&T'''.");
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(37, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:51,代码来源:TokenizerAndTokenAnnotatorTest.java
示例19: testWatcha
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
/**
 * Runs the tokenizer over sentences containing contractions, colloquial forms and mixed
 * quote styles and checks the resulting tokens, in index order.
 */
@Test
public void testWatcha() throws Exception {
    String[] expectedTokens = {
        "I", "ca", "n't", "believe", "they", "wan", "na", "keep", "40", "%",
        "of", "that", ".", "\"",
        "``", "Wha", "t", "cha", "think", "?", "''",
        "\"", "I", "do", "n't", "---", "think", "so", "...", ",", "\""
    };

    this.createSentences(
        "I can't believe they wanna keep 40% of that.\"",
        " ``Whatcha think?''",
        " \"I don't --- think so...,\"");
    SimplePipeline.runPipeline(jCas, tokenizer);

    FSIndex<Annotation> tokenIndex = jCas.getAnnotationIndex(Token.type);
    assertEquals(31, tokenIndex.size());
    for (int position = 0; position < expectedTokens.length; position++) {
        assertEquals(expectedTokens[position], JCasUtil.selectByIndex(jCas, Token.class, position).getCoveredText());
    }
}
开发者ID:ClearTK,项目名称:cleartk,代码行数:45,代码来源:TokenizerAndTokenAnnotatorTest.java
示例20: testExtract
import org.apache.uima.cas.FSIndex; //导入依赖的package包/类
@Test
public void testExtract
|
请发表评论