本文整理汇总了Java中org.aksw.gerbil.transfer.nif.Marking类的典型用法代码示例。如果您正苦于以下问题:Java Marking类的具体用法?Java Marking怎么用?Java Marking使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
Marking类属于org.aksw.gerbil.transfer.nif包,在下文中一共展示了Marking类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: findMarkings
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Converts tab-separated annotation lines into {@link TypedNamedEntity} markings.
 *
 * <p>Each line is split on tabs: column 1 is parsed as the start offset, column 2 as
 * the end offset, column 3 is the URI and the column at {@code typeIndex} is mapped to
 * a type URI through {@code getTypeURI}. A URI starting with {@code "NIL"} is replaced
 * by the empty string.
 *
 * @param lines the annotation lines to convert
 * @param text  not used by this method
 * @return one marking per annotation line, in the iteration order of {@code lines}
 */
protected static List<Marking> findMarkings(Set<String> lines, String text) {
    List<Marking> result = new ArrayList<Marking>();
    for (String row : lines) {
        String[] fields = row.split("\t");
        int begin = Integer.parseInt(fields[1]);
        // The length is derived from the start and end offsets given in the line.
        int spanLength = Integer.parseInt(fields[2]) - begin;
        String entityUri = fields[3].startsWith("NIL") ? "" : fields[3];
        Set<String> typeUris = new HashSet<String>();
        typeUris.add(getTypeURI(fields[typeIndex]));
        result.add(new TypedNamedEntity(begin, spanLength, entityUri, typeUris));
    }
    return result;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2016Dataset.java
示例2: createDocument
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Creates a {@link Document} from a text and its parsed MSNBC annotations.
 *
 * <p>For every parsed named entity the surface form is cut out of {@code text} and
 * compared with the surface form stored in the entity; a mismatch is only logged as a
 * warning. The entity's URIs are passed to {@code addDBpediaUris} before the entity is
 * converted and added. Finally {@code mergeSubNamedEntity} is applied to the document.
 *
 * @param fileName     used to generate the document URI
 * @param text         the document text
 * @param parsedResult the parsed annotations of the document
 * @return the created document
 */
protected Document createDocument(String fileName, String text, MSNBC_Result parsedResult) {
    String documentUri = generateDocumentUri(fileName);
    List<Marking> markings = new ArrayList<Marking>(parsedResult.getMarkings().size());
    for (MSNBC_NamedEntity entity : parsedResult.getMarkings()) {
        int begin = entity.getStartPosition();
        String textSurfaceForm = text.substring(begin, begin + entity.getLength());
        boolean surfaceFormMatches = textSurfaceForm.equals(entity.getSurfaceForm());
        if (!surfaceFormMatches) {
            LOGGER.warn("In document " + documentUri + ", the expected surface form of the named entity " + entity
                    + " does not fit the surface form derived from the text \"" + textSurfaceForm + "\".");
        }
        addDBpediaUris(entity.getUris());
        markings.add(entity.toNamedEntity());
    }
    Document result = new DocumentImpl(text, documentUri, markings);
    mergeSubNamedEntity(result);
    return result;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:18,代码来源:MSNBCDataset.java
示例3: loadDocuments
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Reads the text file line by line and creates one {@link Document} per non-empty line.
 *
 * <p>Each line is split on tabs; column 0 is appended to the URI prefix
 * {@code "http://" + getName() + "/"} and column 1 is used as the document text. The
 * markings of a document are looked up with {@code findMarkings}.
 *
 * @param annFile  the annotation file handed to {@code findMarkings}
 * @param textFile the UTF-8 encoded file containing the texts
 * @return the loaded documents
 * @throws GerbilException if reading the text file fails
 */
private List<Document> loadDocuments(File annFile, File textFile) throws GerbilException {
    List<Document> result = new ArrayList<Document>();
    String uriPrefix = "http://" + getName() + "/";
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            new FileInputStream(textFile), Charset.forName("UTF-8")))) {
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            if (row.isEmpty()) {
                continue;
            }
            String[] columns = row.split("\t");
            List<Marking> markings = findMarkings(columns, annFile);
            result.add(new DocumentImpl(columns[1], uriPrefix + columns[0], markings));
        }
    } catch (IOException e) {
        throw new GerbilException("Exception while reading dataset.", e,
                ErrorTypes.DATASET_LOADING_ERROR);
    }
    return result;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:25,代码来源:ERDDataset2.java
示例4: loadDocuments
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Reads the tweets file and creates one {@link Document} per line.
 *
 * <p>Every line is parsed as a JSON object; its {@code "id_str"} value is appended to
 * the URI prefix {@code "http://" + getName() + "/"} and its {@code "text"} value
 * becomes the document text. The markings are created by {@code findMarkings} from the
 * lines returned by {@code getMarkingLines} for that id.
 *
 * @param annotations the annotation file handed to {@code getMarkingLines}
 * @param tweets      the UTF-8 encoded file containing one JSON object per line
 * @return the loaded documents
 * @throws GerbilException if reading the tweets file fails
 */
private List<Document> loadDocuments(File annotations, File tweets)
        throws GerbilException {
    List<Document> result = new ArrayList<Document>();
    String uriPrefix = "http://" + getName() + "/";
    // The tweets file holds one JSON object per line.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            new FileInputStream(tweets), Charset.forName("UTF-8")))) {
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            JSONObject tweetJson = new JSONObject(row);
            String tweetId = tweetJson.getString("id_str");
            String tweetText = tweetJson.getString("text");
            List<Marking> markings = findMarkings(getMarkingLines(annotations, tweetId), tweetText);
            result.add(new DocumentImpl(tweetText, uriPrefix + tweetId, markings));
        }
    } catch (IOException e) {
        throw new GerbilException("Exception while reading dataset.", e,
                ErrorTypes.DATASET_LOADING_ERROR);
    }
    return result;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:26,代码来源:WSDMDataset.java
示例5: findMarkings
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Extracts the typed named entities from a tweet given in a token-per-line format.
 *
 * <p>The tweet is split into lines and every line into columns separated by one or more
 * tabs. Column 0 (the token) is appended to {@code realTweet}, followed by a single
 * space; the start position of a marking is therefore an offset into that space-joined
 * text. A line whose third column starts with {@code "B-"} opens a marking that is
 * resolved by {@code getWholeMarking}; of the returned array, element 0 provides the
 * length, element 1 the URI and element 2 the type.
 *
 * <p>As a side effect, {@code realTweet} (declared outside this method) is replaced by a
 * new {@link StringBuilder}.
 *
 * @param tweet the tweet with one token per line
 * @return the markings found in the tweet
 */
public static List<Marking> findMarkings(String tweet) {
    int start = 0;
    List<Marking> markings = new ArrayList<Marking>();
    realTweet = new StringBuilder();
    String[] line = tweet.split("\n");
    int i = 0;
    for (String tokenFull : line) {
        String[] token = tokenFull.split("\t+");
        realTweet.append(token[0]).append(' ');
        // The second column is intentionally not touched here: its trimmed value was
        // never read, and accessing it failed for lines that have only one column
        // although such lines are tolerated by the length check below.
        if (token.length > 2 && token[2].startsWith("B-")) {
            String[] marking = getWholeMarking(line, i);
            Set<String> types = new HashSet<String>();
            types.add(marking[2]);
            markings.add(new TypedNamedEntity(start, marking[0].length(),
                    marking[1], types));
        }
        // +1 accounts for the space appended after every token.
        start += token[0].length() + 1;
        i++;
    }
    return markings;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:25,代码来源:DerczynskiDataset.java
示例6: findMarkings
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Extracts the typed entities from a tweet given in a token-per-line format.
 *
 * <p>The tweet is split into lines and every line into columns separated by one or more
 * tabs. Column 0 (the token) is appended to {@code realTweet}, followed by a single
 * space, so marking positions are offsets into that space-joined text. A line whose
 * trimmed second column starts with {@code "B-"} opens a marking that is resolved by
 * {@code getWholeMarking}; element 0 of the returned array provides the length and
 * element 1 the type. The markings are created with an empty URI.
 *
 * <p>As a side effect, {@code realTweet} (declared outside this method) is replaced by a
 * new {@link StringBuilder}.
 *
 * @param tweet the tweet with one token per line
 * @return the markings found in the tweet
 */
public static List<Marking> findMarkings(String tweet) {
    List<Marking> found = new ArrayList<Marking>();
    realTweet = new StringBuilder();
    String[] rows = tweet.split("\n");
    int offset = 0;
    for (int rowIndex = 0; rowIndex < rows.length; rowIndex++) {
        String[] columns = rows[rowIndex].split("\t+");
        realTweet.append(columns[0]).append(" ");
        String tag = columns[1].trim();
        if (tag.startsWith("B-")) {
            String[] whole = getWholeMarking(rows, rowIndex);
            Set<String> typeSet = new HashSet<String>();
            typeSet.add(whole[1]);
            found.add(new TypedNamedEntity(offset, whole[0].length(), "", typeSet));
        }
        // +1 accounts for the space appended after every token.
        offset += columns[0].length() + 1;
    }
    return found;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:24,代码来源:RitterDataset.java
示例7: logResult
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Writes the markings returned by an annotator to the debug log.
 *
 * <p>The message has the form
 * {@code [annotatorName] result=[markingName<m1>,markingName<m2>,...]}, where
 * {@code <m>} is the {@code toString()} of a marking. Nothing is built or logged when
 * debug logging is disabled.
 *
 * @param result        the markings to log
 * @param annotatorName the name of the annotator that produced the markings
 * @param markingName   the text written in front of every marking
 */
protected static void logResult(List<? extends Marking> result, String annotatorName, String markingName) {
    // Building the message calls toString() on every marking; skip that work if the
    // message would be discarded anyway.
    if (!LOGGER.isDebugEnabled()) {
        return;
    }
    StringBuilder builder = new StringBuilder();
    builder.append('[');
    builder.append(annotatorName);
    builder.append("] result=[");
    boolean first = true;
    for (Marking m : result) {
        if (first) {
            first = false;
        } else {
            builder.append(',');
        }
        builder.append(markingName);
        builder.append(m.toString());
    }
    builder.append(']');
    LOGGER.debug(builder.toString());
}
开发者ID:dice-group,项目名称:gerbil,代码行数:19,代码来源:ErrorCountingAnnotatorDecorator.java
示例8: storeAnnotatorOutput
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Stores the results of an annotator as a gzip-compressed NIF (Turtle) file.
 *
 * <p>Nothing is written if {@code outputShouldBeStored} rejects the configuration. Any
 * failure, including a failure while closing the streams, is logged and not propagated
 * to the caller.
 *
 * @param configuration the configuration of the experiment task that produced the results
 * @param results       the annotator results, one list per document
 * @param documents     the documents the results belong to
 */
public <T extends Marking> void storeAnnotatorOutput(ExperimentTaskConfiguration configuration,
        List<List<T>> results, List<Document> documents) {
    if (!outputShouldBeStored(configuration)) {
        return;
    }
    try {
        File file = generateOutputFile(configuration);
        List<Document> resultDocuments = generateResultDocuments(results, documents);
        // Closing the GZIP stream writes the trailer of the archive. Using
        // try-with-resources makes sure that a failure during close ends up in the
        // catch block below instead of being silently dropped.
        try (FileOutputStream fout = new FileOutputStream(file);
                GZIPOutputStream gout = new GZIPOutputStream(fout)) {
            NIFWriter writer = new TurtleNIFWriter();
            writer.writeNIF(resultDocuments, gout);
        }
    } catch (Exception e) {
        LOGGER.error("Couldn't write annotator result to file.", e);
    }
}
开发者ID:dice-group,项目名称:gerbil,代码行数:21,代码来源:AnnotatorOutputWriter.java
示例9: test
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Checks that {@code Microposts2015Dataset.findMarkings} returns typed named entities
 * whose text spans and first types match the expected values of this test case.
 */
@Test
public void test() {
    Set<String> lines = new HashSet<String>();
    for (String mentionLine : mentions) {
        lines.add(mentionLine);
    }
    List<Marking> markings = Microposts2015Dataset.findMarkings(lines, tweet);
    Assert.assertNotNull(markings);
    Assert.assertTrue(markings.size() > 0);
    for (int idx = 0; idx < markings.size(); idx++) {
        Marking marking = markings.get(idx);
        Assert.assertTrue(marking instanceof TypedNamedEntity);
        TypedNamedEntity entity = (TypedNamedEntity) marking;
        // Cut the marked span out of the tweet and compare it with the expectation.
        int begin = entity.getStartPosition();
        String mention = tweet.substring(begin, begin + entity.getLength());
        Assert.assertEquals(expectedMentions[idx], mention);
        String firstType = entity.getTypes().iterator().next();
        Assert.assertEquals(expectedTypes[idx], firstType);
    }
}
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2015DatasetMentionSearchTest.java
示例10: test
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Checks that {@code Microposts2016Dataset.findMarkings} returns typed named entities
 * whose text spans and first types match the expected values of this test case.
 */
@Test
public void test() {
    Set<String> annotationLines = new HashSet<String>();
    for (String mentionLine : mentions) {
        annotationLines.add(mentionLine);
    }
    List<Marking> found = Microposts2016Dataset.findMarkings(annotationLines, tweet);
    Assert.assertNotNull(found);
    Assert.assertTrue(found.size() > 0);
    int position = 0;
    for (Marking current : found) {
        Assert.assertTrue(current instanceof TypedNamedEntity);
        TypedNamedEntity typed = (TypedNamedEntity) current;
        // The marked span of the tweet has to be the expected mention.
        int from = typed.getStartPosition();
        int to = typed.getStartPosition() + typed.getLength();
        Assert.assertEquals(expectedMentions[position], tweet.substring(from, to));
        Assert.assertEquals(expectedTypes[position], typed.getTypes().iterator().next());
        ++position;
    }
}
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2016DatasetMentionSearchTest.java
示例11: test
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Loads the MSNBC test dataset and checks that it contains exactly one document with
 * the expected URI, text and markings.
 *
 * @throws GerbilException if the dataset cannot be initialized
 */
@Test
public void test() throws GerbilException {
    MSNBCDataset dataset = new MSNBCDataset(TEST_TEXT_DIR, TEST_ANNOTATION_DIR);
    try {
        dataset.setName(DATASET_NAME);
        dataset.init();
        Assert.assertEquals(1, dataset.getInstances().size());
        Document document = dataset.getInstances().get(0);
        Assert.assertEquals(EXPECTED_DOCUMENT_URI, document.getDocumentURI());
        Assert.assertEquals(EXPECTED_TEXT, document.getText());
        Set<Marking> expectedNEs = new HashSet<Marking>(Arrays.asList(EXPECTED_MARKINGS));
        for (Marking marking : document.getMarkings()) {
            Assert.assertTrue("Couldn't find " + marking.toString() + " inside " + expectedNEs.toString(),
                    expectedNEs.contains(marking));
        }
        // Together with the loop above this makes sure that both sets are equal.
        Assert.assertEquals(expectedNEs.size(), document.getMarkings().size());
    } finally {
        // Close the dataset even if one of the assertions above fails.
        IOUtils.closeQuietly(dataset);
    }
}
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:MSNBCDatasetTest.java
示例12: test
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Checks that {@code WSDMDataset.findMarkings} returns {@link Annotation}s whose first
 * URI matches the expected value of this test case.
 */
@Test
public void test() {
    Set<String> annotationLines = new HashSet<String>();
    for (String mentionLine : mentions) {
        annotationLines.add(mentionLine);
    }
    List<Marking> found = WSDMDataset.findMarkings(annotationLines, tweet);
    Assert.assertNotNull(found);
    Assert.assertTrue(found.size() > 0);
    for (int idx = 0; idx < found.size(); idx++) {
        Marking current = found.get(idx);
        Assert.assertTrue(current instanceof Annotation);
        String firstUri = ((Annotation) current).getUris().iterator().next();
        Assert.assertEquals(expectedMentions[idx], firstUri);
    }
}
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:WSDM2012DatasetMentionSearchTest.java
示例13: test
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Loads the Senseval dataset from the test file and checks the text of the document at
 * {@code docIndex} as well as the text spans covered by its markings.
 *
 * @throws GerbilException if the dataset cannot be initialized
 * @throws IOException     if closing the dataset fails
 */
@Test
public void test() throws GerbilException, IOException {
    SensevalDataset data = new SensevalDataset(this.file);
    data.init();
    Document doc = data.getInstances().get(docIndex);
    assertEquals(expectedSentence, doc.getText());
    List<Marking> markings = doc.getMarkings();
    String[] markedSpans = new String[markings.size()];
    int position = 0;
    for (Marking marking : markings) {
        NamedEntity entity = (NamedEntity) marking;
        // Cut the span covered by the marking out of the document text.
        markedSpans[position] = doc.getText().substring(entity.getStartPosition(),
                entity.getStartPosition() + entity.getLength());
        position++;
    }
    assertArrayEquals(expectedMarkings, markedSpans);
    data.close();
}
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:SensevalDatasetTest.java
示例14: reduceToTextAndEntities
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Creates a copy of the given document that contains only plain {@link NamedEntity}
 * markings.
 *
 * <p>Only the {@link TypedNamedEntity} markings of the document are considered. Those
 * accepted by a {@code TypeBasedMarkingFilter} configured with {@code false} and the
 * URIs of {@code rdfs:Class} and {@code owl:Class} are copied without their types.
 * NOTE(review): the filter presumably rejects markings typed as classes; confirm
 * against {@code TypeBasedMarkingFilter}.
 *
 * @param document the document to reduce
 * @return a new document with the same text and URI and the reduced markings
 */
public static Document reduceToTextAndEntities(Document document) {
    MarkingFilter<TypedNamedEntity> classFilter = new TypeBasedMarkingFilter<TypedNamedEntity>(false,
            RDFS.Class.getURI(), OWL.Class.getURI());
    List<TypedNamedEntity> typedEntities = document.getMarkings(TypedNamedEntity.class);
    List<Marking> kept = new ArrayList<Marking>(typedEntities.size());
    for (TypedNamedEntity candidate : typedEntities) {
        if (!classFilter.isMarkingGood(candidate)) {
            continue;
        }
        kept.add(new NamedEntity(candidate.getStartPosition(), candidate.getLength(), candidate.getUris()));
    }
    return new DocumentImpl(document.getText(), document.getDocumentURI(), kept);
}
开发者ID:dice-group,项目名称:Cetus,代码行数:13,代码来源:ExtendedCetusSurfaceFormExtractorTest.java
示例15: main
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Runs the Cetus type extraction on a single example sentence with one named entity and
 * prints the resulting document to standard output.
 *
 * @param args not used
 */
public static void main(String[] args) {
    CetusAnnotator annotator = new CetusAnnotator(
            CetusSurfaceFormExtractor.create(),
            YagoBasedTypeSearcher.create());
    // The entity covers "Jean-Baptiste Cotelier" (22 characters starting at offset 51).
    Document input = new DocumentImpl(
            "Born on December, 1629 and died on 19 August 1686, Jean-Baptiste Cotelier or Cotelerius was a Patristic scholar and Catholic theologian.",
            Arrays.asList((Marking) new NamedEntity(51, 22,
                    "http://dbpedia.org/resource/Jean-Baptiste_Cotelier")));
    Document typed = annotator.performTypeExtraction(input);
    System.out.println(typed);
}
开发者ID:dice-group,项目名称:Cetus,代码行数:12,代码来源:SimpleCetusAnnotatorTestClass.java
示例16: checkMarkings
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Checks the meanings of all given markings.
 *
 * <p>A marking that is a {@link Meaning} is checked directly with {@code checkMeaning}.
 * Otherwise, if it is a {@link MeaningsContainingMarking}, the meanings it contains are
 * checked with {@code checkMeanings}. All other markings are ignored.
 *
 * @param markings the markings to check
 */
@Override
public void checkMarkings(Collection<? extends Marking> markings) {
    for (Marking current : markings) {
        if (current instanceof Meaning) {
            checkMeaning((Meaning) current);
            continue;
        }
        if (current instanceof MeaningsContainingMarking) {
            MeaningsContainingMarking container = (MeaningsContainingMarking) current;
            checkMeanings(container.getMeanings());
        }
    }
}
开发者ID:dice-group,项目名称:gerbil,代码行数:11,代码来源:EntityCheckerManagerImpl.java
示例17: loadDocuments
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Reads the tweets file and creates one {@link Document} per usable line.
 *
 * <p>Each line is split on tabs; lines with fewer than two columns are skipped. Column 0
 * is the tweet id, which is appended to the URI prefix
 * {@code "http://" + getName() + "/"}, and column 1 is the tweet text. The markings are
 * created by {@code findMarkings} from the lines returned by {@code getMarkingLines}
 * for that id.
 *
 * @param annotations the annotation file handed to {@code getMarkingLines}
 * @param tweetsFile  the UTF-8 encoded file containing the tweets
 * @return the loaded documents
 * @throws GerbilException if reading the tweets file fails
 */
protected List<Document> loadDocuments(File annotations, File tweetsFile)
        throws GerbilException {
    List<Document> result = new ArrayList<Document>();
    String uriPrefix = "http://" + getName() + "/";
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            new FileInputStream(tweetsFile), Charset.forName("UTF-8")))) {
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            String[] columns = row.split("\t");
            if (columns.length < 2) {
                continue;
            }
            String tweetId = columns[0];
            String tweetText = columns[1];
            List<Marking> markings = findMarkings(getMarkingLines(annotations, tweetId), tweetText);
            result.add(new DocumentImpl(tweetText, uriPrefix + tweetId, markings));
        }
    } catch (IOException e) {
        throw new GerbilException("Exception while reading dataset.", e,
                ErrorTypes.DATASET_LOADING_ERROR);
    }
    return result;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:29,代码来源:Microposts2016Dataset.java
示例18: loadDocuments
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Reads the tweets file and creates one {@link Document} per usable line.
 *
 * <p>Each line is split on tabs and only lines with at least two columns are used.
 * Column 0 is the tweet id, which is appended to the URI prefix
 * {@code "http://" + getName() + "/"}, and column 1 is the tweet text. The markings are
 * created by {@code findMarkings} from the lines returned by {@code getMarkingLines}
 * for that id.
 *
 * @param annotations the annotation file handed to {@code getMarkingLines}
 * @param tweetsFile  the UTF-8 encoded file containing the tweets
 * @return the loaded documents
 * @throws GerbilException if reading the tweets file fails
 */
protected List<Document> loadDocuments(File annotations, File tweetsFile)
        throws GerbilException {
    List<Document> loaded = new ArrayList<Document>();
    String prefix = "http://" + getName() + "/";
    try (BufferedReader tweetReader = new BufferedReader(new InputStreamReader(
            new FileInputStream(tweetsFile), Charset.forName("UTF-8")))) {
        String current;
        while ((current = tweetReader.readLine()) != null) {
            String[] parts = current.split("\t");
            if (parts.length >= 2) {
                String id = parts[0];
                String text = parts[1];
                List<Marking> found = findMarkings(getMarkingLines(annotations, id), text);
                loaded.add(new DocumentImpl(text, prefix + id, found));
            }
        }
    } catch (IOException e) {
        throw new GerbilException("Exception while reading dataset.", e,
                ErrorTypes.DATASET_LOADING_ERROR);
    }
    return loaded;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:29,代码来源:Microposts2015Dataset.java
示例19: mergeSubNamedEntity
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Merge {@link NamedEntity}s that are sub spans of another named entity and
 * that have at least one URI in common with it.
 *
 * <p>
 * The named entities of the document are sorted using this object as
 * comparator. Every entity is then compared with the entities following it
 * in the sorted order. If it lies completely inside one of them and both
 * share a URI, its URIs are added to the enclosing entity and the sub span
 * is removed from the document. If the spans overlap like this but no URI
 * is shared, both entities are kept and a debug message is logged.
 * </p>
 *
 * @param document
 *            the document whose named entities should be merged
 */
private void mergeSubNamedEntity(Document document) {
    List<NamedEntity> spanList = document.getMarkings(NamedEntity.class);
    NamedEntity nes[] = spanList.toArray(new NamedEntity[spanList.size()]);
    Arrays.sort(nes, this);
    Set<Marking> markingsToRemove = new HashSet<Marking>();
    boolean uriOverlapping;
    Iterator<String> uriIterator;
    for (int i = 0; i < nes.length; ++i) {
        uriOverlapping = false;
        // stop searching as soon as nes[i] has been merged into another entity
        for (int j = i + 1; (j < nes.length) && (!uriOverlapping); ++j) {
            // if nes[i] is a "sub span" of nes[j]
            if ((nes[i].getStartPosition() >= nes[j].getStartPosition()) && ((nes[i].getStartPosition()
                    + nes[i].getLength()) <= (nes[j].getStartPosition() + nes[j].getLength()))) {
                uriIterator = nes[i].getUris().iterator();
                while ((!uriOverlapping) && (uriIterator.hasNext())) {
                    uriOverlapping = nes[j].containsUri(uriIterator.next());
                }
                if (uriOverlapping) {
                    // take over the URIs of the sub span before it is removed
                    nes[j].getUris().addAll(nes[i].getUris());
                    markingsToRemove.add(nes[i]);
                } else {
                    LOGGER.debug("There are two overlapping named entities with different URI sets. {}, {}", nes[i],
                            nes[j]);
                }
            }
        }
    }
    document.getMarkings().removeAll(markingsToRemove);
}
开发者ID:dice-group,项目名称:gerbil,代码行数:37,代码来源:MSNBCDataset.java
示例20: findMarkings
import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Collects the markings of a single text from the annotation file.
 *
 * <p>The annotation file is read line by line and every non-empty line is split on
 * tabs. The id in column 0 is compared with the id of the text ({@code text[0]}), both
 * converted with {@code getTrecID}. For a matching line, column 3 is searched inside
 * {@code text[1]} to determine the start position, and column 2 (without its first
 * character and with {@code "/"} replaced by {@code "."}) is inserted into
 * {@code queryTemp} to look up the URI of the marking at {@code DBPEDIA_SERVICE}.
 * Reading stops at the first annotation whose id is greater than the id of the text.
 *
 * @param text    the columns of a line of the text file (id and text)
 * @param annFile the UTF-8 encoded annotation file
 * @return the markings found for the given text
 * @throws GerbilException if reading the annotation file fails
 */
private List<Marking> findMarkings(String[] text, File annFile) throws GerbilException {
    List<Marking> markings = new ArrayList<Marking>();
    try (BufferedReader breader = new BufferedReader(new InputStreamReader(
            new FileInputStream(annFile), Charset.forName("UTF-8")))) {
        String line;
        while ((line = breader.readLine()) != null) {
            if (line.isEmpty()) {
                continue;
            }
            String[] annotation = line.split("\t");
            int searchID = getTrecID(text[0]);
            int annoID = getTrecID(annotation[0]);
            if (searchID == annoID) {
                int start = text[1].indexOf(annotation[3]);
                int length = annotation[3].length();
                // FIXME time consuming! One request is sent to the service per matching annotation.
                String freebaseID = annotation[2].substring(1).replace("/", ".");
                Query query = QueryFactory.create(queryTemp.replace("%%v%%", freebaseID));
                QueryExecution qexec = QueryExecutionFactory.createServiceRequest(DBPEDIA_SERVICE, query);
                String uri;
                try {
                    uri = qexec.execSelect().next().getResource("s").getURI();
                } finally {
                    // Release the resources of the query even if reading the result fails.
                    qexec.close();
                }
                markings.add(new NamedEntity(start, length, uri));
            } else if (annoID > searchID) {
                // There is no annotation for the given text
                break;
            }
        }
    } catch (IOException e) {
        throw new GerbilException("Exception while reading dataset.", e,
                ErrorTypes.DATASET_LOADING_ERROR);
    }
    return markings;
}
开发者ID:dice-group,项目名称:gerbil,代码行数:39,代码来源:ERDDataset2.java
注:本文中的org.aksw.gerbil.transfer.nif.Marking类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论