• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Java Marking类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中org.aksw.gerbil.transfer.nif.Marking的典型用法代码示例。如果您正苦于以下问题:Java Marking类的具体用法?Java Marking怎么用?Java Marking使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



Marking类属于org.aksw.gerbil.transfer.nif包,在下文中一共展示了Marking类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: findMarkings

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Builds one {@link TypedNamedEntity} for every tab-separated annotation
 * line. Columns 1 and 2 are parsed as start and end offsets, column 3 is
 * the URI (replaced by an empty string when it starts with "NIL") and the
 * column at {@code typeIndex} is handed to {@code getTypeURI} to obtain the
 * single type of the entity.
 *
 * @param lines
 *            the annotation lines, each split on tab characters
 * @param text
 *            not read by this method
 * @return the created markings, in the iteration order of {@code lines}
 */
protected static List<Marking> findMarkings(Set<String> lines, String text) {
	List<Marking> result = new ArrayList<Marking>();
	for (String annotationLine : lines) {
		String[] columns = annotationLine.split("\t");

		int begin = Integer.parseInt(columns[1]);
		int length = Integer.parseInt(columns[2]) - begin;
		// URIs starting with "NIL" are stored as an empty URI
		String uri = columns[3].startsWith("NIL") ? "" : columns[3];

		Set<String> typeUris = new HashSet<String>();
		typeUris.add(getTypeURI(columns[typeIndex]));

		result.add(new TypedNamedEntity(begin, length, uri, typeUris));
	}
	return result;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2016Dataset.java


示例2: createDocument

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Creates a document from the given text and the named entities of the
 * parsed result. For every entity the surface form is cut out of the text
 * and compared with the surface form stored in the entity; a mismatch is
 * only logged as a warning. The entity's URIs are passed to
 * {@code addDBpediaUris} before the entity is converted and added. Finally
 * {@code mergeSubNamedEntity} is applied to the created document.
 *
 * @param fileName
 *            the name used to generate the document URI
 * @param text
 *            the document text
 * @param parsedResult
 *            the parsed annotations belonging to the text
 * @return the created document
 */
protected Document createDocument(String fileName, String text, MSNBC_Result parsedResult) {
    final String documentUri = generateDocumentUri(fileName);
    final List<Marking> markings = new ArrayList<Marking>(parsedResult.getMarkings().size());
    for (MSNBC_NamedEntity entity : parsedResult.getMarkings()) {
        int begin = entity.getStartPosition();
        String surfaceFormInText = text.substring(begin, begin + entity.getLength());
        boolean surfaceFormMatches = surfaceFormInText.equals(entity.getSurfaceForm());
        if (!surfaceFormMatches) {
            LOGGER.warn("In document " + documentUri + ", the expected surface form of the named entity "
                    + entity + " does not fit the surface form derived from the text \"" + surfaceFormInText
                    + "\".");
        }
        addDBpediaUris(entity.getUris());
        markings.add(entity.toNamedEntity());
    }
    Document result = new DocumentImpl(text, documentUri, markings);
    mergeSubNamedEntity(result);
    return result;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:18,代码来源:MSNBCDataset.java


示例3: loadDocuments

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Reads the given text file as UTF-8 and creates one document per non-empty
 * line. Every line is split on tab characters; the first column is appended
 * to the document URI prefix and the second column is used as document
 * text. The markings of a document are determined by
 * {@code findMarkings(String[], File)}.
 *
 * @param annFile
 *            the annotation file handed to {@code findMarkings}
 * @param textFile
 *            the tab-separated file containing the texts
 * @return the list of loaded documents
 * @throws GerbilException
 *             if an {@link IOException} occurs while reading the text file
 */
private List<Document> loadDocuments(File annFile, File textFile) throws GerbilException {
	List<Document> documents = new ArrayList<Document>();
	String documentUriPrefix = "http://" + getName() + "/";
	try (BufferedReader breader = new BufferedReader(new InputStreamReader(
			new FileInputStream(textFile), Charset.forName("UTF-8")))) {
		String line;
		List<Marking> markings = null;
		while ((line = breader.readLine()) != null) {
			if (line.isEmpty()) {
				continue;
			}
			String[] text = line.split("\t");
			if (text.length < 2) {
				// A line without a text column cannot be turned into a
				// document; skip it instead of failing with an
				// ArrayIndexOutOfBoundsException on text[1].
				continue;
			}

			markings = findMarkings(text, annFile);
			documents.add(new DocumentImpl(text[1], documentUriPrefix
					+ text[0], markings));
		}
	} catch (IOException e) {
		throw new GerbilException("Exception while reading dataset.", e,
				ErrorTypes.DATASET_LOADING_ERROR);
	}

	return documents;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:25,代码来源:ERDDataset2.java


示例4: loadDocuments

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Reads the tweets file as UTF-8, parses every line as a JSON object and
 * creates one document per line. The "id_str" value is appended to the
 * document URI prefix and the "text" value becomes the document text. The
 * markings are created from the annotation lines returned by
 * {@code getMarkingLines} for the tweet id.
 *
 * @param annotations
 *            the annotation file handed to {@code getMarkingLines}
 * @param tweets
 *            the file containing one JSON object per line
 * @return the list of loaded documents
 * @throws GerbilException
 *             if an {@link IOException} occurs while reading the tweets
 */
private List<Document> loadDocuments(File annotations, File tweets)
		throws GerbilException {
	final List<Document> loaded = new ArrayList<Document>();
	final String uriPrefix = "http://" + getName() + "/";
	// every line of the tweets file is one JSON object
	try (BufferedReader reader = new BufferedReader(new InputStreamReader(
			new FileInputStream(tweets), Charset.forName("UTF-8")))) {
		for (String jsonLine = reader.readLine(); jsonLine != null; jsonLine = reader.readLine()) {
			JSONObject tweetJson = new JSONObject(jsonLine);

			String tweetId = tweetJson.getString("id_str");
			String tweetText = tweetJson.getString("text");
			List<Marking> tweetMarkings = findMarkings(getMarkingLines(annotations, tweetId), tweetText);
			loaded.add(new DocumentImpl(tweetText, uriPrefix + tweetId, tweetMarkings));
		}
	} catch (IOException e) {
		throw new GerbilException("Exception while reading dataset.", e,
				ErrorTypes.DATASET_LOADING_ERROR);
	}
	return loaded;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:26,代码来源:WSDMDataset.java


示例5: findMarkings

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Creates the markings of a tweet that is given as one token per line with
 * tab-separated columns. The first column of every line is appended to
 * {@code realTweet}, followed by a single space. A line whose third column
 * starts with "B-" starts a marking; its details are taken from
 * {@code getWholeMarking}: element 0 provides the length, element 1 the
 * URI and element 2 the type.
 *
 * @param tweet
 *            the tweet with one token per line
 * @return the markings found in the tweet; positions refer to the text
 *         collected in {@code realTweet}
 */
public static List<Marking> findMarkings(String tweet) {
	int start = 0;
	List<Marking> markings = new ArrayList<Marking>();
	realTweet = new StringBuilder();
	String[] line = tweet.split("\n");
	int i = 0;
	for (String tokenFull : line) {
		String[] token = tokenFull.split("\t+");
		realTweet.append(token[0] + " ");
		// The second column is not read here. Trimming it in place was a
		// dead store that failed with an ArrayIndexOutOfBoundsException on
		// lines having only one column, although the check below already
		// tolerates short lines.
		if (token.length > 2 && token[2].startsWith("B-")) {
			String[] marking = getWholeMarking(line, i);
			Set<String> types = new HashSet<String>();
			types.add(marking[2]);
			markings.add(new TypedNamedEntity(start, marking[0].length(),
					marking[1], types));
		}
		// +1 for the space appended after every token
		start += token[0].length() + 1;
		i++;
	}

	return markings;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:25,代码来源:DerczynskiDataset.java


示例6: findMarkings

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Creates the markings of a tweet that is given as one token per line with
 * tab-separated columns. The first column of every line is appended to
 * {@code realTweet}, followed by a single space. A line whose trimmed
 * second column starts with "B-" starts a marking; {@code getWholeMarking}
 * provides its surface form (element 0, used for the length) and its type
 * (element 1). The URI of every created entity is the empty string.
 *
 * @param tweet
 *            the tweet with one token per line
 * @return the markings found in the tweet; positions refer to the text
 *         collected in {@code realTweet}
 */
public static List<Marking> findMarkings(String tweet) {
	List<Marking> found = new ArrayList<Marking>();
	realTweet = new StringBuilder();
	String[] line = tweet.split("\n");
	int offset = 0;
	for (int lineIndex = 0; lineIndex < line.length; lineIndex++) {
		String[] columns = line[lineIndex].split("\t+");
		String word = columns[0];
		realTweet.append(word + " ");
		String tag = columns[1].trim();
		if (tag.startsWith("B-")) {
			String[] wholeMarking = getWholeMarking(line, lineIndex);
			Set<String> typeSet = new HashSet<String>();
			typeSet.add(wholeMarking[1]);
			found.add(new TypedNamedEntity(offset, wholeMarking[0].length(), "", typeSet));
		}
		// +1 for the space appended after every token
		offset += word.length() + 1;
	}
	return found;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:24,代码来源:RitterDataset.java


示例7: logResult

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Writes the given result list to the debug log in the form
 * {@code [annotatorName] result=[markingName<m1>,markingName<m2>,...]}
 * where {@code <mX>} is the {@code toString()} output of a marking.
 *
 * @param result
 *            the markings that should be logged
 * @param annotatorName
 *            the name printed in the leading brackets
 * @param markingName
 *            the prefix printed in front of every marking
 */
protected static void logResult(List<? extends Marking> result, String annotatorName, String markingName) {
    // Building the message touches every marking; skip that work completely
    // if the message would be discarded anyway.
    if (!LOGGER.isDebugEnabled()) {
        return;
    }
    StringBuilder builder = new StringBuilder();
    builder.append('[');
    builder.append(annotatorName);
    builder.append("] result=[");
    boolean first = true;
    for (Marking m : result) {
        if (first) {
            first = false;
        } else {
            builder.append(',');
        }
        builder.append(markingName);
        builder.append(m.toString());
    }
    builder.append(']');
    LOGGER.debug(builder.toString());
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:19,代码来源:ErrorCountingAnnotatorDecorator.java


示例8: storeAnnotatorOutput

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Writes the given annotator results as gzip-compressed Turtle NIF to the
 * file generated for the given configuration. Nothing is written if
 * {@code outputShouldBeStored} returns {@code false}. Errors are logged and
 * not propagated to the caller.
 *
 * @param configuration
 *            the experiment task configuration the results belong to
 * @param results
 *            the annotator results
 * @param documents
 *            the documents the results belong to
 */
public <T extends Marking> void storeAnnotatorOutput(ExperimentTaskConfiguration configuration,
        List<List<T>> results, List<Document> documents) {
    if (!outputShouldBeStored(configuration)) {
        return;
    }
    try {
        File file = generateOutputFile(configuration);
        List<Document> resultDocuments = generateResultDocuments(results, documents);
        // try-with-resources closes both streams and, unlike a quiet close,
        // lets a failure while finishing the gzip stream reach the catch
        // block below so that an incomplete file does not go unnoticed.
        try (FileOutputStream fout = new FileOutputStream(file);
                GZIPOutputStream gout = new GZIPOutputStream(fout)) {
            NIFWriter writer = new TurtleNIFWriter();
            writer.writeNIF(resultDocuments, gout);
        }
    } catch (Exception e) {
        LOGGER.error("Couldn't write annotator result to file.", e);
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:21,代码来源:AnnotatorOutputWriter.java


示例9: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Feeds the configured mention lines into
 * {@code Microposts2015Dataset.findMarkings} and checks that every returned
 * marking is a {@link TypedNamedEntity} whose covered text and first type
 * match the expected values at the same index.
 */
@Test
public void test() {
    Set<String> annotationLines = new HashSet<String>();
    for (String mentionLine : mentions) {
        annotationLines.add(mentionLine);
    }
    List<Marking> found = Microposts2015Dataset.findMarkings(annotationLines, tweet);
    Assert.assertNotNull(found);
    Assert.assertTrue(found.size() > 0);
    for (int idx = 0; idx < found.size(); idx++) {
        Marking current = found.get(idx);
        Assert.assertTrue(current instanceof TypedNamedEntity);
        TypedNamedEntity entity = (TypedNamedEntity) current;

        // the text covered by the entity has to be the expected mention
        int begin = entity.getStartPosition();
        String coveredText = tweet.substring(begin, begin + entity.getLength());
        Assert.assertEquals(expectedMentions[idx], coveredText);

        String firstType = entity.getTypes().iterator().next();
        Assert.assertEquals(expectedTypes[idx], firstType);
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2015DatasetMentionSearchTest.java


示例10: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Runs {@code Microposts2016Dataset.findMarkings} on the configured mention
 * lines and verifies, position by position, that each result is a
 * {@link TypedNamedEntity} covering the expected mention and carrying the
 * expected type as its first type.
 */
@Test
public void test() {
    Set<String> mentionLines = new HashSet<String>();
    for (String mentionLine : mentions) {
        mentionLines.add(mentionLine);
    }
    List<Marking> result = Microposts2016Dataset.findMarkings(mentionLines, tweet);
    Assert.assertNotNull(result);
    Assert.assertTrue(result.size() > 0);

    int position = 0;
    Iterator<Marking> iterator = result.iterator();
    while (iterator.hasNext()) {
        Marking candidate = iterator.next();
        Assert.assertTrue(candidate instanceof TypedNamedEntity);
        TypedNamedEntity typedEntity = (TypedNamedEntity) candidate;

        int from = typedEntity.getStartPosition();
        int to = typedEntity.getStartPosition() + typedEntity.getLength();
        Assert.assertEquals(expectedMentions[position], tweet.substring(from, to));

        Assert.assertEquals(expectedTypes[position], typedEntity.getTypes().iterator().next());
        ++position;
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:23,代码来源:Microposts2016DatasetMentionSearchTest.java


示例11: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Loads the test dataset and checks that it contains exactly one document
 * with the expected URI, text and markings.
 *
 * @throws GerbilException
 *             if the dataset can not be initialized
 */
@Test
public void test() throws GerbilException {
    MSNBCDataset dataset = new MSNBCDataset(TEST_TEXT_DIR, TEST_ANNOTATION_DIR);
    // Close the dataset even if initialization or one of the assertions
    // fails.
    try {
        dataset.setName(DATASET_NAME);
        dataset.init();
        Assert.assertEquals(1, dataset.getInstances().size());
        Document document = dataset.getInstances().get(0);

        Assert.assertEquals(EXPECTED_DOCUMENT_URI, document.getDocumentURI());
        Assert.assertEquals(EXPECTED_TEXT, document.getText());

        Set<Marking> expectedNEs = new HashSet<Marking>(Arrays.asList(EXPECTED_MARKINGS));
        for (Marking marking : document.getMarkings()) {
            Assert.assertTrue("Couldn't find " + marking.toString() + " inside " + expectedNEs.toString(),
                    expectedNEs.contains(marking));
        }
        Assert.assertEquals(expectedNEs.size(), document.getMarkings().size());
    } finally {
        IOUtils.closeQuietly(dataset);
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:MSNBCDatasetTest.java


示例12: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Runs {@code WSDMDataset.findMarkings} on the configured mention lines and
 * verifies that every returned marking is an {@link Annotation} whose first
 * URI equals the expected value at the same index.
 */
@Test
public void test() {
    Set<String> annotationLines = new HashSet<String>();
    for (String mentionLine : mentions) {
        annotationLines.add(mentionLine);
    }
    List<Marking> found = WSDMDataset.findMarkings(annotationLines, tweet);
    Assert.assertNotNull(found);
    Assert.assertTrue(found.size() > 0);
    for (int idx = 0; idx < found.size(); idx++) {
        Marking current = found.get(idx);
        Assert.assertTrue(current instanceof Annotation);
        Annotation annotation = (Annotation) current;

        String firstUri = annotation.getUris().iterator().next();
        Assert.assertEquals(expectedMentions[idx], firstUri);
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:WSDM2012DatasetMentionSearchTest.java


示例13: test

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Loads the Senseval dataset from the configured file and checks the text
 * of the document at {@code docIndex} as well as the text spans covered by
 * its markings.
 *
 * @throws GerbilException
 *             if the dataset can not be initialized
 * @throws IOException
 *             if closing the dataset fails
 */
@Test
public void test() throws GerbilException, IOException {
	SensevalDataset data = new SensevalDataset(this.file);
	// Close the dataset even if initialization or one of the assertions
	// fails.
	try {
		data.init();
		List<Document> documents = data.getInstances();
		Document doc = documents.get(docIndex);
		assertEquals(expectedSentence, doc.getText());
		List<Marking> markings = doc.getMarkings();
		String[] marks = new String[markings.size()];
		for (int i = 0; i < markings.size(); i++) {
			NamedEntity entity = ((NamedEntity) markings.get(i));
			marks[i] = doc.getText().substring(entity.getStartPosition(),
					entity.getStartPosition() + entity.getLength());
		}
		assertArrayEquals(expectedMarkings, marks);
	} finally {
		data.close();
	}
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:20,代码来源:SensevalDatasetTest.java


示例14: reduceToTextAndEntities

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Creates a copy of the given document that keeps its text and URI but
 * contains only plain {@link NamedEntity} markings. A marking is created
 * for every {@link TypedNamedEntity} of the original document that is
 * accepted by a {@link TypeBasedMarkingFilter} constructed with
 * {@code false} and the URIs of {@code rdfs:Class} and {@code owl:Class}.
 *
 * @param document
 *            the document that should be reduced
 * @return a new document with the same text and URI and the reduced
 *         markings
 */
public static Document reduceToTextAndEntities(Document document) {
    MarkingFilter<TypedNamedEntity> typeFilter = new TypeBasedMarkingFilter<TypedNamedEntity>(false,
            RDFS.Class.getURI(), OWL.Class.getURI());
    List<TypedNamedEntity> typedEntities = document.getMarkings(TypedNamedEntity.class);
    List<Marking> reduced = new ArrayList<Marking>(typedEntities.size());
    for (TypedNamedEntity typedEntity : typedEntities) {
        if (!typeFilter.isMarkingGood(typedEntity)) {
            continue;
        }
        // keep position and URIs, drop the type information
        NamedEntity plainEntity = new NamedEntity(typedEntity.getStartPosition(), typedEntity.getLength(),
                typedEntity.getUris());
        reduced.add(plainEntity);
    }
    return new DocumentImpl(document.getText(), document.getDocumentURI(), reduced);
}
 
开发者ID:dice-group,项目名称:Cetus,代码行数:13,代码来源:ExtendedCetusSurfaceFormExtractorTest.java


示例15: main

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Runs the type extraction of a {@link CetusAnnotator} on a single example
 * sentence containing one named entity and prints the resulting document
 * to standard out.
 *
 * @param args
 *            not used
 */
public static void main(String[] args) {
    CetusAnnotator annotator = new CetusAnnotator(CetusSurfaceFormExtractor.create(),
            YagoBasedTypeSearcher.create());

    // the example entity starts at position 51 and has a length of 22
    Marking entity = new NamedEntity(51, 22, "http://dbpedia.org/resource/Jean-Baptiste_Cotelier");
    Document input = new DocumentImpl(
            "Born on December, 1629 and died on 19 August 1686, Jean-Baptiste Cotelier or Cotelerius was a Patristic scholar and Catholic theologian.",
            Arrays.asList(entity));

    Document document = annotator.performTypeExtraction(input);
    System.out.println(document);
}
 
开发者ID:dice-group,项目名称:Cetus,代码行数:12,代码来源:SimpleCetusAnnotatorTestClass.java


示例16: checkMarkings

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Checks the meanings of the given markings. A marking that is a
 * {@link Meaning} is handed to {@code checkMeaning}. Otherwise, if it is a
 * {@link MeaningsContainingMarking}, its meanings are handed to
 * {@code checkMeanings}. All other markings are ignored.
 *
 * @param markings
 *            the markings that should be checked
 */
@Override
public void checkMarkings(Collection<? extends Marking> markings) {
    for (Marking current : markings) {
        if (current instanceof Meaning) {
            checkMeaning((Meaning) current);
            continue;
        }
        if (current instanceof MeaningsContainingMarking) {
            MeaningsContainingMarking container = (MeaningsContainingMarking) current;
            checkMeanings(container.getMeanings());
        }
    }
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:11,代码来源:EntityCheckerManagerImpl.java


示例17: loadDocuments

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Reads the tab-separated tweets file as UTF-8 and creates one document
 * per line that has at least two columns. The first column is the tweet id
 * which is appended to the document URI prefix, the second column is the
 * tweet text. The markings are created from the annotation lines returned
 * by {@code getMarkingLines} for the tweet id.
 *
 * @param annotations
 *            the annotation file handed to {@code getMarkingLines}
 * @param tweetsFile
 *            the tab-separated file containing the tweets
 * @return the list of loaded documents
 * @throws GerbilException
 *             if an {@link IOException} occurs while reading the tweets
 */
protected List<Document> loadDocuments(File annotations, File tweetsFile)
		throws GerbilException {
	final List<Document> loaded = new ArrayList<Document>();
	final String uriPrefix = "http://" + getName() + "/";

	try (BufferedReader reader = new BufferedReader(new InputStreamReader(
			new FileInputStream(tweetsFile), Charset.forName("UTF-8")))) {
		for (String row = reader.readLine(); row != null; row = reader.readLine()) {
			String[] columns = row.split("\t");
			// rows without a text column are ignored
			if (columns.length >= 2) {
				String tweetId = columns[0];
				String tweetText = columns[1];
				List<Marking> tweetMarkings = findMarkings(getMarkingLines(annotations, tweetId), tweetText);
				loaded.add(new DocumentImpl(tweetText, uriPrefix + tweetId, tweetMarkings));
			}
		}
	} catch (IOException e) {
		throw new GerbilException("Exception while reading dataset.", e,
				ErrorTypes.DATASET_LOADING_ERROR);
	}

	return loaded;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:29,代码来源:Microposts2016Dataset.java


示例18: loadDocuments

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Loads the tweets of the given UTF-8 encoded, tab-separated file. Lines
 * with fewer than two columns are skipped. For all other lines a document
 * is created using the second column as text and the URI prefix followed
 * by the first column (the tweet id) as URI. The markings of a document
 * are derived from the lines {@code getMarkingLines} returns for its id.
 *
 * @param annotations
 *            the annotation file handed to {@code getMarkingLines}
 * @param tweetsFile
 *            the tab-separated file containing the tweets
 * @return the list of loaded documents
 * @throws GerbilException
 *             if an {@link IOException} occurs while reading the tweets
 */
protected List<Document> loadDocuments(File annotations, File tweetsFile)
		throws GerbilException {

	List<Document> result = new ArrayList<Document>();
	String prefix = "http://" + getName() + "/";

	try (BufferedReader tweetReader = new BufferedReader(new InputStreamReader(
			new FileInputStream(tweetsFile), Charset.forName("UTF-8")))) {
		String currentLine = tweetReader.readLine();
		while (currentLine != null) {
			String[] parts = currentLine.split("\t");
			if (parts.length > 1) {
				String id = parts[0];
				String text = parts[1];
				Set<String> markingLines = getMarkingLines(annotations, id);
				result.add(new DocumentImpl(text, prefix + id, findMarkings(markingLines, text)));
			}
			currentLine = tweetReader.readLine();
		}
	} catch (IOException e) {
		throw new GerbilException("Exception while reading dataset.", e,
				ErrorTypes.DATASET_LOADING_ERROR);
	}

	return result;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:29,代码来源:Microposts2015Dataset.java


示例19: mergeSubNamedEntity

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Merge {@link NamedEntity}s that are sub spans of another named entity and
 * that have the same URIs.
 *
 * <p>
 * The named entities of the document are sorted using this object as
 * comparator. Every entity is then compared with the entities following it
 * in the sorted order. If it lies completely inside one of them and both
 * share at least one URI, the URIs of the sub span are added to the
 * enclosing entity and the sub span is removed from the document. Sub
 * spans that do not share a URI with the enclosing entity are kept and
 * only reported in the debug log.
 * </p>
 * 
 * @param document
 *            the document whose named entities should be merged
 */
private void mergeSubNamedEntity(Document document) {
    List<NamedEntity> spanList = document.getMarkings(NamedEntity.class);
    NamedEntity nes[] = spanList.toArray(new NamedEntity[spanList.size()]);
    Arrays.sort(nes, this);
    Set<Marking> markingsToRemove = new HashSet<Marking>();
    boolean uriOverlapping;
    Iterator<String> uriIterator;
    for (int i = 0; i < nes.length; ++i) {
        uriOverlapping = false;
        // stop as soon as nes[i] has been merged into another entity
        for (int j = i + 1; (j < nes.length) && (!uriOverlapping); ++j) {
            // if nes[i] is a "sub span" of nes[j]
            if ((nes[i].getStartPosition() >= nes[j].getStartPosition()) && ((nes[i].getStartPosition()
                    + nes[i].getLength()) <= (nes[j].getStartPosition() + nes[j].getLength()))) {
                uriIterator = nes[i].getUris().iterator();
                while ((!uriOverlapping) && (uriIterator.hasNext())) {
                    uriOverlapping = nes[j].containsUri(uriIterator.next());
                }
                if (uriOverlapping) {
                    // Hand the URIs of the sub span over to the enclosing
                    // entity before the sub span is dropped. Adding the URIs
                    // of nes[j] to itself would have no effect and would
                    // lose the URIs that only nes[i] has.
                    nes[j].getUris().addAll(nes[i].getUris());
                    markingsToRemove.add(nes[i]);
                } else {
                    LOGGER.debug("There are two overlapping named entities with different URI sets. {}, {}", nes[i],
                            nes[j]);
                }
            }
        }
    }
    document.getMarkings().removeAll(markingsToRemove);
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:37,代码来源:MSNBCDataset.java


示例20: findMarkings

import org.aksw.gerbil.transfer.nif.Marking; //导入依赖的package包/类
/**
 * Searches the annotation file for the annotations of the given text and
 * creates a {@link NamedEntity} for each of them. An annotation belongs to
 * the text if {@code getTrecID} returns the same id for the first column
 * of the annotation line and for {@code text[0]}. Reading stops at the
 * first annotation line with a larger id.
 *
 * <p>
 * The fourth annotation column is searched inside {@code text[1]} to
 * determine the start position and its length is used as entity length.
 * The third column, without its first character and with "/" replaced by
 * ".", is inserted into the query template and sent to the DBpedia service
 * to retrieve the URI of the entity.
 * </p>
 *
 * @param text
 *            the columns of the text line; index 0 is used for the id and
 *            index 1 as the text
 * @param annFile
 *            the UTF-8 encoded, tab-separated annotation file
 * @return the markings created for the text
 * @throws GerbilException
 *             if an {@link IOException} occurs while reading the
 *             annotation file
 */
private List<Marking> findMarkings(String[] text, File annFile) throws GerbilException {
	List<Marking> markings = new ArrayList<Marking>();
	try (BufferedReader breader = new BufferedReader(new InputStreamReader(
			new FileInputStream(annFile), Charset.forName("UTF-8")))) {
		String line;

		while ((line = breader.readLine()) != null) {
			if (line.isEmpty()) {
				continue;
			}
			String[] annotation = line.split("\t");
			int searchID = getTrecID(text[0]);
			int annoID = getTrecID(annotation[0]);
			if (searchID == annoID) {
				// NOTE(review): indexOf returns -1 if the mention is not part
				// of the text; that value is passed on unchanged - confirm
				// that this is intended.
				int start = text[1].indexOf(annotation[3]);
				int length = annotation[3].length();

				// FIXME time consuming!
				String freebaseID = annotation[2].substring(1, annotation[2].length()).replace("/", ".");
				Query query = QueryFactory.create(queryTemp.replace("%%v%%", freebaseID));
				QueryExecution qexec = QueryExecutionFactory.createServiceRequest(DBPEDIA_SERVICE, query);
				String uri;
				try {
					uri = qexec.execSelect().next().getResource("s").getURI();
				} finally {
					// release the resources of the remote query; one
					// execution is created per matching annotation
					qexec.close();
				}

				markings.add(new NamedEntity(start, length, uri));
			} else if (annoID > searchID) {
				// There is no annotation for the given text
				break;
			}
		}
	} catch (IOException e) {
		throw new GerbilException("Exception while reading dataset.", e,
				ErrorTypes.DATASET_LOADING_ERROR);
	}

	return markings;
}
 
开发者ID:dice-group,项目名称:gerbil,代码行数:39,代码来源:ERDDataset2.java



注:本文中的org.aksw.gerbil.transfer.nif.Marking类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java EntityEnchantmentTableParticleFX类代码示例发布时间:2022-05-22
下一篇:
Java ApplicationStartData类代码示例发布时间:2022-05-22
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap