本文整理汇总了C++中indri::api::QueryEnvironment类的典型用法代码示例。如果您正苦于以下问题:C++ QueryEnvironment类的具体用法?C++ QueryEnvironment怎么用?C++ QueryEnvironment使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了QueryEnvironment类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: runtime_error
// Score a single candidate concept for query `qId` by running the
// reformulated query against the given set of top-ranked (PRF) documents
// and summing the per-document retrieval scores.
//
// conceptSty/conceptStr -- semantic type and surface string of the concept.
// topDocIds             -- internal ids of the PRF working-set documents.
// Returns the sum of the documents' scores for the reformulated query.
// Throws std::runtime_error if not every working-set document is scored.
double indri::query::ConceptSelectorFuns::findConceptScorePrf(string conceptSty, string conceptStr, string qId, std::vector<lemur::api::DOCID_T> topDocIds,
                                                              indri::api::QueryEnvironment & env,
                                                              indri::query::QueryReformulator * queryReformulator,
                                                              vector<string> resourceNames_)
{
    // Build the single-concept candidate list consumed by the reformulator.
    vector<pair<string, string> > tmp = {make_pair(conceptSty, conceptStr)};
    vector<pair<string, vector<pair<string, string> > > > candConcepts_ = {make_pair(qId, tmp)};
    oneResourceConceptsParams.oneResourceConcepts = candConcepts_;

    // Reformulate the query with this one concept added.
    vector<pair<string, string> > queriesText = queryReformulator->testOneConceptAddition2OneQuery(conceptSty, conceptStr, qId, resourceNames_);

    // Run the query over the working set, going through the Utility cache
    // to avoid re-running identical (query, working-set) combinations.
    std::vector< indri::api::ScoredExtentResult > results_;
    if(wsuIr::expander::Utility::runQuery_results_isExist(queriesText, topDocIds))
    {
        results_ = wsuIr::expander::Utility::runQuery_results_get(queriesText, topDocIds);
    }
    else
    {
        results_ = env.runQuery(queriesText.front().second, topDocIds, topDocIds.size());
        wsuIr::expander::Utility::runQuery_results_store(queriesText, topDocIds, results_);
    }

    // Every document in the working set must receive a score.
    // (Fixed: the message previously pointed at the wrong source file.)
    if(results_.size() != topDocIds.size())
        throw runtime_error("ConceptSelectorFuns.cpp: some of top-ranked documents are not scored");

    // Concept score = sum of document scores. const& avoids copying each
    // ScoredExtentResult on every iteration.
    double conceptScore = 0;
    for(const auto& r : results_)
    {
        conceptScore += r.score;
    }
    return conceptScore;
}
开发者ID:teanalab,项目名称:MRF-L,代码行数:35,代码来源:ConceptSelectorFuns.cpp
示例2: open_indexes
// Attach every index and server named in the parameter set to the query
// environment, then install any smoothing ("rule") parameters present.
static void open_indexes( indri::api::QueryEnvironment& environment,
                          indri::api::Parameters& param ) {
  // Local repositories.
  if( param.exists( "index" ) ) {
    indri::api::Parameters indexList = param["index"];
    for( unsigned int idx = 0; idx < indexList.size(); idx++ )
      environment.addIndex( std::string( indexList[idx] ) );
  }

  // Remote indri daemons.
  if( param.exists( "server" ) ) {
    indri::api::Parameters serverList = param["server"];
    for( unsigned int idx = 0; idx < serverList.size(); idx++ )
      environment.addServer( std::string( serverList[idx] ) );
  }

  // Scoring / smoothing rules, if any were supplied.
  std::vector<std::string> smoothingRules;
  if( copy_parameters_to_string_vector( smoothingRules, param, "rule" ) )
    environment.setScoringRules( smoothingRules );
}
开发者ID:blaze3j,项目名称:DocHunt,代码行数:18,代码来源:clarity.cpp
示例3: clarity
// how to just compute the clarity score without printing out the terms.
static double clarity( const std::string& query,
indri::api::QueryEnvironment & env,
const std::vector<indri::query::RelevanceModel::Gram*>& grams, int numTerms ) {
int count = 0;
double sum=0, ln_Pr=0;
for( size_t j=0; j< numTerms && j < grams.size(); j++ ) {
std::string t = grams[j]->terms[0];
count++;
// query-clarity = SUM_w{P(w|Q)*log(P(w|Q)/P(w))}
// P(w)=cf(w)/|C|
// the relevance model uses stemmed terms, so use stemCount
double pw = ((double)env.stemCount(t)/(double)env.termCount());
// P(w|Q) is a prob computed by any model, e.g. relevance models
double pwq = grams[j]->weight;
sum += pwq;
ln_Pr += (pwq)*log(pwq/pw);
}
return (ln_Pr/(sum ? sum : 1.0)/log(2.0));
}
开发者ID:blaze3j,项目名称:DocHunt,代码行数:21,代码来源:clarity.cpp
示例4:
void matIR::QueryStats::init(const std::string& query, indri::api::QueryEnvironment& environment)
{
// Extract only the terms from the query and add to the vector
indri::api::QueryParserWrapper *parser = indri::api::QueryParserFactory::get(query, "indri");
indri::lang::ScoredExtentNode* rootNode = parser->query();
indri::lang::RawScorerNodeExtractor extractor;
rootNode->walk(extractor);
std::vector<indri::lang::RawScorerNode*>& scorerNodes = extractor.getScorerNodes();
for (int i = 0; i < scorerNodes.size(); i++){
std::string qterm = environment.stemTerm(scorerNodes[i]->queryText());
queryString.push_back(qterm);
if(environment.stemCount(qterm) == 0)
continue;
if( _queryTokens.find(qterm) == _queryTokens.end() )
_queryTokens.insert(make_pair( qterm, 1));
else
_queryTokens[qterm] += 1;
}
// Initialize vectors
_query_collectionFrequency.set_size(_queryTokens.size());
_query_documentFrequency.set_size(_queryTokens.size());
// Now obtain the statistics
int i = 0;
map<std::string, int>::const_iterator iter;
for (iter=_queryTokens.begin(); iter != _queryTokens.end(); ++iter) {
std::string stem = environment.stemTerm(iter->first);
_query_collectionFrequency(i) = (double) environment.stemCount(stem);
_query_documentFrequency(i) = (double) environment.documentStemCount(stem);
++i;
}
}
开发者ID:semanticpc,项目名称:matIR,代码行数:40,代码来源:QueryStats.cpp
示例5: updateQueryDetails
// Parse `query`, stem each term, and record the multiplicity (and first-seen
// order) of every in-vocabulary query stem into `resultData`.
void updateQueryDetails(indri::api::QueryEnvironment& environment,
                        Results& resultData,
                        string query){
    indri::api::QueryParserWrapper *parser = indri::api::QueryParserFactory::get(query, "indri");
    indri::lang::ScoredExtentNode* rootNode = parser->query();
    indri::lang::RawScorerNodeExtractor extractor;
    rootNode->walk(extractor);
    vector<indri::lang::RawScorerNode*>& scorerNodes = extractor.getScorerNodes();
    for (size_t i = 0; i < scorerNodes.size(); i++){  // size_t: no signed/unsigned mismatch
        string qterm = environment.stemTerm(scorerNodes[i]->queryText());
        if(environment.stemCount(qterm) == 0)
            continue;  // skip out-of-vocabulary terms
        if( resultData.queryStems.find(qterm) == resultData.queryStems.end() ){
            resultData.queryStems.insert(make_pair( qterm, 1));
            resultData.queryStemOrder.push_back(qterm);  // preserve first-seen order
        }
        else
            resultData.queryStems[qterm] += 1;
    }
    delete parser;  // fixed: the parser was leaked on every call
}
开发者ID:semanticpc,项目名称:indriR,代码行数:22,代码来源:indriRetOld.cpp
示例6: max
// Score every candidate concept against the PRF working set and return the
// concepts keyed by their min-max normalized scores.
// concatenatedGoodConcepts -- (semantic type, surface string) candidates.
// topDocsNames             -- external docnos of the PRF working set.
multimap<double, pair<string, string> > indri::query::ConceptSelectorFuns::normConceptScorePrf(
    vector<pair<string, string> > concatenatedGoodConcepts,
    string qId,
    vector<string> topDocsNames,
    indri::api::QueryEnvironment & env,
    indri::query::QueryReformulator * queryReformulator,
    vector<string> resourceNames_)
{
    // Resolve external docnos to internal ids once, up front.
    std::vector<lemur::api::DOCID_T> topDocIds = env.documentIDsFromMetadata("docno", topDocsNames);
    multimap<double, pair<string, string>, std::greater<double> > scoredConcepts_;
    for(auto concStyStrPair: concatenatedGoodConcepts) // for each extracted concept
    {
        string conceptSty = concStyStrPair.first;
        string conceptStr = concStyStrPair.second;
        double conceptScore = indri::query::ConceptSelectorFuns::findConceptScorePrf(conceptSty,
                                                                                     conceptStr,
                                                                                     qId,
                                                                                     topDocIds,
                                                                                     env,
                                                                                     queryReformulator,
                                                                                     resourceNames_);
        scoredConcepts_.insert(make_pair(conceptScore, make_pair(conceptSty, conceptStr)));
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: conceptScore = " << conceptStr << " -> " << conceptScore << endl;
    }
    // Find the score range. Fixed: max_sc used to start at 0, which is wrong
    // when every concept score is negative (as summed indri log-probability
    // scores are) -- the maximum was never updated.
    double max_sc = -std::numeric_limits<double>::infinity();
    double min_sc = std::numeric_limits<double>::infinity();
    for (const auto& sc: scoredConcepts_)
    {
        max_sc = max(max_sc, sc.first);
        min_sc = min(min_sc, sc.first);
    }
    cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: min_sc, max_sc: " << min_sc << ", " << max_sc << endl;
    // Min-max normalize scores in scoredConcepts_.
    multimap<double, pair<string, string> > scoredConcepts_norm;
    const double range = max_sc - min_sc;
    for (auto itSc = scoredConcepts_.begin(); itSc != scoredConcepts_.end(); itSc++)
    {
        // Fixed: when all scores were identical the old code divided by zero
        // and produced NaN keys; treat every concept as maximal instead.
        double conceptScore = (range != 0) ? (itSc->first - min_sc) / range : 1.0;
        scoredConcepts_norm.insert(make_pair(conceptScore, make_pair((itSc->second).first, (itSc->second).second)));
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: scoredConcepts_norm: scoredConcepts_ = " << itSc->first << endl;
        cout << "indri::query::ConceptSelectorFuns::normConceptScorePrf: scoredConcepts_norm: conceptScore = " << conceptScore << " = (" << itSc->first << " - " << min_sc << " )/( " << max_sc << " - " << min_sc << " )" << endl;
    }
    return scoredConcepts_norm;
}
开发者ID:teanalab,项目名称:MRF-L,代码行数:47,代码来源:ConceptSelectorFuns.cpp
示例7: convert_docnoscore_to_binary
// Read whitespace-separated "docno score" pairs from the text file `infile`,
// resolve each docno to an internal document id through `env`, and append
// (4-byte id, 8-byte double score) records to `outfile`. Docnos that do not
// exist in the index are skipped silently.
void convert_docnoscore_to_binary( indri::file::File& outfile, const std::string& infile, indri::api::QueryEnvironment& env ) {
  std::ifstream in;
  std::string docnoName = "docno";
  // Fixed: the write buffer was heap-allocated with new/delete and leaked
  // if a metadata lookup threw; a stack object is released automatically.
  indri::file::SequentialWriteBuffer outb( outfile, 1024*1024 );
  in.open( infile.c_str(), std::ifstream::in );

  std::string docno;
  double score;
  // Fixed: extraction-driven loop. The old eof()-driven loop only checked
  // eof(), so a malformed (non-numeric) score field put the stream into a
  // fail state and spun forever; this stops on any extraction failure.
  while( in >> docno >> score ) {
    std::cout << "looking up: " << docno << " " << score << std::endl;

    std::vector<std::string> docnoValues;
    docnoValues.push_back( docno );
    std::vector<lemur::api::DOCID_T> result = env.documentIDsFromMetadata( docnoName, docnoValues );
    if( result.size() == 0 ) {
      // LEMUR_THROW( LEMUR_IO_ERROR, "No document exists with docno: " + docno );
      continue; // allow entries that don't exist and ignore silently.
    }

    int document = result[0];
    std::cout << document << std::endl;
    outb.write( (const void*) &document, sizeof(UINT32) );  // id stored as 4 bytes
    outb.write( (const void*) &score, sizeof(double) );
  }
  outb.flush();
  in.close();
}
开发者ID:blaze3j,项目名称:DocHunt,代码行数:40,代码来源:makeprior.cpp
示例8: generateResults
// Run an annotated query, cache ids/scores/docnos in the module-level
// state, and optionally build the term statistics for the result set.
SEXP generateResults(string _qno, string _query, int _documentLimit, bool stats) {
  // Reset all module-level result state before the new run.
  resultsData = resultsData_nullCopy;
  documentIDs.clear();
  scores.clear();
  extDocIDs.clear();
  terms.clear();
  _gramTable.clear();
  results.clear();

  qno = _qno;
  query = _query;
  documentLimit = _documentLimit;

  // Execute the query and convert scores from log space.
  qa = environment.runAnnotatedQuery(query, _documentLimit);
  results = qa->getResults();
  _logtoposterior(results);

  // Pull internal ids and scores out of the ranked list.
  for (const auto& hit : results) {
    documentIDs.push_back(hit.document);
    scores.push_back(hit.score);
  }
  extDocIDs = environment.documentMetadata(documentIDs, "docno");

  // Optionally compute the gram/term statistics for this result set.
  if (stats) {
    updateQueryDetails(environment, resultsData, query);
    countGrams();
    buildStats();
  }
  return Rcpp::wrap(true);
}
开发者ID:semanticpc,项目名称:indriR,代码行数:35,代码来源:indriRetOld.cpp
示例9: buildStats
// Build per-term statistics over the current result set from _gramTable:
// fills resultsData.tfMatrix (results x terms), ctfVector (collection term
// frequency) and dfVector (document frequency), assigns each gram a dense
// internal_termID, and records the column index of every query stem.
// NOTE(review): _gramTable is cleared at the end, so this consumes the
// counts produced by countGrams().
void buildStats() {
HGram::iterator iter;
// One row per ranked document, one column per distinct gram.
resultsData.tfMatrix = arma::zeros<arma::mat>(results.size(),
_gramTable.size());
// Initialize the frequency vectors to the gram-table size.
resultsData.dfVector.set_size(_gramTable.size());
resultsData.ctfVector.set_size(_gramTable.size());
int tmpTermID = -1;
for( iter = _gramTable.begin(); iter != _gramTable.end(); iter++ ) {
// NOTE(review): gramCount is never used.
double gramCount = 0;
++tmpTermID;
// The hash-table iterator yields pointers to the key/value pointers.
Gram* gram = *iter->first;
GramCounts* gramCounts = *iter->second;
gram->internal_termID = tmpTermID;
terms.push_back(gram->term);
// Remember the column index of terms that also occur in the query.
if( resultsData.queryStems.find(gram->term) != resultsData.queryStems.end() )
resultsData.queryStemIndex[gram->term] = tmpTermID;
resultsData.ctfVector(tmpTermID) = environment.stemCount(gram->term);
resultsData.dfVector(tmpTermID) = environment.documentStemCount(gram->term);
size_t c, r;
// gramCounts->counts is a sparse (result-row, count) list ordered by row;
// walk it in step with the dense row index r to fill the tf matrix.
for( r = 0, c = 0; r < results.size() && c < gramCounts->counts.size(); r++ ) {
if( gramCounts->counts[c].first == r ) {
resultsData.tfMatrix(r, tmpTermID) = gramCounts->counts[c].second;
c++;
}
}
}
_gramTable.clear();
}
开发者ID:semanticpc,项目名称:indriR,代码行数:31,代码来源:indriRetOld.cpp
示例10: runQuery
// Run a query and return a TREC-style result table
// (topic, Q0, docID, rank, score, runID) as an R data.frame.
SEXP runQuery(string _qno, string _query, int _documentLimit, string _runid="default"){
  indri::api::QueryAnnotation* qa;
  qa = environment.runAnnotatedQuery(_query, _documentLimit);
  // Copy the ranked list, then release the annotation.
  // (Fixed: qa was leaked on every call.)
  std::vector<indri::api::ScoredExtentResult> results = qa->getResults();
  delete qa;
  //_logtoposterior(results);
  // Extract internal document ids and scores.
  std::vector<lemur::api::DOCID_T> documentIDs;
  std::vector<double> scores;
  for (size_t i = 0; i < results.size(); i++){
    documentIDs.push_back(results[i].document);
    scores.push_back(results[i].score);
  }
  // Fixed: clamp to the number of results actually returned, so all
  // data.frame columns have the same length when fewer than _documentLimit
  // documents match. (Also removed res_qno, which was built from the global
  // qno but never used.)
  int documentLimit = (_documentLimit < (int)results.size()) ? _documentLimit : (int)results.size();
  vector<string> res_q0;
  vector<string> res_runid;
  for(int i=0; i < documentLimit; i++){
    res_q0.push_back("Q0");
    res_runid.push_back(_runid);
  }
  std::vector<string> extDocIDs = environment.documentMetadata(documentIDs, "docno");
  return Rcpp::DataFrame::create( Named("topic")= _qno,
          Named("q0")= res_q0, Named("docID")= wrap(extDocIDs),
          Named("rank")= seq( 1, documentLimit ),
          Named("score")= wrap(scores),
          Named("runID")= res_runid);
}
开发者ID:semanticpc,项目名称:indriR,代码行数:33,代码来源:indriRetOld.cpp
示例11: Index
// Open a local index directory, or connect to an indri daemon when
// _server is true, translating any failure into an R-level error.
Index(string _indexPath, bool _server) {
  try {
    if (_server) {
      environment.addServer(_indexPath);
    } else {
      environment.addIndex(_indexPath);
    }
  } catch (std::exception &ex) {
    forward_exception_to_r(ex);
  } catch (lemur::api::Exception& e) {
    ::Rf_error("Unable to open index");
  } catch (...) {
    ::Rf_error("Caught unhandled exception");
  }
}
开发者ID:semanticpc,项目名称:indriR,代码行数:12,代码来源:indriRetOld.cpp
示例12: getDocTermMatrix
// Return the document-term matrix of the current result set under the
// requested weighting: "tf" (raw counts), "tf_normalized", or "tfidf".
// Rows are documents (named by external docno), columns are terms.
// Unknown or unimplemented weightings (e.g. "idf") return NULL.
SEXP getDocTermMatrix(string termWeighting){
  Rcpp::List dimnms = Rcpp::List::create(extDocIDs, terms);
  if(termWeighting == "tf"){
    NumericMatrix d = Rcpp::wrap(resultsData.tfMatrix);
    d.attr("dimnames") = dimnms;
    return d;
  }else if(termWeighting == "tf_normalized"){
    arma::mat tfnorm = resultsData.tfMatrix;
    // NOTE(review): arma::sum(m, 0) sums each COLUMN, so despite its name
    // docLen holds per-term totals and each term column is normalized by
    // that term's total over the result set -- confirm this is intended.
    arma::rowvec docLen = arma::sum(tfnorm, 0);
    tfnorm.each_row() /= docLen;
    NumericMatrix d = Rcpp::wrap(tfnorm);
    d.attr("dimnames") = dimnms;
    return d;
  }else if(termWeighting == "tfidf"){
    arma::mat tfidfMat = resultsData.tfMatrix;
    // Smoothed IDF: log((N + 1) / (df + 0.5)).
    arma::vec idf = arma::log((environment.documentCount() + 1) /
                              (resultsData.dfVector + 0.5));
    tfidfMat.each_row() %= idf.t();
    NumericMatrix d = Rcpp::wrap(tfidfMat);
    d.attr("dimnames") = dimnms;
    return d;
  }
  // Fixed: "idf" (never implemented) and unrecognized weightings previously
  // fell off the end of a SEXP-returning function -- undefined behavior.
  return R_NilValue;
}
开发者ID:semanticpc,项目名称:indriR,代码行数:27,代码来源:indriRetOld.cpp
示例13: _runQuery
// Runs the query, expanding it if necessary. Will print output as well if verbose is on.
void _runQuery( std::stringstream& output, const std::string& query,
const std::string &queryType, const std::vector<std::string> &workingSet, std::vector<std::string> relFBDocs ) {
try {
if( _printQuery ) output << "# query: " << query << std::endl;
std::vector<lemur::api::DOCID_T> docids;;
if (workingSet.size() > 0)
docids = _environment.documentIDsFromMetadata("docno", workingSet);
if (relFBDocs.size() == 0) {
if( _printSnippets ) {
if (workingSet.size() > 0)
_annotation = _environment.runAnnotatedQuery( query, docids, _initialRequested, queryType );
else
_annotation = _environment.runAnnotatedQuery( query, _initialRequested );
_results = _annotation->getResults();
} else {
if (workingSet.size() > 0)
_results = _environment.runQuery( query, docids, _initialRequested, queryType );
else
_results = _environment.runQuery( query, _initialRequested, queryType );
}
}
if( _expander ) {
std::vector<indri::api::ScoredExtentResult> fbDocs;
if (relFBDocs.size() > 0) {
docids = _environment.documentIDsFromMetadata("docno", relFBDocs);
for (size_t i = 0; i < docids.size(); i++) {
indri::api::ScoredExtentResult r(0.0, docids[i]);
fbDocs.push_back(r);
}
}
std::string expandedQuery;
if (relFBDocs.size() != 0)
expandedQuery = _expander->expand( query, fbDocs );
else
expandedQuery = _expander->expand( query, _results );
if( _printQuery ) output << "# expanded: " << expandedQuery << std::endl;
if (workingSet.size() > 0) {
docids = _environment.documentIDsFromMetadata("docno", workingSet);
_results = _environment.runQuery( expandedQuery, docids, _requested, queryType );
} else {
_results = _environment.runQuery( expandedQuery, _requested, queryType );
}
}
}
catch( lemur::api::Exception& e )
{
_results.clear();
LEMUR_RETHROW(e, "QueryThread::_runQuery Exception");
}
}
开发者ID:wangxuemin,项目名称:coding,代码行数:53,代码来源:IndriRunQuery.cpp
示例14: countGrams
// Count unigram occurrences of every valid stem over the retrieved extents,
// accumulating per-result sparse counts in _gramTable. OOV positions
// (position id 0) and stems rejected by isValid() are skipped.
void countGrams() {
  std::vector<indri::api::DocumentVector*> vectors =
      environment.documentVectors( documentIDs );
  // for each query result
  for( size_t i=0; i< results.size(); i++ ) {
    // run through the text, extracting unigrams
    indri::api::ScoredExtentResult& result = results[i];
    indri::api::DocumentVector* v = vectors[i];
    std::vector<int>& positions = v->positions();
    std::vector<std::string>& stems = v->stems();
    // An extent end of 0 means "whole document".
    if (result.end == 0) result.end = positions.size();
    // for each word position in the extent
    for( int j = result.begin; j < result.end; j++ ) {
      // Validate the stem BEFORE allocating. (Fixed: the old code allocated
      // a GramCounts first and leaked it on this continue path; the
      // containsOOV bookkeeping left over from n-gram support was dead code.)
      if( positions[ j ] == 0 || (! isValid(stems[ positions[ j ] ])) )
        continue;
      GramCounts* newCounts = new GramCounts;
      newCounts->gram.term = stems[ positions[ j ] ];
      // Insert the gram if unseen, otherwise reuse the existing entry.
      GramCounts** gramCounts = _gramTable.find( &newCounts->gram );
      if( gramCounts == 0 ) {
        _gramTable.insert( &newCounts->gram, newCounts );
        gramCounts = &newCounts;
      } else {
        delete newCounts;
      }
      if( (*gramCounts)->counts.size() && (*gramCounts)->counts.back().first == i ) {
        // we already have some counts going for this query result, so just add this one
        (*gramCounts)->counts.back().second++;
      } else {
        // no counts yet in this document, so add an entry
        (*gramCounts)->counts.push_back( std::make_pair( i, 1 ) );
      }
    }
  }
  // The document vectors are owned by the caller of documentVectors().
  for (unsigned int i = 0; i < vectors.size(); i++)
    delete vectors[i];
}
示例15: generateSnippets
// Build a snippet for every retrieved document from the stored query
// annotation `qa`; returns a character vector named by external docno.
// html -- emit HTML-formatted snippets when true.
SEXP generateSnippets(bool html){
  vector<string> snippetString;
  vector< indri::api::ParsedDocument* > pdocuments = environment.documents(documentIDs);
  indri::api::SnippetBuilder sp(html);
  for( size_t row=0; row < documentIDs.size(); row++ )
    snippetString.push_back(sp.build(documentIDs[row], pdocuments[row], qa));
  // Fixed: QueryEnvironment::documents() transfers ownership of the
  // ParsedDocument objects to the caller; they were leaked before.
  for( size_t row=0; row < pdocuments.size(); row++ )
    delete pdocuments[row];
  CharacterVector c = wrap(snippetString);
  c.attr("names") = extDocIDs;
  return c;
}
开发者ID:semanticpc,项目名称:indriR,代码行数:11,代码来源:indriRetOld.cpp
示例16: addServer
// Attach an indri daemon ("host:port") to the query environment,
// translating any failure into an R-level error.
SEXP addServer(string _server){
  try {
    environment.addServer(_server);
  } catch (std::exception &ex) {
    forward_exception_to_r(ex);
  } catch (lemur::api::Exception& e) {
    ::Rf_error("Unable to open index");
  } catch (...) {
    ::Rf_error("Caught unhandled exception");
  }
  // Fixed: the function is declared SEXP but had no return statement
  // (undefined behavior on the success path); return NULL to R.
  return R_NilValue;
}
开发者ID:semanticpc,项目名称:indriR,代码行数:11,代码来源:indriRetOld.cpp
示例17: getTermStats
// Return a per-term statistics data.frame with columns DocFreq (document
// frequency), IDF, and cTF (collection term frequency); terms are the row
// names. (Removed: a statName vector that was built but never used.)
SEXP getTermStats(){
  // Smoothed IDF: log((N + 1) / (df + 0.5)).
  arma::vec idf = arma::log((environment.documentCount() + 1) /
                            (resultsData.dfVector + 0.5));
  DataFrame d = DataFrame::create(Named("DocFreq")=resultsData.dfVector,
                                  Named("IDF")=idf,
                                  Named("cTF")=resultsData.ctfVector);
  d.attr("row.names") = terms;
  return d;
}
开发者ID:semanticpc,项目名称:indriR,代码行数:14,代码来源:indriRetOld.cpp
示例18: _runQuery
// Execute a query (optionally followed by pseudo-relevance-feedback
// expansion), leaving the ranked list in _results. When verbose printing
// is on, the raw and expanded query texts are echoed to `output`.
void _runQuery( std::stringstream& output, const std::string& query,
                const std::string &queryType ) {
  try {
    if( _printQuery ) output << "# query: " << query << std::endl;

    // Snippets require the annotated form of the run.
    if( _printSnippets ) {
      _annotation = _environment.runAnnotatedQuery( query, _initialRequested );
      _results = _annotation->getResults();
    } else {
      _results = _environment.runQuery( query, _initialRequested, queryType );
    }

    // Re-run with the expanded query when an expander is configured.
    if( _expander ) {
      const std::string expanded = _expander->expand( query, _results );
      if( _printQuery ) output << "# expanded: " << expanded << std::endl;
      _results = _environment.runQuery( expanded, _requested, queryType );
    }
  }
  catch( lemur::api::Exception& e ) {
    // Leave no stale results behind and rethrow with context.
    _results.clear();
    LEMUR_RETHROW(e, "QueryThread::_runQuery Exception");
  }
}
开发者ID:foremire,项目名称:lemur-mix,代码行数:25,代码来源:IndriRunQuery.cpp
示例19: setScoringRules
// Install a retrieval smoothing rule of the form
// "method:<method>,<parameters>" on the query environment.
// NOTE: baseline methods (tfidf/Okapi/BM25) would go through
// environment.setBaseline() instead; that path is currently disabled.
SEXP setScoringRules(string method, string parameters){
  vector<string> rules;
  rules.push_back("method:" + method + "," + parameters);
  environment.setScoringRules(rules);
  return R_NilValue;
}
开发者ID:semanticpc,项目名称:indriR,代码行数:16,代码来源:indriRetOld.cpp
示例20: closeIndex
// Shut down the query environment and report success to R.
SEXP closeIndex() {
  environment.close();
  return Rcpp::wrap(true);
}
开发者ID:semanticpc,项目名称:indriR,代码行数:4,代码来源:indriRetOld.cpp
注:本文中的indri::api::QueryEnvironment类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论