• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

C++ xapian::Document类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C++中xapian::Document的典型用法代码示例。如果您正苦于以下问题：C++ Document类的具体用法？C++ Document怎么用？C++ Document使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了Document类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: main

int main(int argc, char **argv)
{
    // Simplest possible options parsing: we just require three or more
    // parameters.
    if(argc < 4) {
        cout << "usage: " << argv[0] <<
	    " <path to database> <document data> <document terms>" << endl;
        exit(1);
    }

    // Catch any Xapian::Error exceptions thrown
    try {
        // Make the database
	Xapian::WritableDatabase database(argv[1], Xapian::DB_CREATE_OR_OPEN);

        // Make the document
	Xapian::Document newdocument;

        // Put the data in the document
        newdocument.set_data(string(argv[2]));

        // Put the terms into the document
        for (int i = 3; i < argc; ++i) {
            newdocument.add_posting(argv[i], i - 2);
        }

        // Add the document to the database
        database.add_document(newdocument);
    } catch(const Xapian::Error &error) {
        cout << "Exception: "  << error.get_msg() << endl;
    }
}
开发者ID:IthacaDream,项目名称:Test,代码行数:32,代码来源:quickstartindex.cpp


示例2: addTermsToDocument

/// Adds the tokenizer's terms to a Xapian document as postings.
///
/// Terms that start with an upper case letter are additionally stored raw with
/// an "R" prefix at the current position. Each term is then lower-cased and
/// stored with the given prefix, stemmed and/or unstemmed depending on mode;
/// when m_stemLanguage is empty, terms are always stored unstemmed.
///
/// @param tokens   source of terms; empty terms are skipped
/// @param doc      document that receives the postings
/// @param prefix   prefix prepended to every lower-cased term
/// @param termPos  position of the next posting; advanced as terms are stored
/// @param mode     STORE_UNSTEM, STORE_STEM or STORE_BOTH
void XapianIndex::addTermsToDocument(Tokenizer &tokens, Xapian::Document &doc,
	const string &prefix, Xapian::termcount &termPos, StemmingMode mode) const
{
	// Owns the optional stemmer so that it is released on every exit path,
	// including when one of the Xapian calls below throws
	struct StemmerGuard
	{
		Xapian::Stem *m_pStem;

		StemmerGuard() : m_pStem(NULL) {}
		~StemmerGuard() { delete m_pStem; }
	} stemmerGuard;
	string term;

	// Do we know what language to use for stemming ?
	if (m_stemLanguage.empty() == false)
	{
		stemmerGuard.m_pStem = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
	}
	Xapian::Stem *pStemmer = stemmerGuard.m_pStem;

	// Get the terms
	while (tokens.nextToken(term) == true)
	{
		if (term.empty() == true)
		{
			continue;
		}
		// Does it start with a capital letter ?
		// The cast keeps bytes above 0x7F from reaching isupper() as negative values
		if (isupper(static_cast<unsigned char>(term[0])) != 0)
		{
			// R-prefix the raw term
			doc.add_posting(string("R") + term, termPos);
		}
		// Lower case the term
		term = StringManip::toLowerCase(term);

		// Stem the term ?
		if ((mode == STORE_UNSTEM) ||
			(pStemmer == NULL))
		{
			doc.add_posting(limitTermLength(prefix + term), termPos++);
		}
		else if (mode == STORE_STEM)
		{
			string stemmedTerm = pStemmer->stem_word(term);

			doc.add_posting(limitTermLength(prefix + stemmedTerm), termPos++);
		}
		else if (mode == STORE_BOTH)
		{
			string stemmedTerm = pStemmer->stem_word(term);

			// Add both
			doc.add_posting(limitTermLength(prefix + term), termPos);
			// ...at the same position
			doc.add_posting(limitTermLength(prefix + stemmedTerm), termPos++);
		}
	}
#ifdef DEBUG
	cout << "XapianIndex::addTermsToDocument: added " << termPos << " terms" << endl;
#endif
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:59,代码来源:XapianIndex.cpp


示例3: getDocumentInfo

/// Returns a document's properties.
bool XapianIndex::getDocumentInfo(unsigned int docId, DocumentInfo &docInfo) const
{
	bool foundDocument = false;

	if (docId == 0)
	{
		return false;
	}

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::Database *pIndex = pDatabase->readLock();
		if (pIndex != NULL)
		{
			Xapian::Document doc = pIndex->get_document(docId);

			// Get the current document data
			string record = doc.get_data();
			if (record.empty() == false)
			{
				string language = Languages::toLocale(StringManip::extractField(record, "language=", ""));

				docInfo = DocumentInfo(StringManip::extractField(record, "caption=", "\n"),
					StringManip::extractField(record, "url=", "\n"),
					StringManip::extractField(record, "type=", "\n"),
					language);
				docInfo.setTimestamp(StringManip::extractField(record, "timestamp=", "\n"));
#ifdef DEBUG
				cout << "XapianIndex::getDocumentInfo: language is "
					<< docInfo.getLanguage() << endl;
#endif
				foundDocument = true;
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't get document properties: " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't get document properties, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return foundDocument;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:55,代码来源:XapianIndex.cpp


示例4: setDocumentData

void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc,
                                  const string &language) const
{
    time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp());

    // Add this value to allow sorting by date
    doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT));

    DocumentInfo docCopy(info);
    docCopy.setLanguage(language);
    doc.set_data(XapianDatabase::propsToRecord(&docCopy));
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:12,代码来源:XapianIndex.cpp


示例5: renameLabel

/// Renames a label.
bool XapianIndex::renameLabel(const string &name, const string &newName)
{
	bool renamedLabel = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
		if (pIndex != NULL)
		{
			string term("XLABEL:");

			// Get documents that have this label
			term += name;
			for (Xapian::PostingIterator postingIter = pIndex->postlist_begin(term);
				postingIter != pIndex->postlist_end(term); ++postingIter)
			{
				Xapian::docid docId = *postingIter;

				// Get the document
				Xapian::Document doc = pIndex->get_document(docId);
				// Remove the term
				doc.remove_term(term);
				// ...add the new one
				doc.add_term(limitTermLength(string("XLABEL:") + newName));
				// ...and update the document
				pIndex->replace_document(docId, doc);
			}

			renamedLabel = true;
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't delete label: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't delete label, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return renamedLabel;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:51,代码来源:XapianIndex.cpp


示例6: setDocumentData

/// Sets the document data as an omindex-style record and stores the
/// modification time in value slot 0.
///
/// The record holds the fields url, sample (left empty), caption, type,
/// modtime and language, one per line.
///
/// @param info      properties to store
/// @param doc       document that receives the record and the date value
/// @param language  language name; stored lower-cased
void XapianIndex::setDocumentData(const DocumentInfo &info, Xapian::Document &doc,
	const string &language) const
{
	string title(info.getTitle());
	string timestamp(info.getTimestamp());
	char timeStr[64];
	time_t timeT = TimeConverter::fromTimestamp(timestamp);

	// Set the document data omindex-style
	string record = "url=";
	record += info.getLocation();
	// The sample will be generated at query time
	record += "\nsample=";
	record += "\ncaption=";
	if (badField(title) == true)
	{
		// Blank out every '=' in the title
		string::size_type pos = title.find('=');
		while (pos != string::npos)
		{
			title[pos] = ' ';
			pos = title.find('=', pos + 1);
		}
#ifdef DEBUG
		cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
	}
	record += title;
	record += "\ntype=";
	record += info.getType();
	// Append a timestamp, in a format compatible with Omega
	record += "\nmodtime=";
	// time_t is not guaranteed to be a long, so convert before handing it to %ld
	snprintf(timeStr, sizeof(timeStr), "%ld", static_cast<long>(timeT));
	record += timeStr;
	// ...and the language
	record += "\nlanguage=";
	record += StringManip::toLowerCase(language);
#ifdef DEBUG
	cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
	doc.set_data(record);

	// Add this value to allow sorting by date
	doc.add_value(0, StringManip::integerToBinaryString((uint32_t)timeT));
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:45,代码来源:XapianIndex.cpp


示例7: saveMessage

void HistoryLogger::saveMessage(const Message* message)
{
	if (message->flags() & MESSAGE_FLAG_ALARM)
		return;
	
	Xapian::Document doc;
	
	quint32 flags = message->flags();
	std::string plainText(message->plainText().toUtf8());
	std::string confUser(message->getConfUser().constData());

	std::string data;
	if (flags & MESSAGE_FLAG_RTF)
		data = message->rtfText().constData();
	else
		data = plainText;

	std::cout << "HistoryLogger::saveMessage data = " << data << std::endl;
	doc.set_data(data);

	Xapian::TermGenerator termGen;
	termGen.set_stemmer(Xapian::Stem("ru"));
	termGen.set_document(doc);
	termGen.index_text(plainText);

	doc.add_value(0, message->dateTime().toString("yyyyMMdd").toStdString());
	doc.add_value(1, message->dateTime().toString("hhmmss").toStdString());
	doc.add_value(2, QString::number(flags, 16).toStdString());
	doc.add_value(3, message->type() == Message::Outgoing? "o" : "i");
	doc.add_value(4, confUser);

	database->add_document(doc);
	database->flush();
}
开发者ID:Andrsid,项目名称:myagent-im,代码行数:34,代码来源:historylogger.cpp


示例8: setDocumentData

/// Sets the document data as an omindex-style record and stores the timestamp,
/// formatted as a decimal number, in value slot 0.
///
/// The record holds the fields url, sample (left empty), caption, type,
/// timestamp and language, one per line.
///
/// @param doc       document that receives the record and the date value
/// @param info      properties to store
/// @param language  language name, stored as given
void XapianIndex::setDocumentData(Xapian::Document &doc, const DocumentInfo &info,
	const string &language) const
{
	string title(info.getTitle());
	string timestamp(info.getTimestamp());
	char timeStr[64];

	// Set the document data omindex-style
	string record = "url=";
	record += info.getLocation();
	// The sample will be generated at query time
	record += "\nsample=";
	record += "\ncaption=";
	if (badField(title) == true)
	{
		// Blank out every '=' in the title
		string::size_type pos = title.find('=');
		while (pos != string::npos)
		{
			title[pos] = ' ';
			pos = title.find('=', pos + 1);
		}
#ifdef DEBUG
		cout << "XapianIndex::setDocumentData: modified title" << endl;
#endif
	}
	record += title;
	record += "\ntype=";
	record += info.getType();
	// Append a timestamp
	record += "\ntimestamp=";
	record += timestamp;
	// ...and the language
	record += "\nlanguage=";
	record += language;
#ifdef DEBUG
	cout << "XapianIndex::setDocumentData: document data is " << record << endl;
#endif
	doc.set_data(record);

	// Add this value to allow sorting by date
	// The converted timestamp need not be an int, so make the argument match
	// the format explicitly rather than passing it to %d as is
	snprintf(timeStr, sizeof(timeStr), "%ld",
		static_cast<long>(TimeConverter::fromTimestamp(timestamp)));
	doc.add_value(0, timeStr);
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:44,代码来源:XapianIndex.cpp


示例9: main

int main(int argc, char **argv)
{
    // Simplest possible options parsing: we just require two or more
    // parameters.
    if (argc < 3) {
        cout << "usage: " << argv[0] << " <path to database> <search terms>" << endl;
        exit(1);
    }
 
    // Catch any Xapian::Error exceptions thrown
    try {
        // Make the database
	Xapian::Database db(argv[1]);
 
        // Start an enquire session
	Xapian::Enquire enquire(db);
         
        // Set percent and/or weight cutoffs
        enquire.set_cutoff(90,0.2);
         
        // Set weighting schema
        BM25Weight bm1(1.0,0.0,1.0,0.5,0.3);
        enquire.set_weighting_scheme(bm1);
 
        // Build the query object
	Xapian::Query query(Xapian::Query::OP_AND, argv + 2, argv + argc);
        cout << "Performing query" << query.get_description() << "'" << endl;
	
        // Set Stopper
        string stop[8]={"的","了","呵","吧","就","你","我","他"};
        SimpleStopper *ss=new SimpleStopper;
        for(int i=0;i<8;i++){
            ss->add(stop[i]);
        }
        QueryParser qparser;
        qparser.set_stopper(ss);
        qparser.set_database(db);
 
        // Give the query object to the enquire session
        enquire.set_query(query);
 
        // Get the top 10 results of the query
	Xapian::MSet matches = enquire.get_mset(0, 10);                     //最多返回10个文档
 
        // Display the results
        cout << matches.size() << " results found" << endl;
 
        for (Xapian::MSetIterator i = matches.begin();i != matches.end(); ++i) {
	    Xapian::Document doc = i.get_document();
            cout << "Document ID " << *i << "\nPercent " <<i.get_percent() << "%\n" << doc.get_data() << "\n" << endl;
        }
        db.close();
    } catch(const Xapian::Error &error) {
        cout << "Exception: "  << error.get_msg() << endl;
    }
}
开发者ID:IthacaDream,项目名称:Test,代码行数:56,代码来源:xapian_test.cpp


示例10: removeFirstPostingsFromDocument

/// Removes from a Xapian document the first posting of every term the
/// tokenizer yields.
///
/// The terms looked up are derived from each token in this way: an "R"-prefixed
/// raw term for tokens that start with an upper case letter, then the
/// lower-cased term with the given prefix, stemmed and/or unstemmed depending
/// on mode. When language is empty, no stemming is applied.
///
/// @param tokens    source of terms; empty terms are skipped
/// @param doc       document whose postings are removed
/// @param prefix    prefix prepended to every lower-cased term
/// @param language  stemming language, may be empty
/// @param mode      STORE_UNSTEM, STORE_STEM or STORE_BOTH
void XapianIndex::removeFirstPostingsFromDocument(Tokenizer &tokens, Xapian::Document &doc,
	const string &prefix, const string &language, StemmingMode mode) const
{
	// Owns the optional stemmer so that it is released on every exit path,
	// including when one of the calls below throws
	struct StemmerGuard
	{
		Xapian::Stem *m_pStem;

		StemmerGuard() : m_pStem(NULL) {}
		~StemmerGuard() { delete m_pStem; }
	} stemmerGuard;
	Xapian::TermIterator termListIter = doc.termlist_begin();
	string term;

	// Do we know what language to use for stemming ?
	if (language.empty() == false)
	{
		stemmerGuard.m_pStem = new Xapian::Stem(StringManip::toLowerCase(language));
	}
	Xapian::Stem *pStemmer = stemmerGuard.m_pStem;

	// Get the terms and remove the first posting for each
	while (tokens.nextToken(term) == true)
	{
		if (term.empty() == true)
		{
			continue;
		}
		// Does it start with a capital letter ?
		// The cast keeps bytes above 0x7F from reaching isupper() as negative values
		if (isupper(static_cast<unsigned char>(term[0])) != 0)
		{
			// R-prefix the raw term
			removeFirstPosting(doc, termListIter, string("R") + term);
		}
		// Lower case the term
		term = StringManip::toLowerCase(term);

		// Stem the term ?
		if ((mode == STORE_UNSTEM) ||
			(pStemmer == NULL))
		{
			removeFirstPosting(doc, termListIter, limitTermLength(prefix + term));
		}
		else if (mode == STORE_STEM)
		{
			removeFirstPosting(doc, termListIter, limitTermLength(prefix + pStemmer->stem_word(term)));
		}
		else if (mode == STORE_BOTH)
		{
			string stemmedTerm = pStemmer->stem_word(term);

			removeFirstPosting(doc, termListIter, limitTermLength(prefix + term));
			if (stemmedTerm != term)
			{
				// The stem differs, so it has a posting of its own
				removeFirstPosting(doc, termListIter, limitTermLength(prefix + stemmedTerm));
			}
		}
	}
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:56,代码来源:XapianIndex.cpp


示例11: requestImage

/// Returns the thumbnail for the given id, using the m_thumb32 cache when it
/// already holds one.
///
/// An id that starts with 'Q' is looked up in the Xapian database to obtain
/// the file path; any other id is used as the file path itself. On a cache
/// miss the EXIF thumbnail is used, falling back to a scaled-down copy of the
/// file, and the result is inserted into the cache.
///
/// @param id             document id or file path
/// @param size           if not null, receives the size of the returned image
/// @param requestedSize  not used by this implementation
/// @return the thumbnail, or a null image if the id is empty or unknown
QImage ThumbnailProvider::requestImage(const QString &id, QSize *size, const QSize &requestedSize)
{
    QImage image;

    // id.at(0) below is out of range for an empty id
    if (id.isEmpty()) {
        return image;
    }

    if (!m_thumb32->findImage(id, &image)) {
        QString filePath;
        if (id.at(0) == QLatin1Char('Q')) {
            Xapian::Document doc = m_xapianDB->findDocument(id);
            if (doc.get_docid() == 0) {
                // No such document
                return image;
            }
            filePath = QString::fromStdString(doc.get_value(Database::FilePath));
        } else {
            filePath = id;
        }

        // Load thumbnail
        KExiv2Iface::KExiv2 preview(filePath);
        image = preview.getExifThumbnail(true);
        if (image.isNull()) {
            // No embedded thumbnail: scale the picture itself
            // TODO smooth or fast?
            image = QImage(filePath).scaled(160, 120, Qt::KeepAspectRatio);
            kWarning() << "Could not find preview image for" << filePath << image.isNull();
        }

        // Store the thumbnail into the cache file
        if (m_thumb32->insertImage(id, image)) {
            kWarning() << "Added preview for" << image.byteCount() << filePath << id;
        } else {
            kWarning() << "FAILED to add preview for" << filePath << id;
        }
    }

    // NOTE(review): this signature matches Qt's image provider interface, which
    // expects *size to be set to the image's size - confirm the base class
    if (size && !image.isNull()) {
        *size = image.size();
    }

    return image;
}
开发者ID:KDE,项目名称:photobook,代码行数:43,代码来源:ThumbnailProvider.cpp


示例12: text

/// Returns the data stored with the document matched by queryId, decoded as
/// UTF-8, or "No Subject" when that data is empty or cannot be read.
QString EmailSearchStore::text(int queryId)
{
    Xapian::Document matchedDoc = docForQuery(queryId);

    QMutexLocker guard(&m_mutex);
    std::string rawSubject;
    try {
        rawSubject = matchedDoc.get_data();
    } catch (const Xapian::Error &) {
        // Leave the subject empty; the fallback below takes over
    }

    const QString decoded = QString::fromUtf8(rawSubject.c_str(), rawSubject.length());
    if (!decoded.isEmpty()) {
        return decoded;
    }

    return QStringLiteral("No Subject");
}
开发者ID:KDE,项目名称:akonadi-search,代码行数:19,代码来源:emailsearchstore.cpp


示例13: prepareDocument

bool XapianIndex::prepareDocument(const DocumentInfo &info, Xapian::Document &doc,
	Xapian::termcount &termPos) const
{
	string title(info.getTitle());
	string location(info.getLocation());
	Url urlObj(location);

	// Add a magic term :-)
	doc.add_term(MAGIC_TERM);

	// Index the title with and without prefix S
	if (title.empty() == false)
	{
		Document titleDoc;
		titleDoc.setData(title.c_str(), title.length());
		Tokenizer titleTokens(&titleDoc);
		addTermsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
		titleTokens.rewind();
		addTermsToDocument(titleTokens, doc, "", termPos, m_stemMode);
	}

	// Index the full URL with prefix U
	doc.add_term(limitTermLength(string("U") + location, true));
	// ...the host name and included domains with prefix H
	string hostName(StringManip::toLowerCase(urlObj.getHost()));
	if (hostName.empty() == false)
	{
		doc.add_term(limitTermLength(string("H") + hostName, true));
		string::size_type dotPos = hostName.find('.');
		while (dotPos != string::npos)
		{
			doc.add_term(limitTermLength(string("H") + hostName.substr(dotPos + 1), true));

			// Next
			dotPos = hostName.find('.', dotPos + 1);
		}
	}
	// ...and the file name with prefix P
	string fileName(urlObj.getFile());
	if (fileName.empty() == false)
	{
		doc.add_term(limitTermLength(string("P") + StringManip::toLowerCase(fileName), true));
	}
	// Finally, add the language code with prefix L
	doc.add_term(string("L") + Languages::toCode(m_stemLanguage));

	setDocumentData(doc, info, m_stemLanguage);

	return true;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:50,代码来源:XapianIndex.cpp


示例14: removeFirstPosting

/// Removes the first posting of a term from a document.
///
/// Nothing happens if the term is not in the term list or has no positions.
///
/// @param doc           document to remove the posting from
/// @param termListIter  term list iterator; it is advanced with skip_to() and
///                      left wherever that call stops. Assumed to iterate over
///                      doc's own term list, as the visible caller arranges
/// @param term          term whose first posting is removed
static void removeFirstPosting(Xapian::Document &doc,
                               Xapian::TermIterator &termListIter, const string &term)
{
    termListIter.skip_to(term);

    // skip_to() may stop at the end of the list or on a later term when the
    // requested one is absent; neither case has a posting for us to remove
    if ((termListIter == doc.termlist_end()) ||
        (*termListIter != term))
    {
        return;
    }

    Xapian::PositionIterator firstPosIter = termListIter.positionlist_begin();
    if (firstPosIter != termListIter.positionlist_end())
    {
        try
        {
            doc.remove_posting(term, *firstPosIter);
        }
        catch (const Xapian::Error &error)
        {
            // This posting may have been removed already
#ifdef DEBUG
            cout << "XapianIndex::removeFirstPosting: " << error.get_msg() << endl;
#endif
        }
    }
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:21,代码来源:XapianIndex.cpp


示例15: db

/// Indexes a CSV file into a Xapian database.
///
/// The first line must be a header whose columns 0, 2 and 8 are named
/// id_NUMBER, TITLE and DESCRIPTION. Every following line becomes one
/// document, keyed by the term "Q" + identifier so that indexing the same
/// file again replaces documents instead of duplicating them.
///
/// The process exits with status 1 if the file cannot be opened or the header
/// does not match.
///
/// @param datapath  path of the CSV file to read
/// @param dbpath    path of the database to create or open
Indexer::Indexer(const string &datapath, const string &dbpath)
{
    // Hardcode field offsets for simplicity.
    const size_t FIELD_ID_NUMBER = 0;
    const size_t FIELD_TITLE = 2;
    const size_t FIELD_DESCRIPTION = 8;

    // Create or open the database we're going to be writing to.
    Xapian::WritableDatabase db(dbpath, Xapian::DB_CREATE_OR_OPEN);

    // Set up a TermGenerator that we'll use in indexing.
    Xapian::TermGenerator termgenerator;
    termgenerator.set_stemmer(Xapian::Stem("en"));

    ifstream csv(datapath.c_str());
    if (!csv) {
        // Fail with a clear message rather than an out-of-range lookup on an
        // empty header below
        cerr << "Couldn't open " << datapath << endl;
        exit(1);
    }

    vector<string> fields;
    csv_parse_line(csv, fields);

    // Check the CSV header line matches our hard-code offsets.
    if (fields.at(FIELD_ID_NUMBER) != "id_NUMBER" ||
        fields.at(FIELD_TITLE) != "TITLE" ||
        fields.at(FIELD_DESCRIPTION) != "DESCRIPTION") {
        // The CSV format doesn't match what we expect.
        cerr << "CSV format has changed!" << endl;
        exit(1);
    }

    while (csv_parse_line(csv, fields)) {
        // 'fields' is a vector mapping from field number to value.
        // We look up fields with the 'at' method so we get an exception
        // if that field isn't set.
        //
        // We're just going to use DESCRIPTION, TITLE and id_NUMBER.
        const string & description = fields.at(FIELD_DESCRIPTION);
        const string & title = fields.at(FIELD_TITLE);
        const string & identifier = fields.at(FIELD_ID_NUMBER);

        // We make a document and tell the term generator to use this.
        Xapian::Document doc;
        termgenerator.set_document(doc);

        // Index each field with a suitable prefix.
        termgenerator.index_text(title, 1, "S");
        termgenerator.index_text(description, 1, "XD");

        // Index fields without prefixes for general search.
        termgenerator.index_text(title);
        termgenerator.increase_termpos();
        termgenerator.index_text(description);

        // Store all the fields for display purposes.
        doc.set_data(identifier + "\n" + title + "\n" + description);

        // We use the identifier to ensure each object ends up in the
        // database only once no matter how many times we run the
        // indexer.
        string idterm = "Q" + identifier;
        doc.add_boolean_term(idterm);
        db.replace_document(idterm, doc);
    }
}
开发者ID:jainnidhi703,项目名称:xapianclusteringexample,代码行数:61,代码来源:indexer.cpp


示例16: QueryHandler

    /// Handles a query message: segments the query text, searches the database
    /// for documents matching all segments and sends the results back.
    ///
    /// The query is converted from UTF-8 to GBK, passed to
    /// result->ParagraphProcessing() and the output converted back to UTF-8.
    /// That output is split on spaces into "word/tag" items; every word whose
    /// tag is not "w" becomes a "K"-prefixed term of an AND query. At most 100
    /// matches are returned. A response is always sent, possibly with an empty
    /// document list.
    ///
    /// @param message  carries the query, which this handler deletes, and the
    ///                 result key echoed in the response
    /// @param from     address the response is sent to
    void QueryHandler(const QueryMessage &message, const Theron::Address from)
    {
        // Copy the query before releasing the one carried by the message
        search::QueryInfo qi=*(message.query);
        std::string resKey(message.resKey);
        delete message.query;

        const std::string::size_type queryLen = qi.query.length();
        std::string segString;
        // One spare zeroed byte keeps each buffer NUL-terminated, even when
        // the query is empty or a conversion fills the whole capacity
        std::vector<char> input(queryLen*3 + 1, '\0');
        std::vector<char> output(queryLen*9 + 1, '\0');
        try
        {
            UErrorCode  error = U_ZERO_ERROR;
            ucnv_convert("GBK","UTF-8",&input[0],  queryLen*3, qi.query.c_str(), queryLen, &error );

            // Once the error code reports a failure, later ICU calls do
            // nothing, so segmenting could not produce a usable result
            if (U_SUCCESS(error) && result->ParagraphProcessing(&input[0], &output[0]))
            {
                const std::string::size_type oLen = strlen(&output[0]);
                std::vector<char> utf8out(oLen*3 + 1, '\0');
                ucnv_convert("UTF-8","GBK",&utf8out[0],  oLen*3, &output[0], oLen, &error );

                segString=std::string(&utf8out[0]);
            }
        }
        catch (...) {
            // Best effort: an empty segString yields an empty result list
        }

        std::list<std::string> segList;
        if(!segString.empty())
        {
            std::vector<std::string> resv;
            boost::algorithm::split( resv, segString, boost::algorithm::is_any_of(" ") );
            for(std::vector<std::string>::iterator it=resv.begin();it!=resv.end();++it)
            {
                // Each item looks like word/tag
                std::vector<std::string> tmpv;
                boost::algorithm::split( tmpv, *it, boost::algorithm::is_any_of("/") );
                if(tmpv.size()>1&&tmpv[1]!="w")
                    segList.push_back(std::string("K")+tmpv[0]);
            }
        }

        search::DocList *dList=new search::DocList();
        if(!segList.empty())
        {
            Xapian::Query query(Xapian::Query::OP_AND,segList.begin(), segList.end());

            // Retry for as long as the database is modified under us
            for(;;)
            {
                try
                {
                    // Drop what a previous, interrupted attempt collected so
                    // that a retry cannot return the same document twice
                    dList->docList.clear();
                    db.reopen();
                    Xapian::Enquire  enquire(db);
                    enquire.set_query(query);
                    Xapian::MSet matches = enquire.get_mset(0, 100);
                    for (Xapian::MSetIterator i = matches.begin(); i != matches.end(); ++i) {
                        Xapian::Document doc = i.get_document();
                        search::IndexInfo info;
                        info.uid=doc.get_value(1);
                        info.attMap.insert(std::make_pair(std::string("title"),doc.get_value(2)));
                        info.content=doc.get_data();
                        dList->docList.push_back(info);
                    }
                    std::cout<<"doc size:"<<dList->docList.size()<<std::endl;
                    break;
                }catch(const Xapian::DatabaseModifiedError &)
                {
                    std::cout<<"try agian"<<std::endl;
                }catch(...)
                {
                    // Any other failure: give up and send what was collected
                    break;
                }
            }
        }
        Send(QueryResponceMessage(dList,resKey.c_str()), from);
    }
开发者ID:firememory,项目名称:dfwbi,代码行数:83,代码来源:QueryActor.hpp


示例17: indexDocument

/// Indexes the document held by the tokenizer and tags it with the given labels.
/// On success docId receives the ID returned by add_document() and true is
/// returned; false means the index was unavailable, the tokenizer had no
/// document, or indexing failed.
bool XapianIndex::indexDocument(Tokenizer &tokens, const std::set<std::string> &labels,
	unsigned int &docId)
{
	XapianDatabase *pDb = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (!pDb)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	bool wasIndexed = false;
	try
	{
		// Get the document
		const Document *pSource = tokens.getDocument();
		if (!pSource)
		{
#ifdef DEBUG
			cout << "XapianIndex::indexDocument: no document" << endl;
#endif
			// This path leaves without calling unlock()
			return false;
		}

		// Cache the document's properties, with a canonical location
		DocumentInfo props(pSource->getTitle(), pSource->getLocation(),
			pSource->getType(), pSource->getLanguage());
		props.setTimestamp(pSource->getTimestamp());
		props.setLocation(Url::canonicalizeUrl(props.getLocation()));

		// The stemming language is only refreshed when there is data to scan
		unsigned int contentLength = 0;
		const char *pContent = pSource->getData(contentLength);
		if (pContent)
		{
			m_stemLanguage = scanDocument(pContent, contentLength, props);
		}

		Xapian::Document xapianDoc;
		Xapian::termcount position = 0;

#ifdef DEBUG
		cout << "XapianIndex::indexDocument: adding terms" << endl;
#endif
		// Add the tokenizer's terms to the Xapian document
		addPostingsToDocument(tokens, xapianDoc, "", position, m_stemMode);

		// Add labels
		set<string>::const_iterator label = labels.begin();
		while (label != labels.end())
		{
			xapianDoc.add_term(limitTermLength(string("XLABEL:") + *label));
			++label;
		}

		if (addCommonTerms(props, xapianDoc, position) == true)
		{
			setDocumentData(props, xapianDoc, m_stemLanguage);

			Xapian::WritableDatabase *pWriteIndex = pDb->writeLock();
			if (pWriteIndex)
			{
				// Add this document to the Xapian index
				docId = pWriteIndex->add_document(xapianDoc);
				wasIndexed = true;
			}
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't index document: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't index document, unknown exception occured" << endl;
	}
	pDb->unlock();

	return wasIndexed;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:77,代码来源:XapianIndex.cpp


示例18: addPostingsToDocument

/// Adds the tokenizer's terms to a Xapian document as postings.
///
/// Terms that start with an upper case letter are additionally stored raw,
/// prefixed with "R" or with the given prefix when one is set. Each term is
/// then lower-cased and stored with the given prefix, stemmed and/or unstemmed
/// depending on mode; when m_stemLanguage is empty, terms are always stored
/// unstemmed. All postings for one token share the same position.
///
/// @param tokens   source of terms; empty terms are skipped
/// @param doc      document that receives the postings
/// @param prefix   prefix prepended to every term
/// @param termPos  position of the next posting; incremented once per token
/// @param mode     STORE_UNSTEM, STORE_STEM or STORE_BOTH
void XapianIndex::addPostingsToDocument(Tokenizer &tokens, Xapian::Document &doc,
	const string &prefix, Xapian::termcount &termPos, StemmingMode mode) const
{
	// Owns the optional stemmer so that it is released on every exit path,
	// including when one of the Xapian calls below throws
	struct StemmerGuard
	{
		Xapian::Stem *m_pStem;

		StemmerGuard() : m_pStem(NULL) {}
		~StemmerGuard() { delete m_pStem; }
	} stemmerGuard;
	string upperCasePrefix("R");
	string term;

	// Do we know what language to use for stemming ?
	if (m_stemLanguage.empty() == false)
	{
		stemmerGuard.m_pStem = new Xapian::Stem(StringManip::toLowerCase(m_stemLanguage));
	}
	Xapian::Stem *pStemmer = stemmerGuard.m_pStem;

	// Terms starting with a capital letter are R-prefixed, unless a prefix is already defined
	if (prefix.empty() == false)
	{
		upperCasePrefix = prefix;
	}

	// Get the terms
	while (tokens.nextToken(term) == true)
	{
		if (term.empty() == true)
		{
			continue;
		}
		// Does it start with a capital letter ?
		// The cast keeps bytes above 0x7F from reaching isupper() as negative values
		if (isupper(static_cast<unsigned char>(term[0])) != 0)
		{
			doc.add_posting(upperCasePrefix + XapianDatabase::limitTermLength(term), termPos);
		}
		// Lower case the term
		term = StringManip::toLowerCase(term);

		// Stem the term ?
		if ((mode == STORE_UNSTEM) ||
			(pStemmer == NULL))
		{
			doc.add_posting(prefix + XapianDatabase::limitTermLength(term), termPos);
		}
		else if (mode == STORE_STEM)
		{
#if XAPIAN_MAJOR_VERSION==0
			string stemmedTerm(pStemmer->stem_word(term));
#else
			string stemmedTerm((*pStemmer)(term));
#endif

			doc.add_posting(prefix + XapianDatabase::limitTermLength(stemmedTerm), termPos);
		}
		else if (mode == STORE_BOTH)
		{
#if XAPIAN_MAJOR_VERSION==0
			string stemmedTerm(pStemmer->stem_word(term));
#else
			string stemmedTerm((*pStemmer)(term));
#endif

			// Add both at the same position
			doc.add_posting(prefix + XapianDatabase::limitTermLength(term), termPos);
			if (stemmedTerm != term)
			{
				// No point adding the same term twice
				doc.add_posting(prefix + XapianDatabase::limitTermLength(stemmedTerm), termPos);
			}
		}

		++termPos;
	}
#ifdef DEBUG
	cout << "XapianIndex::addPostingsToDocument: added " << termPos << " terms" << endl;
#endif
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:78,代码来源:XapianIndex.cpp


示例19: addCommonTerms

/// Adds the terms derived from a document's properties.
///
/// Terms added: the magic term, the title (S-prefixed and unprefixed), the
/// full URL (U), the base file of a local URL with a query part (XFILE:), the
/// host name and its parent domains (H), the location and its parent
/// directories (XDIR:), the file name (P) and its extension (XEXT:), the date
/// (D, M, Y), the language code (L) and the MIME type (T).
///
/// @param info     properties the terms are derived from
/// @param doc      document that receives the terms
/// @param termPos  position of the next posting; advanced by the title terms
void XapianIndex::addCommonTerms(const DocumentInfo &info, Xapian::Document &doc,
                                 Xapian::termcount &termPos) const
{
    string title(info.getTitle());
    string location(info.getLocation());
    Url urlObj(location);

    // Add a magic term :-)
    doc.add_term(MAGIC_TERM);

    // Index the title with and without prefix S
    if (title.empty() == false)
    {
        Document titleDoc;
        titleDoc.setData(title.c_str(), title.length());
        Tokenizer titleTokens(&titleDoc);
        addPostingsToDocument(titleTokens, doc, "S", termPos, STORE_UNSTEM);
        titleTokens.rewind();
        addPostingsToDocument(titleTokens, doc, "", termPos, m_stemMode);
    }

    // Index the full URL with prefix U
    doc.add_term(string("U") + XapianDatabase::limitTermLength(Url::escapeUrl(location), true));
    // ...the base file with XFILE:
    string::size_type qmPos = location.find('?');
    if ((urlObj.isLocal() == true) &&
            (qmPos != string::npos))
    {
        doc.add_term(string("XFILE:") + XapianDatabase::limitTermLength(Url::escapeUrl(location.substr(0, qmPos)), true));
    }
    // ...the host name and included domains with prefix H
    string hostName(StringManip::toLowerCase(urlObj.getHost()));
    if (hostName.empty() == false)
    {
        doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName, true));
        string::size_type dotPos = hostName.find('.');
        while (dotPos != string::npos)
        {
            doc.add_term(string("H") + XapianDatabase::limitTermLength(hostName.substr(dotPos + 1), true));

            // Next
            dotPos = hostName.find('.', dotPos + 1);
        }
    }
    // ...the location (as is) and all directories with prefix XDIR:
    string tree(urlObj.getLocation());
    if (tree.empty() == false)
    {
        doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree), true));
        if (tree[0] == '/')
        {
            doc.add_term("XDIR:/");
        }
        // Start at 1 so that a leading slash is not treated as a separator
        string::size_type slashPos = tree.find('/', 1);
        while (slashPos != string::npos)
        {
            doc.add_term(string("XDIR:") + XapianDatabase::limitTermLength(Url::escapeUrl(tree.substr(0, slashPos)), true));

            // Next
            slashPos = tree.find('/', slashPos + 1);
        }
    }
    // ...and the file name with prefix P
    string fileName(urlObj.getFile());
    if (fileName.empty() == false)
    {
        string extension;

        doc.add_term(string("P") + XapianDatabase::limitTermLength(Url::escapeUrl(fileName), true));

        // Does it have an extension ?
        string::size_type extPos = fileName.rfind('.');
        if ((extPos != string::npos) &&
                (extPos + 1 < fileName.length()))
        {
            extension = StringManip::toLowerCase(fileName.substr(extPos + 1));
        }
        // Added even when the extension is empty
        doc.add_term(string("XEXT:") + XapianDatabase::limitTermLength(extension));
    }
    // Add the date terms D, M and Y
    time_t timeT = TimeConverter::fromTimestamp(info.getTimestamp());
    struct tm *tm = localtime(&timeT);
    // localtime() returns NULL when the time cannot be converted; skip the
    // date terms in that case rather than dereference a null pointer
    if (tm != NULL)
    {
        string yyyymmdd = TimeConverter::toYYYYMMDDString(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday);
        if (yyyymmdd.length() == 8)
        {
            doc.add_term(string("D") + yyyymmdd);
            doc.add_term(string("M") + yyyymmdd.substr(0, 6));
            doc.add_term(string("Y") + yyyymmdd.substr(0, 4));
        }
    }
    // Finally, add the language code with prefix L
    doc.add_term(string("L") + Languages::toCode(m_stemLanguage));
    // ...and the MIME type with prefix T
    doc.add_term(string("T") + info.getType());
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:94,代码来源:XapianIndex.cpp


示例20: setDocumentLabels

/// Sets a document's labels.
bool XapianIndex::setDocumentLabels(unsigned int docId, const set<string> &labels,
	bool resetLabels)
{
	bool updatedLabels = false;

	XapianDatabase *pDatabase = XapianDatabaseFactory::getDatabase(m_databaseName, false);
	if (pDatabase == NULL)
	{
		cerr << "Bad index " << m_databaseName << endl;
		return false;
	}

	try
	{
		Xapian::WritableDatabase *pIndex = pDatabase->writeLock();
		if (pIndex != NULL)
		{
			Xapian::Document doc = pIndex->get_document(docId);

			// Reset existing labels ?
			if (resetLabels == true)
			{
				Xapian::TermIterator termIter = pIndex->termlist_begin(docId);
				if (termIter != pIndex->termlist_end(docId))
				{
					for (termIter.skip_to("XLABEL:");
						termIter != pIndex->termlist_end(docId); ++termIter)
					{
						// Is this a label ?
						if (strncasecmp((*termIter).c_str(), "XLABEL:", min(7, (int)(*termIter).length())) == 0)
						{
							doc.remove_term(*termIter);
						}
					}
				}
			}

			// Set new labels
			for (set<string>::const_iterator labelIter = labels.begin(); labelIter != labels.end();
				++labelIter)
			{
				if (labelIter->empty() == false)
				{
					doc.add_term(limitTermLength(string("XLABEL:") + *labelIter));
				}
			}

			pIndex->replace_document(docId, doc);
			updatedLabels = true;
		}
	}
	catch (const Xapian::Error &error)
	{
		cerr << "Couldn't update document's labels: " << error.get_type() << ": " << error.get_msg() << endl;
	}
	catch (...)
	{
		cerr << "Couldn't update document's labels, unknown exception occured" << endl;
	}
	pDatabase->unlock();

	return updatedLabels;
}
开发者ID:BackupTheBerlios,项目名称:pinot-svn,代码行数:64,代码来源:XapianIndex.cpp



注:本文中的xapian::Document类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C++ xapian::WritableDatabase类代码示例发布时间:2022-05-31
下一篇:
C++ wtl::CString类代码示例发布时间:2022-05-31
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap