C++ tokenizer类代码示例

OStack程序员社区-中国程序员成长平台 › 门户 › 编程› C++›C++教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

本文整理汇总了C++中tokenizer类的典型用法代码示例。如果您正苦于以下问题：C++ tokenizer类的具体用法？C++ tokenizer怎么用？C++ tokenizer使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了tokenizer类的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: read_scene

// Reads the body of a "Scene" chunk from the token stream into `doc`.
//
// Requires an opening "{" and then consumes tokens until a "}" token is read.
//   - "amb":       three floats (each requested via expect_float(0,1)) are stored
//                  as the scene's ambient red/green/blue; alpha is set to 1.
//   - "dirlights": one further token is read and discarded, then skip_chunk()
//                  and skip_to_linefeed() are applied.
//   - anything else is skipped up to the end of its line.
// Every token read, plus start/end markers, is sent to OutputDebugStringA.
void read_scene(tokenizer& t,document_type& doc)
{
    OutputDebugStringA("Scene>>>>");
    t.expect_literal( "{" );

    while( true ) {
        substr token = t();
        OutputDebugStringA((token.str() + "\n").c_str());

        if( token == "}" ) {
            break;
        }

        if( token == "amb" ) {
            doc.scene.ambient.red = t.expect_float(0,1);
            doc.scene.ambient.green = t.expect_float(0,1);
            doc.scene.ambient.blue = t.expect_float(0,1);
            doc.scene.ambient.alpha = 1;
            t.expect_linefeed();
            continue;
        }

        if( token == "dirlights" ) {
            t();    // the token following the keyword is not used
            skip_chunk(t);
            skip_to_linefeed(t);
            continue;
        }

        // Unrecognised entry: ignore the rest of the line.
        skip_to_linefeed(t);
    }

    OutputDebugStringA("Scene<<<<");
}

开发者ID:jonigata，项目名称:partix，代码行数:25，代码来源:mqoreader.cpp

示例2: recognize_vertical

// Reads paragraphs from `is`, runs `recognizer` on every sentence produced by
// `tokenizer`, and writes one line per named entity to `os`:
//
//   <token numbers, comma separated> TAB <entity type> TAB <forms, space separated>
//
// Token numbers are 1-based positions within the sentence plus a running
// offset; the offset grows by (sentence length + 1) after each sentence.
// The stream is flushed after every sentence.
void recognize_vertical(istream& is, ostream& os, const ner& recognizer, tokenizer& tokenizer) {
  string para;
  vector<string_piece> forms;
  vector<named_entity> entities;
  string ids, text;
  unsigned tokens_before = 0;

  while (getpara(is, para)) {
    tokenizer.set_text(para);

    while (tokenizer.next_sentence(&forms, nullptr)) {
      recognizer.recognize(forms, entities);
      sort_entities(entities);

      for (const auto& entity : entities) {
        ids.clear();
        text.clear();

        const auto first = entity.start;
        for (auto i = first; i < entity.start + entity.length; i++) {
          // Separators go between tokens, never before the first one.
          if (i != first) {
            ids += ',';
            text += ' ';
          }
          ids += to_string(tokens_before + i + 1);
          text.append(forms[i].str, forms[i].len);
        }

        os << ids << '\t' << entity.type << '\t' << text << '\n';
      }

      os << flush;
      tokens_before += forms.size() + 1;
    }
  }
}

开发者ID:ufal，项目名称:nametag，代码行数:32，代码来源:run_ner.cpp

示例3: extract_identifier

template <typename R> bool extract_identifier(R& result,tokenizer& tokenizer)
{
 result.clear();
 
 //buffer
 
 array<ascii> buffer;
 array<ascii> current;
 
 //identifier

 if(!tokenizer.identifier(current))
  return false;
  
 buffer.append(current);
  
 //word
 
 if(tokenizer.word(current))
  buffer.append(current);
  
 //delimited
 
 if(!tokenizer.is_delimited())
  return false;
  
 //commit

 result=buffer;
 
 update(symbols()._identifier,buffer);
 
 return true;
}

开发者ID:vmorgulys，项目名称:sandbox，代码行数:34，代码来源:class.token.h.extract.cpp

示例4: extract_word

template <typename R> bool extract_word(R& result,tokenizer& tokenizer)
{
 result.clear();
 
 //buffer
 
 array<ascii> buffer;

 //word
 
 if(!tokenizer.word(buffer))
  return false;
  
 //delimited
 
 if(!tokenizer.is_delimited())
  return false;
  
 //commit
 
 result=buffer;
 
 update(symbols()._word,buffer);
 
 return true;
}

开发者ID:vmorgulys，项目名称:sandbox，代码行数:26，代码来源:class.token.h.extract.cpp

示例5: tag_xml

// Reads paragraphs from `is`, tags every sentence, and writes the paragraph
// text back to `os` with XML markup added:
//   - each sentence is wrapped in <sentence>...</sentence>,
//   - each token becomes <token lemma="..." tag="...">form</token>,
//   - text between tokens (and after the last token) is copied XML-encoded.
// Each tag goes through `tagset_converter` and each lemma through `derivation`
// before being printed. The stream is flushed once per paragraph.
void tag_xml(istream& is, ostream& os, const tagger& tagger, tokenizer& tokenizer, const tagset_converter& tagset_converter, const derivation_formatter& derivation, morpho::guesser_mode guesser) {
  string para;
  vector<string_piece> forms;
  vector<tagged_lemma> tags;

  while (getpara(is, para)) {
    tokenizer.set_text(para);

    // Start of the paragraph text that has not been written to `os` yet.
    const char* unprinted = para.c_str();

    while (tokenizer.next_sentence(&forms, nullptr)) {
      tagger.tag(forms, tags, guesser);

      for (unsigned i = 0; i < forms.size(); i++) {
        const bool is_first = i == 0;
        const bool is_last = i + 1 == forms.size();

        tagset_converter.convert(tags[i]);
        derivation.format_derivation(tags[i].lemma);

        // Text between the previous token and this one.
        os << xml_encoded(string_piece(unprinted, forms[i].str - unprinted));

        if (is_first) os << "<sentence>";

        os << "<token lemma=\"" << xml_encoded(tags[i].lemma, true)
           << "\" tag=\"" << xml_encoded(tags[i].tag, true) << "\">"
           << xml_encoded(forms[i])
           << "</token>";

        if (is_last) os << "</sentence>";

        unprinted = forms[i].str + forms[i].len;
      }
    }

    // Whatever follows the last token of the paragraph.
    const char* para_end = para.c_str() + para.size();
    os << xml_encoded(string_piece(unprinted, para_end - unprinted)) << flush;
  }
}

开发者ID:ufal，项目名称:morphodita，代码行数:27，代码来源:run_tagger.cpp

示例6: spellEvent

// Builds a spell event from a tokenized record.
//
// After the wowEvent base is initialised from `t`, tokens 9, 10 and 11 are read
// as the spell id, the spell name (passed through trimQuotes) and the spell
// school (parsed with asuIntFromHexa). The id -> name pair is also stored in
// `spells`.
spellEvent::spellEvent(tokenizer& t) : wowEvent(t)
{
	spellID = asInt(t.token(9));

	string label = t.token(10);
	trimQuotes(label);
	spells[spellID] = label;

	spellSchool = asuIntFromHexa(t.token(11));
}

开发者ID:alhunor，项目名称:projects，代码行数:7，代码来源:events.cpp

示例7: is_next

// Reports whether a number starts at lookahead offset `i` in `tokens`.
//
// Any run of '-', '+' and '.' characters starting at `i` is stepped over; the
// result is true when the character after that run is a decimal digit.
// `data` is not used here.
bool number::is_next(tokenizer &tokens, int i, void *data)
{
	for (;;)
	{
		// Stop at the first character that is neither a sign nor a dot.
		if (tokens.peek_char(i) != '-' && tokens.peek_char(i) != '+' && tokens.peek_char(i) != '.')
			break;
		++i;
	}

	if (tokens.peek_char(i) < '0')
		return false;

	return tokens.peek_char(i) <= '9';
}

开发者ID:yuchien302，项目名称:skeleton，代码行数:7，代码来源:number.cpp

示例8: is_next

// Reports whether a statement can start at the current position.
//
// True when node_id::is_next() accepts the position, or when the tokenizer
// reports one of the keywords "subgraph", "graph", "node" or "edge" as next.
// The checks run in that order and stop at the first one that succeeds.
bool statement::is_next(tokenizer &tokens, int i, void *data)
{
	if (node_id::is_next(tokens, i, data))
		return true;

	if (tokens.is_next("subgraph"))
		return true;

	if (tokens.is_next("graph"))
		return true;

	if (tokens.is_next("node"))
		return true;

	if (tokens.is_next("edge"))
		return true;

	return false;
}

开发者ID:nbingham1，项目名称:parse_dot，代码行数:8，代码来源:statement.cpp

示例9: tokenize_vertical

// Reads paragraphs from `is`, tokenizes them, and writes one token per line to
// `os`. Each sentence is followed by an empty line and a flush.
void tokenize_vertical(istream& is, ostream& os, tokenizer& tokenizer) {
  string para;
  vector<string_piece> forms;

  while (getpara(is, para)) {
    tokenizer.set_text(para);

    while (tokenizer.next_sentence(&forms, nullptr)) {
      for (size_t i = 0; i < forms.size(); ++i)
        os << forms[i] << '\n';

      // Empty line marks the end of the sentence.
      os << '\n' << flush;
    }
  }
}

开发者ID:ufal，项目名称:nametag，代码行数:14，代码来源:run_tokenizer.cpp

示例10: stmt_def_field

	inline stmt_def_field(const statement&parent,const token&tk,tokenizer&t):
		statement{parent,tk},
		ident_{t.next_token()}
	{
		if(ident_.is_name(""))
			throw compiler_error(ident_,"expected field name");

		if(!t.is_next_char('{'))
			throw compiler_error(ident_,"expected '{' initial value   then '}' ",ident_.name());

		while(true){
			if(t.is_next_char('}'))break;
			tokens_.push_back(t.next_token());
		}
	}

开发者ID:calint，项目名称:compiler-2，代码行数:15，代码来源:stmt_def_field.hpp

示例11: extract_control

template <typename R> bool extract_control(R& result,tokenizer& tokenizer)
{
 result.clear();
 
 //controls
 
 dictionary<string,id<string>> controls=
 {
  "\r",symbols()._cr,
  "\n",symbols()._lf
 };

 //buffer
 
 array<ascii> buffer;

 //any
 
 if(!tokenizer.any(buffer,controls.keys()))
  return false;
  
 //commit
 
 result=buffer;

 update(controls[buffer.join("")],buffer);
 
 return true;
}

开发者ID:vmorgulys，项目名称:sandbox，代码行数:29，代码来源:class.token.h.extract.cpp

示例12: stmt_def_func_param

	// Parses a function parameter:  <name> [ ':' <keyword> ]*
	//
	// The parameter name is the next token of `t` and becomes this statement's
	// token; it is asserted to be non-empty. For as long as is_next_char(':')
	// succeeds, one more token is read and appended to keywords_.
	//
	// Throws compiler_error if the stream is at its end right after a ':'.
	inline stmt_def_func_param(const statement&parent,tokenizer&t):
		statement{parent,t.next_token()}
	{
		assert(!tok().is_name(""));

		while(t.is_next_char(':')){
			if(t.is_eos())
				throw compiler_error(*this,"unexpected end of stream",tok().name_copy());

			keywords_.push_back(t.next_token());
		}
	}

开发者ID:calint，项目名称:compiler-2，代码行数:16，代码来源:stmt_def_func_param.hpp

示例13: parse

// Parses a sequence of assignment_list items into `attributes`.
//
// The work is bracketed by syntax_start()/syntax_end(). Each round registers an
// optional expectation for an assignment_list (increment(false) + expect) and
// then asks decrement() whether it was met; when it was, an assignment_list is
// constructed from the tokenizer and appended, and the next round starts.
// NOTE(review): the exact semantics of increment/expect/decrement live in the
// tokenizer class, which is not visible here.
void attribute_list::parse(tokenizer &tokens, void *data)
{
	tokens.syntax_start(this);

	for (;;)
	{
		tokens.increment(false);
		tokens.expect<assignment_list>();

		if (!tokens.decrement(__FILE__, __LINE__, data))
			break;

		attributes.push_back(assignment_list(tokens, data));
	}

	tokens.syntax_end(this);
}

开发者ID:nbingham1，项目名称:parse_dot，代码行数:17，代码来源:attribute_list.cpp

示例14: read_scene

// Reads the body of a "Scene" chunk from the token stream into `doc`.
//
// Requires an opening "{" and then consumes tokens until a "}" token is read.
// An "amb" entry supplies three floats (each requested via expect_float(0,1))
// that become the scene's ambient red/green/blue, with alpha set to 1; every
// other entry is skipped up to the end of its line.
void read_scene(tokenizer& t,document_type& doc)
{
    t.expect_literal( "{" );

    bool closed = false;
    while( !closed ) {
        substr token = t();

        if( token == "}" ) {
            closed = true;
        } else if( token == "amb" ) {
            doc.scene.ambient.red = t.expect_float(0,1);
            doc.scene.ambient.green = t.expect_float(0,1);
            doc.scene.ambient.blue = t.expect_float(0,1);
            doc.scene.ambient.alpha = 1;
            t.expect_linefeed();
        } else {
            // Unsupported entry: ignore the rest of the line.
            skip_to_linefeed(t);
        }
    }
}

开发者ID:jonigata，项目名称:yamadumi，代码行数:18，代码来源:mqoreader.cpp

示例15: tokenize_xml

// Reads paragraphs from `is`, tokenizes them, and writes the paragraph text
// back to `os` with XML markup added: every sentence is wrapped in
// <sentence>...</sentence> and every token in <token>...</token>. Text between
// tokens and after the last token is copied XML-encoded, and is only written
// when it is non-empty. The stream is flushed once per paragraph.
static void tokenize_xml(istream& is, ostream& os, tokenizer& tokenizer) {
  string para;
  vector<string_piece> forms;

  while (getpara(is, para)) {
    tokenizer.set_text(para);

    // Start of the paragraph text that has not been written to `os` yet.
    const char* unprinted = para.c_str();

    while (tokenizer.next_sentence(&forms, nullptr)) {
      for (unsigned i = 0; i < forms.size(); i++) {
        const bool is_first = i == 0;
        const bool is_last = i + 1 == forms.size();

        if (unprinted < forms[i].str)
          os << xml_encoded(string_piece(unprinted, forms[i].str - unprinted));

        if (is_first) os << "<sentence>";
        os << "<token>" << xml_encoded(forms[i]) << "</token>";
        if (is_last) os << "</sentence>";

        unprinted = forms[i].str + forms[i].len;
      }
    }

    // Whatever follows the last token of the paragraph.
    if (unprinted < para.c_str() + para.size())
      os << xml_encoded(string_piece(unprinted, para.c_str() + para.size() - unprinted));

    os << flush;
  }
}

开发者ID:ufal，项目名称:nametag，代码行数:20，代码来源:run_tokenizer.cpp

示例16: tag_vertical

// Reads paragraphs from `is`, tags every sentence, and writes one line per
// token to `os` in the form
//
//   <form> TAB <lemma> TAB <tag>
//
// Each tag goes through `tagset_converter` and each lemma through `derivation`
// before being printed. Every sentence is followed by an empty line, after
// which the stream is flushed.
void tag_vertical(istream& is, ostream& os, const tagger& tagger, tokenizer& tokenizer, const tagset_converter& tagset_converter, const derivation_formatter& derivation, morpho::guesser_mode guesser) {
  string para;
  vector<string_piece> forms;
  vector<tagged_lemma> tags;

  while (getpara(is, para)) {
    tokenizer.set_text(para);

    while (tokenizer.next_sentence(&forms, nullptr)) {
      tagger.tag(forms, tags, guesser);

      for (unsigned i = 0; i < tags.size(); i++) {
        tagged_lemma& tagged = tags[i];

        tagset_converter.convert(tagged);
        derivation.format_derivation(tagged.lemma);

        os << forms[i] << '\t' << tagged.lemma << '\t' << tagged.tag << '\n';
      }

      // Sentence separator; endl also flushes.
      os << endl;
    }
  }
}

开发者ID:ufal，项目名称:morphodita，代码行数:20，代码来源:run_tagger.cpp

示例17: damage

// Fills the damage fields from ten consecutive tokens of a tokenized record.
//
// The fields are read, in order, from token indices 22+offset .. 31+offset:
// dmgDone, overkill, magicSchool, resisted, blocked, absorbed, critical,
// glancing, crushing, multistrike. Each token is converted with asInt.
damage::damage(tokenizer& t, int offset)
{
	int index = 22 + offset;	// index of the first damage field

	dmgDone = asInt(t.token(index++));
	overkill = asInt(t.token(index++));
	magicSchool = asInt(t.token(index++));
	resisted = asInt(t.token(index++));
	blocked = asInt(t.token(index++));
	absorbed = asInt(t.token(index++));
	critical = asInt(t.token(index++));
	glancing = asInt(t.token(index++));
	crushing = asInt(t.token(index++));
	multistrike = asInt(t.token(index));
}

开发者ID:alhunor，项目名称:projects，代码行数:13，代码来源:events.cpp

示例18: recognize_untokenized

// Reads paragraphs from `is`, finds named entities in every sentence, and
// writes the paragraph text back to `os` with XML markup added:
//   - every sentence is wrapped in <sentence>...</sentence>,
//   - every token in <token>...</token>,
//   - every named entity in <ne type="...">...</ne> around its tokens.
// Text between tokens and after the last token is copied XML-encoded when it is
// non-empty. The stream is flushed once per paragraph.
void recognize_untokenized(istream& is, ostream& os, const ner& recognizer, tokenizer& tokenizer) {
  string para;
  vector<string_piece> forms;
  vector<named_entity> entities;
  vector<size_t> entity_ends;  // index of the last token of each entity currently open

  while (getpara(is, para)) {
    tokenizer.set_text(para);

    // Start of the paragraph text that has not been written to `os` yet.
    const char* unprinted = para.c_str();

    while (tokenizer.next_sentence(&forms, nullptr)) {
      recognizer.recognize(forms, entities);
      sort_entities(entities);

      unsigned e = 0;  // next entity that has not been opened yet
      for (unsigned i = 0; i < forms.size(); i++) {
        if (unprinted < forms[i].str)
          os << xml_encoded(string_piece(unprinted, forms[i].str - unprinted));
        if (i == 0) os << "<sentence>";

        // Open every entity that starts at this token.
        while (e < entities.size() && entities[e].start == i) {
          os << "<ne type=\"" << xml_encoded(entities[e].type, true) << "\">";
          entity_ends.push_back(entities[e].start + entities[e].length - 1);
          e++;
        }

        os << "<token>" << xml_encoded(forms[i]) << "</token>";

        // Close every open entity that ends at this token, innermost first.
        for (; !entity_ends.empty() && entity_ends.back() == i; entity_ends.pop_back())
          os << "</ne>";

        if (i + 1 == forms.size()) os << "</sentence>";
        unprinted = forms[i].str + forms[i].len;
      }
    }

    // Whatever follows the last token of the paragraph.
    if (unprinted < para.c_str() + para.size())
      os << xml_encoded(string_piece(unprinted, para.c_str() + para.size() - unprinted));

    os << flush;
  }
}

开发者ID:ufal，项目名称:nametag，代码行数:41，代码来源:run_ner.cpp

示例19: toSQLParse::parse

// Parses everything `tokens` can deliver into a list of statements.
//
// parseStatement() is called repeatedly; each result that has at least one
// sub-token is appended to the returned list, and the first result without
// sub-tokens ends the loop. A result of type statement::List triggers a warning
// message box about unbalanced parentheses but is still appended. Any text the
// tokenizer still reports as remaining afterwards is appended as one final
// statement::Raw statement.
std::list<toSQLParse::statement> toSQLParse::parse(tokenizer &tokens)
{
	std::list<toSQLParse::statement> result;
	statement current(statement::Statement);

	while (true)
	{
		current = parseStatement(tokens, false, false);

		// A statement without sub-tokens means nothing more could be parsed.
		if (!(current.subTokens().begin() != current.subTokens().end()))
			break;

		if (current.Type == statement::List)
		{
			QMessageBox::warning(QApplication::activeWindow(), "Sqliteman",
								 "toSQLparse: Unbalanced parenthesis (Too many ')')");
		}

		result.push_back(current);
	}

	QString leftover = tokens.remaining(false);
	if (!leftover.isEmpty())
	{
		result.push_back(statement(statement::Raw, leftover, tokens.line()));
	}

	return result;
}

开发者ID:MatiasNAmendola，项目名称:sqliteman，代码行数:21，代码来源:tosqlparse.cpp

示例20: recognize_conll

// Reads paragraphs from `is`, finds named entities in every sentence, and
// writes one line per token to `os` in the form
//
//   <form> TAB <label>
//
// where the label is "B-<type>" for the first token of an entity, "I-<type>"
// for its remaining tokens, and "_" for tokens outside any entity. Every
// sentence is followed by an empty line and a flush.
void recognize_conll(istream& is, ostream& os, const ner& recognizer, tokenizer& tokenizer) {
  string para;
  vector<string_piece> forms;
  vector<named_entity> entities;

  while (getpara(is, para)) {
    tokenizer.set_text(para);

    while (tokenizer.next_sentence(&forms, nullptr)) {
      recognizer.recognize(forms, entities);
      sort_entities(entities);

      string entity_type;         // type of the entity currently being printed
      unsigned in_entity = 0;     // tokens of that entity still to be printed
      bool entity_start = false;  // only consulted while in_entity is non-zero
      unsigned e = 0;             // next entity to consider

      for (unsigned i = 0; i < forms.size(); i++) {
        // A new entity can only begin once the previous one is finished.
        if (in_entity == 0 && e < entities.size() && entities[e].start == i) {
          in_entity = entities[e].length;
          entity_start = true;
          entity_type = entities[e].type;
          e++;
        }

        os << forms[i] << '\t';

        if (in_entity == 0) {
          os << '_';
        } else {
          os << (entity_start ? "B-" : "I-") << entity_type;
          entity_start = false;
          in_entity--;
        }

        os << '\n';
      }

      os << '\n' << flush;
    }
  }
}

开发者ID:ufal，项目名称:nametag，代码行数:38，代码来源:run_ner.cpp

注：本文中的tokenizer类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。