Boost Spirit is built on Proto (by the same hero, Eric Niebler!), so I hope you don't mind if I uphold a personal tradition of mine and present an implementation in Boost Spirit.
I found it pretty tricky to see what you wanted to achieve, from just the code shown. Therefore I just went straight to the mustache
docs and implemented a parser for the following AST:
namespace mustache {
// any atom refers directly to source iterators for efficiency
using boost::string_ref;
template <typename Kind> struct atom {
string_ref value;
atom() { }
atom(string_ref const& value) : value(value) { }
};
// the atoms
using verbatim = atom<struct verbatim_tag>;
using variable = atom<struct variable_tag>;
using partial = atom<struct partial_tag>;
// the template elements (any atom or a section)
struct section;
using melement = boost::variant<
verbatim,
variable,
partial,
boost::recursive_wrapper<section>
// TODO comments and set-separators
>;
// the template: sequences of elements
using sequence = std::vector<melement>;
// section: recursively define to contain a template sequence
struct section {
bool sense; // positive or negative
string_ref control;
sequence content;
};
}
As you can see, I've added support for negated sections as well as partial templates (i.e. variables whose value is itself a template, which is then expanded dynamically).
Here are the productions:
// A template is zero or more elements.
sequence = *element;
// An element is tried as partial, section, variable, verbatim - in that order.
// The negative lookahead makes element fail (without consuming input) in front
// of "{{/", so that *element stops at a section's closing tag.
element =
!(lit("{{") >> '/') >> // section-end ends the current sequence
(partial | section | variable | verbatim);
// A name inside a tag: one or more printable non-space characters, stopping before "}}".
reference = +(graph - "}}");
// {{> name}}
partial = qi::lit("{{") >> "> " >> reference >> "}}";
// '#' yields true (positive section), '^' yields false (negated section).
sense = ('#' > attr(true))
| ('^' > attr(false));
// {{#name}} ... {{/name}} or {{^name}} ... {{/name}}.
// %= keeps automatic attribute propagation even though a semantic action is attached.
// The action copies the opening tag's name into the local section_id; the closing
// tag is then required (expectation operator >) to repeat exactly that name.
section %= "{{" >> sense >> reference [ section_id = phx::bind(&boost::string_ref::to_string, _1) ] >> "}}"
>> sequence // contents
> ("{{" >> ('/' >> lit(section_id)) >> "}}");
// {{name}}
variable = "{{" >> reference >> "}}";
// Literal text: everything up to (not including) the next "{{".
verbatim = +(char_ - "{{");
The only nifty thing is the use of a rule-local variable (declared via qi::locals<>)
named section_id
to check that the closing tag of a section matches the opening tag of the current section.
// Rule declarations. The second template argument is the signature of the
// attribute (the AST node type) that each rule produces.
qi::rule<Iterator, mustache::sequence()> sequence;
qi::rule<Iterator, mustache::melement()> element;
qi::rule<Iterator, mustache::partial()> partial;
// section additionally declares one rule-local std::string (the section_id used in its production)
qi::rule<Iterator, mustache::section(), qi::locals<std::string> > section;
qi::rule<Iterator, bool()> sense; // positive or negative
qi::rule<Iterator, mustache::variable()> variable;
qi::rule<Iterator, mustache::verbatim()> verbatim;
I optimize things based on the assumption that the input data will stay around, so we don't need to copy actual data. This should avoid 99% of allocation needs here. I used boost::string_ref
to achieve this here, and I think it's fair to say that this introduces the only bits of complexity (see full code below).
// The attribute is a boost::string_ref rather than a std::string, so a matched name is not copied.
qi::rule<Iterator, boost::string_ref()> reference;
Now we're ready to take our parser for a spin. See It Live On Coliru:
// Demo driver: parses a fixed sample template and reports the parse status, any
// unparsed remainder, the input itself, and the template dumped back from the AST.
// An expectation failure thrown by the grammar is caught and reported instead.
int main()
{
    std::cout << std::unitbuf; // flush std::cout after every output operation

    // The line breaks are part of the template text, so they are written as \n
    // escapes: a string literal may not contain a raw newline.
    std::string input = "<ul>{{#time}}\n<li>{{> partial}}</li>{{/time}}</ul>\n"
        "<i>for all good men</i> to come to the {007} aid of "
        "their</bold> {{country}}. Result: {{^Res2}}(absent){{/Res2}}{{#Res2}}{{Res2}}{{/Res2}}"
        ;

    // Parser setup --------------------------------------------------------
    typedef std::string::const_iterator It;
    static const mustache_grammar<It> p;
    It first = input.begin(), last = input.end();

    try {
        mustache::sequence parsed_template;
        if (qi::parse(first, last, p, parsed_template))
            std::cout << "Parse success\n";
        else
            std::cout << "Parse failed\n";

        // qi::parse advances `first`; anything left over was not consumed
        if (first != last)
            std::cout << "Remaing unparsed input: '" << std::string(first, last) << "'\n";

        std::cout << "Input: " << input << "\n";
        std::cout << "Dump: ";
        Dumping::dumper()(std::cout, parsed_template) << "\n";
    } catch(qi::expectation_failure<It> const& e)
    {
        // e.first .. e.last is the input range carried by the exception
        std::cout << "Unexpected: '" << std::string(e.first, e.last) << "'\n";
    }
}
Dumping::dumper
simply prints the mustache template back from the parsed AST. You might wonder how dumper
is implemented:
// Visitor that writes a parsed template back out in mustache syntax.
// Every overload writes one kind of AST node to `os` and returns `os`.
struct dumper : boost::static_visitor<std::ostream&>
{
    // Sequence: visit the elements in order. The stream is bound as the first
    // argument so that the unary visitor call reaches the two-argument overloads.
    std::ostream& operator()(std::ostream& os, mustache::sequence const& v) const {
        const auto emit = std::bind(dumper(), std::ref(os), std::placeholders::_1);
        for (auto it = v.begin(); it != v.end(); ++it)
            boost::apply_visitor(emit, *it);
        return os;
    }

    // Verbatim text is written unchanged.
    std::ostream& operator()(std::ostream& os, mustache::verbatim const& v) const {
        os << v.value;
        return os;
    }

    // Variable: {{name}}
    std::ostream& operator()(std::ostream& os, mustache::variable const& v) const {
        os << "{{" << v.value << "}}";
        return os;
    }

    // Partial: {{> name}}
    std::ostream& operator()(std::ostream& os, mustache::partial const& v) const {
        os << "{{> " << v.value << "}}";
        return os;
    }

    // Section: opening tag ('#' when sense is true, '^' otherwise), the nested
    // content, then the closing tag.
    std::ostream& operator()(std::ostream& os, mustache::section const& v) const {
        const char sigil = v.sense ? '#' : '^';
        os << "{{" << sigil << v.control << "}}";
        (*this)(os, v.content);
        os << "{{/" << v.control << "}}";
        return os;
    }
};
Nothing overly complicated. Boost Variant really affords a declarative programming style. To illustrate this even more thoroughly, let's add expansion based on context objects!
I wasn't going to implement JSON just for this, so instead let's assume a context Value model like:
// Empty placeholder type; the first alternative of Value.
struct Nil { };
// Context value: Nil, a number, a string, a string-keyed map of values or a
// vector of values. boost::recursive_variant_ stands for the variant type itself.
using Value = boost::make_recursive_variant<
Nil,
double,
std::string,
std::map<std::string, boost::recursive_variant_>,
std::vector<boost::recursive_variant_>
>::type;
// Shorthands for the map and vector shapes listed as alternatives of Value.
using Dict = std::map<std::string, Value>;
using Array = std::vector<Value>;
Now we use binary visitation against mustache::melement
and this context Value
variant. This is a bit more code than just dumping, but let's look at the use-site first:
using namespace ContextExpander;
expander engine;
// Sample context: "time" is an Array of three Dicts, each with its own
// "partial" template text plus "title" and "zeit" entries; "country" and
// "Res3" are plain strings. No "Res2" key is present.
Value const ctx = Dict {
{ "time", Array {
Dict { { "partial", "gugus {{zeit}} (a.k.a. <u>{{title}}</u>)"}, { "title", "noon" }, { "zeit", "12:00" } },
Dict { { "partial", "gugus {{zeit}} (a.k.a. <u>{{title}}</u>)"}, { "title", "evening" }, { "zeit", "19:30" } },
Dict { { "partial", "gugus <u>{{title}}</u> (expected at around {{zeit}})"}, { "title", "dawn" }, { "zeit", "06:00" } },
} },
{ "country", "ESP" },
{ "Res3", "unused" }
};
// Expand the parsed template against ctx, writing the result to std::cout.
engine(std::cout, ctx, parsed_template);
This prints (See it Live On Coliru again):
Evaluation: <ul>
<li>gugus 12:00 (a.k.a. <u>noon</u>)</li>
<li>gugus 19:30 (a.k.a. <u>evening</u>)</li>
<li>gugus <u>dawn</u> (expected at around 06:00)</li></ul>
<i>for all good men</i> to come to the {007} aid of their</bold> ESP. Result: (absent)
Full Code Listing
for reference:
//#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/utility/string_ref.hpp>
#include <functional>
#include <map>
namespace mustache {
// any atom refers directly to source iterators for efficiency
using boost::string_ref;
template <typename Kind> struct atom {
string_ref value;
atom() { }
atom(string_ref const& value) : value(value) { }
friend std::ostream& operator<<(std::ostream& os, atom const& v) { return os << typeid(v).name() << "[" << v.value << "]"; }
};
// the atoms
using verbatim = atom<struct verbatim_tag>;
using variable = atom<struct variable_tag>;
using partial = atom<struct partial_tag>;
// the template elements (any atom or a section)
struct section;
using melement = boost::variant<
verbatim,
variable,
partial, // TODO comments and set-separators
boost::recursive_wrapper<section>
>;
// the template: sequences of elements
using sequence = std::vector<melement>;
// section: recursively define to contain a template sequence
struct section {
bool sense; // positive or negative
string_ref control;
sequence content;
};
}
// Adapt mustache::section as a Fusion sequence (sense, control, content - in
// declaration order); the section rule relies on this to fill the struct from
// the attributes of its sub-parsers.
BOOST_FUSION_ADAPT_STRUCT(mustache::section, (bool, sense)(boost::string_ref, control)(mustache::sequence, content))
// Namespace shorthands used throughout the rest of the listing.
namespace qi = boost::spirit::qi;
namespace phx= boost::phoenix;
template <typename Iterator>
struct mustache_grammar : qi::grammar<Iterator, mustache::sequence()>
{
mustache_grammar() : mustache_grammar::base_type(sequence)
{
using namespace qi;
static const _a_type section_id; // local
using boost::phoenix::construct;
using boost::phoenix::begin;
using boost::phoenix::size;
sequence = *element;
element =
!(lit("{{") >> '/') >> // section-end ends the current sequence
(partial | section | variable | verbatim);
reference = raw [ lexeme [ +(graph - "}}") ] ]
[ _val = construct<boost::string_ref>(&*begin(_1), size(_1)) ];
partial = qi::lit("{{") >> "> " >> reference >> "}}";
sense = ('#' > attr(true))
| ('^' > attr(false));
section %= "{{" >> sense >> reference [ section_id = phx::bind(&boost::string_ref::to_string, _1) ] >> "}}"
>> sequence // contents
> ("{{" >> ('/' >> lexeme [ lit(section_id) ]) >> "}}");
variable = "{{" >> reference >> "}}";
verbatim = raw [ lexeme [ +(char_ - "{{") ] ]
[ _val = construct<boost::string_ref>(&*begin(_1), size(_1)) ];
BOOST_SPIRIT_DEBUG_NODES(
(sequence)(element)(partial)(variable)(section