无法获得 boost::spirit parser&lexer 适用于 std::string 或 int 或 double 以外的令牌类型

cannot get boost::spirit parser&lexer working for token types other than std::string or int or double

本文关键字:double int string std 令牌 类型 适用于 boost spirit lexer parser      更新时间:2023-10-16

这不会编译(下面的代码)。

这里还有另一个同样错误的问题。但我不明白答案。我已经试过在某些地方插入qi::eps,但没有成功。

我还尝试过为所使用的类型添加元函数(boost::spirit::traits::is_container),但这也无济于事。

我还尝试使用相同的变体,其中包含我需要在任何地方使用的所有类型。同样的问题。

有人为lexer返回除double、int或string之外的其他内容而工作吗?对于同样返回非平凡对象的解析器?

我尝试过在所有返回默认对象的地方实现语义函数。但这也于事无补。

代码来了:

// spirit_error.cpp : Defines the entry point for the console application.
//
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/phoenix/object.hpp>
#include <boost/spirit/include/qi_char_class.hpp>
#include <boost/spirit/include/phoenix_bind.hpp>
#include <boost/mpl/index_of.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/intrusive_ptr.hpp>
#include <boost/smart_ptr/intrusive_ref_counter.hpp>
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

namespace frank
{
/// Intrusively reference-counted base class of the object model.
/// The destructor is virtual, so deleting through a ref_counter pointer
/// destroys the most-derived object.
class ref_counter : public boost::intrusive_ref_counter<ref_counter>
{
public:
    virtual ~ref_counter() = default;
};
/// Base class of nature and discipline; the grammars declare symbolPtr /
/// symbolVector as their result attributes.
class symbol : public ref_counter
{
public:
    /// Shared handle to an immutable symbol.
    using symbolPtr = boost::intrusive_ptr<const symbol>;
    /// Ordered collection of symbol handles.
    using symbolVector = std::vector<symbolPtr>;

    /// Scope marker object; constructor and destructor are currently empty.
    struct push_scope
    {
        push_scope() {}
        ~push_scope() {}
    };
};
/// Symbol type for a nature declaration.
class nature : public symbol
{
public:
    /// Identifies one of the attributes a nature can carry.
    enum enumAttribute { eAbstol, eAccess, eDDT, eIDT, eUnits };

    /// Placeholder alternative for attributeValue: all instances compare
    /// equivalent and stream as nothing.
    struct empty
    {
        bool operator<(const empty &) const { return false; }

        friend std::ostream &operator<<(std::ostream &_rStream, const empty &)
        {
            return _rStream;
        }
    };

    /// Value of an attribute: either nothing or a string.
    using attributeValue = boost::variant<empty, std::string>;
};
/// Symbol type for a discipline declaration.
class discipline : public symbol
{
public:
    /// Domain of a discipline.
    enum enumDomain { eDiscrete, eContinuous };
};
/// Reference-counted type object; the lexer declares typePtr as the value of
/// the type keyword tokens.
class type : public ref_counter
{
public:
    /// Shared handle to a (mutable) type.
    using typePtr = boost::intrusive_ptr<type>;
};
/// Read-only random-access iterator over a std::string that addresses characters by index.
///
/// The past-the-end position is stored as the sentinel ~std::size_t(0) rather than as the
/// current string length, so an end iterator keeps comparing equal to "end" even after the
/// underlying string has been edited (onInclude erases from and inserts into the string
/// while it is being tokenized).
///
/// Every arithmetic operation translates the sentinel into the current string length first;
/// otherwise `end - begin` would evaluate to -1 and `--end` would not land on the last character.
struct myIterator
{
    typedef std::random_access_iterator_tag iterator_category;
    typedef char value_type;
    typedef std::ptrdiff_t difference_type;
    typedef const char *pointer;
    typedef const char &reference;

    std::string *m_p;       ///< string being iterated; nullptr for a default-constructed iterator
    std::size_t m_iPos;     ///< character index, or endMarker() for the past-the-end position

    /// Index value that encodes the past-the-end position.
    static std::size_t endMarker(void)
    {
        return ~std::size_t(0);
    }
    /// Creates a singular iterator that is not attached to any string.
    myIterator(void)
        : m_p(nullptr),
          m_iPos(endMarker())
    {
    }
    /// Creates the begin iterator of _r, or its end iterator when _bEnd is true.
    /// For an empty string the begin iterator is the end iterator.
    myIterator(std::string &_r, const bool _bEnd = false)
        : m_p(&_r),
          m_iPos((_bEnd || _r.empty()) ? endMarker() : 0)
    {
    }
    /// Current index with the end sentinel resolved to the current string length.
    std::size_t position(void) const
    {
        return (m_p != nullptr && m_iPos == endMarker()) ? m_p->size() : m_iPos;
    }
    /// Bounds-checked access; throws std::out_of_range when dereferencing the end iterator.
    const char &operator*(void) const
    {
        return m_p->at(m_iPos);
    }
    const char &operator[](const std::ptrdiff_t _i) const
    {
        return *(*this + _i);
    }
    bool operator==(const myIterator &_r) const
    {
        return m_p == _r.m_p && m_iPos == _r.m_iPos;
    }
    bool operator!=(const myIterator &_r) const
    {
        return !(*this == _r);
    }
    myIterator &operator++(void)
    {
        ++m_iPos;
        // Reaching (or, after the string shrank, exceeding) the length means past-the-end.
        if (m_iPos >= m_p->size())
            m_iPos = endMarker();
        return *this;
    }
    myIterator operator++(int)
    {
        const myIterator s(*this);
        operator++();
        return s;
    }
    myIterator &operator--(void)
    {
        // Stepping back from the end sentinel must land on the last character.
        m_iPos = position() - 1;
        return *this;
    }
    myIterator operator--(int)
    {
        const myIterator s(*this);
        operator--();
        return s;
    }
    myIterator &operator+=(const std::ptrdiff_t _i)
    {
        // Unsigned wrap-around makes a negative offset move backwards.
        const std::size_t iNew = position() + static_cast<std::size_t>(_i);
        m_iPos = (m_p != nullptr && iNew >= m_p->size()) ? endMarker() : iNew;
        return *this;
    }
    myIterator &operator-=(const std::ptrdiff_t _i)
    {
        return *this += -_i;
    }
    myIterator operator+(const std::ptrdiff_t _i) const
    {
        myIterator s(*this);
        s += _i;
        return s;
    }
    friend myIterator operator+(const std::ptrdiff_t _i, const myIterator &_r)
    {
        return _r + _i;
    }
    myIterator operator-(const std::ptrdiff_t _i) const
    {
        myIterator s(*this);
        s -= _i;
        return s;
    }
    /// Orders by string address first, then by index; the end sentinel is the largest index.
    bool operator<(const myIterator &_r) const
    {
        if (m_p == _r.m_p)
            return m_iPos < _r.m_iPos;
        else
            return m_p < _r.m_p;
    }
    bool operator>(const myIterator &_r) const
    {
        return _r < *this;
    }
    bool operator<=(const myIterator &_r) const
    {
        return !(_r < *this);
    }
    bool operator>=(const myIterator &_r) const
    {
        return !(*this < _r);
    }
    /// Distance in characters between two iterators of the same string.
    std::ptrdiff_t operator-(const myIterator &_r) const
    {
        return static_cast<std::ptrdiff_t>(position()) - static_cast<std::ptrdiff_t>(_r.position());
    }
};
/// Lexer semantic action for an include directive: replaces the matched text
/// in the underlying string and makes the lexer ignore the match.
struct onInclude
{
    auto operator()(myIterator &_rStart, myIterator &_rEnd) const
    {
        std::string &rText = *_rStart.m_p;
        const auto iFirst = _rStart.m_iPos;
        // drop the matched text (the include statement) ...
        rText.erase(iFirst, _rEnd.m_iPos - iFirst);
        // ... and splice in the contents of the file ("abcd" stands in for it)
        rText.insert(iFirst, "abcd");
        // continue lexing at the start of the inserted text
        _rEnd = _rStart;
        return lex::pass_flags::pass_ignore;
    }
};
/// Token definitions for the nature / discipline / module input language.
///
/// Three lexer states are populated:
///  - the default state: punctuation, keywords, literals and identifiers,
///  - "INCLUDE": entered after `include; expects the file name string,
///  - "WS": whitespace and comments, meant to be used as the skipper.
///
/// Keywords and punctuation are written as quoted literals inside the regular expression
/// (hence the escaped double quotes), and are added to the lexer before m_sIdentifier so
/// that they win over the identifier pattern.
template<typename LEXER>
class lexer : public lex::lexer<LEXER>
{
public:
    // Type keywords; declared with a type::typePtr value (no semantic action assigns one yet).
    lex::token_def<type::typePtr> m_sKW_real, m_sKW_integer, m_sKW_string;
    // Skipped input.
    lex::token_def<lex::omit> m_sLineComment, m_sCComment;
    lex::token_def<lex::omit> m_sWS;
    // Punctuation and the include directive; these carry no value.
    lex::token_def<lex::omit> m_sSemicolon, m_sEqual, m_sColon, m_sInclude, m_sCharOP, m_sCharCP,
        m_sComma;
    // Tokens carrying a value.
    lex::token_def<std::string> m_sIdentifier, m_sString;
    lex::token_def<double> m_sReal;
    lex::token_def<int> m_sInteger;
    // Remaining keywords; these carry no value.
    lex::token_def<lex::omit> m_sKW_units, m_sKW_access, m_sKW_idt_nature, m_sKW_ddt_nature, m_sKW_abstol,
        m_sKW_nature, m_sKW_endnature, m_sKW_continuous, m_sKW_discrete,
        m_sKW_potential, m_sKW_flow, m_sKW_domain, m_sKW_discipline, m_sKW_enddiscipline, m_sKW_module,
        m_sKW_endmodule, m_sKW_parameter;

    /// Converts the matched character range to T via boost::lexical_cast.
    template<typename T>
    struct extractValue
    {
        T operator()(const myIterator &_rStart, const myIterator &_rEnd) const
        {
            return boost::lexical_cast<T>(std::string(_rStart, _rEnd));
        }
    };
    /// Returns the matched string literal without its surrounding quotes.
    struct extractString
    {
        std::string operator()(const myIterator &_rStart, const myIterator &_rEnd) const
        {
            const auto s = std::string(_rStart, _rEnd);
            return s.substr(1, s.size() - 2);
        }
    };

    /// Defines all patterns and registers them with the lexer states.
    /// The initializers are listed in member declaration order, which is the order
    /// in which they are executed.
    lexer(void)
        : m_sKW_real("\"real\""),
          m_sKW_integer("\"integer\""),
          m_sKW_string("\"string\""),
          m_sLineComment("\\/\\/[^\n]*"),
          // "/*", then anything up to a run of '*' that is directly followed by '/'.
          // This form also accepts comments that end in "**/".
          m_sCComment("\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"),
          m_sWS("[ \t\n\r]+"),
          m_sSemicolon("\";\""),
          m_sEqual("\"=\""),
          m_sColon("\":\""),
          m_sInclude("\"`include\""),
          m_sCharOP("\"(\""),
          m_sCharCP("\")\""),
          m_sComma("\",\""),
          m_sIdentifier("[a-zA-Z_]+[a-zA-Z0-9_]*"),
          // A double-quoted string; escaped quotes inside the string are not supported.
          m_sString("[\"]"
                    "[^\"]*"
                    "[\"]"),
          //realnumber      ({uint}{exponent})|((({uint}.{uint})|(.{uint})){exponent}?)
          //exponent        [Ee][+-]?{uint}
          //uint            [0-9]+
          m_sReal("({uint}{exponent})"
                  "|"
                  "("
                  "(({uint}[\\.]{uint})|([\\.]{uint})){exponent}?"
                  ")"),
          m_sInteger("{uint}"),
          m_sKW_units("\"units\""),
          m_sKW_access("\"access\""),
          m_sKW_idt_nature("\"idt_nature\""),
          m_sKW_ddt_nature("\"ddt_nature\""),
          m_sKW_abstol("\"abstol\""),
          m_sKW_nature("\"nature\""),
          m_sKW_endnature("\"endnature\""),
          m_sKW_continuous("\"continuous\""),
          m_sKW_discrete("\"discrete\""),
          m_sKW_potential("\"potential\""),
          m_sKW_flow("\"flow\""),
          m_sKW_domain("\"domain\""),
          m_sKW_discipline("\"discipline\""),
          m_sKW_enddiscipline("\"enddiscipline\""),
          m_sKW_module("\"module\""),
          m_sKW_endmodule("\"endmodule\""),
          m_sKW_parameter("\"parameter\"")
    {
        // Named sub-patterns referenced as {uint} / {exponent}; they must be registered
        // before the token definitions using them are added to the lexer below.
        this->self.add_pattern
            ("uint", "[0-9]+")
            ("exponent", "[eE][\\+\\-]?{uint}");
        this->self = m_sSemicolon
            | m_sEqual
            | m_sColon
            | m_sCharOP
            | m_sCharCP
            | m_sComma
            | m_sString[lex::_val = boost::phoenix::bind(extractString(), lex::_start, lex::_end)]
            | m_sKW_real//[lex::_val = boost::phoenix::bind(&type::getReal)]
            | m_sKW_integer//[lex::_val = boost::phoenix::bind(&type::getInteger)]
            | m_sKW_string//[lex::_val = boost::phoenix::bind(&type::getString)]
            | m_sKW_parameter
            | m_sKW_units
            | m_sKW_access
            | m_sKW_idt_nature
            | m_sKW_ddt_nature
            | m_sKW_abstol
            | m_sKW_nature
            | m_sKW_endnature
            | m_sKW_continuous
            | m_sKW_discrete
            | m_sKW_domain
            | m_sKW_discipline
            | m_sKW_enddiscipline
            | m_sReal[lex::_val = boost::phoenix::bind(extractValue<double>(), lex::_start, lex::_end)]
            | m_sInteger[lex::_val = boost::phoenix::bind(extractValue<int>(), lex::_start, lex::_end)]
            | m_sKW_potential
            | m_sKW_flow
            | m_sKW_module
            | m_sKW_endmodule
            | m_sIdentifier
            | m_sInclude [ lex::_state = "INCLUDE" ]
            ;
        // After `include the next string is the file name: splice the file in and
        // return to the default state.
        this->self("INCLUDE") += m_sString [
            lex::_state = "INITIAL", lex::_pass = boost::phoenix::bind(onInclude(), lex::_start, lex::_end)
        ];
        this->self("WS") = m_sWS
            | m_sLineComment
            | m_sCComment
            ;
    }
};
// Grammar for one `nature <identifier> [: <identifier>] ; <properties> endnature` declaration,
// written in terms of the token definitions of the lexer object passed to the constructor.
// The grammar's declared attribute is symbol::symbolPtr; skipping is done with
// qi::in_state_skipper<Lexer>.
// NOTE(review): no rule below attaches a semantic action, so the attributes synthesized by the
// token sequences would have to convert to the declared rule attributes on their own. The
// surrounding text states that this program does not compile -- presumably this mismatch is
// the reason; confirm against the compiler output.
template<typename Iterator, typename Lexer>
class natureParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
// one `<attribute keyword> = <value> ;` line
qi::rule<Iterator, std::pair<nature::enumAttribute, nature::attributeValue>(void), qi::in_state_skipper<Lexer> > m_sProperty;
// the `: <identifier>` part that may follow the nature name
qi::rule<Iterator, std::string(), qi::in_state_skipper<Lexer> > m_sName;
public:
// _rTokens: object exposing the token definitions (m_sKW_*, m_sIdentifier, ...) used below.
template<typename Tokens>
natureParser(const Tokens &_rTokens)
:natureParser::base_type(m_sStart)
{   m_sProperty = (_rTokens.m_sKW_units
>> _rTokens.m_sEqual
>> _rTokens.m_sString
>> _rTokens.m_sSemicolon
)
| (_rTokens.m_sKW_access
>> _rTokens.m_sEqual
>> _rTokens.m_sIdentifier
>> _rTokens.m_sSemicolon
)
| (_rTokens.m_sKW_idt_nature
>> _rTokens.m_sEqual
>> _rTokens.m_sIdentifier
>> _rTokens.m_sSemicolon
)
| (_rTokens.m_sKW_ddt_nature
>> _rTokens.m_sEqual
>> _rTokens.m_sIdentifier
>> _rTokens.m_sSemicolon
)
| (_rTokens.m_sKW_abstol
>> _rTokens.m_sEqual
>> _rTokens.m_sReal
>> _rTokens.m_sSemicolon
)
;
m_sName = (_rTokens.m_sColon >> _rTokens.m_sIdentifier);
m_sStart = (_rTokens.m_sKW_nature
>> _rTokens.m_sIdentifier
>> -m_sName
>> _rTokens.m_sSemicolon
>> *(m_sProperty)
>> _rTokens.m_sKW_endnature
);
// names used by Spirit's debugging / error reporting
m_sStart.name("start");
m_sProperty.name("property");
}
};
/*
// Conservative discipline
discipline electrical; 
potential    Voltage;
flow         Current;
enddiscipline
*/
// Grammar for one `discipline <identifier> ; <properties> enddiscipline` declaration,
// written in terms of the token definitions of the lexer object passed to the constructor.
// The grammar's declared attribute is symbol::symbolPtr; skipping is done with
// qi::in_state_skipper<Lexer>.
// NOTE(review): no semantic actions are attached, so the synthesized attributes must convert
// to the declared rule attributes on their own -- see the compile problem described in the
// surrounding text.
template<typename Iterator, typename Lexer>
class disciplineParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
// NOTE(review): the bool presumably distinguishes potential from flow -- confirm.
typedef std::pair<bool, boost::intrusive_ptr<const nature> > CPotentialAndNature;
// placeholder alternative for `property`: instances compare equivalent and stream as nothing
struct empty
{   bool operator<(const empty&) const
{   return false;
}
friend std::ostream &operator<<(std::ostream &_r, const empty&)
{   return _r;
}
};
// declared attribute of a single property line
typedef boost::variant<empty, CPotentialAndNature, discipline::enumDomain> property;
qi::rule<Iterator, discipline::enumDomain(), qi::in_state_skipper<Lexer> > m_sDomain;
qi::rule<Iterator, property(void), qi::in_state_skipper<Lexer> > m_sProperty;
public:
// _rTokens: object exposing the token definitions (m_sKW_*, m_sIdentifier, ...) used below.
template<typename Tokens>
disciplineParser(const Tokens &_rTokens)
:disciplineParser::base_type(m_sStart)
{   m_sDomain = _rTokens.m_sKW_continuous
| _rTokens.m_sKW_discrete
;
// `potential <id> ;` | `flow <id> ;` | `domain <continuous|discrete> ;`
m_sProperty = (_rTokens.m_sKW_potential >> _rTokens.m_sIdentifier >> _rTokens.m_sSemicolon)
| (_rTokens.m_sKW_flow >> _rTokens.m_sIdentifier >> _rTokens.m_sSemicolon)
| (_rTokens.m_sKW_domain >> m_sDomain >> _rTokens.m_sSemicolon)
;
m_sStart = (_rTokens.m_sKW_discipline
>> _rTokens.m_sIdentifier
>> _rTokens.m_sSemicolon
>> *m_sProperty
>> _rTokens.m_sKW_enddiscipline
);
}
};
// Grammar for one `module <identifier> ( <port list> ) <items> endmodule` declaration,
// written in terms of the token definitions of the lexer object passed to the constructor.
// The grammar's declared attribute is symbol::symbolPtr; skipping is done with
// qi::in_state_skipper<Lexer>.
// NOTE(review): no semantic actions are attached, so the synthesized attributes must convert
// to the declared rule attributes on their own -- see the compile problem described in the
// surrounding text.
template<typename Iterator, typename Lexer>
class moduleParser:public qi::grammar<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> >
{   public:
qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sStart;
// `( <port list> )`
qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sModulePortList;
// comma-separated ports
qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sPortList;
// a single port: one identifier
qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sPort;
// the `module` keyword
qi::rule<Iterator, std::shared_ptr<symbol::push_scope>(void), qi::in_state_skipper<Lexer> > m_sModule;
typedef boost::intrusive_ptr<const ref_counter> intrusivePtr;
typedef std::vector<intrusivePtr> vectorOfPtr;
// zero or more module items
qi::rule<Iterator, vectorOfPtr(void), qi::in_state_skipper<Lexer> > m_sModuleItemList;
// `parameter <type> <identifier>`
qi::rule<Iterator, intrusivePtr(void), qi::in_state_skipper<Lexer> > m_sParameter;
// a module item; currently only a parameter
qi::rule<Iterator, intrusivePtr(void), qi::in_state_skipper<Lexer> > m_sModuleItem;
// one of the type keywords real / integer / string
qi::rule<Iterator, type::typePtr(void), qi::in_state_skipper<Lexer> > m_sType;
// _rTokens: object exposing the token definitions (m_sKW_*, m_sIdentifier, ...) used below.
template<typename Tokens>
moduleParser(const Tokens &_rTokens)
:moduleParser::base_type(m_sStart)
{   m_sPort = _rTokens.m_sIdentifier;
m_sPortList %= m_sPort % _rTokens.m_sComma;
m_sModulePortList %= _rTokens.m_sCharOP >> m_sPortList >> _rTokens.m_sCharCP;
m_sModule = _rTokens.m_sKW_module;
m_sType = _rTokens.m_sKW_real | _rTokens.m_sKW_integer | _rTokens.m_sKW_string;
m_sParameter = _rTokens.m_sKW_parameter
>> m_sType
>> _rTokens.m_sIdentifier
;
m_sModuleItem = m_sParameter;
m_sModuleItemList %= *m_sModuleItem;
m_sStart = (m_sModule
>> _rTokens.m_sIdentifier
>> m_sModulePortList
>> m_sModuleItemList
>> _rTokens.m_sKW_endmodule);
}
};
/// Top-level grammar: a file is any number of discipline, nature and module declarations.
/// The declared attribute is symbol::symbolVector; skipping is done with
/// qi::in_state_skipper<Lexer>.
template<typename Iterator, typename Lexer>
class fileParser : public qi::grammar<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> >
{
public:
    disciplineParser<Iterator, Lexer> m_sDiscipline;
    natureParser<Iterator, Lexer> m_sNature;
    moduleParser<Iterator, Lexer> m_sModule;
    qi::rule<Iterator, symbol::symbolVector(void), qi::in_state_skipper<Lexer> > m_sStart;
    qi::rule<Iterator, symbol::symbolPtr(void), qi::in_state_skipper<Lexer> > m_sItem;

    /// _rTokens: object exposing the token definitions; it is forwarded to the sub-grammars.
    /// The sub-grammars are listed in member declaration order, which is the order in which
    /// they are constructed.
    template<typename Tokens>
    fileParser(const Tokens &_rTokens)
        : fileParser::base_type(m_sStart),
          m_sDiscipline(_rTokens),
          m_sNature(_rTokens),
          m_sModule(_rTokens)
    {
        m_sItem = m_sDiscipline | m_sNature | m_sModule;
        m_sStart = *m_sItem;
    }
};
}
/// Tokenizes and parses a fixed sample input (three natures and one discipline).
/// Returns 0 when the parse succeeded and consumed all tokens, 1 otherwise.
int main()
{
    std::string sInput =
        "nature Current;\n"
        "units        = \"A\";\n"
        "access       = I;\n"
        "idt_nature   = Charge;\n"
        "abstol       = 1e-12;\n"
        "endnature\n"
        "\n"
        "// Charge in coulombs\n"
        "nature Charge;\n"
        "units      = \"coul\";\n"
        "access     = Q;\n"
        "ddt_nature = Current;\n"
        "abstol     = 1e-14;\n"
        "endnature\n"
        "\n"
        "// Potential in volts\n"
        "nature Voltage;\n"
        "units      = \"V\";\n"
        "access     = V;\n"
        "idt_nature = Flux;\n"
        "abstol     = 1e-6;\n"
        "endnature\n"
        "\n"
        "discipline electrical;\n"
        "potential    Voltage;\n"
        "flow         Current;\n"
        "enddiscipline\n";
    // The token's value variant must list every type used by a lex::token_def in frank::lexer.
    typedef lex::lexertl::token<frank::myIterator, boost::mpl::vector<frank::type::typePtr, std::string, double, int> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef frank::lexer<lexer_type>::iterator_type iterator_type;
    typedef frank::fileParser<iterator_type, frank::lexer<lexer_type>::lexer_def> grammar_type;
    frank::lexer<lexer_type> sLexer;
    grammar_type sParser(sLexer);
    frank::symbol::push_scope sPush;
    auto pStringBegin = frank::myIterator(sInput);
    auto pBegin(sLexer.begin(pStringBegin, frank::myIterator(sInput, true)));
    // Tokens of the "WS" lexer state (whitespace and comments) are skipped.
    const auto b = qi::phrase_parse(pBegin, sLexer.end(), sParser, qi::in_state("WS")[sLexer.self]);
    // The start rule is a Kleene star and succeeds on any input, so also require
    // that every token was consumed.
    return (b && pBegin == sLexer.end()) ? 0 : 1;
}

有人为lexer返回除double、int或string之外的其他内容而工作吗?

当然。简单的例子可以在这个网站上找到

对于同样返回非平凡对象的解析器来说?

这是您真正的问题。Spirit适用于在eDSL中轻松表达的解析器子集,并且具有"神奇地"映射到属性选择的巨大好处。

一些现实是:

  • 属性应具有值语义;使用多态属性很难(如何使用boost::spirit::qi解析器的多态属性?例如)

  • 使用Lex会使大部分最佳点消失,因为所有"高级"解析器(如real_parser、[u]int_parser)都无法再使用。Spirit的开发者已经明确表示他们不喜欢使用Lex。此外,Spirit X3不再支持Lex。


背景信息:

我非常考虑按原样将源解析为直接值类型的AST节点。我知道,这可能是你所认为的"琐碎对象",但不要被明显的简单性所欺骗:递归变异树具有一定的表达能力。

示例

  • 这里有一个简单的AST来表示<20 LoC:用于类构成的Boost Karma生成器
  • 在这里,我们以完全保真的方式表示Graphviz源格式:如何使用具有强制性最小元素数量的Boost.Spirit列表运算符

此后,我创建了代码,将AST转换为具有完全正确所有权的域表示,级联词法范围的节点/边缘属性和交叉引用。如果你感兴趣的话,我刚刚恢复了这项工作,并将其放在github上,主要是因为该任务在许多方面都非常相似,比如重写/继承属性和解析范围内的标识符:https://github.com/sehe/spirit-graphviz/blob/master/spirit-graphviz.cpp#L660

建议、想法

在你的情况下,我会采取类似的方法来保持简单。显示的代码(还)没有涵盖最棘手的成分(比如规程中的自然属性覆盖)。

一旦您开始实现用例,例如在给定节点上解决兼容规程和绝对公差,您就需要一个具有完全保真度的领域模型。优选地,不会丢失源信息和不可变的AST信息²。

作为中间立场,您可能会避免在内存中构建一个完整的源AST,只为了在一次大的尝试中对其进行转换,在顶层您可以拥有:

file = qi::skip(skipper) [
*(m_sDiscipline | m_sNature | m_sModule) [process_ast(_1)]
];

其中process_ast将"琐碎"AST表示应用于域类型,一次一个。通过这种方式,您只保留少量的临时AST表示。

域表示可以任意复杂,以支持所有逻辑和用例。

让我们"用代码展示,而不是空谈"(Show, don't tell)

烘焙脑海中最简单的AST,匹配语法³:

// Plain value-type AST that mirrors the grammar productions one-to-one.
namespace frank { namespace ast {
// One `nature ... endnature` declaration.
struct nature {
struct empty{};
std::string name;
// identifier following the optional ':' after the name
std::string inherits;
enum class Attribute { units, access, idt, ddt, abstol };
using Value = boost::variant<int, double, std::string>;
// attribute lines of the declaration, keyed by attribute kind
std::map<Attribute, Value> attributes;
};
// One `discipline ... enddiscipline` declaration.
struct discipline {
enum enumDomain { eUnspecified, eDiscrete, eContinuous };
struct properties_t {
enumDomain domain = eUnspecified;
boost::optional<std::string> flow, potential;
};
std::string name;
properties_t properties;
};
// TODO: modules have no AST yet; the alias makes the module grammar yield no data
using module = qi::unused_type;
// A parsed file: the declarations in source order.
using file   = std::vector<boost::variant<nature, discipline, module> >;
enum class type { real, integer, string };
} }

这是微不足道的,并且将1:1映射到语法产物上,这意味着我们几乎没有阻抗。

代币?我们不需要Lex

您可以拥有通用的令牌解析器,而无需Lex 的复杂性

是的,Lex(尤其是静态生成的)可能会提高性能,但

  • 如果你确实需要这种性能,我打赌Spirit Qi无论如何都不是你的最佳选择
  • 过早优化

我做了什么:

// Terminal rules ("tokens") shared by the grammars that derive from this struct.
// None of the rules declares a skipper, so each one matches contiguous input only.
struct tokens {
    // implicit lexemes
    qi::rule<It, std::string()> string, identifier;
    qi::rule<It, double()> real;
    qi::rule<It, int()> integer;
    qi::rule<It, ast::nature::Value()> value;
    qi::rule<It, ast::nature::Attribute()> attribute;
    qi::rule<It, ast::discipline::enumDomain()> domain;

    // Maps attribute keywords to their enumerators.
    struct attribute_sym_t : qi::symbols<char, ast::nature::Attribute> {
        attribute_sym_t() {
            this->add
                ("units", ast::nature::Attribute::units)
                ("access", ast::nature::Attribute::access)
                ("idt_nature", ast::nature::Attribute::idt)
                ("ddt_nature", ast::nature::Attribute::ddt)
                ("abstol", ast::nature::Attribute::abstol);
        }
    } attribute_sym;

    // Maps domain keywords to their enumerators.
    struct domain_sym_t : qi::symbols<char, ast::discipline::enumDomain> {
        domain_sym_t() {
            this->add
                ("discrete", ast::discipline::eDiscrete)
                ("continuous", ast::discipline::eContinuous);
        }
    } domain_sym;

    tokens() {
        using namespace qi;
        // kw[p] matches p only when no further identifier character follows directly.
        auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));

        // Double-quoted string; a backslash makes the following character part of the
        // value (so \" does not end the string). The quotes and backslashes are dropped.
        string     = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
        identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
        real       = double_;
        integer    = int_;
        attribute  = kw[attribute_sym];
        domain     = kw[domain_sym];

        value = string | identifier | real | integer;

        BOOST_SPIRIT_DEBUG_NODES((string)(identifier)(real)(integer)(value)(domain)(attribute))
    }
};

解放,不是吗?注意

  • 所有属性都会自动传播
  • 字符串会处理转义(这一点在Lex方法中被注释掉了)。我们甚至不需要语义动作来(笨拙地)取出去掉引号、还原转义后的值
  • 我们使用qi::repository::distinct来确保关键字解析只匹配完整的标识符。(请参阅"如何在Boost.Spirit中正确解析保留字"。)

    这实际上是您注意到缺少单独lexer的地方。

    另一方面,这使得上下文敏感的关键字变得轻而易举(lex可以很容易地将关键字优先于出现在关键字无法出现的地方的标识符。⁴)

跳过空格/注释怎么办

我们本可以添加一个令牌,但出于惯例的原因,我将其作为一个解析器:

// Skipper grammar: matches one unit of ignorable input -- a whitespace character,
// a `//` comment up to the end of the line (or end of input), or a `/* ... */` comment.
struct skipParser : qi::grammar<It> {
skipParser() : skipParser::base_type(spaceOrComment) {
using namespace qi;
spaceOrComment = space
| ("//" >> *(char_ - eol) >> (eoi|eol))
| ("/*" >> *(char_ - "*/") >> "*/");
BOOST_SPIRIT_DEBUG_NODES((spaceOrComment))
}
private:
qi::rule<It> spaceOrComment;
};

natureParser

我们从tokens:继承了我们的AST解析器

struct natureParser : tokens, qi::grammar<It, ast::nature(), skipParser> {

从那时起,一切都很顺利:

property = attribute >> '=' >> value >> ';';
nature
= kw["nature"] >> identifier >> -(':' >> identifier) >> ';'
>> *property
>> kw["endnature"];

disciplineParser

discipline = kw["discipline"] >> identifier >> ';' 
>> properties 
>> kw["enddiscipline"]
;
properties
= kw["domain"] >> domain >> ';'
^ kw["flow"] >> identifier >> ';'
^ kw["potential"] >> identifier >> ';'
;

这显示了一种竞争方法,该方法使用置换运算符(^)将任意顺序的可选备选方案解析为固定的frank::ast::discipline属性结构。当然,您可能会选择在这里使用更通用的表示,就像我们使用ast::nature一样。

模块AST留给读者练习,尽管解析器规则在下面实现。

顶级,封装Skipper

我讨厌从调用代码中指定跳过器(skipper)(这比所需的更复杂,而且更换跳过器会改变语法)。因此,我将其封装在顶级解析器中:

// Top-level grammar: any number of discipline, nature and module declarations.
// It declares no skipper of its own; the qi::skip directive applies a copy of m_sSkip
// to the sub-grammars, so the grammar is invoked without passing a skipper.
struct fileParser : qi::grammar<It, ast::file()> {
fileParser() : fileParser::base_type(file) {
file = qi::skip(qi::copy(m_sSkip)) [
*(m_sDiscipline | m_sNature | m_sModule)
];
BOOST_SPIRIT_DEBUG_NODES((file))
}
private:
disciplineParser m_sDiscipline;
natureParser     m_sNature;
moduleParser     m_sModule;
skipParser       m_sSkip;
qi::rule<It, ast::file()> file;
};

演示时间

此演示为枚举添加了operator<<,并添加了一个变体访问者来打印一些AST详细信息,用于调试/演示(print_em)。

然后我们有一个测试驱动程序:

// Test driver: parses sInput, prints every parsed declaration and reports
// any input that was left unparsed.
int main() {
    using iterator_type = std::string::const_iterator;
    iterator_type iter = sInput.begin(), last = sInput.end();

    frank::Parsers<iterator_type>::fileParser parser;
    print_em print;

    frank::ast::file file;
    // No skipper is passed: the file grammar applies its own.
    bool ok = qi::parse(iter, last, parser, file);

    if (ok) {
        for (auto& symbol : file)
            print(symbol);
    }
    else {
        std::cout << "Parse failed\n";
    }

    if (iter != last) {
        std::cout << "Remaining unparsed: '" << std::string(iter,last) << "'\n";
    }
}

根据您问题的样本输入,我们得到以下输出:

在Coliru上直播

-- Nature
name: Current
inherits: 
attribute: units = A
attribute: access = I
attribute: idt = Charge
attribute: abstol = 1e-12
-- Nature
name: Charge
inherits: 
attribute: units = coul
attribute: access = Q
attribute: ddt = Current
attribute: abstol = 1e-14
-- Nature
name: Voltage
inherits: 
attribute: units = V
attribute: access = V
attribute: idt = Flux
attribute: abstol = 1e-06
-- Discipline
name: electrical
domain: (unspecified)
flow:  Current
potential:  Voltage
Remaining unparsed: '
'

定义BOOST_SPIRIT_DEBUG后,您可以获得丰富的调试信息:LiveOnColiru

完整列表

在Coliru上直播

//#define BOOST_SPIRIT_DEBUG
#include <map>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapted.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
namespace qi = boost::spirit::qi;
// Plain value-type AST that mirrors the grammar productions one-to-one.
namespace frank { namespace ast {
// One `nature ... endnature` declaration.
struct nature {
struct empty{};
std::string name;
// identifier following the optional ':' after the name
std::string inherits;
enum class Attribute { units, access, idt, ddt, abstol };
using Value = boost::variant<int, double, std::string>;
// attribute lines of the declaration, keyed by attribute kind
std::map<Attribute, Value> attributes;
};
// One `discipline ... enddiscipline` declaration.
struct discipline {
enum enumDomain { eUnspecified, eDiscrete, eContinuous };
struct properties_t {
enumDomain domain = eUnspecified;
boost::optional<std::string> flow, potential;
};
std::string name;
properties_t properties;
};
// TODO: modules have no AST yet; the alias makes the module grammar yield no data
using module = qi::unused_type;
// A parsed file: the declarations in source order.
using file   = std::vector<boost::variant<nature, discipline, module> >;
enum class type { real, integer, string };
} }
// Expose the AST structs as Fusion sequences so that Qi can fill them from sequence
// parsers; the members are listed in the order in which the grammars produce them.
BOOST_FUSION_ADAPT_STRUCT(frank::ast::nature, name, inherits, attributes)
BOOST_FUSION_ADAPT_STRUCT(frank::ast::discipline, name, properties)
BOOST_FUSION_ADAPT_STRUCT(frank::ast::discipline::properties_t, domain, flow, potential)
namespace frank {
namespace qr = boost::spirit::repository::qi;
template <typename It> struct Parsers {
// Terminal rules ("tokens") shared by the grammars that derive from this struct.
// None of the rules declares a skipper, so each one matches contiguous input only.
struct tokens {
    // implicit lexemes
    qi::rule<It, std::string()> string, identifier;
    qi::rule<It, double()> real;
    qi::rule<It, int()> integer;
    qi::rule<It, ast::nature::Value()> value;
    qi::rule<It, ast::nature::Attribute()> attribute;
    qi::rule<It, ast::discipline::enumDomain()> domain;

    // Maps attribute keywords to their enumerators.
    struct attribute_sym_t : qi::symbols<char, ast::nature::Attribute> {
        attribute_sym_t() {
            this->add
                ("units", ast::nature::Attribute::units)
                ("access", ast::nature::Attribute::access)
                ("idt_nature", ast::nature::Attribute::idt)
                ("ddt_nature", ast::nature::Attribute::ddt)
                ("abstol", ast::nature::Attribute::abstol);
        }
    } attribute_sym;

    // Maps domain keywords to their enumerators.
    struct domain_sym_t : qi::symbols<char, ast::discipline::enumDomain> {
        domain_sym_t() {
            this->add
                ("discrete", ast::discipline::eDiscrete)
                ("continuous", ast::discipline::eContinuous);
        }
    } domain_sym;

    tokens() {
        using namespace qi;
        // kw[p] matches p only when no further identifier character follows directly.
        auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));

        // Double-quoted string; a backslash makes the following character part of the
        // value (so \" does not end the string). The quotes and backslashes are dropped.
        string     = '"' >> *("\\" >> char_ | ~char_('"')) >> '"';
        identifier = char_("a-zA-Z_") >> *char_("a-zA-Z0-9_");
        real       = double_;
        integer    = int_;
        attribute  = kw[attribute_sym];
        domain     = kw[domain_sym];

        value = string | identifier | real | integer;

        BOOST_SPIRIT_DEBUG_NODES((string)(identifier)(real)(integer)(value)(domain)(attribute))
    }
};
// Skipper grammar: matches one unit of ignorable input -- a whitespace character,
// a `//` comment up to the end of the line (or end of input), or a `/* ... */` comment.
struct skipParser : qi::grammar<It> {
skipParser() : skipParser::base_type(spaceOrComment) {
using namespace qi;
spaceOrComment = space
| ("//" >> *(char_ - eol) >> (eoi|eol))
| ("/*" >> *(char_ - "*/") >> "*/");
BOOST_SPIRIT_DEBUG_NODES((spaceOrComment))
}
private:
qi::rule<It> spaceOrComment;
};
// Grammar for `nature <identifier> [: <identifier>] ; <properties> endnature`.
// Its attribute is ast::nature; the terminal rules are inherited from tokens.
struct natureParser : tokens, qi::grammar<It, ast::nature(), skipParser> {
natureParser() : natureParser::base_type(nature) {
using namespace qi;
// kw[p] matches p only when no further identifier character follows directly
auto kw = qr::distinct(copy(char_("a-zA-Z0-9_")));
// one `<attribute> = <value> ;` line, exposed as an (Attribute, Value) pair
property = attribute >> '=' >> value >> ';';
nature
= kw["nature"] >> identifier >> -(':' >> identifier) >> ';'
>> *property
>> kw["endnature"];
BOOST_SPIRIT_DEBUG_NODES((nature)(property))
}
private:
// attribute type of one property line
using Attribute = std::pair<ast::nature::Attribute, ast::nature::Value>;
qi::rule<It, ast::nature(), skipParser> nature;
qi::rule<It, Attribute(), skipParser> property;
// names from the tokens base class (a dependent base of this template member)
using tokens::attribute;
using tokens::value;
using tokens::identifier;
};
// Grammar for `discipline <identifier> ; <properties> enddiscipline`.
// Its attribute is ast::discipline; the terminal rules are inherited from tokens.
struct disciplineParser : tokens, qi::grammar<It, ast::discipline(), skipParser> {
disciplineParser() : disciplineParser::base_type(discipline) {
// kw[p] matches p only when no further identifier character follows directly
auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));
discipline = kw["discipline"] >> identifier >> ';' 
>> properties 
>> kw["enddiscipline"]
;
// permutation operator (^): the domain / flow / potential lines are each optional and
// may appear in any order; the results fill properties_t in its fixed member order
properties
= kw["domain"] >> domain >> ';'
^ kw["flow"] >> identifier >> ';'
^ kw["potential"] >> identifier >> ';'
;
BOOST_SPIRIT_DEBUG_NODES((discipline)(properties))
}
private:
qi::rule<It, ast::discipline(), skipParser> discipline;
qi::rule<It, ast::discipline::properties_t(), skipParser> properties;
// names from the tokens base class (a dependent base of this template member)
using tokens::domain;
using tokens::identifier;
};
// Grammar for `module <identifier> ( <ports> ) <items> endmodule`.
// The input is only recognized: ast::module is qi::unused_type and none of the
// rules below declares an attribute.
struct moduleParser : tokens, qi::grammar<It, ast::module(), skipParser> {
moduleParser() : moduleParser::base_type(module) {
// kw[p] matches p only when no further identifier character follows directly
auto kw = qr::distinct(qi::copy(qi::char_("a-zA-Z0-9_")));
m_sPort           = identifier;
m_sPortList       = m_sPort % ',';
m_sModulePortList = '(' >> m_sPortList >> ')';
m_sModule         = kw["module"];
m_sType           = kw["real"] | kw["integer"] | kw["string"];
m_sParameter      = kw["parameter"] >> m_sType >> identifier;
// a module item is currently always a parameter
m_sModuleItem     = m_sParameter;
m_sModuleItemList = *m_sModuleItem;
module =
(m_sModule >> identifier >> m_sModulePortList >> m_sModuleItemList >> kw["endmodule"]);
}
private:
qi::rule<It, ast::module(), skipParser> module;
qi::rule<It, skipParser> m_sModulePortList;
qi::rule<It, skipParser> m_sPortList;
qi::rule<It, skipParser> m_sPort;
qi::rule<It, skipParser> m_sModule;
qi::rule<It, skipParser> m_sModuleItemList;
qi::rule<It, skipParser> m_sParameter;
qi::rule<It, skipParser> m_sModuleItem;
qi::rule<It, skipParser> m_sType;
// name from the tokens base class (a dependent base of this template member)
using tokens::identifier;
};
// Top-level grammar: any number of discipline, nature and module declarations.
// It declares no skipper of its own; the qi::skip directive applies a copy of m_sSkip
// to the sub-grammars, so the grammar is invoked without passing a skipper.
struct fileParser : qi::grammar<It, ast::file()> {
fileParser() : fileParser::base_type(file) {
file = qi::skip(qi::copy(m_sSkip)) [
*(m_sDiscipline | m_sNature | m_sModule)
];
BOOST_SPIRIT_DEBUG_NODES((file))
}
private:
disciplineParser m_sDiscipline;
natureParser     m_sNature;
moduleParser     m_sModule;
skipParser       m_sSkip;
qi::rule<It, ast::file()> file;
};
};
}
// Sample input; declared here because its definition follows main().
extern std::string const sInput;
// just for demo
#include <boost/optional/optional_io.hpp>
namespace frank { namespace ast {
//static inline std::ostream &operator<<(std::ostream &os, const nature::empty &) { return os; }
// Streams the short name of a nature attribute; "?" for a value outside the enumeration.
static inline std::ostream &operator<<(std::ostream &os, nature::Attribute a) {
    const char *label = "?";
    switch (a) {
        case nature::Attribute::units:  label = "units";  break;
        case nature::Attribute::access: label = "access"; break;
        case nature::Attribute::idt:    label = "idt";    break;
        case nature::Attribute::ddt:    label = "ddt";    break;
        case nature::Attribute::abstol: label = "abstol"; break;
    }
    return os << label;
}
// Streams the name of a discipline domain; "?" for a value outside the enumeration.
static inline std::ostream &operator<<(std::ostream &os, discipline::enumDomain d) {
    const char *label = "?";
    switch (d) {
        case discipline::eDiscrete:    label = "discrete";      break;
        case discipline::eContinuous:  label = "continuous";    break;
        case discipline::eUnspecified: label = "(unspecified)"; break;
    }
    return os << label;
}
} }
// Demo visitor that prints the parsed AST to std::cout, one field per line.
struct print_em {
    using result_type = void;

    // Entry point for a variant element of ast::file: dispatches to the overload
    // matching the alternative currently held.
    template <typename V>
    void operator()(V const& variant) const {
        boost::apply_visitor(*this, variant);
    }

    void operator()(frank::ast::nature const& nature) const {
        std::cout << "-- Nature\n";
        std::cout << "name: " << nature.name << "\n";
        std::cout << "inherits: " << nature.inherits << "\n";
        for (auto& a : nature.attributes) {
            std::cout << "attribute: " << a.first << " = " << a.second << "\n";
        }
    }

    void operator()(frank::ast::discipline const& discipline) const {
        std::cout << "-- Discipline\n";
        std::cout << "name: " << discipline.name << "\n";
        std::cout << "domain: " << discipline.properties.domain << "\n";
        // flow / potential are boost::optional and are streamed via optional_io.hpp
        std::cout << "flow: " << discipline.properties.flow << "\n";
        std::cout << "potential: " << discipline.properties.potential << "\n";
    }

    void operator()(frank::ast::module const&) const {
        std::cout << "-- Module (TODO)\n";
    }
};
int main() {
using iterator_type = std::string::const_iterator;
iterator_type iter = sInput.begin(), last = sInput.end();
frank::Parsers<iterator_type>::fileParser parser;
print_em print;
frank::ast::file file;
bool ok = parse(iter, last, parser, file);
if (ok) {
for (auto& symbol : file)
print(symbol);
}
else {
std::cout << "Parse failedn";
}
if (iter != last) {
std::cout << "Remaining unparsed: '" << std::string(iter,last) << "'n";
}
}
// Sample input for the demo: three nature declarations (with two line comments)
// followed by one discipline declaration.
std::string const sInput = R"(
nature Current;
units        = "A";
access       = I;
idt_nature   = Charge;
abstol       = 1e-12;
endnature
// Charge in coulombs
nature Charge;
units      = "coul";
access     = Q;
ddt_nature = Current;
abstol     = 1e-14;
endnature
// Potential in volts
nature Voltage;
units      = "V";
access     = V;
idt_nature = Flux;
abstol     = 1e-6;
endnature
discipline electrical;
potential    Voltage;
flow         Current;
enddiscipline
)";

顺便说一句,那里的另一个答案证明了多态属性与Spirit之间的"阻抗失配"——这一次是在Spirit Karma(生成器)方面

²(以防止依赖于评估顺序或类似事项的细微错误,例如)

³(从这里收集了一些,但没有导入太多Lex方法中没有反映的复杂性)

⁴(事实上,这就是你需要在语法中进行状态切换的地方,而这是Spirit Lex中出了名的不成熟、实际上几乎不可用的部分:例如,在它能工作的情况下,参见"如何避免在boost::spirit::lex中定义匹配所有内容的令牌";在它工作不正常的情况下,参见"Boost.Spirit SQL语法/lexer失败")

一种解决方案是在所有地方使用std::string,并定义一个包含所有所需内容的boost::variant,但不直接在解析器或lexer中的任何地方使用它,而只是将其序列化为字符串以及从字符串反序列化。

这就是boost::spirit的作者们的本意吗?