使用 Boost.Spirit 解析结构化文本

Parsing a structured text using boost spirit

本文关键字:结构化 文本 boost 使用      更新时间:2023-10-16

我再一次请求你的帮助。使用boost spirit库,我想将下面的语法解析为结构体"unitConstruct"。到目前为止,我的解析器失败了,无法正确解析此语法。非常感谢你的帮助。请在下面找到我代码的快照。

该语法始终遵循下面这种基于标准的格式。我想把UNIT存入结构体unitConstruct的key成员,把TB_SENSOR_PRIMARY_VALUE_UNIT存入identifier成员,最后把其余各个标记(token)存入向量variablereferences。之所以采用这种结构,是为了在后面的代码中能够唯一地检索这些信息。

// Syntax to be parsed
UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
    trans1_primary_value,
    trans1_scale_out_lower_value,
    trans1_scale_out_upper_value,
    func1_AI_pv_upper_range_value,
    func1_AI_pv_lower_range_value,
    func1_AI_simulate_value
}
//structure  to store the syntax above 
struct unitConstruct
{
    std::string key;
    std::string identifier;
    std::vector<std::string> variablereferences;
};

// code snapshot 
// Result of a whole parse: every UNIT construct found in the input, in order.
typedef std::vector<unitConstruct> eddlParsedData;
template <typename Iterator>
struct skipper : qi::grammar<Iterator>
{
    skipper() : skipper::base_type(start)
    {
        start = ascii::blank;
    }
private:
    qi::rule<Iterator> start, comment;
};
template <typename Iterator>
struct eddlparser : qi::grammar<Iterator, eddlParsedData(), skipper<Iterator> >
{
    eddlparser() : eddlparser::base_type(start)
    {
        unitkey = qi::string("UNIT");
        text = +qi::graph;
        unit = unitkey >> text >> qi::eol
                       >> '{' >> qi::eol
                       >> +text >> qi::eol
                       >> '}' ;
        BOOST_SPIRIT_DEBUG_NODE(unit);
        start = (unit) % *qi::eol;
    }
private:
    qi::rule<Iterator, std::string(), skipper<Iterator> > uni, unitkey, text;
    qi::rule<Iterator, unitConstruct(), skipper<Iterator> > unit;

// adapt unitConstruct as a Fusion sequence
BOOST_FUSION_ADAPT_STRUCT(
    unitConstruct,
    (std::string, key)
    (std::string, identifier)
    (std::vector<std::string>, variablereferences)
)

让我们从您的示例和我们在注释中讨论的细节开始。您希望解析一个定义,如下所示:

UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
    trans1_primary_value,
    trans1_scale_out_lower_value,
    trans1_scale_out_upper_value,
    func1_AI_pv_upper_range_value,
    func1_AI_pv_lower_range_value,
    func1_AI_simulate_value
}
  • UNIT指定定义类型。只允许使用大写,并且我们希望只使用预定义的关键字(此时只使用"UNIT")。
  • TB_SENSOR_PRIMARY_VALUE_UNIT指定定义名称。可以包含大写字母、数字或下划线。第一个符号不能是数字。
  • 标识符可以由小写字母、大写字母、数字或下划线组成。第一个符号不能是数字。
  • trans1_primary_value_unit指定单个依赖标识符
  • trans1_primary_value,…func1_AI_simulate_value指定依赖。每个依赖项是一个标识符。多个依赖项用逗号分隔。

基于这些信息,我们需要一个语法,看起来像这样(在EBNF中)。

type = 'UNIT'
name = ( upper | "_" ), { upper | digit | "_" }
identifier = ( upper | lower | "_" ), { upper | lower | digit | "_" }
dependent = identifier
dependency = identifier
dependencies = dependency, { ",", dependency }
definition = type, name, "{", dependent, ":", dependencies, "}"

我们可以使用短语解析(phrase parsing),忽略空格、制表符、换行符和回车符,因为正确解析并不需要它们。

源代码

Live on Coliru

#include <string>
#include <iostream>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
// ======================================================================
std::string TEST_INPUT = R"(UNIT TB_SENSOR_PRIMARY_VALUE_UNIT
{
trans1_primary_value_unit:
    trans1_primary_value,
    trans1_scale_out_lower_value,
    trans1_scale_out_upper_value,
    func1_AI_pv_upper_range_value,
    func1_AI_pv_lower_range_value,
    func1_AI_simulate_value
}
)";
// ======================================================================
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
namespace ascii = boost::spirit::ascii;
// ======================================================================
struct definition
{
    std::string type;
    std::string name;
    std::string dependent;
    std::vector<std::string> dependencies;
};
// ======================================================================
BOOST_FUSION_ADAPT_STRUCT(
    definition,
    (std::string, type)
    (std::string, name)
    (std::string, dependent)
    (std::vector<std::string>, dependencies)
)
// ======================================================================
// Skipper grammar for phrase parsing: consumes one ascii::space character
// (space, tab, line feed, carriage return, ...) per match, so the main
// grammar never has to deal with whitespace or line breaks itself.
template <typename Iterator>
struct skipper
    : qi::grammar<Iterator>
{
    skipper()
        : skipper::base_type(whitespace)
    {
        whitespace = ascii::space;
    }
private:
    qi::rule<Iterator> whitespace;
};
// ======================================================================
// Grammar for a single definition of the form
//
//     UNIT NAME { dependent : dependency, dependency, ... }
//
// The synthesized attribute is a `definition`. Whitespace between tokens is
// consumed by skipper<Iterator>; the token rules themselves are declared
// without a skipper, so no whitespace is accepted inside a token.
template <typename Iterator>
struct def_parser
    : qi::grammar<Iterator, definition(), skipper<Iterator> >
{
    def_parser()
        : def_parser::base_type(start)
    {
        // Definition keyword. The negative look-ahead enforces a word
        // boundary: without it "UNITFOO" would be accepted as the keyword
        // "UNIT" immediately followed by the name "FOO".
        def_type %= qi::string("UNIT")
            >> !(ascii::alnum | ascii::char_('_'));
        // Definition name: upper-case letters, digits and '_', not starting
        // with a digit.
        def_name %= (ascii::upper | ascii::char_('_'))
            >> *(ascii::upper | ascii::digit | ascii::char_('_'));
        // General identifier: letters, digits and '_', not starting with a
        // digit.
        identifier %= (ascii::upper | ascii::lower | ascii::char_('_'))
            >> *(ascii::upper | ascii::lower | ascii::digit | ascii::char_('_'));
        def_dependent %= identifier;
        def_dependency %= identifier;
        // One or more dependencies separated by commas.
        def_dependencies %= def_dependency % qi::lit(",");
        // The attribute-producing parts appear in the same order as the
        // members of `definition` in its Fusion adaptation.
        start %= def_type
            >> def_name
            >> qi::lit("{")
            >> def_dependent
            >> qi::lit(":")
            >> def_dependencies
            >> qi::lit("}")
            ;
        init_debug();
    }
    // Names every rule and switches on Spirit's debug tracing for it.
    void init_debug()
    {
        def_type.name("def_type");
        def_name.name("def_name");
        identifier.name("identifier");
        def_dependent.name("def_dependent");
        def_dependency.name("def_dependency");
        def_dependencies.name("def_dependencies");
        start.name("start");
        qi::debug(def_type);
        qi::debug(def_name);
        qi::debug(identifier);
        qi::debug(def_dependent);
        qi::debug(def_dependency);
        qi::debug(def_dependencies);
        qi::debug(start);
    }
private:
    // Token rules: declared without a skipper.
    qi::rule<Iterator, std::string()> def_type;
    qi::rule<Iterator, std::string()> def_name;
    qi::rule<Iterator, std::string()> identifier;
    qi::rule<Iterator, std::string()> def_dependent;
    qi::rule<Iterator, std::string()> def_dependency;
    // Phrase-level rules: the skipper runs between their elements.
    qi::rule<Iterator, std::vector<std::string>(), skipper<Iterator>> def_dependencies;
    qi::rule<Iterator, definition(), skipper<Iterator>> start;
};
// ======================================================================
// Parses TEST_INPUT with def_parser and prints the extracted fields, one per
// line. A parse that matches but leaves unconsumed input is reported as
// "FAILED" together with the number of bytes left.
int main()
{
    typedef std::string::const_iterator iterator_type;
    def_parser<iterator_type> g;
    skipper<iterator_type> s;
    definition d;
    iterator_type iter = TEST_INPUT.begin();
    iterator_type end = TEST_INPUT.end();
    // phrase_parse advances `iter` past everything it consumed.
    bool r = qi::phrase_parse(iter, end, g, s, d);
    if (r) {
        std::cout << "Bytes left = " << std::distance(iter, end) << " -> "
            << ((iter == end) ? "SUCCEEDED" : "FAILED") << "\n";
        std::cout << "Type = " << d.type << "\n";
        std::cout << "Name = " << d.name << "\n";
        std::cout << "Dependent = " << d.dependent << "\n";
        for (auto const& ref : d.dependencies) {
            std::cout << "Dependency = " << ref << "\n";
        }
    } else {
        std::cout << "FAILED COMPLETELY\n";
    }
    return 0;
}
// ======================================================================

调试输出
<start>
  <try>UNIT TB_SENSOR_PRIMA</try>
  <def_type>
    <try>UNIT TB_SENSOR_PRIMA</try>
    <success> TB_SENSOR_PRIMARY_V</success>
    <attributes>[[U, N, I, T]]</attributes>
  </def_type>
  <def_name>
    <try> TB_SENSOR_PRIMARY_V</try>
    <success>{\ntrans1_primary_val</success>
    <attributes>[[T, B, _, S, E, N, S, O, R, _, P, R, I, M, A, R, Y, _, V, A, L, U, E, _, U, N, I, T]]</attributes>
  </def_name>
  <def_dependent>
    <try>\ntrans1_primary_valu</try>
    <identifier>
      <try>\ntrans1_primary_valu</try>
      <success>:\n    trans1_primary</success>
      <attributes>[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e, _, u, n, i, t]]</attributes>
    </identifier>
    <success>:\n    trans1_primary</success>
    <attributes>[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e, _, u, n, i, t]]</attributes>
  </def_dependent>
  <def_dependencies>
    <try>\n    trans1_primary_</try>
    <def_dependency>
      <try>\n    trans1_primary_</try>
      <identifier>
        <try>\n    trans1_primary_</try>
        <success>,\n    trans1_scale_o</success>
        <attributes>[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    trans1_scale_o</success>
      <attributes>[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    trans1_scale_ou</try>
      <identifier>
        <try>\n    trans1_scale_ou</try>
        <success>,\n    trans1_scale_o</success>
        <attributes>[[t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, l, o, w, e, r, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    trans1_scale_o</success>
      <attributes>[[t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, l, o, w, e, r, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    trans1_scale_ou</try>
      <identifier>
        <try>\n    trans1_scale_ou</try>
        <success>,\n    func1_AI_pv_up</success>
        <attributes>[[t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, u, p, p, e, r, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_pv_up</success>
      <attributes>[[t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, u, p, p, e, r, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_pv_upp</try>
      <identifier>
        <try>\n    func1_AI_pv_upp</try>
        <success>,\n    func1_AI_pv_lo</success>
        <attributes>[[f, u, n, c, 1, _, A, I, _, p, v, _, u, p, p, e, r, _, r, a, n, g, e, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_pv_lo</success>
      <attributes>[[f, u, n, c, 1, _, A, I, _, p, v, _, u, p, p, e, r, _, r, a, n, g, e, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_pv_low</try>
      <identifier>
        <try>\n    func1_AI_pv_low</try>
        <success>,\n    func1_AI_simul</success>
        <attributes>[[f, u, n, c, 1, _, A, I, _, p, v, _, l, o, w, e, r, _, r, a, n, g, e, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>,\n    func1_AI_simul</success>
      <attributes>[[f, u, n, c, 1, _, A, I, _, p, v, _, l, o, w, e, r, _, r, a, n, g, e, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <def_dependency>
      <try>\n    func1_AI_simula</try>
      <identifier>
        <try>\n    func1_AI_simula</try>
        <success>}\n</success>
        <attributes>[[f, u, n, c, 1, _, A, I, _, s, i, m, u, l, a, t, e, _, v, a, l, u, e]]</attributes>
      </identifier>
      <success>}\n</success>
      <attributes>[[f, u, n, c, 1, _, A, I, _, s, i, m, u, l, a, t, e, _, v, a, l, u, e]]</attributes>
    </def_dependency>
    <success>}\n</success>
    <attributes>[[[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e], [t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, l, o, w, e, r, _, v, a, l, u, e], [t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, u, p, p, e, r, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, p, v, _, u, p, p, e, r, _, r, a, n, g, e, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, p, v, _, l, o, w, e, r, _, r, a, n, g, e, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, s, i, m, u, l, a, t, e, _, v, a, l, u, e]]]</attributes>
  </def_dependencies>
  <success>\n</success>
  <attributes>[[[U, N, I, T], [T, B, _, S, E, N, S, O, R, _, P, R, I, M, A, R, Y, _, V, A, L, U, E, _, U, N, I, T], [t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e, _, u, n, i, t], [[t, r, a, n, s, 1, _, p, r, i, m, a, r, y, _, v, a, l, u, e], [t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, l, o, w, e, r, _, v, a, l, u, e], [t, r, a, n, s, 1, _, s, c, a, l, e, _, o, u, t, _, u, p, p, e, r, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, p, v, _, u, p, p, e, r, _, r, a, n, g, e, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, p, v, _, l, o, w, e, r, _, r, a, n, g, e, _, v, a, l, u, e], [f, u, n, c, 1, _, A, I, _, s, i, m, u, l, a, t, e, _, v, a, l, u, e]]]]</attributes>
</start>
控制台输出

Bytes left = 0 -> SUCCEEDED
Type = UNIT
Name = TB_SENSOR_PRIMARY_VALUE_UNIT
Dependent = trans1_primary_value_unit
Dependency = trans1_primary_value
Dependency = trans1_scale_out_lower_value
Dependency = trans1_scale_out_upper_value
Dependency = func1_AI_pv_upper_range_value
Dependency = func1_AI_pv_lower_range_value
Dependency = func1_AI_simulate_value

解析多个定义

我们已经有了一个单一定义的语法。要解析多个定义,我们只需重用这个。

让我们对代码做一些小修改:

  • std::vector<definition> d;
  • bool r = qi::phrase_parse(iter, end, +g, s, d);

Live on Coliru