如何将 Boost.Spirit 解析器泛化,使其能够以任意顺序接受列表

How to generalize a spirit parser to take lists in arbitrary order?

本文关键字:任意 顺序 获取 列表 何概括      更新时间:2023-10-16

我有一个简单的解析器,可以解析整数或带引号的字符串列表。

如果我在定义了 SIMPLE_CASE 的情况下编译运行,并使用如下输入:

std::string input1 = "{ INT: 42, 24 STR: \"Smith\", \"John\" }";  // inner quotes must be escaped inside the literal

它正确地解析为 my_record ,其中包含一个整数列表和一个 std::string 列表。

我想将此代码修改为通用代码,以便它可以以任意顺序获取零个或多个 INT 列表和零个或多个 STR 列表,并按正确的顺序将它们填充到my_record中。 我想要我的第二个更通用的测试用例:

std::string input1 = "{ STR: \"Joe\" INT: 42, 24 STR: \"Smith\", \"John\" }";  // inner quotes must be escaped inside the literal

解析为:

client::my_record expected1 { { 42, 24 }, {"Joe", "Smith", "John"} }; 

如果我运行,下面的代码工作正常:

/tmp$ g++ -DSIMPLE_CASE -g -std=c++11 sandbox.cpp -o sandbox && ./sandbox 

但我不确定在运行这个时如何让一般情况工作:

/tmp$ g++ -g -std=c++11 sandbox.cpp -o sandbox && ./sandbox 

sandbox.cpp

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/fusion/include/io.hpp>
#include <string>
#include <complex>
#include <algorithm>
namespace client
{
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;
    /// Result of a parse: every integer and every string found in the input,
    /// each kept in the order in which it was encountered.
    struct my_record
    {
        std::vector<int>          m_ints;
        std::vector<std::string>  m_strs;

        /// Two records are equal when both vectors have the same length and
        /// the same elements.
        bool operator==( const my_record& other ) const
        {
            // vector::operator== compares sizes before elements. The
            // three-iterator std::equal never looks at the second range's
            // length, so a shorter left-hand side would compare equal and a
            // longer one would read past the end of the right-hand side.
            return m_ints == other.m_ints
                && m_strs == other.m_strs;
        }
        bool operator!=( const my_record& other ) const
        {
            return !( *this == other );
        }
        friend std::ostream& operator<<( std::ostream& os, const my_record& rec );
    };

    /// Writes the integers on one line and the strings on the next, each
    /// element followed by a single space. Returns `os` so calls can be chained.
    std::ostream& operator<<( std::ostream& os, const my_record& rec )
    {
        // Write to the stream that was passed in, not to std::cerr.
        for( const auto& x : rec.m_ints )
            os << x << ' ';
        os << '\n';  // '\n' rather than std::endl: an inserter should not force a flush
        for( const auto& x : rec.m_strs )
            os << x << ' ';
        os << '\n';
        return os;   // flowing off the end of a non-void function is undefined behaviour
    }
}
// Adapt client::my_record for Boost.Fusion, listing its members in the order
// m_ints (index 0), m_strs (index 1).
BOOST_FUSION_ADAPT_STRUCT(
    client::my_record,
        (std::vector<int>,          m_ints)
        (std::vector<std::string>,  m_strs)
)
namespace client
{
    // Qi grammar for a brace-delimited record. It is declared with an
    // ascii::space_type skipper and synthesizes a my_record attribute.
    template <typename Iterator>
    struct employee_parser : qi::grammar<Iterator, my_record(), ascii::space_type>
    {
        employee_parser() : employee_parser::base_type(start)
    {
        using qi::int_;
        using qi::lit;
        using qi::double_;
        using qi::lexeme;
        using ascii::char_;
        // One or more characters other than '"' enclosed in double quotes;
        // lexeme[] turns the skipper off inside the quotes.
        quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
#ifdef SIMPLE_CASE
        // Exactly one INT list followed by exactly one STR list.
        start %=
            '{'
            >>  int_list
            >>  str_list
            >>  '}'
            ;
#else
        // Open question from the author: not sure how to approach this.
        // As written this is a sequence of two Kleene stars, so it accepts
        // all INT lists first and then all STR lists; it does not accept
        // the two kinds interleaved.
        start %=
            '{'
            >>  *(int_list)  // want zero or more of these, in any order
            >>  *(str_list)  // want zero or more of these, in any order
            >>  '}'
            ;
#endif
        // '%' binds tighter than '>>': the literal tag is followed by a
        // comma-separated list of quoted strings.
        str_list %=
                lit( "STR:" ) >> quoted_string % ','    
                ;
        // Same shape for integers: tag, then a comma-separated list of ints.
        int_list %=
                lit( "INT:" ) >> int_ % ','
                ;
    }
    // Rules used by the grammar; the attribute type of each is the first
    // signature argument.
    qi::rule<Iterator, std::string(), ascii::space_type>               quoted_string;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type>  str_list;
    qi::rule<Iterator, std::vector<int>(),         ascii::space_type>  int_list;
    qi::rule<Iterator, my_record(), ascii::space_type>                 start;
    };
}
/// Parses `input` with client::employee_parser and compares the result with
/// `expected`.
///
/// @param input     text to parse
/// @param expected  record the parse is expected to produce
/// @return 0 when the grammar matched, the whole input was consumed and the
///         parsed record equals `expected`; -1 otherwise (a diagnostic is
///         written to std::cerr).
static int 
TryParse( const std::string& input, const client::my_record& expected )
{
    using boost::spirit::ascii::space;
    client::my_record                        rec;
    auto                                     iter = input.begin(), end = input.end();
    client::employee_parser<decltype(iter)>  g;
    // phrase_parse reports a grammar mismatch through its return value.
    // Checking only iter!=end misses a failed parse of an empty input,
    // where iter already equals end.
    const bool matched = phrase_parse( iter, end, g, space, rec );
    if ( !matched || iter!=end )
    {
        std::cerr << "failed to parse completely" << std::endl;
        return -1;
    } else if ( rec!=expected ) {
        std::cerr << "unexpected result in parse" << std::endl;
        std::cerr << rec;
        return -1;
    }
    return 0;
}
// Runs one parse test and returns its result as the process exit code.
// With SIMPLE_CASE defined the input has one INT list followed by one STR
// list; otherwise the lists appear in mixed order.
int 
main(int argc, char* argv[])
{
    (void)argc;  // command-line arguments are not used
    (void)argv;
#ifdef SIMPLE_CASE
    client::my_record  expected1 { { 42, 24 }, {"Smith", "John"} };
    // The quotes inside the literal must be escaped, otherwise they end it early.
    std::string        input1 = "{ INT: 42, 24 STR: \"Smith\", \"John\" }";
    return TryParse( input1, expected1 );
#else
    client::my_record  expected1 { { 42, 24 }, {"Joe", "Smith", "John"} };
    std::string        input1 = "{ STR: \"Joe\" INT: 42, 24 STR: \"Smith\", \"John\" }";
    return TryParse( input1, expected1 );
#endif
}

你的语法错了,

    // Sequence of two Kleene stars: every INT list has to come before every
    // STR list, so interleaved input is not accepted.
    start %=
        '{'
        >>  *(int_list)  // want zero or more of these, in any order
        >>  *(str_list)  // want zero or more of these, in any order
        >>  '}'
        ;

这意味着:先接受任意数量的 int 列表,之后再接受任意数量的 string 列表。它不接受 int、string、int 这样的交错顺序,也不接受其他任何组合。

你需要类似的东西

    // Kleene star over an alternative: each repetition matches either an INT
    // list or a STR list, so the two kinds may appear in any order.
    start %=
        '{'
         >> *( int_list  // want zero or more of these, in any order
             | str_list  // want zero or more of these, in any order
             )
        >>  
        '}'
        ;

但显然你还需要把解析结果放进你的数据结构里;提醒一下,这一步可能不得不使用语义动作(semantic actions)。

也:

既然说到这里,下面这段代码我不能视而不见:

    // Version under discussion, quoted as the "before" example: it writes to
    // std::cerr instead of the `os` parameter, and it never returns `os`.
    std::ostream& operator<<( std::ostream& os, const my_record& rec )
    {
        for( const auto& x : rec.m_ints )
            std::cerr << x << ' ';
        std::cerr << std::endl;
        for( const auto& x : rec.m_strs )
            std::cerr << x << ' ';
        std::cerr << std::endl;
    }

应该像下面这样写到 os 上:

        // Write to the stream that was passed in.
        for( const auto& x : rec.m_ints )
            os << x << ' ';
        os << '\n';  // newline character; 'n' would print the letter n

另外,尽量避免在流插入运算符中使用 endl;如果需要换行,请使用 '\n'。

解决方案:

最终需要用到的是 Phoenix 函数:push_back 和 bind(绑定器)。

// Grammar that fills my_record through semantic actions: between the braces,
// INT: and STR: lists may appear any number of times and in any order.
// NOTE(review): `phx` is not declared in this excerpt; presumably it is an
// alias for boost::phoenix. Confirm against the full listing.
template<typename Iterator>
struct my_grammar 
: qi::grammar<Iterator, my_record(), ascii::space_type> {
    my_grammar() 
    : my_grammar::base_type(start) {
        // One or more characters other than '"' enclosed in double quotes;
        // lexeme[] turns the skipper off inside the quotes.
        quoted_string %= qi::lexeme['"' >> +(qi::char_ - '"') >> '"'];
        // Assigned with '=' (not '%='): the record is filled by the actions
        // below. '%' binds tighter than '>>', so each tag is followed by a
        // comma-separated list whose elements carry the action.
        start = qi::lit("{")
                >>
                *( "INT:" >> qi::int_     
                    [ 
                        // Append each parsed int to member 0 of the record
                        // (m_ints in the BOOST_FUSION_ADAPT_STRUCT order).
                        phx::push_back(
                            phx::at_c<0>(
                                qi::_val
                            ), 
                            qi::_1
                        ) 
                    ] % ","
                 | "STR:" >> quoted_string
                     [ 
                        // Append each parsed string to m_strs, reached here
                        // through a member pointer instead of an index.
                        phx::push_back(
                            phx::bind(
                                &my_record::m_strs,
                                qi::_val
                            ), 
                            qi::_1
                        ) 
                    ] % ","
                 )
                >> 
                "}"
                 ;
    }
    qi::rule<Iterator, std::string(), ascii::space_type> quoted_string;
    qi::rule<Iterator, my_record(),   ascii::space_type>   start;
};

完整的代码清单可以在这里看到:

http://ideone.com/XW18Z2

另一种不使用语义动作的做法:使用 is_container 和 push_back_container 定制点:

步骤1:删除BOOST_FUSION_ADAPT_STRUCT宏。

步骤2:更改 start 规则。

// Zero or more lists between the braces, each one either an INT list or a
// STR list, in any order.
start %=
            '{'
            >>  *(int_list // want zero or more of these, in any order
                | str_list)  // want zero or more of these, in any order
            >>  '}'
            ;

步骤3:添加以下模板特化(specialization)。

namespace boost { namespace spirit { namespace traits
{
    // my_record acts as a container.
    template <>
    struct is_container<client::my_record>
        : mpl::true_
    {};

    // The elements added to that container are either a vector<int> or a
    // vector<string>.
    template <>
    struct container_value<client::my_record>
    {
        using type = boost::variant<std::vector<int>, std::vector<std::string>>;
    };

    // When a vector of ints is added, append it to the accumulated ints.
    template <>
    struct push_back_container<client::my_record, std::vector<int>>
    {
        static bool call(client::my_record& c, std::vector<int> const& val)
        {
            auto& accumulated = c.m_ints;
            accumulated.insert(accumulated.end(), val.begin(), val.end());
            return true;
        }
    };

    // When a vector of strings is added, append it to the accumulated strings.
    template <>
    struct push_back_container<client::my_record, std::vector<std::string>>
    {
        static bool call(client::my_record& c, std::vector<std::string> const& val)
        {
            auto& accumulated = c.m_strs;
            accumulated.insert(accumulated.end(), val.begin(), val.end());
            return true;
        }
    };
}}}

以下是所要求的完整代码(可用 g++ 4.7.1 和 msvc11 编译,前提是改用多次 push_back 来构造预期结果):

示例已更新:新增了一个成员向量,其元素是经过 Fusion 适配(adapted)的结构体。

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted/struct.hpp>

#include <string>
#include <vector>
#include <iostream>
namespace client
{
    // A pair of doubles; two values are equal when both fields compare equal.
    struct my_subrec
    {
        double foo;
        double bar;
        bool operator==( const my_subrec& other ) const
        {
            if ( !( foo == other.foo ) )
                return false;
            return bar == other.bar;
        }
    };

    // Streams a my_subrec as "<foo>-><bar>" and returns the stream.
    std::ostream& operator<<( std::ostream& os, const my_subrec& rec )
    {
        return os << rec.foo << "->" << rec.bar;
    }
}
// Adapt client::my_subrec for Boost.Fusion with members in the order foo, bar.
// Presumably this is what lets the `rec` rule (double_ >> '-' >> double_)
// fill a my_subrec; confirm against the Spirit attribute rules.
BOOST_FUSION_ADAPT_STRUCT(client::my_subrec,
                (double, foo)
                (double, bar)
                )

namespace client
{
    namespace qi    = boost::spirit::qi;
    namespace ascii = boost::spirit::ascii;

    /// Result of a parse: all integers, strings and sub-records found in the
    /// input, each kind kept in the order in which it was encountered.
    struct my_record
    {
        std::vector<int>          m_ints;
        std::vector<std::string>  m_strs;
        std::vector<my_subrec>    m_recs;

        /// Two records are equal when all three vectors have the same length
        /// and the same elements.
        bool operator==( const my_record& other ) const 
        {
            // vector::operator== compares sizes before elements. The
            // three-iterator std::equal never looks at the second range's
            // length, so a shorter left-hand side would compare equal and a
            // longer one would read past the end of the right-hand side.
            return m_ints == other.m_ints
                && m_strs == other.m_strs
                && m_recs == other.m_recs;
        }
        bool operator!=( const my_record& other ) const
        {
            return !( *this == other );
        }
        friend std::ostream& operator<<( std::ostream& os, const my_record& rec );
    };
    /// Writes the integers on one line, the strings on the next, and then the
    /// sub-records, each element followed by a single space. Returns `os`.
    std::ostream& operator<<( std::ostream& os, const my_record& rec ) 
    {
        for( const auto& x : rec.m_ints )
            os << x << ' ';
        os << '\n';  // newline character; 'n' would print the letter n
        for( const auto& x : rec.m_strs )
            os << x << ' ';
        os << '\n';
        for( const auto& x : rec.m_recs )
            os << x << ' ';
        return os;
    }
}
//BOOST_FUSION_ADAPT_STRUCT(
//    client::my_record,
//        (std::vector<int>,          m_ints)
//        (std::vector<std::string>,  m_strs)
//)

namespace client
{
    // Qi grammar for a brace-delimited record. It is declared with an
    // ascii::space_type skipper and synthesizes a my_record attribute.
    // my_record is not Fusion-adapted in this listing (the macro is commented
    // out). The boost::spirit::traits specializations further down in this
    // file define how each parsed list is appended to the record.
    template <typename Iterator>
    struct employee_parser : qi::grammar<Iterator, my_record(), ascii::space_type>
    {
        employee_parser() : employee_parser::base_type(start)
    {
        using qi::int_;
        using qi::lit;
        using qi::double_;
        using qi::lexeme;
        using ascii::char_;
        // One or more characters other than '"' enclosed in double quotes;
        // lexeme[] turns the skipper off inside the quotes.
        quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
#ifdef SIMPLE_CASE
        // Exactly one INT list followed by exactly one STR list.
        start %=
            '{'
            >>  int_list
            >>  str_list
            >>  '}'
            ;
#else
        // Kleene star over an alternative: zero or more lists, each one an
        // INT, STR or REC list, in any order.
        start %=
            '{'
            >>  *(int_list // want zero or more of these, in any order
                | str_list  // want zero or more of these, in any order
                | rec_list)
            >>  '}'
            ;
#endif
        // '%' binds tighter than '>>': each tag is followed by a
        // comma-separated list of its element parser.
        str_list %=
                lit( "STR:" ) >> quoted_string % ','    
                ;
        int_list %=
                lit( "INT:" ) >> int_ % ','
                ;
        rec_list =
                lit( "REC:" ) >> rec % ','
                ;
        // A sub-record is written as two doubles separated by '-'.
        rec = double_ >> lit('-') >> double_
                ;
    }
    // Rules used by the grammar; the attribute type of each is the first
    // signature argument.
    qi::rule<Iterator, std::string(), ascii::space_type>               quoted_string;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type>  str_list;
    qi::rule<Iterator, std::vector<int>(),         ascii::space_type>  int_list;
    qi::rule<Iterator, client::my_subrec(), ascii::space_type> rec;
    qi::rule<Iterator, std::vector<client::my_subrec>(),ascii::space_type> rec_list;
    qi::rule<Iterator, my_record(), ascii::space_type>                 start;
    };
}
namespace boost { namespace spirit { namespace traits
{
    // my_record acts as a container.
    template <>
    struct is_container<client::my_record>
        : mpl::true_
    {};

    // The elements added to that container are vector<int>, vector<string>
    // or vector<my_subrec>.
    template <>
    struct container_value<client::my_record>
    {
        using type = boost::variant<std::vector<int>,
                                    std::vector<std::string>,
                                    std::vector<client::my_subrec> >;
    };

    // When a vector of ints is added, append it to the accumulated ints.
    template <>
    struct push_back_container<client::my_record, std::vector<int>>
    {
        static bool call(client::my_record& c, std::vector<int> const& val)
        {
            auto& accumulated = c.m_ints;
            accumulated.insert(accumulated.end(), val.begin(), val.end());
            return true;
        }
    };

    // When a vector of strings is added, append it to the accumulated strings.
    template <>
    struct push_back_container<client::my_record, std::vector<std::string>>
    {
        static bool call(client::my_record& c, std::vector<std::string> const& val)
        {
            auto& accumulated = c.m_strs;
            accumulated.insert(accumulated.end(), val.begin(), val.end());
            return true;
        }
    };

    // When a vector of subrecs is added, append it to the accumulated subrecs.
    template <>
    struct push_back_container<client::my_record, std::vector<client::my_subrec>>
    {
        static bool call(client::my_record& c, std::vector<client::my_subrec> const& val)
        {
            auto& accumulated = c.m_recs;
            accumulated.insert(accumulated.end(), val.begin(), val.end());
            return true;
        }
    };
}}}
/// Parses `input` with client::employee_parser and compares the result with
/// `expected`.
///
/// @param input     text to parse
/// @param expected  record the parse is expected to produce
/// @return 0 when the grammar matched, the whole input was consumed and the
///         parsed record equals `expected` (the record is then printed to
///         std::cout); -1 otherwise (a diagnostic is written to std::cerr).
static int 
TryParse( const std::string& input, const client::my_record& expected )
{
    using boost::spirit::ascii::space;
    client::my_record                        rec;
    auto                                     iter = input.begin(), end = input.end();
    client::employee_parser<decltype(iter)>  g;
    // phrase_parse reports a grammar mismatch through its return value.
    // Checking only iter!=end misses a failed parse of an empty input,
    // where iter already equals end.
    const bool matched = phrase_parse( iter, end, g, space, rec );
    if ( !matched || iter!=end )
    {
        std::cerr << "failed to parse completely" << std::endl;
        return -1;
    } else if ( rec!=expected ) {
        std::cerr << "unexpected result in parse" << std::endl;
        std::cerr << rec;
        return -1;
    }
    std::cout << rec << std::endl;
    return 0;
}
// Runs one parse test and returns its result as the process exit code.
// With SIMPLE_CASE defined the input has one INT list followed by one STR
// list; otherwise INT, STR and REC lists appear in mixed order.
int 
main(int argc, char* argv[])
{
    (void)argc;  // command-line arguments are not used
    (void)argv;
#ifdef SIMPLE_CASE
    client::my_record  expected1 { {42, 24 }, {"Smith", "John"} };
    // The quotes inside the literal must be escaped, otherwise they end it early.
    std::string        input1 = "{ INT: 42, 24 STR: \"Smith\", \"John\" }";
    return TryParse( input1, expected1 );
#else
    client::my_record  expected1 { { 42, 24,240 }, {"Joe", "Smith", "John"}, {{1.5,2.5}} };
    std::string        input1 = "{ STR: \"Joe\" INT: 42, 24 STR: \"Smith\", \"John\" INT: 240 REC: 1.5-2.5 }";
    return TryParse( input1, expected1 );
#endif
}