boost-spirit-lex:将多个令牌统一为 lex 中由 id 区分的单个令牌

boost-spirit-lex: unifying multiple tokens into a single token in lex, differentiated by the id

本文关键字:令牌 lex 单个 id boost-sprit-lex      更新时间:2023-10-16

edit:我已经删除了词法分析器,因为它无法与 Qi 干净地集成,只会让语法变得更难理解(见下面的答案)。


我的词法分析器如下所示:

/// Lexer token definitions for the namespace/event description language.
///
/// Punctuation and keyword tokens carry no attribute (lex::omit); `ordinal`
/// exposes a boost::uint32_t and `identifier` a std::string. Keywords use the
/// `(?i:...)` regex group so that they are matched case-insensitively.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    /// Builds every token pattern and registers the tokens with the lexer.
    tokens()
        // Initializers are listed in member declaration order.
        // Punctuation is written as a quoted regex literal ("{" etc.), so the
        // inner quotes have to be escaped inside the C++ string literal.
        : left_curly("\"{\""),
          right_curly("\"}\""),
          colon(":"),
          scolon(";"),
          repeated("(?i:repeated)"),
          left_paren("\"(\""),
          right_paren("\")\""),
          namespace_("(?i:namespace)"),
          event("(?i:event)"),
          optional("(?i:optional)"),
          required("(?i:required)"),
          t_int_4("(?i:int4)"),
          t_int_8("(?i:int8)"),
          t_string("(?i:string)"),
          // The backslash must be doubled so the regex engine sees \d and \w.
          ordinal("\\d+"),
          identifier("\\w+")
    {
        // Registration order is kept exactly as before: keywords and
        // `ordinal` are listed ahead of the catch-all `identifier`.
        this->self
            = left_curly
            | right_curly
            | left_paren
            | right_paren
            | colon
            | scolon
            | namespace_
            | event
            | optional
            | required
            | repeated
            | t_int_4
            | t_int_8
            | t_string
            | ordinal
            | identifier
            // Whitespace is matched but never passed on to the parser.
            | lex::token_def<>("[ \t\n]+")   [lex::_pass = lex::pass_flags::pass_ignore];
    }

    // Attribute-less tokens: punctuation and keywords.
    lex::token_def<lex::omit> left_curly, right_curly, colon, scolon,repeated, left_paren, right_paren;
    lex::token_def<lex::omit> namespace_, event, optional, required,t_int_4, t_int_8, t_string;
    // Tokens that carry a value.
    lex::token_def<boost::uint32_t> ordinal;
    lex::token_def<std::string> identifier;

};

我希望 t_int_4、t_int_8 和 t_string 由带有整型属性的单个令牌类型来表示。目前,这项工作只能由我的 Qi 语法来承担:在 qi::rule 的语义动作中为每个令牌设置对应的值:

 // Each alternative matches one type-keyword token and assigns the
 // corresponding RBL_* constant to the rule's attribute (_val).
 atomic_type = tok.t_int_4     [ _val = RBL_INT4]
                 | tok.t_int_8             [ _val = RBL_INT8]
                 | tok.t_string            [ _val = RBL_STRING];

这个回答源自您最近几天提出的有关将 Lex 整合到 Qi 语法中的一系列问题。您似乎已经遇到了多个集成问题。此时您应该问问自己:为什么一定要把词法分析器集成到 PEG 语法中?PEG 语法本身就能很好地就地完成标记化,因此引入词法分析器并不会带来太多好处;而且 Lex->Qi 的实践已经向您表明,引入词法分析器之后,不仅原本在 Qi 中可以简洁表达的语法需要借助技巧才能写出来,错误处理和注释(annotation)也需要技巧才能正常工作。因此,我建议去掉 Lex,只使用 Qi。

这是删除词法分析器的语法。ast 位于它自己的文件中。

// Must be defined before any Boost.Spirit / Boost.Phoenix header is included.
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include "ast.hpp"
#include <boost/range/iterator_range.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace px = boost::phoenix;
/// Skipper for the grammar: consumes ASCII whitespace and `//` line comments.
///
/// A line comment runs up to the end of the line. It may also be terminated
/// by the end of the input, so that a trailing comment without a final
/// newline is still skipped.
template <typename Iterator>
struct skipper : qi::grammar<Iterator>
{
    skipper() : skipper::base_type(start)
    {
        start = ascii::space
              | qi::lit("//") >> *(ascii::char_ - qi::eol) >> (qi::eol | qi::eoi);
    }

    /// Single rule matching one whitespace character or one line comment.
    qi::rule<Iterator> start;
};
/// Function object that prints a parse-error report to std::cout.
///
/// The report has three lines: what was expected (`w`), the text in [f, l),
/// and a "^---- here" marker indented by the distance from `f` to `e` plus one.
struct error_handler_
{
    typedef void result_type;

    template<typename First, typename Last, typename ErrorPos, typename What>
    void operator()(First f, Last l, ErrorPos e, What w) const
    {
        const std::string input(f, l);
        const int offset = std::distance(f, e);
        const std::string padding(offset + 1, ' ');

        std::cout << "Expected : " << w << std::endl
                  << input << std::endl
                  << padding << "^---- here" << std::endl;
    }
};
// Phoenix function wrapper around error_handler_; the grammar invokes it as
// ::error_handler(_1,_2,_3,_4) from its on_error handler.
px::function<error_handler_> error_handler;
template<typename Iterator>
struct annotation_state
{
  typedef boost::iterator_range<Iterator> annotation_iterator;
  typedef std::vector<annotation_iterator> annotation_iterators;
  annotation_iterators annotations;
};
/// Function object that tags a parsed value with an annotation id.
///
/// Each call appends the source range [f, l) to the shared annotation_state,
/// stores the index of that new entry in `v.id`, and echoes the matched text
/// to std::cout.
template<typename Iterator>
struct annotate_
{
    typedef void result_type;

    /// Annotation storage shared with the caller; not owned by this object.
    annotation_state<Iterator> & as;

    annotate_(annotation_state<Iterator> & as) : as(as) {}

    /// @param v  value to tag; must have an assignable `id` member.
    /// @param f  iterator to the start of the matched input.
    /// @param l  iterator just past the end of the matched input.
    ///
    /// `v` is taken by reference: taking it by value would assign the id to a
    /// local copy and leave the caller's object untouched.
    template<typename Val, typename First, typename Last>
    void operator()(Val & v, First f, Last l) const
    {
      // The id is the index the new range is about to occupy.
      v.id = as.annotations.size();
      as.annotations.push_back(boost::make_iterator_range(f,l));
      std::cout << std::string(f,l) << std::endl;
    }
};

/// Qi grammar whose start rule is namespace_descriptor_:
///   namespace <identifier> { <event descriptor>* }
/// Keywords are matched through no_case[]. Successful matches of oid_,
/// type_descriptor_, event_entry_ and event_descriptor_ are passed to
/// `annotate`, which records their source range in the annotation_state
/// supplied to the constructor.
template <typename Iterator, typename Skipper>
struct grammar : qi::grammar<Iterator,namespace_descriptor(),Skipper>
{
    /// @param as  annotation storage that the `annotate` functor writes to;
    ///            held by reference.
    grammar(annotation_state<Iterator> & as) 
        : grammar::base_type(namespace_descriptor_),
          annotation_state_(as),
          annotate(as)
    {
        using namespace qi;
        // Symbol tables mapping lower-case keywords to constants; the RBL_* and
        // ENTRY_* names are not declared in this block (presumably in ast.hpp).
        atomic_type.add
            ("int4", RBL_INT4)
            ("int8", RBL_INT8)
            ("string", RBL_STRING);
        event_entry_qualifier.add
            ("optional", ENTRY_OPTIONAL)
            ("required", ENTRY_REQUIRED)
            ("repeated", ENTRY_REPEATED);
        // <ordinal> ':' <identifier>, e.g. "1:sihan". '>' is Qi's expectation
        // operator: once the first element matched, the rest must follow.
        oid_ = ordinal  > ':' > identifier;
        ordinal = uint_parser<boost::uint32_t>();
        // Identifiers consist of ASCII letters and underscores only (no digits).
        identifier = +(char_("a","z") | char_("A","Z") | char_('_'));
        type_descriptor_ = atomic_type_ | compound_type_;
        // attr("") contributes an empty string as the second attribute element;
        // NOTE(review): presumably the name field of type_descriptor - confirm in ast.hpp.
        atomic_type_ = no_case[atomic_type] > attr("");
        // event(<identifier>): attr(RBL_EVENT) supplies the first attribute element.
        compound_type_ = 
            no_case[lit("event")] 
            > attr(RBL_EVENT) 
            > '(' 
            > identifier  
            > ')';
        // <qualifier> <oid> <type descriptor> ';'
        event_entry_ = 
            no_case[event_entry_qualifier] 
            > oid_ 
            > type_descriptor_ 
            > ';';
        // event <oid> { <event entry>* }
        event_descriptor_ = 
            no_case[lit("event")] 
            > oid_ 
            > '{' 
            > *(event_entry_) 
            > '}'; 
        // namespace <identifier> { <event descriptor>* }
        namespace_descriptor_ = 
            no_case[lit("namespace")] 
            > identifier 
            > '{' 
            > * (event_descriptor_) 
            > '}'; 
        // Names given to the rules for use in diagnostics.
        identifier.name("identifier");
        oid_.name("ordinal-identifier pair");
        ordinal.name("ordinal");
        // Report failures through the global error_handler; <fail> makes the parse fail.
        on_error<fail>(namespace_descriptor_, ::error_handler(_1,_2,_3,_4));
        // On success, hand the rule's attribute and two iterators to annotate;
        // per Qi's on_success arguments _1 is the start of the match and _3 the
        // position just past it.
        on_success(oid_, annotate(_val,_1,_3));
        on_success(type_descriptor_, annotate(_val,_1,_3));
        on_success(event_entry_, annotate(_val,_1,_3));
        on_success(event_descriptor_, annotate(_val,_1,_3));
    }
    // Reference to the caller's annotation storage; only initialised here,
    // not otherwise used inside this struct.
    annotation_state<Iterator> & annotation_state_;
    // Phoenix wrapper around annotate_ used in the on_success handlers.
    px::function<annotate_<Iterator> > annotate;
    // The following rules are declared without a Skipper template argument.
    qi::rule< Iterator, oid()> oid_;
    qi::rule< Iterator, boost::uint32_t()> ordinal;
    qi::rule< Iterator, std::string()> identifier;
    qi::rule< Iterator, type_descriptor()> type_descriptor_;
    qi::rule< Iterator, type_descriptor()> atomic_type_;
    qi::rule< Iterator, type_descriptor()> compound_type_; 
    // Rules that use the grammar's Skipper.
    qi::rule< Iterator, event_entry(), Skipper> event_entry_;
    qi::rule< Iterator, event_descriptor(), Skipper> event_descriptor_;
    qi::rule< Iterator, namespace_descriptor(), Skipper> namespace_descriptor_;
    // Keyword lookup tables filled in the constructor.
    qi::symbols<char, int> atomic_type;
    qi::symbols<char, int> event_entry_qualifier;
};
/// Parses a sample namespace description with the grammar and skipper above.
///
/// @return 0 when the grammar accepted the whole input, 1 when parsing failed
///         or stopped before the end of the input.
int main()
{
    std::string test = "namespace ns { event 1:sihan { OpTIONAL 1:hassan event(haSsan);} }";
    typedef std::string::iterator it;
    it beg = test.begin();
    it end = test.end();
    annotation_state<it> as;
    skipper<it> skip;
    grammar<it, skipper<it> > g(as);

    const bool r = qi::phrase_parse(beg,end,g,skip);
    if (!r)
    {
        std::cout << "parsing failed" << std::endl;
        return 1;
    }

    // phrase_parse also returns true when only a prefix of the input matched;
    // `beg` is advanced to where parsing stopped, so compare it with `end`.
    if (beg != end)
    {
        std::cout << "parsing stopped before end of input: "
                  << std::string(beg, end) << std::endl;
        return 1;
    }

    return 0;
}