C++ Boost Spirit 的 attr_cast 不会调用预期的 transform_attribute

c++ boost spirit attr_cast does not invoke the expected transform_attribute

本文关键字:调用 attribute tranform cast C++ attr      更新时间:2023-10-16

我正在用 Boost Spirit 编写一个 C++ 字符串字面量解析器。

这是我到目前为止所拥有的:

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/home/support/iterators/line_pos_iterator.hpp>
#include <boost/spirit/repository/include/qi_confix.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
using namespace boost::spirit;
#include <boost/fusion/include/adapt_struct.hpp>
////////////////////////////////
// extra facilities
// Function object that reports the line number of a position iterator.
// It is wrapped in boost::phoenix::function so it can be called lazily
// from semantic actions.
struct get_line_f
{
    // Result-type metafunction: the call always yields size_t, whatever the
    // signature it is queried with.
    template <typename Signature>
    struct result
    {
        typedef size_t type;
    };

    // Forwards to the free function get_line() for the given iterator.
    template <typename Iter>
    size_t operator()(Iter const& position) const
    {
        size_t const line = get_line(position);
        return line;
    }
};
// Specialization of Spirit's transform_attribute customization point for the
// type pair (uint16_t, std::string) in the Qi domain.
// NOTE(review): Spirit documents the template parameters as
// <Exposed, Transformed, Domain>; as written this describes exposing a
// std::string-producing parser as uint16_t, which looks like the reverse of
// what the `unicode` rule needs -- confirm against the Spirit documentation.
namespace boost { namespace spirit { namespace traits
{
    template <>
    struct transform_attribute<uint16_t, std::string, qi::domain>
    {
        // Attribute type handed to the wrapped parser.
        typedef std::string& type;
        // Called before parsing; returns the placeholder text "pre16".
        // NOTE(review): returns std::string by value although `type` is
        // std::string& -- presumably this would not compile if the trait
        // were ever instantiated; verify.
        static std::string pre(uint16_t& d) { return "pre16"; }
        // Called after a successful parse; overwrites attr with the marker
        // text "unicode16" and leaves val untouched.
        static void post(uint16_t& val, std::string& attr) { attr = "unicode16"; }
        // Called after a failed parse; nothing to undo.
        static void fail(uint16_t&) {}
    };
}}}
// Specialization of Spirit's transform_attribute customization point for the
// type pair (uint32_t, std::string) in the Qi domain.
// NOTE(review): Spirit documents the template parameters as
// <Exposed, Transformed, Domain>; the argument order here may be the reverse
// of what the `unicode` rule needs -- confirm against the Spirit documentation.
namespace boost { namespace spirit { namespace traits
{
    template <>
    struct transform_attribute<uint32_t, std::string, qi::domain>
    {
        // Attribute type handed to the wrapped parser.
        typedef std::string& type;
        // Called before parsing; returns the placeholder text "pre32".
        // NOTE(review): returns std::string by value although `type` is
        // std::string& -- verify this compiles when instantiated.
        static std::string pre(uint32_t& d) { return "pre32"; }
        // Called after a successful parse; overwrites attr with the marker
        // text "unicode32" and leaves val untouched.
        static void post(uint32_t& val, std::string& attr) { attr = "unicode32"; }
        // Called after a failed parse; nothing to undo.
        static void fail(uint32_t&) {}
    };
}}}
//
////////////////////////////////
// Pair of line numbers delimiting a parsed range.
// Both fields start out as size_t(-1), i.e. the largest size_t value, which
// serves as the "not set" marker.
struct RangePosition
{
    size_t beginLine;  // line on which the range starts
    size_t endLine;    // line on which the range ends

    RangePosition()
        : beginLine(static_cast<size_t>(-1))
        , endLine(static_cast<size_t>(-1))
    {
    }
};
// Result record for a parsed string literal: two text fields plus the
// begin/end line numbers inherited from RangePosition.
struct String : public RangePosition
{
    std::string value;   // first adapted field (fusion index 0)
    std::string source;  // second adapted field (fusion index 1)

    // The base sub-object and both strings are default-constructed, so a
    // fresh String has empty text fields and whatever line numbers
    // RangePosition's default constructor sets.
    String() {}
};
// Adapts String as a Fusion sequence so its members can be addressed by
// index. The order listed here defines the indices used with at_c<N>:
//   0 -> value, 1 -> source, 2 -> beginLine, 3 -> endLine
// (beginLine/endLine are the members inherited from RangePosition).
BOOST_FUSION_ADAPT_STRUCT(String,
                            (std::string, value)
                            (std::string, source)
                            (size_t,      beginLine)
                            (size_t,      endLine)
                          )
// Qi grammar for C++-style string literals.
//
// The start rule synthesizes a String whose adapted fields are filled as:
//   index 0 (value)     - attribute produced by the `string` rule
//   index 1 (source)    - the raw input text matched by the `string` rule
//   index 2 (beginLine) - get_line() of the first matched position
//   index 3 (endLine)   - get_line() of the position past the match
//
// Iterator must be usable with get_line() (the test code uses
// line_pos_iterator). The grammar is skipped with qi::space; the inner
// rules declare no skipper, so whitespace inside a literal is significant.
template <typename Iterator>
struct source_string : qi::grammar<Iterator, String(), qi::space_type>
{
    // Maps a two-character simple escape sequence as it appears in the
    // input (backslash + letter) to the single character it denotes.
    struct escape_symbols : qi::symbols<char, char>
    {
        escape_symbols()
        {
            add
                ("\\'"    , '\'')
                ("\\\""   , '"')
                ("\\?"    , '\?')
                ("\\\\"   , '\\')
                ("\\0"    , '\0')
                ("\\a"    , '\a')
                ("\\b"    , '\b')
                ("\\f"    , '\f')
                ("\\n"    , '\n')
                ("\\r"    , '\r')
                ("\\t"    , '\t')
                ("\\v"    , '\v')
            ;
        }
    } escape_symbol;

    source_string() : source_string::base_type(start)
    {
        using qi::raw;
        using qi::_val;
        using qi::_1;
        using qi::space;
        using qi::omit;
        using qi::no_case;
        using qi::attr_cast;
        using qi::print;
        namespace phx = boost::phoenix;
        using phx::at_c;
        using phx::begin;
        using phx::end;
        using phx::construct;
        using phx::ref;

        // One simple escape sequence, looked up in the symbol table.
        escape %= escape_symbol;

        // A single character of the literal. Alternatives are tried in order:
        // hex escape (\x or \X + 1-2 hex digits), octal escape (\ + 1-3 octal
        // digits), simple escape, then any printable character other than a
        // double quote or a backslash.
        character %=   (no_case["\\x"] >> hex12)
                     | ("\\"  >> oct123)
                     | escape
                     | (print - (lit('"') | '\\'));

        // Unicode escapes: \u + exactly 4 hex digits, \U + exactly 8.
        // NOTE(review): attr_cast is called without explicit template
        // arguments here; whether a user-defined transform_attribute
        // specialization is picked up this way should be checked against
        // the Spirit attr_cast documentation.
        unicode %=   ("\\u" >> attr_cast(hex4))
                   | ("\\U" >> attr_cast(hex8));

        // One double-quoted section.
        string_section %= '"' >> *(unicode | character) >> '"';

        // One or more quoted sections separated by optional whitespace.
        string %= string_section % omit[*space];

        // The inner action stores the parsed text; the outer action, attached
        // to raw[], sees the matched iterator range and records the source
        // text and the line numbers of its two ends.
        start = raw[
                        string[at_c<0>(_val) = _1]
                   ]
                   [
                       at_c<1>(_val) = construct<std::string>(begin(_1), end(_1)),
                       at_c<2>(_val) = get_line_(begin(_1)),
                       at_c<3>(_val) = get_line_(end(_1))
                   ]
        ;
    }

    // Lazy wrapper so get_line_f can be invoked inside semantic actions.
    boost::phoenix::function<get_line_f> get_line_;

    qi::rule<Iterator, String(), qi::space_type> start;
    qi::rule<Iterator, std::string()> escape;
    qi::uint_parser<char, 16, 1, 2> hex12;       // 1-2 hex digits
    qi::uint_parser<uint16_t, 16, 4, 4> hex4;    // exactly 4 hex digits
    qi::uint_parser<uint32_t, 16, 8, 8> hex8;    // exactly 8 hex digits
    qi::uint_parser<char,  8, 1, 3> oct123;      // 1-3 octal digits
    qi::rule<Iterator, std::string()> character;
    qi::rule<Iterator, std::string()> unicode;
    qi::rule<Iterator, std::string()> string_section;
    qi::rule<Iterator, std::string()> string;
};

我的测试代码是

// Test inputs. Each entry is the raw text handed to the parser, so a
// backslash that the parser should see is written as "\\" and a double
// quote as "\"".
std::string str[] =
{
    "\"\\u1234\\U12345678\"",
    "\"te\"\"st\"",
    "\"te\"  \"st\"",
    "\"te\" \n \"st\"",
    "\"\"",
    "\"\\\"\"",
    "\"test\"",
    "\"test\" something",
    "\"\\'\\\"\\?\\\\\\a\\b\\f\\n\\r\\t\\v\"",
    "\"\\x61cd\\X3012\\x7z\"",
    "\"\\141cd\\06012\\78\\778\"",
    "\"te",
    "\"te\nst\"",
    "\"test\\\"",
    "\"te\\st\"",
    //
};
typedef line_pos_iterator<std::string::const_iterator> Iterator;
std::ostringstream result;
// Parse every input with a fresh grammar instance and append one line per
// input to `result`: either "<beginLine>-<endLine>: <value> === <source>"
// or "Parsing failed".
for (size_t i = 0; i < sizeof(str) / sizeof(str[0]); ++i)
{
    source_string<Iterator> g;
    Iterator iter(str[i].begin());
    Iterator end(str[i].end());
    String string;
    bool r = phrase_parse(iter, end, g, qi::space, string);
    if (r)
        result << string.beginLine << "-" << string.endLine << ": " << string.value << " === " << string.source << "\n";
    else
        result << "Parsing failed\n";
}

有人能帮我解释一下,为什么在下面这条规则中:

        // \u + 4 hex digits, or \U + 8 hex digits (backslash escaped for C++).
        unicode %=   ("\\u" >> attr_cast(hex4))
                   | ("\\U" >> attr_cast(hex8));

attr_cast不调用我定义的transform_attribute?

// The asker's transform_attribute specialization for (uint16_t, std::string)
// in the Qi domain, repeated from the listing earlier in the question.
// NOTE(review): Spirit documents the parameters as <Exposed, Transformed,
// Domain>; the order used here may be reversed -- confirm against the docs.
namespace boost { namespace spirit { namespace traits
{
    template <>
    struct transform_attribute<uint16_t, std::string, qi::domain>
    {
        typedef std::string& type;
        // NOTE(review): returns by value although `type` is a reference.
        static std::string pre(uint16_t& d) { return "pre16"; }
        // Overwrites attr with the marker text; val is not modified.
        static void post(uint16_t& val, std::string& attr) { attr = "unicode16"; }
        static void fail(uint16_t&) {}
    };
}}}
// The asker's transform_attribute specialization for (uint32_t, std::string)
// in the Qi domain, repeated from the listing earlier in the question.
// NOTE(review): Spirit documents the parameters as <Exposed, Transformed,
// Domain>; the order used here may be reversed -- confirm against the docs.
namespace boost { namespace spirit { namespace traits
{
    template <>
    struct transform_attribute<uint32_t, std::string, qi::domain>
    {
        typedef std::string& type;
        // NOTE(review): returns by value although `type` is a reference.
        static std::string pre(uint32_t& d) { return "pre32"; }
        // Overwrites attr with the marker text; val is not modified.
        static void post(uint32_t& val, std::string& attr) { attr = "unicode32"; }
        static void fail(uint32_t&) {}
    };
}}}

让内置基元类型的行为"奇怪"似乎是一个非常糟糕的主意™。

假设你只是想解码,我建议使用语义操作的更简单的方法,例如

  • https://github.com/sehe/spirit-v2-json/blob/master/JSON.cpp#L102

    // Excerpt from the linked JSON grammar: every escape is decoded by a
    // semantic action that appends the decoded character to the rule's
    // attribute (_val). The excerpt stops before the closing parenthesis.
    char_ = +(
            ~encoding::char_(L"\"\\")) [ qi::_val += qi::_1 ] |
               qi::lit(L"\x5C") >> (                    // \ (reverse solidus)
               qi::lit(L"\x22") [ qi::_val += L'"'  ] | // "    quotation mark  U+0022
               qi::lit(L"\x5C") [ qi::_val += L'\\' ] | // \    reverse solidus U+005C
               qi::lit(L"\x2F") [ qi::_val += L'/'  ] | // /    solidus         U+002F
               qi::lit(L"\x62") [ qi::_val += L'\b' ] | // b    backspace       U+0008
               qi::lit(L"\x66") [ qi::_val += L'\f' ] | // f    form feed       U+000C
               qi::lit(L"\x6E") [ qi::_val += L'\n' ] | // n    line feed       U+000A
               qi::lit(L"\x72") [ qi::_val += L'\r' ] | // r    carriage return U+000D
               qi::lit(L"\x74") [ qi::_val += L'\t' ] | // t    tab             U+0009
               qi::lit(L"\x75")                         // uXXXX                U+XXXX
                    >> _4HEXDIG [ qi::_val += qi::_1 ]
    

    这似乎很容易适应您的用例。

现在,如果您坚持这样做:首先,要包装类型(这样就不会"重新定义" Spirit 对基本类型的处理);其次,要自定义容器插入(container insertion)特征,因为 std::string(或者更确切地说是 std::vector<char>?)是一种容器类型。

不过我不推荐这个。我喜欢把事情"简单"和逻辑放在一个地方。显然,当使用像Spirit这样的解析器生成器时,这是一件"有趣"的事情,因为幕后似乎有很多事情"神奇"地发生。然而,这就是抽象的本质。我想我不想在这里"抽象"解码 unicode 转义:它们感觉属于问题域,而不是工具。