Boost Spirit X3:将(一些)空格解析为枚举

Boost Spirit X3: Parsing (some) whitespace into an enum

本文关键字:空格 枚举 一些 X3 Spirit Boost      更新时间:2023-10-16

我有一个解析器,我想把其中某些类型的空白字符捕获为枚举值,同时保留"文本"值中的空格。

我的空白字符解析器非常基本(注意:我只是出于测试/开发目的才在这里加入了管道字符 `|`):

// Symbol table mapping whitespace-like input characters to Whitespace values.
// The pipe entry is a visible stand-in used for testing/development.
struct whitespace_p : x3::symbols<Whitespace>
{
    whitespace_p()
    {
        add
            ("\n", Whitespace::NEWLINE)  // newline character, not the letter 'n'
            ("\t", Whitespace::TAB)      // tab character, not the letter 't'
            ("|", Whitespace::PIPE)
        ;
    }
} whitespace;

我想将所有内容捕获到我的枚举或std::strings 中:

// A single parsed element: holds either a Whitespace value or a std::string.
struct Element : x3::variant<Whitespace, std::string> {
    using base_type::operator=;  // reuse the variant's assignment operators
    using base_type::base_type;  // reuse the variant's constructors
};

为了解析我的输入,我使用如下内容:

// Parses one element: either a run of characters that are not whitespace
// symbols (matched without skipping), or a single whitespace symbol.
const auto contentParser
    = x3::rule<class ContentParserID, Element, true> { "contentParser" }
    = x3::no_skip[+(x3::char_ - (whitespace))]
    | whitespace
    ;

using Elements = std::vector<Element>;

// Parses one or more elements into a vector.
// Each rule gets its own tag type; the tag identifies the rule, so it must
// not be shared with contentParser.
const auto elementsParser
    = x3::rule<class ElementsParserID, Elements, true> { "elementsParser" }
    = contentParser >> *(contentParser);

但问题是解析器在遇到第一个制表符或换行符时就停止了。

代码:http://coliru.stacked-crooked.com/a/d2cda4ce721279a4

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
namespace x3 = boost::spirit::x3;
// Kinds of whitespace symbol the parser reports as enum values.
enum Whitespace { NEWLINE = 0, TAB = 1, PIPE = 2 };
// Symbol table mapping whitespace-like input characters to Whitespace values.
struct whitespace_p : x3::symbols<Whitespace>
{
    whitespace_p()
    {
        add
            ("\n", Whitespace::NEWLINE)  // newline character, not the letter 'n'
            ("\t", Whitespace::TAB)      // tab character, not the letter 't'
            ("|", Whitespace::PIPE)
        ;
    }
} whitespace;
// A single parsed element: holds either a Whitespace value or a std::string.
struct Element : x3::variant<Whitespace, std::string> {
    using base_type::operator=;  // reuse the variant's assignment operators
    using base_type::base_type;  // reuse the variant's constructors
};
// Parses one element: either a run of characters that are not whitespace
// symbols (matched without skipping), or a single whitespace symbol.
const auto contentParser
    = x3::rule<class ContentParserID, Element, true> { "contentParser" }
    = x3::no_skip[+(x3::char_ - (whitespace))]
    | whitespace
    ;

using Elements = std::vector<Element>;

// Parses one or more elements into a vector.
// Each rule gets its own tag type; the tag identifies the rule, so it must
// not be shared with contentParser.
const auto elementsParser
    = x3::rule<class ElementsParserID, Elements, true> { "elementsParser" }
    = contentParser >> *(contentParser);
// Visitor that renders either alternative of an Element as a std::string.
struct print_visitor
    : public boost::static_visitor<std::string>
{
    // Names the whitespace kind; any value other than NEWLINE or PIPE is
    // reported as "tab".
    std::string operator()(const Whitespace& kind) const
    {
        switch (kind)
        {
        case Whitespace::NEWLINE: return "newline";
        case Whitespace::PIPE:    return "pipe";
        default:                  return "tab";
        }
    }

    // Text alternatives are returned unchanged.
    std::string operator()(const std::string& text) const { return text; }
};
// Parses a sample text with elementsParser and prints each element, or
// reports a failed / incomplete parse.
int main()
{
    // Sample input containing a real newline between the two words.
    const std::string text = "Hello \n World";
    std::string::const_iterator start = std::begin(text);
    const std::string::const_iterator stop = std::end(text);
    Elements elements{};
    // This call passes x3::ascii::space as the skipper; that is the behaviour
    // the surrounding question is about, so it is kept as written.
    const bool result =
        x3::phrase_parse(start, stop, elementsParser, x3::ascii::space, elements);
    if (!result)
    {
        std::cout << "failed to parse!\n";
    }
    else if (start != stop)
    {
        // Parsing succeeded but stopped early: show the remaining input.
        std::cout << "unparsed: " << std::string{start, stop} << '\n';
    }
    else
    {
        for (const auto& e : elements)
        {
            std::cout << "element: [" << boost::apply_visitor(print_visitor{}, e) << "]\n";
        }
    }
}

如果我解析文本 `Hello | World`,就会得到预期的结果。但是,如果改用带空白字符的 `Hello \n World`,`\n` 之后的空格会被吞掉,而且 `World` 永远不会被解析。理想情况下,我希望看到这样的输出:

element: [Hello ]
element: [newline]
element: [ World]

我怎样才能做到这一点?谢谢!

关于跳过器(skipper)问题,我常用的参考:Boost Spirit skipper issues

在这种情况下,您已经用 no_skip[] 让它工作了。这是正确的。

no_skip 和 lexeme 类似,只是它不会预先跳过(pre-skip)。源代码(boost/spirit/home/x3/directive/no_skip.hpp)中的注释如下:

// same as lexeme[], but does not pre-skip

另一种思路

在您的情况下,我会把逻辑反过来:直接调整跳过器(skipper)本身。

另外,不要通过 phrase_parse 来提供跳过器(skipper),因为您的语法对跳过器的取值是否正确高度敏感。

您的整个语法可能是:

// Whole grammar: the skipper is x3::space minus the whitespace symbols; the
// body collects runs of graphic characters or single whitespace symbols.
const auto p = x3::skip(x3::space - whitespace)[ *(+x3::graph | whitespace) ];

下面是 Coliru 上的在线演示(Live On Coliru)

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
// Kinds of whitespace symbol the parser reports as enum values.
enum Whitespace
{
    NEWLINE,
    TAB,
    PIPE
};
// Symbol table mapping whitespace-like input characters to Whitespace values.
struct whitespace_p : x3::symbols<Whitespace> {
    whitespace_p() {
        add
            ("\n", Whitespace::NEWLINE)  // newline character, not the letter 'n'
            ("\t", Whitespace::TAB)      // tab character, not the letter 't'
            ("|", Whitespace::PIPE)
        ;
    }
} static const whitespace;
// A single parsed element: holds either a Whitespace value or a std::string.
struct Element : x3::variant<Whitespace, std::string>
{
    using base_type::operator=;  // reuse the variant's assignment operators
    using base_type::base_type;  // reuse the variant's constructors
};

// Sequence of parsed elements, in input order.
using Elements = std::vector<Element>;
// Streams an Element: whitespace kinds as bracketed labels, text as a quoted string.
static inline std::ostream& operator<<(std::ostream& os, Element const& el) {
    struct print_visitor {
        std::ostream& out;

        // Picks the label for the whitespace kind; unknown values print "?".
        auto& operator()(Whitespace ws) const {
            const char* label = "?";
            switch (ws) {
                case Whitespace::NEWLINE: label = "[newline]"; break;
                case Whitespace::PIPE:    label = "[pipe]";    break;
                case Whitespace::TAB:     label = "[tab]";     break;
            }
            return out << label;
        }

        // Text is written via std::quoted.
        auto& operator()(const std::string& str) const {
            return out << std::quoted(str);
        }
    } vis{os};

    return boost::apply_visitor(vis, el);
}
// Parses a sample text with a grammar that carries its own skipper and prints
// one element per line, followed by any unparsed remainder.
int main() {
    // Sample input: leading tab, then a real newline between the two words.
    std::string const text = "\tHello \n World";
    auto start = begin(text), stop = end(text);

    // The skipper is x3::space minus the whitespace symbols, so those symbols
    // remain available to the grammar body.
    const auto p  = x3::skip(x3::space - whitespace) [
            *(+x3::graph | whitespace)
        ];

    Elements elements;
    if (!parse(start, stop, p, elements)) {
        std::cout << "failed to parse!\n";
    } else {
        std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
    }

    if (start != stop) {
        std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
    }
}

输出

[tab]
"Hello"
[newline]
"World"

甚至更简单?

这里似乎根本不需要任何跳过器(skipper)。为什么不这样写:

// No skipper: text is any run of characters other than newline, tab or pipe;
// those three are matched by the whitespace symbol table.
const auto p  = *(+~x3::char_("\n\t|") | whitespace);

既然如此,也就不需要用 symbols 来映射枚举了:

// Variant of the element type that stores the delimiter as a plain char
// instead of an enum value.
struct Element : x3::variant<char, std::string>
{
    // ...
};

// Sequence of parsed elements, in input order.
using Elements = std::vector<Element>;

然后

// One element: a run of characters other than newline, tab or pipe (text),
// otherwise any single character.
const auto p
    = x3::rule<struct ID, Element> {}
    = +~x3::char_("\n\t|") | x3::char_;

Coliru 在线演示(Live On Coliru)

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <iostream>
#include <iomanip>
namespace x3 = boost::spirit::x3;
// A single parsed element: either a delimiter character or a run of text.
struct Element : x3::variant<char, std::string> {
    using variant = x3::variant<char, std::string>;
    using variant::variant;
    using variant::operator=;

    // Streams an Element: delimiters as bracketed labels, text as a quoted string.
    friend std::ostream& operator<<(std::ostream& os, Element const& el) {
        struct print_visitor {
            std::ostream& os;

            // Labels follow the character actually matched: tab prints
            // "[tab]" and pipe prints "[pipe]". Other characters print "?".
            auto& operator()(char ws) const {
                switch(ws) {
                    case '\n': return os << "[newline]";
                    case '\t': return os << "[tab]";
                    case '|': return os << "[pipe]";
                }
                return os << "?";
            }

            // Text is written via std::quoted.
            auto& operator()(const std::string& str) const { return os << std::quoted(str); }
        } vis{os};
        return boost::apply_visitor(vis, el);
    }
};

// Sequence of parsed elements, in input order.
using Elements = std::vector<Element>;
// Parses a sample text without any skipper and prints one element per line,
// followed by any unparsed remainder.
int main() {
    // Sample input: leading tab, then a real newline between the two words.
    std::string const text = "\tHello \n World";
    auto start = begin(text);
    auto const stop = end(text);

    Elements elements;

    // One element: a run of characters other than newline, tab or pipe,
    // otherwise any single character.
    const auto p
        = x3::rule<struct ID, Element> {}
        = +~x3::char_("\n\t|") | x3::char_;

    if (!parse(start, stop, *p, elements)) {
        std::cout << "failed to parse!\n";
    } else {
        std::copy(begin(elements), end(elements), std::ostream_iterator<Element>(std::cout, "\n"));
    }

    if (start != stop) {
        std::cout << "unparsed: " << std::quoted(std::string(start, stop)) << '\n';
    }
}

输出

[pipe]
"Hello "
[newline]
" World"

问题在于您在第 76 行使用了 phrase_parse 而不是 parse。可以尝试类似下面的写法:

// Skipper-less entry point: no skipper argument is passed.
bool result = parse(start, stop, elementsParser, elements);

您的 phrase_parse 调用被指示跳过空白字符,而这恰恰不是您想要的。

看看第一个答案 如何使用boost::spirit将一系列单词解析为向量?