使用boost::spirit解析Newick语法
Parsing Newick grammar using boost::spirit
我正在尝试使用boost::spirit库解析Newick语法(此处定义)。
我已经制作了自己的语法分析器,可以正确识别语法。这是代码:
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <vector>
namespace parser
{
struct ptree;
typedef boost::variant<boost::recursive_wrapper<ptree>> ptree_recursive;
struct ptree
{
std::vector<ptree_recursive> children;
std::string name;
double length;
};
/* Used to cast ptree_recursive into ptree. */
class ptree_visitor : public boost::static_visitor<ptree>
{
public:
ptree operator() (ptree tree) const
{
return tree;
}
};
}
BOOST_FUSION_ADAPT_STRUCT(
parser::ptree,
(std::vector<parser::ptree_recursive>, children)
(std::string, name)
(double, length)
)
namespace parser
{
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
template<typename Iterator>
struct newick_grammar : qi::grammar<Iterator, ptree(), ascii::space_type>
{
public:
newick_grammar() : newick_grammar::base_type(tree)
{
using qi::lexeme;
using qi::double_;
using ascii::char_;
/* This is the only grammar that works fine:
* http://evolution.genetics.washington.edu/phylip/newick_doc.html */
label = lexeme[+(char_ - ':' - ')' - ',')];
branch_length = ':' >> double_;
subtree =
-descendant_list
>> -label
>> -branch_length;
descendant_list =
'('
>> subtree
>> *(',' >> subtree )
>> ')';
tree = subtree >> ';';
BOOST_SPIRIT_DEBUG_NODE(label);
BOOST_SPIRIT_DEBUG_NODE(branch_length);
BOOST_SPIRIT_DEBUG_NODE(subtree);
BOOST_SPIRIT_DEBUG_NODE(descendant_list);
BOOST_SPIRIT_DEBUG_NODE(tree);
}
private:
/* grammar rules */
qi::rule<Iterator, ptree(), ascii::space_type> tree, subtree;
qi::rule<Iterator, ptree_recursive(), ascii::space_type> descendant_list;
qi::rule<Iterator, double(), ascii::space_type> branch_length;
qi::rule<Iterator, std::string(), ascii::space_type> label;
};
}
提供给解析器的ptree实例存储newick树。用于此代码的测试字符串如下:
(((One:0.1,Two:0.2)Sub1:0.3,(Three:0.4,Four:0.5)Sub2:0.6)Sub3:0.7,Five:0.8)Root:0.9;
语法分析器正确地识别语法,但它生成一个部分树。特别是,被转发的ptree实例包含"根"节点及其第一个子节点"Sub3"。我试着使用push_at和at_c方法(在这里解释)。我得到了同样的结果。
为什么语法似乎不能创建和添加所有节点,甚至不能识别语法并在树上移动?
谢谢你的建议。
解决方案
template<typename Iterator>
struct newick_grammar : qi::grammar<Iterator, base::ptree()>
{
public:
newick_grammar() : newick_grammar::base_type(tree)
{
/* This is the only grammar that works fine:
* http://evolution.genetics.washington.edu/phylip/newick_doc.html */
label %= qi::lexeme[+(qi::char_ - ':' - ')' - ',')];
branch_length %= ':' >> qi::double_;
subtree =
-descendant_list
>> -label
>> -branch_length;
descendant_list =
'('
>> subtree
>> *(',' >> subtree )
>> ')';
tree %= subtree >> ';';
BOOST_SPIRIT_DEBUG_NODE(label);
BOOST_SPIRIT_DEBUG_NODE(branch_length);
BOOST_SPIRIT_DEBUG_NODE(subtree);
BOOST_SPIRIT_DEBUG_NODE(descendant_list);
BOOST_SPIRIT_DEBUG_NODE(tree);
}
private:
/* grammar rules */
qi::rule<Iterator, base::ptree()> tree, subtree;
qi::rule<Iterator, base::children_ptree()> descendant_list;
qi::rule<Iterator, double()> branch_length;
qi::rule<Iterator, std::string()> label;
};
我认为您的程序中有很多cargo cult编码。例如,变体是完全无用的。所以我重写了一点,添加了一些评论来帮助你理解(我希望,如果不清楚,请不要犹豫,在评论中询问)。我把空间规格放在一边,因为我认为它对你来说毫无用处。
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <vector>
#include <string>
#include <iostream>
namespace parser
{
// Forward declaration for the vector
struct ptree;
// typedef to ease the writing
typedef std::vector<ptree> children_vector;
// The tree structure itseflf
struct ptree
{
children_vector children;
std::string name;
double length;
};
// Streaming operator for printing the result
std::ostream& operator<<(std::ostream& stream, const ptree& tree)
{
bool first = true;
stream << "(" << tree.name << ": " << tree.length << " { ";
for (auto child: tree.children)
{
stream << (first ? "" : "," ) << child;
first = false;
}
stream << " }";
return stream;
}
}
// adapt the structure to fusion phoenix
BOOST_FUSION_ADAPT_STRUCT(
parser::ptree,
(parser::children_vector, children)
(std::string, name)
(double, length)
)
namespace parser
{
// namespace aliasing to shorten the names
namespace qi = boost::spirit::qi;
namespace phoenix = boost::phoenix;
// This grammar parse string to a ptree
struct newick_grammar : qi::grammar<std::string::const_iterator, ptree()>
{
public:
newick_grammar()
: newick_grammar::base_type(tree) // We try to parse the tree rule
{
using phoenix::at_c; // Access nth field of structure
using phoenix::push_back; // Push into vector
// For label use %= to assign the result of the parse to the string
label %= qi::lexeme[+(qi::char_ - ':' - ')' - ',')];
// For branch length use %= to assign the result of the parse to the
// double
branch_length %= ':' >> qi::double_;
// When parsing the subtree just assign the elements that have been
// built in the subrules
subtree =
// Assign vector of children to the first element of the struct
-descendant_list [at_c<0>(qi::_val) = qi::_1 ]
// Assign the label to the second element
>> -label [ at_c<1>(qi::_val) = qi::_1 ]
// Assign the branch length to the third element
>> -branch_length [ at_c<2>(qi::_val) = qi::_1 ];
// Descendant list is a vector of ptree, we just push back the
// created ptrees into the vector
descendant_list =
'(' >> subtree [ push_back(qi::_val, qi::_1) ]
>> *(',' >> subtree [ push_back(qi::_val, qi::_1) ])
>> ')';
// The tree receive the whole subtree using %=
tree %= subtree >> ';' ;
}
private:
// Here are the various grammar rules typed by the element they do
// generate
qi::rule<std::string::const_iterator, ptree()> tree, subtree;
qi::rule<std::string::const_iterator, children_vector()> descendant_list;
qi::rule<std::string::const_iterator, double()> branch_length;
qi::rule<std::string::const_iterator, std::string()> label;
};
}
int main(int argc, char const *argv[])
{
namespace qi = boost::spirit::qi;
std::string str;
while (getline(std::cin, str))
{
// Instantiate grammar and tree
parser::newick_grammar grammar;
parser::ptree tree;
// Parse
bool result = qi::phrase_parse(str.cbegin(), str.cend(), grammar, qi::space, tree);
// Print the result
std::cout << "Parsing result: " << std::boolalpha << result << std::endl;
std::cout << tree << std::endl;
}
return 0;
}
以下是您的样本输出:
$ ./a.exe
(((One:0.1,Two:0.2)Sub1:0.3,(Three:0.4,Four:0.5)Sub2:0.6)Sub3:0.7,Five:0.8)Root:0.9;
Parsing result: true
(Root: 0.9 { (Sub3: 0.7 { (Sub1: 0.3 { (One: 0.1 { },(Two: 0.2 { } },(Sub2: 0.6 { (Three: 0.4 { },(Four: 0.5 { } } },(Five: 0.8 { } }
相关文章:
- 1d 智能指针不适用于语法 (*)++
- 助记符和指向成员语法的指针
- 有人能分解一下这个c++模板的语法吗
- C++避免重复声明的语法是什么
- QMetaObject invokeMethod的基于函数指针的语法
- 这个语法std::class<>{}(arg1, arg2) 在C++中是什么意思?
- 为什么包含windows.h会产生语法错误,从而阻止类的实例化?(C2146,C2065)
- 单独定义模板化嵌套类方法的正确语法
- 共享指针和具有自定义删除程序的唯一指针之间的语法差异背后的任何原因
- 错误 C2760:语法错误:映射迭代器上意外的标记"标识符",预期的";"
- 为什么我会收到错误 C2143 语法错误:缺少"*"之前的';'?
- 奇怪的代码抛出编译错误模板< J,int aSize=10> C2143:语法错误:在"<"之前缺少";"
- 使用基类指针调用基类的值构造函数的语法是什么?
- 很好的语法来获取对向量/数组数据的大小引用?
- C++语法运算符功能?
- C++使用 rand 定义函数语法
- 什么文件可以修改 atom 的C++语法?
- 创建模板嵌套类实例的语法?
- C++语法差异:二维和一维数组(指针算术)
- 使用boost::spirit解析Newick语法