使用istream迭代器时解析失败

parsing fails when using istream iterator

本文关键字:失败 istream 迭代器 使用      更新时间:2023-10-16

我使用boost::spirit来解析csv输入(请不要建议替代方法,这只是测试)。当我将stdin的内容读入字符串并对其进行迭代时,解析成功;然而,当直接读取std::cin的内容时(通过我自己编写的包装器,因为phrase_parse需要继承std::iterator<std::forward_iterator_tag, T>的迭代器,而std::istream_iterator<T>不这样做),解析失败,我无法弄清楚为什么,因为调试输出似乎表明在两种情况下解析相同的文本,结果不同。

我甚至尝试迭代std::cin并将其放入字符串中,并正确解析;我不明白为什么所提供的迭代器类型会影响结果。下面是我正在处理的示例(对不起,它太大了,但是您可以插入并轻松编译它)。尝试定义宏SECTION_STRINGSTREAM(成功)或SECTION_CIN(失败)来观察奇怪的行为(默认行为(成功)是当std::cin被读入字符串时)。

如果编译后用 echo "\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" | ./spirit_csv 运行,调试输出会清楚地显示整个字符串都被解析了。我还添加了 if (++start == end) std::cerr << "woah";,它在所有情况下都会被触发,因此解析似乎确实到达了输入的末尾。

// following example from:
// http://www.boost.org/doc/libs/1_58_0/libs/spirit/example/qi/employee.cpp, and
// num_list4.cpp, and others
#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr
// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
namespace frontend {
namespace spirit = boost::spirit;
namespace qi = spirit::qi;
namespace ascii = spirit::ascii;
// One CSV record. The field order below mirrors the Boost.Fusion adaptation
// and the column order consumed by cursor_parser's `start` rule: a quoted
// string, three unsigned integers, then six more quoted strings.
struct cursor {
  // First column (quoted string).
  std::string file;
  // Second to fourth columns (unsigned integers); presumably a position
  // inside `file` -- confirm with whatever produces the CSV.
  unsigned long long offset;
  unsigned long long line;
  unsigned long long col;
  // Expected values: decl/ref/defn/call. Stored as free-form text for now;
  // the author's note suggests verifying it against an enum.
  std::string reference_type;
  // Expected values: variable/function/scope/label/type.
  std::string specifier;
  // The type, when `specifier` is variable or function.
  std::string type;
  // Remaining quoted-string columns, stored verbatim.
  std::string language;
  std::string name;
  std::string scope;
};
}
// Adapt frontend::cursor as a Boost.Fusion sequence so that Spirit can fill
// it member by member. Members are listed in declaration order. The
// (type, name) entries form a preprocessor sequence: they are juxtaposed, not
// comma-separated, and the invocation takes no trailing semicolon.
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor,
                          (std::string, file)
                          (unsigned long long, offset)
                          (unsigned long long, line)
                          (unsigned long long, col)
                          (std::string, reference_type)
                          (std::string, specifier)
                          (std::string, type)
                          (std::string, language)
                          (std::string, name)
                          (std::string, scope))
// note: blank_type is so that newlines aren't counted as skippable, because
// they are significant for csv! however, typically you'll be wanting to use
// boost::spirit::ascii::space as your whitespace operator if you really do not
// care about whitespace
namespace frontend {
// Grammar for a sequence of CSV records separated by line breaks; the result
// is a std::vector<cursor>. The skipper type is qi::blank_type so that line
// breaks are not skipped and remain available as the record separator.
template <typename Iterator>
struct cursor_parser
    : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
  // Rules, from the smallest unit up to the whole input.
  qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
  qi::rule<Iterator, cursor(), qi::blank_type> start;
  qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;

  cursor_parser() : cursor_parser::base_type(vec) {
    // A double-quoted field. lexeme[] disables the skipper inside the quotes,
    // so blanks within a field are kept.
    quoted_string %= qi::lexeme['"' >> *(ascii::char_ - '"') >> '"'];

    // One record: ten comma-separated columns, in cursor's member order.
    start %= quoted_string >> ','    // file
             >> qi::uint_ >> ','     // offset
             >> qi::uint_ >> ','     // line
             >> qi::uint_ >> ','     // col
             >> quoted_string >> ',' // reference_type
             >> quoted_string >> ',' // specifier
             >> quoted_string >> ',' // type
             >> quoted_string >> ',' // language
             >> quoted_string >> ',' // name
             >> quoted_string;       // scope

    // The whole input: records separated by end-of-line.
    vec %= start % qi::eol;

    // Name every rule first, then switch on debug tracing for each of them.
    quoted_string.name("qs");
    start.name("s");
    vec.name("v");
    debug(quoted_string);
    debug(start);
    debug(vec);
  }
};
// Thin wrapper around std::istream_iterator<T> that is tagged as a forward
// iterator. The base class is inherited privately, so its member typedefs are
// not visible to users; the std::iterator_traits specialization in this file
// is what actually publishes the iterator's traits.
//
// CAUTION: the wrapped std::istream_iterator is a single-pass input iterator.
// All copies read from the same stream, so advancing one copy consumes input
// for every other copy, and a saved copy cannot be used to re-read earlier
// elements. Code that relies on the multi-pass guarantee of real forward
// iterators (for example a parser that backtracks) will observe inconsistent
// data.
template <typename T>
class cin_forward_iterator : std::iterator<std::forward_iterator_tag, T> {
private:
  std::istream_iterator<T> i;

public:
  // Constructs the end-of-stream sentinel.
  cin_forward_iterator() : i() {}
  // Constructs an iterator that reads values of type T from `in`.
  cin_forward_iterator(std::istream &in) : i(in) {}

  // Returns the element the wrapped iterator currently refers to.
  const T &operator*() const { return *i; }

  // Pre-increment: advances this iterator and returns a reference to it, so
  // chained expressions such as ++(++it) act on `it` rather than on a copy.
  cin_forward_iterator &operator++() {
    ++i;
    return *this;
  }

  // Post-increment: advances this iterator and returns its previous state.
  cin_forward_iterator operator++(int) {
    cin_forward_iterator previous = *this;
    ++i;
    return previous;
  }

  // Two wrappers are equal exactly when the wrapped iterators are equal.
  bool operator==(const cin_forward_iterator &rhs) const { return i == rhs.i; }
  bool operator!=(const cin_forward_iterator &rhs) const {
    return !(*this == rhs);
  }
};
}
namespace std {
// Traits for frontend::cin_forward_iterator<T>. The category is reported as
// forward_iterator_tag; every other member type is taken unchanged from
// std::istream_iterator<T>.
template <typename T>
struct iterator_traits<frontend::cin_forward_iterator<T>> {
  using iterator_category = std::forward_iterator_tag;
  using value_type = typename std::istream_iterator<T>::value_type;
  using difference_type = typename std::istream_iterator<T>::difference_type;
  using pointer = typename std::istream_iterator<T>::pointer;
  using reference = typename std::istream_iterator<T>::reference;
};
} // namespace std
/* try:
echo \
  "\"f\",111,222,333,\"ref_type\",\"spc\",\"type\",\"lan\",\"name\",\"scop\"" \
  | ./spirit_csv
*/
// Reads CSV records from stdin, parses them with frontend::cursor_parser and
// prints every parsed record to stdout. Returns 0 when phrase_parse succeeds
// and 1 when it fails.
//
// A compile-time switch selects how the input reaches the parser:
//   SECTION_STRINGSTREAM  stdin is copied into a std::string via a stringstream
//   SECTION_CIN           stdin is parsed directly through cin_forward_iterator
//   (neither defined)     stdin is copied into a std::string through
//                         cin_forward_iterator, and the string is parsed
int main() {
  std::vector<frontend::cursor> v;
#ifdef SECTION_STRINGSTREAM
  // succeeds
  std::stringstream ss;
  ss << std::cin.rdbuf();
  std::string s(ss.str());
  auto start = s.cbegin();
  auto end = s.cend();
#elif SECTION_CIN
  // fails
  std::cin >> std::noskipws;
  frontend::cin_forward_iterator<char> start(std::cin);
  frontend::cin_forward_iterator<char> end;
#else
  // succeeds
  std::cin >> std::noskipws;
  frontend::cin_forward_iterator<char> start_in(std::cin);
  frontend::cin_forward_iterator<char> end_in;
  std::string s;
  while (start_in != end_in) {
    s += *start_in;
    ++start_in;
  }
  auto start = s.begin();
  auto end = s.end();
#endif

  // Instantiate the grammar for whichever iterator type the section above
  // produced.
  typedef decltype(start) iterator_type;
  const bool parsed =
      phrase_parse(start, end, frontend::cursor_parser<iterator_type>(),
                   boost::spirit::qi::blank, v);
  if (!parsed) {
    std::cerr << "failure!" << std::endl;
    return 1;
  }

  for (auto &c : v) {
    std::cout << boost::fusion::as_vector(c) << std::endl;
  }
  std::cerr << "success!" << std::endl;
  return 0;
}

为什么要有自己的迭代器?

自己实现迭代器很难做对,而且看起来你也没有让它支持多遍(multi-pass)遍历。

输入迭代器和前向迭代器属于不同的类别是有原因的!仅仅在类型上声称自己是前向迭代器是没有用的。前向迭代器必须可复制,并且各个副本在解引用时必须能重复得到相同的值。输入迭代器不满足这些条件。

事实上你应该使用boost::spirit::istream_iterator或者你可以使用Spirit的multi_pass适配器组成一个迭代器:

  • http://www.boost.org/doc/libs/1_58_0/libs/spirit/doc/html/spirit/support/multi_pass.html

修复后的版本:

Live On Coliru

#define BOOST_SPIRIT_DEBUG 1
#define BOOST_SPIRIT_DEBUG_PRINT_SOME 200
#define BOOST_SPIRIT_DEBUG_OUT std::cerr
// std includes
#include <iostream>
#include <string>
// boost includes
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/as_vector.hpp>
namespace frontend {
    namespace qi     = boost::spirit::qi;
    // One CSV record. The field order below mirrors the Boost.Fusion
    // adaptation and the column order consumed by cursor_parser's `start`
    // rule: a quoted string, three unsigned integers, then six more quoted
    // strings.
    struct cursor {
        // First column (quoted string).
        std::string file;
        // Second to fourth columns (unsigned integers); presumably a position
        // inside `file` -- confirm with whatever produces the CSV.
        unsigned long long offset;
        unsigned long long line;
        unsigned long long col;
        // Expected values: decl/ref/defn/call. Stored as free-form text for
        // now; the author's note suggests verifying it against an enum.
        std::string reference_type;
        // Expected values: variable/function/scope/label/type.
        std::string specifier;
        // The type, when `specifier` is variable or function.
        std::string type;
        // Remaining quoted-string columns, stored verbatim.
        std::string language;
        std::string name;
        std::string scope;
    };
}
// Adapt frontend::cursor as a Boost.Fusion sequence so that Spirit can fill
// it member by member. Members are listed in declaration order. The
// (type, name) entries are juxtaposed as a preprocessor sequence: there are no
// commas between entries and no trailing semicolon.
BOOST_FUSION_ADAPT_STRUCT(frontend::cursor, 
        (std::string, file)
        (unsigned long long, offset)
        (unsigned long long, line)
        (unsigned long long, col)
        (std::string, reference_type)
        (std::string, specifier)
        (std::string, type)
        (std::string, language)
        (std::string, name)
        (std::string, scope))
namespace frontend {
    // Grammar for a sequence of CSV records separated by line breaks; the
    // result is a std::vector<cursor>.
    // NOTE: the skipper is qi::blank_type, which doesn't skip newlines, so
    // they remain available as the record separator.
    template <typename Iterator>
    struct cursor_parser : public qi::grammar<Iterator, std::vector<cursor>(), qi::blank_type> {
        cursor_parser() : cursor_parser::base_type(vec) {
            // A double-quoted field. lexeme[] disables the skipper inside the
            // quotes, so blanks within a field are kept.
            quoted_string %= qi::lexeme['"' >> *(qi::char_ - '"') >> '"'];

            // One record: ten comma-separated columns, in cursor's member order.
            start %= quoted_string >> ','     // file
                     >> qi::uint_ >> ','      // offset
                     >> qi::uint_ >> ','      // line
                     >> qi::uint_ >> ','      // col
                     >> quoted_string >> ','  // reference_type
                     >> quoted_string >> ','  // specifier
                     >> quoted_string >> ','  // type
                     >> quoted_string >> ','  // language
                     >> quoted_string >> ','  // name
                     >> quoted_string;        // scope

            // The whole input: records separated by end-of-line.
            vec %= start % qi::eol;

            // Name the rules after their identifiers and enable debug tracing.
            BOOST_SPIRIT_DEBUG_NODES((quoted_string)(start)(vec))
        }

      private:
        // Rules, from the smallest unit up to the whole input.
        qi::rule<Iterator, std::string(), qi::blank_type> quoted_string;
        qi::rule<Iterator, cursor(), qi::blank_type> start;
        qi::rule<Iterator, std::vector<cursor>(), qi::blank_type> vec;
    };
}
int main() {
    // '"f",111,222,333,"ref_type","spc","type","lan","name","scop"'
    using It = boost::spirit::istream_iterator;
    It start_in(std::cin >> std::noskipws), end_in;
    std::vector<frontend::cursor> v;
    if (phrase_parse(start_in, end_in, frontend::cursor_parser<It>(), frontend::qi::blank, v)) {
        for (auto &c : v) {
            std::cout << boost::fusion::as_vector(c) << std::endl;
        }
        std::cerr << "success!" << std::endl;
    } else {
        std::cerr << "failure!" << std::endl;
        return 1;
    }
}

输出
(f 111 222 333 ref_type spc type lan name scop)
success!
调试输出:

<vec>
  <try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
  <start>
    <try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
    <quoted_string>
      <try>"f",111,222,333,"ref_type","spc","type","lan","name","scop"\n</try>
      <success>,111,222,333,"ref_type","spc","type","lan","name","scop"\n</success>
      <attributes>[[f]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"ref_type","spc","type","lan","name","scop"\n</try>
      <success>,"spc","type","lan","name","scop"\n</success>
      <attributes>[[r, e, f, _, t, y, p, e]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"spc","type","lan","name","scop"\n</try>
      <success>,"type","lan","name","scop"\n</success>
      <attributes>[[s, p, c]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"type","lan","name","scop"\n</try>
      <success>,"lan","name","scop"\n</success>
      <attributes>[[t, y, p, e]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"lan","name","scop"\n</try>
      <success>,"name","scop"\n</success>
      <attributes>[[l, a, n]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"name","scop"\n</try>
      <success>,"scop"\n</success>
      <attributes>[[n, a, m, e]]</attributes>
    </quoted_string>
    <quoted_string>
      <try>"scop"\n</try>
      <success>\n</success>
      <attributes>[[s, c, o, p]]</attributes>
    </quoted_string>
    <success>\n</success>
    <attributes>[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]</attributes>
  </start>
  <start>
    <try></try>
    <quoted_string>
      <try></try>
      <fail/>
    </quoted_string>
    <fail/>
  </start>
  <success>\n</success>
  <attributes>[[[[f], 111, 222, 333, [r, e, f, _, t, y, p, e], [s, p, c], [t, y, p, e], [l, a, n], [n, a, m, e], [s, c, o, p]]]]</attributes>
</vec>

备注:

  • BOOST_FUSION_ADAPT_STRUCT宏调用错误(逗号太多)