使用boost::spirit::qi解析输入的CSV文件
parse typed csv file with boost::spirit::qi
我想解析一个带类型值的CSV文件。每一列的类型在表头(第一行)中定义,例如:
int double double int unsigned
12 1.3 23445 1 42
45 46 47 48 49
结果数据结构可能是如下的二维向量:
// One column of the table: a vector whose element type is one of the
// supported cell types (int, unsigned, double).
using ColumnType = boost::variant<
std::vector<int>,
std::vector<unsigned>,
std::vector<double>
>;
// The whole table as a vector of columns.
using ResultType = std::vector<ColumnType>;
工作代码:
// Short aliases for the Boost namespaces used by the grammar below.
namespace phoenix = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// One column of the table: a vector whose element type is one of the
// supported cell types (int, unsigned, double).
using ColumnType = boost::variant<
std::vector<int>,
std::vector<unsigned>,
std::vector<double>
>;
// The whole table as a vector of columns.
using ResultType = std::vector<ColumnType>;
// Tag identifying the declared type of a column; one enumerator per
// alternative of ColumnType.
enum class CSVDataType
{
Int, UInt, Double
};
/// Qi grammar for a blank-separated table whose first line names the type of
/// every column ("int", "unsigned" or "double").
///
/// The header line fills `column_types` and creates one empty vector per
/// column in `result`; every following line is parsed cell by cell with the
/// numeric parser that matches the column's declared type, and each value is
/// appended to the corresponding column vector.
///
/// @tparam Iterator  forward iterator over the input characters.
template<typename Iterator>
struct TypedCSVGrammar: qi::grammar<Iterator, ResultType(), ascii::blank_type> {
    /// Symbol table translating a header keyword into its CSVDataType tag.
    struct types_: qi::symbols<char, CSVDataType> {
        types_() {
            add
                ("int", CSVDataType::Int)
                ("unsigned", CSVDataType::UInt)
                ("double", CSVDataType::Double);
        }
    } types;

    TypedCSVGrammar() :
        TypedCSVGrammar::base_type(csv, "csv")
    {
        using ascii::string;
        using namespace qi::labels;

        // Header: any number of type keywords, collected into a vector.
        header %= *(types);

        // A cell takes the column index as inherited attribute (_r1). The
        // qi::eps guards select the alternative whose parser matches the
        // declared type of that column; the semantic action stores the value.
        cell =
            (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::Int))
                >> qi::int_ [phoenix::bind(&TypedCSVGrammar::add_int, this, _r1, _1)]
            ) | (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::UInt))
                >> qi::uint_ [phoenix::bind(&TypedCSVGrammar::add_uint, this, _r1, _1)]
            ) | (
                qi::eps(phoenix::ref(column_types)[_r1] == phoenix::val(CSVDataType::Double))
                >> qi::double_ [phoenix::bind(&TypedCSVGrammar::add_double, this, _r1, _1)]
            );

        // A row resets the running column index, then parses exactly one cell
        // per declared column, advancing the index after each cell.
        row =
            qi::eps [phoenix::ref(column) = phoenix::val(0)]
            >> qi::repeat(phoenix::size(phoenix::ref(column_types))) [
                cell(phoenix::ref(column))
                >> qi::eps [phoenix::ref(column)++]
            ];

        // Whole input: header line, then eol-separated rows, then optional
        // trailing blank lines.
        // NOTE(review): qi::attr(result) is given `result` by value at grammar
        // construction time; presumably phoenix::ref(result) was intended -
        // confirm against the Qi documentation before relying on the
        // synthesized attribute.
        csv =
            header [phoenix::bind(&TypedCSVGrammar::construct_columns, this, _1)]
            > qi::eol
            > row % qi::eol
            > *qi::eol
            > qi::attr(result);

        // Report expectation failures on std::cout. The escaped quotes wrap
        // the remaining unparsed input.
        qi::on_error<qi::fail>
        (
            csv
            , std::cout
                << phoenix::val("Error! Expecting ")
                << _4 // what failed?
                << phoenix::val(" here: \"")
                << phoenix::construct<std::string>(_3, _2) // iterators to error-pos, end
                << phoenix::val("\"")
                << std::endl
        );
    }

    /// Appends `i` to column `c`, which must currently hold std::vector<int>
    /// (boost::get throws boost::bad_get otherwise).
    void add_int(std::size_t c, int i) {
        boost::get<std::vector<int>>(result[c]).push_back(i);
    }

    /// Appends `i` to column `c`, which must currently hold
    /// std::vector<unsigned>.
    void add_uint(std::size_t c, unsigned i) {
        boost::get<std::vector<unsigned>>(result[c]).push_back(i);
    }

    /// Appends `i` to column `c`, which must currently hold
    /// std::vector<double>.
    void add_double(std::size_t c, double i) {
        boost::get<std::vector<double>>(result[c]).push_back(i);
    }

    /// Records the declared column types and appends one empty, correctly
    /// typed vector per column to `result`.
    void construct_columns(const std::vector<CSVDataType>& columns) {
        column_types = columns;
        for (const auto& c : columns) {
            switch (c) {
                case CSVDataType::Int:
                    result.push_back(std::vector<int>());
                    break;
                case CSVDataType::UInt:
                    result.push_back(std::vector<unsigned>());
                    break;
                case CSVDataType::Double:
                    result.push_back(std::vector<double>());
                    break;
            }
        }
    }

    std::vector<CSVDataType> column_types; // declared type of each column, set by the header
    std::size_t column;                    // index of the cell being parsed; reset at the start of each row
    ResultType result;                     // parsed data, one vector per column

    qi::rule<Iterator, ResultType(), ascii::blank_type> csv;
    qi::rule<Iterator, std::vector<CSVDataType>(), ascii::blank_type> header;
    qi::rule<Iterator, void(std::size_t), ascii::blank_type> cell;
    qi::rule<Iterator, void(), ascii::blank_type> row;
};
有更好的解决方案吗?我想支持不止3种类型(可能超过10种),按现在的写法需要编写大量重复代码。
我不明白您为什么要提出这样一个人为的目标数据结构。如果索引不匹配,似乎会导致错误。
我可以在这里推荐一个Nabialek技巧吗?
如果你把AST改成:
using ValueType = boost::variant<int, unsigned, double>;
using ResultType = std::vector<std::vector<ValueType>>;
效果会很好(这似乎是一个更可取的方法)。
简而言之,将列类型转换为解析器规则的向量(std::vector<dynamic>)。
Live On Coliru
#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
// Short aliases for the Boost namespaces used below.
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
// A single parsed cell: one of the supported numeric types.
using ValueType = boost::variant<int, unsigned, double>;
// The parsed table as a vector of rows, each row a vector of cells.
using ResultType = std::vector<std::vector<ValueType>>;
// Column type tag; not referenced by the grammar below, which maps header
// keywords directly to parser rules.
enum class CSVDataType { Int, UInt, Double };
// Stream inserters for rule pointers and vectors of rule pointers. They print
// fixed placeholder text and are marked by the author as debug-only helpers
// (BOOST_SPIRIT_DEBUG streams rule attributes and locals).
namespace boost { namespace spirit { namespace qi { // FOR DEBUG
// Prints a placeholder for a pointer to any qi::rule.
template <typename... T> std::ostream& operator<<(std::ostream& os, rule<T...> const*) { return os << "(lazy rule)"; }
// Prints a placeholder for a vector of rule pointers.
template <typename... T> std::ostream& operator<<(std::ostream& os, std::vector<rule<T...> const*> const&) { return os << "(column mappings)"; }
} } }
/// Qi grammar for a typed table: the first line lists one type keyword per
/// column ("int", "unsigned", "double"); every following line holds one value
/// per column.
///
/// Each header keyword is mapped by `types` to a pointer to the cell rule that
/// parses that type. The resulting vector of rule pointers is kept in a rule
/// local and each row is parsed by invoking the stored rules in order through
/// qi::lazy.
///
/// @tparam Iterator  iterator over the input characters.
/// @tparam Skipper   skipper type; defaults to ascii::blank_type.
template<typename Iterator, typename Skipper = ascii::blank_type>
struct TypedCSVGrammar: qi::grammar<Iterator, ResultType(), Skipper> {
    TypedCSVGrammar() : TypedCSVGrammar::base_type(start, "csv")
    {
        using namespace qi::labels;

        // Header: sequence of type keywords, yielding a vector of rule pointers.
        header = *types;

        // Store the header's rule pointers in the local _cols (omit keeps them
        // out of the synthesized attribute). Each row then runs the rule
        // selected by the running index _current, which the repeat bound
        // `_current=0` sets back to zero.
        csv = qi::omit[ header [ _cols = _1 ] ] > qi::eol
            > qi::repeat(_current=0, px::size(_cols)) [ qi::lazy(*_cols[_current++]) ] % qi::eol
            > *qi::eol
            ;

        start = csv;

        BOOST_SPIRIT_DEBUG_NODES((start)(csv)(header));

        // Report expectation failures on std::cout. The escaped quotes wrap
        // the remaining unparsed input, followed by a newline.
        qi::on_error<qi::fail> (csv, px::ref(std::cout)
                << "Error! Expecting " << _4 // what failed?
                << " here: \"" << px::construct<std::string>(_3, _2) // iterators to error-pos, end
                << "\"\n"
            );
    }

  private:
    using cell_parser_t = qi::rule<Iterator, ValueType(), Skipper>;
    using dynamic = cell_parser_t const*;

    /// Symbol table mapping a type keyword to the address of the cell rule
    /// that parses values of that type. The rules are members of the table.
    struct types_: qi::symbols<char, dynamic> {
        cell_parser_t
            int_cell = qi::int_,
            uint_cell = qi::uint_,
            double_cell = qi::double_;

        types_() {
            this->add
                ("int", &int_cell)
                ("unsigned", &uint_cell)
                ("double", &double_cell);
            BOOST_SPIRIT_DEBUG_NODES((int_cell)(uint_cell)(double_cell))
        }
    } types;

    // Rule locals of `csv`: the per-column rule pointers and the current
    // column index, named through the _a/_b placeholders below.
    using state = qi::locals<std::vector<dynamic>, size_t>;
    qi::_a_type _cols;
    qi::_b_type _current;

    qi::rule<Iterator, ResultType(), Skipper> start;
    qi::rule<Iterator, std::vector<dynamic>(), Skipper> header;
    qi::rule<Iterator, ResultType(), Skipper, state> csv;
};
/// Reads a typed table from standard input, parses it with TypedCSVGrammar
/// and prints the parsed rows (cells separated by tabs), or a failure
/// message. Any input left unparsed is echoed afterwards.
int main() {
    using It = boost::spirit::istream_iterator;
    // noskipws: whitespace must reach the parser, which uses a blank skipper
    // and matches line ends explicitly.
    It f(std::cin >> std::noskipws), l;

    TypedCSVGrammar<It> g;
    ResultType data;

    bool ok = qi::phrase_parse(f, l, g, ascii::blank, data);

    if (ok) {
        std::cout << "Parse success\n";
        for(auto& row: data) {
            for(auto& cell: row) std::cout << cell << "\t";
            std::cout << "\n";
        }
    }
    else
        std::cout << "Parse failed\n";

    if (f!=l)
        std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
}
对于所示的输入,它输出
Parse success
12 1.3 23445 1 42
45 46 47 48 49
以及如下调试信息:
<start>
<try>int double double in</try>
<csv>
<try>int double double in</try>
<header>
<try>int double double in</try>
<success>\n12 1.3 23445 1</success>
<attributes>[[(lazy rule), (lazy rule), (lazy rule), (lazy rule), (lazy rule)]]</attributes>
</header>
<int_cell>
<try>12 1.3 23445 1 </try>
<success> 1.3 23445 1 </success>
<attributes>[12]</attributes>
</int_cell>
<double_cell>
<try> 1.3 23445 1 </try>
<success> 23445 1 42\n45</success>
<attributes>[1.3]</attributes>
</double_cell>
<double_cell>
<try> 23445 1 42\n45</try>
<success> 1 42\n45 46 </success>
<attributes>[23445]</attributes>
</double_cell>
<int_cell>
<try> 1 42\n45 46 </try>
<success> 42\n45 46 47 </success>
<attributes>[1]</attributes>
</int_cell>
<uint_cell>
<try> 42\n45 46 47 </try>
<success>\n45 46 47 4</success>
<attributes>[42]</attributes>
</uint_cell>
<int_cell>
<try>45 46 47 48</try>
<success> 46 47 48 </success>
<attributes>[45]</attributes>
</int_cell>
<double_cell>
<try> 46 47 48 </try>
<success> 47 48 49\n</success>
<attributes>[46]</attributes>
</double_cell>
<double_cell>
<try> 47 48 49\n</try>
<success> 48 49\n</success>
<attributes>[47]</attributes>
</double_cell>
<int_cell>
<try> 48 49\n</try>
<success> 49\n</success>
<attributes>[48]</attributes>
</int_cell>
<uint_cell>
<try> 49\n</try>
<success>\n</success>
<attributes>[49]</attributes>
</uint_cell>
<int_cell>
<try></try>
<fail/>
</int_cell>
<success></success>
<attributes>[[[12, 1.3, 23445, 1, 42], [45, 46, 47, 48, 49], []]]</attributes><locals>((column mappings) 1)</locals>
</csv>
<success></success>
<attributes>[[[12, 1.3, 23445, 1, 42], [45, 46, 47, 48, 49], []]]</attributes>
</start>
相关文章:
- 在C++程序中输入的文本文件将不起作用,除非文本被复制和粘贴
- 2D数组来自文本输入,中间有空格
- 如何使用 < 和 > 命令获取 c++ 中的输入和输出?
- 检查输入是否不是整数或数字
- 正在尝试了解输入验证循环
- 读取文件并输入到矢量中
- C++如何通过用户输入删除列表元素
- 用c++从输入文件中读取另一行
- 读取文件的最后一行并输入到链接列表时出错
- 创建一个函数以在输入为负数或零时输出字符串.第一次执行用户定义的函数
- 如何使用用户输入在C++中正确填充2D数组
- C++MySQL C api用户输入行
- 正在将csv文件读取为双精度矢量
- 如何从输入中读取 CSV 行并将每个值初始化为不同类型的变量?(C++)
- 尝试从 csv 文件输入大量数据时出错
- 如何在用户输入值后写入.csv文件的下一行,而不会覆盖以前的值?
- 如何处理csv文件输入流,我需要将文件的每一行中的数据划分为6个不同的变量(类型字符串和int)
- 从链表的.csv文件中读取输入
- 使用boost::spirit::qi解析输入的CSV文件
- 如何在C++中从CSV文件输入数据