高效解析 FIX 消息 c++
Efficient Parsing FIX Message c++
我需要解析一个包含金融FIX协议的文件。示例如下:
1128=99=24535=X49=CME75=2017040934=82452=2017040920070508394791460=201704092007050800000005799=10000000268=2279=0269=B48=900655=ESM783=23271=1473460731=100000005796=17263279=0269=C48=900655=ESM783=24271=2861528731=100000005796=1726310=219
我的应用程序将加载许多文件,每个文件包含数百万行历史数据,因此需要考虑性能。
我已经在网上查看了有关FIX解析的类似问题,并研究了QuickFIX库(特别是使用FIX::Message(string)来解析消息),但我的目标是获得比QuickFIX所能实现的更高的吞吐量。
我为最常见的消息类型(市场数据增量刷新)编写了一个原型,以测试能够达到的速度,结果只有约60,000条消息/秒(包括对一个300万行文件的读取和解析),并不令人满意。
这是我的第一个 c++ 应用程序,所以我预计我的方法中存在许多缺陷,任何有关如何提高其性能的建议我都将不胜感激。
当前流是文件>字符串>MDIncrementalRefresh。MDIncrementalRefresh有两个可选的重复组,我正在使用向量来存储它们,因为它们在消息之间的大小未知。
我猜,与更新并重用前一个 MDIncrementalRefresh 对象相比,每次更新都重新构造 MDIncrementalRefresh 会带来不必要的开销?
提前致谢
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
using namespace std;
// Splits `s` on every occurrence of `delimiter`.
//
// Returns the pieces in order. Empty pieces are kept, so the result always
// has exactly (number of delimiters in s) + 1 elements; an empty input
// yields a single empty string.
//
// The input is taken by const reference so that callers do not pay for a
// copy of the whole message on every call.
std::vector<std::string> string_split(const std::string& s, const char delimiter)
{
    std::vector<std::string> output;
    std::size_t start = 0;
    for (;;)
    {
        const std::size_t end = s.find(delimiter, start);
        if (end == std::string::npos)
        {
            // No further delimiter: the remainder is the last piece.
            output.emplace_back(s, start, std::string::npos);
            break;
        }
        output.emplace_back(s, start, end - start);
        start = end + 1;
    }
    return output;
}
// Separator between "tag=value" fields: the SOH control character (0x01).
const char FIX_FIELD_DELIMITER = '\x01';
// Separator between a tag and its value inside one field.
const char FIX_KEY_DELIMITER = '=';

// Index of the first character of a value string (used for char-typed fields).
const int STR_TO_CHAR = 0;
// Positions of tag and value in the vector produced by splitting a field.
const int KEY = 0;
const int VALUE = 1;

// FIX tag numbers, kept as strings because they are compared directly
// against the tag substring of each split field.
const std::string Field_TransactTime = "60";
const std::string Field_MatchEventIndicator = "5799";
const std::string Field_NoMDEntries = "268";
const std::string Field_MDUpdateAction = "279";
const std::string Field_MDEntryType = "269";
const std::string Field_SecurityID = "48";
const std::string Field_RptSeq = "83";
const std::string Field_MDEntryPx = "270";
const std::string Field_MDEntrySize = "271";
const std::string Field_NumberOfOrders = "346";
const std::string Field_MDPriceLevel = "1023";
const std::string Field_OpenCloseSettlFlag = "286";
const std::string Field_AggressorSide = "5797";
const std::string Field_TradingReferenceDate = "5796";
const std::string Field_HighLimitPrice = "1149";
const std::string Field_LowLimitPrice = "1148";
const std::string Field_MaxPriceVariation = "1143";
const std::string Field_ApplID = "1180";
const std::string Field_NoOrderIDEntries = "37705";
const std::string Field_OrderID = "37";
const std::string Field_LastQty = "32";
const std::string Field_SettlPriceType = "731";
// One element of the OrderID repeating group (tags 37 and 32).
class OrderIdEntry {
public:
    std::string OrderID;  // tag 37
    int LastQty = 0;      // tag 32; defaults to 0 so a default-initialized entry is never indeterminate
};
// One element of the MDEntry repeating group.
//
// Every member has a default so that an entry whose message omits a tag
// never exposes an indeterminate value.
struct MDEntry {
    char MDUpdateAction = '\0';             // tag 279
    char MDEntryType = '\0';                // tag 269
    int SecurityID = 0;                     // tag 48
    int RptSeq = 0;                         // tag 83
    double MDEntryPx = 0.0;                 // tag 270
    int MDEntrySize = 0;                    // tag 271
    int NumberOfOrders = 0;                 // tag 346
    int MDPriceLevel = 0;                   // tag 1023
    int OpenCloseSettlFlag = 0;             // tag 286
    std::string SettlPriceType = "";        // tag 731
    int AggressorSide = 0;                  // tag 5797
    std::string TradingReferenceDate = "";  // tag 5796
    double HighLimitPrice = 0.0;            // tag 1149
    double LowLimitPrice = 0.0;             // tag 1148
    double MaxPriceVariation = 0.0;         // tag 1143
    int ApplID = 0;                         // tag 1180
};
class MDIncrementalRefresh {
public:
string TransactTime;
string MatchEventIndicator;
int NoMDEntries;
int NoOrderIDEntries = 0;
vector<MDEntry> MDEntries;
vector<OrderIdEntry> OrderIdEntries;
MDIncrementalRefresh(const string& message)
{
MDEntry* currentMDEntry = nullptr;
OrderIdEntry* currentOrderIDEntry = nullptr;
for (auto fields : string_split(message, FIX_FIELD_DELIMITER))
{
vector<string> kv = string_split(fields, FIX_KEY_DELIMITER);
// Header :: MDIncrementalRefresh
if (kv[KEY] == Field_TransactTime) this->TransactTime = kv[VALUE];
else if (kv[KEY] == Field_MatchEventIndicator) this->MatchEventIndicator = kv[VALUE];
else if (kv[KEY] == Field_NoMDEntries) this->NoMDEntries = stoi(kv[VALUE]);
else if (kv[KEY] == Field_NoOrderIDEntries) this->NoOrderIDEntries = stoi(kv[VALUE]);
// Repeating Group :: MDEntry
else if (kv[KEY] == Field_MDUpdateAction)
{
MDEntries.push_back(MDEntry());
currentMDEntry = &MDEntries.back(); // use pointer for fast lookup on subsequent repeating group fields
currentMDEntry->MDUpdateAction = kv[VALUE][STR_TO_CHAR];
}
else if (kv[KEY] == Field_MDEntryType) currentMDEntry->MDEntryType = kv[VALUE][STR_TO_CHAR];
else if (kv[KEY] == Field_SecurityID) currentMDEntry->SecurityID = stoi(kv[VALUE]);
else if (kv[KEY] == Field_RptSeq) currentMDEntry->RptSeq = stoi(kv[VALUE]);
else if (kv[KEY] == Field_MDEntryPx) currentMDEntry->MDEntryPx = stod(kv[VALUE]);
else if (kv[KEY] == Field_MDEntrySize) currentMDEntry->MDEntrySize = stoi(kv[VALUE]);
else if (kv[KEY] == Field_NumberOfOrders) currentMDEntry->NumberOfOrders = stoi(kv[VALUE]);
else if (kv[KEY] == Field_MDPriceLevel) currentMDEntry->MDPriceLevel = stoi(kv[VALUE]);
else if (kv[KEY] == Field_OpenCloseSettlFlag) currentMDEntry->OpenCloseSettlFlag = stoi(kv[VALUE]);
else if (kv[KEY] == Field_SettlPriceType) currentMDEntry->SettlPriceType= kv[VALUE];
else if (kv[KEY] == Field_AggressorSide) currentMDEntry->AggressorSide = stoi(kv[VALUE]);
else if (kv[KEY] == Field_TradingReferenceDate) currentMDEntry->TradingReferenceDate = kv[VALUE];
else if (kv[KEY] == Field_HighLimitPrice) currentMDEntry->HighLimitPrice = stod(kv[VALUE]);
else if (kv[KEY] == Field_LowLimitPrice) currentMDEntry->LowLimitPrice = stod(kv[VALUE]);
else if (kv[KEY] == Field_MaxPriceVariation) currentMDEntry->MaxPriceVariation = stod(kv[VALUE]);
else if (kv[KEY] == Field_ApplID) currentMDEntry->ApplID = stoi(kv[VALUE]);
// Repeating Group :: OrderIDEntry
else if (kv[KEY] == Field_OrderID) {
OrderIdEntries.push_back(OrderIdEntry());
currentOrderIDEntry = &OrderIdEntries.back();
currentOrderIDEntry->OrderID = kv[VALUE];
}
else if (kv[KEY] == Field_LastQty) currentOrderIDEntry->LastQty = stol(kv[VALUE]);
}
}
};
int main() {
//std::string filename = "test/sample";
std::string line;
std::ifstream file (filename);
int count = 0;
if (file.is_open())
{
while ( std::getline( file, line ) )
{
MDIncrementalRefresh md(line);
if (md.TransactTime != "") {
count++;
}
}
file.close();
}
cout << count << endl;
return 0;
}
对于那些感兴趣的人,上述代码的大部分处理时间都花在string_split函数上。对string_split的大量调用导致在堆上进行许多(昂贵的)分配。
另一种实现string_split_optim重用预分配的向量,从而避免每次调用时不必要的堆分配/扩展。下面运行150万次迭代的示例表明速度提高了3.4倍。vector.clear()本身不会把向量已分配的内存释放回堆,因此只要后续调用string_split_optim所生成的向量大小不超过之前的大小,向量本身就不会再发生额外的分配。
#include <cstdio>
#include <ctime>
#include <string>
#include <vector>
// Splits `s` on `delimiter` into the caller-supplied vector `output`.
//
// `output` is cleared first and then filled with the pieces in order; empty
// pieces are kept, so it ends up with (number of delimiters) + 1 elements.
// Passing the same vector on every call lets it be reused between calls.
void string_split_optim(std::vector<std::string>& output, const std::string &s, const char delimiter)
{
    output.clear();

    size_t token_begin = 0;
    for (size_t token_end = s.find(delimiter);
         token_end != std::string::npos;
         token_end = s.find(delimiter, token_begin))
    {
        output.push_back(s.substr(token_begin, token_end - token_begin));
        token_begin = token_end + 1;
    }

    // Whatever follows the last delimiter (possibly nothing) is the final piece.
    output.push_back(s.substr(token_begin));
}
// Micro-benchmark comparing string_split (returns a new vector per call)
// with string_split_optim (refills a caller-owned vector) over NUM_RUNS
// iterations, printing the CPU time of each variant.
int main()
{
    const int NUM_RUNS = 1500000;
    // Fields are separated by SOH, written as the octal escape \001: an octal
    // escape stops after three digits, so a digit that follows the delimiter
    // is not absorbed into the escape (as it would be with \x01).
    const std::string s = "1128=9\0019=174\00135=X\00149=CME\00175=20170403\00134=1061\00152=20170402211926965794928\00160=20170402211926965423233\0015799=10000100\001268=1\001279=1\001269=1\00148=9006\00155=ESM7\00183=118\001270=236025.0\001271=95\001346=6\0011023=9\00110=088\001";
    std::vector<std::string> vec;

    // standard
    std::clock_t tStart = std::clock();
    for (int i = 0; i < NUM_RUNS; ++i)
    {
        vec = string_split(s, '=');
    }
    std::printf("Time taken: %.2fs\n", static_cast<double>(std::clock() - tStart) / CLOCKS_PER_SEC);

    // reused vector
    tStart = std::clock();
    for (int i = 0; i < NUM_RUNS; ++i)
    {
        // string_split_optim clears `vec` itself, so no clear() is needed here.
        string_split_optim(vec, s, '=');
    }
    std::printf("Time taken: %.2fs\n", static_cast<double>(std::clock() - tStart) / CLOCKS_PER_SEC);
    return 0;
}
我的Macbook上的结果是提高了3.4倍。
Time taken: 6.60s
Time taken: 1.94s
此外,MDIncrementalRefresh 对象正在被重复构造(对象本身在栈上,但它的向量成员会在堆上扩展)。根据上述关于string_split的发现,我决定重用一个临时对象并只清除其先前的状态,从而再次显著提高了性能。
- boost::进程间消息队列引发错误
- 在线编译器中的分段C++没有打印消息
- C++错误消息*成员参考.**初学者*
- 在createdialog创建的窗口中捕获用于编辑控件的OnMouseMove消息
- 要与"if constexpr"一起使用的编译时消息(在预处理器之后)
- 如何通过参数抛出错误消息
- 从服务器传输到客户端的消息不会出现
- ROS2 动态消息模板
- C++秘密消息学校作业
- glad 导致 glfwSwapBuffers 返回错误消息
- C++入门 5 版:类消息和文件夹
- FindPackageHandleStandardArgs.cmake:137 的 CMake 错误(消息):找不到 Boost (缺少:正则表达式)(找到合适的版本"1.72.0",
- 如何处理从一个对象传递到另一个在C++中具有公共抽象类的对象的消息
- 如何接受 [ENTER] 键作为无效输入并发送错误消息
- 由于无效的 ValidateRgn() 子窗口不会收到WM_PAINT消息
- "string.h"在构建适用于iOS的qt应用程序中找不到消息
- 如何将 Firebase 与基于 Linux 的客户端应用配合使用,以便与服务器进行双向消息通信
- 重新定义预定义的 errno 错误消息 (E2BIG)
- Libmosquitto publish 不会将所有消息传递到 Azure IoT Hub
- 在 capnp FlatArrayMessageReader 的对齐内存缓冲区中接收 zmq 消息