
restore runtime unicode strings

本文关键字:字符串 unicode 运行时 恢复      更新时间:2023-10-16


// Decodes textual "\uXXXX" escape sequences (a literal backslash, the letter
// 'u' and exactly four hexadecimal digits) found in strText into the UTF-16
// code units they denote, and returns the decoded string.
//
// A plain regex replacement cannot do this: a replacement pattern can only
// re-insert captured text, it cannot convert hex digits into a character.
// Each match is therefore parsed and converted explicitly.
//
// Text that does not form a complete escape is copied through unchanged.
QString restoreUnicode(QString strText)
{
    // C++ literal "\\\\u" -> regex \\u -> matches a literal backslash + 'u'.
    // The quantifier sits INSIDE the group so cap(1) holds all four digits.
    QRegExp rx("\\\\u([0-9A-Fa-f]{4})");

    QString strResult;
    strResult.reserve(strText.size());

    int iCopyFrom = 0;  // first input index not yet copied to the result
    int iMatch = rx.indexIn(strText, iCopyFrom);
    while (iMatch != -1)
    {
        // Copy the untouched text that precedes this escape.
        strResult += strText.mid(iCopyFrom, iMatch - iCopyFrom);

        // The regex guarantees four hex digits, so the conversion cannot fail.
        bool bOk = false;
        const ushort uCode = rx.cap(1).toUShort(&bOk, 16);
        strResult += QChar(uCode);

        // Resume after the escape; decoded output is never rescanned, so a
        // decoded backslash cannot start a second round of decoding.
        iCopyFrom = iMatch + rx.matchedLength();
        iMatch = rx.indexIn(strText, iCopyFrom);
    }

    // Copy whatever follows the last escape.
    strResult += strText.mid(iCopyFrom);
    return strResult;
}



您需要自己解码字符串:先用 rx.indexIn(strText) 找到每一个 \uXXXX 转义序列,再把其中的十六进制数字解析为整数(例如 int result; std::istringstream iss(s); if (!(iss>>std::hex>>result).fail()) ...),最后把原字符串中的 \uXXXX 替换为 (wchar_t)result 所表示的字符。


// Replaces every textual "\uXXXX" escape (a literal backslash, 'u' and four
// hexadecimal digits, upper or lower case) in strText with the single UTF-16
// code unit it denotes, and returns the resulting string.
//
// The search always resumes one position past the previous match, so the loop
// terminates even if a candidate cannot be parsed, and a character produced
// by decoding is never interpreted as the start of another escape.
QString restoreUnicode(QString strText)
{
    // C++ literal "\\\\u" -> regex \\u -> matches a literal backslash + 'u'.
    const QRegExp rxUnicode("\\\\u[0-9A-Fa-f]{4}");

    int iNeedle = strText.indexOf(rxUnicode, 0);
    while (iNeedle != -1)
    {
        // Skip the two-character "\u" prefix and parse the four hex digits.
        bool bSuccessFlag = false;
        const ushort uCodeUnit = strText.mid(iNeedle + 2, 4).toUShort(&bSuccessFlag, 16);

        if (bSuccessFlag)
        {
            // Replace exactly this occurrence (6 characters) in place instead
            // of searching the whole string again for the same text.
            strText.replace(iNeedle, 6, QChar(uCodeUnit));
        }

        // Hop over the current position in both cases to avoid an endless loop.
        iNeedle = strText.indexOf(rxUnicode, iNeedle + 1);
    }

    return strText;
}
#include <assert.h>
#include <iostream>
#include <string>
#include <sstream>
#include <locale>
#include <codecvt>          // C++11
using namespace std;
// Demo: parses a run of textual "\uXXXX" escapes into a wide string, then
// prints the decoded code points and their UTF-8 byte encoding in hex.
int main()
{
    // The backslashes are doubled on purpose: the program must see the literal
    // characters '\', 'u', and the hex digits at run time. A single backslash
    // would make the compiler decode the universal-character-names itself.
    char const data[] = "\\u7cfb\\u8eca\\u4e21\\uff1a\\u6771\\u5317";

    std::istringstream stream( data );
    std::wstring ws;
    int code = 0;
    char slashCh = '\0';
    char uCh = '\0';

    // Each iteration consumes one escape: '\', 'u', then a hex number.
    // Hex extraction stops at the next backslash, which is not a hex digit.
    while( stream >> slashCh >> uCh >> std::hex >> code )
    {
        assert( slashCh == '\\' && uCh == 'u' );
        ws += static_cast<wchar_t>( code );
    }

    std::cout << "Unicode code points:" << '\n';
    for( wchar_t const wch : ws )
    {
        // Widen to an integer type so the value is printed as a number.
        std::cout << std::hex << static_cast<unsigned long>( wch ) << '\n';
    }
    std::cout << '\n';

    // The following is C++11 specific.
    // NOTE(review): std::wstring_convert / std::codecvt_utf8 are deprecated
    // since C++17; they still work here but may produce deprecation warnings.
    std::cout << "UTF-8 encoding:" << '\n';
    std::wstring_convert< std::codecvt_utf8< wchar_t > > converter;
    std::string const bytes = converter.to_bytes( ws );
    for( char const byte : bytes )
    {
        // Go through unsigned char first so bytes >= 0x80 are not sign-extended.
        std::cout << std::hex
                  << static_cast<unsigned>( static_cast<unsigned char>( byte ) )
                  << ' ';
    }
    std::cout << std::endl;
}