提取文件名的子字符串

Extract substrings of a filename

本文关键字:字符串 文件名 提取      更新时间:2023-10-16

在C/C++中,如何从c:\Blabla - dsf\blup\AAA - BBB\blabla.bmp中提取子字符串AAA和BBB?

也就是说,提取文件路径中最后一个文件夹名里“-”前后的两部分。

提前感谢。

(PS:如果可能的话,请不要使用.NET框架之类的东西,我很容易迷糊)

#include <iostream>
#include <sstream>
#include <string>
using namespace std;
#include <windows.h>
#include <Shlwapi.h> // link with shlwapi.lib

// Prints the '-'-separated parts of the last directory name of a Windows
// path, one part per line. The parts are not trimmed, so the spaces around
// the dash are still present in the output.
int main()
{
    // Backslashes have to be escaped inside a string literal.
    char buffer_1[ ] = "c:\\Blabla - dsf\\blup\\AAA - BBB\\blabla.bmp";
    char *lpStr1 = buffer_1;
    // Remove the file name from the string (the buffer is edited in place).
    // The ANSI variant is named explicitly because the buffer holds char,
    // so the call also compiles when UNICODE is defined.
    PathRemoveFileSpecA(lpStr1);
    string s(lpStr1);
    // Find the last directory name: everything after the last backslash
    stringstream ss(s.substr(s.rfind('\\') + 1));
    // Split the last directory name into tokens separated by '-'
    while (getline(ss, s, '-'))
        cout << s << endl;
}

注释说明

注意:这段代码不会去除输出中各部分前后的空格——如果你也想去除空格,请参考相关资料另行处理。

这可以相对容易地用正则表达式完成:如果你有C++11,就用std::regex;如果没有,就用boost::regex:

static std::regex( R"(.*\(w+)s*-s*(w+)\[^\]*$" );
smatch results;
if ( std::regex_match( path, results, regex ) ) {
    std::string firstMatch = results[1];
    std::string secondMatch = results[2];
    //  ...
}

此外,你的工具箱里肯定应该有split和trim这两个函数:

//  Predicate functor: operator() returns true for every character that is
//  NOT classified under the ctype mask `test` in the locale given at
//  construction (the global locale by default).
template <std::ctype_base::mask test>
class IsNot
{
    std::locale ensureLifetime;     //  Copy of the locale that supplied the facet below
    std::ctype<char> const* ctype;  //  Pointer to allow assignment
public:
    //  The constructor has to carry the class name; it was spelled `Is`,
    //  which is not a valid constructor and did not compile.
    IsNot( std::locale const& loc = std::locale() )
        : ensureLifetime( loc )
        , ctype( &std::use_facet<std::ctype<char>>( loc ) )
    {
    }
    //  True when `ch` does not match the mask `test`.
    bool operator()( char ch ) const
    {
        return !ctype->is( test, ch );
    }
};
//  Predicate that is true for any non-whitespace character.
typedef IsNot<std::ctype_base::space> IsNotSpace;
//  Cuts `original` at every occurrence of `separator` and returns the
//  pieces in order. The separators themselves are dropped; empty pieces are
//  kept, so the result always has (number of separators + 1) elements.
std::vector<std::string>
split( std::string const& original, char separator )
{
    std::vector<std::string> fields;
    std::string::size_type fieldStart = 0;
    for ( std::string::size_type sepPos = original.find( separator, fieldStart );
          sepPos != std::string::npos;
          sepPos = original.find( separator, fieldStart ) ) {
        fields.push_back( original.substr( fieldStart, sepPos - fieldStart ) );
        fieldStart = sepPos + 1;    //  resume just past the separator
    }
    //  Whatever follows the last separator (possibly nothing) is the final field.
    fields.push_back( original.substr( fieldStart ) );
    return fields;
}
std::string
trim( std::string const& original )
{
    std::string::const_iterator end
        = std::find_if( original.rbegin(), original.rend(), IsNotSpace() ).base();
    std::string::const_iterator begin
        = std::find_if( original.begin(), end, IsNotSpace() );
    return std::string( begin, end );
}

(这些只是你这里需要的部分。你显然还会想要一整套IsXxx和IsNotXxx谓词、一个可以按正则表达式拆分的split、一个可以传入谓词对象来指定要修剪哪些字符的trim,等等。)

无论如何,怎样用split和trim得到你想要的结果应该是显而易见的。

下面的代码用纯C完成全部工作和验证:

/*
 * Extracts the two parts around the first '-' in the last directory name of
 * a backslash-separated path, e.g. "c:\dir\AAA - BBB\file.bmp" gives "AAA"
 * and "BBB". Spaces around each part are removed.
 *
 * source     NUL-terminated path; the file name is whatever follows the last
 *            backslash.
 * firstOut   receives a malloc'ed copy of the part before the dash.
 * secondOut  receives a malloc'ed copy of the part after the dash.
 *
 * Returns 1 on success; the caller must free() both strings. Returns -1 when
 * an argument is NULL, the path has no directory, the last directory name has
 * no '-', either part is empty after trimming, or allocation fails. On
 * failure the output pointers are left untouched.
 */
int FindParts(const char* source, char** firstOut, char** secondOut)
{
    const char* dirBegin;       /* first character of the last directory name */
    const char* dirEnd;         /* the backslash that ends it (exclusive end) */
    const char* dash;
    const char* firstBegin;
    const char* firstEnd;       /* exclusive */
    const char* secondBegin;
    const char* secondEnd;      /* exclusive */
    size_t      firstLen;
    size_t      secondLen;
    char*       first;
    char*       second;

    if (!source || !firstOut || !secondOut)
        return -1;

    dirEnd = strrchr(source, '\\');
    if (!dirEnd || (dirEnd == source))
        return -1;

    /* Walk back to just after the previous backslash, or to the start of the
       string when there is none, without skipping a real character. */
    for (dirBegin = dirEnd; (dirBegin != source) && (dirBegin[-1] != '\\'); --dirBegin)
        ;

    /* Only look for the dash inside the directory name itself. */
    dash = (const char*)memchr(dirBegin, '-', (size_t)(dirEnd - dirBegin));
    if (!dash)
        return -1;

    /* Trim spaces using half-open ranges, so one-character parts are valid
       and the pointers never leave [dirBegin, dirEnd]. */
    firstBegin = dirBegin;
    firstEnd   = dash;
    while ((firstBegin != firstEnd) && (*firstBegin == ' '))
        ++firstBegin;
    while ((firstEnd != firstBegin) && (firstEnd[-1] == ' '))
        --firstEnd;

    secondBegin = dash + 1;
    secondEnd   = dirEnd;
    while ((secondBegin != secondEnd) && (*secondBegin == ' '))
        ++secondBegin;
    while ((secondEnd != secondBegin) && (secondEnd[-1] == ' '))
        --secondEnd;

    if ((firstBegin == firstEnd) || (secondBegin == secondEnd))
        return -1;

    firstLen  = (size_t)(firstEnd - firstBegin);
    secondLen = (size_t)(secondEnd - secondBegin);

    first  = (char*)malloc(firstLen + 1);
    second = (char*)malloc(secondLen + 1);
    if (!first || !second)
    {
        /* free(NULL) is a no-op, so this releases whichever one succeeded. */
        free(first);
        free(second);
        return -1;
    }

    memcpy(first, firstBegin, firstLen);
    first[firstLen] = '\0';
    memcpy(second, secondBegin, secondLen);
    second[secondLen] = '\0';

    *firstOut  = first;
    *secondOut = second;
    return 1;
}

下面是纯C++的解决方案(不使用boost,也不需要C++11);不过James Kanze (https://stackoverflow.com/a/16605408/1032277)的正则表达式解决方案仍然是最通用、最优雅的:

// Removes leading and trailing space characters (' ' only) from `source`,
// in place. A string that is empty or consists only of spaces becomes empty.
inline void Trim(std::string& source)
{
    const std::string::size_type first = source.find_first_not_of(' ');
    if (std::string::npos == first)
    {
        // Nothing but spaces (or nothing at all): the trimmed result is empty.
        // Without this branch an all-space string was left unchanged.
        source.clear();
        return;
    }
    // A non-space character exists, so this search cannot return npos.
    const std::string::size_type last = source.find_last_not_of(' ');
    source = source.substr(first, last - first + 1);
}
// Splits the last directory name of a backslash-separated path at its first
// '-' and stores the two pieces, passed through Trim, in `first` and `second`.
// Example: "c:\dir\AAA - BBB\file.bmp" gives "AAA" and "BBB".
//
// Returns false, leaving the outputs untouched, when the path has no
// backslash, starts with its only backslash, or has no '-' in the last
// directory name. Does not check that the pieces are non-empty.
inline bool FindParts(const std::string& source, std::string& first, std::string& second)
{
    // Backslash that separates the last directory from the file name.
    const std::string::size_type last = source.rfind('\\');
    if ((std::string::npos == last) || !last)
        return false;

    // Backslash before the last directory; without one the directory name
    // starts at the beginning of the string. This is spelled out instead of
    // relying on npos + 1 wrapping around to 0.
    const std::string::size_type previous = source.rfind('\\', last - 1);
    const std::string::size_type dirStart =
        (std::string::npos == previous) ? 0 : previous + 1;

    const std::string::size_type middle = source.find('-', dirStart);
    if ((std::string::npos == middle) || (middle > last))
        return false;   // no dash, or the dash belongs to the file name

    first  = source.substr(dirStart, middle - dirStart);
    second = source.substr(middle + 1, last - middle - 1);
    Trim(first);
    Trim(second);
    return true;
}

使用std::string的rfind函数:rfind(char c, size_t pos = npos)

  1. 使用rfind从末尾查找字符'\'(得到pos1)
  2. 使用rfind查找前一个字符'\'(得到pos2)
  3. 获取位置pos2和pos1之间的子字符串。使用子字符串函数。
  4. 查找字符'-' (pos3)
  5. 提取pos3和pos1、pos3和pos2之间的2个子字符串
  6. 删除子字符串中的空格

结果子字符串将是AAA和BBB