递归目录爬取函数未检测到所有文件夹

Recursive directory crawler function doesn't detect all folders

本文关键字:文件夹 检测 功能 轨道 递归      更新时间:2023-10-16

我正在编写一个目录爬取程序(crawler),用来列出某个目录下的所有目录、子目录和文件,但不知为何有些文件夹被爬取程序跳过了。我检查过代码:如果直接把被跳过的文件夹的绝对路径传进去,就能得到其余的文件。基本上,它是递归地遍历目录,在所有目录中搜索所有子目录。

bool crawldirs(wstring path, wstring mask, vector<wstring>& files) {
    HANDLE hFind = INVALID_HANDLE_VALUE;
    WIN32_FIND_DATA ffd;
    wstring spec;
    stack<wstring> directories;
    directories.push(path);
    files.clear();
    while (!directories.empty()) {
        path = directories.top();
        spec = path + L"\" + mask;
        directories.pop();
        hFind = FindFirstFile(spec.c_str(), &ffd);
        if (hFind == INVALID_HANDLE_VALUE) {
            return false;
        }
        do {
            if (wcscmp(ffd.cFileName, L".") != 0 &&
                wcscmp(ffd.cFileName, L"..") != 0) {
                if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
                    directories.push(path + L"\" + ffd.cFileName);
                }
                else {
                    files.push_back(path + L"\" + ffd.cFileName);
                }
            }
        } while (FindNextFile(hFind, &ffd) != 0);
        if (GetLastError() != ERROR_NO_MORE_FILES) {
            FindClose(hFind);
            return false;
        }
        FindClose(hFind);
        hFind = INVALID_HANDLE_VALUE;
    }
    return true;
}
void crawlDirectories() {
    vector<wstring> files;
    if (crawldirs(L"D:\", L"*", files)) {
        for (vector<wstring>::iterator it = files.begin();
            it != files.end();
            ++it) {
            wcout << it->c_str() << endl;
        }
    }
}
// Program entry point: runs the directory crawl and reports success.
// The command-line arguments are accepted but not used.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    crawlDirectories();

    return 0;
}
C:\Users\xxxx\Documents\xxxx\strix\Release>strix.exe
D:\\poc.c
D:\\things.txt
D:\\xxxx\Videos\movies\No Reservations (2007) [BluRay] [720p] [YTS.AM]\No.Reservations.2007.720p.BluRay.x264-[YTS.AM].mp4
D:\\xxxx\Videos\movies\No Reservations (2007) [BluRay] [720p] [YTS.AM]\No.Reservations.2007.720p.BluRay.x264-[YTS.AM].srt
D:\\xxxx\Music
C:\Users\xxxx\Documents\xxxx\strix\Release>D:
D:\>dir
 Volume in drive D is Bk-Storage
 Volume Serial Number is 7AC4-6C0A
 Directory of D:\
06/30/2019  12:30 AM             1,346 poc.c
06/30/2019  12:30 AM               210 things.txt
05/30/2019  04:45 PM    <DIR>          xxxx
               2 File(s)          1,556 bytes
               1 Dir(s)  961,850,376,192 bytes free
D:\>cd xxxx
D:\xxxx>dir
 Volume in drive D is Bk-Storage
 Volume Serial Number is 7AC4-6C0A
 Directory of D:\xxxx
05/30/2019  04:45 PM    <DIR>          .
05/30/2019  04:45 PM    <DIR>          ..
05/27/2019  11:33 PM    <DIR>          Documents
05/22/2019  07:54 AM    <DIR>          Pictures
05/22/2019  12:56 PM    <DIR>          Music
04/25/2019  11:07 PM    <DIR>          Videos
               0 File(s)              0 bytes
               6 Dir(s)  961,850,376,192 bytes free

您需要判断当前目录是否为空:如果为空,就把该目录本身放入 files 向量中;如果不为空,则进入该目录并继续查找。

bool crawldirs(wstring path, wstring mask, vector<wstring>& files) {
    HANDLE hFind = INVALID_HANDLE_VALUE;
    WIN32_FIND_DATA ffd;
    wstring spec;
    stack<wstring> directories;
    directories.push(path);
    files.clear();
    while (!directories.empty()) {
        path = directories.top();
        spec = path + L"\" + mask;
        directories.pop();
        BOOL isEmpty = true;//determine if it's an empty folder.
        hFind = FindFirstFile(spec.c_str(), &ffd);
        if (hFind == INVALID_HANDLE_VALUE) {
            return false;
        }
        do {
            if (wcscmp(ffd.cFileName, L".") != 0 &&
                wcscmp(ffd.cFileName, L"..") != 0) {
                if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
                    directories.push(path + L"\" + ffd.cFileName);
                }
                else {
                    files.push_back(path + L"\" + ffd.cFileName);
                    isEmpty = false;//It not an empty folder since it has normal file.
                }
            }
        } while (FindNextFile(hFind, &ffd) != 0);
        if (isEmpty)
        {
            files.push_back(path);
        }
        if (GetLastError() != ERROR_NO_MORE_FILES) {
            FindClose(hFind);
            return false;
        }
        FindClose(hFind);
        hFind = INVALID_HANDLE_VALUE;
    }
    return true;
}

或者不使用 while 循环,改用递归调用来列出所有目录、子目录和文件,这样代码会更清晰。

/**
 * Recursively lists a directory tree on standard output.
 *
 * Every file is printed as "<full path>   <size> bytes". A directory that
 * contains no entries besides "." and ".." is printed as "<path>   [DIR]".
 *
 * @param szDir  NUL-terminated directory path, with or without a trailing
 *               separator. The buffer is only read: the search pattern is
 *               assembled in a local buffer, so nothing is appended to the
 *               caller's string and its capacity does not matter.
 * @return false if szDir is NULL, if the search pattern would not fit into
 *         MAX_PATH characters, or if FindFirstFile fails for this directory;
 *         true otherwise. Failures inside subdirectories are not propagated,
 *         so the listing continues with the remaining entries.
 */
bool RetrieveFile(TCHAR* szDir)
{
    if (szDir == NULL)
        return false;

    // "dir\" prefix shared by the search pattern and by every entry path.
    TCHAR szPrefix[MAX_PATH] = { 0 };
    size_t prefixLen = _tcslen(szDir);
    if (prefixLen + 3 > MAX_PATH) // room for '\', '*' and the terminator
        return false;
    _tcscpy(szPrefix, szDir);
    if (prefixLen == 0 ||
        (szPrefix[prefixLen - 1] != _T('\\') && szPrefix[prefixLen - 1] != _T('/')))
    {
        szPrefix[prefixLen++] = _T('\\');
        szPrefix[prefixLen] = _T('\0');
    }

    TCHAR szPattern[MAX_PATH] = { 0 };
    _tcscpy(szPattern, szPrefix);
    _tcscat(szPattern, _T("*"));

    WIN32_FIND_DATA ffd;
    HANDLE hFind = FindFirstFile(szPattern, &ffd);
    if (hFind == INVALID_HANDLE_VALUE) {
        return false;
    }

    bool isEmpty = true;
    do
    {
        // Skip the self and parent pseudo-entries to avoid infinite recursion.
        if (!_tcscmp(ffd.cFileName, _T(".")) || !_tcscmp(ffd.cFileName, _T("..")))
            continue;

        // Files and subdirectories alike make the directory non-empty.
        isEmpty = false;

        // Entries whose full path does not fit into MAX_PATH are skipped
        // instead of overflowing the local buffer.
        if (prefixLen + _tcslen(ffd.cFileName) + 1 > MAX_PATH)
            continue;

        TCHAR szEntry[MAX_PATH] = { 0 };
        _tcscpy(szEntry, szPrefix);
        _tcscat(szEntry, ffd.cFileName);

        if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
        {
            RetrieveFile(szEntry); // best effort: an unreadable subdirectory is ignored
        }
        else
        {
            LARGE_INTEGER filesize;
            filesize.LowPart = ffd.nFileSizeLow;
            filesize.HighPart = ffd.nFileSizeHigh;
            // QuadPart is a 64-bit value, so it needs %lld rather than %ld.
            _tprintf(TEXT("%s   %lld bytes\n"), szEntry, filesize.QuadPart);
        }
    } while (FindNextFile(hFind, &ffd) != 0);

    if (isEmpty)
        _tprintf(TEXT("%s   [DIR]\n"), szDir);

    FindClose(hFind);
    return true;
}

用法:

TCHAR dir[MAX_PATH] = _T("D:\");
RetrieveFile(dir);