从文件读取时内存泄漏

Memory leak when read from file

本文关键字:内存 泄漏 读取 文件      更新时间:2023-10-16

我正试图从XML文件中读取数据,并将每个元素("<some data/>")存储在向量容器vector<TCHAR*>中,但我想知道为什么任务管理器显示的内存使用量远大于向量大小(~80mb而不是~59mb):

#define _UNICODE
#include<tchar.h>
#include<iostream>
#include<windows.h>
#include<vector>
using namespace std;
HANDLE hFile;
HANDLE hThread;
vector<TCHAR*> tokens;
DWORD tokensSize;
DWORD WINAPI Thread(LPVOID lpVoid);

// Entry point of the question's program: reads db.xml into a heap buffer,
// passes that buffer to a worker thread (Thread) and waits for it to finish,
// then prints the summed size of every string stored in the global tokens vector.
void main()
{   
tokensSize = 0;
// Open the input file for reading; share mode 0 requests exclusive access.
hFile = CreateFile("db.xml",GENERIC_READ,0,NULL,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,NULL);
if(hFile == INVALID_HANDLE_VALUE)   {
cout<<"CreateFile Error # "<<GetLastError()<<endl;      
}
// NOTE(review): execution continues with the invalid handle when the open failed.
DWORD fileSize = GetFileSize(hFile,NULL);
cout<<"fileSize = "<<fileSize<<" bytes = "<<fileSize/1024/1024<<" mb"<<endl;
// One extra element is allocated beyond the file contents (room for a terminator).
TCHAR* buffer = new TCHAR[fileSize / sizeof(TCHAR) + 1];
// NOTE(review): only the first fileSize bytes are zeroed, so the extra
// trailing element is not fully covered by this call.
ZeroMemory(buffer,fileSize);
DWORD bytesRead;
// Read the raw file bytes straight into the buffer.
if(!ReadFile(hFile,buffer,fileSize,&bytesRead,NULL)){
cout<<"ReadFile Error # "<<GetLastError()<<endl;        
}
CloseHandle(hFile);
// Hand the buffer to the worker thread (Thread deletes it) and block until the thread ends.
hThread = CreateThread(NULL,0,Thread,(LPVOID)buffer,0,NULL);    
WaitForSingleObject(hThread,INFINITE);
// Sum the size in bytes of every token, counting each terminating null.
for(int i=0;i<tokens.size();i++)
tokensSize+=(_tcslen(tokens[i])+1)*sizeof(TCHAR);
cout<<"vector size = "<<tokensSize<<" bytes = "<<tokensSize/1024/1024<<" mb"<<endl;
// Wait for input so the process stays alive.
// NOTE(review): nothing in this function frees the strings held in tokens or closes hThread.
cin.get();  
}
// Worker thread procedure. lpVoid is the character buffer allocated in main().
// Copies the buffer into a wstring, deletes the buffer, then cuts the text
// after every '>' and stores each piece in the global tokens vector as a
// separately allocated, null-terminated TCHAR array. Always returns 0.
DWORD WINAPI Thread(LPVOID lpVoid)
{
// Copy the buffer contents into a wstring; the original buffer is released next.
wstring entireDB = (TCHAR*)lpVoid;
// NOTE(review): delete[] is applied through the void pointer rather than the
// TCHAR* type the buffer was allocated with - confirm this is intended.
delete[]lpVoid; 
wstring currentElement;
wstring::size_type lastPos = 0;
wstring::size_type next;
// Position of the first '>' (end of the first element).
next = entireDB.find(_T(">"),lastPos);
TCHAR* szStr;
// The body runs once before next is tested, so one piece is stored even when
// the first search found nothing.
do
{               
// Take the text from lastPos up to and including the '>' at next.
currentElement = entireDB.substr(lastPos,next+1-lastPos);
// Each token gets its own heap array; the vector stores only the raw pointer,
// and this function never deletes these arrays.
szStr = new TCHAR[currentElement.length()+1];
_tcscpy(szStr,currentElement.c_str());
tokens.push_back(szStr);
// Continue searching right after the '>' just consumed.
lastPos = next+1;
next = entireDB.find(_T(">"),lastPos);
}
while(next != wstring::npos);
// Empty the working copy of the file contents.
entireDB.clear();
return 0;
}

输出:fileSize ≈ 57 mb,vector size ≈ 58 mb

但TaskManager显示约810mb。我做错了什么?THNX!

首先,正如另一位回答者Aesthete所指出的,tokens向量用完之后,其中的内容从未被清理。您应该手动清理,或者把tokens向量改为使用能够自行释放内存的元素类型,如std::string或std::wstring。

这就引出了下面的代码。请将它与您现有的代码并排对照,其中有不少改动需要比较。在编译并运行之前,您可能看不出内存占用的差异,而结果可能会让您感到惊讶。

主要变化

  • 全局tokens现在是std::wstring的向量,而不是原始wchar_t指针
  • 使用MultiByteToWideChar翻译输入文件
  • 动态分配一个std::wstring作为线程参数。这样可以省去文件内容的一份完整副本。内容解析完成后,由线程负责delete这个wstring
  • 使用_beginthreadex()启动线程。其根本原因在于代码使用了C/C++运行库:过去,运行库会为每个线程设置各种必须正确清理的线程本地存储,而通过_beginthreadex()启动线程时,这些设置和清理才会被正确完成。它的用法几乎与CreateThread()相同,但老实说,我期待着有一天MS能把他们的东西整合好,像文明世界的其他地方一样,正式为我们提供std::thread

微小/无意义的更改

  • 在适当的情况下,全局变量被移到局部作用域。这意味着现在唯一真正的全局变量是tokens向量
  • 线程过程现在将子字符串直接推送到tokens向量
  • 使用argv[1]作为文件名(这样很容易调试,没有其他特殊原因)。可以根据需要更改回硬编码的文件名

我希望这能给您一些清理代码的思路,更重要的是,让您看到如何在不必到处手写new和delete的情况下完成几乎所有任务。

注意:这不会检查输入文件中的字节顺序标记。我相信你所说的UTF8是直截了当的,文件开头没有BOM。如果您的输入文件确实有BOM表,则需要调整读取文件的代码以考虑到这一点。

#include <windows.h>
#include <tchar.h>
#include <process.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;
// global vector of tokens
vector<wstring> tokens;
// format required by _beginthreadex()
unsigned int _stdcall ThreadProc(void *p);
int main(int argc, char *argv[])
{
HANDLE hThread = NULL;
std::string xml;
std::wstring* pwstr = NULL;
// check early exit
if (argc != 2)
{
cout << "Usage: " << argv[0] << " filename" << endl;
return EXIT_FAILURE;
}
// use runtime library for reading the file content. the WIN32 CreateFile
//  API is required for some things, but not for general file ops.
HANDLE hFile = CreateFileA(argv[1], GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
if (hFile != INVALID_HANDLE_VALUE)
{
DWORD dwFileSize = GetFileSize(hFile, NULL);
if (dwFileSize > 0)
{
// allocate a string large enough for the whole file.
std::string xml(dwFileSize, 0);
DWORD bytesRead = 0;
if (ReadFile(hFile, &xml.at(0), dwFileSize, &bytesRead, NULL) && (bytesRead == dwFileSize))
{
// invoke MB2WC to determine wide-char requirements
int ires = MultiByteToWideChar(CP_UTF8, 0, xml.c_str(), -1, NULL, 0);
if (ires > 0)
{
// allocate a wstring for our thread parameter.
pwstr = new wstring(ires, 0);
MultiByteToWideChar(CP_UTF8, 0, xml.c_str(), -1, &pwstr->at(0), ires);
// launch thread. it own the wstring we're sending, including cleanup.
hThread = (HANDLE)_beginthreadex(NULL, 0, ThreadProc, pwstr, 0, NULL);
}
}
}
// release the file handle
CloseHandle(hFile);
}
// wait for potential thread
if (hThread != NULL)
{
WaitForSingleObject(hThread, INFINITE);
CloseHandle(hThread);
}
// report space taken by tokens
size_t tokensSize = 0;
for (vector<wstring>::const_iterator it = tokens.begin(); it != tokens.end(); ++it)
tokensSize += it->size()+1;
cout << "tokens count = " << tokens.size() << endl
<< "tokens size = "<< tokensSize <<" bytes" << endl;
cin.get();  
}
// Thread entry point (signature required by _beginthreadex()).
//
// p must point to a dynamically allocated wstring. This function takes
// ownership of it and deletes it before returning. Every piece of the text
// that ends with '>' is appended to the global `tokens` vector; any text after
// the last '>' is discarded.
//
// Returns EXIT_SUCCESS on success, EXIT_FAILURE if p is NULL or if splitting
// threw an exception (for example on memory exhaustion).
unsigned int _stdcall ThreadProc(void *p)
{
    // early exit on null insertion
    if (p == NULL)
        return EXIT_FAILURE;

    // use string passed to us.
    wstring* pEntireDB = static_cast<wstring*>(p);
    unsigned int result = EXIT_SUCCESS;
    try
    {
        wstring::size_type last = 0;
        wstring::size_type next = pEntireDB->find(L'>', last);
        while (next != wstring::npos)
        {
            // keep the text from `last` up to and including the '>' at `next`.
            tokens.push_back(pEntireDB->substr(last, next - last + 1));
            last = next + 1;
            next = pEntireDB->find(L'>', last);
        }
    }
    catch (...)
    {
        // an exception must not escape a thread entry point; report failure
        // through the exit code and fall through to the cleanup below.
        result = EXIT_FAILURE;
    }

    // delete the wstring (no longer needed), on success and failure alike.
    delete pEntireDB;
    return result;
}

在这里分配内存,在do-while循环中:

szStr = new TCHAR[currentElement.length()+1];

但你从未用delete[]运算符释放它