C/C++ argv 内存管理
C/C++ argv memory management
一种标准的C/C++程序格式。
int main(int argc, char *argv[]){}
我想知道调用 main 时,argv 数据在内存中是如何排列的。我从 Node.js 存储库中找到了函数 copy_argv()。它的工作方式就好像内存是这样排列的:
argv_area|NULL|argv_data_area
操作系统真的以这种方式处理 argv 的内存吗?
如果这取决于操作系统,那么只讨论 64 位 Linux 即可。
原始 argv 通常作为由 char * 值组成的单个连续块来处理,其后紧跟着环境变量的另一块 char * 值(即 main() 的 int main(int argc, char **argv, char **envp) 变体中的 envp,也由 environ 指向)。然后是参数字符串和环境字符串本身。
参数列表和环境很可能不是由 malloc() 本身创建的——参数和环境是由 execve() 系统调用设置的。
大约三年前,我尝试过"在 main 以外的函数中查找 argv[0]",并编写了如下所示的代码。它仍然可以在 Mac OS X Mavericks(10.9.4——最初的测试版本是 Snow Leopard 10.6)和 Ubuntu 14.04 上运行。(有更好但特定于平台的方法可以在函数中获取 argv[0],但那是另一个 SO 问题;我自己不会使用这种技术,不过它确实适用于一些常见的平台。)
#include "posixver.h"
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h> /* putenv(), setenv() */
extern char **environ; /* Should be declared in <unistd.h> */
/*
** The object of the exercise is: given just environ (since that is all
** that is available to a library function) attempt to find argv[0] (and
** hence argc).
**
** On some platforms, the layout of memory is such that the number of
** arguments (argc) is available, followed by the argument vector,
** followed by the environment vector.
**
**          argv                              environ
**          |                                 |
**          v                                 v
** | argc | argv0 | argv1 | ... | argvN | 0 | env0 | env1 | ... | envN | 0 |
**
** This applies to:
** -- Solaris 10 (32-bit, 64-bit SPARC)
** -- MacOS X 10.6 (Snow Leopard, 32-bit and 64-bit)
** -- Linux (RHEL 5 on x86/64, 32-bit and 64-bit)
**
** Sadly, this is not quite what happens on the other two Unix
** platforms. The value preceding argv0 seems to be a 0.
** -- AIX 6.1 (32-bit, 64-bit)
** -- HP-UX 11.23 IA64 (32-bit, 64-bit)
** Sub-standard POSIX support (no setenv()) and C99 support (no %zd).
**
** NB: If putenv() or setenv() is called to add an environment variable,
** then the base address of environ changes radically, moving off the
** stack onto heap, and all bets are off. Modifying an existing
** variable is not a problem.
**
** Spotting the change from stack to heap is done by observing whether
** the address pointed to by environ is more than 128 K times the size
** of a pointer from the address of a local variable.
**
** This code is nominally incredibly machine-specific - but actually
** works remarkably portably.
*/
/*
** Result of the argument search: a pointer to the first slot of the
** argument vector together with the number of arguments found.
*/
struct Arguments
{
    char **argv;    /* first slot of the argument vector */
    size_t argc;    /* argument count associated with argv */
};
typedef struct Arguments Arguments;
/*
** Print one slot of a vector of string pointers on a line of its own:
** the address of the slot, the pointer value stored in it, and the
** string that pointer refers to.
**
** tag - label printed before the index (e.g. "argv", "test")
** i   - index printed in square brackets after the tag
** ptr - address of the char * slot to describe; it is dereferenced
**
** A slot holding a null pointer is shown as "<null>" instead of being
** passed to %s.
*/
static void print_cpp(const char *tag, int i, char **ptr)
{
    uintptr_t p = (uintptr_t)ptr;
    /* Terminate the record with a real newline escape, not a literal 'n'. */
    printf("%s[%d] = 0x%" PRIXPTR " (0x%" PRIXPTR ") (%s)\n",
           tag, i, p, (uintptr_t)(*ptr), (*ptr == 0 ? "<null>" : *ptr));
}
/* Distance threshold in bytes: 128 K times the size of a pointer. */
enum { MAX_DELTA = 128 * 1024 * sizeof(void *) };
/*
** Attempt to recover argc and argv given only the global environ.
**
** The byte distance between environ and the address of a local variable
** is compared with MAX_DELTA.  When the distance is smaller, the slots
** immediately below environ are scanned backwards (printing each one
** with the "test" tag) until a slot is found whose value, read as an
** integer, equals the number of slots stepped over minus two, or whose
** value is zero.  The count and the address of the following slot are
** reported as argc and argv.
**
** When the distance is MAX_DELTA or more, a fixed fallback vector
** { "<unknown>", 0 } with argc == 1 is returned instead.
**
** The result is also printed to standard output (argc, then every slot
** from argv[0] through argv[argc] inclusive).
**
** NOTE(review): reading memory below environ relies on the platform
** stack layout described in the comment above this code; it is not
** portable C.
*/
static Arguments find_argv0(void)
{
    static char *dummy[] = { "<unknown>", 0 };
    Arguments args;
    uintptr_t i;
    char **base = environ - 1;  /* only the address of this local is used */
    uintptr_t delta = ((uintptr_t)&base > (uintptr_t)environ) ? (uintptr_t)&base - (uintptr_t)environ : (uintptr_t)environ - (uintptr_t)&base;

    if (delta < MAX_DELTA)
    {
        /*
        ** Start at environ[-2]; stop at the slot whose value plus two
        ** equals the distance walked, or at a slot holding zero.
        */
        for (i = 2; (uintptr_t)(*(environ - i) + 2) != i && (uintptr_t)(*(environ - i)) != 0; i++)
            print_cpp("test", i, environ-i);
        args.argc = i - 2;
        args.argv = environ - i + 1;
    }
    else
    {
        args.argc = 1;
        args.argv = dummy;
    }

    /* argc is a size_t, so %zu is the matching conversion. */
    printf("argc = %zu\n", args.argc);
    for (i = 0; i <= args.argc; i++)
        print_cpp("argv", i, &args.argv[i]);
    return args;
}
/*
** Obtain the argument vector from find_argv0() and print it: a heading,
** the argument count, then one line per slot from argv[0] through
** argv[argc] inclusive.  A null slot is printed as "<null>".
*/
static void print_arguments(void)
{
    Arguments args = find_argv0();

    printf("Command name and arguments\n");
    /* size_t values are printed with %zu. */
    printf("argc = %zu\n", args.argc);
    for (size_t i = 0; i <= args.argc; i++)
        printf("argv[%zu] = %s\n", i, (args.argv[i] ? args.argv[i] : "<null>"));
}
/*
** Report where environ sits relative to the argv passed to main().
**
** argc - argument count as received by main()
** argv - argument vector as received by main()
**
** Prints the two addresses and their distance (in pointer-sized slots),
** then every argv slot with the "chkv" tag.  If the distance is greater
** than argc + 1 the function returns 0 without reading anything below
** environ.  Otherwise it prints the argc + 1 slots below environ
** ("chkr") plus the two slots below those ("chkF") and returns 1.
*/
static int check_environ(int argc, char **argv)
{
    size_t n = argc;
    size_t i;
    unsigned long delta = (argv > environ) ? argv - environ : environ - argv;

    printf("environ = 0x%lX; argv = 0x%lX (delta: 0x%lX)\n", (unsigned long)environ, (unsigned long)argv, delta);
    for (i = 0; i <= n; i++)
        print_cpp("chkv", i, &argv[i]);
    if (delta > (unsigned long)argc + 1)
        return 0;
    for (i = 1; i < n + 2; i++)
    {
        printf("chkr[%zu] = 0x%lX (0x%lX) (%s)\n", i, (unsigned long)(environ - i), (unsigned long)(*(environ - i)),
               (*(environ-i) ? *(environ-i) : "<null>"));
        /* Flush after every line so output survives a fault on the next read. */
        fflush(0);
    }
    /* The next two slots are printed as raw values only, not as strings. */
    i = n + 2;
    printf("chkF[%zu] = 0x%lX (0x%lX)\n", i, (unsigned long)(environ - i), (unsigned long)(*(environ - i)));
    i = n + 3;
    printf("chkF[%zu] = 0x%lX (0x%lX)\n", i, (unsigned long)(environ - i), (unsigned long)(*(environ - i)));
    return 1;
}
/*
** Run the argv search three times: with the environment untouched,
** after setenv() on SHELL, and after putenv() of CODSWALLOP, printing
** the findings at each stage.
*/
int main(int argc, char **argv)
{
    printf("Before setting environment\n");
    if (check_environ(argc, argv))
        print_arguments();

    //putenv("TZ=US/Pacific");
    setenv("SHELL", "/bin/csh", 1);
    printf("After modifying environment\n");
    if (check_environ(argc, argv) == 0)
        printf("Modifying environment messed everything up\n");
    print_arguments();

    putenv("CODSWALLOP=nonsense");
    printf("After adding to environment\n");
    if (check_environ(argc, argv) == 0)
        printf("Adding environment messed everything up\n");
    print_arguments();
    return 0;
}
Mac OS X 的输出示例:
Before setting environment
environ = 0x7FFF584D04C8; argv = 0x7FFF584D0498 (delta: 0x6)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[1] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkr[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkr[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkr[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkr[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkF[7] = 0x7FFF584D0490 (0x5)
chkF[8] = 0x7FFF584D0488 (0x0)
test[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
test[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
test[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
test[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
test[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argc = 5
argv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
argv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
argv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
argv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
argv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Command name and arguments
argc = 5
argv[0] = ./find_argv0
argv[1] = macedonian
argv[2] = obelisk
argv[3] = mental breakdown
argv[4] = testing: 1, 2, 3
argv[5] = <null>
After modifying environment
environ = 0x7FFF584D04C8; argv = 0x7FFF584D0498 (delta: 0x6)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[1] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkr[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkr[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkr[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkr[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkF[7] = 0x7FFF584D0490 (0x5)
chkF[8] = 0x7FFF584D0488 (0x0)
test[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
test[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
test[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
test[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
test[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argc = 5
argv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
argv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
argv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
argv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
argv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Command name and arguments
argc = 5
argv[0] = ./find_argv0
argv[1] = macedonian
argv[2] = obelisk
argv[3] = mental breakdown
argv[4] = testing: 1, 2, 3
argv[5] = <null>
After adding to environment
environ = 0x7FB1EA403B60; argv = 0x7FFF584D0498 (delta: 0x9ADC19927)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Adding environment messed everything up
argc = 1
argv[0] = 0x107730040 (0x10772FEC0) (<unknown>)
argv[1] = 0x107730048 (0x0) (<null>)
Command name and arguments
argc = 1
argv[0] = <unknown>
argv[1] = <null>
你链接的 Node.js 代码实际上并没有对 argv 及其所引用数据的布局做任何假设。相反,你提到的布局是该函数的输出格式。它确实像你说的那样创建了一个数组,但并不要求输入也这样排列(否则它只需做一次 memcpy() 就行了)。
换句话说,您描述的特殊布局对于任何程序的 argv 都足够,但不是必需的。
为什么 Node 会这样做? 它就在评论中:
// Logic to duplicate argv as Init() modifies arguments
// that are passed into it.
char **argv_copy = copy_argv(argc, argv);
// This needs to run *before* V8::Initialize()
// Use copy here as to not modify the original argv:
Init(argc, argv_copy);
这是为什么呢?因为 Init() 调用了 parseArgs(),后者会从 argv 中去除它能识别的几个选项,其余的留给其他地方处理。这在我看来有点迂回,但你找到的 argv 复制例程的全部目的,只是得到一份程序可以当作 argv 使用的东西,而不是对所用的确切布局有任何特定要求。不过,这是一种简单的布局,以后只需要调用一次 free()。
- 当vector是全局变量时,c++中vector的内存管理
- 我有一个线程 1:EXC_BAD_ACCESS(代码 = 1,地址 = 0x8)错误.我认为这是由于内存管理不好.我可以
- C++将字符串传递给 C 库以进行内存管理
- 从函数返回时C++内存管理
- 函数指针和 lambda 的内存管理
- 自定义内存管理器在发布模式下工作正常,但在调试模式下则不然
- C++中的内存管理
- C和C++中的内存管理有什么区别
- 字符 * 未从重载运算符或内存管理问题正确返回
- 如何在源代码中使用执行策略检测 C++17 的扩展内存管理算法的可用性?
- 底层指针和内存管理
- 智能指针,避免使用QNetworkAccessManager时进行手动内存管理
- c++中的内存管理问题
- 使用矢量时的内存管理
- 循环和内存管理中的指针算术C++?
- C++堆栈内存管理问题
- C 内存管理中的课程如何管理 - 研究
- 不可变数据模型的内存管理
- C++ 使用数组初始化时的 STL 向量内存管理
- SFML 纹理内存管理