是否可以在跨内核线程迁移后强制重新加载 thread_local 变量?

Is it possible to force a reload of a thread_local variable after migration across kernel threads?

本文关键字：thread_local 变量、内核线程、迁移、重新加载 | 更新时间：2023-10-16

我正在内核线程之上实现用户线程，并观察到：当用户线程在内核线程之间迁移时，即使变量同时被标记为 volatile，thread_local 变量仍然会从之前那个内核线程的 TLS 位置读取。

由于编译器仅将用户级swapcontext视为函数调用，因此下面的示例演示了简单函数调用的问题。

#include <stdio.h>
// Plain two-int record that the thread-local pointer `foo` points at.
struct Foo {
int x;
int y;
};
// Per-thread pointer to a Foo (GCC `__thread`); the pointer object itself is
// volatile-qualified.
__thread Foo* volatile foo;
// Opaque call whose body is a single `nop`. In this example it stands in for
// a user-level context switch (e.g. swapcontext), which the compiler treats
// as an ordinary function call.
void bar() {
asm("nop");
}
// Writes foo->x, calls bar(), then writes foo->y. The point of the example is
// whether the address of `foo` is computed again after bar() returns.
void f() {
foo->x = 5;
bar();
// Empty asm statement with a "memory" clobber, placed between the call and
// the second access to foo.
asm volatile("":::"memory");
// We desire a second computation of the address of foo here as an offset
// from the FS register.
foo->y = 7;
}
// Allocates a Foo for the calling thread's `foo`, runs f(), then frees it.
int main(){
foo = new Foo;
f();
delete foo;
}

接下来，我们运行以下命令进行编译和反汇编。请注意，-fPIC标志似乎是重现此问题所必需的，并且对于我的用例也是必需的，因为我正在构建一个库。假设上面的代码位于名为TL.cc的文件中

g++ -std=c++11 -O3  -fPIC  -Wall -g TL.cc   -o TL 
objdump -d TL

这是函数 f() 的汇编转储。

400760:       53                      push   %rbx
# Notice this computation happens only once.
400761:       64 48 8b 04 25 00 00    mov    %fs:0x0,%rax
400768:       00 00 
40076a:       48 8d 80 f8 ff ff ff    lea    -0x8(%rax),%rax
400771:       48 89 c3                mov    %rax,%rbx
400774:       48 8b 00                mov    (%rax),%rax
400777:       c7 00 05 00 00 00       movl   $0x5,(%rax)
40077d:       e8 ce ff ff ff          callq  400750 <_Z3barv>
# Observe that the value of rbx came from before the function call,
# so if the function bar() actually returned on a different kernel
# thread, we would be referencing the original kernel thread's 
# version of foo, instead of the new kernel thread's version.
400782:       48 8b 03                mov    (%rbx),%rax
400785:       c7 40 04 07 00 00 00    movl   $0x7,0x4(%rax)
40078c:       5b                      pop    %rbx
40078d:       c3                      retq   
40078e:       66 90                   xchg   %ax,%ax

我们观察到寄存器rax正在从内存中重新加载，但是内存位置是在调用bar()之前确定的。

有没有办法强制重新加载变量的地址作为fs寄存器当前值的偏移量？

如果存在 gcc 特有的技巧（hack），我也可以使用。

这是 g++ --version 的输出：

g++ (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
Copyright (C) 2015 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

我写了下面这个技巧，在我测试过的所有情况下它都能强制重新加载 TLS，但我希望得到反馈，了解它在哪些情况下可能失效。

/*
 * SafeTLS(TypeName, name)
 *
 * Declares a struct `name##_SafeClass` and one object of it, `name##_Safe`,
 * that wraps every access to the (thread-local) variable `name`:
 *
 *   name_Safe = v        stores v into `name`
 *   name_Safe->member    goes through get() and yields a reference to `name`
 *   (TypeName)name_Safe  converts to a copy of the current value of `name`
 *   name_Safe.get()      returns a reference to `name`
 *   &name_Safe           returns the address of `name`
 *
 * The accessors are marked noinline and start with an empty asm statement;
 * the intent is that `name` is located inside a separate function on every
 * access, not reused from an address computed earlier in the caller.
 *
 * Every line of the definition must end in a backslash: without the
 * continuations the macro expands to nothing and the struct body is parsed
 * as ordinary (invalid) top-level code.
 *
 * NOTE: TypeName is substituted textually. For a pointer type such as
 * `Foo*`, the parameter `const TypeName&` of operator= reads as
 * `const Foo*&` (const applies to the pointee), so operator= cannot take
 * an rvalue of pointer type.
 */
#define SafeTLS(TypeName, name) \
struct name##_SafeClass { \
    /* Store `other` into the wrapped variable. */ \
    name##_SafeClass& \
    __attribute__ ((noinline)) \
    operator=(const TypeName& other) { \
        asm (""); \
        name = const_cast<TypeName&>(other); \
        return *this; \
    } \
    /* Member access: forwards to get(). */ \
    TypeName& \
    __attribute__ ((noinline)) \
    operator->() { \
        asm (""); \
        return get(); \
    } \
    /* Implicit conversion to a copy of the current value. */ \
    operator TypeName() { return get(); } \
    /* Reference to the wrapped variable. */ \
    TypeName& \
    __attribute__ ((noinline)) \
    get() { \
        asm (""); \
        return name; \
    } \
    \
    /* Address of the wrapped variable (not of the wrapper object). */ \
    TypeName* \
    operator&() { \
        asm (""); \
        return &name; \
    } \
} name##_Safe

下面是使用它的更复杂的测试用例。

#include <stdio.h>
#include "TLS.h"
// Plain two-int record that the thread-local pointer `foo` points at.
struct Foo {
    int x;
    int y;
};

// Thread-local variables under test (GCC `__thread`).
__thread Foo* volatile foo;
__thread int bar;

// Declare the wrapper objects foo_Safe and bar_Safe.
// NOTE(review): SafeTLS is presumably provided by "TLS.h" -- confirm.
SafeTLS(Foo* volatile, foo);
SafeTLS(int, bar);

// Opaque call whose body is a single `nop`; plays the role of the call
// across which the thread-local address should be recomputed.
void f2() {
    asm("nop");
}

// Exercises the wrappers: writes foo's fields through foo_Safe on both sides
// of the f2() call, then prints `bar` and the fields of *foo, one record per
// line.
void f() {
    foo_Safe->x = 5;
    f2();
    asm volatile("":::"memory");
    // We desire a second computation of the address of foo here as an offset
    // from the FS register.
    (*foo_Safe).y = 7;
    bar = 7;
    printf("%d\n", bar);
    printf("%d %d\n", foo->x, foo->y);
    bar = 8;
    printf("%d\n", bar_Safe.get());
}

// Allocates a Foo for the calling thread's `foo`, runs f(), then frees it.
int main() {
    foo = new Foo;
    f();
    delete foo;
}