以 7 位为单位访问 8 位数据

Accessing 8-bit data as 7-bit

本文关键字：数据 访问 身份　|　更新时间：2023-10-16

>我有一个包含 100 个 uint8_t 的数组，它被视为一个 800 位的流，每次处理 7 位。换句话说，如果 8 位数组的第一个元素保存 0b11001100，第二个元素保存 0b11110000，那么当我以 7 位格式读取它时，7 位数组的第一个元素将是 0b1100110，第二个元素将是 0b0111100，其余 2 位保存在第三个元素中。我首先尝试的是联合体(union)...

// The questioner's first attempt: a 7-bit unsigned value declared as a
// bit-field inside a uint8_t.
struct uint7_t {
uint8_t i1:7; // 7-bit wide field
};
// Overlays the raw 100-byte buffer with an array of uint7_t.
// NOTE(review): per the surrounding discussion each uint7_t element is still
// byte-aligned, so this does NOT pack 7-bit fields back to back; u7[k] simply
// overlaps u8[k] and one bit of every byte becomes unreachable.
union uint7_8_t {
uint8_t u8[100]; // the 800-bit stream as bytes
uint7_t u7[115]; // intended 7-bit view (800 / 7 rounded up)
};

但当然，所有内容都是字节对齐的，我基本上最终只是丢失了每个元素的第 8 位。

有没有人知道我该怎么做？

需要明确的是，这是联合结果的视觉表示：

xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx    32 位的 8 位数据
0xxxxxxx 0xxxxxxx 0xxxxxxx 0xxxxxxx    32 位的 7 位数据。

这代表了我想做的：

xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx    32 位的 8 位数据
xxxxxxx xxxxxxx xxxxxxx xxxxxxx xxxx    32 位的 7 位数据。

我知道最后几位可能需要填充，但这没关系，我只是想要某种方式以每次 7 位的形式访问这些字节，而不丢失 800 位中的任何一位。到目前为止，我能想到的唯一方法是大量的移位操作，这当然可行，但我相信一定有更干净的方法(？)

提前感谢您的任何答案。

不确定“更干净”是什么意思。通常，经常处理此类问题的人都认为移位和掩码正是合适的基本工具。也可以做一些封装，比如定义一个比特流抽象，提供从流中读取任意数量位的方法。这种抽象有时会出现在压缩应用程序中。当然，该方法的内部仍然使用移位和掩码。

一种相当干净的方法是编写一个函数，从无符号字符数组中的任意位索引处提取一个 7 位数字。用除法将位索引转换为字节索引，用取模得到字节内的位索引，然后移位并掩码。输入位可能跨越两个字节，因此必须在提取之前先拼出一个 16 位值，或者做两次较小的提取，再用按位或把它们组合成结果。

如果我的目标是中等性能的东西，我可能会采取以下两种方法之一：

第一种方法有两个状态变量，分别表示从当前字节和下一个字节中各取多少位。它使用移位、掩码和按位或来生成当前输出(例如，一个 0 到 127 之间的 int)，然后循环通过加法和取模更新这两个状态变量；如果当前字节中的所有位都已被消耗，则递增当前字节指针。

第二种方法是将 56 位(相当于 8 个输出所需的输入)加载到一个 64 位整数中，并使用完全展开的代码提取 8 个输出中的每一个。若不使用未对齐的内存读取，则需要逐段拼出这个 64 位整数。(56 位之所以特殊，是因为每组的起始位位置都是字节对齐的。)

如果要真正快速的实现，我可能会尝试用 Halide 编写 SIMD 代码。我认为这超出了这里的范围。(目前也不清楚这样做是否真能带来很大提升。)

一次将多个字节读入一个整数的设计，可能必须考虑处理器的字节序。

以 8 个为一组处理它们(因为 8x7 = 56 位，正好按 8 位对齐)。按位运算符在这里是家常便饭。处理最后(最多)7 个数字稍微麻烦一点，但并非做不到。(此代码假定这些是无符号的 7 位整数！有符号转换需要您考虑在 bit[6] 为 1 时翻转最高位。)

// Unpack seven packed bytes (56 bits) into eight unsigned 7-bit values in one go.
//
// The stream is read least-significant-bit first: output[k] holds stream bits
// [7k, 7k + 7), where stream bit n is bit (n % 8) of input[n / 8]. Every
// output byte is in the range 0..127 (its top bit is always clear).
//
// input  : exactly 7 readable bytes.
// output : exactly 8 writable bytes.
void extract8(const uint8_t input[7], uint8_t output[8])
{
    output[0] =   input[0] & 0x7F;
    output[1] =  (input[0] >> 7)  | ((input[1] << 1) & 0x7F);
    output[2] =  (input[1] >> 6)  | ((input[2] << 2) & 0x7F);
    output[3] =  (input[2] >> 5)  | ((input[3] << 3) & 0x7F);
    output[4] =  (input[3] >> 4)  | ((input[4] << 4) & 0x7F);
    output[5] =  (input[4] >> 3)  | ((input[5] << 5) & 0x7F);
    output[6] =  (input[5] >> 2)  | ((input[6] << 6) & 0x7F);
    output[7] =   input[6] >> 1;
}

// Unpack `count` unsigned 7-bit values from the packed stream `input` into
// one byte each in `output`.
//
// input  : at least ceil(7 * count / 8) readable bytes.
// output : at least `count` writable bytes.
// count  : number of 7-bit values to produce; 0 writes nothing.
//
// Whole groups of eight values are unpacked straight from `input`. The final
// partial group (1..7 values) goes through zero-padded scratch buffers so that
// neither `input` nor `output` is touched past its required size.
void seven_bit_to_8bit(const uint8_t* const input, uint8_t* const output, const size_t count)
{
    const size_t full_groups = count / 8;
    for (size_t group = 0; group < full_groups; ++group)
    {
        extract8(input + 7 * group, output + 8 * group);
    }

    const size_t tail_values = count % 8;
    if (tail_values == 0)
    {
        return;
    }

    // Bytes that carry the remaining values: ceil(7 * tail_values / 8), at most 7.
    const size_t tail_bytes = (7 * tail_values + 7) / 8;

    uint8_t in[7] = {0};
    uint8_t out[8] = {0};
    for (size_t i = 0; i < tail_bytes; ++i)
    {
        in[i] = input[full_groups * 7 + i];
    }
    extract8(in, out);

    // Copy the UNPACKED values (out), not the raw packed bytes (in).
    for (size_t i = 0; i < tail_values; ++i)
    {
        output[full_groups * 8 + i] = out[i];
    }
}

这是一个利用 std::vector<bool> 特化的解决方案。它还使用类似的代理机制，允许通过引用对象访问 7 位元素。

成员函数允许执行以下操作：

// Usage walkthrough for Arr<uint7_t>; the same statements appear in main().
uint7_t x{5};               // simple value
Arr<uint7_t> arr(10);       // array of size 10
arr[0] = x;                 // set element
uint7_t y = arr[0];         // get element
arr.push_back(uint7_t{9});  // add element
arr.push_back(x);           //
std::cout << "Array size is "
          << arr.size() << '\n';  // get size
for(auto&& i : arr)
    std::cout << i << '\n'; // range-for to read values
int z{50};
for(auto&& i : arr)
    i = z++;                // range-for to change values
auto&& v = arr[1];          // get reference to second element
v = 99;                     // change second element via reference
完整程序：

#include <vector>
#include <iterator>
#include <iostream>
// Value type for one 7-bit unsigned number (0..127).
struct uint7_t {
unsigned int i : 7; // the value itself; assignments keep only the low 7 bits
};
struct seven_bit_ref {
size_t begin;
size_t end;
std::vector<bool>& bits;
seven_bit_ref& operator=(const uint7_t& right)
{
auto it{bits.begin()+begin};
for(int mask{1}; mask != 1 << 7; mask <<= 1)
*it++ = right.i & mask;
return *this;
}
operator uint7_t() const
{
uint7_t r{};
auto it{bits.begin() + begin};
for(int i{}; i < 7; ++i)
r.i += *it++ << i;
return r;
}
seven_bit_ref operator*()
{
return *this;
}
void operator++()
{
begin += 7;
end += 7;
}
bool operator!=(const seven_bit_ref& right)
{
return !(begin == right.begin && end == right.end);
}
seven_bit_ref operator=(int val)
{
uint7_t temp{};
temp.i = val;
operator=(temp);
return *this;
}
};
// Primary template is only declared; the container exists solely for uint7_t.
template<typename T>
class Arr;

// Growable array of 7-bit values packed back to back in a std::vector<bool>
// (7 bits per element, least-significant bit first). Elements are accessed
// through seven_bit_ref proxies; no bounds checking is done.
template<>
class Arr<uint7_t> {
public:
    // Create `size` elements, all zero.
    Arr(size_t size) : bits(size * 7, false) {}

    // Proxy for the element at `index`.
    seven_bit_ref operator[](size_t index)
    {
        const size_t first_bit = index * 7;
        return {first_bit, first_bit + 7, bits};
    }

    // Number of 7-bit elements currently stored.
    size_t size()
    {
        return bits.size() / 7;
    }

    // Append one element.
    void push_back(uint7_t val)
    {
        for (int bit = 0; bit < 7; ++bit)
        {
            bits.push_back(((val.i >> bit) & 1u) != 0);
        }
    }

    // Range-for support: proxy positioned on the first element.
    seven_bit_ref begin()
    {
        return {0, 7, bits};
    }

    // Range-for support: proxy positioned one element past the last.
    seven_bit_ref end()
    {
        const size_t past_last_bit = size() * 7;
        return {past_last_bit, past_last_bit + 7, bits};
    }

    std::vector<bool> bits;  // packed storage, 7 bits per element
};
// Stream a uint7_t as its numeric value; returns the stream for chaining.
std::ostream& operator<<(std::ostream& os, uint7_t val)
{
    const unsigned int number = val.i;
    return os << number;
}
// Demo for Arr<uint7_t>: element get/set, push_back, range-for reads and
// writes, and modification through a proxy reference.
int main()
{
    uint7_t x{5};               // simple value
    Arr<uint7_t> arr(10);       // array of size 10
    arr[0] = x;                 // set element
    uint7_t y = arr[0];         // get element
    arr.push_back(uint7_t{9});  // add element
    arr.push_back(x);           //
    std::cout << "Array size is "
              << arr.size() << '\n';  // get size
    for(auto&& i : arr)
        std::cout << i << '\n'; // range-for to read values
    int z{50};
    for(auto&& i : arr)
        i = z++;                // range-for to change values
    auto&& v = arr[1];          // get reference
    v = 99;                     // change via reference
    std::cout << "\nAfter changes:\n";
    for(auto&& i : arr)
        std::cout << i << '\n';
}

以下代码按照您的要求工作，但首先是 ideone 上的输出和实时示例。

输出：

Before changing values...:
7 bit representation: 1111111 0000000 0000000 0000000 0000000 0000000 0000000 0000000 
8 bit representation: 11111110 00000000 00000000 00000000 00000000 00000000 00000000 
After changing values...:
7 bit representation: 1000000 1001100 1110010 1011010 1010100 0000111 1111110 0000000 
8 bit representation: 10000001 00110011 10010101 10101010 10000001 11111111 00000000 
8 Bits: 11111111 to ulong: 255
7 Bits: 1111110 to ulong: 126
After changing values...:
7 bit representation: 0010000 0101010 0100000 0000000 0000000 0000000 0000000 0000000 
8 bit representation: 00100000 10101001 00000000 00000000 00000000 00000000 00000000

在名为 BitVector 的类中使用 std::bitset 非常简单。我实现了一个 getter 和一个 setter。getter 返回位于给定索引 selIdx 处、大小为模板参数 M 的 std::bitset；给定的索引会乘以大小 M 以得到正确的位置。返回的 bitset 也可以转换为数字或字符串值。
setter 使用一个 uint8_t 值和索引 selIdx 作为输入，各个位会被移动到 bitset 中的正确位置。

此外，由于模板参数M，您可以使用不同大小的 getter 和 setter，这意味着您可以使用 7 位或 8 位表示，也可以使用 3 位或任何您喜欢的表示。

就速度而言，我确信这段代码不是最优的，但我认为这是一个非常清晰、干净的解决方案。此外，它远不完整，因为只有一个 getter、一个 setter 和两个构造函数。请记得对索引和大小实现错误检查。

代码：

#include <iostream>
#include <bitset>
// Fixed-size bit container (N bits, backed by std::bitset<N>) whose contents
// can be read and written as consecutive M-bit elements for any width M.
//
// Bit order: element `selIdx` of width M occupies stored positions
// [M * selIdx, M * selIdx + M). The FIRST stored position holds the element's
// MOST significant bit, so getBits<M>() and setBits<M>() round-trip a value.
template <size_t N> class BitVector
{
private:
    std::bitset<N> _data;

public:
    // Initialise from a number (intentionally implicit: `BitVector<56> v = 127;`).
    BitVector (unsigned long num) : _data (num) { }
    // Initialise from a string of '0'/'1' characters, as std::bitset does.
    BitVector (const std::string& str) : _data (str) { }

    // Return element `selIdx` of width M.
    // Throws std::out_of_range (from std::bitset::test) if the element does
    // not lie entirely inside the N stored bits.
    template <size_t M>
    std::bitset<M> getBits (size_t selIdx) const
    {
        std::bitset<M> retBitset;
        for (size_t idx = 0; idx < M; ++idx)
        {
            // set() instead of shifting a bool: the shift was done in int and
            // overflowed for M > 31.
            retBitset.set(M - 1 - idx, _data.test(M * selIdx + idx));
        }
        return retBitset;
    }

    // Overwrite element `selIdx` of width M with `num`.
    // Exactly M positions are written, so neighbouring elements are left
    // alone. For M < 8 only the low M bits of `num` are stored; for M > 8 the
    // bits above bit 7 are cleared. For M == 8 this is the full byte.
    // Throws std::out_of_range (from std::bitset::set) on an invalid position.
    template <size_t M>
    void setBits (size_t selIdx, uint8_t num)
    {
        for (size_t idx = 0; idx < M; ++idx)
        {
            const size_t srcBit = M - 1 - idx;  // mirrors the order used by getBits
            const bool bitSet = srcBit < 8 && ((num >> srcBit) & 1u) != 0;
            _data.set(M * selIdx + idx, bitSet);
        }
    }

    // Print the contents first as N / 7 seven-bit groups, then as N / 8
    // eight-bit groups, each listing preceded by a newline and a label.
    void print_7_8() const
    {
        std::cout << "\n7 bit representation: ";
        for (size_t idx = 0; idx < (N / 7); ++idx)
        {
            std::cout << getBits<7>(idx) << " ";
        }
        std::cout << "\n8 bit representation: ";
        for (size_t idx = 0; idx < N / 8; ++idx)
        {
            std::cout << getBits<8>(idx) << " ";
        }
    }
};
// Demo for BitVector<56>: prints the 7-bit and 8-bit views before and after
// writing seven bytes, shows single-element reads, then reloads from a string.
int main ()
{
    BitVector<56> num = 127;
    std::cout << "Before changing values...:";
    num.print_7_8();
    num.setBits<8>(0, 0x81);
    num.setBits<8>(1, 0b00110011);
    num.setBits<8>(2, 0b10010101);
    num.setBits<8>(3, 0xAA);
    num.setBits<8>(4, 0x81);
    num.setBits<8>(5, 0xFF);
    num.setBits<8>(6, 0x00);
    std::cout << "\n\nAfter changing values...:";
    num.print_7_8();
    std::cout << "\n\n8 Bits: " << num.getBits<8>(5) << " to ulong: " << num.getBits<8>(5).to_ulong();
    std::cout << "\n7 Bits: " << num.getBits<7>(6) << " to ulong: " << num.getBits<7>(6).to_ulong();
    num = BitVector<56>(std::string("1001010100000100"));
    std::cout << "\n\nAfter changing values...:";
    num.print_7_8();
    return 0;
}

这是一种无需手动移位的方法。这只是一个粗略的概念验证(POC)，但希望您能从中得到一些启发。我不知道您是否能轻松地把输入转换为 bitset，但我认为这应该是可行的。

int bytes = 0x01234567;
bitset<32> bs(bytes);
cout << "Input: " << bs << endl;
for(int i = 0; i < 5; i++)
{
bitset<7> slice(bs.to_string().substr(i*7, 7));
cout << slice << endl;
}

此外，这可能比位移版本的性能要差得多，所以我不建议将其用于繁重的工作。

您可以用它从 in 中取出第 index 个 7 位元素(请注意，它没有正确处理数组末尾)。简单、快速。

// Return the index-th 7-bit value (0..127) of the packed byte stream `in`.
//
// The stream is read least-significant-bit first: the value occupies stream
// bits [7 * index, 7 * index + 7), where stream bit n is bit (n % 8) of
// in[n / 8]. The caller must ensure those bits lie inside the buffer; no
// bounds checking is possible here because the length is not passed in.
int get7(const uint8_t *in, int index) {
    const int bit_offset = index * 7;
    const int byte_idx = bit_offset >> 3;
    const int shift = bit_offset & 7;

    unsigned value = static_cast<unsigned>(in[byte_idx]) >> shift;
    // With shift <= 1 all seven bits come from the current byte. Reading the
    // next byte in that case would run past the end of the buffer for the
    // last element, so only touch it when the value really straddles two bytes.
    if (shift > 1) {
        value |= static_cast<unsigned>(in[byte_idx + 1]) << (8 - shift);
    }
    return static_cast<int>(value & 0x7f);
}

您可以使用直接访问或批量位打包/解包，如 TurboPFor：Integer 压缩

// Direct read access
// b : bit width 0-16 (7 in your case)

// Keep the low b bits of u (b must be below 32).
#define bzhi32(u,b) ((u) & ((1u  <<(b))-1))

// Return the idx-th b-bit value of the packed stream `in`.
//
// The stream is read least-significant-bit first (little-endian layout): the
// value occupies stream bits [b * idx, b * idx + b). The window starts at the
// 16-bit unit containing the first bit, exactly as in the pointer-cast
// formulation, but is assembled byte by byte. That avoids the unaligned,
// type-punned 32-bit load, does not depend on host byte order, and reads only
// the bytes that actually contain requested bits.
static inline unsigned  bitgetx16(unsigned char *in,
                                  unsigned  idx,
                                  unsigned b) {
    if (b == 0) {
        return 0;  // nothing requested: do not touch memory
    }
    const unsigned bidx = b * idx;
    const unsigned char *window = in + ((bidx >> 4) << 1);
    const unsigned shift = bidx & 0xf;
    // shift <= 15 and b <= 16, so at most 31 bits (4 bytes) are needed.
    const unsigned nbytes = (shift + b + 7) >> 3;

    unsigned word = 0;
    for (unsigned k = 0; k < nbytes; ++k) {
        word |= static_cast<unsigned>(window[k]) << (8 * k);
    }
    return bzhi32(word >> shift, b);
}